{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 27591, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.62437026566634e-05, "grad_norm": 15.541576556365733, "learning_rate": 1.2077294685990339e-08, "loss": 2.0002, "step": 1 }, { "epoch": 7.24874053133268e-05, "grad_norm": 14.700270699179145, "learning_rate": 2.4154589371980678e-08, "loss": 1.7149, "step": 2 }, { "epoch": 0.00010873110796999022, "grad_norm": 17.58229976195923, "learning_rate": 3.623188405797102e-08, "loss": 1.9768, "step": 3 }, { "epoch": 0.0001449748106266536, "grad_norm": 12.016776084200494, "learning_rate": 4.8309178743961356e-08, "loss": 1.8272, "step": 4 }, { "epoch": 0.00018121851328331703, "grad_norm": 11.920371478573102, "learning_rate": 6.038647342995169e-08, "loss": 1.675, "step": 5 }, { "epoch": 0.00021746221593998044, "grad_norm": 13.968337012251448, "learning_rate": 7.246376811594204e-08, "loss": 1.8352, "step": 6 }, { "epoch": 0.00025370591859664386, "grad_norm": 15.137335917686986, "learning_rate": 8.454106280193236e-08, "loss": 2.1616, "step": 7 }, { "epoch": 0.0002899496212533072, "grad_norm": 11.119762853091148, "learning_rate": 9.661835748792271e-08, "loss": 1.7471, "step": 8 }, { "epoch": 0.00032619332390997064, "grad_norm": 14.251440912711505, "learning_rate": 1.0869565217391305e-07, "loss": 1.8542, "step": 9 }, { "epoch": 0.00036243702656663405, "grad_norm": 10.424815354423203, "learning_rate": 1.2077294685990338e-07, "loss": 1.7818, "step": 10 }, { "epoch": 0.00039868072922329747, "grad_norm": 16.873226516125477, "learning_rate": 1.3285024154589373e-07, "loss": 2.0187, "step": 11 }, { "epoch": 0.0004349244318799609, "grad_norm": 12.972700344015015, "learning_rate": 1.4492753623188408e-07, "loss": 1.8531, "step": 12 }, { "epoch": 0.00047116813453662424, "grad_norm": 12.362666127188547, "learning_rate": 1.570048309178744e-07, "loss": 1.9923, "step": 13 }, { "epoch": 0.0005074118371932877, "grad_norm": 12.324331544955783, "learning_rate": 1.6908212560386473e-07, "loss": 1.8471, "step": 14 }, { "epoch": 0.0005436555398499511, "grad_norm": 18.462301882331467, "learning_rate": 1.811594202898551e-07, "loss": 2.11, "step": 15 }, { "epoch": 0.0005798992425066144, "grad_norm": 14.601000567442254, "learning_rate": 1.9323671497584542e-07, "loss": 2.0283, "step": 16 }, { "epoch": 0.0006161429451632779, "grad_norm": 13.974674798150971, "learning_rate": 2.0531400966183577e-07, "loss": 2.0272, "step": 17 }, { "epoch": 0.0006523866478199413, "grad_norm": 19.010874960114915, "learning_rate": 2.173913043478261e-07, "loss": 1.9145, "step": 18 }, { "epoch": 0.0006886303504766047, "grad_norm": 14.236546400236085, "learning_rate": 2.2946859903381647e-07, "loss": 2.3082, "step": 19 }, { "epoch": 0.0007248740531332681, "grad_norm": 13.101681688053562, "learning_rate": 2.4154589371980677e-07, "loss": 1.8439, "step": 20 }, { "epoch": 0.0007611177557899315, "grad_norm": 15.000701927265586, "learning_rate": 2.536231884057971e-07, "loss": 1.9457, "step": 21 }, { "epoch": 0.0007973614584465949, "grad_norm": 13.763865939986726, "learning_rate": 2.6570048309178746e-07, "loss": 1.9744, "step": 22 }, { "epoch": 0.0008336051611032583, "grad_norm": 11.73276671246369, "learning_rate": 2.7777777777777776e-07, "loss": 1.938, "step": 23 }, { "epoch": 0.0008698488637599218, "grad_norm": 13.140035262758559, "learning_rate": 2.8985507246376816e-07, "loss": 1.8828, "step": 24 }, { "epoch": 0.0009060925664165851, "grad_norm": 11.233960216507924, "learning_rate": 3.0193236714975846e-07, "loss": 1.8809, "step": 25 }, { "epoch": 0.0009423362690732485, "grad_norm": 13.259228337478175, "learning_rate": 3.140096618357488e-07, "loss": 1.8748, "step": 26 }, { "epoch": 0.0009785799717299119, "grad_norm": 16.055259024589553, "learning_rate": 3.2608695652173915e-07, "loss": 2.0102, "step": 27 }, { "epoch": 0.0010148236743865754, "grad_norm": 11.99151285990157, "learning_rate": 3.3816425120772945e-07, "loss": 1.8531, "step": 28 }, { "epoch": 0.0010510673770432388, "grad_norm": 11.776833846508, "learning_rate": 3.5024154589371985e-07, "loss": 1.8117, "step": 29 }, { "epoch": 0.0010873110796999022, "grad_norm": 12.675829174180953, "learning_rate": 3.623188405797102e-07, "loss": 1.9612, "step": 30 }, { "epoch": 0.0011235547823565655, "grad_norm": 14.130159474597122, "learning_rate": 3.743961352657005e-07, "loss": 1.9373, "step": 31 }, { "epoch": 0.0011597984850132289, "grad_norm": 13.122185806285959, "learning_rate": 3.8647342995169085e-07, "loss": 1.9944, "step": 32 }, { "epoch": 0.0011960421876698925, "grad_norm": 13.11182042904279, "learning_rate": 3.9855072463768114e-07, "loss": 2.0124, "step": 33 }, { "epoch": 0.0012322858903265558, "grad_norm": 12.177251755269427, "learning_rate": 4.1062801932367154e-07, "loss": 1.7412, "step": 34 }, { "epoch": 0.0012685295929832192, "grad_norm": 12.459638339741316, "learning_rate": 4.227053140096619e-07, "loss": 1.9236, "step": 35 }, { "epoch": 0.0013047732956398825, "grad_norm": 11.423094866180122, "learning_rate": 4.347826086956522e-07, "loss": 1.7914, "step": 36 }, { "epoch": 0.001341016998296546, "grad_norm": 15.61000861261294, "learning_rate": 4.4685990338164254e-07, "loss": 1.8827, "step": 37 }, { "epoch": 0.0013772607009532095, "grad_norm": 14.132254708804936, "learning_rate": 4.5893719806763294e-07, "loss": 2.0148, "step": 38 }, { "epoch": 0.0014135044036098728, "grad_norm": 10.721250722839192, "learning_rate": 4.7101449275362324e-07, "loss": 1.7117, "step": 39 }, { "epoch": 0.0014497481062665362, "grad_norm": 11.105093475162871, "learning_rate": 4.830917874396135e-07, "loss": 1.9087, "step": 40 }, { "epoch": 0.0014859918089231996, "grad_norm": 10.098328877026193, "learning_rate": 4.951690821256039e-07, "loss": 1.849, "step": 41 }, { "epoch": 0.001522235511579863, "grad_norm": 9.963340479459834, "learning_rate": 5.072463768115942e-07, "loss": 1.7266, "step": 42 }, { "epoch": 0.0015584792142365265, "grad_norm": 11.9912945728687, "learning_rate": 5.193236714975846e-07, "loss": 1.9599, "step": 43 }, { "epoch": 0.0015947229168931899, "grad_norm": 10.518370185054414, "learning_rate": 5.314009661835749e-07, "loss": 1.7356, "step": 44 }, { "epoch": 0.0016309666195498532, "grad_norm": 10.047920844488067, "learning_rate": 5.434782608695653e-07, "loss": 1.7396, "step": 45 }, { "epoch": 0.0016672103222065166, "grad_norm": 9.111004360282767, "learning_rate": 5.555555555555555e-07, "loss": 1.6445, "step": 46 }, { "epoch": 0.00170345402486318, "grad_norm": 9.519628626047304, "learning_rate": 5.67632850241546e-07, "loss": 1.8945, "step": 47 }, { "epoch": 0.0017396977275198435, "grad_norm": 9.174851777987637, "learning_rate": 5.797101449275363e-07, "loss": 1.553, "step": 48 }, { "epoch": 0.001775941430176507, "grad_norm": 11.332213767967904, "learning_rate": 5.917874396135266e-07, "loss": 1.9908, "step": 49 }, { "epoch": 0.0018121851328331703, "grad_norm": 8.834319367320894, "learning_rate": 6.038647342995169e-07, "loss": 1.7672, "step": 50 }, { "epoch": 0.0018484288354898336, "grad_norm": 9.27092888184086, "learning_rate": 6.159420289855074e-07, "loss": 1.8666, "step": 51 }, { "epoch": 0.001884672538146497, "grad_norm": 9.243608687167761, "learning_rate": 6.280193236714976e-07, "loss": 1.6917, "step": 52 }, { "epoch": 0.0019209162408031606, "grad_norm": 8.47002257958698, "learning_rate": 6.400966183574881e-07, "loss": 1.7742, "step": 53 }, { "epoch": 0.0019571599434598237, "grad_norm": 8.445727492622932, "learning_rate": 6.521739130434783e-07, "loss": 1.6147, "step": 54 }, { "epoch": 0.001993403646116487, "grad_norm": 7.865263281702655, "learning_rate": 6.642512077294687e-07, "loss": 1.7328, "step": 55 }, { "epoch": 0.002029647348773151, "grad_norm": 7.492082139806146, "learning_rate": 6.763285024154589e-07, "loss": 1.692, "step": 56 }, { "epoch": 0.0020658910514298142, "grad_norm": 6.9752015675251, "learning_rate": 6.884057971014494e-07, "loss": 1.6767, "step": 57 }, { "epoch": 0.0021021347540864776, "grad_norm": 6.646407611161348, "learning_rate": 7.004830917874397e-07, "loss": 1.6158, "step": 58 }, { "epoch": 0.002138378456743141, "grad_norm": 7.434510135989838, "learning_rate": 7.1256038647343e-07, "loss": 1.6233, "step": 59 }, { "epoch": 0.0021746221593998043, "grad_norm": 7.255148275018753, "learning_rate": 7.246376811594204e-07, "loss": 1.7192, "step": 60 }, { "epoch": 0.0022108658620564677, "grad_norm": 5.878009309530304, "learning_rate": 7.367149758454106e-07, "loss": 1.5493, "step": 61 }, { "epoch": 0.002247109564713131, "grad_norm": 6.647769948387906, "learning_rate": 7.48792270531401e-07, "loss": 1.7208, "step": 62 }, { "epoch": 0.0022833532673697944, "grad_norm": 6.5200803572076165, "learning_rate": 7.608695652173914e-07, "loss": 1.6361, "step": 63 }, { "epoch": 0.0023195969700264578, "grad_norm": 7.181828324735773, "learning_rate": 7.729468599033817e-07, "loss": 1.7137, "step": 64 }, { "epoch": 0.002355840672683121, "grad_norm": 5.9128406962989954, "learning_rate": 7.85024154589372e-07, "loss": 1.5063, "step": 65 }, { "epoch": 0.002392084375339785, "grad_norm": 5.606706798203036, "learning_rate": 7.971014492753623e-07, "loss": 1.5862, "step": 66 }, { "epoch": 0.0024283280779964483, "grad_norm": 6.1203184669474195, "learning_rate": 8.091787439613527e-07, "loss": 1.7936, "step": 67 }, { "epoch": 0.0024645717806531116, "grad_norm": 5.922154956629735, "learning_rate": 8.212560386473431e-07, "loss": 1.7501, "step": 68 }, { "epoch": 0.002500815483309775, "grad_norm": 6.14469274023686, "learning_rate": 8.333333333333333e-07, "loss": 1.6239, "step": 69 }, { "epoch": 0.0025370591859664384, "grad_norm": 5.399948187644755, "learning_rate": 8.454106280193238e-07, "loss": 1.5579, "step": 70 }, { "epoch": 0.0025733028886231017, "grad_norm": 4.909719956560067, "learning_rate": 8.574879227053141e-07, "loss": 1.5845, "step": 71 }, { "epoch": 0.002609546591279765, "grad_norm": 4.535324755566154, "learning_rate": 8.695652173913044e-07, "loss": 1.6114, "step": 72 }, { "epoch": 0.0026457902939364284, "grad_norm": 4.205766877377601, "learning_rate": 8.816425120772948e-07, "loss": 1.4374, "step": 73 }, { "epoch": 0.002682033996593092, "grad_norm": 4.073396694698473, "learning_rate": 8.937198067632851e-07, "loss": 1.5606, "step": 74 }, { "epoch": 0.002718277699249755, "grad_norm": 4.345430353752058, "learning_rate": 9.057971014492754e-07, "loss": 1.6136, "step": 75 }, { "epoch": 0.002754521401906419, "grad_norm": 5.690751021018, "learning_rate": 9.178743961352659e-07, "loss": 1.5319, "step": 76 }, { "epoch": 0.0027907651045630823, "grad_norm": 4.327329459010088, "learning_rate": 9.299516908212561e-07, "loss": 1.4262, "step": 77 }, { "epoch": 0.0028270088072197457, "grad_norm": 4.063592924365533, "learning_rate": 9.420289855072465e-07, "loss": 1.5015, "step": 78 }, { "epoch": 0.002863252509876409, "grad_norm": 4.162988747720827, "learning_rate": 9.541062801932368e-07, "loss": 1.4866, "step": 79 }, { "epoch": 0.0028994962125330724, "grad_norm": 3.94984052042244, "learning_rate": 9.66183574879227e-07, "loss": 1.5983, "step": 80 }, { "epoch": 0.0029357399151897358, "grad_norm": 3.7853892032852445, "learning_rate": 9.782608695652175e-07, "loss": 1.2316, "step": 81 }, { "epoch": 0.002971983617846399, "grad_norm": 3.667561893648874, "learning_rate": 9.903381642512078e-07, "loss": 1.4731, "step": 82 }, { "epoch": 0.0030082273205030625, "grad_norm": 3.4261806843631883, "learning_rate": 1.0024154589371982e-06, "loss": 1.4752, "step": 83 }, { "epoch": 0.003044471023159726, "grad_norm": 3.508808509598411, "learning_rate": 1.0144927536231885e-06, "loss": 1.3029, "step": 84 }, { "epoch": 0.0030807147258163892, "grad_norm": 3.5552264246383753, "learning_rate": 1.026570048309179e-06, "loss": 1.4232, "step": 85 }, { "epoch": 0.003116958428473053, "grad_norm": 3.8799227331925983, "learning_rate": 1.0386473429951692e-06, "loss": 1.5786, "step": 86 }, { "epoch": 0.0031532021311297164, "grad_norm": 3.832934868412185, "learning_rate": 1.0507246376811594e-06, "loss": 1.6365, "step": 87 }, { "epoch": 0.0031894458337863797, "grad_norm": 3.1075721027479046, "learning_rate": 1.0628019323671499e-06, "loss": 1.4614, "step": 88 }, { "epoch": 0.003225689536443043, "grad_norm": 3.9952949644104745, "learning_rate": 1.0748792270531403e-06, "loss": 1.5015, "step": 89 }, { "epoch": 0.0032619332390997065, "grad_norm": 3.429252472066974, "learning_rate": 1.0869565217391306e-06, "loss": 1.5686, "step": 90 }, { "epoch": 0.00329817694175637, "grad_norm": 3.496041250964603, "learning_rate": 1.0990338164251208e-06, "loss": 1.3799, "step": 91 }, { "epoch": 0.003334420644413033, "grad_norm": 3.2825543328568823, "learning_rate": 1.111111111111111e-06, "loss": 1.4871, "step": 92 }, { "epoch": 0.0033706643470696965, "grad_norm": 3.422933769307419, "learning_rate": 1.1231884057971015e-06, "loss": 1.5642, "step": 93 }, { "epoch": 0.00340690804972636, "grad_norm": 3.5128147941444996, "learning_rate": 1.135265700483092e-06, "loss": 1.4436, "step": 94 }, { "epoch": 0.0034431517523830233, "grad_norm": 3.72900343550033, "learning_rate": 1.1473429951690822e-06, "loss": 1.4671, "step": 95 }, { "epoch": 0.003479395455039687, "grad_norm": 3.193279843098619, "learning_rate": 1.1594202898550726e-06, "loss": 1.4854, "step": 96 }, { "epoch": 0.0035156391576963504, "grad_norm": 3.1856393042706186, "learning_rate": 1.1714975845410629e-06, "loss": 1.4317, "step": 97 }, { "epoch": 0.003551882860353014, "grad_norm": 3.490605847058073, "learning_rate": 1.1835748792270531e-06, "loss": 1.6272, "step": 98 }, { "epoch": 0.003588126563009677, "grad_norm": 3.2017755726565498, "learning_rate": 1.1956521739130436e-06, "loss": 1.4095, "step": 99 }, { "epoch": 0.0036243702656663405, "grad_norm": 3.4118580244205834, "learning_rate": 1.2077294685990338e-06, "loss": 1.4619, "step": 100 }, { "epoch": 0.003660613968323004, "grad_norm": 3.524468372902288, "learning_rate": 1.2198067632850243e-06, "loss": 1.4835, "step": 101 }, { "epoch": 0.0036968576709796672, "grad_norm": 2.8804644288334926, "learning_rate": 1.2318840579710147e-06, "loss": 1.1659, "step": 102 }, { "epoch": 0.0037331013736363306, "grad_norm": 3.763371901612892, "learning_rate": 1.243961352657005e-06, "loss": 1.5729, "step": 103 }, { "epoch": 0.003769345076292994, "grad_norm": 3.1227843181626684, "learning_rate": 1.2560386473429952e-06, "loss": 1.4206, "step": 104 }, { "epoch": 0.0038055887789496573, "grad_norm": 3.0856997785179265, "learning_rate": 1.2681159420289857e-06, "loss": 1.2126, "step": 105 }, { "epoch": 0.003841832481606321, "grad_norm": 3.470041776807232, "learning_rate": 1.2801932367149761e-06, "loss": 1.533, "step": 106 }, { "epoch": 0.0038780761842629845, "grad_norm": 3.249261170858702, "learning_rate": 1.2922705314009662e-06, "loss": 1.5528, "step": 107 }, { "epoch": 0.003914319886919647, "grad_norm": 4.088251099197208, "learning_rate": 1.3043478260869566e-06, "loss": 1.4307, "step": 108 }, { "epoch": 0.003950563589576311, "grad_norm": 3.1996870642805506, "learning_rate": 1.316425120772947e-06, "loss": 1.2898, "step": 109 }, { "epoch": 0.003986807292232974, "grad_norm": 3.1201072342864706, "learning_rate": 1.3285024154589373e-06, "loss": 1.4011, "step": 110 }, { "epoch": 0.004023050994889638, "grad_norm": 3.35686569233338, "learning_rate": 1.3405797101449278e-06, "loss": 1.4299, "step": 111 }, { "epoch": 0.004059294697546302, "grad_norm": 3.639617975011387, "learning_rate": 1.3526570048309178e-06, "loss": 1.3558, "step": 112 }, { "epoch": 0.004095538400202965, "grad_norm": 3.2620701848831066, "learning_rate": 1.3647342995169083e-06, "loss": 1.3246, "step": 113 }, { "epoch": 0.0041317821028596284, "grad_norm": 3.1902496362463597, "learning_rate": 1.3768115942028987e-06, "loss": 1.162, "step": 114 }, { "epoch": 0.004168025805516291, "grad_norm": 3.0876866045502465, "learning_rate": 1.3888888888888892e-06, "loss": 1.5032, "step": 115 }, { "epoch": 0.004204269508172955, "grad_norm": 3.3639888785181347, "learning_rate": 1.4009661835748794e-06, "loss": 1.383, "step": 116 }, { "epoch": 0.004240513210829618, "grad_norm": 2.834566683671836, "learning_rate": 1.4130434782608697e-06, "loss": 1.2097, "step": 117 }, { "epoch": 0.004276756913486282, "grad_norm": 2.895523002762449, "learning_rate": 1.42512077294686e-06, "loss": 1.4947, "step": 118 }, { "epoch": 0.004313000616142945, "grad_norm": 2.9636850144217237, "learning_rate": 1.4371980676328504e-06, "loss": 1.4923, "step": 119 }, { "epoch": 0.004349244318799609, "grad_norm": 3.219360463090135, "learning_rate": 1.4492753623188408e-06, "loss": 1.2621, "step": 120 }, { "epoch": 0.0043854880214562715, "grad_norm": 3.342404200841675, "learning_rate": 1.461352657004831e-06, "loss": 1.4203, "step": 121 }, { "epoch": 0.004421731724112935, "grad_norm": 3.24550594343313, "learning_rate": 1.4734299516908213e-06, "loss": 1.3235, "step": 122 }, { "epoch": 0.004457975426769599, "grad_norm": 3.1559947550335985, "learning_rate": 1.4855072463768117e-06, "loss": 1.2196, "step": 123 }, { "epoch": 0.004494219129426262, "grad_norm": 3.2928638342026186, "learning_rate": 1.497584541062802e-06, "loss": 1.4163, "step": 124 }, { "epoch": 0.004530462832082926, "grad_norm": 3.499434697418916, "learning_rate": 1.5096618357487924e-06, "loss": 1.4869, "step": 125 }, { "epoch": 0.004566706534739589, "grad_norm": 2.9280214043387067, "learning_rate": 1.521739130434783e-06, "loss": 1.3595, "step": 126 }, { "epoch": 0.004602950237396253, "grad_norm": 3.4933948931817804, "learning_rate": 1.533816425120773e-06, "loss": 1.3404, "step": 127 }, { "epoch": 0.0046391939400529155, "grad_norm": 3.1312261803201826, "learning_rate": 1.5458937198067634e-06, "loss": 1.291, "step": 128 }, { "epoch": 0.004675437642709579, "grad_norm": 2.6778900259218825, "learning_rate": 1.5579710144927536e-06, "loss": 1.3491, "step": 129 }, { "epoch": 0.004711681345366242, "grad_norm": 3.0302229525259032, "learning_rate": 1.570048309178744e-06, "loss": 1.5076, "step": 130 }, { "epoch": 0.004747925048022906, "grad_norm": 2.886602096911634, "learning_rate": 1.5821256038647345e-06, "loss": 1.2876, "step": 131 }, { "epoch": 0.00478416875067957, "grad_norm": 3.222985470929, "learning_rate": 1.5942028985507246e-06, "loss": 1.3261, "step": 132 }, { "epoch": 0.004820412453336233, "grad_norm": 3.4627140008902235, "learning_rate": 1.606280193236715e-06, "loss": 1.3133, "step": 133 }, { "epoch": 0.0048566561559928965, "grad_norm": 2.7309876013122985, "learning_rate": 1.6183574879227055e-06, "loss": 1.3502, "step": 134 }, { "epoch": 0.0048928998586495595, "grad_norm": 3.1292741905146357, "learning_rate": 1.6304347826086957e-06, "loss": 1.3001, "step": 135 }, { "epoch": 0.004929143561306223, "grad_norm": 2.969642672824231, "learning_rate": 1.6425120772946862e-06, "loss": 1.2209, "step": 136 }, { "epoch": 0.004965387263962886, "grad_norm": 3.4802576307021957, "learning_rate": 1.6545893719806766e-06, "loss": 1.4396, "step": 137 }, { "epoch": 0.00500163096661955, "grad_norm": 3.3507141018901923, "learning_rate": 1.6666666666666667e-06, "loss": 1.568, "step": 138 }, { "epoch": 0.005037874669276213, "grad_norm": 3.4143507234582198, "learning_rate": 1.6787439613526571e-06, "loss": 1.3227, "step": 139 }, { "epoch": 0.005074118371932877, "grad_norm": 2.857781110797985, "learning_rate": 1.6908212560386476e-06, "loss": 1.4199, "step": 140 }, { "epoch": 0.00511036207458954, "grad_norm": 3.4450312724187313, "learning_rate": 1.7028985507246378e-06, "loss": 1.5283, "step": 141 }, { "epoch": 0.0051466057772462034, "grad_norm": 2.66637008469826, "learning_rate": 1.7149758454106283e-06, "loss": 1.3213, "step": 142 }, { "epoch": 0.005182849479902867, "grad_norm": 3.1082995682234533, "learning_rate": 1.7270531400966183e-06, "loss": 1.4184, "step": 143 }, { "epoch": 0.00521909318255953, "grad_norm": 3.0257125170645733, "learning_rate": 1.7391304347826088e-06, "loss": 1.4513, "step": 144 }, { "epoch": 0.005255336885216194, "grad_norm": 3.0804280365826164, "learning_rate": 1.7512077294685992e-06, "loss": 1.2856, "step": 145 }, { "epoch": 0.005291580587872857, "grad_norm": 3.033124959828326, "learning_rate": 1.7632850241545897e-06, "loss": 1.3906, "step": 146 }, { "epoch": 0.005327824290529521, "grad_norm": 3.4573089273412756, "learning_rate": 1.77536231884058e-06, "loss": 1.3368, "step": 147 }, { "epoch": 0.005364067993186184, "grad_norm": 3.3888358534446072, "learning_rate": 1.7874396135265702e-06, "loss": 1.4667, "step": 148 }, { "epoch": 0.005400311695842847, "grad_norm": 3.46354181036685, "learning_rate": 1.7995169082125604e-06, "loss": 1.4434, "step": 149 }, { "epoch": 0.00543655539849951, "grad_norm": 3.3341359048832846, "learning_rate": 1.8115942028985508e-06, "loss": 1.2073, "step": 150 }, { "epoch": 0.005472799101156174, "grad_norm": 3.026144245219725, "learning_rate": 1.8236714975845413e-06, "loss": 1.3605, "step": 151 }, { "epoch": 0.005509042803812838, "grad_norm": 3.168974665035374, "learning_rate": 1.8357487922705318e-06, "loss": 1.332, "step": 152 }, { "epoch": 0.005545286506469501, "grad_norm": 3.212577438912976, "learning_rate": 1.8478260869565218e-06, "loss": 1.1934, "step": 153 }, { "epoch": 0.005581530209126165, "grad_norm": 2.7896453212594867, "learning_rate": 1.8599033816425122e-06, "loss": 1.3758, "step": 154 }, { "epoch": 0.005617773911782828, "grad_norm": 3.2781235930562085, "learning_rate": 1.8719806763285025e-06, "loss": 1.3811, "step": 155 }, { "epoch": 0.005654017614439491, "grad_norm": 3.074796720063564, "learning_rate": 1.884057971014493e-06, "loss": 1.1748, "step": 156 }, { "epoch": 0.005690261317096154, "grad_norm": 3.4143550507475644, "learning_rate": 1.8961352657004834e-06, "loss": 1.459, "step": 157 }, { "epoch": 0.005726505019752818, "grad_norm": 3.221821006415869, "learning_rate": 1.9082125603864736e-06, "loss": 1.3686, "step": 158 }, { "epoch": 0.005762748722409481, "grad_norm": 15.194809851631012, "learning_rate": 1.920289855072464e-06, "loss": 2.892, "step": 159 }, { "epoch": 0.005798992425066145, "grad_norm": 2.728168626517563, "learning_rate": 1.932367149758454e-06, "loss": 1.2833, "step": 160 }, { "epoch": 0.005835236127722808, "grad_norm": 3.1682079631087396, "learning_rate": 1.944444444444445e-06, "loss": 1.2487, "step": 161 }, { "epoch": 0.0058714798303794715, "grad_norm": 2.7294323011887833, "learning_rate": 1.956521739130435e-06, "loss": 1.2669, "step": 162 }, { "epoch": 0.005907723533036135, "grad_norm": 3.2184327000668413, "learning_rate": 1.9685990338164253e-06, "loss": 1.1739, "step": 163 }, { "epoch": 0.005943967235692798, "grad_norm": 2.743640358011156, "learning_rate": 1.9806763285024155e-06, "loss": 1.2244, "step": 164 }, { "epoch": 0.005980210938349462, "grad_norm": 3.031696607733691, "learning_rate": 1.9927536231884058e-06, "loss": 1.2306, "step": 165 }, { "epoch": 0.006016454641006125, "grad_norm": 2.9783529654608722, "learning_rate": 2.0048309178743964e-06, "loss": 1.2703, "step": 166 }, { "epoch": 0.006052698343662789, "grad_norm": 2.672086199214665, "learning_rate": 2.0169082125603867e-06, "loss": 1.303, "step": 167 }, { "epoch": 0.006088942046319452, "grad_norm": 3.2389296141190753, "learning_rate": 2.028985507246377e-06, "loss": 1.5085, "step": 168 }, { "epoch": 0.0061251857489761155, "grad_norm": 3.436862525139368, "learning_rate": 2.041062801932367e-06, "loss": 1.3809, "step": 169 }, { "epoch": 0.0061614294516327784, "grad_norm": 2.6386923272242933, "learning_rate": 2.053140096618358e-06, "loss": 1.3603, "step": 170 }, { "epoch": 0.006197673154289442, "grad_norm": 3.366861450699732, "learning_rate": 2.065217391304348e-06, "loss": 1.1354, "step": 171 }, { "epoch": 0.006233916856946106, "grad_norm": 3.1414114165166733, "learning_rate": 2.0772946859903383e-06, "loss": 1.3952, "step": 172 }, { "epoch": 0.006270160559602769, "grad_norm": 2.6501395774884515, "learning_rate": 2.089371980676329e-06, "loss": 1.3609, "step": 173 }, { "epoch": 0.006306404262259433, "grad_norm": 2.774539813353046, "learning_rate": 2.101449275362319e-06, "loss": 1.2527, "step": 174 }, { "epoch": 0.006342647964916096, "grad_norm": 3.093147301909389, "learning_rate": 2.1135265700483095e-06, "loss": 1.4269, "step": 175 }, { "epoch": 0.0063788916675727595, "grad_norm": 2.9975679113587645, "learning_rate": 2.1256038647342997e-06, "loss": 1.1497, "step": 176 }, { "epoch": 0.006415135370229422, "grad_norm": 3.5512207163612275, "learning_rate": 2.13768115942029e-06, "loss": 1.3974, "step": 177 }, { "epoch": 0.006451379072886086, "grad_norm": 2.7750852118785834, "learning_rate": 2.1497584541062806e-06, "loss": 1.2047, "step": 178 }, { "epoch": 0.006487622775542749, "grad_norm": 3.13700352059728, "learning_rate": 2.1618357487922704e-06, "loss": 1.377, "step": 179 }, { "epoch": 0.006523866478199413, "grad_norm": 3.576112684340687, "learning_rate": 2.173913043478261e-06, "loss": 1.3734, "step": 180 }, { "epoch": 0.006560110180856076, "grad_norm": 2.9374118216839564, "learning_rate": 2.1859903381642513e-06, "loss": 1.3254, "step": 181 }, { "epoch": 0.00659635388351274, "grad_norm": 3.316057005700557, "learning_rate": 2.1980676328502416e-06, "loss": 1.4251, "step": 182 }, { "epoch": 0.0066325975861694034, "grad_norm": 2.763998043295116, "learning_rate": 2.2101449275362323e-06, "loss": 1.3644, "step": 183 }, { "epoch": 0.006668841288826066, "grad_norm": 3.3521903795091683, "learning_rate": 2.222222222222222e-06, "loss": 1.2851, "step": 184 }, { "epoch": 0.00670508499148273, "grad_norm": 3.0760611796075596, "learning_rate": 2.2342995169082127e-06, "loss": 1.1983, "step": 185 }, { "epoch": 0.006741328694139393, "grad_norm": 2.945231378630885, "learning_rate": 2.246376811594203e-06, "loss": 1.3122, "step": 186 }, { "epoch": 0.006777572396796057, "grad_norm": 2.9186754299481845, "learning_rate": 2.2584541062801937e-06, "loss": 1.1554, "step": 187 }, { "epoch": 0.00681381609945272, "grad_norm": 2.851277081745042, "learning_rate": 2.270531400966184e-06, "loss": 1.2237, "step": 188 }, { "epoch": 0.006850059802109384, "grad_norm": 3.0216143795906905, "learning_rate": 2.282608695652174e-06, "loss": 1.4164, "step": 189 }, { "epoch": 0.0068863035047660465, "grad_norm": 3.673339075741259, "learning_rate": 2.2946859903381644e-06, "loss": 1.3771, "step": 190 }, { "epoch": 0.00692254720742271, "grad_norm": 3.2194812570196922, "learning_rate": 2.3067632850241546e-06, "loss": 1.2261, "step": 191 }, { "epoch": 0.006958790910079374, "grad_norm": 2.985188685217318, "learning_rate": 2.3188405797101453e-06, "loss": 1.2725, "step": 192 }, { "epoch": 0.006995034612736037, "grad_norm": 3.3343012078022793, "learning_rate": 2.3309178743961355e-06, "loss": 1.1257, "step": 193 }, { "epoch": 0.007031278315392701, "grad_norm": 2.5069744129960956, "learning_rate": 2.3429951690821258e-06, "loss": 1.104, "step": 194 }, { "epoch": 0.007067522018049364, "grad_norm": 3.1026188684151528, "learning_rate": 2.355072463768116e-06, "loss": 1.2945, "step": 195 }, { "epoch": 0.007103765720706028, "grad_norm": 3.034957462679344, "learning_rate": 2.3671497584541063e-06, "loss": 1.2875, "step": 196 }, { "epoch": 0.0071400094233626905, "grad_norm": 2.723837735145023, "learning_rate": 2.379227053140097e-06, "loss": 1.2858, "step": 197 }, { "epoch": 0.007176253126019354, "grad_norm": 3.729109300624449, "learning_rate": 2.391304347826087e-06, "loss": 1.3095, "step": 198 }, { "epoch": 0.007212496828676017, "grad_norm": 3.1126873198627614, "learning_rate": 2.4033816425120774e-06, "loss": 1.3741, "step": 199 }, { "epoch": 0.007248740531332681, "grad_norm": 3.11633107016182, "learning_rate": 2.4154589371980677e-06, "loss": 1.3804, "step": 200 }, { "epoch": 0.007284984233989344, "grad_norm": 2.9271472248708714, "learning_rate": 2.4275362318840583e-06, "loss": 1.2164, "step": 201 }, { "epoch": 0.007321227936646008, "grad_norm": 3.0910838051795193, "learning_rate": 2.4396135265700486e-06, "loss": 1.3243, "step": 202 }, { "epoch": 0.0073574716393026715, "grad_norm": 2.9551283276592524, "learning_rate": 2.451690821256039e-06, "loss": 1.113, "step": 203 }, { "epoch": 0.0073937153419593345, "grad_norm": 3.2341499118302375, "learning_rate": 2.4637681159420295e-06, "loss": 1.2743, "step": 204 }, { "epoch": 0.007429959044615998, "grad_norm": 2.8753164042615174, "learning_rate": 2.4758454106280193e-06, "loss": 1.1903, "step": 205 }, { "epoch": 0.007466202747272661, "grad_norm": 2.780734345499002, "learning_rate": 2.48792270531401e-06, "loss": 1.1498, "step": 206 }, { "epoch": 0.007502446449929325, "grad_norm": 2.8149859621320195, "learning_rate": 2.5e-06, "loss": 1.3505, "step": 207 }, { "epoch": 0.007538690152585988, "grad_norm": 2.7839373067694626, "learning_rate": 2.5120772946859904e-06, "loss": 1.3008, "step": 208 }, { "epoch": 0.007574933855242652, "grad_norm": 2.8765738553886413, "learning_rate": 2.524154589371981e-06, "loss": 1.2331, "step": 209 }, { "epoch": 0.007611177557899315, "grad_norm": 3.010032106650318, "learning_rate": 2.5362318840579714e-06, "loss": 1.2241, "step": 210 }, { "epoch": 0.0076474212605559784, "grad_norm": 3.1171180803964758, "learning_rate": 2.5483091787439616e-06, "loss": 1.3541, "step": 211 }, { "epoch": 0.007683664963212642, "grad_norm": 3.084603465657791, "learning_rate": 2.5603864734299523e-06, "loss": 1.407, "step": 212 }, { "epoch": 0.007719908665869305, "grad_norm": 2.9373764426632634, "learning_rate": 2.572463768115942e-06, "loss": 1.2787, "step": 213 }, { "epoch": 0.007756152368525969, "grad_norm": 2.9508488571980154, "learning_rate": 2.5845410628019323e-06, "loss": 1.3466, "step": 214 }, { "epoch": 0.007792396071182632, "grad_norm": 3.3688016169274237, "learning_rate": 2.596618357487923e-06, "loss": 1.2258, "step": 215 }, { "epoch": 0.007828639773839295, "grad_norm": 2.882951138553063, "learning_rate": 2.6086956521739132e-06, "loss": 1.151, "step": 216 }, { "epoch": 0.007864883476495959, "grad_norm": 3.1337464497033682, "learning_rate": 2.6207729468599035e-06, "loss": 1.1632, "step": 217 }, { "epoch": 0.007901127179152622, "grad_norm": 3.237791667988456, "learning_rate": 2.632850241545894e-06, "loss": 1.3278, "step": 218 }, { "epoch": 0.007937370881809286, "grad_norm": 3.205599731651379, "learning_rate": 2.6449275362318844e-06, "loss": 1.0922, "step": 219 }, { "epoch": 0.007973614584465948, "grad_norm": 3.0834533504362223, "learning_rate": 2.6570048309178746e-06, "loss": 1.2953, "step": 220 }, { "epoch": 0.008009858287122612, "grad_norm": 2.611255059221203, "learning_rate": 2.6690821256038653e-06, "loss": 1.097, "step": 221 }, { "epoch": 0.008046101989779276, "grad_norm": 3.5098345383516407, "learning_rate": 2.6811594202898555e-06, "loss": 1.1677, "step": 222 }, { "epoch": 0.00808234569243594, "grad_norm": 2.3975101319368894, "learning_rate": 2.6932367149758454e-06, "loss": 1.1513, "step": 223 }, { "epoch": 0.008118589395092603, "grad_norm": 2.905082820422074, "learning_rate": 2.7053140096618356e-06, "loss": 1.2111, "step": 224 }, { "epoch": 0.008154833097749265, "grad_norm": 2.998939638216275, "learning_rate": 2.7173913043478263e-06, "loss": 1.3016, "step": 225 }, { "epoch": 0.00819107680040593, "grad_norm": 2.994025853951724, "learning_rate": 2.7294685990338165e-06, "loss": 1.286, "step": 226 }, { "epoch": 0.008227320503062593, "grad_norm": 3.062305389732486, "learning_rate": 2.7415458937198068e-06, "loss": 1.2735, "step": 227 }, { "epoch": 0.008263564205719257, "grad_norm": 3.0955075492173254, "learning_rate": 2.7536231884057974e-06, "loss": 1.231, "step": 228 }, { "epoch": 0.008299807908375919, "grad_norm": 3.183929096096444, "learning_rate": 2.7657004830917877e-06, "loss": 1.3637, "step": 229 }, { "epoch": 0.008336051611032583, "grad_norm": 3.422332442467265, "learning_rate": 2.7777777777777783e-06, "loss": 1.3085, "step": 230 }, { "epoch": 0.008372295313689247, "grad_norm": 2.753874107160562, "learning_rate": 2.7898550724637686e-06, "loss": 1.4786, "step": 231 }, { "epoch": 0.00840853901634591, "grad_norm": 3.0372523517662917, "learning_rate": 2.801932367149759e-06, "loss": 1.1987, "step": 232 }, { "epoch": 0.008444782719002574, "grad_norm": 3.001293879361471, "learning_rate": 2.8140096618357486e-06, "loss": 1.4148, "step": 233 }, { "epoch": 0.008481026421659236, "grad_norm": 2.82424728322551, "learning_rate": 2.8260869565217393e-06, "loss": 1.0211, "step": 234 }, { "epoch": 0.0085172701243159, "grad_norm": 3.5470330527671527, "learning_rate": 2.8381642512077295e-06, "loss": 1.278, "step": 235 }, { "epoch": 0.008553513826972564, "grad_norm": 3.271969278609087, "learning_rate": 2.85024154589372e-06, "loss": 1.0618, "step": 236 }, { "epoch": 0.008589757529629228, "grad_norm": 3.212612135725504, "learning_rate": 2.8623188405797105e-06, "loss": 1.2563, "step": 237 }, { "epoch": 0.00862600123228589, "grad_norm": 3.1265328341384717, "learning_rate": 2.8743961352657007e-06, "loss": 1.3677, "step": 238 }, { "epoch": 0.008662244934942553, "grad_norm": 2.7160790038012537, "learning_rate": 2.886473429951691e-06, "loss": 1.3024, "step": 239 }, { "epoch": 0.008698488637599217, "grad_norm": 2.860920924097655, "learning_rate": 2.8985507246376816e-06, "loss": 1.2119, "step": 240 }, { "epoch": 0.008734732340255881, "grad_norm": 3.289143902537554, "learning_rate": 2.910628019323672e-06, "loss": 1.2272, "step": 241 }, { "epoch": 0.008770976042912543, "grad_norm": 2.796949135856763, "learning_rate": 2.922705314009662e-06, "loss": 1.3727, "step": 242 }, { "epoch": 0.008807219745569207, "grad_norm": 3.10411142972998, "learning_rate": 2.9347826086956528e-06, "loss": 1.286, "step": 243 }, { "epoch": 0.00884346344822587, "grad_norm": 3.2165508849156987, "learning_rate": 2.9468599033816426e-06, "loss": 1.196, "step": 244 }, { "epoch": 0.008879707150882534, "grad_norm": 2.802288227723487, "learning_rate": 2.958937198067633e-06, "loss": 1.2357, "step": 245 }, { "epoch": 0.008915950853539198, "grad_norm": 3.0532930373542833, "learning_rate": 2.9710144927536235e-06, "loss": 1.4623, "step": 246 }, { "epoch": 0.00895219455619586, "grad_norm": 2.988873883133446, "learning_rate": 2.9830917874396137e-06, "loss": 1.077, "step": 247 }, { "epoch": 0.008988438258852524, "grad_norm": 2.672791922243381, "learning_rate": 2.995169082125604e-06, "loss": 1.2713, "step": 248 }, { "epoch": 0.009024681961509188, "grad_norm": 3.2113485957507053, "learning_rate": 3.0072463768115946e-06, "loss": 1.2572, "step": 249 }, { "epoch": 0.009060925664165852, "grad_norm": 3.3277193907470686, "learning_rate": 3.019323671497585e-06, "loss": 1.1126, "step": 250 }, { "epoch": 0.009097169366822514, "grad_norm": 2.9141573186688294, "learning_rate": 3.031400966183575e-06, "loss": 1.1996, "step": 251 }, { "epoch": 0.009133413069479178, "grad_norm": 3.0086692238821464, "learning_rate": 3.043478260869566e-06, "loss": 1.5174, "step": 252 }, { "epoch": 0.009169656772135841, "grad_norm": 2.8517137070730554, "learning_rate": 3.055555555555556e-06, "loss": 1.3195, "step": 253 }, { "epoch": 0.009205900474792505, "grad_norm": 2.9196741071177037, "learning_rate": 3.067632850241546e-06, "loss": 1.1555, "step": 254 }, { "epoch": 0.009242144177449169, "grad_norm": 3.466324507870952, "learning_rate": 3.079710144927536e-06, "loss": 1.0183, "step": 255 }, { "epoch": 0.009278387880105831, "grad_norm": 3.0935039673096125, "learning_rate": 3.0917874396135268e-06, "loss": 1.2351, "step": 256 }, { "epoch": 0.009314631582762495, "grad_norm": 2.723106942129673, "learning_rate": 3.103864734299517e-06, "loss": 1.1105, "step": 257 }, { "epoch": 0.009350875285419159, "grad_norm": 2.930207157903193, "learning_rate": 3.1159420289855073e-06, "loss": 1.4142, "step": 258 }, { "epoch": 0.009387118988075822, "grad_norm": 3.20383352366659, "learning_rate": 3.128019323671498e-06, "loss": 1.2208, "step": 259 }, { "epoch": 0.009423362690732484, "grad_norm": 2.899138018544511, "learning_rate": 3.140096618357488e-06, "loss": 1.2322, "step": 260 }, { "epoch": 0.009459606393389148, "grad_norm": 3.0201099784009697, "learning_rate": 3.152173913043479e-06, "loss": 1.3153, "step": 261 }, { "epoch": 0.009495850096045812, "grad_norm": 3.1588429318503866, "learning_rate": 3.164251207729469e-06, "loss": 1.3401, "step": 262 }, { "epoch": 0.009532093798702476, "grad_norm": 3.2216708026208924, "learning_rate": 3.1763285024154593e-06, "loss": 1.2771, "step": 263 }, { "epoch": 0.00956833750135914, "grad_norm": 2.4505006484844767, "learning_rate": 3.188405797101449e-06, "loss": 0.921, "step": 264 }, { "epoch": 0.009604581204015802, "grad_norm": 2.734580455836712, "learning_rate": 3.20048309178744e-06, "loss": 1.0549, "step": 265 }, { "epoch": 0.009640824906672465, "grad_norm": 2.9626662269295454, "learning_rate": 3.21256038647343e-06, "loss": 1.3861, "step": 266 }, { "epoch": 0.00967706860932913, "grad_norm": 2.8783298453630564, "learning_rate": 3.2246376811594203e-06, "loss": 1.1064, "step": 267 }, { "epoch": 0.009713312311985793, "grad_norm": 2.91547125423197, "learning_rate": 3.236714975845411e-06, "loss": 1.3933, "step": 268 }, { "epoch": 0.009749556014642455, "grad_norm": 2.6316568160890483, "learning_rate": 3.248792270531401e-06, "loss": 1.1859, "step": 269 }, { "epoch": 0.009785799717299119, "grad_norm": 2.7835555071782627, "learning_rate": 3.2608695652173914e-06, "loss": 1.4013, "step": 270 }, { "epoch": 0.009822043419955783, "grad_norm": 2.9985101659383657, "learning_rate": 3.272946859903382e-06, "loss": 1.1125, "step": 271 }, { "epoch": 0.009858287122612447, "grad_norm": 2.7731685726485495, "learning_rate": 3.2850241545893724e-06, "loss": 1.2022, "step": 272 }, { "epoch": 0.00989453082526911, "grad_norm": 2.9376194510502227, "learning_rate": 3.2971014492753626e-06, "loss": 1.2275, "step": 273 }, { "epoch": 0.009930774527925772, "grad_norm": 2.9416143265407126, "learning_rate": 3.3091787439613533e-06, "loss": 1.1697, "step": 274 }, { "epoch": 0.009967018230582436, "grad_norm": 3.005087936867803, "learning_rate": 3.321256038647343e-06, "loss": 1.2232, "step": 275 }, { "epoch": 0.0100032619332391, "grad_norm": 3.074323075980932, "learning_rate": 3.3333333333333333e-06, "loss": 1.2796, "step": 276 }, { "epoch": 0.010039505635895764, "grad_norm": 3.072293697147605, "learning_rate": 3.345410628019324e-06, "loss": 1.2803, "step": 277 }, { "epoch": 0.010075749338552426, "grad_norm": 2.9328887301820914, "learning_rate": 3.3574879227053142e-06, "loss": 1.368, "step": 278 }, { "epoch": 0.01011199304120909, "grad_norm": 3.388685071685179, "learning_rate": 3.3695652173913045e-06, "loss": 1.3365, "step": 279 }, { "epoch": 0.010148236743865753, "grad_norm": 2.6442499652357934, "learning_rate": 3.381642512077295e-06, "loss": 1.2826, "step": 280 }, { "epoch": 0.010184480446522417, "grad_norm": 3.088907304349357, "learning_rate": 3.3937198067632854e-06, "loss": 1.2542, "step": 281 }, { "epoch": 0.01022072414917908, "grad_norm": 3.0027861975965413, "learning_rate": 3.4057971014492756e-06, "loss": 1.1737, "step": 282 }, { "epoch": 0.010256967851835743, "grad_norm": 3.0397894132115955, "learning_rate": 3.4178743961352663e-06, "loss": 1.1809, "step": 283 }, { "epoch": 0.010293211554492407, "grad_norm": 2.882802299893196, "learning_rate": 3.4299516908212565e-06, "loss": 1.2458, "step": 284 }, { "epoch": 0.01032945525714907, "grad_norm": 2.8727736124418737, "learning_rate": 3.4420289855072464e-06, "loss": 1.1385, "step": 285 }, { "epoch": 0.010365698959805734, "grad_norm": 2.865155905099332, "learning_rate": 3.4541062801932366e-06, "loss": 1.303, "step": 286 }, { "epoch": 0.010401942662462397, "grad_norm": 3.1579263313036146, "learning_rate": 3.4661835748792273e-06, "loss": 1.3493, "step": 287 }, { "epoch": 0.01043818636511906, "grad_norm": 3.1701332019975936, "learning_rate": 3.4782608695652175e-06, "loss": 1.3157, "step": 288 }, { "epoch": 0.010474430067775724, "grad_norm": 3.0199166054573943, "learning_rate": 3.490338164251208e-06, "loss": 1.2456, "step": 289 }, { "epoch": 0.010510673770432388, "grad_norm": 2.7076909107220732, "learning_rate": 3.5024154589371984e-06, "loss": 1.2534, "step": 290 }, { "epoch": 0.01054691747308905, "grad_norm": 2.989615718943697, "learning_rate": 3.5144927536231887e-06, "loss": 1.3958, "step": 291 }, { "epoch": 0.010583161175745714, "grad_norm": 2.948848608668554, "learning_rate": 3.5265700483091793e-06, "loss": 1.2801, "step": 292 }, { "epoch": 0.010619404878402378, "grad_norm": 3.1062455536834594, "learning_rate": 3.5386473429951696e-06, "loss": 1.2642, "step": 293 }, { "epoch": 0.010655648581059041, "grad_norm": 3.029792387419532, "learning_rate": 3.55072463768116e-06, "loss": 1.1808, "step": 294 }, { "epoch": 0.010691892283715705, "grad_norm": 2.7173500908297474, "learning_rate": 3.5628019323671496e-06, "loss": 1.3093, "step": 295 }, { "epoch": 0.010728135986372367, "grad_norm": 3.5244265061755753, "learning_rate": 3.5748792270531403e-06, "loss": 0.982, "step": 296 }, { "epoch": 0.010764379689029031, "grad_norm": 2.701711072150066, "learning_rate": 3.5869565217391305e-06, "loss": 1.1304, "step": 297 }, { "epoch": 0.010800623391685695, "grad_norm": 3.5685678527873312, "learning_rate": 3.5990338164251208e-06, "loss": 1.0134, "step": 298 }, { "epoch": 0.010836867094342359, "grad_norm": 3.032012544043511, "learning_rate": 3.6111111111111115e-06, "loss": 1.2179, "step": 299 }, { "epoch": 0.01087311079699902, "grad_norm": 3.1036663283052714, "learning_rate": 3.6231884057971017e-06, "loss": 1.1257, "step": 300 }, { "epoch": 0.010909354499655684, "grad_norm": 2.960757002567923, "learning_rate": 3.635265700483092e-06, "loss": 1.4276, "step": 301 }, { "epoch": 0.010945598202312348, "grad_norm": 2.9263237906147084, "learning_rate": 3.6473429951690826e-06, "loss": 1.1221, "step": 302 }, { "epoch": 0.010981841904969012, "grad_norm": 2.8273917050830137, "learning_rate": 3.659420289855073e-06, "loss": 1.1293, "step": 303 }, { "epoch": 0.011018085607625676, "grad_norm": 2.7410556179939847, "learning_rate": 3.6714975845410635e-06, "loss": 1.1394, "step": 304 }, { "epoch": 0.011054329310282338, "grad_norm": 3.333176123438642, "learning_rate": 3.6835748792270538e-06, "loss": 1.3133, "step": 305 }, { "epoch": 0.011090573012939002, "grad_norm": 2.9550935585406495, "learning_rate": 3.6956521739130436e-06, "loss": 1.1057, "step": 306 }, { "epoch": 0.011126816715595665, "grad_norm": 2.657945525267782, "learning_rate": 3.707729468599034e-06, "loss": 0.9506, "step": 307 }, { "epoch": 0.01116306041825233, "grad_norm": 3.0168184662535014, "learning_rate": 3.7198067632850245e-06, "loss": 1.238, "step": 308 }, { "epoch": 0.011199304120908991, "grad_norm": 3.0062693393349957, "learning_rate": 3.7318840579710147e-06, "loss": 1.076, "step": 309 }, { "epoch": 0.011235547823565655, "grad_norm": 3.126534561834279, "learning_rate": 3.743961352657005e-06, "loss": 1.1453, "step": 310 }, { "epoch": 0.011271791526222319, "grad_norm": 2.994457989583592, "learning_rate": 3.7560386473429956e-06, "loss": 1.2232, "step": 311 }, { "epoch": 0.011308035228878983, "grad_norm": 3.0873540582936263, "learning_rate": 3.768115942028986e-06, "loss": 1.2398, "step": 312 }, { "epoch": 0.011344278931535647, "grad_norm": 2.851209807724208, "learning_rate": 3.780193236714976e-06, "loss": 1.3422, "step": 313 }, { "epoch": 0.011380522634192309, "grad_norm": 2.8315826039142054, "learning_rate": 3.792270531400967e-06, "loss": 1.21, "step": 314 }, { "epoch": 0.011416766336848972, "grad_norm": 2.479261726450079, "learning_rate": 3.804347826086957e-06, "loss": 0.9705, "step": 315 }, { "epoch": 0.011453010039505636, "grad_norm": 2.7679644558038716, "learning_rate": 3.816425120772947e-06, "loss": 1.1772, "step": 316 }, { "epoch": 0.0114892537421623, "grad_norm": 3.408198000330723, "learning_rate": 3.8285024154589375e-06, "loss": 1.0927, "step": 317 }, { "epoch": 0.011525497444818962, "grad_norm": 15.124972959505307, "learning_rate": 3.840579710144928e-06, "loss": 2.6049, "step": 318 }, { "epoch": 0.011561741147475626, "grad_norm": 3.2377590983426994, "learning_rate": 3.852657004830918e-06, "loss": 1.2186, "step": 319 }, { "epoch": 0.01159798485013229, "grad_norm": 2.863247769866177, "learning_rate": 3.864734299516908e-06, "loss": 1.1948, "step": 320 }, { "epoch": 0.011634228552788953, "grad_norm": 3.2849097879312925, "learning_rate": 3.8768115942028985e-06, "loss": 1.1652, "step": 321 }, { "epoch": 0.011670472255445615, "grad_norm": 2.962843618377536, "learning_rate": 3.88888888888889e-06, "loss": 1.2148, "step": 322 }, { "epoch": 0.01170671595810228, "grad_norm": 3.0250015521871605, "learning_rate": 3.90096618357488e-06, "loss": 1.306, "step": 323 }, { "epoch": 0.011742959660758943, "grad_norm": 2.9849545795240786, "learning_rate": 3.91304347826087e-06, "loss": 1.3467, "step": 324 }, { "epoch": 0.011779203363415607, "grad_norm": 2.9361400572238816, "learning_rate": 3.92512077294686e-06, "loss": 1.2063, "step": 325 }, { "epoch": 0.01181544706607227, "grad_norm": 2.7290005014486978, "learning_rate": 3.9371980676328506e-06, "loss": 1.2613, "step": 326 }, { "epoch": 0.011851690768728933, "grad_norm": 2.888524283046229, "learning_rate": 3.949275362318841e-06, "loss": 1.331, "step": 327 }, { "epoch": 0.011887934471385597, "grad_norm": 3.1677613188821487, "learning_rate": 3.961352657004831e-06, "loss": 1.262, "step": 328 }, { "epoch": 0.01192417817404226, "grad_norm": 3.18436456838836, "learning_rate": 3.973429951690821e-06, "loss": 1.2489, "step": 329 }, { "epoch": 0.011960421876698924, "grad_norm": 2.945521213976269, "learning_rate": 3.9855072463768115e-06, "loss": 1.092, "step": 330 }, { "epoch": 0.011996665579355586, "grad_norm": 2.8335172131203343, "learning_rate": 3.997584541062803e-06, "loss": 1.1951, "step": 331 }, { "epoch": 0.01203290928201225, "grad_norm": 3.4697219783799222, "learning_rate": 4.009661835748793e-06, "loss": 1.2732, "step": 332 }, { "epoch": 0.012069152984668914, "grad_norm": 3.0773016727435065, "learning_rate": 4.021739130434783e-06, "loss": 1.1981, "step": 333 }, { "epoch": 0.012105396687325578, "grad_norm": 3.051540059206125, "learning_rate": 4.033816425120773e-06, "loss": 1.2808, "step": 334 }, { "epoch": 0.012141640389982241, "grad_norm": 3.0938824564639056, "learning_rate": 4.045893719806764e-06, "loss": 1.0415, "step": 335 }, { "epoch": 0.012177884092638903, "grad_norm": 2.772882500451024, "learning_rate": 4.057971014492754e-06, "loss": 1.1907, "step": 336 }, { "epoch": 0.012214127795295567, "grad_norm": 3.137065900394635, "learning_rate": 4.070048309178744e-06, "loss": 1.2562, "step": 337 }, { "epoch": 0.012250371497952231, "grad_norm": 2.937531049563997, "learning_rate": 4.082125603864734e-06, "loss": 1.1089, "step": 338 }, { "epoch": 0.012286615200608895, "grad_norm": 2.524464361609078, "learning_rate": 4.0942028985507246e-06, "loss": 1.0942, "step": 339 }, { "epoch": 0.012322858903265557, "grad_norm": 2.942855772020156, "learning_rate": 4.106280193236716e-06, "loss": 1.218, "step": 340 }, { "epoch": 0.01235910260592222, "grad_norm": 3.1990284366258956, "learning_rate": 4.118357487922706e-06, "loss": 1.1447, "step": 341 }, { "epoch": 0.012395346308578884, "grad_norm": 3.0285619949240767, "learning_rate": 4.130434782608696e-06, "loss": 1.203, "step": 342 }, { "epoch": 0.012431590011235548, "grad_norm": 2.6786562329356705, "learning_rate": 4.142512077294686e-06, "loss": 1.1553, "step": 343 }, { "epoch": 0.012467833713892212, "grad_norm": 2.696924550695947, "learning_rate": 4.154589371980677e-06, "loss": 1.1739, "step": 344 }, { "epoch": 0.012504077416548874, "grad_norm": 2.4487543861686554, "learning_rate": 4.166666666666667e-06, "loss": 1.0401, "step": 345 }, { "epoch": 0.012540321119205538, "grad_norm": 3.020970753955941, "learning_rate": 4.178743961352658e-06, "loss": 1.0682, "step": 346 }, { "epoch": 0.012576564821862202, "grad_norm": 2.821404422530469, "learning_rate": 4.190821256038647e-06, "loss": 1.1984, "step": 347 }, { "epoch": 0.012612808524518865, "grad_norm": 3.130910158701889, "learning_rate": 4.202898550724638e-06, "loss": 1.131, "step": 348 }, { "epoch": 0.012649052227175528, "grad_norm": 2.7219570847186567, "learning_rate": 4.214975845410628e-06, "loss": 1.149, "step": 349 }, { "epoch": 0.012685295929832191, "grad_norm": 2.6590378506523695, "learning_rate": 4.227053140096619e-06, "loss": 1.0941, "step": 350 }, { "epoch": 0.012721539632488855, "grad_norm": 3.4671640947716047, "learning_rate": 4.239130434782609e-06, "loss": 1.4068, "step": 351 }, { "epoch": 0.012757783335145519, "grad_norm": 3.000366888589261, "learning_rate": 4.251207729468599e-06, "loss": 1.2086, "step": 352 }, { "epoch": 0.012794027037802181, "grad_norm": 2.9192727634698734, "learning_rate": 4.26328502415459e-06, "loss": 1.2999, "step": 353 }, { "epoch": 0.012830270740458845, "grad_norm": 2.76975943081911, "learning_rate": 4.27536231884058e-06, "loss": 1.2151, "step": 354 }, { "epoch": 0.012866514443115509, "grad_norm": 3.017363356210498, "learning_rate": 4.28743961352657e-06, "loss": 1.2257, "step": 355 }, { "epoch": 0.012902758145772172, "grad_norm": 2.6923563407938116, "learning_rate": 4.299516908212561e-06, "loss": 0.9623, "step": 356 }, { "epoch": 0.012939001848428836, "grad_norm": 2.977878011881541, "learning_rate": 4.3115942028985515e-06, "loss": 1.1647, "step": 357 }, { "epoch": 0.012975245551085498, "grad_norm": 2.800056389622747, "learning_rate": 4.323671497584541e-06, "loss": 1.3044, "step": 358 }, { "epoch": 0.013011489253742162, "grad_norm": 2.6124130802334444, "learning_rate": 4.335748792270532e-06, "loss": 1.2115, "step": 359 }, { "epoch": 0.013047732956398826, "grad_norm": 2.860030431141497, "learning_rate": 4.347826086956522e-06, "loss": 1.2939, "step": 360 }, { "epoch": 0.01308397665905549, "grad_norm": 2.626991238451483, "learning_rate": 4.3599033816425124e-06, "loss": 1.1932, "step": 361 }, { "epoch": 0.013120220361712152, "grad_norm": 3.075706716170849, "learning_rate": 4.371980676328503e-06, "loss": 1.1627, "step": 362 }, { "epoch": 0.013156464064368815, "grad_norm": 2.9145666598443283, "learning_rate": 4.384057971014493e-06, "loss": 1.2369, "step": 363 }, { "epoch": 0.01319270776702548, "grad_norm": 2.774289418531849, "learning_rate": 4.396135265700483e-06, "loss": 1.3868, "step": 364 }, { "epoch": 0.013228951469682143, "grad_norm": 2.788760734194359, "learning_rate": 4.408212560386474e-06, "loss": 1.1475, "step": 365 }, { "epoch": 0.013265195172338807, "grad_norm": 3.1002827994056306, "learning_rate": 4.4202898550724645e-06, "loss": 1.0763, "step": 366 }, { "epoch": 0.013301438874995469, "grad_norm": 2.8728347773927934, "learning_rate": 4.432367149758455e-06, "loss": 1.2532, "step": 367 }, { "epoch": 0.013337682577652133, "grad_norm": 2.942292319102511, "learning_rate": 4.444444444444444e-06, "loss": 1.2822, "step": 368 }, { "epoch": 0.013373926280308797, "grad_norm": 2.913079421181531, "learning_rate": 4.456521739130435e-06, "loss": 1.141, "step": 369 }, { "epoch": 0.01341016998296546, "grad_norm": 2.803485490095374, "learning_rate": 4.4685990338164255e-06, "loss": 1.1842, "step": 370 }, { "epoch": 0.013446413685622122, "grad_norm": 2.8411811605680346, "learning_rate": 4.480676328502416e-06, "loss": 1.1721, "step": 371 }, { "epoch": 0.013482657388278786, "grad_norm": 2.6977675189492287, "learning_rate": 4.492753623188406e-06, "loss": 1.1795, "step": 372 }, { "epoch": 0.01351890109093545, "grad_norm": 2.8155813548406705, "learning_rate": 4.504830917874396e-06, "loss": 1.3171, "step": 373 }, { "epoch": 0.013555144793592114, "grad_norm": 3.0848492998529538, "learning_rate": 4.516908212560387e-06, "loss": 1.2648, "step": 374 }, { "epoch": 0.013591388496248778, "grad_norm": 2.8395452028022925, "learning_rate": 4.5289855072463775e-06, "loss": 1.3544, "step": 375 }, { "epoch": 0.01362763219890544, "grad_norm": 2.9139925674167255, "learning_rate": 4.541062801932368e-06, "loss": 1.0827, "step": 376 }, { "epoch": 0.013663875901562103, "grad_norm": 3.051401902386172, "learning_rate": 4.553140096618358e-06, "loss": 1.2591, "step": 377 }, { "epoch": 0.013700119604218767, "grad_norm": 2.7855731887191677, "learning_rate": 4.565217391304348e-06, "loss": 1.2355, "step": 378 }, { "epoch": 0.013736363306875431, "grad_norm": 3.0427099363288117, "learning_rate": 4.5772946859903385e-06, "loss": 1.2695, "step": 379 }, { "epoch": 0.013772607009532093, "grad_norm": 3.005411546141119, "learning_rate": 4.589371980676329e-06, "loss": 1.0221, "step": 380 }, { "epoch": 0.013808850712188757, "grad_norm": 3.0366985006919767, "learning_rate": 4.601449275362319e-06, "loss": 1.219, "step": 381 }, { "epoch": 0.01384509441484542, "grad_norm": 2.9437505504516284, "learning_rate": 4.613526570048309e-06, "loss": 1.1656, "step": 382 }, { "epoch": 0.013881338117502084, "grad_norm": 2.941065615461931, "learning_rate": 4.6256038647342995e-06, "loss": 1.1453, "step": 383 }, { "epoch": 0.013917581820158748, "grad_norm": 2.898573742905869, "learning_rate": 4.637681159420291e-06, "loss": 1.1429, "step": 384 }, { "epoch": 0.01395382552281541, "grad_norm": 2.485697138569429, "learning_rate": 4.649758454106281e-06, "loss": 1.0865, "step": 385 }, { "epoch": 0.013990069225472074, "grad_norm": 2.797049024845866, "learning_rate": 4.661835748792271e-06, "loss": 1.1949, "step": 386 }, { "epoch": 0.014026312928128738, "grad_norm": 2.7463928630222547, "learning_rate": 4.673913043478261e-06, "loss": 1.0582, "step": 387 }, { "epoch": 0.014062556630785402, "grad_norm": 2.8776163191981396, "learning_rate": 4.6859903381642516e-06, "loss": 1.1336, "step": 388 }, { "epoch": 0.014098800333442064, "grad_norm": 2.5606821025417466, "learning_rate": 4.698067632850242e-06, "loss": 1.043, "step": 389 }, { "epoch": 0.014135044036098728, "grad_norm": 3.4092497813415132, "learning_rate": 4.710144927536232e-06, "loss": 1.038, "step": 390 }, { "epoch": 0.014171287738755391, "grad_norm": 3.4430088713580296, "learning_rate": 4.722222222222222e-06, "loss": 1.2105, "step": 391 }, { "epoch": 0.014207531441412055, "grad_norm": 3.2334141771423277, "learning_rate": 4.7342995169082125e-06, "loss": 1.0625, "step": 392 }, { "epoch": 0.014243775144068717, "grad_norm": 3.2110800059486464, "learning_rate": 4.746376811594204e-06, "loss": 1.1807, "step": 393 }, { "epoch": 0.014280018846725381, "grad_norm": 2.8479678474680754, "learning_rate": 4.758454106280194e-06, "loss": 1.2083, "step": 394 }, { "epoch": 0.014316262549382045, "grad_norm": 2.9508289106517767, "learning_rate": 4.770531400966184e-06, "loss": 1.1099, "step": 395 }, { "epoch": 0.014352506252038709, "grad_norm": 2.7197437233435178, "learning_rate": 4.782608695652174e-06, "loss": 1.1371, "step": 396 }, { "epoch": 0.014388749954695372, "grad_norm": 2.6496780970035783, "learning_rate": 4.794685990338165e-06, "loss": 1.1056, "step": 397 }, { "epoch": 0.014424993657352034, "grad_norm": 3.0680386881421997, "learning_rate": 4.806763285024155e-06, "loss": 1.4014, "step": 398 }, { "epoch": 0.014461237360008698, "grad_norm": 3.423986000225501, "learning_rate": 4.818840579710145e-06, "loss": 1.2594, "step": 399 }, { "epoch": 0.014497481062665362, "grad_norm": 2.8774839392896574, "learning_rate": 4.830917874396135e-06, "loss": 1.2858, "step": 400 }, { "epoch": 0.014533724765322026, "grad_norm": 2.837463933373017, "learning_rate": 4.8429951690821256e-06, "loss": 1.1608, "step": 401 }, { "epoch": 0.014569968467978688, "grad_norm": 2.2278669301999585, "learning_rate": 4.855072463768117e-06, "loss": 0.9592, "step": 402 }, { "epoch": 0.014606212170635352, "grad_norm": 3.1591394805441713, "learning_rate": 4.867149758454107e-06, "loss": 1.2526, "step": 403 }, { "epoch": 0.014642455873292015, "grad_norm": 3.215394181947827, "learning_rate": 4.879227053140097e-06, "loss": 1.1779, "step": 404 }, { "epoch": 0.01467869957594868, "grad_norm": 3.4103793515293575, "learning_rate": 4.891304347826087e-06, "loss": 1.1248, "step": 405 }, { "epoch": 0.014714943278605343, "grad_norm": 2.90995216433136, "learning_rate": 4.903381642512078e-06, "loss": 1.0953, "step": 406 }, { "epoch": 0.014751186981262005, "grad_norm": 2.758007952802283, "learning_rate": 4.915458937198068e-06, "loss": 1.3859, "step": 407 }, { "epoch": 0.014787430683918669, "grad_norm": 3.0098871022638356, "learning_rate": 4.927536231884059e-06, "loss": 1.229, "step": 408 }, { "epoch": 0.014823674386575333, "grad_norm": 3.005628467799871, "learning_rate": 4.939613526570048e-06, "loss": 1.177, "step": 409 }, { "epoch": 0.014859918089231997, "grad_norm": 2.9887051075375095, "learning_rate": 4.951690821256039e-06, "loss": 1.2084, "step": 410 }, { "epoch": 0.014896161791888659, "grad_norm": 3.2566592352696446, "learning_rate": 4.963768115942029e-06, "loss": 1.0834, "step": 411 }, { "epoch": 0.014932405494545322, "grad_norm": 2.8983749612002434, "learning_rate": 4.97584541062802e-06, "loss": 1.1995, "step": 412 }, { "epoch": 0.014968649197201986, "grad_norm": 2.585970324723306, "learning_rate": 4.98792270531401e-06, "loss": 1.0549, "step": 413 }, { "epoch": 0.01500489289985865, "grad_norm": 2.9750878728121823, "learning_rate": 5e-06, "loss": 1.3006, "step": 414 }, { "epoch": 0.015041136602515314, "grad_norm": 2.525728313746148, "learning_rate": 5.012077294685991e-06, "loss": 1.0802, "step": 415 }, { "epoch": 0.015077380305171976, "grad_norm": 3.040906761718932, "learning_rate": 5.024154589371981e-06, "loss": 1.0742, "step": 416 }, { "epoch": 0.01511362400782864, "grad_norm": 2.468511167721749, "learning_rate": 5.036231884057972e-06, "loss": 1.0377, "step": 417 }, { "epoch": 0.015149867710485303, "grad_norm": 2.869367386536626, "learning_rate": 5.048309178743962e-06, "loss": 1.2026, "step": 418 }, { "epoch": 0.015186111413141967, "grad_norm": 3.03096675458355, "learning_rate": 5.0603864734299525e-06, "loss": 1.1667, "step": 419 }, { "epoch": 0.01522235511579863, "grad_norm": 2.8803145093684606, "learning_rate": 5.072463768115943e-06, "loss": 1.2616, "step": 420 }, { "epoch": 0.015258598818455293, "grad_norm": 2.908683434054857, "learning_rate": 5.084541062801933e-06, "loss": 1.1455, "step": 421 }, { "epoch": 0.015294842521111957, "grad_norm": 2.421372405085102, "learning_rate": 5.096618357487923e-06, "loss": 1.088, "step": 422 }, { "epoch": 0.01533108622376862, "grad_norm": 3.000195420777106, "learning_rate": 5.108695652173914e-06, "loss": 1.1693, "step": 423 }, { "epoch": 0.015367329926425284, "grad_norm": 2.6528743472759646, "learning_rate": 5.1207729468599045e-06, "loss": 1.0388, "step": 424 }, { "epoch": 0.015403573629081947, "grad_norm": 3.20992582937707, "learning_rate": 5.132850241545894e-06, "loss": 1.2969, "step": 425 }, { "epoch": 0.01543981733173861, "grad_norm": 3.1477662380620264, "learning_rate": 5.144927536231884e-06, "loss": 1.2057, "step": 426 }, { "epoch": 0.015476061034395274, "grad_norm": 3.7297376830151587, "learning_rate": 5.157004830917874e-06, "loss": 1.1795, "step": 427 }, { "epoch": 0.015512304737051938, "grad_norm": 3.037759215534823, "learning_rate": 5.169082125603865e-06, "loss": 1.2648, "step": 428 }, { "epoch": 0.0155485484397086, "grad_norm": 2.971195663624031, "learning_rate": 5.181159420289855e-06, "loss": 1.2401, "step": 429 }, { "epoch": 0.015584792142365264, "grad_norm": 3.174658120512946, "learning_rate": 5.193236714975846e-06, "loss": 1.2011, "step": 430 }, { "epoch": 0.015621035845021928, "grad_norm": 2.8259624787994704, "learning_rate": 5.205314009661836e-06, "loss": 1.1406, "step": 431 }, { "epoch": 0.01565727954767859, "grad_norm": 3.0364632805503726, "learning_rate": 5.2173913043478265e-06, "loss": 1.0535, "step": 432 }, { "epoch": 0.015693523250335253, "grad_norm": 3.161982952822265, "learning_rate": 5.229468599033817e-06, "loss": 1.0641, "step": 433 }, { "epoch": 0.015729766952991917, "grad_norm": 2.904350044046981, "learning_rate": 5.241545893719807e-06, "loss": 1.1934, "step": 434 }, { "epoch": 0.01576601065564858, "grad_norm": 2.819307693600265, "learning_rate": 5.253623188405797e-06, "loss": 1.1815, "step": 435 }, { "epoch": 0.015802254358305245, "grad_norm": 2.495719993218968, "learning_rate": 5.265700483091788e-06, "loss": 1.2181, "step": 436 }, { "epoch": 0.01583849806096191, "grad_norm": 3.05001436055327, "learning_rate": 5.2777777777777785e-06, "loss": 1.1447, "step": 437 }, { "epoch": 0.015874741763618572, "grad_norm": 3.1056802434654016, "learning_rate": 5.289855072463769e-06, "loss": 1.1984, "step": 438 }, { "epoch": 0.015910985466275236, "grad_norm": 2.638023801110385, "learning_rate": 5.301932367149759e-06, "loss": 1.1264, "step": 439 }, { "epoch": 0.015947229168931897, "grad_norm": 2.6325758472759233, "learning_rate": 5.314009661835749e-06, "loss": 1.2439, "step": 440 }, { "epoch": 0.01598347287158856, "grad_norm": 2.8516699645144863, "learning_rate": 5.3260869565217395e-06, "loss": 1.1559, "step": 441 }, { "epoch": 0.016019716574245224, "grad_norm": 2.753511636977306, "learning_rate": 5.338164251207731e-06, "loss": 1.0224, "step": 442 }, { "epoch": 0.016055960276901888, "grad_norm": 3.0719045630944595, "learning_rate": 5.350241545893721e-06, "loss": 1.0775, "step": 443 }, { "epoch": 0.01609220397955855, "grad_norm": 3.3583152127360894, "learning_rate": 5.362318840579711e-06, "loss": 1.3048, "step": 444 }, { "epoch": 0.016128447682215215, "grad_norm": 2.559310156018546, "learning_rate": 5.374396135265701e-06, "loss": 1.1805, "step": 445 }, { "epoch": 0.01616469138487188, "grad_norm": 2.9910181391992574, "learning_rate": 5.386473429951691e-06, "loss": 1.3662, "step": 446 }, { "epoch": 0.016200935087528543, "grad_norm": 2.902715263554103, "learning_rate": 5.398550724637681e-06, "loss": 1.1327, "step": 447 }, { "epoch": 0.016237178790185207, "grad_norm": 2.7315717836418467, "learning_rate": 5.410628019323671e-06, "loss": 1.2534, "step": 448 }, { "epoch": 0.016273422492841867, "grad_norm": 2.732807402118748, "learning_rate": 5.422705314009662e-06, "loss": 1.0317, "step": 449 }, { "epoch": 0.01630966619549853, "grad_norm": 4.1280607398169735, "learning_rate": 5.4347826086956525e-06, "loss": 1.0875, "step": 450 }, { "epoch": 0.016345909898155195, "grad_norm": 2.80953103718647, "learning_rate": 5.446859903381643e-06, "loss": 1.0056, "step": 451 }, { "epoch": 0.01638215360081186, "grad_norm": 3.12312287686756, "learning_rate": 5.458937198067633e-06, "loss": 1.0923, "step": 452 }, { "epoch": 0.016418397303468522, "grad_norm": 3.305016025293721, "learning_rate": 5.471014492753623e-06, "loss": 1.0239, "step": 453 }, { "epoch": 0.016454641006125186, "grad_norm": 2.6773132121419247, "learning_rate": 5.4830917874396135e-06, "loss": 1.0046, "step": 454 }, { "epoch": 0.01649088470878185, "grad_norm": 3.0994383955964344, "learning_rate": 5.495169082125605e-06, "loss": 0.8388, "step": 455 }, { "epoch": 0.016527128411438514, "grad_norm": 3.241708026984593, "learning_rate": 5.507246376811595e-06, "loss": 1.1164, "step": 456 }, { "epoch": 0.016563372114095178, "grad_norm": 2.8804419190451234, "learning_rate": 5.519323671497585e-06, "loss": 1.2941, "step": 457 }, { "epoch": 0.016599615816751838, "grad_norm": 2.9979662637836815, "learning_rate": 5.531400966183575e-06, "loss": 1.3335, "step": 458 }, { "epoch": 0.0166358595194085, "grad_norm": 2.8741658987151824, "learning_rate": 5.543478260869566e-06, "loss": 1.267, "step": 459 }, { "epoch": 0.016672103222065165, "grad_norm": 2.9495115500807407, "learning_rate": 5.555555555555557e-06, "loss": 1.0651, "step": 460 }, { "epoch": 0.01670834692472183, "grad_norm": 2.86608909963217, "learning_rate": 5.567632850241547e-06, "loss": 0.9332, "step": 461 }, { "epoch": 0.016744590627378493, "grad_norm": 3.0397829938640553, "learning_rate": 5.579710144927537e-06, "loss": 1.1985, "step": 462 }, { "epoch": 0.016780834330035157, "grad_norm": 2.7083635614621837, "learning_rate": 5.591787439613527e-06, "loss": 1.113, "step": 463 }, { "epoch": 0.01681707803269182, "grad_norm": 2.947933156252237, "learning_rate": 5.603864734299518e-06, "loss": 1.1551, "step": 464 }, { "epoch": 0.016853321735348484, "grad_norm": 3.003934136352202, "learning_rate": 5.615942028985508e-06, "loss": 1.2129, "step": 465 }, { "epoch": 0.016889565438005148, "grad_norm": 2.87408778518686, "learning_rate": 5.628019323671497e-06, "loss": 1.0458, "step": 466 }, { "epoch": 0.01692580914066181, "grad_norm": 2.751107888498893, "learning_rate": 5.6400966183574875e-06, "loss": 1.3716, "step": 467 }, { "epoch": 0.016962052843318472, "grad_norm": 2.937705116665699, "learning_rate": 5.652173913043479e-06, "loss": 1.24, "step": 468 }, { "epoch": 0.016998296545975136, "grad_norm": 2.891741246423074, "learning_rate": 5.664251207729469e-06, "loss": 1.1518, "step": 469 }, { "epoch": 0.0170345402486318, "grad_norm": 3.033971681871443, "learning_rate": 5.676328502415459e-06, "loss": 1.1282, "step": 470 }, { "epoch": 0.017070783951288464, "grad_norm": 3.108102422658545, "learning_rate": 5.688405797101449e-06, "loss": 1.0928, "step": 471 }, { "epoch": 0.017107027653945128, "grad_norm": 2.8182551194980117, "learning_rate": 5.70048309178744e-06, "loss": 1.0599, "step": 472 }, { "epoch": 0.01714327135660179, "grad_norm": 3.6814782308185063, "learning_rate": 5.712560386473431e-06, "loss": 1.0015, "step": 473 }, { "epoch": 0.017179515059258455, "grad_norm": 2.83755469531126, "learning_rate": 5.724637681159421e-06, "loss": 1.2867, "step": 474 }, { "epoch": 0.01721575876191512, "grad_norm": 3.114399422896658, "learning_rate": 5.736714975845411e-06, "loss": 1.1859, "step": 475 }, { "epoch": 0.01725200246457178, "grad_norm": 2.7065838746708484, "learning_rate": 5.748792270531401e-06, "loss": 1.152, "step": 476 }, { "epoch": 0.017288246167228443, "grad_norm": 2.67928553279454, "learning_rate": 5.760869565217392e-06, "loss": 1.1427, "step": 477 }, { "epoch": 0.017324489869885107, "grad_norm": 2.8629115040492548, "learning_rate": 5.772946859903382e-06, "loss": 1.2055, "step": 478 }, { "epoch": 0.01736073357254177, "grad_norm": 2.475488855065777, "learning_rate": 5.785024154589373e-06, "loss": 0.9854, "step": 479 }, { "epoch": 0.017396977275198434, "grad_norm": 2.859241942087651, "learning_rate": 5.797101449275363e-06, "loss": 1.2492, "step": 480 }, { "epoch": 0.017433220977855098, "grad_norm": 3.0832788591203064, "learning_rate": 5.8091787439613535e-06, "loss": 1.1202, "step": 481 }, { "epoch": 0.017469464680511762, "grad_norm": 2.8791203660469398, "learning_rate": 5.821256038647344e-06, "loss": 1.3101, "step": 482 }, { "epoch": 0.017505708383168426, "grad_norm": 3.076432349245066, "learning_rate": 5.833333333333334e-06, "loss": 1.0925, "step": 483 }, { "epoch": 0.017541952085825086, "grad_norm": 2.725566620857322, "learning_rate": 5.845410628019324e-06, "loss": 1.1966, "step": 484 }, { "epoch": 0.01757819578848175, "grad_norm": 3.8241065523226188, "learning_rate": 5.857487922705315e-06, "loss": 1.1842, "step": 485 }, { "epoch": 0.017614439491138414, "grad_norm": 2.863455266016555, "learning_rate": 5.8695652173913055e-06, "loss": 1.2389, "step": 486 }, { "epoch": 0.017650683193795078, "grad_norm": 2.7627974442764125, "learning_rate": 5.881642512077295e-06, "loss": 1.1411, "step": 487 }, { "epoch": 0.01768692689645174, "grad_norm": 2.9904411299603426, "learning_rate": 5.893719806763285e-06, "loss": 1.2527, "step": 488 }, { "epoch": 0.017723170599108405, "grad_norm": 3.155250118060709, "learning_rate": 5.905797101449275e-06, "loss": 1.3205, "step": 489 }, { "epoch": 0.01775941430176507, "grad_norm": 2.884382519027104, "learning_rate": 5.917874396135266e-06, "loss": 1.2259, "step": 490 }, { "epoch": 0.017795658004421733, "grad_norm": 2.9918616343440743, "learning_rate": 5.929951690821256e-06, "loss": 1.2571, "step": 491 }, { "epoch": 0.017831901707078397, "grad_norm": 2.8953851444158016, "learning_rate": 5.942028985507247e-06, "loss": 1.1011, "step": 492 }, { "epoch": 0.017868145409735057, "grad_norm": 2.9160593994663984, "learning_rate": 5.954106280193237e-06, "loss": 1.2213, "step": 493 }, { "epoch": 0.01790438911239172, "grad_norm": 2.788958376874116, "learning_rate": 5.9661835748792275e-06, "loss": 1.154, "step": 494 }, { "epoch": 0.017940632815048384, "grad_norm": 2.948932019287409, "learning_rate": 5.978260869565218e-06, "loss": 1.0107, "step": 495 }, { "epoch": 0.017976876517705048, "grad_norm": 2.6930099589471403, "learning_rate": 5.990338164251208e-06, "loss": 1.0207, "step": 496 }, { "epoch": 0.018013120220361712, "grad_norm": 2.7081468027341153, "learning_rate": 6.002415458937198e-06, "loss": 1.1962, "step": 497 }, { "epoch": 0.018049363923018376, "grad_norm": 3.0472263861489988, "learning_rate": 6.014492753623189e-06, "loss": 1.1234, "step": 498 }, { "epoch": 0.01808560762567504, "grad_norm": 3.148729719589449, "learning_rate": 6.0265700483091795e-06, "loss": 1.1367, "step": 499 }, { "epoch": 0.018121851328331703, "grad_norm": 3.02841276954991, "learning_rate": 6.03864734299517e-06, "loss": 1.0492, "step": 500 }, { "epoch": 0.018158095030988367, "grad_norm": 14.914422076075668, "learning_rate": 6.05072463768116e-06, "loss": 2.5835, "step": 501 }, { "epoch": 0.018194338733645028, "grad_norm": 2.7364150291181604, "learning_rate": 6.06280193236715e-06, "loss": 1.1397, "step": 502 }, { "epoch": 0.01823058243630169, "grad_norm": 3.0811005806706886, "learning_rate": 6.0748792270531405e-06, "loss": 1.126, "step": 503 }, { "epoch": 0.018266826138958355, "grad_norm": 3.223538517855012, "learning_rate": 6.086956521739132e-06, "loss": 1.2356, "step": 504 }, { "epoch": 0.01830306984161502, "grad_norm": 2.5954594991202335, "learning_rate": 6.099033816425122e-06, "loss": 1.1812, "step": 505 }, { "epoch": 0.018339313544271683, "grad_norm": 2.428698891915629, "learning_rate": 6.111111111111112e-06, "loss": 0.9087, "step": 506 }, { "epoch": 0.018375557246928347, "grad_norm": 2.999400773061357, "learning_rate": 6.123188405797102e-06, "loss": 1.0125, "step": 507 }, { "epoch": 0.01841180094958501, "grad_norm": 3.2185168186126907, "learning_rate": 6.135265700483092e-06, "loss": 1.0632, "step": 508 }, { "epoch": 0.018448044652241674, "grad_norm": 2.772460494554891, "learning_rate": 6.147342995169082e-06, "loss": 1.1227, "step": 509 }, { "epoch": 0.018484288354898338, "grad_norm": 2.941788000523843, "learning_rate": 6.159420289855072e-06, "loss": 1.1271, "step": 510 }, { "epoch": 0.018520532057554998, "grad_norm": 3.0111020046314034, "learning_rate": 6.171497584541063e-06, "loss": 1.2339, "step": 511 }, { "epoch": 0.018556775760211662, "grad_norm": 3.016149917625577, "learning_rate": 6.1835748792270535e-06, "loss": 1.2244, "step": 512 }, { "epoch": 0.018593019462868326, "grad_norm": 2.579930883994206, "learning_rate": 6.195652173913044e-06, "loss": 1.1655, "step": 513 }, { "epoch": 0.01862926316552499, "grad_norm": 2.548961949833641, "learning_rate": 6.207729468599034e-06, "loss": 1.1315, "step": 514 }, { "epoch": 0.018665506868181653, "grad_norm": 3.0441209424057765, "learning_rate": 6.219806763285024e-06, "loss": 1.0864, "step": 515 }, { "epoch": 0.018701750570838317, "grad_norm": 2.6412730703617413, "learning_rate": 6.2318840579710145e-06, "loss": 1.0928, "step": 516 }, { "epoch": 0.01873799427349498, "grad_norm": 3.1674816905501495, "learning_rate": 6.243961352657006e-06, "loss": 1.1618, "step": 517 }, { "epoch": 0.018774237976151645, "grad_norm": 2.992339431954274, "learning_rate": 6.256038647342996e-06, "loss": 1.1579, "step": 518 }, { "epoch": 0.01881048167880831, "grad_norm": 2.932279023140682, "learning_rate": 6.268115942028986e-06, "loss": 1.1157, "step": 519 }, { "epoch": 0.01884672538146497, "grad_norm": 2.789358081202804, "learning_rate": 6.280193236714976e-06, "loss": 1.1517, "step": 520 }, { "epoch": 0.018882969084121633, "grad_norm": 2.9836903544966833, "learning_rate": 6.2922705314009666e-06, "loss": 1.1653, "step": 521 }, { "epoch": 0.018919212786778297, "grad_norm": 3.178194031036052, "learning_rate": 6.304347826086958e-06, "loss": 1.0229, "step": 522 }, { "epoch": 0.01895545648943496, "grad_norm": 2.955729037537651, "learning_rate": 6.316425120772948e-06, "loss": 1.1763, "step": 523 }, { "epoch": 0.018991700192091624, "grad_norm": 3.0174686836989286, "learning_rate": 6.328502415458938e-06, "loss": 0.9827, "step": 524 }, { "epoch": 0.019027943894748288, "grad_norm": 2.943744207312881, "learning_rate": 6.340579710144928e-06, "loss": 1.0985, "step": 525 }, { "epoch": 0.01906418759740495, "grad_norm": 2.7710868220549782, "learning_rate": 6.352657004830919e-06, "loss": 1.219, "step": 526 }, { "epoch": 0.019100431300061615, "grad_norm": 2.7226061008561904, "learning_rate": 6.364734299516909e-06, "loss": 1.0567, "step": 527 }, { "epoch": 0.01913667500271828, "grad_norm": 3.1319712418292225, "learning_rate": 6.376811594202898e-06, "loss": 1.2522, "step": 528 }, { "epoch": 0.01917291870537494, "grad_norm": 2.4189444879457875, "learning_rate": 6.3888888888888885e-06, "loss": 0.9084, "step": 529 }, { "epoch": 0.019209162408031603, "grad_norm": 2.8075652993268028, "learning_rate": 6.40096618357488e-06, "loss": 1.2447, "step": 530 }, { "epoch": 0.019245406110688267, "grad_norm": 3.0030521705633624, "learning_rate": 6.41304347826087e-06, "loss": 1.1004, "step": 531 }, { "epoch": 0.01928164981334493, "grad_norm": 3.1285079870865213, "learning_rate": 6.42512077294686e-06, "loss": 1.0616, "step": 532 }, { "epoch": 0.019317893516001595, "grad_norm": 3.148720016046631, "learning_rate": 6.43719806763285e-06, "loss": 1.2829, "step": 533 }, { "epoch": 0.01935413721865826, "grad_norm": 2.9840445756834786, "learning_rate": 6.449275362318841e-06, "loss": 1.0616, "step": 534 }, { "epoch": 0.019390380921314922, "grad_norm": 3.1967135125537878, "learning_rate": 6.461352657004832e-06, "loss": 1.148, "step": 535 }, { "epoch": 0.019426624623971586, "grad_norm": 3.1870004728472088, "learning_rate": 6.473429951690822e-06, "loss": 1.2369, "step": 536 }, { "epoch": 0.01946286832662825, "grad_norm": 2.65668129990346, "learning_rate": 6.485507246376812e-06, "loss": 1.1345, "step": 537 }, { "epoch": 0.01949911202928491, "grad_norm": 2.484003667896017, "learning_rate": 6.497584541062802e-06, "loss": 1.2006, "step": 538 }, { "epoch": 0.019535355731941574, "grad_norm": 3.0483419119682793, "learning_rate": 6.509661835748793e-06, "loss": 0.9361, "step": 539 }, { "epoch": 0.019571599434598238, "grad_norm": 2.862508016112915, "learning_rate": 6.521739130434783e-06, "loss": 1.1721, "step": 540 }, { "epoch": 0.0196078431372549, "grad_norm": 2.969816623765042, "learning_rate": 6.533816425120774e-06, "loss": 1.1332, "step": 541 }, { "epoch": 0.019644086839911565, "grad_norm": 2.919965352023023, "learning_rate": 6.545893719806764e-06, "loss": 1.2968, "step": 542 }, { "epoch": 0.01968033054256823, "grad_norm": 2.7381288840062283, "learning_rate": 6.5579710144927545e-06, "loss": 0.9656, "step": 543 }, { "epoch": 0.019716574245224893, "grad_norm": 2.7491967664550194, "learning_rate": 6.570048309178745e-06, "loss": 1.1673, "step": 544 }, { "epoch": 0.019752817947881557, "grad_norm": 2.3204918123073885, "learning_rate": 6.582125603864735e-06, "loss": 1.0594, "step": 545 }, { "epoch": 0.01978906165053822, "grad_norm": 2.982720693541124, "learning_rate": 6.594202898550725e-06, "loss": 0.9378, "step": 546 }, { "epoch": 0.01982530535319488, "grad_norm": 3.056003243998176, "learning_rate": 6.606280193236716e-06, "loss": 0.968, "step": 547 }, { "epoch": 0.019861549055851545, "grad_norm": 3.303306532093615, "learning_rate": 6.6183574879227065e-06, "loss": 1.07, "step": 548 }, { "epoch": 0.01989779275850821, "grad_norm": 2.5400548373801812, "learning_rate": 6.630434782608696e-06, "loss": 1.1148, "step": 549 }, { "epoch": 0.019934036461164872, "grad_norm": 3.045548350685437, "learning_rate": 6.642512077294686e-06, "loss": 1.2164, "step": 550 }, { "epoch": 0.019970280163821536, "grad_norm": 2.8732579587657177, "learning_rate": 6.654589371980676e-06, "loss": 1.1563, "step": 551 }, { "epoch": 0.0200065238664782, "grad_norm": 2.4813409969192612, "learning_rate": 6.666666666666667e-06, "loss": 1.1432, "step": 552 }, { "epoch": 0.020042767569134864, "grad_norm": 2.842335522927221, "learning_rate": 6.678743961352657e-06, "loss": 0.9602, "step": 553 }, { "epoch": 0.020079011271791528, "grad_norm": 2.6722103085804294, "learning_rate": 6.690821256038648e-06, "loss": 1.22, "step": 554 }, { "epoch": 0.02011525497444819, "grad_norm": 2.8528964783386064, "learning_rate": 6.702898550724638e-06, "loss": 1.074, "step": 555 }, { "epoch": 0.02015149867710485, "grad_norm": 3.1340952918357976, "learning_rate": 6.7149758454106285e-06, "loss": 1.3209, "step": 556 }, { "epoch": 0.020187742379761515, "grad_norm": 3.0626718558235817, "learning_rate": 6.727053140096619e-06, "loss": 1.0898, "step": 557 }, { "epoch": 0.02022398608241818, "grad_norm": 2.9500401475414093, "learning_rate": 6.739130434782609e-06, "loss": 1.2838, "step": 558 }, { "epoch": 0.020260229785074843, "grad_norm": 2.6285091249174295, "learning_rate": 6.751207729468599e-06, "loss": 1.2508, "step": 559 }, { "epoch": 0.020296473487731507, "grad_norm": 3.350700602944009, "learning_rate": 6.76328502415459e-06, "loss": 1.1241, "step": 560 }, { "epoch": 0.02033271719038817, "grad_norm": 2.5854547499014724, "learning_rate": 6.7753623188405805e-06, "loss": 1.0051, "step": 561 }, { "epoch": 0.020368960893044834, "grad_norm": 3.0517345988410014, "learning_rate": 6.787439613526571e-06, "loss": 1.0107, "step": 562 }, { "epoch": 0.020405204595701498, "grad_norm": 2.673525991936273, "learning_rate": 6.799516908212561e-06, "loss": 0.9891, "step": 563 }, { "epoch": 0.02044144829835816, "grad_norm": 2.7402885952455684, "learning_rate": 6.811594202898551e-06, "loss": 1.115, "step": 564 }, { "epoch": 0.020477692001014822, "grad_norm": 2.9294596090070955, "learning_rate": 6.823671497584542e-06, "loss": 1.2307, "step": 565 }, { "epoch": 0.020513935703671486, "grad_norm": 2.566144583098492, "learning_rate": 6.835748792270533e-06, "loss": 1.0278, "step": 566 }, { "epoch": 0.02055017940632815, "grad_norm": 2.7468530088245595, "learning_rate": 6.847826086956523e-06, "loss": 1.1422, "step": 567 }, { "epoch": 0.020586423108984814, "grad_norm": 3.2315093020309638, "learning_rate": 6.859903381642513e-06, "loss": 1.2691, "step": 568 }, { "epoch": 0.020622666811641478, "grad_norm": 3.2143326730739745, "learning_rate": 6.871980676328503e-06, "loss": 1.2213, "step": 569 }, { "epoch": 0.02065891051429814, "grad_norm": 2.770025380436243, "learning_rate": 6.884057971014493e-06, "loss": 1.2236, "step": 570 }, { "epoch": 0.020695154216954805, "grad_norm": 2.8560887503890116, "learning_rate": 6.896135265700483e-06, "loss": 1.0499, "step": 571 }, { "epoch": 0.02073139791961147, "grad_norm": 2.6904951984850305, "learning_rate": 6.908212560386473e-06, "loss": 1.1304, "step": 572 }, { "epoch": 0.02076764162226813, "grad_norm": 2.3992139296141866, "learning_rate": 6.920289855072464e-06, "loss": 1.1267, "step": 573 }, { "epoch": 0.020803885324924793, "grad_norm": 2.8877466121166773, "learning_rate": 6.9323671497584545e-06, "loss": 1.1022, "step": 574 }, { "epoch": 0.020840129027581457, "grad_norm": 2.946573961018487, "learning_rate": 6.944444444444445e-06, "loss": 1.1946, "step": 575 }, { "epoch": 0.02087637273023812, "grad_norm": 2.3185850676789217, "learning_rate": 6.956521739130435e-06, "loss": 1.0399, "step": 576 }, { "epoch": 0.020912616432894784, "grad_norm": 2.77652636438879, "learning_rate": 6.968599033816425e-06, "loss": 1.1056, "step": 577 }, { "epoch": 0.020948860135551448, "grad_norm": 2.9612801273455456, "learning_rate": 6.980676328502416e-06, "loss": 1.1486, "step": 578 }, { "epoch": 0.020985103838208112, "grad_norm": 2.8324556225053867, "learning_rate": 6.992753623188407e-06, "loss": 1.0983, "step": 579 }, { "epoch": 0.021021347540864776, "grad_norm": 3.1377285425661543, "learning_rate": 7.004830917874397e-06, "loss": 1.1612, "step": 580 }, { "epoch": 0.02105759124352144, "grad_norm": 2.4935445015869924, "learning_rate": 7.016908212560387e-06, "loss": 1.1113, "step": 581 }, { "epoch": 0.0210938349461781, "grad_norm": 3.003645083877445, "learning_rate": 7.028985507246377e-06, "loss": 1.1217, "step": 582 }, { "epoch": 0.021130078648834764, "grad_norm": 3.209151251788986, "learning_rate": 7.0410628019323676e-06, "loss": 1.1624, "step": 583 }, { "epoch": 0.021166322351491428, "grad_norm": 3.030681674881753, "learning_rate": 7.053140096618359e-06, "loss": 1.3442, "step": 584 }, { "epoch": 0.02120256605414809, "grad_norm": 2.8940578159499544, "learning_rate": 7.065217391304349e-06, "loss": 1.2009, "step": 585 }, { "epoch": 0.021238809756804755, "grad_norm": 2.9370067092713796, "learning_rate": 7.077294685990339e-06, "loss": 1.2211, "step": 586 }, { "epoch": 0.02127505345946142, "grad_norm": 2.6050067659869596, "learning_rate": 7.089371980676329e-06, "loss": 1.3641, "step": 587 }, { "epoch": 0.021311297162118083, "grad_norm": 3.012344843566974, "learning_rate": 7.10144927536232e-06, "loss": 1.1283, "step": 588 }, { "epoch": 0.021347540864774747, "grad_norm": 2.991987013220867, "learning_rate": 7.11352657004831e-06, "loss": 1.1417, "step": 589 }, { "epoch": 0.02138378456743141, "grad_norm": 2.822378888744508, "learning_rate": 7.125603864734299e-06, "loss": 1.2023, "step": 590 }, { "epoch": 0.02142002827008807, "grad_norm": 2.7398133504723843, "learning_rate": 7.13768115942029e-06, "loss": 0.9132, "step": 591 }, { "epoch": 0.021456271972744734, "grad_norm": 2.5108995674423853, "learning_rate": 7.149758454106281e-06, "loss": 0.9474, "step": 592 }, { "epoch": 0.021492515675401398, "grad_norm": 3.2410580077316036, "learning_rate": 7.161835748792271e-06, "loss": 1.2264, "step": 593 }, { "epoch": 0.021528759378058062, "grad_norm": 2.6422208730807704, "learning_rate": 7.173913043478261e-06, "loss": 1.1172, "step": 594 }, { "epoch": 0.021565003080714726, "grad_norm": 2.439850887972933, "learning_rate": 7.185990338164251e-06, "loss": 1.1093, "step": 595 }, { "epoch": 0.02160124678337139, "grad_norm": 2.7501621823247153, "learning_rate": 7.1980676328502416e-06, "loss": 1.1833, "step": 596 }, { "epoch": 0.021637490486028053, "grad_norm": 3.011641110296635, "learning_rate": 7.210144927536233e-06, "loss": 0.9863, "step": 597 }, { "epoch": 0.021673734188684717, "grad_norm": 2.55121249020111, "learning_rate": 7.222222222222223e-06, "loss": 0.9554, "step": 598 }, { "epoch": 0.02170997789134138, "grad_norm": 2.3086375605591543, "learning_rate": 7.234299516908213e-06, "loss": 0.8816, "step": 599 }, { "epoch": 0.02174622159399804, "grad_norm": 2.5354797870760586, "learning_rate": 7.246376811594203e-06, "loss": 1.1895, "step": 600 }, { "epoch": 0.021782465296654705, "grad_norm": 2.546664627666719, "learning_rate": 7.258454106280194e-06, "loss": 1.1837, "step": 601 }, { "epoch": 0.02181870899931137, "grad_norm": 2.9016367179798097, "learning_rate": 7.270531400966184e-06, "loss": 1.0882, "step": 602 }, { "epoch": 0.021854952701968033, "grad_norm": 2.5839153610210572, "learning_rate": 7.282608695652175e-06, "loss": 1.079, "step": 603 }, { "epoch": 0.021891196404624697, "grad_norm": 2.92824326696265, "learning_rate": 7.294685990338165e-06, "loss": 1.0947, "step": 604 }, { "epoch": 0.02192744010728136, "grad_norm": 2.983311617735777, "learning_rate": 7.3067632850241555e-06, "loss": 1.039, "step": 605 }, { "epoch": 0.021963683809938024, "grad_norm": 3.0898437270204746, "learning_rate": 7.318840579710146e-06, "loss": 0.9505, "step": 606 }, { "epoch": 0.021999927512594688, "grad_norm": 2.740355963263075, "learning_rate": 7.330917874396136e-06, "loss": 1.2203, "step": 607 }, { "epoch": 0.02203617121525135, "grad_norm": 3.697150203679544, "learning_rate": 7.342995169082127e-06, "loss": 1.0682, "step": 608 }, { "epoch": 0.022072414917908012, "grad_norm": 2.655934975271955, "learning_rate": 7.355072463768117e-06, "loss": 0.9838, "step": 609 }, { "epoch": 0.022108658620564676, "grad_norm": 2.761529103428584, "learning_rate": 7.3671497584541075e-06, "loss": 0.9978, "step": 610 }, { "epoch": 0.02214490232322134, "grad_norm": 2.4911240506583674, "learning_rate": 7.379227053140097e-06, "loss": 1.114, "step": 611 }, { "epoch": 0.022181146025878003, "grad_norm": 2.397352417611392, "learning_rate": 7.391304347826087e-06, "loss": 1.1291, "step": 612 }, { "epoch": 0.022217389728534667, "grad_norm": 2.5646546879532877, "learning_rate": 7.403381642512077e-06, "loss": 1.3767, "step": 613 }, { "epoch": 0.02225363343119133, "grad_norm": 3.3685150784557973, "learning_rate": 7.415458937198068e-06, "loss": 1.0601, "step": 614 }, { "epoch": 0.022289877133847995, "grad_norm": 2.7955834018287167, "learning_rate": 7.427536231884058e-06, "loss": 1.0226, "step": 615 }, { "epoch": 0.02232612083650466, "grad_norm": 2.6150053051862456, "learning_rate": 7.439613526570049e-06, "loss": 1.0642, "step": 616 }, { "epoch": 0.022362364539161322, "grad_norm": 2.7892359838809027, "learning_rate": 7.451690821256039e-06, "loss": 1.1232, "step": 617 }, { "epoch": 0.022398608241817983, "grad_norm": 3.1736264028918866, "learning_rate": 7.4637681159420295e-06, "loss": 1.0444, "step": 618 }, { "epoch": 0.022434851944474647, "grad_norm": 3.0457094866043573, "learning_rate": 7.47584541062802e-06, "loss": 1.2146, "step": 619 }, { "epoch": 0.02247109564713131, "grad_norm": 3.030966161106719, "learning_rate": 7.48792270531401e-06, "loss": 1.1466, "step": 620 }, { "epoch": 0.022507339349787974, "grad_norm": 2.7547827541250145, "learning_rate": 7.500000000000001e-06, "loss": 1.05, "step": 621 }, { "epoch": 0.022543583052444638, "grad_norm": 3.2000977784207496, "learning_rate": 7.512077294685991e-06, "loss": 1.1619, "step": 622 }, { "epoch": 0.0225798267551013, "grad_norm": 2.8737065233873373, "learning_rate": 7.5241545893719815e-06, "loss": 1.0922, "step": 623 }, { "epoch": 0.022616070457757965, "grad_norm": 2.5685087769490202, "learning_rate": 7.536231884057972e-06, "loss": 1.094, "step": 624 }, { "epoch": 0.02265231416041463, "grad_norm": 3.0824774112167472, "learning_rate": 7.548309178743962e-06, "loss": 1.174, "step": 625 }, { "epoch": 0.022688557863071293, "grad_norm": 2.8927843985428345, "learning_rate": 7.560386473429952e-06, "loss": 1.2092, "step": 626 }, { "epoch": 0.022724801565727953, "grad_norm": 2.6283458945944354, "learning_rate": 7.572463768115943e-06, "loss": 1.1027, "step": 627 }, { "epoch": 0.022761045268384617, "grad_norm": 2.8260835882709685, "learning_rate": 7.584541062801934e-06, "loss": 1.0276, "step": 628 }, { "epoch": 0.02279728897104128, "grad_norm": 2.9250019083988557, "learning_rate": 7.596618357487924e-06, "loss": 1.0656, "step": 629 }, { "epoch": 0.022833532673697945, "grad_norm": 2.481519584695273, "learning_rate": 7.608695652173914e-06, "loss": 1.2836, "step": 630 }, { "epoch": 0.02286977637635461, "grad_norm": 3.3538561426761473, "learning_rate": 7.620772946859904e-06, "loss": 1.1077, "step": 631 }, { "epoch": 0.022906020079011272, "grad_norm": 3.1876489064256264, "learning_rate": 7.632850241545895e-06, "loss": 1.2523, "step": 632 }, { "epoch": 0.022942263781667936, "grad_norm": 3.1290470233776335, "learning_rate": 7.644927536231884e-06, "loss": 1.2066, "step": 633 }, { "epoch": 0.0229785074843246, "grad_norm": 2.7033088803749235, "learning_rate": 7.657004830917875e-06, "loss": 1.2152, "step": 634 }, { "epoch": 0.02301475118698126, "grad_norm": 2.8332696474894825, "learning_rate": 7.669082125603864e-06, "loss": 1.214, "step": 635 }, { "epoch": 0.023050994889637924, "grad_norm": 3.019860944956083, "learning_rate": 7.681159420289856e-06, "loss": 1.1049, "step": 636 }, { "epoch": 0.023087238592294588, "grad_norm": 3.258853620925561, "learning_rate": 7.693236714975847e-06, "loss": 1.1038, "step": 637 }, { "epoch": 0.02312348229495125, "grad_norm": 3.0961335559512233, "learning_rate": 7.705314009661836e-06, "loss": 1.2479, "step": 638 }, { "epoch": 0.023159725997607915, "grad_norm": 3.1497923257073586, "learning_rate": 7.717391304347827e-06, "loss": 1.0832, "step": 639 }, { "epoch": 0.02319596970026458, "grad_norm": 2.982446958585488, "learning_rate": 7.729468599033817e-06, "loss": 1.1065, "step": 640 }, { "epoch": 0.023232213402921243, "grad_norm": 2.3810719539242298, "learning_rate": 7.741545893719808e-06, "loss": 1.1469, "step": 641 }, { "epoch": 0.023268457105577907, "grad_norm": 2.9347485561933824, "learning_rate": 7.753623188405797e-06, "loss": 0.8904, "step": 642 }, { "epoch": 0.02330470080823457, "grad_norm": 2.571205161647192, "learning_rate": 7.765700483091788e-06, "loss": 1.0736, "step": 643 }, { "epoch": 0.02334094451089123, "grad_norm": 2.7192232589517475, "learning_rate": 7.77777777777778e-06, "loss": 1.2892, "step": 644 }, { "epoch": 0.023377188213547895, "grad_norm": 3.061488291282543, "learning_rate": 7.789855072463769e-06, "loss": 1.0184, "step": 645 }, { "epoch": 0.02341343191620456, "grad_norm": 3.1950893512487744, "learning_rate": 7.80193236714976e-06, "loss": 1.1935, "step": 646 }, { "epoch": 0.023449675618861222, "grad_norm": 2.688872009127236, "learning_rate": 7.814009661835749e-06, "loss": 0.9314, "step": 647 }, { "epoch": 0.023485919321517886, "grad_norm": 2.762520210457185, "learning_rate": 7.82608695652174e-06, "loss": 1.0196, "step": 648 }, { "epoch": 0.02352216302417455, "grad_norm": 2.8275095657024525, "learning_rate": 7.838164251207731e-06, "loss": 0.9838, "step": 649 }, { "epoch": 0.023558406726831214, "grad_norm": 2.9468432763944947, "learning_rate": 7.85024154589372e-06, "loss": 1.2027, "step": 650 }, { "epoch": 0.023594650429487878, "grad_norm": 2.966607057892112, "learning_rate": 7.862318840579712e-06, "loss": 1.0375, "step": 651 }, { "epoch": 0.02363089413214454, "grad_norm": 2.9228141147020135, "learning_rate": 7.874396135265701e-06, "loss": 1.163, "step": 652 }, { "epoch": 0.0236671378348012, "grad_norm": 3.448092584289347, "learning_rate": 7.88647342995169e-06, "loss": 1.2364, "step": 653 }, { "epoch": 0.023703381537457865, "grad_norm": 2.614219897538733, "learning_rate": 7.898550724637682e-06, "loss": 1.1834, "step": 654 }, { "epoch": 0.02373962524011453, "grad_norm": 2.658696530910834, "learning_rate": 7.910628019323671e-06, "loss": 1.1817, "step": 655 }, { "epoch": 0.023775868942771193, "grad_norm": 2.8745602122007443, "learning_rate": 7.922705314009662e-06, "loss": 1.2269, "step": 656 }, { "epoch": 0.023812112645427857, "grad_norm": 3.248070696500897, "learning_rate": 7.934782608695653e-06, "loss": 1.2337, "step": 657 }, { "epoch": 0.02384835634808452, "grad_norm": 3.0590795458797486, "learning_rate": 7.946859903381643e-06, "loss": 1.0932, "step": 658 }, { "epoch": 0.023884600050741184, "grad_norm": 2.7037983335048725, "learning_rate": 7.958937198067634e-06, "loss": 1.095, "step": 659 }, { "epoch": 0.023920843753397848, "grad_norm": 2.7418803910780474, "learning_rate": 7.971014492753623e-06, "loss": 1.0899, "step": 660 }, { "epoch": 0.023957087456054512, "grad_norm": 2.9134703087361205, "learning_rate": 7.983091787439614e-06, "loss": 0.9015, "step": 661 }, { "epoch": 0.023993331158711172, "grad_norm": 2.4320333667095477, "learning_rate": 7.995169082125605e-06, "loss": 1.1764, "step": 662 }, { "epoch": 0.024029574861367836, "grad_norm": 2.966268983022702, "learning_rate": 8.007246376811595e-06, "loss": 1.0391, "step": 663 }, { "epoch": 0.0240658185640245, "grad_norm": 2.769504465472553, "learning_rate": 8.019323671497586e-06, "loss": 0.9442, "step": 664 }, { "epoch": 0.024102062266681164, "grad_norm": 2.7437126560222396, "learning_rate": 8.031400966183575e-06, "loss": 1.0875, "step": 665 }, { "epoch": 0.024138305969337828, "grad_norm": 2.647057776163857, "learning_rate": 8.043478260869566e-06, "loss": 1.1393, "step": 666 }, { "epoch": 0.02417454967199449, "grad_norm": 2.8033937104207896, "learning_rate": 8.055555555555557e-06, "loss": 1.1008, "step": 667 }, { "epoch": 0.024210793374651155, "grad_norm": 2.8989685611858302, "learning_rate": 8.067632850241547e-06, "loss": 1.1335, "step": 668 }, { "epoch": 0.02424703707730782, "grad_norm": 3.0605737152212154, "learning_rate": 8.079710144927538e-06, "loss": 1.007, "step": 669 }, { "epoch": 0.024283280779964483, "grad_norm": 3.105475272155749, "learning_rate": 8.091787439613527e-06, "loss": 1.185, "step": 670 }, { "epoch": 0.024319524482621143, "grad_norm": 2.7986279243441334, "learning_rate": 8.103864734299518e-06, "loss": 1.1474, "step": 671 }, { "epoch": 0.024355768185277807, "grad_norm": 2.577090613879188, "learning_rate": 8.115942028985508e-06, "loss": 1.0909, "step": 672 }, { "epoch": 0.02439201188793447, "grad_norm": 2.77900855716895, "learning_rate": 8.128019323671497e-06, "loss": 1.1295, "step": 673 }, { "epoch": 0.024428255590591134, "grad_norm": 2.6227228568486094, "learning_rate": 8.140096618357488e-06, "loss": 1.1846, "step": 674 }, { "epoch": 0.024464499293247798, "grad_norm": 2.7265375947743764, "learning_rate": 8.15217391304348e-06, "loss": 1.0621, "step": 675 }, { "epoch": 0.024500742995904462, "grad_norm": 2.6182159461044234, "learning_rate": 8.164251207729469e-06, "loss": 0.9074, "step": 676 }, { "epoch": 0.024536986698561126, "grad_norm": 2.9509879993078, "learning_rate": 8.17632850241546e-06, "loss": 0.905, "step": 677 }, { "epoch": 0.02457323040121779, "grad_norm": 2.9535222884972003, "learning_rate": 8.188405797101449e-06, "loss": 1.1323, "step": 678 }, { "epoch": 0.024609474103874453, "grad_norm": 2.988195077533802, "learning_rate": 8.20048309178744e-06, "loss": 0.9929, "step": 679 }, { "epoch": 0.024645717806531114, "grad_norm": 2.890557870230951, "learning_rate": 8.212560386473431e-06, "loss": 1.2009, "step": 680 }, { "epoch": 0.024681961509187778, "grad_norm": 2.810539563937672, "learning_rate": 8.22463768115942e-06, "loss": 1.2223, "step": 681 }, { "epoch": 0.02471820521184444, "grad_norm": 3.3954347318568048, "learning_rate": 8.236714975845412e-06, "loss": 1.2696, "step": 682 }, { "epoch": 0.024754448914501105, "grad_norm": 2.4520771391791047, "learning_rate": 8.248792270531401e-06, "loss": 0.8867, "step": 683 }, { "epoch": 0.02479069261715777, "grad_norm": 2.87059145734444, "learning_rate": 8.260869565217392e-06, "loss": 1.1507, "step": 684 }, { "epoch": 0.024826936319814433, "grad_norm": 2.78556989030861, "learning_rate": 8.272946859903382e-06, "loss": 1.2233, "step": 685 }, { "epoch": 0.024863180022471097, "grad_norm": 2.9799192378852704, "learning_rate": 8.285024154589373e-06, "loss": 1.1412, "step": 686 }, { "epoch": 0.02489942372512776, "grad_norm": 2.994895744158307, "learning_rate": 8.297101449275364e-06, "loss": 1.2337, "step": 687 }, { "epoch": 0.024935667427784424, "grad_norm": 2.8604202796467817, "learning_rate": 8.309178743961353e-06, "loss": 1.0107, "step": 688 }, { "epoch": 0.024971911130441084, "grad_norm": 2.7483062041339283, "learning_rate": 8.321256038647344e-06, "loss": 1.0956, "step": 689 }, { "epoch": 0.025008154833097748, "grad_norm": 3.3493396890903964, "learning_rate": 8.333333333333334e-06, "loss": 1.2061, "step": 690 }, { "epoch": 0.025044398535754412, "grad_norm": 2.7224936949921563, "learning_rate": 8.345410628019325e-06, "loss": 1.2456, "step": 691 }, { "epoch": 0.025080642238411076, "grad_norm": 2.7292876809288407, "learning_rate": 8.357487922705316e-06, "loss": 1.2686, "step": 692 }, { "epoch": 0.02511688594106774, "grad_norm": 3.0790800565603593, "learning_rate": 8.369565217391305e-06, "loss": 1.0841, "step": 693 }, { "epoch": 0.025153129643724403, "grad_norm": 2.6987475691288862, "learning_rate": 8.381642512077295e-06, "loss": 1.0786, "step": 694 }, { "epoch": 0.025189373346381067, "grad_norm": 2.9513204078342947, "learning_rate": 8.393719806763286e-06, "loss": 1.0869, "step": 695 }, { "epoch": 0.02522561704903773, "grad_norm": 3.1254472462438936, "learning_rate": 8.405797101449275e-06, "loss": 1.1507, "step": 696 }, { "epoch": 0.025261860751694395, "grad_norm": 2.8873043296084413, "learning_rate": 8.417874396135266e-06, "loss": 1.0536, "step": 697 }, { "epoch": 0.025298104454351055, "grad_norm": 3.0661348235278414, "learning_rate": 8.429951690821256e-06, "loss": 1.2056, "step": 698 }, { "epoch": 0.02533434815700772, "grad_norm": 2.9846698799864897, "learning_rate": 8.442028985507247e-06, "loss": 0.8705, "step": 699 }, { "epoch": 0.025370591859664383, "grad_norm": 3.014144725319983, "learning_rate": 8.454106280193238e-06, "loss": 1.2679, "step": 700 }, { "epoch": 0.025406835562321047, "grad_norm": 2.7132927588006055, "learning_rate": 8.466183574879227e-06, "loss": 1.2063, "step": 701 }, { "epoch": 0.02544307926497771, "grad_norm": 2.7745498427201887, "learning_rate": 8.478260869565218e-06, "loss": 1.0257, "step": 702 }, { "epoch": 0.025479322967634374, "grad_norm": 2.630346291677755, "learning_rate": 8.490338164251208e-06, "loss": 0.9224, "step": 703 }, { "epoch": 0.025515566670291038, "grad_norm": 3.7868900301239754, "learning_rate": 8.502415458937199e-06, "loss": 1.1458, "step": 704 }, { "epoch": 0.0255518103729477, "grad_norm": 3.6203022492539083, "learning_rate": 8.51449275362319e-06, "loss": 1.1253, "step": 705 }, { "epoch": 0.025588054075604362, "grad_norm": 2.668201369804905, "learning_rate": 8.52657004830918e-06, "loss": 0.9484, "step": 706 }, { "epoch": 0.025624297778261026, "grad_norm": 3.0218896289484336, "learning_rate": 8.53864734299517e-06, "loss": 1.266, "step": 707 }, { "epoch": 0.02566054148091769, "grad_norm": 2.765134822635593, "learning_rate": 8.55072463768116e-06, "loss": 1.2079, "step": 708 }, { "epoch": 0.025696785183574353, "grad_norm": 2.955693167318123, "learning_rate": 8.562801932367151e-06, "loss": 1.0695, "step": 709 }, { "epoch": 0.025733028886231017, "grad_norm": 2.772984710135731, "learning_rate": 8.57487922705314e-06, "loss": 1.22, "step": 710 }, { "epoch": 0.02576927258888768, "grad_norm": 2.389881454036001, "learning_rate": 8.586956521739131e-06, "loss": 0.8633, "step": 711 }, { "epoch": 0.025805516291544345, "grad_norm": 2.759872426060033, "learning_rate": 8.599033816425122e-06, "loss": 1.1716, "step": 712 }, { "epoch": 0.02584175999420101, "grad_norm": 2.673990463416987, "learning_rate": 8.611111111111112e-06, "loss": 1.187, "step": 713 }, { "epoch": 0.025878003696857672, "grad_norm": 2.623808425383312, "learning_rate": 8.623188405797103e-06, "loss": 1.1943, "step": 714 }, { "epoch": 0.025914247399514333, "grad_norm": 2.912070050923381, "learning_rate": 8.635265700483092e-06, "loss": 1.135, "step": 715 }, { "epoch": 0.025950491102170996, "grad_norm": 2.6966464902073852, "learning_rate": 8.647342995169082e-06, "loss": 1.2691, "step": 716 }, { "epoch": 0.02598673480482766, "grad_norm": 2.7932272398507636, "learning_rate": 8.659420289855073e-06, "loss": 1.0032, "step": 717 }, { "epoch": 0.026022978507484324, "grad_norm": 2.9631243336160784, "learning_rate": 8.671497584541064e-06, "loss": 1.2046, "step": 718 }, { "epoch": 0.026059222210140988, "grad_norm": 2.8600780673739656, "learning_rate": 8.683574879227053e-06, "loss": 1.1655, "step": 719 }, { "epoch": 0.02609546591279765, "grad_norm": 2.947868933926365, "learning_rate": 8.695652173913044e-06, "loss": 1.1025, "step": 720 }, { "epoch": 0.026131709615454315, "grad_norm": 2.716091867340317, "learning_rate": 8.707729468599034e-06, "loss": 1.1345, "step": 721 }, { "epoch": 0.02616795331811098, "grad_norm": 2.907546352450356, "learning_rate": 8.719806763285025e-06, "loss": 1.0468, "step": 722 }, { "epoch": 0.026204197020767643, "grad_norm": 2.836752374189747, "learning_rate": 8.731884057971014e-06, "loss": 1.2985, "step": 723 }, { "epoch": 0.026240440723424303, "grad_norm": 3.066537610810316, "learning_rate": 8.743961352657005e-06, "loss": 1.1031, "step": 724 }, { "epoch": 0.026276684426080967, "grad_norm": 2.8825452821125648, "learning_rate": 8.756038647342996e-06, "loss": 1.0868, "step": 725 }, { "epoch": 0.02631292812873763, "grad_norm": 2.356126596874246, "learning_rate": 8.768115942028986e-06, "loss": 1.0325, "step": 726 }, { "epoch": 0.026349171831394295, "grad_norm": 2.3001064688112707, "learning_rate": 8.780193236714977e-06, "loss": 1.0643, "step": 727 }, { "epoch": 0.02638541553405096, "grad_norm": 3.1208239667391977, "learning_rate": 8.792270531400966e-06, "loss": 1.1008, "step": 728 }, { "epoch": 0.026421659236707622, "grad_norm": 3.1326185045676205, "learning_rate": 8.804347826086957e-06, "loss": 1.3351, "step": 729 }, { "epoch": 0.026457902939364286, "grad_norm": 2.8379621237356543, "learning_rate": 8.816425120772949e-06, "loss": 1.0898, "step": 730 }, { "epoch": 0.02649414664202095, "grad_norm": 2.674604226064666, "learning_rate": 8.828502415458938e-06, "loss": 1.1301, "step": 731 }, { "epoch": 0.026530390344677614, "grad_norm": 3.041462817286652, "learning_rate": 8.840579710144929e-06, "loss": 0.9961, "step": 732 }, { "epoch": 0.026566634047334274, "grad_norm": 2.8685182670899576, "learning_rate": 8.852657004830918e-06, "loss": 1.0579, "step": 733 }, { "epoch": 0.026602877749990938, "grad_norm": 2.610057105250932, "learning_rate": 8.86473429951691e-06, "loss": 1.1077, "step": 734 }, { "epoch": 0.0266391214526476, "grad_norm": 2.664050253989039, "learning_rate": 8.876811594202899e-06, "loss": 1.0267, "step": 735 }, { "epoch": 0.026675365155304265, "grad_norm": 2.629668746245524, "learning_rate": 8.888888888888888e-06, "loss": 0.8893, "step": 736 }, { "epoch": 0.02671160885796093, "grad_norm": 2.8323572303585047, "learning_rate": 8.90096618357488e-06, "loss": 1.1108, "step": 737 }, { "epoch": 0.026747852560617593, "grad_norm": 2.773031448634295, "learning_rate": 8.91304347826087e-06, "loss": 1.2336, "step": 738 }, { "epoch": 0.026784096263274257, "grad_norm": 2.779379333588498, "learning_rate": 8.92512077294686e-06, "loss": 0.9423, "step": 739 }, { "epoch": 0.02682033996593092, "grad_norm": 2.86626964467217, "learning_rate": 8.937198067632851e-06, "loss": 1.0931, "step": 740 }, { "epoch": 0.026856583668587584, "grad_norm": 2.8680955713086034, "learning_rate": 8.94927536231884e-06, "loss": 1.2093, "step": 741 }, { "epoch": 0.026892827371244245, "grad_norm": 2.7831383775022434, "learning_rate": 8.961352657004831e-06, "loss": 1.0219, "step": 742 }, { "epoch": 0.02692907107390091, "grad_norm": 3.0574033913600354, "learning_rate": 8.973429951690823e-06, "loss": 1.0333, "step": 743 }, { "epoch": 0.026965314776557572, "grad_norm": 2.5317777257824217, "learning_rate": 8.985507246376812e-06, "loss": 1.1035, "step": 744 }, { "epoch": 0.027001558479214236, "grad_norm": 2.659416155894581, "learning_rate": 8.997584541062803e-06, "loss": 1.028, "step": 745 }, { "epoch": 0.0270378021818709, "grad_norm": 2.8899811581617167, "learning_rate": 9.009661835748792e-06, "loss": 1.0028, "step": 746 }, { "epoch": 0.027074045884527564, "grad_norm": 2.585145318541483, "learning_rate": 9.021739130434784e-06, "loss": 1.1017, "step": 747 }, { "epoch": 0.027110289587184228, "grad_norm": 2.6714278929057333, "learning_rate": 9.033816425120775e-06, "loss": 1.0141, "step": 748 }, { "epoch": 0.02714653328984089, "grad_norm": 2.8481333603705044, "learning_rate": 9.045893719806764e-06, "loss": 0.9979, "step": 749 }, { "epoch": 0.027182776992497555, "grad_norm": 2.85896781044074, "learning_rate": 9.057971014492755e-06, "loss": 1.0893, "step": 750 }, { "epoch": 0.027219020695154215, "grad_norm": 2.678176543503798, "learning_rate": 9.070048309178744e-06, "loss": 1.1427, "step": 751 }, { "epoch": 0.02725526439781088, "grad_norm": 2.97287558009676, "learning_rate": 9.082125603864736e-06, "loss": 1.1233, "step": 752 }, { "epoch": 0.027291508100467543, "grad_norm": 2.861596001201291, "learning_rate": 9.094202898550725e-06, "loss": 1.0294, "step": 753 }, { "epoch": 0.027327751803124207, "grad_norm": 2.9717773344572422, "learning_rate": 9.106280193236716e-06, "loss": 1.1374, "step": 754 }, { "epoch": 0.02736399550578087, "grad_norm": 2.782677970518301, "learning_rate": 9.118357487922707e-06, "loss": 1.0302, "step": 755 }, { "epoch": 0.027400239208437534, "grad_norm": 2.5770332582582753, "learning_rate": 9.130434782608697e-06, "loss": 1.1166, "step": 756 }, { "epoch": 0.027436482911094198, "grad_norm": 2.751301787960577, "learning_rate": 9.142512077294686e-06, "loss": 0.8989, "step": 757 }, { "epoch": 0.027472726613750862, "grad_norm": 3.0150888984322095, "learning_rate": 9.154589371980677e-06, "loss": 1.0888, "step": 758 }, { "epoch": 0.027508970316407526, "grad_norm": 2.8226749577831125, "learning_rate": 9.166666666666666e-06, "loss": 1.0649, "step": 759 }, { "epoch": 0.027545214019064186, "grad_norm": 3.043381319904003, "learning_rate": 9.178743961352658e-06, "loss": 1.0484, "step": 760 }, { "epoch": 0.02758145772172085, "grad_norm": 2.5044104501149764, "learning_rate": 9.190821256038649e-06, "loss": 1.0625, "step": 761 }, { "epoch": 0.027617701424377514, "grad_norm": 2.536552513126612, "learning_rate": 9.202898550724638e-06, "loss": 0.9742, "step": 762 }, { "epoch": 0.027653945127034178, "grad_norm": 2.682744414302026, "learning_rate": 9.214975845410629e-06, "loss": 1.0785, "step": 763 }, { "epoch": 0.02769018882969084, "grad_norm": 2.758622025107173, "learning_rate": 9.227053140096618e-06, "loss": 1.0507, "step": 764 }, { "epoch": 0.027726432532347505, "grad_norm": 2.82359237149034, "learning_rate": 9.23913043478261e-06, "loss": 0.9428, "step": 765 }, { "epoch": 0.02776267623500417, "grad_norm": 2.6925321088494103, "learning_rate": 9.251207729468599e-06, "loss": 1.3289, "step": 766 }, { "epoch": 0.027798919937660833, "grad_norm": 2.7244384625472997, "learning_rate": 9.26328502415459e-06, "loss": 1.1283, "step": 767 }, { "epoch": 0.027835163640317497, "grad_norm": 2.6528754498528158, "learning_rate": 9.275362318840581e-06, "loss": 1.0904, "step": 768 }, { "epoch": 0.027871407342974157, "grad_norm": 3.013736616629893, "learning_rate": 9.28743961352657e-06, "loss": 0.933, "step": 769 }, { "epoch": 0.02790765104563082, "grad_norm": 2.7203762537557936, "learning_rate": 9.299516908212562e-06, "loss": 0.9846, "step": 770 }, { "epoch": 0.027943894748287484, "grad_norm": 2.3267291842325477, "learning_rate": 9.311594202898551e-06, "loss": 0.9517, "step": 771 }, { "epoch": 0.027980138450944148, "grad_norm": 2.7741528438227125, "learning_rate": 9.323671497584542e-06, "loss": 1.1986, "step": 772 }, { "epoch": 0.028016382153600812, "grad_norm": 2.608693231768052, "learning_rate": 9.335748792270533e-06, "loss": 1.0048, "step": 773 }, { "epoch": 0.028052625856257476, "grad_norm": 2.775879896183148, "learning_rate": 9.347826086956523e-06, "loss": 1.0748, "step": 774 }, { "epoch": 0.02808886955891414, "grad_norm": 2.9874913739450815, "learning_rate": 9.359903381642514e-06, "loss": 1.0122, "step": 775 }, { "epoch": 0.028125113261570803, "grad_norm": 2.9328922783262334, "learning_rate": 9.371980676328503e-06, "loss": 1.249, "step": 776 }, { "epoch": 0.028161356964227467, "grad_norm": 2.726841724049853, "learning_rate": 9.384057971014492e-06, "loss": 0.9715, "step": 777 }, { "epoch": 0.028197600666884128, "grad_norm": 2.8508385178772215, "learning_rate": 9.396135265700484e-06, "loss": 1.1539, "step": 778 }, { "epoch": 0.02823384436954079, "grad_norm": 2.782505082537471, "learning_rate": 9.408212560386473e-06, "loss": 1.137, "step": 779 }, { "epoch": 0.028270088072197455, "grad_norm": 2.494914561702346, "learning_rate": 9.420289855072464e-06, "loss": 1.1059, "step": 780 }, { "epoch": 0.02830633177485412, "grad_norm": 2.7894644785079694, "learning_rate": 9.432367149758455e-06, "loss": 1.1357, "step": 781 }, { "epoch": 0.028342575477510783, "grad_norm": 2.8357390587449904, "learning_rate": 9.444444444444445e-06, "loss": 1.2432, "step": 782 }, { "epoch": 0.028378819180167447, "grad_norm": 2.9648115159423503, "learning_rate": 9.456521739130436e-06, "loss": 1.0548, "step": 783 }, { "epoch": 0.02841506288282411, "grad_norm": 2.948819818696822, "learning_rate": 9.468599033816425e-06, "loss": 1.2632, "step": 784 }, { "epoch": 0.028451306585480774, "grad_norm": 2.7416978907959733, "learning_rate": 9.480676328502416e-06, "loss": 1.089, "step": 785 }, { "epoch": 0.028487550288137434, "grad_norm": 3.0395388929336997, "learning_rate": 9.492753623188407e-06, "loss": 1.0479, "step": 786 }, { "epoch": 0.028523793990794098, "grad_norm": 2.7946794656620346, "learning_rate": 9.504830917874397e-06, "loss": 0.9886, "step": 787 }, { "epoch": 0.028560037693450762, "grad_norm": 2.681029189797198, "learning_rate": 9.516908212560388e-06, "loss": 1.242, "step": 788 }, { "epoch": 0.028596281396107426, "grad_norm": 2.448835467769716, "learning_rate": 9.528985507246377e-06, "loss": 1.0511, "step": 789 }, { "epoch": 0.02863252509876409, "grad_norm": 2.680240019062511, "learning_rate": 9.541062801932368e-06, "loss": 1.192, "step": 790 }, { "epoch": 0.028668768801420753, "grad_norm": 3.3912577564803086, "learning_rate": 9.55314009661836e-06, "loss": 1.0061, "step": 791 }, { "epoch": 0.028705012504077417, "grad_norm": 2.7871215264058384, "learning_rate": 9.565217391304349e-06, "loss": 1.1819, "step": 792 }, { "epoch": 0.02874125620673408, "grad_norm": 2.89072548559572, "learning_rate": 9.57729468599034e-06, "loss": 1.0741, "step": 793 }, { "epoch": 0.028777499909390745, "grad_norm": 2.8274771569877055, "learning_rate": 9.58937198067633e-06, "loss": 0.9273, "step": 794 }, { "epoch": 0.028813743612047405, "grad_norm": 2.9048285512738046, "learning_rate": 9.60144927536232e-06, "loss": 1.4105, "step": 795 }, { "epoch": 0.02884998731470407, "grad_norm": 2.6355541257994552, "learning_rate": 9.61352657004831e-06, "loss": 1.029, "step": 796 }, { "epoch": 0.028886231017360733, "grad_norm": 2.465400109540181, "learning_rate": 9.625603864734299e-06, "loss": 1.1891, "step": 797 }, { "epoch": 0.028922474720017396, "grad_norm": 3.11568870430185, "learning_rate": 9.63768115942029e-06, "loss": 1.0115, "step": 798 }, { "epoch": 0.02895871842267406, "grad_norm": 2.8196962063613453, "learning_rate": 9.649758454106281e-06, "loss": 1.0416, "step": 799 }, { "epoch": 0.028994962125330724, "grad_norm": 2.9230940721911893, "learning_rate": 9.66183574879227e-06, "loss": 1.1777, "step": 800 }, { "epoch": 0.029031205827987388, "grad_norm": 2.6750111075089245, "learning_rate": 9.673913043478262e-06, "loss": 1.0315, "step": 801 }, { "epoch": 0.02906744953064405, "grad_norm": 2.778077584520702, "learning_rate": 9.685990338164251e-06, "loss": 1.1091, "step": 802 }, { "epoch": 0.029103693233300715, "grad_norm": 2.776499612035007, "learning_rate": 9.698067632850242e-06, "loss": 1.1256, "step": 803 }, { "epoch": 0.029139936935957376, "grad_norm": 2.8609929834479146, "learning_rate": 9.710144927536233e-06, "loss": 1.258, "step": 804 }, { "epoch": 0.02917618063861404, "grad_norm": 3.14438518692866, "learning_rate": 9.722222222222223e-06, "loss": 1.1582, "step": 805 }, { "epoch": 0.029212424341270703, "grad_norm": 2.553983628994959, "learning_rate": 9.734299516908214e-06, "loss": 1.088, "step": 806 }, { "epoch": 0.029248668043927367, "grad_norm": 2.7025881535510785, "learning_rate": 9.746376811594203e-06, "loss": 1.1502, "step": 807 }, { "epoch": 0.02928491174658403, "grad_norm": 2.7188158304774235, "learning_rate": 9.758454106280194e-06, "loss": 1.2326, "step": 808 }, { "epoch": 0.029321155449240695, "grad_norm": 2.591039647485563, "learning_rate": 9.770531400966184e-06, "loss": 0.9116, "step": 809 }, { "epoch": 0.02935739915189736, "grad_norm": 2.3459192446303954, "learning_rate": 9.782608695652175e-06, "loss": 1.0657, "step": 810 }, { "epoch": 0.029393642854554022, "grad_norm": 2.5440141488460966, "learning_rate": 9.794685990338166e-06, "loss": 1.0701, "step": 811 }, { "epoch": 0.029429886557210686, "grad_norm": 2.9012515013379203, "learning_rate": 9.806763285024155e-06, "loss": 1.232, "step": 812 }, { "epoch": 0.029466130259867346, "grad_norm": 2.834448852063566, "learning_rate": 9.818840579710146e-06, "loss": 1.2366, "step": 813 }, { "epoch": 0.02950237396252401, "grad_norm": 2.768835539297144, "learning_rate": 9.830917874396136e-06, "loss": 1.0146, "step": 814 }, { "epoch": 0.029538617665180674, "grad_norm": 2.599155903940902, "learning_rate": 9.842995169082127e-06, "loss": 1.0079, "step": 815 }, { "epoch": 0.029574861367837338, "grad_norm": 2.4583830748734266, "learning_rate": 9.855072463768118e-06, "loss": 0.9727, "step": 816 }, { "epoch": 0.029611105070494, "grad_norm": 3.104332829270368, "learning_rate": 9.867149758454107e-06, "loss": 1.1123, "step": 817 }, { "epoch": 0.029647348773150665, "grad_norm": 2.9535208808704896, "learning_rate": 9.879227053140097e-06, "loss": 1.2358, "step": 818 }, { "epoch": 0.02968359247580733, "grad_norm": 2.6652022410197063, "learning_rate": 9.891304347826088e-06, "loss": 1.0181, "step": 819 }, { "epoch": 0.029719836178463993, "grad_norm": 3.0335316407716637, "learning_rate": 9.903381642512077e-06, "loss": 1.1444, "step": 820 }, { "epoch": 0.029756079881120657, "grad_norm": 2.8498994433710374, "learning_rate": 9.915458937198068e-06, "loss": 1.0717, "step": 821 }, { "epoch": 0.029792323583777317, "grad_norm": 2.9428031118784173, "learning_rate": 9.927536231884058e-06, "loss": 1.1612, "step": 822 }, { "epoch": 0.02982856728643398, "grad_norm": 2.74538971894497, "learning_rate": 9.939613526570049e-06, "loss": 0.928, "step": 823 }, { "epoch": 0.029864810989090645, "grad_norm": 5.5955735944682266, "learning_rate": 9.95169082125604e-06, "loss": 1.0899, "step": 824 }, { "epoch": 0.02990105469174731, "grad_norm": 3.013868136933166, "learning_rate": 9.96376811594203e-06, "loss": 1.1477, "step": 825 }, { "epoch": 0.029937298394403972, "grad_norm": 2.859945662599461, "learning_rate": 9.97584541062802e-06, "loss": 0.9857, "step": 826 }, { "epoch": 0.029973542097060636, "grad_norm": 2.5321388577597066, "learning_rate": 9.98792270531401e-06, "loss": 1.1034, "step": 827 }, { "epoch": 0.0300097857997173, "grad_norm": 2.899755505708175, "learning_rate": 1e-05, "loss": 1.2072, "step": 828 }, { "epoch": 0.030046029502373964, "grad_norm": 2.8694935566520723, "learning_rate": 9.999999965551513e-06, "loss": 1.1714, "step": 829 }, { "epoch": 0.030082273205030628, "grad_norm": 2.3516547207600023, "learning_rate": 9.99999986220605e-06, "loss": 1.023, "step": 830 }, { "epoch": 0.030118516907687288, "grad_norm": 2.5814683045074776, "learning_rate": 9.999999689963611e-06, "loss": 0.9946, "step": 831 }, { "epoch": 0.03015476061034395, "grad_norm": 2.624028614094542, "learning_rate": 9.999999448824202e-06, "loss": 1.1234, "step": 832 }, { "epoch": 0.030191004313000615, "grad_norm": 2.9672103663755918, "learning_rate": 9.999999138787824e-06, "loss": 0.9788, "step": 833 }, { "epoch": 0.03022724801565728, "grad_norm": 2.6450998737264624, "learning_rate": 9.999998759854482e-06, "loss": 1.0484, "step": 834 }, { "epoch": 0.030263491718313943, "grad_norm": 2.654207388783077, "learning_rate": 9.99999831202418e-06, "loss": 1.004, "step": 835 }, { "epoch": 0.030299735420970607, "grad_norm": 2.41400287959333, "learning_rate": 9.999997795296925e-06, "loss": 1.0731, "step": 836 }, { "epoch": 0.03033597912362727, "grad_norm": 2.809033178766528, "learning_rate": 9.999997209672726e-06, "loss": 0.9967, "step": 837 }, { "epoch": 0.030372222826283934, "grad_norm": 2.812084467689369, "learning_rate": 9.999996555151587e-06, "loss": 1.103, "step": 838 }, { "epoch": 0.030408466528940598, "grad_norm": 2.3148048400682106, "learning_rate": 9.999995831733522e-06, "loss": 0.847, "step": 839 }, { "epoch": 0.03044471023159726, "grad_norm": 2.5467933161554024, "learning_rate": 9.999995039418537e-06, "loss": 1.0433, "step": 840 }, { "epoch": 0.030480953934253922, "grad_norm": 2.8595461624819314, "learning_rate": 9.999994178206645e-06, "loss": 1.1515, "step": 841 }, { "epoch": 0.030517197636910586, "grad_norm": 3.115600641695874, "learning_rate": 9.999993248097854e-06, "loss": 1.3071, "step": 842 }, { "epoch": 0.03055344133956725, "grad_norm": 2.87331256994261, "learning_rate": 9.999992249092184e-06, "loss": 1.1428, "step": 843 }, { "epoch": 0.030589685042223914, "grad_norm": 2.744812845750045, "learning_rate": 9.999991181189644e-06, "loss": 1.166, "step": 844 }, { "epoch": 0.030625928744880578, "grad_norm": 3.122807669840791, "learning_rate": 9.999990044390246e-06, "loss": 1.1279, "step": 845 }, { "epoch": 0.03066217244753724, "grad_norm": 2.7928438156849666, "learning_rate": 9.999988838694013e-06, "loss": 0.9923, "step": 846 }, { "epoch": 0.030698416150193905, "grad_norm": 2.744176454131334, "learning_rate": 9.999987564100956e-06, "loss": 1.0364, "step": 847 }, { "epoch": 0.03073465985285057, "grad_norm": 2.88340209885247, "learning_rate": 9.999986220611096e-06, "loss": 0.9661, "step": 848 }, { "epoch": 0.03077090355550723, "grad_norm": 2.862800348756437, "learning_rate": 9.999984808224448e-06, "loss": 0.9547, "step": 849 }, { "epoch": 0.030807147258163893, "grad_norm": 2.6867075507775233, "learning_rate": 9.999983326941032e-06, "loss": 1.1315, "step": 850 }, { "epoch": 0.030843390960820557, "grad_norm": 2.5677339549319487, "learning_rate": 9.999981776760872e-06, "loss": 0.9683, "step": 851 }, { "epoch": 0.03087963466347722, "grad_norm": 2.7374429500237802, "learning_rate": 9.999980157683987e-06, "loss": 1.168, "step": 852 }, { "epoch": 0.030915878366133884, "grad_norm": 2.7880479800541407, "learning_rate": 9.999978469710397e-06, "loss": 1.0051, "step": 853 }, { "epoch": 0.030952122068790548, "grad_norm": 2.8629986067683575, "learning_rate": 9.99997671284013e-06, "loss": 1.1586, "step": 854 }, { "epoch": 0.030988365771447212, "grad_norm": 2.9335979322883903, "learning_rate": 9.999974887073207e-06, "loss": 1.1106, "step": 855 }, { "epoch": 0.031024609474103876, "grad_norm": 2.4436898570154812, "learning_rate": 9.999972992409653e-06, "loss": 1.1084, "step": 856 }, { "epoch": 0.031060853176760536, "grad_norm": 2.790065170475129, "learning_rate": 9.999971028849496e-06, "loss": 1.1299, "step": 857 }, { "epoch": 0.0310970968794172, "grad_norm": 2.6506973773489406, "learning_rate": 9.999968996392762e-06, "loss": 1.1711, "step": 858 }, { "epoch": 0.031133340582073864, "grad_norm": 2.712467290467912, "learning_rate": 9.99996689503948e-06, "loss": 1.2024, "step": 859 }, { "epoch": 0.031169584284730528, "grad_norm": 2.8399872754059197, "learning_rate": 9.999964724789676e-06, "loss": 0.997, "step": 860 }, { "epoch": 0.03120582798738719, "grad_norm": 3.301338988768967, "learning_rate": 9.999962485643384e-06, "loss": 1.0737, "step": 861 }, { "epoch": 0.031242071690043855, "grad_norm": 2.5215985705285466, "learning_rate": 9.999960177600632e-06, "loss": 0.9295, "step": 862 }, { "epoch": 0.03127831539270052, "grad_norm": 2.8364678082992882, "learning_rate": 9.999957800661452e-06, "loss": 0.9575, "step": 863 }, { "epoch": 0.03131455909535718, "grad_norm": 3.410059840668443, "learning_rate": 9.999955354825879e-06, "loss": 1.0872, "step": 864 }, { "epoch": 0.031350802798013847, "grad_norm": 2.636306074560395, "learning_rate": 9.999952840093944e-06, "loss": 1.0908, "step": 865 }, { "epoch": 0.03138704650067051, "grad_norm": 2.891751453826906, "learning_rate": 9.999950256465682e-06, "loss": 1.039, "step": 866 }, { "epoch": 0.031423290203327174, "grad_norm": 2.686765921299976, "learning_rate": 9.999947603941131e-06, "loss": 1.2947, "step": 867 }, { "epoch": 0.031459533905983834, "grad_norm": 2.7176509784138765, "learning_rate": 9.999944882520325e-06, "loss": 1.0778, "step": 868 }, { "epoch": 0.0314957776086405, "grad_norm": 2.6069477049746568, "learning_rate": 9.999942092203303e-06, "loss": 1.1144, "step": 869 }, { "epoch": 0.03153202131129716, "grad_norm": 2.761313799241207, "learning_rate": 9.9999392329901e-06, "loss": 1.1287, "step": 870 }, { "epoch": 0.03156826501395382, "grad_norm": 2.725415948519122, "learning_rate": 9.999936304880762e-06, "loss": 1.1406, "step": 871 }, { "epoch": 0.03160450871661049, "grad_norm": 2.5932890074426593, "learning_rate": 9.999933307875325e-06, "loss": 1.0484, "step": 872 }, { "epoch": 0.03164075241926715, "grad_norm": 2.824968121925152, "learning_rate": 9.99993024197383e-06, "loss": 1.2227, "step": 873 }, { "epoch": 0.03167699612192382, "grad_norm": 2.5080155369397055, "learning_rate": 9.99992710717632e-06, "loss": 1.0878, "step": 874 }, { "epoch": 0.03171323982458048, "grad_norm": 2.6356343897758587, "learning_rate": 9.999923903482838e-06, "loss": 1.0001, "step": 875 }, { "epoch": 0.031749483527237145, "grad_norm": 2.5682003821114434, "learning_rate": 9.99992063089343e-06, "loss": 1.0957, "step": 876 }, { "epoch": 0.031785727229893805, "grad_norm": 2.8357842146942303, "learning_rate": 9.999917289408138e-06, "loss": 1.0091, "step": 877 }, { "epoch": 0.03182197093255047, "grad_norm": 2.5852548149420964, "learning_rate": 9.99991387902701e-06, "loss": 1.2592, "step": 878 }, { "epoch": 0.03185821463520713, "grad_norm": 2.692588044210699, "learning_rate": 9.999910399750092e-06, "loss": 1.2696, "step": 879 }, { "epoch": 0.03189445833786379, "grad_norm": 2.8062782347787016, "learning_rate": 9.999906851577434e-06, "loss": 1.0016, "step": 880 }, { "epoch": 0.03193070204052046, "grad_norm": 2.50715610545553, "learning_rate": 9.999903234509081e-06, "loss": 0.9397, "step": 881 }, { "epoch": 0.03196694574317712, "grad_norm": 2.2859343661392666, "learning_rate": 9.999899548545087e-06, "loss": 1.1685, "step": 882 }, { "epoch": 0.03200318944583379, "grad_norm": 2.6697503982385493, "learning_rate": 9.999895793685501e-06, "loss": 1.0061, "step": 883 }, { "epoch": 0.03203943314849045, "grad_norm": 2.6687576126402113, "learning_rate": 9.999891969930375e-06, "loss": 1.1373, "step": 884 }, { "epoch": 0.032075676851147115, "grad_norm": 2.7456380223283725, "learning_rate": 9.999888077279762e-06, "loss": 1.1426, "step": 885 }, { "epoch": 0.032111920553803776, "grad_norm": 2.856987965881936, "learning_rate": 9.999884115733715e-06, "loss": 1.1366, "step": 886 }, { "epoch": 0.03214816425646044, "grad_norm": 2.7612501986200955, "learning_rate": 9.999880085292287e-06, "loss": 1.1436, "step": 887 }, { "epoch": 0.0321844079591171, "grad_norm": 2.807896133095337, "learning_rate": 9.999875985955536e-06, "loss": 1.054, "step": 888 }, { "epoch": 0.032220651661773764, "grad_norm": 2.8517811615473985, "learning_rate": 9.999871817723518e-06, "loss": 1.1921, "step": 889 }, { "epoch": 0.03225689536443043, "grad_norm": 2.5887039184667167, "learning_rate": 9.999867580596291e-06, "loss": 1.3497, "step": 890 }, { "epoch": 0.03229313906708709, "grad_norm": 2.78294885036165, "learning_rate": 9.999863274573912e-06, "loss": 0.9953, "step": 891 }, { "epoch": 0.03232938276974376, "grad_norm": 2.5910486792539307, "learning_rate": 9.99985889965644e-06, "loss": 0.9238, "step": 892 }, { "epoch": 0.03236562647240042, "grad_norm": 2.8311932616488322, "learning_rate": 9.999854455843938e-06, "loss": 1.1456, "step": 893 }, { "epoch": 0.032401870175057086, "grad_norm": 2.731713001672819, "learning_rate": 9.999849943136463e-06, "loss": 1.2262, "step": 894 }, { "epoch": 0.032438113877713746, "grad_norm": 2.974510647950106, "learning_rate": 9.999845361534082e-06, "loss": 1.2257, "step": 895 }, { "epoch": 0.032474357580370414, "grad_norm": 2.7875438215320587, "learning_rate": 9.999840711036854e-06, "loss": 0.9986, "step": 896 }, { "epoch": 0.032510601283027074, "grad_norm": 2.6692997045758124, "learning_rate": 9.999835991644846e-06, "loss": 0.9917, "step": 897 }, { "epoch": 0.032546844985683734, "grad_norm": 2.4593378964392403, "learning_rate": 9.999831203358121e-06, "loss": 1.1944, "step": 898 }, { "epoch": 0.0325830886883404, "grad_norm": 2.8699579352409685, "learning_rate": 9.999826346176746e-06, "loss": 1.029, "step": 899 }, { "epoch": 0.03261933239099706, "grad_norm": 2.742743318385643, "learning_rate": 9.999821420100787e-06, "loss": 1.2504, "step": 900 }, { "epoch": 0.03265557609365373, "grad_norm": 2.9425357650328636, "learning_rate": 9.999816425130316e-06, "loss": 1.1087, "step": 901 }, { "epoch": 0.03269181979631039, "grad_norm": 2.389623037227758, "learning_rate": 9.999811361265395e-06, "loss": 1.3491, "step": 902 }, { "epoch": 0.03272806349896706, "grad_norm": 2.7311352430888047, "learning_rate": 9.9998062285061e-06, "loss": 1.2015, "step": 903 }, { "epoch": 0.03276430720162372, "grad_norm": 2.552867398893463, "learning_rate": 9.999801026852496e-06, "loss": 1.1127, "step": 904 }, { "epoch": 0.032800550904280384, "grad_norm": 2.795970586096162, "learning_rate": 9.999795756304658e-06, "loss": 1.0631, "step": 905 }, { "epoch": 0.032836794606937045, "grad_norm": 3.2042935559722667, "learning_rate": 9.99979041686266e-06, "loss": 1.0647, "step": 906 }, { "epoch": 0.032873038309593705, "grad_norm": 2.8360452939006073, "learning_rate": 9.999785008526572e-06, "loss": 0.8883, "step": 907 }, { "epoch": 0.03290928201225037, "grad_norm": 2.732245664420895, "learning_rate": 9.999779531296472e-06, "loss": 0.8456, "step": 908 }, { "epoch": 0.03294552571490703, "grad_norm": 2.738209492579012, "learning_rate": 9.999773985172433e-06, "loss": 0.9795, "step": 909 }, { "epoch": 0.0329817694175637, "grad_norm": 2.6579856776430506, "learning_rate": 9.999768370154532e-06, "loss": 1.1005, "step": 910 }, { "epoch": 0.03301801312022036, "grad_norm": 2.5673317216085283, "learning_rate": 9.999762686242846e-06, "loss": 1.1547, "step": 911 }, { "epoch": 0.03305425682287703, "grad_norm": 3.10935164233027, "learning_rate": 9.999756933437457e-06, "loss": 1.1297, "step": 912 }, { "epoch": 0.03309050052553369, "grad_norm": 3.0831603015312408, "learning_rate": 9.999751111738438e-06, "loss": 1.1867, "step": 913 }, { "epoch": 0.033126744228190355, "grad_norm": 2.6828604875036572, "learning_rate": 9.999745221145874e-06, "loss": 1.3136, "step": 914 }, { "epoch": 0.033162987930847015, "grad_norm": 2.6018843478319957, "learning_rate": 9.999739261659845e-06, "loss": 1.0685, "step": 915 }, { "epoch": 0.033199231633503676, "grad_norm": 2.4298200257024076, "learning_rate": 9.999733233280431e-06, "loss": 0.8419, "step": 916 }, { "epoch": 0.03323547533616034, "grad_norm": 2.5906414878003967, "learning_rate": 9.999727136007717e-06, "loss": 0.961, "step": 917 }, { "epoch": 0.033271719038817, "grad_norm": 2.753130974258396, "learning_rate": 9.999720969841788e-06, "loss": 1.2423, "step": 918 }, { "epoch": 0.03330796274147367, "grad_norm": 2.8749867415896597, "learning_rate": 9.999714734782727e-06, "loss": 1.1219, "step": 919 }, { "epoch": 0.03334420644413033, "grad_norm": 2.7655299334882226, "learning_rate": 9.999708430830622e-06, "loss": 1.2137, "step": 920 }, { "epoch": 0.033380450146787, "grad_norm": 3.3485546226980034, "learning_rate": 9.999702057985557e-06, "loss": 1.0296, "step": 921 }, { "epoch": 0.03341669384944366, "grad_norm": 2.879615244388083, "learning_rate": 9.999695616247621e-06, "loss": 1.1865, "step": 922 }, { "epoch": 0.033452937552100326, "grad_norm": 2.5988476951257504, "learning_rate": 9.999689105616902e-06, "loss": 1.0905, "step": 923 }, { "epoch": 0.033489181254756986, "grad_norm": 2.7518245782449777, "learning_rate": 9.999682526093494e-06, "loss": 1.1016, "step": 924 }, { "epoch": 0.033525424957413646, "grad_norm": 2.1272874276719285, "learning_rate": 9.999675877677483e-06, "loss": 1.2052, "step": 925 }, { "epoch": 0.033561668660070314, "grad_norm": 2.649782384603516, "learning_rate": 9.999669160368963e-06, "loss": 1.1503, "step": 926 }, { "epoch": 0.033597912362726974, "grad_norm": 2.4533035776203764, "learning_rate": 9.999662374168025e-06, "loss": 1.051, "step": 927 }, { "epoch": 0.03363415606538364, "grad_norm": 3.4666039331856973, "learning_rate": 9.999655519074765e-06, "loss": 1.0927, "step": 928 }, { "epoch": 0.0336703997680403, "grad_norm": 2.7689097457528145, "learning_rate": 9.999648595089273e-06, "loss": 1.136, "step": 929 }, { "epoch": 0.03370664347069697, "grad_norm": 2.726975878929337, "learning_rate": 9.999641602211647e-06, "loss": 1.1048, "step": 930 }, { "epoch": 0.03374288717335363, "grad_norm": 2.5035999373222424, "learning_rate": 9.999634540441985e-06, "loss": 1.1711, "step": 931 }, { "epoch": 0.033779130876010297, "grad_norm": 2.8315745697162633, "learning_rate": 9.999627409780381e-06, "loss": 1.3591, "step": 932 }, { "epoch": 0.03381537457866696, "grad_norm": 2.545779889103352, "learning_rate": 9.999620210226936e-06, "loss": 0.9779, "step": 933 }, { "epoch": 0.03385161828132362, "grad_norm": 2.883042717399389, "learning_rate": 9.999612941781749e-06, "loss": 1.084, "step": 934 }, { "epoch": 0.033887861983980284, "grad_norm": 2.197877291343207, "learning_rate": 9.99960560444492e-06, "loss": 0.9693, "step": 935 }, { "epoch": 0.033924105686636945, "grad_norm": 2.971246813929601, "learning_rate": 9.999598198216546e-06, "loss": 1.0108, "step": 936 }, { "epoch": 0.03396034938929361, "grad_norm": 2.8131066831206084, "learning_rate": 9.999590723096736e-06, "loss": 1.2988, "step": 937 }, { "epoch": 0.03399659309195027, "grad_norm": 2.4437001230404682, "learning_rate": 9.999583179085588e-06, "loss": 1.0065, "step": 938 }, { "epoch": 0.03403283679460694, "grad_norm": 2.7756327082395575, "learning_rate": 9.999575566183207e-06, "loss": 1.0664, "step": 939 }, { "epoch": 0.0340690804972636, "grad_norm": 2.774442896052737, "learning_rate": 9.999567884389699e-06, "loss": 1.1358, "step": 940 }, { "epoch": 0.03410532419992027, "grad_norm": 2.786911782398373, "learning_rate": 9.999560133705168e-06, "loss": 1.0449, "step": 941 }, { "epoch": 0.03414156790257693, "grad_norm": 2.832055088979381, "learning_rate": 9.999552314129724e-06, "loss": 1.0249, "step": 942 }, { "epoch": 0.03417781160523359, "grad_norm": 2.9306217521512634, "learning_rate": 9.999544425663472e-06, "loss": 1.0242, "step": 943 }, { "epoch": 0.034214055307890255, "grad_norm": 3.0363182544711753, "learning_rate": 9.99953646830652e-06, "loss": 1.1281, "step": 944 }, { "epoch": 0.034250299010546915, "grad_norm": 2.742794055935269, "learning_rate": 9.999528442058982e-06, "loss": 1.0842, "step": 945 }, { "epoch": 0.03428654271320358, "grad_norm": 2.8551682986736022, "learning_rate": 9.999520346920962e-06, "loss": 1.2339, "step": 946 }, { "epoch": 0.03432278641586024, "grad_norm": 2.829710215592776, "learning_rate": 9.999512182892576e-06, "loss": 1.0354, "step": 947 }, { "epoch": 0.03435903011851691, "grad_norm": 2.7551677732369586, "learning_rate": 9.999503949973938e-06, "loss": 1.0524, "step": 948 }, { "epoch": 0.03439527382117357, "grad_norm": 2.5626485174041136, "learning_rate": 9.999495648165159e-06, "loss": 1.0418, "step": 949 }, { "epoch": 0.03443151752383024, "grad_norm": 2.7386868584065565, "learning_rate": 9.999487277466353e-06, "loss": 1.1004, "step": 950 }, { "epoch": 0.0344677612264869, "grad_norm": 2.648462853769055, "learning_rate": 9.999478837877634e-06, "loss": 1.0316, "step": 951 }, { "epoch": 0.03450400492914356, "grad_norm": 2.646565215259272, "learning_rate": 9.999470329399122e-06, "loss": 1.1414, "step": 952 }, { "epoch": 0.034540248631800226, "grad_norm": 2.8777115101673707, "learning_rate": 9.999461752030933e-06, "loss": 1.052, "step": 953 }, { "epoch": 0.034576492334456886, "grad_norm": 2.7430177860998337, "learning_rate": 9.999453105773183e-06, "loss": 1.1526, "step": 954 }, { "epoch": 0.03461273603711355, "grad_norm": 2.810187824072744, "learning_rate": 9.999444390625994e-06, "loss": 1.0395, "step": 955 }, { "epoch": 0.034648979739770214, "grad_norm": 2.7638407527998257, "learning_rate": 9.999435606589484e-06, "loss": 1.0269, "step": 956 }, { "epoch": 0.03468522344242688, "grad_norm": 3.0674799813788445, "learning_rate": 9.999426753663776e-06, "loss": 0.9471, "step": 957 }, { "epoch": 0.03472146714508354, "grad_norm": 2.8885997514224417, "learning_rate": 9.99941783184899e-06, "loss": 0.9883, "step": 958 }, { "epoch": 0.03475771084774021, "grad_norm": 2.3598406375764776, "learning_rate": 9.99940884114525e-06, "loss": 1.0448, "step": 959 }, { "epoch": 0.03479395455039687, "grad_norm": 2.4843641242976235, "learning_rate": 9.999399781552679e-06, "loss": 1.0194, "step": 960 }, { "epoch": 0.03483019825305353, "grad_norm": 2.6350708052543967, "learning_rate": 9.999390653071403e-06, "loss": 1.1175, "step": 961 }, { "epoch": 0.034866441955710196, "grad_norm": 3.055494039917762, "learning_rate": 9.99938145570155e-06, "loss": 1.3006, "step": 962 }, { "epoch": 0.03490268565836686, "grad_norm": 2.935431125592017, "learning_rate": 9.99937218944324e-06, "loss": 1.1861, "step": 963 }, { "epoch": 0.034938929361023524, "grad_norm": 3.0478192293913957, "learning_rate": 9.999362854296606e-06, "loss": 1.138, "step": 964 }, { "epoch": 0.034975173063680184, "grad_norm": 2.7553567274975976, "learning_rate": 9.999353450261777e-06, "loss": 1.1176, "step": 965 }, { "epoch": 0.03501141676633685, "grad_norm": 2.5669599661508724, "learning_rate": 9.99934397733888e-06, "loss": 1.104, "step": 966 }, { "epoch": 0.03504766046899351, "grad_norm": 2.8356113638716582, "learning_rate": 9.999334435528045e-06, "loss": 0.9096, "step": 967 }, { "epoch": 0.03508390417165017, "grad_norm": 2.9005543901033155, "learning_rate": 9.999324824829407e-06, "loss": 1.2155, "step": 968 }, { "epoch": 0.03512014787430684, "grad_norm": 2.9583419709822114, "learning_rate": 9.999315145243096e-06, "loss": 1.0943, "step": 969 }, { "epoch": 0.0351563915769635, "grad_norm": 2.6860522238127604, "learning_rate": 9.999305396769246e-06, "loss": 1.2467, "step": 970 }, { "epoch": 0.03519263527962017, "grad_norm": 3.1081019497086664, "learning_rate": 9.99929557940799e-06, "loss": 1.0151, "step": 971 }, { "epoch": 0.03522887898227683, "grad_norm": 2.9288934963715922, "learning_rate": 9.999285693159465e-06, "loss": 1.1815, "step": 972 }, { "epoch": 0.035265122684933495, "grad_norm": 3.0172575663942025, "learning_rate": 9.999275738023806e-06, "loss": 1.1148, "step": 973 }, { "epoch": 0.035301366387590155, "grad_norm": 2.816681851887189, "learning_rate": 9.999265714001151e-06, "loss": 1.1584, "step": 974 }, { "epoch": 0.03533761009024682, "grad_norm": 2.5114601764960423, "learning_rate": 9.99925562109164e-06, "loss": 1.0926, "step": 975 }, { "epoch": 0.03537385379290348, "grad_norm": 2.7519511576196263, "learning_rate": 9.999245459295408e-06, "loss": 1.1159, "step": 976 }, { "epoch": 0.03541009749556014, "grad_norm": 2.7923480358586636, "learning_rate": 9.999235228612596e-06, "loss": 1.1949, "step": 977 }, { "epoch": 0.03544634119821681, "grad_norm": 2.8396537531269472, "learning_rate": 9.999224929043348e-06, "loss": 1.1241, "step": 978 }, { "epoch": 0.03548258490087347, "grad_norm": 2.6392036958038685, "learning_rate": 9.999214560587805e-06, "loss": 1.2318, "step": 979 }, { "epoch": 0.03551882860353014, "grad_norm": 2.6553447634353695, "learning_rate": 9.999204123246106e-06, "loss": 0.9385, "step": 980 }, { "epoch": 0.0355550723061868, "grad_norm": 2.490564869113836, "learning_rate": 9.9991936170184e-06, "loss": 0.9217, "step": 981 }, { "epoch": 0.035591316008843465, "grad_norm": 2.9605205867302393, "learning_rate": 9.999183041904827e-06, "loss": 1.1521, "step": 982 }, { "epoch": 0.035627559711500126, "grad_norm": 2.798866100246211, "learning_rate": 9.999172397905535e-06, "loss": 1.1419, "step": 983 }, { "epoch": 0.03566380341415679, "grad_norm": 2.861993130990501, "learning_rate": 9.999161685020673e-06, "loss": 1.1269, "step": 984 }, { "epoch": 0.03570004711681345, "grad_norm": 2.763142562423783, "learning_rate": 9.999150903250386e-06, "loss": 1.1589, "step": 985 }, { "epoch": 0.035736290819470114, "grad_norm": 2.5529475441542515, "learning_rate": 9.99914005259482e-06, "loss": 1.2202, "step": 986 }, { "epoch": 0.03577253452212678, "grad_norm": 2.4710654426223564, "learning_rate": 9.99912913305413e-06, "loss": 1.2002, "step": 987 }, { "epoch": 0.03580877822478344, "grad_norm": 2.620057992823482, "learning_rate": 9.999118144628463e-06, "loss": 1.1619, "step": 988 }, { "epoch": 0.03584502192744011, "grad_norm": 2.4589627312611824, "learning_rate": 9.999107087317973e-06, "loss": 1.2946, "step": 989 }, { "epoch": 0.03588126563009677, "grad_norm": 2.4770602944945233, "learning_rate": 9.999095961122809e-06, "loss": 0.9144, "step": 990 }, { "epoch": 0.035917509332753436, "grad_norm": 2.867695356736691, "learning_rate": 9.999084766043128e-06, "loss": 1.2716, "step": 991 }, { "epoch": 0.035953753035410096, "grad_norm": 2.4763914442148147, "learning_rate": 9.99907350207908e-06, "loss": 1.0626, "step": 992 }, { "epoch": 0.035989996738066764, "grad_norm": 2.6372396070097777, "learning_rate": 9.999062169230823e-06, "loss": 0.9827, "step": 993 }, { "epoch": 0.036026240440723424, "grad_norm": 2.903792424359952, "learning_rate": 9.999050767498512e-06, "loss": 1.0372, "step": 994 }, { "epoch": 0.036062484143380084, "grad_norm": 2.7553455544258783, "learning_rate": 9.999039296882307e-06, "loss": 1.0359, "step": 995 }, { "epoch": 0.03609872784603675, "grad_norm": 2.7553453600618116, "learning_rate": 9.999027757382363e-06, "loss": 1.0321, "step": 996 }, { "epoch": 0.03613497154869341, "grad_norm": 2.8828201553258745, "learning_rate": 9.999016148998839e-06, "loss": 1.0799, "step": 997 }, { "epoch": 0.03617121525135008, "grad_norm": 3.028270699133965, "learning_rate": 9.999004471731898e-06, "loss": 1.1206, "step": 998 }, { "epoch": 0.03620745895400674, "grad_norm": 2.54556351316014, "learning_rate": 9.998992725581696e-06, "loss": 0.8544, "step": 999 }, { "epoch": 0.03624370265666341, "grad_norm": 2.9306388650788104, "learning_rate": 9.998980910548398e-06, "loss": 1.1111, "step": 1000 }, { "epoch": 0.03627994635932007, "grad_norm": 2.6074601721012716, "learning_rate": 9.998969026632167e-06, "loss": 1.1973, "step": 1001 }, { "epoch": 0.036316190061976734, "grad_norm": 2.464045464699788, "learning_rate": 9.998957073833166e-06, "loss": 1.0706, "step": 1002 }, { "epoch": 0.036352433764633395, "grad_norm": 2.6755541305950685, "learning_rate": 9.99894505215156e-06, "loss": 0.9128, "step": 1003 }, { "epoch": 0.036388677467290055, "grad_norm": 2.5840624581221823, "learning_rate": 9.998932961587514e-06, "loss": 1.0845, "step": 1004 }, { "epoch": 0.03642492116994672, "grad_norm": 2.6587328224791507, "learning_rate": 9.998920802141194e-06, "loss": 1.1371, "step": 1005 }, { "epoch": 0.03646116487260338, "grad_norm": 2.941979944928, "learning_rate": 9.99890857381277e-06, "loss": 1.1058, "step": 1006 }, { "epoch": 0.03649740857526005, "grad_norm": 2.9967664083239667, "learning_rate": 9.998896276602408e-06, "loss": 1.0174, "step": 1007 }, { "epoch": 0.03653365227791671, "grad_norm": 2.6100401822006583, "learning_rate": 9.99888391051028e-06, "loss": 0.8985, "step": 1008 }, { "epoch": 0.03656989598057338, "grad_norm": 2.5835449275907383, "learning_rate": 9.998871475536554e-06, "loss": 0.9819, "step": 1009 }, { "epoch": 0.03660613968323004, "grad_norm": 2.3991899140301824, "learning_rate": 9.998858971681403e-06, "loss": 1.0098, "step": 1010 }, { "epoch": 0.036642383385886705, "grad_norm": 2.4928921048490778, "learning_rate": 9.998846398944997e-06, "loss": 1.0852, "step": 1011 }, { "epoch": 0.036678627088543365, "grad_norm": 2.4826641135574747, "learning_rate": 9.998833757327513e-06, "loss": 0.8596, "step": 1012 }, { "epoch": 0.036714870791200026, "grad_norm": 2.8150157269425735, "learning_rate": 9.998821046829122e-06, "loss": 1.1315, "step": 1013 }, { "epoch": 0.03675111449385669, "grad_norm": 2.587845538619781, "learning_rate": 9.99880826745e-06, "loss": 0.98, "step": 1014 }, { "epoch": 0.03678735819651335, "grad_norm": 2.891882196744645, "learning_rate": 9.998795419190324e-06, "loss": 1.1152, "step": 1015 }, { "epoch": 0.03682360189917002, "grad_norm": 2.4704068064731777, "learning_rate": 9.998782502050269e-06, "loss": 1.0654, "step": 1016 }, { "epoch": 0.03685984560182668, "grad_norm": 2.4515293992643903, "learning_rate": 9.998769516030015e-06, "loss": 0.9387, "step": 1017 }, { "epoch": 0.03689608930448335, "grad_norm": 2.623132939227701, "learning_rate": 9.99875646112974e-06, "loss": 0.9693, "step": 1018 }, { "epoch": 0.03693233300714001, "grad_norm": 2.7984514754605097, "learning_rate": 9.998743337349625e-06, "loss": 0.9298, "step": 1019 }, { "epoch": 0.036968576709796676, "grad_norm": 2.66693798407101, "learning_rate": 9.99873014468985e-06, "loss": 0.9747, "step": 1020 }, { "epoch": 0.037004820412453336, "grad_norm": 2.6858905358362697, "learning_rate": 9.998716883150595e-06, "loss": 1.1598, "step": 1021 }, { "epoch": 0.037041064115109996, "grad_norm": 2.5901830763126, "learning_rate": 9.998703552732046e-06, "loss": 1.1048, "step": 1022 }, { "epoch": 0.037077307817766664, "grad_norm": 2.647365305718622, "learning_rate": 9.998690153434385e-06, "loss": 0.9852, "step": 1023 }, { "epoch": 0.037113551520423324, "grad_norm": 2.609701391927192, "learning_rate": 9.998676685257795e-06, "loss": 0.9185, "step": 1024 }, { "epoch": 0.03714979522307999, "grad_norm": 2.388630076255003, "learning_rate": 9.998663148202465e-06, "loss": 1.0551, "step": 1025 }, { "epoch": 0.03718603892573665, "grad_norm": 2.914811827385374, "learning_rate": 9.998649542268582e-06, "loss": 1.1317, "step": 1026 }, { "epoch": 0.03722228262839332, "grad_norm": 2.7379878834248763, "learning_rate": 9.998635867456328e-06, "loss": 1.2464, "step": 1027 }, { "epoch": 0.03725852633104998, "grad_norm": 2.344169587196137, "learning_rate": 9.998622123765896e-06, "loss": 1.0076, "step": 1028 }, { "epoch": 0.037294770033706647, "grad_norm": 3.5524582937354294, "learning_rate": 9.998608311197473e-06, "loss": 1.0612, "step": 1029 }, { "epoch": 0.03733101373636331, "grad_norm": 2.82821096296721, "learning_rate": 9.998594429751254e-06, "loss": 1.144, "step": 1030 }, { "epoch": 0.03736725743901997, "grad_norm": 2.455605750076844, "learning_rate": 9.998580479427424e-06, "loss": 1.3177, "step": 1031 }, { "epoch": 0.037403501141676634, "grad_norm": 2.485924429716126, "learning_rate": 9.998566460226178e-06, "loss": 1.2282, "step": 1032 }, { "epoch": 0.037439744844333295, "grad_norm": 2.944204338881822, "learning_rate": 9.99855237214771e-06, "loss": 1.2137, "step": 1033 }, { "epoch": 0.03747598854698996, "grad_norm": 2.508609262705804, "learning_rate": 9.998538215192213e-06, "loss": 0.9481, "step": 1034 }, { "epoch": 0.03751223224964662, "grad_norm": 2.7444820086890034, "learning_rate": 9.998523989359884e-06, "loss": 1.1325, "step": 1035 }, { "epoch": 0.03754847595230329, "grad_norm": 2.6523860786073024, "learning_rate": 9.998509694650916e-06, "loss": 1.0714, "step": 1036 }, { "epoch": 0.03758471965495995, "grad_norm": 2.6648418765079147, "learning_rate": 9.998495331065505e-06, "loss": 1.0895, "step": 1037 }, { "epoch": 0.03762096335761662, "grad_norm": 2.8231143454248144, "learning_rate": 9.998480898603855e-06, "loss": 1.0853, "step": 1038 }, { "epoch": 0.03765720706027328, "grad_norm": 2.8286877317545214, "learning_rate": 9.99846639726616e-06, "loss": 0.9836, "step": 1039 }, { "epoch": 0.03769345076292994, "grad_norm": 2.6704212415179183, "learning_rate": 9.998451827052621e-06, "loss": 1.1867, "step": 1040 }, { "epoch": 0.037729694465586605, "grad_norm": 2.7586477415421853, "learning_rate": 9.998437187963438e-06, "loss": 1.0778, "step": 1041 }, { "epoch": 0.037765938168243265, "grad_norm": 3.026599315174615, "learning_rate": 9.998422479998815e-06, "loss": 0.9293, "step": 1042 }, { "epoch": 0.03780218187089993, "grad_norm": 2.960973903388098, "learning_rate": 9.998407703158953e-06, "loss": 1.0498, "step": 1043 }, { "epoch": 0.03783842557355659, "grad_norm": 2.479802120603929, "learning_rate": 9.998392857444055e-06, "loss": 0.994, "step": 1044 }, { "epoch": 0.03787466927621326, "grad_norm": 2.6229154122137817, "learning_rate": 9.998377942854326e-06, "loss": 1.2342, "step": 1045 }, { "epoch": 0.03791091297886992, "grad_norm": 2.5170935607575706, "learning_rate": 9.998362959389972e-06, "loss": 0.9376, "step": 1046 }, { "epoch": 0.03794715668152659, "grad_norm": 2.495085134341844, "learning_rate": 9.9983479070512e-06, "loss": 1.026, "step": 1047 }, { "epoch": 0.03798340038418325, "grad_norm": 2.609782708585375, "learning_rate": 9.998332785838216e-06, "loss": 0.9568, "step": 1048 }, { "epoch": 0.03801964408683991, "grad_norm": 2.6436210757836722, "learning_rate": 9.99831759575123e-06, "loss": 1.1529, "step": 1049 }, { "epoch": 0.038055887789496576, "grad_norm": 2.8005967992551857, "learning_rate": 9.998302336790452e-06, "loss": 0.9207, "step": 1050 }, { "epoch": 0.038092131492153236, "grad_norm": 2.6378326703797192, "learning_rate": 9.998287008956088e-06, "loss": 1.1284, "step": 1051 }, { "epoch": 0.0381283751948099, "grad_norm": 2.897323386155841, "learning_rate": 9.998271612248351e-06, "loss": 1.1087, "step": 1052 }, { "epoch": 0.038164618897466564, "grad_norm": 3.0543261208554493, "learning_rate": 9.998256146667456e-06, "loss": 1.2262, "step": 1053 }, { "epoch": 0.03820086260012323, "grad_norm": 2.6101884930625934, "learning_rate": 9.998240612213615e-06, "loss": 1.327, "step": 1054 }, { "epoch": 0.03823710630277989, "grad_norm": 2.68227516563754, "learning_rate": 9.99822500888704e-06, "loss": 0.9096, "step": 1055 }, { "epoch": 0.03827335000543656, "grad_norm": 2.646523042544066, "learning_rate": 9.998209336687948e-06, "loss": 1.1219, "step": 1056 }, { "epoch": 0.03830959370809322, "grad_norm": 2.561755406340083, "learning_rate": 9.998193595616553e-06, "loss": 1.0454, "step": 1057 }, { "epoch": 0.03834583741074988, "grad_norm": 2.6656723200177215, "learning_rate": 9.998177785673071e-06, "loss": 0.8539, "step": 1058 }, { "epoch": 0.038382081113406546, "grad_norm": 2.667004181357139, "learning_rate": 9.998161906857726e-06, "loss": 1.1509, "step": 1059 }, { "epoch": 0.03841832481606321, "grad_norm": 2.9668504203922668, "learning_rate": 9.998145959170731e-06, "loss": 0.9645, "step": 1060 }, { "epoch": 0.038454568518719874, "grad_norm": 2.42108839831641, "learning_rate": 9.998129942612307e-06, "loss": 1.0439, "step": 1061 }, { "epoch": 0.038490812221376534, "grad_norm": 2.569584654232479, "learning_rate": 9.998113857182676e-06, "loss": 0.9386, "step": 1062 }, { "epoch": 0.0385270559240332, "grad_norm": 2.9299222132310345, "learning_rate": 9.99809770288206e-06, "loss": 1.1341, "step": 1063 }, { "epoch": 0.03856329962668986, "grad_norm": 2.89000597912012, "learning_rate": 9.998081479710677e-06, "loss": 1.0717, "step": 1064 }, { "epoch": 0.03859954332934653, "grad_norm": 2.5236899706265823, "learning_rate": 9.998065187668756e-06, "loss": 1.0176, "step": 1065 }, { "epoch": 0.03863578703200319, "grad_norm": 2.377980020788475, "learning_rate": 9.99804882675652e-06, "loss": 1.0087, "step": 1066 }, { "epoch": 0.03867203073465985, "grad_norm": 3.3058284708873305, "learning_rate": 9.998032396974194e-06, "loss": 1.1565, "step": 1067 }, { "epoch": 0.03870827443731652, "grad_norm": 2.7639073243021985, "learning_rate": 9.998015898322003e-06, "loss": 1.1857, "step": 1068 }, { "epoch": 0.03874451813997318, "grad_norm": 2.7790538959714888, "learning_rate": 9.997999330800175e-06, "loss": 0.9203, "step": 1069 }, { "epoch": 0.038780761842629845, "grad_norm": 2.7388452289288523, "learning_rate": 9.99798269440894e-06, "loss": 0.9728, "step": 1070 }, { "epoch": 0.038817005545286505, "grad_norm": 2.688277754478218, "learning_rate": 9.997965989148526e-06, "loss": 1.0939, "step": 1071 }, { "epoch": 0.03885324924794317, "grad_norm": 2.692777274047724, "learning_rate": 9.997949215019165e-06, "loss": 0.9707, "step": 1072 }, { "epoch": 0.03888949295059983, "grad_norm": 2.9069046587665426, "learning_rate": 9.997932372021084e-06, "loss": 1.012, "step": 1073 }, { "epoch": 0.0389257366532565, "grad_norm": 2.577297806729221, "learning_rate": 9.997915460154518e-06, "loss": 1.08, "step": 1074 }, { "epoch": 0.03896198035591316, "grad_norm": 2.652225562000232, "learning_rate": 9.9978984794197e-06, "loss": 1.0136, "step": 1075 }, { "epoch": 0.03899822405856982, "grad_norm": 2.4939921942065553, "learning_rate": 9.997881429816865e-06, "loss": 1.0561, "step": 1076 }, { "epoch": 0.03903446776122649, "grad_norm": 2.7668393649004277, "learning_rate": 9.997864311346244e-06, "loss": 0.95, "step": 1077 }, { "epoch": 0.03907071146388315, "grad_norm": 2.7718591777177664, "learning_rate": 9.997847124008078e-06, "loss": 1.0991, "step": 1078 }, { "epoch": 0.039106955166539815, "grad_norm": 2.621980041579272, "learning_rate": 9.9978298678026e-06, "loss": 1.1412, "step": 1079 }, { "epoch": 0.039143198869196476, "grad_norm": 2.716207665733985, "learning_rate": 9.997812542730048e-06, "loss": 1.058, "step": 1080 }, { "epoch": 0.03917944257185314, "grad_norm": 2.806318793648469, "learning_rate": 9.997795148790664e-06, "loss": 1.0344, "step": 1081 }, { "epoch": 0.0392156862745098, "grad_norm": 2.7049625212454687, "learning_rate": 9.997777685984684e-06, "loss": 1.064, "step": 1082 }, { "epoch": 0.03925192997716647, "grad_norm": 2.5012952280241048, "learning_rate": 9.997760154312349e-06, "loss": 1.1558, "step": 1083 }, { "epoch": 0.03928817367982313, "grad_norm": 2.571951251336671, "learning_rate": 9.997742553773904e-06, "loss": 1.2213, "step": 1084 }, { "epoch": 0.03932441738247979, "grad_norm": 2.5727806033643503, "learning_rate": 9.997724884369587e-06, "loss": 1.1479, "step": 1085 }, { "epoch": 0.03936066108513646, "grad_norm": 2.6436399564651594, "learning_rate": 9.997707146099644e-06, "loss": 1.1832, "step": 1086 }, { "epoch": 0.03939690478779312, "grad_norm": 2.5372479476464265, "learning_rate": 9.99768933896432e-06, "loss": 1.2213, "step": 1087 }, { "epoch": 0.039433148490449786, "grad_norm": 3.2779726506639504, "learning_rate": 9.997671462963858e-06, "loss": 1.0501, "step": 1088 }, { "epoch": 0.039469392193106446, "grad_norm": 2.657633556121864, "learning_rate": 9.997653518098508e-06, "loss": 1.0684, "step": 1089 }, { "epoch": 0.039505635895763114, "grad_norm": 2.809196979977875, "learning_rate": 9.997635504368514e-06, "loss": 1.143, "step": 1090 }, { "epoch": 0.039541879598419774, "grad_norm": 2.390596033115433, "learning_rate": 9.997617421774124e-06, "loss": 0.9509, "step": 1091 }, { "epoch": 0.03957812330107644, "grad_norm": 2.370126656228859, "learning_rate": 9.997599270315589e-06, "loss": 1.0697, "step": 1092 }, { "epoch": 0.0396143670037331, "grad_norm": 2.8504769928080975, "learning_rate": 9.997581049993159e-06, "loss": 1.0258, "step": 1093 }, { "epoch": 0.03965061070638976, "grad_norm": 2.712249713502716, "learning_rate": 9.997562760807085e-06, "loss": 1.3034, "step": 1094 }, { "epoch": 0.03968685440904643, "grad_norm": 2.94212859938791, "learning_rate": 9.997544402757618e-06, "loss": 0.9512, "step": 1095 }, { "epoch": 0.03972309811170309, "grad_norm": 2.5586455092094647, "learning_rate": 9.997525975845012e-06, "loss": 1.1568, "step": 1096 }, { "epoch": 0.03975934181435976, "grad_norm": 2.286795011328722, "learning_rate": 9.997507480069519e-06, "loss": 1.002, "step": 1097 }, { "epoch": 0.03979558551701642, "grad_norm": 2.7218032778127697, "learning_rate": 9.997488915431397e-06, "loss": 1.2443, "step": 1098 }, { "epoch": 0.039831829219673084, "grad_norm": 2.5525317453265646, "learning_rate": 9.997470281930898e-06, "loss": 0.983, "step": 1099 }, { "epoch": 0.039868072922329745, "grad_norm": 2.6024188379912268, "learning_rate": 9.997451579568284e-06, "loss": 1.0727, "step": 1100 }, { "epoch": 0.03990431662498641, "grad_norm": 2.6556839167121757, "learning_rate": 9.997432808343807e-06, "loss": 0.9799, "step": 1101 }, { "epoch": 0.03994056032764307, "grad_norm": 2.5816584451640874, "learning_rate": 9.99741396825773e-06, "loss": 1.2385, "step": 1102 }, { "epoch": 0.03997680403029973, "grad_norm": 3.0431629555984934, "learning_rate": 9.997395059310309e-06, "loss": 1.1891, "step": 1103 }, { "epoch": 0.0400130477329564, "grad_norm": 2.597163430707502, "learning_rate": 9.997376081501808e-06, "loss": 1.0398, "step": 1104 }, { "epoch": 0.04004929143561306, "grad_norm": 3.0353762363287498, "learning_rate": 9.997357034832486e-06, "loss": 0.8246, "step": 1105 }, { "epoch": 0.04008553513826973, "grad_norm": 2.620611845739713, "learning_rate": 9.997337919302608e-06, "loss": 1.0622, "step": 1106 }, { "epoch": 0.04012177884092639, "grad_norm": 2.817294161322096, "learning_rate": 9.997318734912434e-06, "loss": 1.0299, "step": 1107 }, { "epoch": 0.040158022543583055, "grad_norm": 2.7234050981728077, "learning_rate": 9.99729948166223e-06, "loss": 1.0091, "step": 1108 }, { "epoch": 0.040194266246239715, "grad_norm": 2.46976063763992, "learning_rate": 9.997280159552263e-06, "loss": 1.2344, "step": 1109 }, { "epoch": 0.04023050994889638, "grad_norm": 3.2546006558367666, "learning_rate": 9.997260768582796e-06, "loss": 1.0793, "step": 1110 }, { "epoch": 0.04026675365155304, "grad_norm": 2.6968681934449656, "learning_rate": 9.997241308754099e-06, "loss": 1.2181, "step": 1111 }, { "epoch": 0.0403029973542097, "grad_norm": 2.3130846573082473, "learning_rate": 9.997221780066437e-06, "loss": 1.1673, "step": 1112 }, { "epoch": 0.04033924105686637, "grad_norm": 2.6151310580156606, "learning_rate": 9.997202182520084e-06, "loss": 1.0396, "step": 1113 }, { "epoch": 0.04037548475952303, "grad_norm": 2.570061384486151, "learning_rate": 9.997182516115305e-06, "loss": 0.8356, "step": 1114 }, { "epoch": 0.0404117284621797, "grad_norm": 2.555083281616365, "learning_rate": 9.997162780852373e-06, "loss": 1.0764, "step": 1115 }, { "epoch": 0.04044797216483636, "grad_norm": 2.4409618560834003, "learning_rate": 9.997142976731562e-06, "loss": 1.202, "step": 1116 }, { "epoch": 0.040484215867493026, "grad_norm": 2.7740977584875623, "learning_rate": 9.997123103753143e-06, "loss": 1.1216, "step": 1117 }, { "epoch": 0.040520459570149686, "grad_norm": 3.118880910441637, "learning_rate": 9.997103161917388e-06, "loss": 1.2119, "step": 1118 }, { "epoch": 0.040556703272806346, "grad_norm": 2.5439766204525247, "learning_rate": 9.997083151224573e-06, "loss": 0.9742, "step": 1119 }, { "epoch": 0.040592946975463014, "grad_norm": 2.560924802463634, "learning_rate": 9.997063071674978e-06, "loss": 0.8738, "step": 1120 }, { "epoch": 0.040629190678119674, "grad_norm": 2.804989460436454, "learning_rate": 9.997042923268873e-06, "loss": 0.913, "step": 1121 }, { "epoch": 0.04066543438077634, "grad_norm": 2.5903565120124106, "learning_rate": 9.997022706006538e-06, "loss": 1.0543, "step": 1122 }, { "epoch": 0.040701678083433, "grad_norm": 2.515932969659514, "learning_rate": 9.997002419888254e-06, "loss": 1.0145, "step": 1123 }, { "epoch": 0.04073792178608967, "grad_norm": 3.0721524729788654, "learning_rate": 9.996982064914298e-06, "loss": 1.0109, "step": 1124 }, { "epoch": 0.04077416548874633, "grad_norm": 2.446264378734785, "learning_rate": 9.996961641084953e-06, "loss": 1.113, "step": 1125 }, { "epoch": 0.040810409191402996, "grad_norm": 2.6160187727670743, "learning_rate": 9.996941148400496e-06, "loss": 1.0828, "step": 1126 }, { "epoch": 0.04084665289405966, "grad_norm": 2.3568482547282583, "learning_rate": 9.996920586861213e-06, "loss": 0.855, "step": 1127 }, { "epoch": 0.04088289659671632, "grad_norm": 2.7222755934430727, "learning_rate": 9.996899956467386e-06, "loss": 1.0677, "step": 1128 }, { "epoch": 0.040919140299372984, "grad_norm": 2.5406588760933086, "learning_rate": 9.996879257219301e-06, "loss": 1.1341, "step": 1129 }, { "epoch": 0.040955384002029645, "grad_norm": 2.767633322326156, "learning_rate": 9.99685848911724e-06, "loss": 0.9767, "step": 1130 }, { "epoch": 0.04099162770468631, "grad_norm": 2.595495046768334, "learning_rate": 9.996837652161493e-06, "loss": 0.9129, "step": 1131 }, { "epoch": 0.04102787140734297, "grad_norm": 2.827426677799763, "learning_rate": 9.996816746352344e-06, "loss": 1.1807, "step": 1132 }, { "epoch": 0.04106411510999964, "grad_norm": 2.7254534410393694, "learning_rate": 9.996795771690082e-06, "loss": 1.0172, "step": 1133 }, { "epoch": 0.0411003588126563, "grad_norm": 2.378800397379845, "learning_rate": 9.996774728174997e-06, "loss": 0.9431, "step": 1134 }, { "epoch": 0.04113660251531297, "grad_norm": 2.6801185376442658, "learning_rate": 9.996753615807378e-06, "loss": 1.1249, "step": 1135 }, { "epoch": 0.04117284621796963, "grad_norm": 2.402979855414594, "learning_rate": 9.996732434587516e-06, "loss": 1.1084, "step": 1136 }, { "epoch": 0.04120908992062629, "grad_norm": 3.0536640846828913, "learning_rate": 9.996711184515701e-06, "loss": 1.1176, "step": 1137 }, { "epoch": 0.041245333623282955, "grad_norm": 2.500372553068908, "learning_rate": 9.99668986559223e-06, "loss": 0.9283, "step": 1138 }, { "epoch": 0.041281577325939615, "grad_norm": 2.475134428737406, "learning_rate": 9.996668477817396e-06, "loss": 1.1436, "step": 1139 }, { "epoch": 0.04131782102859628, "grad_norm": 2.6969818777804933, "learning_rate": 9.996647021191489e-06, "loss": 1.1507, "step": 1140 }, { "epoch": 0.04135406473125294, "grad_norm": 2.805783477564058, "learning_rate": 9.99662549571481e-06, "loss": 0.9834, "step": 1141 }, { "epoch": 0.04139030843390961, "grad_norm": 2.6220250234770446, "learning_rate": 9.996603901387651e-06, "loss": 0.9765, "step": 1142 }, { "epoch": 0.04142655213656627, "grad_norm": 2.5329496725188436, "learning_rate": 9.996582238210314e-06, "loss": 1.0235, "step": 1143 }, { "epoch": 0.04146279583922294, "grad_norm": 2.35655869363498, "learning_rate": 9.996560506183097e-06, "loss": 1.0457, "step": 1144 }, { "epoch": 0.0414990395418796, "grad_norm": 2.6537045133183357, "learning_rate": 9.996538705306296e-06, "loss": 1.0104, "step": 1145 }, { "epoch": 0.04153528324453626, "grad_norm": 2.8831775127842163, "learning_rate": 9.996516835580215e-06, "loss": 1.0154, "step": 1146 }, { "epoch": 0.041571526947192926, "grad_norm": 2.229489542015394, "learning_rate": 9.996494897005152e-06, "loss": 1.0779, "step": 1147 }, { "epoch": 0.041607770649849586, "grad_norm": 2.7017445914144362, "learning_rate": 9.996472889581413e-06, "loss": 1.0686, "step": 1148 }, { "epoch": 0.04164401435250625, "grad_norm": 2.785628622814939, "learning_rate": 9.996450813309298e-06, "loss": 1.2086, "step": 1149 }, { "epoch": 0.041680258055162914, "grad_norm": 2.6830039186879198, "learning_rate": 9.996428668189113e-06, "loss": 0.8738, "step": 1150 }, { "epoch": 0.04171650175781958, "grad_norm": 2.573567040280184, "learning_rate": 9.996406454221162e-06, "loss": 1.1136, "step": 1151 }, { "epoch": 0.04175274546047624, "grad_norm": 2.583376214854765, "learning_rate": 9.996384171405752e-06, "loss": 0.9571, "step": 1152 }, { "epoch": 0.04178898916313291, "grad_norm": 2.6517967192080425, "learning_rate": 9.99636181974319e-06, "loss": 1.0839, "step": 1153 }, { "epoch": 0.04182523286578957, "grad_norm": 2.594377152120582, "learning_rate": 9.996339399233785e-06, "loss": 1.1369, "step": 1154 }, { "epoch": 0.04186147656844623, "grad_norm": 2.7258668475994083, "learning_rate": 9.996316909877843e-06, "loss": 0.9189, "step": 1155 }, { "epoch": 0.041897720271102896, "grad_norm": 2.970967302767332, "learning_rate": 9.996294351675677e-06, "loss": 1.202, "step": 1156 }, { "epoch": 0.04193396397375956, "grad_norm": 2.57762951537527, "learning_rate": 9.996271724627594e-06, "loss": 0.9622, "step": 1157 }, { "epoch": 0.041970207676416224, "grad_norm": 2.700462554009419, "learning_rate": 9.996249028733912e-06, "loss": 1.0534, "step": 1158 }, { "epoch": 0.042006451379072884, "grad_norm": 2.9144342864265975, "learning_rate": 9.996226263994939e-06, "loss": 1.1737, "step": 1159 }, { "epoch": 0.04204269508172955, "grad_norm": 2.7773646701036423, "learning_rate": 9.996203430410989e-06, "loss": 0.9857, "step": 1160 }, { "epoch": 0.04207893878438621, "grad_norm": 2.7696091272109915, "learning_rate": 9.996180527982376e-06, "loss": 1.0741, "step": 1161 }, { "epoch": 0.04211518248704288, "grad_norm": 2.503256200178436, "learning_rate": 9.996157556709418e-06, "loss": 1.2436, "step": 1162 }, { "epoch": 0.04215142618969954, "grad_norm": 2.8313201418797074, "learning_rate": 9.99613451659243e-06, "loss": 1.0798, "step": 1163 }, { "epoch": 0.0421876698923562, "grad_norm": 2.549269741970174, "learning_rate": 9.996111407631732e-06, "loss": 1.0977, "step": 1164 }, { "epoch": 0.04222391359501287, "grad_norm": 2.444140143801087, "learning_rate": 9.996088229827638e-06, "loss": 1.0763, "step": 1165 }, { "epoch": 0.04226015729766953, "grad_norm": 2.837654597073552, "learning_rate": 9.996064983180472e-06, "loss": 1.2885, "step": 1166 }, { "epoch": 0.042296401000326195, "grad_norm": 2.5664133885768017, "learning_rate": 9.99604166769055e-06, "loss": 1.1622, "step": 1167 }, { "epoch": 0.042332644702982855, "grad_norm": 2.986663197675219, "learning_rate": 9.996018283358196e-06, "loss": 1.1823, "step": 1168 }, { "epoch": 0.04236888840563952, "grad_norm": 2.8020930130539976, "learning_rate": 9.995994830183731e-06, "loss": 1.0099, "step": 1169 }, { "epoch": 0.04240513210829618, "grad_norm": 2.658026364655187, "learning_rate": 9.99597130816748e-06, "loss": 1.1234, "step": 1170 }, { "epoch": 0.04244137581095285, "grad_norm": 2.727683459139362, "learning_rate": 9.995947717309766e-06, "loss": 1.0414, "step": 1171 }, { "epoch": 0.04247761951360951, "grad_norm": 2.4819648427958363, "learning_rate": 9.995924057610913e-06, "loss": 1.1069, "step": 1172 }, { "epoch": 0.04251386321626617, "grad_norm": 2.8115530018155157, "learning_rate": 9.995900329071248e-06, "loss": 1.0745, "step": 1173 }, { "epoch": 0.04255010691892284, "grad_norm": 2.6807777680692593, "learning_rate": 9.995876531691098e-06, "loss": 1.0795, "step": 1174 }, { "epoch": 0.0425863506215795, "grad_norm": 2.2407821020430965, "learning_rate": 9.99585266547079e-06, "loss": 0.9033, "step": 1175 }, { "epoch": 0.042622594324236165, "grad_norm": 2.7745656068852984, "learning_rate": 9.995828730410654e-06, "loss": 1.1096, "step": 1176 }, { "epoch": 0.042658838026892826, "grad_norm": 2.341013152375195, "learning_rate": 9.99580472651102e-06, "loss": 1.089, "step": 1177 }, { "epoch": 0.04269508172954949, "grad_norm": 2.7026148393013276, "learning_rate": 9.995780653772218e-06, "loss": 0.9617, "step": 1178 }, { "epoch": 0.04273132543220615, "grad_norm": 2.2985687776710195, "learning_rate": 9.99575651219458e-06, "loss": 0.9141, "step": 1179 }, { "epoch": 0.04276756913486282, "grad_norm": 2.466864116161777, "learning_rate": 9.995732301778438e-06, "loss": 0.8792, "step": 1180 }, { "epoch": 0.04280381283751948, "grad_norm": 2.796112329919255, "learning_rate": 9.995708022524127e-06, "loss": 1.0733, "step": 1181 }, { "epoch": 0.04284005654017614, "grad_norm": 2.7634368954867115, "learning_rate": 9.995683674431981e-06, "loss": 1.0228, "step": 1182 }, { "epoch": 0.04287630024283281, "grad_norm": 2.492287929923839, "learning_rate": 9.995659257502335e-06, "loss": 1.0522, "step": 1183 }, { "epoch": 0.04291254394548947, "grad_norm": 2.603607317158806, "learning_rate": 9.995634771735526e-06, "loss": 0.9681, "step": 1184 }, { "epoch": 0.042948787648146136, "grad_norm": 2.7618084926181052, "learning_rate": 9.99561021713189e-06, "loss": 1.1746, "step": 1185 }, { "epoch": 0.042985031350802796, "grad_norm": 2.5262727381406362, "learning_rate": 9.995585593691768e-06, "loss": 1.0306, "step": 1186 }, { "epoch": 0.043021275053459464, "grad_norm": 2.880214717585996, "learning_rate": 9.995560901415495e-06, "loss": 0.8971, "step": 1187 }, { "epoch": 0.043057518756116124, "grad_norm": 2.4823682580468773, "learning_rate": 9.995536140303416e-06, "loss": 0.9449, "step": 1188 }, { "epoch": 0.04309376245877279, "grad_norm": 2.5416731810149242, "learning_rate": 9.995511310355872e-06, "loss": 1.0568, "step": 1189 }, { "epoch": 0.04313000616142945, "grad_norm": 2.4958495448139013, "learning_rate": 9.9954864115732e-06, "loss": 0.9929, "step": 1190 }, { "epoch": 0.04316624986408611, "grad_norm": 2.733714153873755, "learning_rate": 9.995461443955747e-06, "loss": 1.1051, "step": 1191 }, { "epoch": 0.04320249356674278, "grad_norm": 2.7914541337090597, "learning_rate": 9.995436407503857e-06, "loss": 1.082, "step": 1192 }, { "epoch": 0.04323873726939944, "grad_norm": 2.6618901750063473, "learning_rate": 9.995411302217877e-06, "loss": 0.8576, "step": 1193 }, { "epoch": 0.04327498097205611, "grad_norm": 2.6155641201105437, "learning_rate": 9.995386128098147e-06, "loss": 1.2104, "step": 1194 }, { "epoch": 0.04331122467471277, "grad_norm": 2.5118681326950907, "learning_rate": 9.995360885145018e-06, "loss": 1.1624, "step": 1195 }, { "epoch": 0.043347468377369434, "grad_norm": 2.4694504696480455, "learning_rate": 9.995335573358838e-06, "loss": 1.0467, "step": 1196 }, { "epoch": 0.043383712080026095, "grad_norm": 2.4782478511848103, "learning_rate": 9.995310192739955e-06, "loss": 0.9986, "step": 1197 }, { "epoch": 0.04341995578268276, "grad_norm": 2.385725641015952, "learning_rate": 9.995284743288718e-06, "loss": 0.9432, "step": 1198 }, { "epoch": 0.04345619948533942, "grad_norm": 2.32585633768133, "learning_rate": 9.995259225005478e-06, "loss": 0.9102, "step": 1199 }, { "epoch": 0.04349244318799608, "grad_norm": 2.490820814574644, "learning_rate": 9.995233637890586e-06, "loss": 1.2231, "step": 1200 }, { "epoch": 0.04352868689065275, "grad_norm": 2.5807407627590946, "learning_rate": 9.995207981944397e-06, "loss": 1.0706, "step": 1201 }, { "epoch": 0.04356493059330941, "grad_norm": 2.671426151626004, "learning_rate": 9.995182257167263e-06, "loss": 0.9821, "step": 1202 }, { "epoch": 0.04360117429596608, "grad_norm": 2.37586246336112, "learning_rate": 9.995156463559538e-06, "loss": 0.8899, "step": 1203 }, { "epoch": 0.04363741799862274, "grad_norm": 2.6146253884852366, "learning_rate": 9.995130601121579e-06, "loss": 1.1933, "step": 1204 }, { "epoch": 0.043673661701279405, "grad_norm": 2.631616698287628, "learning_rate": 9.995104669853739e-06, "loss": 1.0387, "step": 1205 }, { "epoch": 0.043709905403936065, "grad_norm": 2.7237625383989417, "learning_rate": 9.99507866975638e-06, "loss": 1.1838, "step": 1206 }, { "epoch": 0.04374614910659273, "grad_norm": 2.8636201295432286, "learning_rate": 9.995052600829855e-06, "loss": 1.0784, "step": 1207 }, { "epoch": 0.04378239280924939, "grad_norm": 2.5333891460978624, "learning_rate": 9.995026463074528e-06, "loss": 1.0474, "step": 1208 }, { "epoch": 0.04381863651190605, "grad_norm": 3.0845751849770227, "learning_rate": 9.995000256490758e-06, "loss": 1.177, "step": 1209 }, { "epoch": 0.04385488021456272, "grad_norm": 2.515566048059144, "learning_rate": 9.994973981078904e-06, "loss": 1.0753, "step": 1210 }, { "epoch": 0.04389112391721938, "grad_norm": 2.7613878080691716, "learning_rate": 9.99494763683933e-06, "loss": 0.9661, "step": 1211 }, { "epoch": 0.04392736761987605, "grad_norm": 2.4961460688322026, "learning_rate": 9.994921223772399e-06, "loss": 0.9239, "step": 1212 }, { "epoch": 0.04396361132253271, "grad_norm": 2.6002786498450745, "learning_rate": 9.994894741878474e-06, "loss": 0.9994, "step": 1213 }, { "epoch": 0.043999855025189376, "grad_norm": 2.6464616940762284, "learning_rate": 9.994868191157918e-06, "loss": 1.208, "step": 1214 }, { "epoch": 0.044036098727846036, "grad_norm": 2.586394617107966, "learning_rate": 9.994841571611103e-06, "loss": 0.995, "step": 1215 }, { "epoch": 0.0440723424305027, "grad_norm": 2.81525491642674, "learning_rate": 9.99481488323839e-06, "loss": 1.105, "step": 1216 }, { "epoch": 0.044108586133159364, "grad_norm": 2.487018157779783, "learning_rate": 9.994788126040148e-06, "loss": 1.1298, "step": 1217 }, { "epoch": 0.044144829835816024, "grad_norm": 2.4195390906166216, "learning_rate": 9.994761300016747e-06, "loss": 0.9642, "step": 1218 }, { "epoch": 0.04418107353847269, "grad_norm": 2.658799369466066, "learning_rate": 9.994734405168557e-06, "loss": 1.0631, "step": 1219 }, { "epoch": 0.04421731724112935, "grad_norm": 2.5463319262970288, "learning_rate": 9.994707441495946e-06, "loss": 1.1381, "step": 1220 }, { "epoch": 0.04425356094378602, "grad_norm": 2.710853445134437, "learning_rate": 9.994680408999287e-06, "loss": 0.987, "step": 1221 }, { "epoch": 0.04428980464644268, "grad_norm": 2.3754242636789855, "learning_rate": 9.994653307678952e-06, "loss": 0.9929, "step": 1222 }, { "epoch": 0.044326048349099346, "grad_norm": 2.575861629262741, "learning_rate": 9.994626137535317e-06, "loss": 1.2272, "step": 1223 }, { "epoch": 0.04436229205175601, "grad_norm": 2.302810931243138, "learning_rate": 9.994598898568755e-06, "loss": 0.9936, "step": 1224 }, { "epoch": 0.044398535754412674, "grad_norm": 2.582909940677817, "learning_rate": 9.99457159077964e-06, "loss": 1.0395, "step": 1225 }, { "epoch": 0.044434779457069334, "grad_norm": 2.867778448103768, "learning_rate": 9.99454421416835e-06, "loss": 1.0991, "step": 1226 }, { "epoch": 0.044471023159725995, "grad_norm": 2.994855569747748, "learning_rate": 9.994516768735259e-06, "loss": 1.1273, "step": 1227 }, { "epoch": 0.04450726686238266, "grad_norm": 2.7715430772343437, "learning_rate": 9.994489254480748e-06, "loss": 1.1322, "step": 1228 }, { "epoch": 0.04454351056503932, "grad_norm": 2.7535856470658793, "learning_rate": 9.994461671405196e-06, "loss": 1.0157, "step": 1229 }, { "epoch": 0.04457975426769599, "grad_norm": 3.0537701590049364, "learning_rate": 9.994434019508983e-06, "loss": 1.1233, "step": 1230 }, { "epoch": 0.04461599797035265, "grad_norm": 2.4919844055533997, "learning_rate": 9.99440629879249e-06, "loss": 1.1413, "step": 1231 }, { "epoch": 0.04465224167300932, "grad_norm": 2.7000603800563487, "learning_rate": 9.9943785092561e-06, "loss": 1.1138, "step": 1232 }, { "epoch": 0.04468848537566598, "grad_norm": 2.4846965575723217, "learning_rate": 9.994350650900193e-06, "loss": 0.8683, "step": 1233 }, { "epoch": 0.044724729078322645, "grad_norm": 2.8471175040057557, "learning_rate": 9.994322723725154e-06, "loss": 1.0648, "step": 1234 }, { "epoch": 0.044760972780979305, "grad_norm": 2.7109586709282976, "learning_rate": 9.994294727731369e-06, "loss": 0.9427, "step": 1235 }, { "epoch": 0.044797216483635965, "grad_norm": 2.162907907513663, "learning_rate": 9.994266662919224e-06, "loss": 1.1103, "step": 1236 }, { "epoch": 0.04483346018629263, "grad_norm": 2.7081281448863814, "learning_rate": 9.994238529289104e-06, "loss": 0.9399, "step": 1237 }, { "epoch": 0.04486970388894929, "grad_norm": 2.9178419344155233, "learning_rate": 9.994210326841397e-06, "loss": 1.0607, "step": 1238 }, { "epoch": 0.04490594759160596, "grad_norm": 2.3447607617385824, "learning_rate": 9.994182055576492e-06, "loss": 1.2782, "step": 1239 }, { "epoch": 0.04494219129426262, "grad_norm": 2.4446770526683865, "learning_rate": 9.99415371549478e-06, "loss": 1.0032, "step": 1240 }, { "epoch": 0.04497843499691929, "grad_norm": 2.737196563375354, "learning_rate": 9.99412530659665e-06, "loss": 1.1054, "step": 1241 }, { "epoch": 0.04501467869957595, "grad_norm": 2.789630455897214, "learning_rate": 9.994096828882492e-06, "loss": 0.898, "step": 1242 }, { "epoch": 0.045050922402232615, "grad_norm": 2.7308815990157504, "learning_rate": 9.9940682823527e-06, "loss": 0.94, "step": 1243 }, { "epoch": 0.045087166104889276, "grad_norm": 2.692874611957867, "learning_rate": 9.99403966700767e-06, "loss": 1.0955, "step": 1244 }, { "epoch": 0.045123409807545936, "grad_norm": 2.9268465356442164, "learning_rate": 9.994010982847792e-06, "loss": 1.0335, "step": 1245 }, { "epoch": 0.0451596535102026, "grad_norm": 2.5702302196535913, "learning_rate": 9.993982229873463e-06, "loss": 1.0725, "step": 1246 }, { "epoch": 0.045195897212859264, "grad_norm": 2.6205907300905555, "learning_rate": 9.99395340808508e-06, "loss": 1.0471, "step": 1247 }, { "epoch": 0.04523214091551593, "grad_norm": 2.7585907842356288, "learning_rate": 9.993924517483038e-06, "loss": 0.9156, "step": 1248 }, { "epoch": 0.04526838461817259, "grad_norm": 2.8799676895529407, "learning_rate": 9.993895558067736e-06, "loss": 1.151, "step": 1249 }, { "epoch": 0.04530462832082926, "grad_norm": 2.3940990124574117, "learning_rate": 9.993866529839576e-06, "loss": 0.8467, "step": 1250 }, { "epoch": 0.04534087202348592, "grad_norm": 3.038150037561658, "learning_rate": 9.993837432798952e-06, "loss": 1.1626, "step": 1251 }, { "epoch": 0.045377115726142586, "grad_norm": 2.5770538006239696, "learning_rate": 9.99380826694627e-06, "loss": 1.1004, "step": 1252 }, { "epoch": 0.045413359428799246, "grad_norm": 2.5921260431557918, "learning_rate": 9.993779032281932e-06, "loss": 1.0354, "step": 1253 }, { "epoch": 0.04544960313145591, "grad_norm": 2.649459589723242, "learning_rate": 9.993749728806336e-06, "loss": 1.111, "step": 1254 }, { "epoch": 0.045485846834112574, "grad_norm": 2.4342849552885357, "learning_rate": 9.99372035651989e-06, "loss": 1.1125, "step": 1255 }, { "epoch": 0.045522090536769234, "grad_norm": 2.6173463114013336, "learning_rate": 9.993690915422997e-06, "loss": 1.1774, "step": 1256 }, { "epoch": 0.0455583342394259, "grad_norm": 2.6507459624739282, "learning_rate": 9.993661405516065e-06, "loss": 1.1987, "step": 1257 }, { "epoch": 0.04559457794208256, "grad_norm": 2.635493841183419, "learning_rate": 9.993631826799499e-06, "loss": 1.0236, "step": 1258 }, { "epoch": 0.04563082164473923, "grad_norm": 2.6089899954739155, "learning_rate": 9.993602179273704e-06, "loss": 1.1418, "step": 1259 }, { "epoch": 0.04566706534739589, "grad_norm": 2.860089711882737, "learning_rate": 9.993572462939093e-06, "loss": 1.1703, "step": 1260 }, { "epoch": 0.04570330905005255, "grad_norm": 2.583273706490074, "learning_rate": 9.993542677796073e-06, "loss": 1.004, "step": 1261 }, { "epoch": 0.04573955275270922, "grad_norm": 2.3695301227650574, "learning_rate": 9.993512823845056e-06, "loss": 1.1756, "step": 1262 }, { "epoch": 0.04577579645536588, "grad_norm": 2.496297938555874, "learning_rate": 9.99348290108645e-06, "loss": 1.0402, "step": 1263 }, { "epoch": 0.045812040158022545, "grad_norm": 2.6997126837778476, "learning_rate": 9.99345290952067e-06, "loss": 0.933, "step": 1264 }, { "epoch": 0.045848283860679205, "grad_norm": 2.7061469254719297, "learning_rate": 9.993422849148131e-06, "loss": 1.0403, "step": 1265 }, { "epoch": 0.04588452756333587, "grad_norm": 2.5461392051973224, "learning_rate": 9.993392719969243e-06, "loss": 0.8997, "step": 1266 }, { "epoch": 0.04592077126599253, "grad_norm": 2.339918187940317, "learning_rate": 9.993362521984424e-06, "loss": 1.0374, "step": 1267 }, { "epoch": 0.0459570149686492, "grad_norm": 3.091288694410307, "learning_rate": 9.993332255194089e-06, "loss": 1.1318, "step": 1268 }, { "epoch": 0.04599325867130586, "grad_norm": 2.676125077359574, "learning_rate": 9.993301919598655e-06, "loss": 0.9676, "step": 1269 }, { "epoch": 0.04602950237396252, "grad_norm": 2.615404741592967, "learning_rate": 9.993271515198541e-06, "loss": 1.1818, "step": 1270 }, { "epoch": 0.04606574607661919, "grad_norm": 2.5390195707355026, "learning_rate": 9.993241041994166e-06, "loss": 0.9847, "step": 1271 }, { "epoch": 0.04610198977927585, "grad_norm": 2.6290552321677896, "learning_rate": 9.99321049998595e-06, "loss": 1.0604, "step": 1272 }, { "epoch": 0.046138233481932515, "grad_norm": 2.335636032537512, "learning_rate": 9.99317988917431e-06, "loss": 1.0095, "step": 1273 }, { "epoch": 0.046174477184589176, "grad_norm": 2.748514733502217, "learning_rate": 9.993149209559675e-06, "loss": 1.1617, "step": 1274 }, { "epoch": 0.04621072088724584, "grad_norm": 2.557187713805555, "learning_rate": 9.99311846114246e-06, "loss": 1.2109, "step": 1275 }, { "epoch": 0.0462469645899025, "grad_norm": 2.2578647231937374, "learning_rate": 9.993087643923093e-06, "loss": 0.9531, "step": 1276 }, { "epoch": 0.04628320829255917, "grad_norm": 2.1609589077517604, "learning_rate": 9.993056757901998e-06, "loss": 0.8098, "step": 1277 }, { "epoch": 0.04631945199521583, "grad_norm": 2.6261456648052772, "learning_rate": 9.993025803079602e-06, "loss": 0.9708, "step": 1278 }, { "epoch": 0.04635569569787249, "grad_norm": 2.7468651625236413, "learning_rate": 9.99299477945633e-06, "loss": 1.1978, "step": 1279 }, { "epoch": 0.04639193940052916, "grad_norm": 2.73904392558495, "learning_rate": 9.992963687032609e-06, "loss": 0.9994, "step": 1280 }, { "epoch": 0.04642818310318582, "grad_norm": 2.400913575898052, "learning_rate": 9.992932525808868e-06, "loss": 1.0067, "step": 1281 }, { "epoch": 0.046464426805842486, "grad_norm": 2.7443165434434693, "learning_rate": 9.992901295785535e-06, "loss": 1.0011, "step": 1282 }, { "epoch": 0.046500670508499146, "grad_norm": 2.4178288787805227, "learning_rate": 9.992869996963043e-06, "loss": 1.061, "step": 1283 }, { "epoch": 0.046536914211155814, "grad_norm": 2.649684152593111, "learning_rate": 9.992838629341823e-06, "loss": 1.2464, "step": 1284 }, { "epoch": 0.046573157913812474, "grad_norm": 2.802837058913444, "learning_rate": 9.992807192922305e-06, "loss": 1.101, "step": 1285 }, { "epoch": 0.04660940161646914, "grad_norm": 2.9220256019561854, "learning_rate": 9.992775687704924e-06, "loss": 1.1411, "step": 1286 }, { "epoch": 0.0466456453191258, "grad_norm": 2.2382546844173743, "learning_rate": 9.992744113690114e-06, "loss": 1.1561, "step": 1287 }, { "epoch": 0.04668188902178246, "grad_norm": 3.0403579031700536, "learning_rate": 9.992712470878309e-06, "loss": 1.126, "step": 1288 }, { "epoch": 0.04671813272443913, "grad_norm": 2.4423728592312597, "learning_rate": 9.992680759269945e-06, "loss": 1.2253, "step": 1289 }, { "epoch": 0.04675437642709579, "grad_norm": 2.6762416903893977, "learning_rate": 9.99264897886546e-06, "loss": 1.1544, "step": 1290 }, { "epoch": 0.04679062012975246, "grad_norm": 2.4127922073542303, "learning_rate": 9.992617129665292e-06, "loss": 1.0747, "step": 1291 }, { "epoch": 0.04682686383240912, "grad_norm": 2.5272657131855047, "learning_rate": 9.99258521166988e-06, "loss": 1.1623, "step": 1292 }, { "epoch": 0.046863107535065784, "grad_norm": 2.491082140709751, "learning_rate": 9.992553224879663e-06, "loss": 1.0518, "step": 1293 }, { "epoch": 0.046899351237722445, "grad_norm": 2.5993691530801506, "learning_rate": 9.99252116929508e-06, "loss": 0.9968, "step": 1294 }, { "epoch": 0.04693559494037911, "grad_norm": 2.491995908072792, "learning_rate": 9.992489044916576e-06, "loss": 0.8497, "step": 1295 }, { "epoch": 0.04697183864303577, "grad_norm": 2.7118969302663136, "learning_rate": 9.992456851744593e-06, "loss": 1.076, "step": 1296 }, { "epoch": 0.04700808234569243, "grad_norm": 2.5976811234458506, "learning_rate": 9.992424589779573e-06, "loss": 1.0158, "step": 1297 }, { "epoch": 0.0470443260483491, "grad_norm": 2.7157301189326604, "learning_rate": 9.992392259021961e-06, "loss": 0.9619, "step": 1298 }, { "epoch": 0.04708056975100576, "grad_norm": 2.4283333854376865, "learning_rate": 9.992359859472204e-06, "loss": 1.1354, "step": 1299 }, { "epoch": 0.04711681345366243, "grad_norm": 2.8454557378827485, "learning_rate": 9.992327391130748e-06, "loss": 1.1369, "step": 1300 }, { "epoch": 0.04715305715631909, "grad_norm": 2.5355172226880676, "learning_rate": 9.99229485399804e-06, "loss": 1.0219, "step": 1301 }, { "epoch": 0.047189300858975755, "grad_norm": 2.476897728735223, "learning_rate": 9.992262248074526e-06, "loss": 1.2075, "step": 1302 }, { "epoch": 0.047225544561632415, "grad_norm": 2.650103559703849, "learning_rate": 9.99222957336066e-06, "loss": 1.0045, "step": 1303 }, { "epoch": 0.04726178826428908, "grad_norm": 2.530803888234768, "learning_rate": 9.992196829856886e-06, "loss": 1.0957, "step": 1304 }, { "epoch": 0.04729803196694574, "grad_norm": 2.6074018979767017, "learning_rate": 9.99216401756366e-06, "loss": 1.127, "step": 1305 }, { "epoch": 0.0473342756696024, "grad_norm": 2.1905374218186506, "learning_rate": 9.992131136481435e-06, "loss": 0.9969, "step": 1306 }, { "epoch": 0.04737051937225907, "grad_norm": 2.5239091095470947, "learning_rate": 9.992098186610662e-06, "loss": 1.1171, "step": 1307 }, { "epoch": 0.04740676307491573, "grad_norm": 2.485186451840368, "learning_rate": 9.992065167951795e-06, "loss": 1.253, "step": 1308 }, { "epoch": 0.0474430067775724, "grad_norm": 2.812083955570685, "learning_rate": 9.992032080505287e-06, "loss": 1.1711, "step": 1309 }, { "epoch": 0.04747925048022906, "grad_norm": 2.4833650133069045, "learning_rate": 9.991998924271598e-06, "loss": 0.9838, "step": 1310 }, { "epoch": 0.047515494182885726, "grad_norm": 2.5412157075900748, "learning_rate": 9.991965699251183e-06, "loss": 1.0427, "step": 1311 }, { "epoch": 0.047551737885542386, "grad_norm": 2.7029336887980704, "learning_rate": 9.991932405444498e-06, "loss": 1.1125, "step": 1312 }, { "epoch": 0.04758798158819905, "grad_norm": 2.31690857263727, "learning_rate": 9.991899042852004e-06, "loss": 1.0148, "step": 1313 }, { "epoch": 0.047624225290855714, "grad_norm": 3.536040378043345, "learning_rate": 9.991865611474161e-06, "loss": 1.0394, "step": 1314 }, { "epoch": 0.047660468993512374, "grad_norm": 2.727702540348087, "learning_rate": 9.99183211131143e-06, "loss": 0.9788, "step": 1315 }, { "epoch": 0.04769671269616904, "grad_norm": 2.2866384938351936, "learning_rate": 9.991798542364269e-06, "loss": 0.927, "step": 1316 }, { "epoch": 0.0477329563988257, "grad_norm": 2.756029875320837, "learning_rate": 9.991764904633144e-06, "loss": 1.0268, "step": 1317 }, { "epoch": 0.04776920010148237, "grad_norm": 2.481767060527416, "learning_rate": 9.991731198118517e-06, "loss": 1.2139, "step": 1318 }, { "epoch": 0.04780544380413903, "grad_norm": 2.633638597609413, "learning_rate": 9.991697422820854e-06, "loss": 1.106, "step": 1319 }, { "epoch": 0.047841687506795696, "grad_norm": 2.668136159322878, "learning_rate": 9.99166357874062e-06, "loss": 1.0594, "step": 1320 }, { "epoch": 0.04787793120945236, "grad_norm": 2.4991915489515235, "learning_rate": 9.991629665878279e-06, "loss": 0.9673, "step": 1321 }, { "epoch": 0.047914174912109024, "grad_norm": 2.660148898094465, "learning_rate": 9.991595684234301e-06, "loss": 1.0861, "step": 1322 }, { "epoch": 0.047950418614765684, "grad_norm": 2.602426306389921, "learning_rate": 9.991561633809153e-06, "loss": 0.9517, "step": 1323 }, { "epoch": 0.047986662317422345, "grad_norm": 3.0594322219017176, "learning_rate": 9.991527514603305e-06, "loss": 0.9746, "step": 1324 }, { "epoch": 0.04802290602007901, "grad_norm": 2.7090047766185266, "learning_rate": 9.991493326617226e-06, "loss": 0.9988, "step": 1325 }, { "epoch": 0.04805914972273567, "grad_norm": 2.9113278794265347, "learning_rate": 9.991459069851389e-06, "loss": 1.1126, "step": 1326 }, { "epoch": 0.04809539342539234, "grad_norm": 3.0033676206347515, "learning_rate": 9.991424744306261e-06, "loss": 1.2026, "step": 1327 }, { "epoch": 0.048131637128049, "grad_norm": 2.694104080864387, "learning_rate": 9.991390349982321e-06, "loss": 1.2196, "step": 1328 }, { "epoch": 0.04816788083070567, "grad_norm": 2.858226086163099, "learning_rate": 9.991355886880043e-06, "loss": 1.127, "step": 1329 }, { "epoch": 0.04820412453336233, "grad_norm": 2.527405991072386, "learning_rate": 9.991321354999897e-06, "loss": 1.0533, "step": 1330 }, { "epoch": 0.048240368236018995, "grad_norm": 2.736654206109358, "learning_rate": 9.991286754342363e-06, "loss": 0.9657, "step": 1331 }, { "epoch": 0.048276611938675655, "grad_norm": 2.5774852498060534, "learning_rate": 9.991252084907915e-06, "loss": 1.0213, "step": 1332 }, { "epoch": 0.048312855641332315, "grad_norm": 2.420722377504781, "learning_rate": 9.991217346697031e-06, "loss": 0.9028, "step": 1333 }, { "epoch": 0.04834909934398898, "grad_norm": 2.9742575746803634, "learning_rate": 9.991182539710191e-06, "loss": 1.2168, "step": 1334 }, { "epoch": 0.04838534304664564, "grad_norm": 2.968313701026513, "learning_rate": 9.991147663947875e-06, "loss": 1.0369, "step": 1335 }, { "epoch": 0.04842158674930231, "grad_norm": 2.8733655955019772, "learning_rate": 9.991112719410564e-06, "loss": 0.9153, "step": 1336 }, { "epoch": 0.04845783045195897, "grad_norm": 2.8539245043362187, "learning_rate": 9.991077706098735e-06, "loss": 1.0383, "step": 1337 }, { "epoch": 0.04849407415461564, "grad_norm": 2.57307893561561, "learning_rate": 9.991042624012877e-06, "loss": 1.2451, "step": 1338 }, { "epoch": 0.0485303178572723, "grad_norm": 2.152413503087754, "learning_rate": 9.991007473153467e-06, "loss": 0.7892, "step": 1339 }, { "epoch": 0.048566561559928965, "grad_norm": 2.575922665820876, "learning_rate": 9.990972253520994e-06, "loss": 1.3086, "step": 1340 }, { "epoch": 0.048602805262585626, "grad_norm": 2.4682865771407494, "learning_rate": 9.990936965115943e-06, "loss": 1.1358, "step": 1341 }, { "epoch": 0.048639048965242286, "grad_norm": 2.711687978884338, "learning_rate": 9.990901607938798e-06, "loss": 1.1037, "step": 1342 }, { "epoch": 0.04867529266789895, "grad_norm": 2.799578483564169, "learning_rate": 9.990866181990046e-06, "loss": 1.0853, "step": 1343 }, { "epoch": 0.048711536370555614, "grad_norm": 2.6788656287970123, "learning_rate": 9.990830687270178e-06, "loss": 1.0179, "step": 1344 }, { "epoch": 0.04874778007321228, "grad_norm": 2.378327128528641, "learning_rate": 9.990795123779681e-06, "loss": 0.9316, "step": 1345 }, { "epoch": 0.04878402377586894, "grad_norm": 2.556710154238419, "learning_rate": 9.990759491519047e-06, "loss": 0.9618, "step": 1346 }, { "epoch": 0.04882026747852561, "grad_norm": 2.8664374773683647, "learning_rate": 9.990723790488763e-06, "loss": 1.1733, "step": 1347 }, { "epoch": 0.04885651118118227, "grad_norm": 2.7986299151826484, "learning_rate": 9.990688020689325e-06, "loss": 0.9859, "step": 1348 }, { "epoch": 0.048892754883838936, "grad_norm": 2.7124603516080223, "learning_rate": 9.990652182121223e-06, "loss": 1.1743, "step": 1349 }, { "epoch": 0.048928998586495596, "grad_norm": 2.616447092170907, "learning_rate": 9.990616274784955e-06, "loss": 1.1301, "step": 1350 }, { "epoch": 0.04896524228915226, "grad_norm": 2.5031952329904072, "learning_rate": 9.99058029868101e-06, "loss": 0.8926, "step": 1351 }, { "epoch": 0.049001485991808924, "grad_norm": 2.7095200698078523, "learning_rate": 9.990544253809888e-06, "loss": 0.9044, "step": 1352 }, { "epoch": 0.049037729694465584, "grad_norm": 2.5317879242720287, "learning_rate": 9.990508140172083e-06, "loss": 1.0179, "step": 1353 }, { "epoch": 0.04907397339712225, "grad_norm": 2.464595108954304, "learning_rate": 9.990471957768095e-06, "loss": 1.0159, "step": 1354 }, { "epoch": 0.04911021709977891, "grad_norm": 2.8492354278974417, "learning_rate": 9.990435706598423e-06, "loss": 1.1725, "step": 1355 }, { "epoch": 0.04914646080243558, "grad_norm": 2.648492523165757, "learning_rate": 9.990399386663563e-06, "loss": 1.0825, "step": 1356 }, { "epoch": 0.04918270450509224, "grad_norm": 2.5592034896563094, "learning_rate": 9.990362997964018e-06, "loss": 0.9676, "step": 1357 }, { "epoch": 0.04921894820774891, "grad_norm": 2.327690456603759, "learning_rate": 9.99032654050029e-06, "loss": 1.0471, "step": 1358 }, { "epoch": 0.04925519191040557, "grad_norm": 2.4795310259798087, "learning_rate": 9.990290014272879e-06, "loss": 0.9081, "step": 1359 }, { "epoch": 0.04929143561306223, "grad_norm": 2.842985624644278, "learning_rate": 9.99025341928229e-06, "loss": 1.1981, "step": 1360 }, { "epoch": 0.049327679315718895, "grad_norm": 2.616176404244073, "learning_rate": 9.990216755529027e-06, "loss": 1.1742, "step": 1361 }, { "epoch": 0.049363923018375555, "grad_norm": 2.6270901165478393, "learning_rate": 9.990180023013595e-06, "loss": 1.0734, "step": 1362 }, { "epoch": 0.04940016672103222, "grad_norm": 2.5573578067879548, "learning_rate": 9.9901432217365e-06, "loss": 1.0479, "step": 1363 }, { "epoch": 0.04943641042368888, "grad_norm": 2.3216011237776533, "learning_rate": 9.990106351698252e-06, "loss": 1.0857, "step": 1364 }, { "epoch": 0.04947265412634555, "grad_norm": 2.7386340501534634, "learning_rate": 9.990069412899352e-06, "loss": 1.2473, "step": 1365 }, { "epoch": 0.04950889782900221, "grad_norm": 2.567287811722136, "learning_rate": 9.990032405340316e-06, "loss": 1.0666, "step": 1366 }, { "epoch": 0.04954514153165888, "grad_norm": 2.8584115993901142, "learning_rate": 9.989995329021651e-06, "loss": 0.9412, "step": 1367 }, { "epoch": 0.04958138523431554, "grad_norm": 2.861404714309109, "learning_rate": 9.989958183943868e-06, "loss": 1.0033, "step": 1368 }, { "epoch": 0.0496176289369722, "grad_norm": 2.729269267998561, "learning_rate": 9.98992097010748e-06, "loss": 1.0186, "step": 1369 }, { "epoch": 0.049653872639628865, "grad_norm": 2.7717176632018172, "learning_rate": 9.989883687512998e-06, "loss": 1.041, "step": 1370 }, { "epoch": 0.049690116342285526, "grad_norm": 2.888415447726949, "learning_rate": 9.989846336160935e-06, "loss": 1.197, "step": 1371 }, { "epoch": 0.04972636004494219, "grad_norm": 2.6349246660880072, "learning_rate": 9.989808916051812e-06, "loss": 1.0523, "step": 1372 }, { "epoch": 0.04976260374759885, "grad_norm": 2.433677913661981, "learning_rate": 9.989771427186135e-06, "loss": 1.0229, "step": 1373 }, { "epoch": 0.04979884745025552, "grad_norm": 2.449702151818341, "learning_rate": 9.989733869564426e-06, "loss": 1.1075, "step": 1374 }, { "epoch": 0.04983509115291218, "grad_norm": 2.3965525514988824, "learning_rate": 9.989696243187204e-06, "loss": 1.0351, "step": 1375 }, { "epoch": 0.04987133485556885, "grad_norm": 14.171557178145935, "learning_rate": 9.989658548054985e-06, "loss": 2.5342, "step": 1376 }, { "epoch": 0.04990757855822551, "grad_norm": 2.720363926217287, "learning_rate": 9.989620784168288e-06, "loss": 1.1153, "step": 1377 }, { "epoch": 0.04994382226088217, "grad_norm": 2.543211880821152, "learning_rate": 9.989582951527635e-06, "loss": 1.0203, "step": 1378 }, { "epoch": 0.049980065963538836, "grad_norm": 2.7453370108798603, "learning_rate": 9.989545050133547e-06, "loss": 1.1143, "step": 1379 }, { "epoch": 0.050016309666195496, "grad_norm": 2.8387178831465305, "learning_rate": 9.989507079986545e-06, "loss": 0.9462, "step": 1380 }, { "epoch": 0.050052553368852164, "grad_norm": 2.8499067817602675, "learning_rate": 9.989469041087153e-06, "loss": 1.3834, "step": 1381 }, { "epoch": 0.050088797071508824, "grad_norm": 2.6066395209671542, "learning_rate": 9.989430933435895e-06, "loss": 0.8782, "step": 1382 }, { "epoch": 0.05012504077416549, "grad_norm": 2.1607304687427153, "learning_rate": 9.989392757033295e-06, "loss": 0.9565, "step": 1383 }, { "epoch": 0.05016128447682215, "grad_norm": 2.5325558738338074, "learning_rate": 9.989354511879881e-06, "loss": 0.8443, "step": 1384 }, { "epoch": 0.05019752817947882, "grad_norm": 2.6436269593190933, "learning_rate": 9.98931619797618e-06, "loss": 1.2178, "step": 1385 }, { "epoch": 0.05023377188213548, "grad_norm": 3.0176849490978803, "learning_rate": 9.98927781532272e-06, "loss": 0.9073, "step": 1386 }, { "epoch": 0.05027001558479214, "grad_norm": 2.566608759516415, "learning_rate": 9.989239363920029e-06, "loss": 1.1554, "step": 1387 }, { "epoch": 0.05030625928744881, "grad_norm": 2.586341179698537, "learning_rate": 9.989200843768636e-06, "loss": 1.084, "step": 1388 }, { "epoch": 0.05034250299010547, "grad_norm": 2.6674574873283525, "learning_rate": 9.989162254869073e-06, "loss": 1.0587, "step": 1389 }, { "epoch": 0.050378746692762134, "grad_norm": 2.5351481487845926, "learning_rate": 9.98912359722187e-06, "loss": 1.0133, "step": 1390 }, { "epoch": 0.050414990395418795, "grad_norm": 2.4080371864002252, "learning_rate": 9.989084870827563e-06, "loss": 1.0916, "step": 1391 }, { "epoch": 0.05045123409807546, "grad_norm": 2.6141602658831933, "learning_rate": 9.989046075686683e-06, "loss": 1.0676, "step": 1392 }, { "epoch": 0.05048747780073212, "grad_norm": 2.646603961293931, "learning_rate": 9.989007211799766e-06, "loss": 1.0651, "step": 1393 }, { "epoch": 0.05052372150338879, "grad_norm": 2.6581086397806173, "learning_rate": 9.988968279167348e-06, "loss": 1.1536, "step": 1394 }, { "epoch": 0.05055996520604545, "grad_norm": 2.654973258437765, "learning_rate": 9.988929277789962e-06, "loss": 1.1115, "step": 1395 }, { "epoch": 0.05059620890870211, "grad_norm": 2.457574331771393, "learning_rate": 9.98889020766815e-06, "loss": 1.1551, "step": 1396 }, { "epoch": 0.05063245261135878, "grad_norm": 2.5390467163031363, "learning_rate": 9.988851068802446e-06, "loss": 1.1335, "step": 1397 }, { "epoch": 0.05066869631401544, "grad_norm": 2.897508115231262, "learning_rate": 9.988811861193393e-06, "loss": 1.114, "step": 1398 }, { "epoch": 0.050704940016672105, "grad_norm": 2.831291003286798, "learning_rate": 9.988772584841529e-06, "loss": 1.3354, "step": 1399 }, { "epoch": 0.050741183719328765, "grad_norm": 2.7537944708868167, "learning_rate": 9.988733239747395e-06, "loss": 1.1084, "step": 1400 }, { "epoch": 0.05077742742198543, "grad_norm": 2.6584311550497084, "learning_rate": 9.988693825911536e-06, "loss": 1.2176, "step": 1401 }, { "epoch": 0.05081367112464209, "grad_norm": 2.551041547158593, "learning_rate": 9.988654343334492e-06, "loss": 1.226, "step": 1402 }, { "epoch": 0.05084991482729876, "grad_norm": 2.483813691759989, "learning_rate": 9.988614792016807e-06, "loss": 1.0752, "step": 1403 }, { "epoch": 0.05088615852995542, "grad_norm": 2.2671649063302355, "learning_rate": 9.98857517195903e-06, "loss": 1.1417, "step": 1404 }, { "epoch": 0.05092240223261208, "grad_norm": 2.3407859062165817, "learning_rate": 9.988535483161701e-06, "loss": 1.0604, "step": 1405 }, { "epoch": 0.05095864593526875, "grad_norm": 2.600886684542775, "learning_rate": 9.988495725625373e-06, "loss": 1.0204, "step": 1406 }, { "epoch": 0.05099488963792541, "grad_norm": 2.815913866943588, "learning_rate": 9.988455899350589e-06, "loss": 1.0229, "step": 1407 }, { "epoch": 0.051031133340582076, "grad_norm": 2.7446658335546577, "learning_rate": 9.9884160043379e-06, "loss": 1.0791, "step": 1408 }, { "epoch": 0.051067377043238736, "grad_norm": 2.9136221407663476, "learning_rate": 9.988376040587856e-06, "loss": 1.1485, "step": 1409 }, { "epoch": 0.0511036207458954, "grad_norm": 2.3826040973485885, "learning_rate": 9.988336008101005e-06, "loss": 1.0009, "step": 1410 }, { "epoch": 0.051139864448552064, "grad_norm": 2.9617462208857357, "learning_rate": 9.988295906877902e-06, "loss": 1.1462, "step": 1411 }, { "epoch": 0.051176108151208724, "grad_norm": 2.6228782240253867, "learning_rate": 9.988255736919099e-06, "loss": 1.121, "step": 1412 }, { "epoch": 0.05121235185386539, "grad_norm": 2.7143739733066705, "learning_rate": 9.988215498225149e-06, "loss": 1.1899, "step": 1413 }, { "epoch": 0.05124859555652205, "grad_norm": 3.0918911777930447, "learning_rate": 9.988175190796604e-06, "loss": 1.1823, "step": 1414 }, { "epoch": 0.05128483925917872, "grad_norm": 2.6156370338077273, "learning_rate": 9.988134814634023e-06, "loss": 1.1032, "step": 1415 }, { "epoch": 0.05132108296183538, "grad_norm": 3.1330658678945724, "learning_rate": 9.988094369737963e-06, "loss": 1.2031, "step": 1416 }, { "epoch": 0.051357326664492046, "grad_norm": 2.349520938538853, "learning_rate": 9.988053856108976e-06, "loss": 1.0417, "step": 1417 }, { "epoch": 0.05139357036714871, "grad_norm": 2.5056427644421846, "learning_rate": 9.988013273747624e-06, "loss": 0.9651, "step": 1418 }, { "epoch": 0.051429814069805374, "grad_norm": 2.405922419552745, "learning_rate": 9.987972622654467e-06, "loss": 1.0149, "step": 1419 }, { "epoch": 0.051466057772462034, "grad_norm": 2.424112679451468, "learning_rate": 9.987931902830063e-06, "loss": 1.0384, "step": 1420 }, { "epoch": 0.051502301475118695, "grad_norm": 2.7161006429934726, "learning_rate": 9.987891114274975e-06, "loss": 1.2439, "step": 1421 }, { "epoch": 0.05153854517777536, "grad_norm": 2.4959524868587066, "learning_rate": 9.987850256989764e-06, "loss": 1.1679, "step": 1422 }, { "epoch": 0.05157478888043202, "grad_norm": 2.459210792828383, "learning_rate": 9.987809330974991e-06, "loss": 0.962, "step": 1423 }, { "epoch": 0.05161103258308869, "grad_norm": 2.7283941129792746, "learning_rate": 9.987768336231223e-06, "loss": 1.1622, "step": 1424 }, { "epoch": 0.05164727628574535, "grad_norm": 2.544149418571773, "learning_rate": 9.987727272759025e-06, "loss": 1.1366, "step": 1425 }, { "epoch": 0.05168351998840202, "grad_norm": 2.3913717098765614, "learning_rate": 9.98768614055896e-06, "loss": 1.1225, "step": 1426 }, { "epoch": 0.05171976369105868, "grad_norm": 2.565372412383444, "learning_rate": 9.987644939631598e-06, "loss": 1.2146, "step": 1427 }, { "epoch": 0.051756007393715345, "grad_norm": 2.4150366413943414, "learning_rate": 9.987603669977504e-06, "loss": 1.0552, "step": 1428 }, { "epoch": 0.051792251096372005, "grad_norm": 2.90875854285962, "learning_rate": 9.98756233159725e-06, "loss": 1.0683, "step": 1429 }, { "epoch": 0.051828494799028665, "grad_norm": 2.575788038409133, "learning_rate": 9.9875209244914e-06, "loss": 1.0977, "step": 1430 }, { "epoch": 0.05186473850168533, "grad_norm": 2.899623776991121, "learning_rate": 9.98747944866053e-06, "loss": 1.1828, "step": 1431 }, { "epoch": 0.05190098220434199, "grad_norm": 2.3449434298559786, "learning_rate": 9.987437904105209e-06, "loss": 1.021, "step": 1432 }, { "epoch": 0.05193722590699866, "grad_norm": 2.4614777700066326, "learning_rate": 9.98739629082601e-06, "loss": 1.0471, "step": 1433 }, { "epoch": 0.05197346960965532, "grad_norm": 2.666032941264557, "learning_rate": 9.987354608823507e-06, "loss": 1.0796, "step": 1434 }, { "epoch": 0.05200971331231199, "grad_norm": 2.687790988290415, "learning_rate": 9.987312858098274e-06, "loss": 1.1598, "step": 1435 }, { "epoch": 0.05204595701496865, "grad_norm": 2.727966542613019, "learning_rate": 9.987271038650885e-06, "loss": 1.1137, "step": 1436 }, { "epoch": 0.052082200717625315, "grad_norm": 2.702752054375389, "learning_rate": 9.987229150481918e-06, "loss": 1.0255, "step": 1437 }, { "epoch": 0.052118444420281976, "grad_norm": 2.6707694707535263, "learning_rate": 9.98718719359195e-06, "loss": 0.9832, "step": 1438 }, { "epoch": 0.052154688122938636, "grad_norm": 2.659026444374703, "learning_rate": 9.987145167981558e-06, "loss": 1.0698, "step": 1439 }, { "epoch": 0.0521909318255953, "grad_norm": 2.3458763687168553, "learning_rate": 9.98710307365132e-06, "loss": 1.084, "step": 1440 }, { "epoch": 0.052227175528251964, "grad_norm": 2.9064198801675665, "learning_rate": 9.98706091060182e-06, "loss": 1.2057, "step": 1441 }, { "epoch": 0.05226341923090863, "grad_norm": 2.0909036483123473, "learning_rate": 9.987018678833636e-06, "loss": 0.6758, "step": 1442 }, { "epoch": 0.05229966293356529, "grad_norm": 2.563173921020278, "learning_rate": 9.986976378347351e-06, "loss": 1.0411, "step": 1443 }, { "epoch": 0.05233590663622196, "grad_norm": 2.4446604247770463, "learning_rate": 9.986934009143547e-06, "loss": 0.9356, "step": 1444 }, { "epoch": 0.05237215033887862, "grad_norm": 2.548816085648816, "learning_rate": 9.986891571222807e-06, "loss": 1.0778, "step": 1445 }, { "epoch": 0.052408394041535286, "grad_norm": 2.4650150279907264, "learning_rate": 9.98684906458572e-06, "loss": 0.9848, "step": 1446 }, { "epoch": 0.052444637744191946, "grad_norm": 2.820576280244608, "learning_rate": 9.986806489232868e-06, "loss": 1.0617, "step": 1447 }, { "epoch": 0.05248088144684861, "grad_norm": 2.4826603296319694, "learning_rate": 9.986763845164838e-06, "loss": 1.1566, "step": 1448 }, { "epoch": 0.052517125149505274, "grad_norm": 2.595559989156023, "learning_rate": 9.986721132382218e-06, "loss": 0.958, "step": 1449 }, { "epoch": 0.052553368852161934, "grad_norm": 2.67157063719841, "learning_rate": 9.986678350885598e-06, "loss": 1.0488, "step": 1450 }, { "epoch": 0.0525896125548186, "grad_norm": 2.8019061829799643, "learning_rate": 9.986635500675564e-06, "loss": 1.0538, "step": 1451 }, { "epoch": 0.05262585625747526, "grad_norm": 2.5523009381554895, "learning_rate": 9.98659258175271e-06, "loss": 1.1285, "step": 1452 }, { "epoch": 0.05266209996013193, "grad_norm": 3.1630480187695715, "learning_rate": 9.986549594117624e-06, "loss": 1.2749, "step": 1453 }, { "epoch": 0.05269834366278859, "grad_norm": 2.5739147503675435, "learning_rate": 9.986506537770903e-06, "loss": 1.0903, "step": 1454 }, { "epoch": 0.05273458736544526, "grad_norm": 2.613724447144511, "learning_rate": 9.986463412713136e-06, "loss": 0.9284, "step": 1455 }, { "epoch": 0.05277083106810192, "grad_norm": 2.612053934214342, "learning_rate": 9.98642021894492e-06, "loss": 1.0712, "step": 1456 }, { "epoch": 0.05280707477075858, "grad_norm": 2.773945411788774, "learning_rate": 9.986376956466849e-06, "loss": 1.0077, "step": 1457 }, { "epoch": 0.052843318473415245, "grad_norm": 2.6017605098568732, "learning_rate": 9.986333625279518e-06, "loss": 1.0082, "step": 1458 }, { "epoch": 0.052879562176071905, "grad_norm": 3.205280766526254, "learning_rate": 9.986290225383526e-06, "loss": 1.2941, "step": 1459 }, { "epoch": 0.05291580587872857, "grad_norm": 2.3278886108516295, "learning_rate": 9.986246756779471e-06, "loss": 0.9146, "step": 1460 }, { "epoch": 0.05295204958138523, "grad_norm": 2.919915602406789, "learning_rate": 9.98620321946795e-06, "loss": 0.916, "step": 1461 }, { "epoch": 0.0529882932840419, "grad_norm": 2.727483344536315, "learning_rate": 9.986159613449565e-06, "loss": 1.1483, "step": 1462 }, { "epoch": 0.05302453698669856, "grad_norm": 2.8307055572117297, "learning_rate": 9.986115938724916e-06, "loss": 0.9676, "step": 1463 }, { "epoch": 0.05306078068935523, "grad_norm": 2.662574396948436, "learning_rate": 9.986072195294606e-06, "loss": 1.0372, "step": 1464 }, { "epoch": 0.05309702439201189, "grad_norm": 2.5962633650723648, "learning_rate": 9.986028383159234e-06, "loss": 1.2074, "step": 1465 }, { "epoch": 0.05313326809466855, "grad_norm": 2.690586904217278, "learning_rate": 9.985984502319408e-06, "loss": 0.9998, "step": 1466 }, { "epoch": 0.053169511797325215, "grad_norm": 2.5178217648563495, "learning_rate": 9.98594055277573e-06, "loss": 1.1037, "step": 1467 }, { "epoch": 0.053205755499981876, "grad_norm": 2.638847650013361, "learning_rate": 9.985896534528808e-06, "loss": 1.2227, "step": 1468 }, { "epoch": 0.05324199920263854, "grad_norm": 14.95833021008938, "learning_rate": 9.985852447579247e-06, "loss": 2.5234, "step": 1469 }, { "epoch": 0.0532782429052952, "grad_norm": 2.645807281921469, "learning_rate": 9.985808291927654e-06, "loss": 0.9965, "step": 1470 }, { "epoch": 0.05331448660795187, "grad_norm": 2.5262984930610863, "learning_rate": 9.985764067574637e-06, "loss": 0.9083, "step": 1471 }, { "epoch": 0.05335073031060853, "grad_norm": 2.507138618962707, "learning_rate": 9.985719774520808e-06, "loss": 0.9798, "step": 1472 }, { "epoch": 0.0533869740132652, "grad_norm": 2.5956233288911137, "learning_rate": 9.985675412766775e-06, "loss": 1.0587, "step": 1473 }, { "epoch": 0.05342321771592186, "grad_norm": 2.6139744453778158, "learning_rate": 9.985630982313152e-06, "loss": 1.0072, "step": 1474 }, { "epoch": 0.05345946141857852, "grad_norm": 2.5785259905724156, "learning_rate": 9.985586483160547e-06, "loss": 1.1666, "step": 1475 }, { "epoch": 0.053495705121235186, "grad_norm": 2.638039543751077, "learning_rate": 9.985541915309576e-06, "loss": 1.1098, "step": 1476 }, { "epoch": 0.053531948823891846, "grad_norm": 2.9352449182794746, "learning_rate": 9.985497278760853e-06, "loss": 1.1602, "step": 1477 }, { "epoch": 0.053568192526548514, "grad_norm": 2.8787474701385247, "learning_rate": 9.985452573514992e-06, "loss": 0.9784, "step": 1478 }, { "epoch": 0.053604436229205174, "grad_norm": 2.3647168626826054, "learning_rate": 9.98540779957261e-06, "loss": 1.0658, "step": 1479 }, { "epoch": 0.05364067993186184, "grad_norm": 2.2386568003891973, "learning_rate": 9.985362956934324e-06, "loss": 1.133, "step": 1480 }, { "epoch": 0.0536769236345185, "grad_norm": 2.5594992316240797, "learning_rate": 9.98531804560075e-06, "loss": 1.0707, "step": 1481 }, { "epoch": 0.05371316733717517, "grad_norm": 3.096425673514145, "learning_rate": 9.985273065572509e-06, "loss": 1.1464, "step": 1482 }, { "epoch": 0.05374941103983183, "grad_norm": 2.745779957513894, "learning_rate": 9.985228016850222e-06, "loss": 1.116, "step": 1483 }, { "epoch": 0.05378565474248849, "grad_norm": 2.7891421234773106, "learning_rate": 9.985182899434506e-06, "loss": 1.1969, "step": 1484 }, { "epoch": 0.05382189844514516, "grad_norm": 2.4354035941069205, "learning_rate": 9.985137713325986e-06, "loss": 1.0213, "step": 1485 }, { "epoch": 0.05385814214780182, "grad_norm": 2.676261997957956, "learning_rate": 9.98509245852528e-06, "loss": 1.0083, "step": 1486 }, { "epoch": 0.053894385850458484, "grad_norm": 2.831735419846914, "learning_rate": 9.985047135033019e-06, "loss": 0.9275, "step": 1487 }, { "epoch": 0.053930629553115145, "grad_norm": 2.83648279615506, "learning_rate": 9.98500174284982e-06, "loss": 1.1648, "step": 1488 }, { "epoch": 0.05396687325577181, "grad_norm": 2.8994377172026318, "learning_rate": 9.984956281976314e-06, "loss": 1.1951, "step": 1489 }, { "epoch": 0.05400311695842847, "grad_norm": 2.619345863580884, "learning_rate": 9.984910752413125e-06, "loss": 1.1825, "step": 1490 }, { "epoch": 0.05403936066108514, "grad_norm": 2.638789949135167, "learning_rate": 9.98486515416088e-06, "loss": 1.0027, "step": 1491 }, { "epoch": 0.0540756043637418, "grad_norm": 2.526712724130426, "learning_rate": 9.984819487220207e-06, "loss": 1.1318, "step": 1492 }, { "epoch": 0.05411184806639846, "grad_norm": 2.4837693902481783, "learning_rate": 9.984773751591739e-06, "loss": 1.0715, "step": 1493 }, { "epoch": 0.05414809176905513, "grad_norm": 2.5031993331279736, "learning_rate": 9.9847279472761e-06, "loss": 0.9671, "step": 1494 }, { "epoch": 0.05418433547171179, "grad_norm": 2.650756617698735, "learning_rate": 9.984682074273927e-06, "loss": 1.0443, "step": 1495 }, { "epoch": 0.054220579174368455, "grad_norm": 2.794028427048769, "learning_rate": 9.984636132585848e-06, "loss": 0.9705, "step": 1496 }, { "epoch": 0.054256822877025115, "grad_norm": 2.4510587234907297, "learning_rate": 9.984590122212499e-06, "loss": 1.1183, "step": 1497 }, { "epoch": 0.05429306657968178, "grad_norm": 2.5518348965625473, "learning_rate": 9.984544043154512e-06, "loss": 1.2859, "step": 1498 }, { "epoch": 0.05432931028233844, "grad_norm": 2.4374627162955798, "learning_rate": 9.984497895412524e-06, "loss": 1.1534, "step": 1499 }, { "epoch": 0.05436555398499511, "grad_norm": 2.8296609270374566, "learning_rate": 9.984451678987169e-06, "loss": 1.0563, "step": 1500 }, { "epoch": 0.05440179768765177, "grad_norm": 2.712122296434935, "learning_rate": 9.984405393879081e-06, "loss": 1.0176, "step": 1501 }, { "epoch": 0.05443804139030843, "grad_norm": 2.386605137475672, "learning_rate": 9.984359040088906e-06, "loss": 1.033, "step": 1502 }, { "epoch": 0.0544742850929651, "grad_norm": 2.6070224629350514, "learning_rate": 9.984312617617275e-06, "loss": 0.9999, "step": 1503 }, { "epoch": 0.05451052879562176, "grad_norm": 2.4476139829848327, "learning_rate": 9.984266126464832e-06, "loss": 1.0787, "step": 1504 }, { "epoch": 0.054546772498278426, "grad_norm": 2.7876465105137287, "learning_rate": 9.984219566632214e-06, "loss": 0.9318, "step": 1505 }, { "epoch": 0.054583016200935086, "grad_norm": 2.5195776872793787, "learning_rate": 9.984172938120067e-06, "loss": 0.9566, "step": 1506 }, { "epoch": 0.05461925990359175, "grad_norm": 2.69466758982609, "learning_rate": 9.984126240929031e-06, "loss": 0.9709, "step": 1507 }, { "epoch": 0.054655503606248414, "grad_norm": 2.8029296689735044, "learning_rate": 9.984079475059749e-06, "loss": 1.1041, "step": 1508 }, { "epoch": 0.05469174730890508, "grad_norm": 2.5081715134335236, "learning_rate": 9.984032640512866e-06, "loss": 1.0223, "step": 1509 }, { "epoch": 0.05472799101156174, "grad_norm": 2.4802973232671, "learning_rate": 9.983985737289028e-06, "loss": 1.1134, "step": 1510 }, { "epoch": 0.0547642347142184, "grad_norm": 2.6531881445642056, "learning_rate": 9.983938765388881e-06, "loss": 1.0902, "step": 1511 }, { "epoch": 0.05480047841687507, "grad_norm": 2.3481902057831743, "learning_rate": 9.98389172481307e-06, "loss": 1.1516, "step": 1512 }, { "epoch": 0.05483672211953173, "grad_norm": 2.605751282654986, "learning_rate": 9.983844615562248e-06, "loss": 0.9346, "step": 1513 }, { "epoch": 0.054872965822188396, "grad_norm": 2.5362218174893285, "learning_rate": 9.98379743763706e-06, "loss": 1.0076, "step": 1514 }, { "epoch": 0.05490920952484506, "grad_norm": 2.812164174432898, "learning_rate": 9.983750191038157e-06, "loss": 1.1418, "step": 1515 }, { "epoch": 0.054945453227501724, "grad_norm": 3.003790978748366, "learning_rate": 9.98370287576619e-06, "loss": 1.0425, "step": 1516 }, { "epoch": 0.054981696930158384, "grad_norm": 2.6541363225586587, "learning_rate": 9.983655491821813e-06, "loss": 1.0503, "step": 1517 }, { "epoch": 0.05501794063281505, "grad_norm": 2.5408165143649915, "learning_rate": 9.983608039205676e-06, "loss": 0.9691, "step": 1518 }, { "epoch": 0.05505418433547171, "grad_norm": 2.524296905767443, "learning_rate": 9.983560517918436e-06, "loss": 1.1199, "step": 1519 }, { "epoch": 0.05509042803812837, "grad_norm": 2.6310585572361167, "learning_rate": 9.983512927960745e-06, "loss": 1.1917, "step": 1520 }, { "epoch": 0.05512667174078504, "grad_norm": 2.799770554573646, "learning_rate": 9.98346526933326e-06, "loss": 0.9951, "step": 1521 }, { "epoch": 0.0551629154434417, "grad_norm": 2.5603416245376196, "learning_rate": 9.983417542036637e-06, "loss": 1.0267, "step": 1522 }, { "epoch": 0.05519915914609837, "grad_norm": 2.6879538744739726, "learning_rate": 9.983369746071535e-06, "loss": 1.008, "step": 1523 }, { "epoch": 0.05523540284875503, "grad_norm": 2.6009614526641593, "learning_rate": 9.98332188143861e-06, "loss": 0.9928, "step": 1524 }, { "epoch": 0.055271646551411695, "grad_norm": 2.7913654077140997, "learning_rate": 9.983273948138523e-06, "loss": 0.9767, "step": 1525 }, { "epoch": 0.055307890254068355, "grad_norm": 2.7374209240615457, "learning_rate": 9.983225946171938e-06, "loss": 1.1231, "step": 1526 }, { "epoch": 0.05534413395672502, "grad_norm": 2.7363414255206693, "learning_rate": 9.98317787553951e-06, "loss": 1.0994, "step": 1527 }, { "epoch": 0.05538037765938168, "grad_norm": 2.4413261642125823, "learning_rate": 9.983129736241905e-06, "loss": 0.9226, "step": 1528 }, { "epoch": 0.05541662136203834, "grad_norm": 2.610793509297645, "learning_rate": 9.983081528279788e-06, "loss": 1.1686, "step": 1529 }, { "epoch": 0.05545286506469501, "grad_norm": 2.810659161237857, "learning_rate": 9.98303325165382e-06, "loss": 1.1174, "step": 1530 }, { "epoch": 0.05548910876735167, "grad_norm": 2.748006130286893, "learning_rate": 9.982984906364668e-06, "loss": 1.1954, "step": 1531 }, { "epoch": 0.05552535247000834, "grad_norm": 2.471255963235399, "learning_rate": 9.982936492412998e-06, "loss": 1.0128, "step": 1532 }, { "epoch": 0.055561596172665, "grad_norm": 2.851770824260387, "learning_rate": 9.982888009799474e-06, "loss": 1.1041, "step": 1533 }, { "epoch": 0.055597839875321665, "grad_norm": 2.5420907898886917, "learning_rate": 9.98283945852477e-06, "loss": 0.9898, "step": 1534 }, { "epoch": 0.055634083577978326, "grad_norm": 2.3876649752972936, "learning_rate": 9.98279083858955e-06, "loss": 0.9024, "step": 1535 }, { "epoch": 0.05567032728063499, "grad_norm": 2.89413398495457, "learning_rate": 9.982742149994487e-06, "loss": 1.0323, "step": 1536 }, { "epoch": 0.05570657098329165, "grad_norm": 2.698682482999258, "learning_rate": 9.982693392740247e-06, "loss": 0.8974, "step": 1537 }, { "epoch": 0.055742814685948314, "grad_norm": 2.3473762821528057, "learning_rate": 9.98264456682751e-06, "loss": 1.0609, "step": 1538 }, { "epoch": 0.05577905838860498, "grad_norm": 2.816886324357231, "learning_rate": 9.982595672256943e-06, "loss": 1.066, "step": 1539 }, { "epoch": 0.05581530209126164, "grad_norm": 2.674026578815591, "learning_rate": 9.982546709029219e-06, "loss": 1.0488, "step": 1540 }, { "epoch": 0.05585154579391831, "grad_norm": 2.3771476782372867, "learning_rate": 9.982497677145018e-06, "loss": 1.1283, "step": 1541 }, { "epoch": 0.05588778949657497, "grad_norm": 2.6656857872176163, "learning_rate": 9.98244857660501e-06, "loss": 1.1732, "step": 1542 }, { "epoch": 0.055924033199231636, "grad_norm": 2.5355717551488954, "learning_rate": 9.982399407409875e-06, "loss": 0.9216, "step": 1543 }, { "epoch": 0.055960276901888296, "grad_norm": 2.4645037192248553, "learning_rate": 9.982350169560288e-06, "loss": 1.1699, "step": 1544 }, { "epoch": 0.055996520604544964, "grad_norm": 2.7875279668894573, "learning_rate": 9.982300863056931e-06, "loss": 0.8947, "step": 1545 }, { "epoch": 0.056032764307201624, "grad_norm": 2.622295146362543, "learning_rate": 9.982251487900478e-06, "loss": 1.1003, "step": 1546 }, { "epoch": 0.056069008009858284, "grad_norm": 2.6275825742717562, "learning_rate": 9.982202044091615e-06, "loss": 0.9392, "step": 1547 }, { "epoch": 0.05610525171251495, "grad_norm": 2.6817328225991792, "learning_rate": 9.982152531631022e-06, "loss": 0.9475, "step": 1548 }, { "epoch": 0.05614149541517161, "grad_norm": 2.460067749682772, "learning_rate": 9.982102950519378e-06, "loss": 1.1753, "step": 1549 }, { "epoch": 0.05617773911782828, "grad_norm": 2.446721080317369, "learning_rate": 9.982053300757371e-06, "loss": 1.0868, "step": 1550 }, { "epoch": 0.05621398282048494, "grad_norm": 2.5652862475876526, "learning_rate": 9.98200358234568e-06, "loss": 1.1093, "step": 1551 }, { "epoch": 0.05625022652314161, "grad_norm": 2.6103045051641716, "learning_rate": 9.981953795284992e-06, "loss": 1.1549, "step": 1552 }, { "epoch": 0.05628647022579827, "grad_norm": 2.1209303416805483, "learning_rate": 9.981903939575996e-06, "loss": 0.9622, "step": 1553 }, { "epoch": 0.056322713928454934, "grad_norm": 2.4251293404106518, "learning_rate": 9.981854015219377e-06, "loss": 0.9699, "step": 1554 }, { "epoch": 0.056358957631111595, "grad_norm": 2.6985053472521074, "learning_rate": 9.981804022215822e-06, "loss": 1.1156, "step": 1555 }, { "epoch": 0.056395201333768255, "grad_norm": 2.688332116475547, "learning_rate": 9.98175396056602e-06, "loss": 1.1355, "step": 1556 }, { "epoch": 0.05643144503642492, "grad_norm": 2.447035890483611, "learning_rate": 9.981703830270661e-06, "loss": 1.1437, "step": 1557 }, { "epoch": 0.05646768873908158, "grad_norm": 2.7358110945990335, "learning_rate": 9.981653631330437e-06, "loss": 1.2032, "step": 1558 }, { "epoch": 0.05650393244173825, "grad_norm": 2.622206100257475, "learning_rate": 9.981603363746037e-06, "loss": 1.4045, "step": 1559 }, { "epoch": 0.05654017614439491, "grad_norm": 2.874484768918662, "learning_rate": 9.981553027518157e-06, "loss": 1.1003, "step": 1560 }, { "epoch": 0.05657641984705158, "grad_norm": 2.253477780871665, "learning_rate": 9.981502622647487e-06, "loss": 1.0547, "step": 1561 }, { "epoch": 0.05661266354970824, "grad_norm": 2.5021662567963943, "learning_rate": 9.981452149134727e-06, "loss": 0.985, "step": 1562 }, { "epoch": 0.0566489072523649, "grad_norm": 2.5776491635639944, "learning_rate": 9.981401606980568e-06, "loss": 1.0705, "step": 1563 }, { "epoch": 0.056685150955021565, "grad_norm": 2.6268266252973427, "learning_rate": 9.981350996185706e-06, "loss": 1.0986, "step": 1564 }, { "epoch": 0.056721394657678226, "grad_norm": 2.569341280142752, "learning_rate": 9.98130031675084e-06, "loss": 1.0785, "step": 1565 }, { "epoch": 0.05675763836033489, "grad_norm": 2.430870868808274, "learning_rate": 9.98124956867667e-06, "loss": 1.0717, "step": 1566 }, { "epoch": 0.05679388206299155, "grad_norm": 2.4789405018459463, "learning_rate": 9.981198751963892e-06, "loss": 0.9463, "step": 1567 }, { "epoch": 0.05683012576564822, "grad_norm": 2.576403806148025, "learning_rate": 9.981147866613208e-06, "loss": 1.0838, "step": 1568 }, { "epoch": 0.05686636946830488, "grad_norm": 2.65013951929981, "learning_rate": 9.98109691262532e-06, "loss": 0.8816, "step": 1569 }, { "epoch": 0.05690261317096155, "grad_norm": 2.5986793683974727, "learning_rate": 9.981045890000926e-06, "loss": 0.9098, "step": 1570 }, { "epoch": 0.05693885687361821, "grad_norm": 2.502241352348081, "learning_rate": 9.980994798740734e-06, "loss": 0.9433, "step": 1571 }, { "epoch": 0.05697510057627487, "grad_norm": 2.5952747954514725, "learning_rate": 9.980943638845448e-06, "loss": 0.9613, "step": 1572 }, { "epoch": 0.057011344278931536, "grad_norm": 3.034784836239219, "learning_rate": 9.980892410315768e-06, "loss": 1.0186, "step": 1573 }, { "epoch": 0.057047587981588196, "grad_norm": 2.600024295964325, "learning_rate": 9.980841113152404e-06, "loss": 1.1878, "step": 1574 }, { "epoch": 0.057083831684244864, "grad_norm": 2.547923082804006, "learning_rate": 9.980789747356063e-06, "loss": 1.1807, "step": 1575 }, { "epoch": 0.057120075386901524, "grad_norm": 2.449212671855421, "learning_rate": 9.980738312927449e-06, "loss": 1.0813, "step": 1576 }, { "epoch": 0.05715631908955819, "grad_norm": 2.7496824736674284, "learning_rate": 9.980686809867275e-06, "loss": 1.0861, "step": 1577 }, { "epoch": 0.05719256279221485, "grad_norm": 2.778086811507468, "learning_rate": 9.980635238176249e-06, "loss": 1.1088, "step": 1578 }, { "epoch": 0.05722880649487152, "grad_norm": 2.853340026878183, "learning_rate": 9.980583597855083e-06, "loss": 1.1066, "step": 1579 }, { "epoch": 0.05726505019752818, "grad_norm": 3.1569063460353566, "learning_rate": 9.980531888904485e-06, "loss": 0.8719, "step": 1580 }, { "epoch": 0.05730129390018484, "grad_norm": 2.7250851045473583, "learning_rate": 9.98048011132517e-06, "loss": 1.0253, "step": 1581 }, { "epoch": 0.05733753760284151, "grad_norm": 2.9184323404099715, "learning_rate": 9.980428265117852e-06, "loss": 1.1351, "step": 1582 }, { "epoch": 0.05737378130549817, "grad_norm": 2.5899732680596825, "learning_rate": 9.980376350283243e-06, "loss": 1.2011, "step": 1583 }, { "epoch": 0.057410025008154834, "grad_norm": 2.7739288450980246, "learning_rate": 9.98032436682206e-06, "loss": 1.0959, "step": 1584 }, { "epoch": 0.057446268710811495, "grad_norm": 2.4626932722915273, "learning_rate": 9.98027231473502e-06, "loss": 0.9204, "step": 1585 }, { "epoch": 0.05748251241346816, "grad_norm": 2.208922881952972, "learning_rate": 9.98022019402284e-06, "loss": 1.1353, "step": 1586 }, { "epoch": 0.05751875611612482, "grad_norm": 2.5179301897344244, "learning_rate": 9.980168004686237e-06, "loss": 1.0947, "step": 1587 }, { "epoch": 0.05755499981878149, "grad_norm": 2.5303396538055645, "learning_rate": 9.980115746725932e-06, "loss": 0.9884, "step": 1588 }, { "epoch": 0.05759124352143815, "grad_norm": 2.5501804133619927, "learning_rate": 9.980063420142643e-06, "loss": 1.1358, "step": 1589 }, { "epoch": 0.05762748722409481, "grad_norm": 2.4402939787053324, "learning_rate": 9.980011024937093e-06, "loss": 0.9597, "step": 1590 }, { "epoch": 0.05766373092675148, "grad_norm": 2.5072550600180437, "learning_rate": 9.979958561110001e-06, "loss": 1.0017, "step": 1591 }, { "epoch": 0.05769997462940814, "grad_norm": 2.936625211995625, "learning_rate": 9.979906028662093e-06, "loss": 1.0972, "step": 1592 }, { "epoch": 0.057736218332064805, "grad_norm": 2.4841070243690186, "learning_rate": 9.97985342759409e-06, "loss": 1.0646, "step": 1593 }, { "epoch": 0.057772462034721465, "grad_norm": 2.6863093400550917, "learning_rate": 9.979800757906721e-06, "loss": 1.0093, "step": 1594 }, { "epoch": 0.05780870573737813, "grad_norm": 2.2753996863289245, "learning_rate": 9.979748019600708e-06, "loss": 0.9886, "step": 1595 }, { "epoch": 0.05784494944003479, "grad_norm": 2.435655375425583, "learning_rate": 9.979695212676778e-06, "loss": 0.9845, "step": 1596 }, { "epoch": 0.05788119314269146, "grad_norm": 2.7319671053828785, "learning_rate": 9.979642337135662e-06, "loss": 0.9637, "step": 1597 }, { "epoch": 0.05791743684534812, "grad_norm": 2.594416240843376, "learning_rate": 9.979589392978085e-06, "loss": 1.1175, "step": 1598 }, { "epoch": 0.05795368054800478, "grad_norm": 2.656117315733213, "learning_rate": 9.979536380204777e-06, "loss": 1.1367, "step": 1599 }, { "epoch": 0.05798992425066145, "grad_norm": 2.583486774179798, "learning_rate": 9.97948329881647e-06, "loss": 1.0928, "step": 1600 }, { "epoch": 0.05802616795331811, "grad_norm": 2.6499556772523856, "learning_rate": 9.979430148813894e-06, "loss": 1.1638, "step": 1601 }, { "epoch": 0.058062411655974776, "grad_norm": 2.8440649467800507, "learning_rate": 9.979376930197782e-06, "loss": 1.117, "step": 1602 }, { "epoch": 0.058098655358631436, "grad_norm": 2.502857888739055, "learning_rate": 9.979323642968867e-06, "loss": 0.9208, "step": 1603 }, { "epoch": 0.0581348990612881, "grad_norm": 2.649773875845892, "learning_rate": 9.979270287127885e-06, "loss": 0.7928, "step": 1604 }, { "epoch": 0.058171142763944764, "grad_norm": 2.7363408975990082, "learning_rate": 9.979216862675569e-06, "loss": 0.9924, "step": 1605 }, { "epoch": 0.05820738646660143, "grad_norm": 2.4785980126797877, "learning_rate": 9.979163369612657e-06, "loss": 0.9236, "step": 1606 }, { "epoch": 0.05824363016925809, "grad_norm": 2.659433348412232, "learning_rate": 9.979109807939883e-06, "loss": 0.9585, "step": 1607 }, { "epoch": 0.05827987387191475, "grad_norm": 2.6128295222689317, "learning_rate": 9.979056177657988e-06, "loss": 1.0304, "step": 1608 }, { "epoch": 0.05831611757457142, "grad_norm": 2.793166339231601, "learning_rate": 9.97900247876771e-06, "loss": 1.1085, "step": 1609 }, { "epoch": 0.05835236127722808, "grad_norm": 2.4566381769904657, "learning_rate": 9.97894871126979e-06, "loss": 1.1712, "step": 1610 }, { "epoch": 0.058388604979884746, "grad_norm": 2.713030245102754, "learning_rate": 9.978894875164969e-06, "loss": 1.0858, "step": 1611 }, { "epoch": 0.05842484868254141, "grad_norm": 2.9006313087431876, "learning_rate": 9.978840970453985e-06, "loss": 1.0496, "step": 1612 }, { "epoch": 0.058461092385198074, "grad_norm": 2.4446946245550985, "learning_rate": 9.978786997137584e-06, "loss": 1.0438, "step": 1613 }, { "epoch": 0.058497336087854734, "grad_norm": 2.435269796905278, "learning_rate": 9.978732955216511e-06, "loss": 1.1891, "step": 1614 }, { "epoch": 0.0585335797905114, "grad_norm": 2.6462193040960087, "learning_rate": 9.978678844691507e-06, "loss": 1.1707, "step": 1615 }, { "epoch": 0.05856982349316806, "grad_norm": 3.034320492735949, "learning_rate": 9.97862466556332e-06, "loss": 1.0851, "step": 1616 }, { "epoch": 0.05860606719582472, "grad_norm": 2.7709232899041485, "learning_rate": 9.978570417832697e-06, "loss": 1.085, "step": 1617 }, { "epoch": 0.05864231089848139, "grad_norm": 2.463281999967861, "learning_rate": 9.978516101500383e-06, "loss": 1.0758, "step": 1618 }, { "epoch": 0.05867855460113805, "grad_norm": 2.7632525391276186, "learning_rate": 9.97846171656713e-06, "loss": 1.0714, "step": 1619 }, { "epoch": 0.05871479830379472, "grad_norm": 2.7157043687621396, "learning_rate": 9.978407263033685e-06, "loss": 1.1004, "step": 1620 }, { "epoch": 0.05875104200645138, "grad_norm": 2.561824502732892, "learning_rate": 9.978352740900798e-06, "loss": 1.1982, "step": 1621 }, { "epoch": 0.058787285709108045, "grad_norm": 2.45972996776943, "learning_rate": 9.978298150169221e-06, "loss": 1.0636, "step": 1622 }, { "epoch": 0.058823529411764705, "grad_norm": 2.4481484959716284, "learning_rate": 9.978243490839706e-06, "loss": 1.0831, "step": 1623 }, { "epoch": 0.05885977311442137, "grad_norm": 2.772268088797441, "learning_rate": 9.978188762913007e-06, "loss": 1.0479, "step": 1624 }, { "epoch": 0.05889601681707803, "grad_norm": 2.5418088526096616, "learning_rate": 9.978133966389878e-06, "loss": 1.1745, "step": 1625 }, { "epoch": 0.05893226051973469, "grad_norm": 2.5269351078560423, "learning_rate": 9.978079101271072e-06, "loss": 1.1529, "step": 1626 }, { "epoch": 0.05896850422239136, "grad_norm": 2.669739797448325, "learning_rate": 9.978024167557348e-06, "loss": 0.9705, "step": 1627 }, { "epoch": 0.05900474792504802, "grad_norm": 2.5118943100638136, "learning_rate": 9.977969165249462e-06, "loss": 0.9292, "step": 1628 }, { "epoch": 0.05904099162770469, "grad_norm": 2.482198002847068, "learning_rate": 9.97791409434817e-06, "loss": 1.1278, "step": 1629 }, { "epoch": 0.05907723533036135, "grad_norm": 2.9427845711646556, "learning_rate": 9.977858954854234e-06, "loss": 1.0625, "step": 1630 }, { "epoch": 0.059113479033018015, "grad_norm": 2.842643367447043, "learning_rate": 9.977803746768412e-06, "loss": 1.0367, "step": 1631 }, { "epoch": 0.059149722735674676, "grad_norm": 2.2978506133535697, "learning_rate": 9.977748470091464e-06, "loss": 0.918, "step": 1632 }, { "epoch": 0.05918596643833134, "grad_norm": 2.6459594886550444, "learning_rate": 9.977693124824152e-06, "loss": 1.1079, "step": 1633 }, { "epoch": 0.059222210140988, "grad_norm": 2.5926060774128468, "learning_rate": 9.97763771096724e-06, "loss": 1.1491, "step": 1634 }, { "epoch": 0.059258453843644664, "grad_norm": 2.4510417142834413, "learning_rate": 9.97758222852149e-06, "loss": 1.1638, "step": 1635 }, { "epoch": 0.05929469754630133, "grad_norm": 2.5185344010184565, "learning_rate": 9.977526677487668e-06, "loss": 0.9959, "step": 1636 }, { "epoch": 0.05933094124895799, "grad_norm": 2.4840390732745106, "learning_rate": 9.97747105786654e-06, "loss": 0.9634, "step": 1637 }, { "epoch": 0.05936718495161466, "grad_norm": 2.3665805909207873, "learning_rate": 9.977415369658868e-06, "loss": 1.0603, "step": 1638 }, { "epoch": 0.05940342865427132, "grad_norm": 2.584123506058938, "learning_rate": 9.977359612865424e-06, "loss": 1.0759, "step": 1639 }, { "epoch": 0.059439672356927986, "grad_norm": 2.5054716977172697, "learning_rate": 9.977303787486973e-06, "loss": 0.9526, "step": 1640 }, { "epoch": 0.059475916059584646, "grad_norm": 2.8876299231252163, "learning_rate": 9.977247893524289e-06, "loss": 1.0856, "step": 1641 }, { "epoch": 0.059512159762241314, "grad_norm": 2.229657587892253, "learning_rate": 9.977191930978138e-06, "loss": 0.9385, "step": 1642 }, { "epoch": 0.059548403464897974, "grad_norm": 2.4399583160151166, "learning_rate": 9.977135899849292e-06, "loss": 0.9467, "step": 1643 }, { "epoch": 0.059584647167554634, "grad_norm": 2.6187588354696243, "learning_rate": 9.977079800138524e-06, "loss": 0.9187, "step": 1644 }, { "epoch": 0.0596208908702113, "grad_norm": 2.7508549670629026, "learning_rate": 9.977023631846605e-06, "loss": 1.1157, "step": 1645 }, { "epoch": 0.05965713457286796, "grad_norm": 2.839860776069995, "learning_rate": 9.976967394974314e-06, "loss": 1.0484, "step": 1646 }, { "epoch": 0.05969337827552463, "grad_norm": 2.7310681728461725, "learning_rate": 9.976911089522419e-06, "loss": 1.0349, "step": 1647 }, { "epoch": 0.05972962197818129, "grad_norm": 2.7486818582584904, "learning_rate": 9.976854715491699e-06, "loss": 0.9549, "step": 1648 }, { "epoch": 0.05976586568083796, "grad_norm": 2.5072505420116866, "learning_rate": 9.976798272882931e-06, "loss": 1.0844, "step": 1649 }, { "epoch": 0.05980210938349462, "grad_norm": 2.784492210952114, "learning_rate": 9.976741761696893e-06, "loss": 1.0518, "step": 1650 }, { "epoch": 0.059838353086151284, "grad_norm": 2.7226002239010096, "learning_rate": 9.976685181934365e-06, "loss": 1.1787, "step": 1651 }, { "epoch": 0.059874596788807945, "grad_norm": 2.185012067166427, "learning_rate": 9.976628533596124e-06, "loss": 1.059, "step": 1652 }, { "epoch": 0.059910840491464605, "grad_norm": 2.339942432961851, "learning_rate": 9.97657181668295e-06, "loss": 1.1807, "step": 1653 }, { "epoch": 0.05994708419412127, "grad_norm": 2.854228894659676, "learning_rate": 9.976515031195628e-06, "loss": 0.9386, "step": 1654 }, { "epoch": 0.05998332789677793, "grad_norm": 2.7231477599907974, "learning_rate": 9.976458177134939e-06, "loss": 0.926, "step": 1655 }, { "epoch": 0.0600195715994346, "grad_norm": 2.4106427871417675, "learning_rate": 9.976401254501665e-06, "loss": 0.9242, "step": 1656 }, { "epoch": 0.06005581530209126, "grad_norm": 2.4697309281481568, "learning_rate": 9.976344263296588e-06, "loss": 1.1747, "step": 1657 }, { "epoch": 0.06009205900474793, "grad_norm": 2.4082649106262726, "learning_rate": 9.976287203520502e-06, "loss": 0.9334, "step": 1658 }, { "epoch": 0.06012830270740459, "grad_norm": 2.44839526234557, "learning_rate": 9.976230075174184e-06, "loss": 0.9693, "step": 1659 }, { "epoch": 0.060164546410061255, "grad_norm": 2.5425767286984846, "learning_rate": 9.976172878258428e-06, "loss": 1.1083, "step": 1660 }, { "epoch": 0.060200790112717915, "grad_norm": 2.3653927244653685, "learning_rate": 9.976115612774017e-06, "loss": 1.0064, "step": 1661 }, { "epoch": 0.060237033815374576, "grad_norm": 2.3678292753165224, "learning_rate": 9.976058278721745e-06, "loss": 1.0967, "step": 1662 }, { "epoch": 0.06027327751803124, "grad_norm": 2.716888223146036, "learning_rate": 9.976000876102397e-06, "loss": 1.1124, "step": 1663 }, { "epoch": 0.0603095212206879, "grad_norm": 2.5618933024512867, "learning_rate": 9.975943404916768e-06, "loss": 1.0075, "step": 1664 }, { "epoch": 0.06034576492334457, "grad_norm": 2.365677065826713, "learning_rate": 9.975885865165647e-06, "loss": 1.038, "step": 1665 }, { "epoch": 0.06038200862600123, "grad_norm": 2.570576079159994, "learning_rate": 9.97582825684983e-06, "loss": 0.9966, "step": 1666 }, { "epoch": 0.0604182523286579, "grad_norm": 2.6009241063067043, "learning_rate": 9.975770579970105e-06, "loss": 1.0001, "step": 1667 }, { "epoch": 0.06045449603131456, "grad_norm": 2.6088594132500567, "learning_rate": 9.975712834527273e-06, "loss": 1.2002, "step": 1668 }, { "epoch": 0.060490739733971226, "grad_norm": 2.5699868009462974, "learning_rate": 9.975655020522128e-06, "loss": 1.1493, "step": 1669 }, { "epoch": 0.060526983436627886, "grad_norm": 2.7480145188525715, "learning_rate": 9.975597137955467e-06, "loss": 1.1311, "step": 1670 }, { "epoch": 0.060563227139284546, "grad_norm": 2.699359788889789, "learning_rate": 9.975539186828083e-06, "loss": 1.0691, "step": 1671 }, { "epoch": 0.060599470841941214, "grad_norm": 2.3889754543467525, "learning_rate": 9.97548116714078e-06, "loss": 0.9304, "step": 1672 }, { "epoch": 0.060635714544597874, "grad_norm": 2.47541738300675, "learning_rate": 9.975423078894357e-06, "loss": 0.8512, "step": 1673 }, { "epoch": 0.06067195824725454, "grad_norm": 2.4327594261919776, "learning_rate": 9.975364922089612e-06, "loss": 1.0674, "step": 1674 }, { "epoch": 0.0607082019499112, "grad_norm": 2.8686960392706027, "learning_rate": 9.975306696727346e-06, "loss": 1.331, "step": 1675 }, { "epoch": 0.06074444565256787, "grad_norm": 2.574215926872807, "learning_rate": 9.975248402808365e-06, "loss": 0.9838, "step": 1676 }, { "epoch": 0.06078068935522453, "grad_norm": 2.666386695878769, "learning_rate": 9.975190040333466e-06, "loss": 1.1426, "step": 1677 }, { "epoch": 0.060816933057881196, "grad_norm": 2.6871778062062495, "learning_rate": 9.97513160930346e-06, "loss": 1.1232, "step": 1678 }, { "epoch": 0.06085317676053786, "grad_norm": 2.8094161431276263, "learning_rate": 9.97507310971915e-06, "loss": 0.9462, "step": 1679 }, { "epoch": 0.06088942046319452, "grad_norm": 2.8142276277846037, "learning_rate": 9.97501454158134e-06, "loss": 0.9607, "step": 1680 }, { "epoch": 0.060925664165851184, "grad_norm": 3.1080243481000056, "learning_rate": 9.97495590489084e-06, "loss": 1.2368, "step": 1681 }, { "epoch": 0.060961907868507845, "grad_norm": 2.536066650858658, "learning_rate": 9.974897199648454e-06, "loss": 1.0523, "step": 1682 }, { "epoch": 0.06099815157116451, "grad_norm": 3.103216294385368, "learning_rate": 9.974838425854996e-06, "loss": 1.1416, "step": 1683 }, { "epoch": 0.06103439527382117, "grad_norm": 2.5920183280310756, "learning_rate": 9.97477958351127e-06, "loss": 1.1349, "step": 1684 }, { "epoch": 0.06107063897647784, "grad_norm": 2.5136960704013545, "learning_rate": 9.974720672618092e-06, "loss": 1.0537, "step": 1685 }, { "epoch": 0.0611068826791345, "grad_norm": 2.651404790053362, "learning_rate": 9.974661693176273e-06, "loss": 0.9767, "step": 1686 }, { "epoch": 0.06114312638179117, "grad_norm": 2.68755571817954, "learning_rate": 9.974602645186622e-06, "loss": 0.9573, "step": 1687 }, { "epoch": 0.06117937008444783, "grad_norm": 2.479202521714558, "learning_rate": 9.974543528649956e-06, "loss": 1.1417, "step": 1688 }, { "epoch": 0.06121561378710449, "grad_norm": 2.4855012925734017, "learning_rate": 9.974484343567089e-06, "loss": 0.9636, "step": 1689 }, { "epoch": 0.061251857489761155, "grad_norm": 2.5856179295154367, "learning_rate": 9.974425089938835e-06, "loss": 1.0443, "step": 1690 }, { "epoch": 0.061288101192417815, "grad_norm": 2.5459669458853695, "learning_rate": 9.974365767766013e-06, "loss": 0.9788, "step": 1691 }, { "epoch": 0.06132434489507448, "grad_norm": 2.574212850366362, "learning_rate": 9.974306377049439e-06, "loss": 1.0352, "step": 1692 }, { "epoch": 0.06136058859773114, "grad_norm": 2.6214612847628205, "learning_rate": 9.974246917789932e-06, "loss": 1.0933, "step": 1693 }, { "epoch": 0.06139683230038781, "grad_norm": 2.5750839818419187, "learning_rate": 9.97418738998831e-06, "loss": 1.0936, "step": 1694 }, { "epoch": 0.06143307600304447, "grad_norm": 2.2875317615723616, "learning_rate": 9.974127793645394e-06, "loss": 1.0958, "step": 1695 }, { "epoch": 0.06146931970570114, "grad_norm": 2.707835126245153, "learning_rate": 9.974068128762004e-06, "loss": 1.1241, "step": 1696 }, { "epoch": 0.0615055634083578, "grad_norm": 2.473090780335916, "learning_rate": 9.974008395338965e-06, "loss": 0.9784, "step": 1697 }, { "epoch": 0.06154180711101446, "grad_norm": 2.5374905736975095, "learning_rate": 9.973948593377099e-06, "loss": 1.1756, "step": 1698 }, { "epoch": 0.061578050813671126, "grad_norm": 2.6071358141392253, "learning_rate": 9.973888722877228e-06, "loss": 1.1601, "step": 1699 }, { "epoch": 0.061614294516327786, "grad_norm": 2.3406221982511752, "learning_rate": 9.97382878384018e-06, "loss": 1.1554, "step": 1700 }, { "epoch": 0.06165053821898445, "grad_norm": 2.4040236065139924, "learning_rate": 9.973768776266778e-06, "loss": 1.1488, "step": 1701 }, { "epoch": 0.061686781921641114, "grad_norm": 2.6249673845241146, "learning_rate": 9.973708700157852e-06, "loss": 1.1257, "step": 1702 }, { "epoch": 0.06172302562429778, "grad_norm": 2.4102607945821335, "learning_rate": 9.973648555514228e-06, "loss": 0.9338, "step": 1703 }, { "epoch": 0.06175926932695444, "grad_norm": 2.780278914105877, "learning_rate": 9.973588342336734e-06, "loss": 0.9977, "step": 1704 }, { "epoch": 0.06179551302961111, "grad_norm": 2.8531019678290104, "learning_rate": 9.973528060626199e-06, "loss": 1.0014, "step": 1705 }, { "epoch": 0.06183175673226777, "grad_norm": 2.341813626583222, "learning_rate": 9.973467710383458e-06, "loss": 0.9715, "step": 1706 }, { "epoch": 0.06186800043492443, "grad_norm": 2.2649096383156766, "learning_rate": 9.973407291609338e-06, "loss": 0.9687, "step": 1707 }, { "epoch": 0.061904244137581096, "grad_norm": 2.5150067630655695, "learning_rate": 9.973346804304672e-06, "loss": 0.9244, "step": 1708 }, { "epoch": 0.06194048784023776, "grad_norm": 2.5707332936160103, "learning_rate": 9.973286248470298e-06, "loss": 0.9139, "step": 1709 }, { "epoch": 0.061976731542894424, "grad_norm": 2.904789695099608, "learning_rate": 9.973225624107045e-06, "loss": 1.0305, "step": 1710 }, { "epoch": 0.062012975245551084, "grad_norm": 2.6662874580693368, "learning_rate": 9.973164931215753e-06, "loss": 1.1865, "step": 1711 }, { "epoch": 0.06204921894820775, "grad_norm": 2.718562318175033, "learning_rate": 9.973104169797255e-06, "loss": 0.956, "step": 1712 }, { "epoch": 0.06208546265086441, "grad_norm": 2.7155139584537786, "learning_rate": 9.973043339852389e-06, "loss": 1.0496, "step": 1713 }, { "epoch": 0.06212170635352107, "grad_norm": 2.369099597703102, "learning_rate": 9.972982441381993e-06, "loss": 0.8121, "step": 1714 }, { "epoch": 0.06215795005617774, "grad_norm": 2.640555968393281, "learning_rate": 9.972921474386906e-06, "loss": 1.2118, "step": 1715 }, { "epoch": 0.0621941937588344, "grad_norm": 2.421360086019664, "learning_rate": 9.97286043886797e-06, "loss": 0.9947, "step": 1716 }, { "epoch": 0.06223043746149107, "grad_norm": 2.6041274406359842, "learning_rate": 9.972799334826023e-06, "loss": 0.9355, "step": 1717 }, { "epoch": 0.06226668116414773, "grad_norm": 2.8975409552898195, "learning_rate": 9.97273816226191e-06, "loss": 1.195, "step": 1718 }, { "epoch": 0.062302924866804395, "grad_norm": 2.7498410199395575, "learning_rate": 9.972676921176472e-06, "loss": 1.2538, "step": 1719 }, { "epoch": 0.062339168569461055, "grad_norm": 2.5901702626127117, "learning_rate": 9.972615611570554e-06, "loss": 0.9572, "step": 1720 }, { "epoch": 0.06237541227211772, "grad_norm": 2.4254094584365147, "learning_rate": 9.972554233444998e-06, "loss": 0.9953, "step": 1721 }, { "epoch": 0.06241165597477438, "grad_norm": 2.5010323476494323, "learning_rate": 9.972492786800654e-06, "loss": 1.0191, "step": 1722 }, { "epoch": 0.06244789967743104, "grad_norm": 2.506538067370429, "learning_rate": 9.972431271638368e-06, "loss": 1.0691, "step": 1723 }, { "epoch": 0.06248414338008771, "grad_norm": 2.937782553916953, "learning_rate": 9.972369687958982e-06, "loss": 1.0373, "step": 1724 }, { "epoch": 0.06252038708274438, "grad_norm": 2.7156176710972564, "learning_rate": 9.972308035763354e-06, "loss": 1.2655, "step": 1725 }, { "epoch": 0.06255663078540104, "grad_norm": 2.62929246463651, "learning_rate": 9.972246315052326e-06, "loss": 0.9731, "step": 1726 }, { "epoch": 0.0625928744880577, "grad_norm": 2.896216957517642, "learning_rate": 9.97218452582675e-06, "loss": 0.9594, "step": 1727 }, { "epoch": 0.06262911819071436, "grad_norm": 2.7017686842333966, "learning_rate": 9.972122668087478e-06, "loss": 1.0632, "step": 1728 }, { "epoch": 0.06266536189337103, "grad_norm": 2.717381272961238, "learning_rate": 9.972060741835365e-06, "loss": 1.0312, "step": 1729 }, { "epoch": 0.06270160559602769, "grad_norm": 2.4696552870652915, "learning_rate": 9.97199874707126e-06, "loss": 1.1839, "step": 1730 }, { "epoch": 0.06273784929868435, "grad_norm": 2.72313436340344, "learning_rate": 9.971936683796019e-06, "loss": 0.9735, "step": 1731 }, { "epoch": 0.06277409300134101, "grad_norm": 2.507700632503803, "learning_rate": 9.9718745520105e-06, "loss": 0.9886, "step": 1732 }, { "epoch": 0.06281033670399767, "grad_norm": 2.5251718007075934, "learning_rate": 9.971812351715555e-06, "loss": 1.0243, "step": 1733 }, { "epoch": 0.06284658040665435, "grad_norm": 2.6652404783216297, "learning_rate": 9.971750082912042e-06, "loss": 1.0088, "step": 1734 }, { "epoch": 0.06288282410931101, "grad_norm": 2.349167869327059, "learning_rate": 9.97168774560082e-06, "loss": 0.9097, "step": 1735 }, { "epoch": 0.06291906781196767, "grad_norm": 2.33987941728138, "learning_rate": 9.971625339782748e-06, "loss": 1.1763, "step": 1736 }, { "epoch": 0.06295531151462433, "grad_norm": 2.698154313484148, "learning_rate": 9.971562865458686e-06, "loss": 1.1356, "step": 1737 }, { "epoch": 0.062991555217281, "grad_norm": 2.7767966354025435, "learning_rate": 9.971500322629496e-06, "loss": 1.1285, "step": 1738 }, { "epoch": 0.06302779891993766, "grad_norm": 2.6494362769073736, "learning_rate": 9.971437711296035e-06, "loss": 1.2879, "step": 1739 }, { "epoch": 0.06306404262259432, "grad_norm": 2.6791287326391786, "learning_rate": 9.97137503145917e-06, "loss": 0.9198, "step": 1740 }, { "epoch": 0.06310028632525098, "grad_norm": 2.470835900181765, "learning_rate": 9.971312283119763e-06, "loss": 0.8893, "step": 1741 }, { "epoch": 0.06313653002790764, "grad_norm": 2.89736855883363, "learning_rate": 9.97124946627868e-06, "loss": 0.972, "step": 1742 }, { "epoch": 0.06317277373056432, "grad_norm": 2.5233929306688085, "learning_rate": 9.971186580936787e-06, "loss": 1.0736, "step": 1743 }, { "epoch": 0.06320901743322098, "grad_norm": 3.024210134217232, "learning_rate": 9.971123627094947e-06, "loss": 1.0935, "step": 1744 }, { "epoch": 0.06324526113587764, "grad_norm": 2.6935743619831647, "learning_rate": 9.971060604754032e-06, "loss": 1.0087, "step": 1745 }, { "epoch": 0.0632815048385343, "grad_norm": 3.0003658851037702, "learning_rate": 9.970997513914908e-06, "loss": 1.027, "step": 1746 }, { "epoch": 0.06331774854119097, "grad_norm": 2.2873434292679518, "learning_rate": 9.970934354578443e-06, "loss": 0.8312, "step": 1747 }, { "epoch": 0.06335399224384763, "grad_norm": 2.605194304452626, "learning_rate": 9.970871126745511e-06, "loss": 1.1432, "step": 1748 }, { "epoch": 0.0633902359465043, "grad_norm": 2.666163050226622, "learning_rate": 9.97080783041698e-06, "loss": 0.9148, "step": 1749 }, { "epoch": 0.06342647964916096, "grad_norm": 2.9052272648524182, "learning_rate": 9.970744465593724e-06, "loss": 1.0205, "step": 1750 }, { "epoch": 0.06346272335181762, "grad_norm": 2.775272044243884, "learning_rate": 9.970681032276615e-06, "loss": 1.2441, "step": 1751 }, { "epoch": 0.06349896705447429, "grad_norm": 2.5659826757330926, "learning_rate": 9.970617530466527e-06, "loss": 0.833, "step": 1752 }, { "epoch": 0.06353521075713095, "grad_norm": 2.641101692396123, "learning_rate": 9.970553960164335e-06, "loss": 1.1359, "step": 1753 }, { "epoch": 0.06357145445978761, "grad_norm": 2.795898228253362, "learning_rate": 9.970490321370919e-06, "loss": 1.2263, "step": 1754 }, { "epoch": 0.06360769816244427, "grad_norm": 2.6147455629068084, "learning_rate": 9.970426614087148e-06, "loss": 1.1981, "step": 1755 }, { "epoch": 0.06364394186510094, "grad_norm": 2.526202751404088, "learning_rate": 9.970362838313907e-06, "loss": 1.0194, "step": 1756 }, { "epoch": 0.0636801855677576, "grad_norm": 2.5837310557021844, "learning_rate": 9.970298994052073e-06, "loss": 1.0627, "step": 1757 }, { "epoch": 0.06371642927041427, "grad_norm": 2.3804981513522683, "learning_rate": 9.970235081302522e-06, "loss": 1.0674, "step": 1758 }, { "epoch": 0.06375267297307093, "grad_norm": 2.6410582991826823, "learning_rate": 9.970171100066138e-06, "loss": 0.9, "step": 1759 }, { "epoch": 0.06378891667572759, "grad_norm": 2.6670194005448664, "learning_rate": 9.970107050343802e-06, "loss": 1.1603, "step": 1760 }, { "epoch": 0.06382516037838426, "grad_norm": 2.891228102007222, "learning_rate": 9.970042932136398e-06, "loss": 1.1732, "step": 1761 }, { "epoch": 0.06386140408104092, "grad_norm": 2.683262218596465, "learning_rate": 9.969978745444809e-06, "loss": 1.0164, "step": 1762 }, { "epoch": 0.06389764778369758, "grad_norm": 2.7806253899139035, "learning_rate": 9.969914490269916e-06, "loss": 1.3055, "step": 1763 }, { "epoch": 0.06393389148635424, "grad_norm": 2.466916815253036, "learning_rate": 9.96985016661261e-06, "loss": 1.1425, "step": 1764 }, { "epoch": 0.06397013518901092, "grad_norm": 2.5093190503295366, "learning_rate": 9.969785774473772e-06, "loss": 0.8981, "step": 1765 }, { "epoch": 0.06400637889166758, "grad_norm": 2.147089690942753, "learning_rate": 9.969721313854292e-06, "loss": 0.8203, "step": 1766 }, { "epoch": 0.06404262259432424, "grad_norm": 2.711992277294685, "learning_rate": 9.969656784755057e-06, "loss": 1.164, "step": 1767 }, { "epoch": 0.0640788662969809, "grad_norm": 2.3146369922364247, "learning_rate": 9.96959218717696e-06, "loss": 0.9526, "step": 1768 }, { "epoch": 0.06411510999963756, "grad_norm": 2.765101329762929, "learning_rate": 9.969527521120885e-06, "loss": 1.1021, "step": 1769 }, { "epoch": 0.06415135370229423, "grad_norm": 2.7873397311698604, "learning_rate": 9.969462786587729e-06, "loss": 1.145, "step": 1770 }, { "epoch": 0.06418759740495089, "grad_norm": 2.6462282669616535, "learning_rate": 9.96939798357838e-06, "loss": 1.0868, "step": 1771 }, { "epoch": 0.06422384110760755, "grad_norm": 2.4895108149998886, "learning_rate": 9.969333112093731e-06, "loss": 1.0714, "step": 1772 }, { "epoch": 0.06426008481026421, "grad_norm": 2.6837461948695243, "learning_rate": 9.969268172134678e-06, "loss": 1.1547, "step": 1773 }, { "epoch": 0.06429632851292089, "grad_norm": 2.4867637611432967, "learning_rate": 9.969203163702117e-06, "loss": 0.9795, "step": 1774 }, { "epoch": 0.06433257221557755, "grad_norm": 2.416136961819405, "learning_rate": 9.96913808679694e-06, "loss": 0.8841, "step": 1775 }, { "epoch": 0.0643688159182342, "grad_norm": 3.542232917450219, "learning_rate": 9.969072941420044e-06, "loss": 1.0221, "step": 1776 }, { "epoch": 0.06440505962089087, "grad_norm": 2.4816710083051463, "learning_rate": 9.969007727572328e-06, "loss": 1.0451, "step": 1777 }, { "epoch": 0.06444130332354753, "grad_norm": 2.7528437078440997, "learning_rate": 9.968942445254692e-06, "loss": 1.1828, "step": 1778 }, { "epoch": 0.0644775470262042, "grad_norm": 2.7200001604983184, "learning_rate": 9.968877094468035e-06, "loss": 1.0201, "step": 1779 }, { "epoch": 0.06451379072886086, "grad_norm": 2.4633089100833554, "learning_rate": 9.968811675213256e-06, "loss": 1.0747, "step": 1780 }, { "epoch": 0.06455003443151752, "grad_norm": 2.5766963984079205, "learning_rate": 9.968746187491257e-06, "loss": 1.0986, "step": 1781 }, { "epoch": 0.06458627813417418, "grad_norm": 2.8542655419500043, "learning_rate": 9.968680631302941e-06, "loss": 0.8631, "step": 1782 }, { "epoch": 0.06462252183683086, "grad_norm": 2.4191577419596393, "learning_rate": 9.96861500664921e-06, "loss": 1.1748, "step": 1783 }, { "epoch": 0.06465876553948752, "grad_norm": 2.4117660370231757, "learning_rate": 9.96854931353097e-06, "loss": 0.9932, "step": 1784 }, { "epoch": 0.06469500924214418, "grad_norm": 2.6064532034727996, "learning_rate": 9.968483551949125e-06, "loss": 1.065, "step": 1785 }, { "epoch": 0.06473125294480084, "grad_norm": 2.372349056979676, "learning_rate": 9.96841772190458e-06, "loss": 0.9687, "step": 1786 }, { "epoch": 0.0647674966474575, "grad_norm": 2.3633572390558872, "learning_rate": 9.968351823398246e-06, "loss": 0.887, "step": 1787 }, { "epoch": 0.06480374035011417, "grad_norm": 2.4483939194226045, "learning_rate": 9.968285856431028e-06, "loss": 1.0525, "step": 1788 }, { "epoch": 0.06483998405277083, "grad_norm": 2.7119665915461404, "learning_rate": 9.968219821003836e-06, "loss": 1.0849, "step": 1789 }, { "epoch": 0.06487622775542749, "grad_norm": 2.327733121224774, "learning_rate": 9.968153717117577e-06, "loss": 1.0406, "step": 1790 }, { "epoch": 0.06491247145808415, "grad_norm": 2.7708062412883945, "learning_rate": 9.968087544773167e-06, "loss": 1.0627, "step": 1791 }, { "epoch": 0.06494871516074083, "grad_norm": 2.6166971564525263, "learning_rate": 9.968021303971513e-06, "loss": 0.9208, "step": 1792 }, { "epoch": 0.06498495886339749, "grad_norm": 2.6814733642807984, "learning_rate": 9.967954994713532e-06, "loss": 1.3442, "step": 1793 }, { "epoch": 0.06502120256605415, "grad_norm": 2.6652124541569484, "learning_rate": 9.967888617000136e-06, "loss": 1.1816, "step": 1794 }, { "epoch": 0.06505744626871081, "grad_norm": 2.5616332578677503, "learning_rate": 9.967822170832237e-06, "loss": 1.0267, "step": 1795 }, { "epoch": 0.06509368997136747, "grad_norm": 2.403197136524053, "learning_rate": 9.967755656210756e-06, "loss": 0.9803, "step": 1796 }, { "epoch": 0.06512993367402414, "grad_norm": 2.3814192200625324, "learning_rate": 9.967689073136603e-06, "loss": 1.0008, "step": 1797 }, { "epoch": 0.0651661773766808, "grad_norm": 2.6266980491248626, "learning_rate": 9.967622421610701e-06, "loss": 0.9887, "step": 1798 }, { "epoch": 0.06520242107933746, "grad_norm": 2.544285182426752, "learning_rate": 9.967555701633968e-06, "loss": 0.9591, "step": 1799 }, { "epoch": 0.06523866478199412, "grad_norm": 2.3776745032720825, "learning_rate": 9.96748891320732e-06, "loss": 1.1093, "step": 1800 }, { "epoch": 0.0652749084846508, "grad_norm": 2.6867937463639033, "learning_rate": 9.967422056331678e-06, "loss": 1.0222, "step": 1801 }, { "epoch": 0.06531115218730746, "grad_norm": 2.5140080104685567, "learning_rate": 9.967355131007966e-06, "loss": 1.1607, "step": 1802 }, { "epoch": 0.06534739588996412, "grad_norm": 2.5847734450287585, "learning_rate": 9.967288137237104e-06, "loss": 1.2508, "step": 1803 }, { "epoch": 0.06538363959262078, "grad_norm": 2.6714993274059826, "learning_rate": 9.967221075020015e-06, "loss": 1.0897, "step": 1804 }, { "epoch": 0.06541988329527744, "grad_norm": 2.451093937967695, "learning_rate": 9.967153944357624e-06, "loss": 1.1693, "step": 1805 }, { "epoch": 0.06545612699793411, "grad_norm": 2.766586848498334, "learning_rate": 9.967086745250854e-06, "loss": 1.1996, "step": 1806 }, { "epoch": 0.06549237070059077, "grad_norm": 2.411376024727565, "learning_rate": 9.967019477700634e-06, "loss": 0.8004, "step": 1807 }, { "epoch": 0.06552861440324743, "grad_norm": 2.3017890170530144, "learning_rate": 9.96695214170789e-06, "loss": 0.7968, "step": 1808 }, { "epoch": 0.0655648581059041, "grad_norm": 2.6946453273570103, "learning_rate": 9.96688473727355e-06, "loss": 1.0776, "step": 1809 }, { "epoch": 0.06560110180856077, "grad_norm": 2.938358588908407, "learning_rate": 9.96681726439854e-06, "loss": 0.9172, "step": 1810 }, { "epoch": 0.06563734551121743, "grad_norm": 2.6110683578139997, "learning_rate": 9.966749723083793e-06, "loss": 0.8267, "step": 1811 }, { "epoch": 0.06567358921387409, "grad_norm": 2.6522736374283564, "learning_rate": 9.96668211333024e-06, "loss": 0.9129, "step": 1812 }, { "epoch": 0.06570983291653075, "grad_norm": 2.704734570155091, "learning_rate": 9.966614435138811e-06, "loss": 0.9307, "step": 1813 }, { "epoch": 0.06574607661918741, "grad_norm": 2.4773899176957266, "learning_rate": 9.966546688510437e-06, "loss": 1.0637, "step": 1814 }, { "epoch": 0.06578232032184408, "grad_norm": 2.3889711148717137, "learning_rate": 9.966478873446055e-06, "loss": 0.929, "step": 1815 }, { "epoch": 0.06581856402450074, "grad_norm": 2.6347987752260367, "learning_rate": 9.966410989946595e-06, "loss": 0.9249, "step": 1816 }, { "epoch": 0.0658548077271574, "grad_norm": 3.0740049981328763, "learning_rate": 9.966343038012998e-06, "loss": 0.9393, "step": 1817 }, { "epoch": 0.06589105142981407, "grad_norm": 2.46604127459519, "learning_rate": 9.966275017646196e-06, "loss": 1.0906, "step": 1818 }, { "epoch": 0.06592729513247074, "grad_norm": 2.368749765614607, "learning_rate": 9.966206928847129e-06, "loss": 0.9778, "step": 1819 }, { "epoch": 0.0659635388351274, "grad_norm": 2.4689889275193924, "learning_rate": 9.966138771616733e-06, "loss": 0.9014, "step": 1820 }, { "epoch": 0.06599978253778406, "grad_norm": 2.5193184367425707, "learning_rate": 9.966070545955949e-06, "loss": 0.8084, "step": 1821 }, { "epoch": 0.06603602624044072, "grad_norm": 3.0028465365922448, "learning_rate": 9.966002251865716e-06, "loss": 1.113, "step": 1822 }, { "epoch": 0.06607226994309738, "grad_norm": 2.3945560504845727, "learning_rate": 9.965933889346975e-06, "loss": 1.0095, "step": 1823 }, { "epoch": 0.06610851364575406, "grad_norm": 2.4639711862848452, "learning_rate": 9.965865458400668e-06, "loss": 0.9746, "step": 1824 }, { "epoch": 0.06614475734841072, "grad_norm": 2.692536150626392, "learning_rate": 9.96579695902774e-06, "loss": 1.1315, "step": 1825 }, { "epoch": 0.06618100105106738, "grad_norm": 2.524355212909332, "learning_rate": 9.96572839122913e-06, "loss": 1.0662, "step": 1826 }, { "epoch": 0.06621724475372404, "grad_norm": 2.5369156176808714, "learning_rate": 9.965659755005789e-06, "loss": 1.2606, "step": 1827 }, { "epoch": 0.06625348845638071, "grad_norm": 2.71220017366294, "learning_rate": 9.965591050358659e-06, "loss": 0.9152, "step": 1828 }, { "epoch": 0.06628973215903737, "grad_norm": 2.4683393849659048, "learning_rate": 9.965522277288687e-06, "loss": 1.0417, "step": 1829 }, { "epoch": 0.06632597586169403, "grad_norm": 2.4927242461174095, "learning_rate": 9.96545343579682e-06, "loss": 1.2101, "step": 1830 }, { "epoch": 0.06636221956435069, "grad_norm": 2.675729292335295, "learning_rate": 9.965384525884008e-06, "loss": 1.1655, "step": 1831 }, { "epoch": 0.06639846326700735, "grad_norm": 2.71826746312297, "learning_rate": 9.965315547551201e-06, "loss": 0.9661, "step": 1832 }, { "epoch": 0.06643470696966403, "grad_norm": 2.4548603974876015, "learning_rate": 9.965246500799348e-06, "loss": 0.9911, "step": 1833 }, { "epoch": 0.06647095067232069, "grad_norm": 2.5479350919575214, "learning_rate": 9.965177385629403e-06, "loss": 1.0935, "step": 1834 }, { "epoch": 0.06650719437497735, "grad_norm": 2.760253788507175, "learning_rate": 9.965108202042314e-06, "loss": 1.0601, "step": 1835 }, { "epoch": 0.066543438077634, "grad_norm": 2.9734917706688755, "learning_rate": 9.965038950039038e-06, "loss": 0.9686, "step": 1836 }, { "epoch": 0.06657968178029068, "grad_norm": 2.480131587934739, "learning_rate": 9.964969629620527e-06, "loss": 1.1029, "step": 1837 }, { "epoch": 0.06661592548294734, "grad_norm": 2.369990766953026, "learning_rate": 9.964900240787738e-06, "loss": 1.0123, "step": 1838 }, { "epoch": 0.066652169185604, "grad_norm": 2.5041737552537873, "learning_rate": 9.964830783541626e-06, "loss": 0.7899, "step": 1839 }, { "epoch": 0.06668841288826066, "grad_norm": 2.5207776528053367, "learning_rate": 9.96476125788315e-06, "loss": 0.9964, "step": 1840 }, { "epoch": 0.06672465659091732, "grad_norm": 2.544101731504989, "learning_rate": 9.964691663813265e-06, "loss": 1.2088, "step": 1841 }, { "epoch": 0.066760900293574, "grad_norm": 2.4749639476228102, "learning_rate": 9.964622001332931e-06, "loss": 0.819, "step": 1842 }, { "epoch": 0.06679714399623066, "grad_norm": 2.0115632781882664, "learning_rate": 9.96455227044311e-06, "loss": 0.9436, "step": 1843 }, { "epoch": 0.06683338769888732, "grad_norm": 2.5725521785217493, "learning_rate": 9.96448247114476e-06, "loss": 1.005, "step": 1844 }, { "epoch": 0.06686963140154398, "grad_norm": 2.358136435833858, "learning_rate": 9.964412603438845e-06, "loss": 1.124, "step": 1845 }, { "epoch": 0.06690587510420065, "grad_norm": 2.62723212064016, "learning_rate": 9.964342667326326e-06, "loss": 1.0474, "step": 1846 }, { "epoch": 0.06694211880685731, "grad_norm": 2.39658826656912, "learning_rate": 9.964272662808166e-06, "loss": 1.0353, "step": 1847 }, { "epoch": 0.06697836250951397, "grad_norm": 2.8248978919698176, "learning_rate": 9.964202589885333e-06, "loss": 1.2063, "step": 1848 }, { "epoch": 0.06701460621217063, "grad_norm": 2.620690556217176, "learning_rate": 9.96413244855879e-06, "loss": 0.9413, "step": 1849 }, { "epoch": 0.06705084991482729, "grad_norm": 2.359241655422869, "learning_rate": 9.964062238829504e-06, "loss": 1.0041, "step": 1850 }, { "epoch": 0.06708709361748397, "grad_norm": 2.7543319728051947, "learning_rate": 9.963991960698443e-06, "loss": 0.9432, "step": 1851 }, { "epoch": 0.06712333732014063, "grad_norm": 2.502920504069217, "learning_rate": 9.963921614166574e-06, "loss": 1.0208, "step": 1852 }, { "epoch": 0.06715958102279729, "grad_norm": 2.576346039552006, "learning_rate": 9.963851199234867e-06, "loss": 1.0832, "step": 1853 }, { "epoch": 0.06719582472545395, "grad_norm": 2.307107863011311, "learning_rate": 9.963780715904291e-06, "loss": 1.007, "step": 1854 }, { "epoch": 0.06723206842811062, "grad_norm": 2.750372232618227, "learning_rate": 9.963710164175822e-06, "loss": 1.0065, "step": 1855 }, { "epoch": 0.06726831213076728, "grad_norm": 2.599227654031275, "learning_rate": 9.963639544050428e-06, "loss": 0.9686, "step": 1856 }, { "epoch": 0.06730455583342394, "grad_norm": 2.6134782324667087, "learning_rate": 9.963568855529084e-06, "loss": 1.0671, "step": 1857 }, { "epoch": 0.0673407995360806, "grad_norm": 2.7534608803363776, "learning_rate": 9.96349809861276e-06, "loss": 0.9956, "step": 1858 }, { "epoch": 0.06737704323873726, "grad_norm": 2.556180983420589, "learning_rate": 9.963427273302437e-06, "loss": 1.0313, "step": 1859 }, { "epoch": 0.06741328694139394, "grad_norm": 2.858933891424755, "learning_rate": 9.963356379599086e-06, "loss": 1.1619, "step": 1860 }, { "epoch": 0.0674495306440506, "grad_norm": 2.5845112845924634, "learning_rate": 9.963285417503687e-06, "loss": 1.0151, "step": 1861 }, { "epoch": 0.06748577434670726, "grad_norm": 2.545263586803242, "learning_rate": 9.963214387017215e-06, "loss": 0.921, "step": 1862 }, { "epoch": 0.06752201804936392, "grad_norm": 2.7085309347629405, "learning_rate": 9.963143288140653e-06, "loss": 0.8762, "step": 1863 }, { "epoch": 0.06755826175202059, "grad_norm": 2.6956829716797053, "learning_rate": 9.963072120874977e-06, "loss": 1.0267, "step": 1864 }, { "epoch": 0.06759450545467725, "grad_norm": 2.3774277413365468, "learning_rate": 9.963000885221168e-06, "loss": 1.0631, "step": 1865 }, { "epoch": 0.06763074915733391, "grad_norm": 2.2276647302726023, "learning_rate": 9.96292958118021e-06, "loss": 0.8765, "step": 1866 }, { "epoch": 0.06766699285999057, "grad_norm": 2.525526396642587, "learning_rate": 9.962858208753082e-06, "loss": 0.9177, "step": 1867 }, { "epoch": 0.06770323656264723, "grad_norm": 2.847455969533368, "learning_rate": 9.962786767940769e-06, "loss": 1.1189, "step": 1868 }, { "epoch": 0.06773948026530391, "grad_norm": 2.3823085625362332, "learning_rate": 9.962715258744257e-06, "loss": 1.0262, "step": 1869 }, { "epoch": 0.06777572396796057, "grad_norm": 2.39931386001312, "learning_rate": 9.96264368116453e-06, "loss": 0.9607, "step": 1870 }, { "epoch": 0.06781196767061723, "grad_norm": 2.541084529781868, "learning_rate": 9.962572035202573e-06, "loss": 1.0667, "step": 1871 }, { "epoch": 0.06784821137327389, "grad_norm": 2.8841381593468496, "learning_rate": 9.962500320859377e-06, "loss": 1.1418, "step": 1872 }, { "epoch": 0.06788445507593056, "grad_norm": 2.453311164897604, "learning_rate": 9.962428538135926e-06, "loss": 1.0336, "step": 1873 }, { "epoch": 0.06792069877858722, "grad_norm": 2.658690820344403, "learning_rate": 9.962356687033212e-06, "loss": 0.8691, "step": 1874 }, { "epoch": 0.06795694248124388, "grad_norm": 2.3136176122535534, "learning_rate": 9.962284767552225e-06, "loss": 1.0097, "step": 1875 }, { "epoch": 0.06799318618390054, "grad_norm": 2.68769842700223, "learning_rate": 9.962212779693952e-06, "loss": 0.9619, "step": 1876 }, { "epoch": 0.0680294298865572, "grad_norm": 2.4863644556419744, "learning_rate": 9.96214072345939e-06, "loss": 1.216, "step": 1877 }, { "epoch": 0.06806567358921388, "grad_norm": 2.6112923555953977, "learning_rate": 9.962068598849529e-06, "loss": 1.0936, "step": 1878 }, { "epoch": 0.06810191729187054, "grad_norm": 2.892524833964202, "learning_rate": 9.961996405865364e-06, "loss": 1.1455, "step": 1879 }, { "epoch": 0.0681381609945272, "grad_norm": 2.4647960067444528, "learning_rate": 9.96192414450789e-06, "loss": 0.8424, "step": 1880 }, { "epoch": 0.06817440469718386, "grad_norm": 2.5612674420667743, "learning_rate": 9.9618518147781e-06, "loss": 0.888, "step": 1881 }, { "epoch": 0.06821064839984053, "grad_norm": 2.473302619650265, "learning_rate": 9.961779416676995e-06, "loss": 1.0975, "step": 1882 }, { "epoch": 0.0682468921024972, "grad_norm": 2.4494712718129565, "learning_rate": 9.96170695020557e-06, "loss": 0.997, "step": 1883 }, { "epoch": 0.06828313580515386, "grad_norm": 2.520603446949091, "learning_rate": 9.961634415364824e-06, "loss": 0.9235, "step": 1884 }, { "epoch": 0.06831937950781052, "grad_norm": 2.8512953799335556, "learning_rate": 9.961561812155755e-06, "loss": 1.0579, "step": 1885 }, { "epoch": 0.06835562321046718, "grad_norm": 2.598094672732915, "learning_rate": 9.961489140579367e-06, "loss": 0.9908, "step": 1886 }, { "epoch": 0.06839186691312385, "grad_norm": 2.8482194362278284, "learning_rate": 9.961416400636658e-06, "loss": 0.9893, "step": 1887 }, { "epoch": 0.06842811061578051, "grad_norm": 2.5025466090652726, "learning_rate": 9.961343592328632e-06, "loss": 1.0595, "step": 1888 }, { "epoch": 0.06846435431843717, "grad_norm": 2.1986229163074458, "learning_rate": 9.961270715656293e-06, "loss": 0.965, "step": 1889 }, { "epoch": 0.06850059802109383, "grad_norm": 2.6579577390340106, "learning_rate": 9.961197770620644e-06, "loss": 1.0132, "step": 1890 }, { "epoch": 0.0685368417237505, "grad_norm": 2.118020756304543, "learning_rate": 9.961124757222687e-06, "loss": 1.0646, "step": 1891 }, { "epoch": 0.06857308542640717, "grad_norm": 2.6204878720183817, "learning_rate": 9.961051675463433e-06, "loss": 1.0365, "step": 1892 }, { "epoch": 0.06860932912906383, "grad_norm": 2.6724264317874953, "learning_rate": 9.960978525343888e-06, "loss": 1.2064, "step": 1893 }, { "epoch": 0.06864557283172049, "grad_norm": 2.5400339886546397, "learning_rate": 9.960905306865058e-06, "loss": 0.9333, "step": 1894 }, { "epoch": 0.06868181653437715, "grad_norm": 2.5743179061633197, "learning_rate": 9.960832020027956e-06, "loss": 0.9072, "step": 1895 }, { "epoch": 0.06871806023703382, "grad_norm": 2.5458336904041743, "learning_rate": 9.960758664833585e-06, "loss": 1.1415, "step": 1896 }, { "epoch": 0.06875430393969048, "grad_norm": 2.594102310053445, "learning_rate": 9.960685241282961e-06, "loss": 0.9844, "step": 1897 }, { "epoch": 0.06879054764234714, "grad_norm": 2.4566991154771385, "learning_rate": 9.960611749377097e-06, "loss": 1.1048, "step": 1898 }, { "epoch": 0.0688267913450038, "grad_norm": 2.640824428860736, "learning_rate": 9.960538189117e-06, "loss": 1.0744, "step": 1899 }, { "epoch": 0.06886303504766048, "grad_norm": 2.7080231707242453, "learning_rate": 9.960464560503689e-06, "loss": 0.964, "step": 1900 }, { "epoch": 0.06889927875031714, "grad_norm": 2.5973636777424978, "learning_rate": 9.960390863538173e-06, "loss": 0.9327, "step": 1901 }, { "epoch": 0.0689355224529738, "grad_norm": 2.7843023920132417, "learning_rate": 9.960317098221475e-06, "loss": 1.1281, "step": 1902 }, { "epoch": 0.06897176615563046, "grad_norm": 2.6018717411729315, "learning_rate": 9.960243264554606e-06, "loss": 1.0247, "step": 1903 }, { "epoch": 0.06900800985828712, "grad_norm": 2.538769987712902, "learning_rate": 9.960169362538584e-06, "loss": 0.8858, "step": 1904 }, { "epoch": 0.06904425356094379, "grad_norm": 2.320930917156091, "learning_rate": 9.960095392174428e-06, "loss": 0.9669, "step": 1905 }, { "epoch": 0.06908049726360045, "grad_norm": 2.5680983267421897, "learning_rate": 9.960021353463158e-06, "loss": 1.1056, "step": 1906 }, { "epoch": 0.06911674096625711, "grad_norm": 2.520514198434907, "learning_rate": 9.959947246405792e-06, "loss": 1.0545, "step": 1907 }, { "epoch": 0.06915298466891377, "grad_norm": 2.738410725224525, "learning_rate": 9.959873071003354e-06, "loss": 0.9257, "step": 1908 }, { "epoch": 0.06918922837157045, "grad_norm": 2.5161003108224858, "learning_rate": 9.959798827256863e-06, "loss": 1.0354, "step": 1909 }, { "epoch": 0.0692254720742271, "grad_norm": 2.604231682503092, "learning_rate": 9.959724515167346e-06, "loss": 1.0679, "step": 1910 }, { "epoch": 0.06926171577688377, "grad_norm": 2.253604150800574, "learning_rate": 9.959650134735824e-06, "loss": 0.8752, "step": 1911 }, { "epoch": 0.06929795947954043, "grad_norm": 2.1886973337919278, "learning_rate": 9.959575685963322e-06, "loss": 1.0183, "step": 1912 }, { "epoch": 0.06933420318219709, "grad_norm": 2.5565306133438463, "learning_rate": 9.959501168850868e-06, "loss": 1.0739, "step": 1913 }, { "epoch": 0.06937044688485376, "grad_norm": 2.5127956524421187, "learning_rate": 9.959426583399485e-06, "loss": 1.1004, "step": 1914 }, { "epoch": 0.06940669058751042, "grad_norm": 2.507229175904147, "learning_rate": 9.959351929610205e-06, "loss": 1.2159, "step": 1915 }, { "epoch": 0.06944293429016708, "grad_norm": 2.664908283621723, "learning_rate": 9.959277207484054e-06, "loss": 0.9739, "step": 1916 }, { "epoch": 0.06947917799282374, "grad_norm": 2.4946890810103417, "learning_rate": 9.959202417022062e-06, "loss": 1.0861, "step": 1917 }, { "epoch": 0.06951542169548042, "grad_norm": 2.497961330760119, "learning_rate": 9.95912755822526e-06, "loss": 0.9179, "step": 1918 }, { "epoch": 0.06955166539813708, "grad_norm": 2.605622021585441, "learning_rate": 9.95905263109468e-06, "loss": 1.0035, "step": 1919 }, { "epoch": 0.06958790910079374, "grad_norm": 2.429958017430739, "learning_rate": 9.958977635631353e-06, "loss": 1.0032, "step": 1920 }, { "epoch": 0.0696241528034504, "grad_norm": 2.6965936121409326, "learning_rate": 9.958902571836314e-06, "loss": 0.9693, "step": 1921 }, { "epoch": 0.06966039650610706, "grad_norm": 2.426751391821847, "learning_rate": 9.958827439710596e-06, "loss": 0.9881, "step": 1922 }, { "epoch": 0.06969664020876373, "grad_norm": 2.80893371759829, "learning_rate": 9.958752239255236e-06, "loss": 1.2539, "step": 1923 }, { "epoch": 0.06973288391142039, "grad_norm": 2.571324193553002, "learning_rate": 9.958676970471268e-06, "loss": 0.9405, "step": 1924 }, { "epoch": 0.06976912761407705, "grad_norm": 2.4060126965251643, "learning_rate": 9.95860163335973e-06, "loss": 0.8884, "step": 1925 }, { "epoch": 0.06980537131673371, "grad_norm": 2.7721797352743276, "learning_rate": 9.958526227921662e-06, "loss": 0.9882, "step": 1926 }, { "epoch": 0.06984161501939039, "grad_norm": 2.356174932198635, "learning_rate": 9.958450754158102e-06, "loss": 1.0446, "step": 1927 }, { "epoch": 0.06987785872204705, "grad_norm": 2.617390457912097, "learning_rate": 9.958375212070087e-06, "loss": 1.1348, "step": 1928 }, { "epoch": 0.06991410242470371, "grad_norm": 3.1834565148971623, "learning_rate": 9.958299601658661e-06, "loss": 0.9511, "step": 1929 }, { "epoch": 0.06995034612736037, "grad_norm": 2.376739041501506, "learning_rate": 9.958223922924865e-06, "loss": 1.0056, "step": 1930 }, { "epoch": 0.06998658983001703, "grad_norm": 2.4811660886456677, "learning_rate": 9.958148175869742e-06, "loss": 0.9954, "step": 1931 }, { "epoch": 0.0700228335326737, "grad_norm": 2.3352200532327423, "learning_rate": 9.958072360494336e-06, "loss": 0.9627, "step": 1932 }, { "epoch": 0.07005907723533036, "grad_norm": 2.3600028373578787, "learning_rate": 9.957996476799692e-06, "loss": 0.996, "step": 1933 }, { "epoch": 0.07009532093798702, "grad_norm": 2.6636126258273936, "learning_rate": 9.957920524786855e-06, "loss": 0.9423, "step": 1934 }, { "epoch": 0.07013156464064368, "grad_norm": 2.8058928286258413, "learning_rate": 9.95784450445687e-06, "loss": 1.0586, "step": 1935 }, { "epoch": 0.07016780834330034, "grad_norm": 2.6468073135656853, "learning_rate": 9.957768415810788e-06, "loss": 0.9339, "step": 1936 }, { "epoch": 0.07020405204595702, "grad_norm": 2.5045257210444096, "learning_rate": 9.957692258849655e-06, "loss": 0.8386, "step": 1937 }, { "epoch": 0.07024029574861368, "grad_norm": 2.427982990710411, "learning_rate": 9.957616033574519e-06, "loss": 1.1134, "step": 1938 }, { "epoch": 0.07027653945127034, "grad_norm": 2.8656576393962823, "learning_rate": 9.957539739986435e-06, "loss": 0.865, "step": 1939 }, { "epoch": 0.070312783153927, "grad_norm": 2.60089278630771, "learning_rate": 9.957463378086452e-06, "loss": 1.1769, "step": 1940 }, { "epoch": 0.07034902685658367, "grad_norm": 2.464396004771623, "learning_rate": 9.95738694787562e-06, "loss": 1.0961, "step": 1941 }, { "epoch": 0.07038527055924033, "grad_norm": 2.4241925209488158, "learning_rate": 9.957310449354995e-06, "loss": 0.973, "step": 1942 }, { "epoch": 0.070421514261897, "grad_norm": 2.5703166649207545, "learning_rate": 9.95723388252563e-06, "loss": 1.017, "step": 1943 }, { "epoch": 0.07045775796455366, "grad_norm": 2.6132995934282204, "learning_rate": 9.957157247388579e-06, "loss": 1.0844, "step": 1944 }, { "epoch": 0.07049400166721032, "grad_norm": 2.2355022965970015, "learning_rate": 9.957080543944901e-06, "loss": 0.9205, "step": 1945 }, { "epoch": 0.07053024536986699, "grad_norm": 2.5861215377613247, "learning_rate": 9.95700377219565e-06, "loss": 1.0258, "step": 1946 }, { "epoch": 0.07056648907252365, "grad_norm": 2.3733559293406064, "learning_rate": 9.956926932141883e-06, "loss": 0.9235, "step": 1947 }, { "epoch": 0.07060273277518031, "grad_norm": 2.252720147272069, "learning_rate": 9.956850023784663e-06, "loss": 0.9773, "step": 1948 }, { "epoch": 0.07063897647783697, "grad_norm": 2.591492293878663, "learning_rate": 9.956773047125048e-06, "loss": 0.9629, "step": 1949 }, { "epoch": 0.07067522018049364, "grad_norm": 2.430480406748716, "learning_rate": 9.956696002164096e-06, "loss": 1.0442, "step": 1950 }, { "epoch": 0.0707114638831503, "grad_norm": 2.6211318036727116, "learning_rate": 9.956618888902873e-06, "loss": 1.0544, "step": 1951 }, { "epoch": 0.07074770758580697, "grad_norm": 2.616974208515064, "learning_rate": 9.95654170734244e-06, "loss": 1.0692, "step": 1952 }, { "epoch": 0.07078395128846363, "grad_norm": 2.8821278753241635, "learning_rate": 9.956464457483857e-06, "loss": 1.0014, "step": 1953 }, { "epoch": 0.07082019499112029, "grad_norm": 2.487772573431684, "learning_rate": 9.956387139328192e-06, "loss": 1.0477, "step": 1954 }, { "epoch": 0.07085643869377696, "grad_norm": 2.440479314422518, "learning_rate": 9.956309752876512e-06, "loss": 1.1074, "step": 1955 }, { "epoch": 0.07089268239643362, "grad_norm": 2.745368572372165, "learning_rate": 9.956232298129879e-06, "loss": 1.1298, "step": 1956 }, { "epoch": 0.07092892609909028, "grad_norm": 2.3585138068334146, "learning_rate": 9.956154775089362e-06, "loss": 0.8645, "step": 1957 }, { "epoch": 0.07096516980174694, "grad_norm": 2.482891990967012, "learning_rate": 9.95607718375603e-06, "loss": 1.1316, "step": 1958 }, { "epoch": 0.07100141350440362, "grad_norm": 2.3450304014344807, "learning_rate": 9.955999524130953e-06, "loss": 1.0258, "step": 1959 }, { "epoch": 0.07103765720706028, "grad_norm": 2.301715294646894, "learning_rate": 9.9559217962152e-06, "loss": 1.0164, "step": 1960 }, { "epoch": 0.07107390090971694, "grad_norm": 2.292481271801993, "learning_rate": 9.95584400000984e-06, "loss": 0.9955, "step": 1961 }, { "epoch": 0.0711101446123736, "grad_norm": 2.5361502701215772, "learning_rate": 9.955766135515948e-06, "loss": 0.9774, "step": 1962 }, { "epoch": 0.07114638831503026, "grad_norm": 2.7916587624545013, "learning_rate": 9.955688202734596e-06, "loss": 1.0339, "step": 1963 }, { "epoch": 0.07118263201768693, "grad_norm": 2.467690524292071, "learning_rate": 9.955610201666856e-06, "loss": 0.9258, "step": 1964 }, { "epoch": 0.07121887572034359, "grad_norm": 2.4968858971527523, "learning_rate": 9.955532132313808e-06, "loss": 0.9201, "step": 1965 }, { "epoch": 0.07125511942300025, "grad_norm": 2.546299537377579, "learning_rate": 9.95545399467652e-06, "loss": 1.0885, "step": 1966 }, { "epoch": 0.07129136312565691, "grad_norm": 2.8658345924838717, "learning_rate": 9.955375788756077e-06, "loss": 0.9598, "step": 1967 }, { "epoch": 0.07132760682831359, "grad_norm": 2.4996510468212376, "learning_rate": 9.95529751455355e-06, "loss": 1.101, "step": 1968 }, { "epoch": 0.07136385053097025, "grad_norm": 2.4557333441550346, "learning_rate": 9.955219172070021e-06, "loss": 0.9267, "step": 1969 }, { "epoch": 0.0714000942336269, "grad_norm": 2.3823591235795063, "learning_rate": 9.955140761306567e-06, "loss": 1.0208, "step": 1970 }, { "epoch": 0.07143633793628357, "grad_norm": 2.438187862299112, "learning_rate": 9.955062282264272e-06, "loss": 0.8628, "step": 1971 }, { "epoch": 0.07147258163894023, "grad_norm": 2.6893424842502704, "learning_rate": 9.954983734944214e-06, "loss": 0.9646, "step": 1972 }, { "epoch": 0.0715088253415969, "grad_norm": 2.7179638978650393, "learning_rate": 9.954905119347478e-06, "loss": 1.0154, "step": 1973 }, { "epoch": 0.07154506904425356, "grad_norm": 2.5174796496849603, "learning_rate": 9.954826435475147e-06, "loss": 0.9797, "step": 1974 }, { "epoch": 0.07158131274691022, "grad_norm": 2.4508918359042187, "learning_rate": 9.954747683328302e-06, "loss": 0.9537, "step": 1975 }, { "epoch": 0.07161755644956688, "grad_norm": 2.198190883870129, "learning_rate": 9.954668862908033e-06, "loss": 1.0091, "step": 1976 }, { "epoch": 0.07165380015222356, "grad_norm": 2.232790568596451, "learning_rate": 9.954589974215421e-06, "loss": 0.9321, "step": 1977 }, { "epoch": 0.07169004385488022, "grad_norm": 2.3607538136967747, "learning_rate": 9.954511017251555e-06, "loss": 0.7304, "step": 1978 }, { "epoch": 0.07172628755753688, "grad_norm": 3.048341851137711, "learning_rate": 9.954431992017526e-06, "loss": 1.1603, "step": 1979 }, { "epoch": 0.07176253126019354, "grad_norm": 2.723618486667567, "learning_rate": 9.95435289851442e-06, "loss": 1.0843, "step": 1980 }, { "epoch": 0.0717987749628502, "grad_norm": 2.614719621409469, "learning_rate": 9.954273736743327e-06, "loss": 1.1335, "step": 1981 }, { "epoch": 0.07183501866550687, "grad_norm": 2.2719681748957155, "learning_rate": 9.954194506705338e-06, "loss": 0.8808, "step": 1982 }, { "epoch": 0.07187126236816353, "grad_norm": 2.555914993809348, "learning_rate": 9.954115208401546e-06, "loss": 1.2087, "step": 1983 }, { "epoch": 0.07190750607082019, "grad_norm": 2.5533170191930044, "learning_rate": 9.954035841833041e-06, "loss": 0.9531, "step": 1984 }, { "epoch": 0.07194374977347685, "grad_norm": 2.6124823168349325, "learning_rate": 9.95395640700092e-06, "loss": 1.1128, "step": 1985 }, { "epoch": 0.07197999347613353, "grad_norm": 2.760174558645554, "learning_rate": 9.953876903906274e-06, "loss": 1.0098, "step": 1986 }, { "epoch": 0.07201623717879019, "grad_norm": 2.4497043556509572, "learning_rate": 9.953797332550202e-06, "loss": 0.9348, "step": 1987 }, { "epoch": 0.07205248088144685, "grad_norm": 2.5326959954703114, "learning_rate": 9.953717692933796e-06, "loss": 1.0909, "step": 1988 }, { "epoch": 0.07208872458410351, "grad_norm": 2.7241344529156772, "learning_rate": 9.953637985058157e-06, "loss": 1.007, "step": 1989 }, { "epoch": 0.07212496828676017, "grad_norm": 2.327643186201604, "learning_rate": 9.953558208924384e-06, "loss": 1.0974, "step": 1990 }, { "epoch": 0.07216121198941684, "grad_norm": 2.6126506325528824, "learning_rate": 9.953478364533574e-06, "loss": 1.0401, "step": 1991 }, { "epoch": 0.0721974556920735, "grad_norm": 2.3386419211229144, "learning_rate": 9.953398451886829e-06, "loss": 1.0128, "step": 1992 }, { "epoch": 0.07223369939473016, "grad_norm": 2.4607393101989, "learning_rate": 9.953318470985248e-06, "loss": 1.1671, "step": 1993 }, { "epoch": 0.07226994309738682, "grad_norm": 2.0981660153776063, "learning_rate": 9.953238421829933e-06, "loss": 0.8517, "step": 1994 }, { "epoch": 0.0723061868000435, "grad_norm": 2.535065494163734, "learning_rate": 9.95315830442199e-06, "loss": 1.21, "step": 1995 }, { "epoch": 0.07234243050270016, "grad_norm": 2.78776219301153, "learning_rate": 9.95307811876252e-06, "loss": 1.0542, "step": 1996 }, { "epoch": 0.07237867420535682, "grad_norm": 2.9651130331356024, "learning_rate": 9.95299786485263e-06, "loss": 0.9869, "step": 1997 }, { "epoch": 0.07241491790801348, "grad_norm": 2.6938895983481594, "learning_rate": 9.952917542693424e-06, "loss": 1.1244, "step": 1998 }, { "epoch": 0.07245116161067014, "grad_norm": 2.77886854072929, "learning_rate": 9.952837152286009e-06, "loss": 0.9473, "step": 1999 }, { "epoch": 0.07248740531332681, "grad_norm": 2.6148422052223914, "learning_rate": 9.952756693631493e-06, "loss": 0.9863, "step": 2000 }, { "epoch": 0.07252364901598347, "grad_norm": 2.871284556004327, "learning_rate": 9.952676166730988e-06, "loss": 0.9634, "step": 2001 }, { "epoch": 0.07255989271864013, "grad_norm": 2.4884679172731414, "learning_rate": 9.9525955715856e-06, "loss": 0.9721, "step": 2002 }, { "epoch": 0.0725961364212968, "grad_norm": 2.591884277957029, "learning_rate": 9.952514908196437e-06, "loss": 1.0445, "step": 2003 }, { "epoch": 0.07263238012395347, "grad_norm": 2.542117539505607, "learning_rate": 9.952434176564617e-06, "loss": 0.9596, "step": 2004 }, { "epoch": 0.07266862382661013, "grad_norm": 2.694125400442335, "learning_rate": 9.952353376691248e-06, "loss": 1.0237, "step": 2005 }, { "epoch": 0.07270486752926679, "grad_norm": 2.551388241170096, "learning_rate": 9.952272508577445e-06, "loss": 1.1369, "step": 2006 }, { "epoch": 0.07274111123192345, "grad_norm": 2.5453760081190224, "learning_rate": 9.952191572224322e-06, "loss": 0.9592, "step": 2007 }, { "epoch": 0.07277735493458011, "grad_norm": 2.4861303762321576, "learning_rate": 9.952110567632994e-06, "loss": 1.0411, "step": 2008 }, { "epoch": 0.07281359863723678, "grad_norm": 2.7082503128350517, "learning_rate": 9.952029494804577e-06, "loss": 1.1889, "step": 2009 }, { "epoch": 0.07284984233989344, "grad_norm": 2.8649210444777466, "learning_rate": 9.951948353740188e-06, "loss": 1.0847, "step": 2010 }, { "epoch": 0.0728860860425501, "grad_norm": 2.5479753606842843, "learning_rate": 9.951867144440947e-06, "loss": 1.0087, "step": 2011 }, { "epoch": 0.07292232974520677, "grad_norm": 2.4942651417659265, "learning_rate": 9.95178586690797e-06, "loss": 1.2535, "step": 2012 }, { "epoch": 0.07295857344786344, "grad_norm": 2.394521344549001, "learning_rate": 9.95170452114238e-06, "loss": 1.0136, "step": 2013 }, { "epoch": 0.0729948171505201, "grad_norm": 2.7476727880113376, "learning_rate": 9.951623107145296e-06, "loss": 1.0868, "step": 2014 }, { "epoch": 0.07303106085317676, "grad_norm": 2.44218199136094, "learning_rate": 9.95154162491784e-06, "loss": 0.9536, "step": 2015 }, { "epoch": 0.07306730455583342, "grad_norm": 2.2731433118937927, "learning_rate": 9.951460074461135e-06, "loss": 1.0295, "step": 2016 }, { "epoch": 0.07310354825849008, "grad_norm": 2.507187990298085, "learning_rate": 9.951378455776304e-06, "loss": 0.992, "step": 2017 }, { "epoch": 0.07313979196114676, "grad_norm": 2.587010099300595, "learning_rate": 9.951296768864474e-06, "loss": 0.8272, "step": 2018 }, { "epoch": 0.07317603566380342, "grad_norm": 2.7447657934181464, "learning_rate": 9.951215013726767e-06, "loss": 1.2218, "step": 2019 }, { "epoch": 0.07321227936646008, "grad_norm": 2.8944702033398153, "learning_rate": 9.951133190364313e-06, "loss": 1.0544, "step": 2020 }, { "epoch": 0.07324852306911674, "grad_norm": 2.539949765628669, "learning_rate": 9.951051298778237e-06, "loss": 0.9142, "step": 2021 }, { "epoch": 0.07328476677177341, "grad_norm": 2.8885561040460246, "learning_rate": 9.950969338969669e-06, "loss": 0.947, "step": 2022 }, { "epoch": 0.07332101047443007, "grad_norm": 2.4029897140860563, "learning_rate": 9.950887310939738e-06, "loss": 1.1688, "step": 2023 }, { "epoch": 0.07335725417708673, "grad_norm": 2.4403028622364458, "learning_rate": 9.950805214689572e-06, "loss": 0.972, "step": 2024 }, { "epoch": 0.07339349787974339, "grad_norm": 3.0913121015567704, "learning_rate": 9.950723050220306e-06, "loss": 1.432, "step": 2025 }, { "epoch": 0.07342974158240005, "grad_norm": 2.334388357781425, "learning_rate": 9.950640817533072e-06, "loss": 0.9767, "step": 2026 }, { "epoch": 0.07346598528505673, "grad_norm": 2.629837428549217, "learning_rate": 9.950558516628999e-06, "loss": 1.1785, "step": 2027 }, { "epoch": 0.07350222898771339, "grad_norm": 2.5600064335249466, "learning_rate": 9.950476147509224e-06, "loss": 1.1783, "step": 2028 }, { "epoch": 0.07353847269037005, "grad_norm": 2.742684066383179, "learning_rate": 9.950393710174884e-06, "loss": 1.0853, "step": 2029 }, { "epoch": 0.0735747163930267, "grad_norm": 2.413242141335344, "learning_rate": 9.950311204627109e-06, "loss": 0.9823, "step": 2030 }, { "epoch": 0.07361096009568338, "grad_norm": 2.587083911175631, "learning_rate": 9.950228630867042e-06, "loss": 0.8317, "step": 2031 }, { "epoch": 0.07364720379834004, "grad_norm": 2.43280725333164, "learning_rate": 9.950145988895819e-06, "loss": 0.9581, "step": 2032 }, { "epoch": 0.0736834475009967, "grad_norm": 2.790821830407871, "learning_rate": 9.950063278714575e-06, "loss": 1.1112, "step": 2033 }, { "epoch": 0.07371969120365336, "grad_norm": 2.4398141693727475, "learning_rate": 9.949980500324454e-06, "loss": 0.9929, "step": 2034 }, { "epoch": 0.07375593490631002, "grad_norm": 2.544995709176272, "learning_rate": 9.949897653726596e-06, "loss": 0.9476, "step": 2035 }, { "epoch": 0.0737921786089667, "grad_norm": 2.4487320425369705, "learning_rate": 9.949814738922142e-06, "loss": 1.184, "step": 2036 }, { "epoch": 0.07382842231162336, "grad_norm": 2.384706487648662, "learning_rate": 9.949731755912235e-06, "loss": 1.0122, "step": 2037 }, { "epoch": 0.07386466601428002, "grad_norm": 2.6189724128137755, "learning_rate": 9.949648704698015e-06, "loss": 1.1122, "step": 2038 }, { "epoch": 0.07390090971693668, "grad_norm": 2.245471636196957, "learning_rate": 9.949565585280632e-06, "loss": 0.9848, "step": 2039 }, { "epoch": 0.07393715341959335, "grad_norm": 2.546558713255237, "learning_rate": 9.949482397661228e-06, "loss": 0.8375, "step": 2040 }, { "epoch": 0.07397339712225001, "grad_norm": 2.5412041701442156, "learning_rate": 9.94939914184095e-06, "loss": 0.9577, "step": 2041 }, { "epoch": 0.07400964082490667, "grad_norm": 2.5100670542168526, "learning_rate": 9.949315817820947e-06, "loss": 0.9775, "step": 2042 }, { "epoch": 0.07404588452756333, "grad_norm": 2.4570846615795507, "learning_rate": 9.949232425602362e-06, "loss": 1.0585, "step": 2043 }, { "epoch": 0.07408212823021999, "grad_norm": 2.450374494118642, "learning_rate": 9.949148965186348e-06, "loss": 0.8446, "step": 2044 }, { "epoch": 0.07411837193287667, "grad_norm": 2.585216462810563, "learning_rate": 9.949065436574054e-06, "loss": 0.9267, "step": 2045 }, { "epoch": 0.07415461563553333, "grad_norm": 2.7664019548218572, "learning_rate": 9.948981839766633e-06, "loss": 1.1722, "step": 2046 }, { "epoch": 0.07419085933818999, "grad_norm": 2.5954628174808194, "learning_rate": 9.948898174765235e-06, "loss": 0.7848, "step": 2047 }, { "epoch": 0.07422710304084665, "grad_norm": 2.57919235733836, "learning_rate": 9.948814441571013e-06, "loss": 1.1461, "step": 2048 }, { "epoch": 0.07426334674350332, "grad_norm": 2.5180947814481605, "learning_rate": 9.94873064018512e-06, "loss": 0.952, "step": 2049 }, { "epoch": 0.07429959044615998, "grad_norm": 2.358853248902826, "learning_rate": 9.948646770608713e-06, "loss": 1.0116, "step": 2050 }, { "epoch": 0.07433583414881664, "grad_norm": 2.2908133044642196, "learning_rate": 9.948562832842946e-06, "loss": 1.11, "step": 2051 }, { "epoch": 0.0743720778514733, "grad_norm": 2.5496909676989516, "learning_rate": 9.948478826888975e-06, "loss": 1.0303, "step": 2052 }, { "epoch": 0.07440832155412996, "grad_norm": 2.393233011859734, "learning_rate": 9.948394752747961e-06, "loss": 1.1279, "step": 2053 }, { "epoch": 0.07444456525678664, "grad_norm": 2.5703641955020653, "learning_rate": 9.948310610421056e-06, "loss": 0.9419, "step": 2054 }, { "epoch": 0.0744808089594433, "grad_norm": 2.976049738396178, "learning_rate": 9.948226399909427e-06, "loss": 1.1013, "step": 2055 }, { "epoch": 0.07451705266209996, "grad_norm": 2.4934748737002006, "learning_rate": 9.948142121214229e-06, "loss": 1.0448, "step": 2056 }, { "epoch": 0.07455329636475662, "grad_norm": 2.775923724049319, "learning_rate": 9.948057774336625e-06, "loss": 0.9796, "step": 2057 }, { "epoch": 0.07458954006741329, "grad_norm": 2.6111208528814704, "learning_rate": 9.947973359277778e-06, "loss": 1.1062, "step": 2058 }, { "epoch": 0.07462578377006995, "grad_norm": 2.294165344810095, "learning_rate": 9.94788887603885e-06, "loss": 0.9916, "step": 2059 }, { "epoch": 0.07466202747272661, "grad_norm": 2.6929993690437626, "learning_rate": 9.947804324621007e-06, "loss": 1.0397, "step": 2060 }, { "epoch": 0.07469827117538327, "grad_norm": 2.600670249935081, "learning_rate": 9.947719705025412e-06, "loss": 1.0189, "step": 2061 }, { "epoch": 0.07473451487803993, "grad_norm": 2.4712752939610336, "learning_rate": 9.947635017253233e-06, "loss": 1.0453, "step": 2062 }, { "epoch": 0.07477075858069661, "grad_norm": 2.455499496977452, "learning_rate": 9.947550261305633e-06, "loss": 1.0645, "step": 2063 }, { "epoch": 0.07480700228335327, "grad_norm": 2.859491784748838, "learning_rate": 9.947465437183786e-06, "loss": 1.1853, "step": 2064 }, { "epoch": 0.07484324598600993, "grad_norm": 2.944397517399421, "learning_rate": 9.947380544888854e-06, "loss": 1.0122, "step": 2065 }, { "epoch": 0.07487948968866659, "grad_norm": 2.538907423552473, "learning_rate": 9.947295584422012e-06, "loss": 1.0516, "step": 2066 }, { "epoch": 0.07491573339132326, "grad_norm": 2.5405733036287623, "learning_rate": 9.947210555784428e-06, "loss": 1.1405, "step": 2067 }, { "epoch": 0.07495197709397992, "grad_norm": 2.1839547472692837, "learning_rate": 9.947125458977274e-06, "loss": 1.0769, "step": 2068 }, { "epoch": 0.07498822079663658, "grad_norm": 2.3393976883545466, "learning_rate": 9.947040294001723e-06, "loss": 0.9117, "step": 2069 }, { "epoch": 0.07502446449929324, "grad_norm": 2.923086084759397, "learning_rate": 9.946955060858951e-06, "loss": 1.0684, "step": 2070 }, { "epoch": 0.0750607082019499, "grad_norm": 2.8412252878262354, "learning_rate": 9.946869759550129e-06, "loss": 1.0977, "step": 2071 }, { "epoch": 0.07509695190460658, "grad_norm": 2.1496197598796978, "learning_rate": 9.946784390076433e-06, "loss": 1.1116, "step": 2072 }, { "epoch": 0.07513319560726324, "grad_norm": 2.4065141289873777, "learning_rate": 9.94669895243904e-06, "loss": 1.1266, "step": 2073 }, { "epoch": 0.0751694393099199, "grad_norm": 2.9773360256085457, "learning_rate": 9.94661344663913e-06, "loss": 1.1087, "step": 2074 }, { "epoch": 0.07520568301257656, "grad_norm": 2.592005641951749, "learning_rate": 9.946527872677874e-06, "loss": 1.1552, "step": 2075 }, { "epoch": 0.07524192671523323, "grad_norm": 2.6337343559112663, "learning_rate": 9.946442230556456e-06, "loss": 1.0266, "step": 2076 }, { "epoch": 0.0752781704178899, "grad_norm": 2.524288980233936, "learning_rate": 9.946356520276057e-06, "loss": 0.9315, "step": 2077 }, { "epoch": 0.07531441412054656, "grad_norm": 2.3000557293389408, "learning_rate": 9.946270741837855e-06, "loss": 1.0624, "step": 2078 }, { "epoch": 0.07535065782320322, "grad_norm": 2.5802371697774906, "learning_rate": 9.946184895243036e-06, "loss": 0.9659, "step": 2079 }, { "epoch": 0.07538690152585988, "grad_norm": 2.5597318503101145, "learning_rate": 9.946098980492779e-06, "loss": 1.0572, "step": 2080 }, { "epoch": 0.07542314522851655, "grad_norm": 2.391129738182301, "learning_rate": 9.946012997588268e-06, "loss": 1.1567, "step": 2081 }, { "epoch": 0.07545938893117321, "grad_norm": 2.3267543129955874, "learning_rate": 9.945926946530692e-06, "loss": 1.0321, "step": 2082 }, { "epoch": 0.07549563263382987, "grad_norm": 2.334790605381039, "learning_rate": 9.945840827321232e-06, "loss": 1.0331, "step": 2083 }, { "epoch": 0.07553187633648653, "grad_norm": 2.620497423240334, "learning_rate": 9.945754639961077e-06, "loss": 1.0426, "step": 2084 }, { "epoch": 0.0755681200391432, "grad_norm": 2.3933329408590054, "learning_rate": 9.945668384451415e-06, "loss": 0.946, "step": 2085 }, { "epoch": 0.07560436374179987, "grad_norm": 2.579702643254087, "learning_rate": 9.945582060793433e-06, "loss": 0.9579, "step": 2086 }, { "epoch": 0.07564060744445653, "grad_norm": 2.122650086330965, "learning_rate": 9.94549566898832e-06, "loss": 0.8769, "step": 2087 }, { "epoch": 0.07567685114711319, "grad_norm": 2.709840692101198, "learning_rate": 9.94540920903727e-06, "loss": 1.08, "step": 2088 }, { "epoch": 0.07571309484976985, "grad_norm": 2.717622592000133, "learning_rate": 9.945322680941472e-06, "loss": 1.0143, "step": 2089 }, { "epoch": 0.07574933855242652, "grad_norm": 2.380738163594919, "learning_rate": 9.945236084702117e-06, "loss": 1.1331, "step": 2090 }, { "epoch": 0.07578558225508318, "grad_norm": 2.587059608046638, "learning_rate": 9.9451494203204e-06, "loss": 1.1836, "step": 2091 }, { "epoch": 0.07582182595773984, "grad_norm": 2.985009094415812, "learning_rate": 9.945062687797513e-06, "loss": 0.939, "step": 2092 }, { "epoch": 0.0758580696603965, "grad_norm": 2.7691965648483015, "learning_rate": 9.944975887134654e-06, "loss": 1.1358, "step": 2093 }, { "epoch": 0.07589431336305318, "grad_norm": 2.3551032904614053, "learning_rate": 9.944889018333019e-06, "loss": 0.9585, "step": 2094 }, { "epoch": 0.07593055706570984, "grad_norm": 2.2428232678654227, "learning_rate": 9.944802081393804e-06, "loss": 0.9912, "step": 2095 }, { "epoch": 0.0759668007683665, "grad_norm": 2.5437083683358384, "learning_rate": 9.944715076318205e-06, "loss": 1.0642, "step": 2096 }, { "epoch": 0.07600304447102316, "grad_norm": 2.663319171440547, "learning_rate": 9.944628003107423e-06, "loss": 1.0673, "step": 2097 }, { "epoch": 0.07603928817367982, "grad_norm": 2.2810147902424003, "learning_rate": 9.944540861762658e-06, "loss": 0.8802, "step": 2098 }, { "epoch": 0.07607553187633649, "grad_norm": 2.7749351066625976, "learning_rate": 9.944453652285112e-06, "loss": 1.0478, "step": 2099 }, { "epoch": 0.07611177557899315, "grad_norm": 2.7445480192367415, "learning_rate": 9.944366374675983e-06, "loss": 0.9836, "step": 2100 }, { "epoch": 0.07614801928164981, "grad_norm": 2.4768008675116064, "learning_rate": 9.944279028936475e-06, "loss": 1.0991, "step": 2101 }, { "epoch": 0.07618426298430647, "grad_norm": 2.6863535059820736, "learning_rate": 9.944191615067793e-06, "loss": 0.9445, "step": 2102 }, { "epoch": 0.07622050668696315, "grad_norm": 2.513949193982624, "learning_rate": 9.94410413307114e-06, "loss": 0.8746, "step": 2103 }, { "epoch": 0.0762567503896198, "grad_norm": 2.6970712752716706, "learning_rate": 9.944016582947724e-06, "loss": 1.1939, "step": 2104 }, { "epoch": 0.07629299409227647, "grad_norm": 2.563291632695011, "learning_rate": 9.943928964698747e-06, "loss": 1.0863, "step": 2105 }, { "epoch": 0.07632923779493313, "grad_norm": 2.285462146115187, "learning_rate": 9.943841278325421e-06, "loss": 1.0495, "step": 2106 }, { "epoch": 0.07636548149758979, "grad_norm": 2.938209642690391, "learning_rate": 9.943753523828952e-06, "loss": 0.9607, "step": 2107 }, { "epoch": 0.07640172520024646, "grad_norm": 2.539694638402931, "learning_rate": 9.94366570121055e-06, "loss": 1.1561, "step": 2108 }, { "epoch": 0.07643796890290312, "grad_norm": 2.4918408517512973, "learning_rate": 9.943577810471424e-06, "loss": 0.9851, "step": 2109 }, { "epoch": 0.07647421260555978, "grad_norm": 2.4244034644101853, "learning_rate": 9.943489851612784e-06, "loss": 1.0712, "step": 2110 }, { "epoch": 0.07651045630821644, "grad_norm": 2.5663957169254576, "learning_rate": 9.943401824635842e-06, "loss": 1.2088, "step": 2111 }, { "epoch": 0.07654670001087312, "grad_norm": 2.485905131982626, "learning_rate": 9.943313729541818e-06, "loss": 1.0232, "step": 2112 }, { "epoch": 0.07658294371352978, "grad_norm": 2.478323370008149, "learning_rate": 9.943225566331916e-06, "loss": 0.9794, "step": 2113 }, { "epoch": 0.07661918741618644, "grad_norm": 2.499224515848921, "learning_rate": 9.943137335007357e-06, "loss": 1.1051, "step": 2114 }, { "epoch": 0.0766554311188431, "grad_norm": 2.412271454599359, "learning_rate": 9.943049035569355e-06, "loss": 0.9507, "step": 2115 }, { "epoch": 0.07669167482149976, "grad_norm": 2.6856609759175987, "learning_rate": 9.942960668019126e-06, "loss": 1.1344, "step": 2116 }, { "epoch": 0.07672791852415643, "grad_norm": 2.4816152161491787, "learning_rate": 9.94287223235789e-06, "loss": 1.184, "step": 2117 }, { "epoch": 0.07676416222681309, "grad_norm": 2.3034265874084565, "learning_rate": 9.942783728586863e-06, "loss": 0.8718, "step": 2118 }, { "epoch": 0.07680040592946975, "grad_norm": 2.603389244974213, "learning_rate": 9.942695156707264e-06, "loss": 1.0608, "step": 2119 }, { "epoch": 0.07683664963212641, "grad_norm": 2.435104268989588, "learning_rate": 9.942606516720317e-06, "loss": 1.1488, "step": 2120 }, { "epoch": 0.07687289333478309, "grad_norm": 2.60724519699691, "learning_rate": 9.94251780862724e-06, "loss": 1.0028, "step": 2121 }, { "epoch": 0.07690913703743975, "grad_norm": 2.950318967780236, "learning_rate": 9.942429032429257e-06, "loss": 1.1388, "step": 2122 }, { "epoch": 0.07694538074009641, "grad_norm": 2.62605406642082, "learning_rate": 9.942340188127592e-06, "loss": 1.0866, "step": 2123 }, { "epoch": 0.07698162444275307, "grad_norm": 2.5280626936546784, "learning_rate": 9.942251275723467e-06, "loss": 0.9837, "step": 2124 }, { "epoch": 0.07701786814540973, "grad_norm": 2.7754107328440374, "learning_rate": 9.94216229521811e-06, "loss": 0.8754, "step": 2125 }, { "epoch": 0.0770541118480664, "grad_norm": 2.4063661365669495, "learning_rate": 9.942073246612745e-06, "loss": 1.1586, "step": 2126 }, { "epoch": 0.07709035555072306, "grad_norm": 2.7617681606474345, "learning_rate": 9.941984129908598e-06, "loss": 0.8818, "step": 2127 }, { "epoch": 0.07712659925337972, "grad_norm": 2.929292635902066, "learning_rate": 9.9418949451069e-06, "loss": 0.9303, "step": 2128 }, { "epoch": 0.07716284295603638, "grad_norm": 2.652470537504554, "learning_rate": 9.94180569220888e-06, "loss": 1.0194, "step": 2129 }, { "epoch": 0.07719908665869306, "grad_norm": 2.195663219971979, "learning_rate": 9.941716371215762e-06, "loss": 0.9648, "step": 2130 }, { "epoch": 0.07723533036134972, "grad_norm": 2.614493447553089, "learning_rate": 9.941626982128784e-06, "loss": 0.9798, "step": 2131 }, { "epoch": 0.07727157406400638, "grad_norm": 2.3994457589596245, "learning_rate": 9.941537524949175e-06, "loss": 1.1195, "step": 2132 }, { "epoch": 0.07730781776666304, "grad_norm": 2.4623977163391872, "learning_rate": 9.941447999678167e-06, "loss": 0.8301, "step": 2133 }, { "epoch": 0.0773440614693197, "grad_norm": 2.4375848194595386, "learning_rate": 9.941358406316991e-06, "loss": 0.8386, "step": 2134 }, { "epoch": 0.07738030517197637, "grad_norm": 2.525365541804405, "learning_rate": 9.94126874486689e-06, "loss": 1.165, "step": 2135 }, { "epoch": 0.07741654887463303, "grad_norm": 2.628481319615044, "learning_rate": 9.94117901532909e-06, "loss": 0.9995, "step": 2136 }, { "epoch": 0.0774527925772897, "grad_norm": 2.469624890500011, "learning_rate": 9.941089217704834e-06, "loss": 0.8613, "step": 2137 }, { "epoch": 0.07748903627994636, "grad_norm": 2.5899278607652416, "learning_rate": 9.940999351995355e-06, "loss": 1.0144, "step": 2138 }, { "epoch": 0.07752527998260303, "grad_norm": 2.7007501115346466, "learning_rate": 9.940909418201894e-06, "loss": 1.1526, "step": 2139 }, { "epoch": 0.07756152368525969, "grad_norm": 2.7586122762257497, "learning_rate": 9.940819416325689e-06, "loss": 0.9177, "step": 2140 }, { "epoch": 0.07759776738791635, "grad_norm": 2.4782613692233655, "learning_rate": 9.940729346367982e-06, "loss": 0.9092, "step": 2141 }, { "epoch": 0.07763401109057301, "grad_norm": 2.262132576618957, "learning_rate": 9.940639208330011e-06, "loss": 0.9186, "step": 2142 }, { "epoch": 0.07767025479322967, "grad_norm": 2.25088362938958, "learning_rate": 9.94054900221302e-06, "loss": 1.0248, "step": 2143 }, { "epoch": 0.07770649849588634, "grad_norm": 2.8313950704546067, "learning_rate": 9.940458728018251e-06, "loss": 1.1266, "step": 2144 }, { "epoch": 0.077742742198543, "grad_norm": 2.339923683119237, "learning_rate": 9.94036838574695e-06, "loss": 1.1062, "step": 2145 }, { "epoch": 0.07777898590119967, "grad_norm": 2.322948259623539, "learning_rate": 9.940277975400361e-06, "loss": 1.0436, "step": 2146 }, { "epoch": 0.07781522960385633, "grad_norm": 2.6079180354966, "learning_rate": 9.940187496979727e-06, "loss": 1.0041, "step": 2147 }, { "epoch": 0.077851473306513, "grad_norm": 2.2441965010935263, "learning_rate": 9.9400969504863e-06, "loss": 1.0587, "step": 2148 }, { "epoch": 0.07788771700916966, "grad_norm": 2.3732503889699825, "learning_rate": 9.940006335921321e-06, "loss": 1.1445, "step": 2149 }, { "epoch": 0.07792396071182632, "grad_norm": 3.044184139959529, "learning_rate": 9.939915653286045e-06, "loss": 1.0213, "step": 2150 }, { "epoch": 0.07796020441448298, "grad_norm": 2.869042280171977, "learning_rate": 9.939824902581719e-06, "loss": 1.0973, "step": 2151 }, { "epoch": 0.07799644811713964, "grad_norm": 2.301250988297236, "learning_rate": 9.93973408380959e-06, "loss": 1.0576, "step": 2152 }, { "epoch": 0.07803269181979632, "grad_norm": 2.6916398008510685, "learning_rate": 9.939643196970917e-06, "loss": 0.8935, "step": 2153 }, { "epoch": 0.07806893552245298, "grad_norm": 2.3245043030643155, "learning_rate": 9.939552242066947e-06, "loss": 1.0743, "step": 2154 }, { "epoch": 0.07810517922510964, "grad_norm": 2.638840624023143, "learning_rate": 9.939461219098933e-06, "loss": 1.0154, "step": 2155 }, { "epoch": 0.0781414229277663, "grad_norm": 2.6917999044075023, "learning_rate": 9.939370128068132e-06, "loss": 0.9375, "step": 2156 }, { "epoch": 0.07817766663042297, "grad_norm": 3.073934419336797, "learning_rate": 9.939278968975798e-06, "loss": 1.1965, "step": 2157 }, { "epoch": 0.07821391033307963, "grad_norm": 2.4248806206594313, "learning_rate": 9.939187741823186e-06, "loss": 1.1956, "step": 2158 }, { "epoch": 0.07825015403573629, "grad_norm": 2.110027611925821, "learning_rate": 9.939096446611556e-06, "loss": 0.8935, "step": 2159 }, { "epoch": 0.07828639773839295, "grad_norm": 2.3156793044650628, "learning_rate": 9.939005083342161e-06, "loss": 1.1048, "step": 2160 }, { "epoch": 0.07832264144104961, "grad_norm": 2.760909335830139, "learning_rate": 9.938913652016264e-06, "loss": 1.3801, "step": 2161 }, { "epoch": 0.07835888514370629, "grad_norm": 2.7722148194259786, "learning_rate": 9.938822152635124e-06, "loss": 0.9207, "step": 2162 }, { "epoch": 0.07839512884636295, "grad_norm": 2.422340815214974, "learning_rate": 9.938730585200002e-06, "loss": 0.9949, "step": 2163 }, { "epoch": 0.0784313725490196, "grad_norm": 2.5261973972512073, "learning_rate": 9.93863894971216e-06, "loss": 1.0382, "step": 2164 }, { "epoch": 0.07846761625167627, "grad_norm": 2.4634014481364566, "learning_rate": 9.93854724617286e-06, "loss": 1.1077, "step": 2165 }, { "epoch": 0.07850385995433294, "grad_norm": 2.316029201654541, "learning_rate": 9.938455474583365e-06, "loss": 1.1202, "step": 2166 }, { "epoch": 0.0785401036569896, "grad_norm": 2.610096144642272, "learning_rate": 9.93836363494494e-06, "loss": 1.1358, "step": 2167 }, { "epoch": 0.07857634735964626, "grad_norm": 2.513777652902076, "learning_rate": 9.938271727258851e-06, "loss": 0.9663, "step": 2168 }, { "epoch": 0.07861259106230292, "grad_norm": 2.7446900392118265, "learning_rate": 9.938179751526364e-06, "loss": 1.1452, "step": 2169 }, { "epoch": 0.07864883476495958, "grad_norm": 2.2970962182343913, "learning_rate": 9.938087707748747e-06, "loss": 0.956, "step": 2170 }, { "epoch": 0.07868507846761626, "grad_norm": 2.138690273385125, "learning_rate": 9.937995595927267e-06, "loss": 0.8685, "step": 2171 }, { "epoch": 0.07872132217027292, "grad_norm": 2.425490295931517, "learning_rate": 9.937903416063195e-06, "loss": 1.0385, "step": 2172 }, { "epoch": 0.07875756587292958, "grad_norm": 2.4798779601016836, "learning_rate": 9.9378111681578e-06, "loss": 1.0044, "step": 2173 }, { "epoch": 0.07879380957558624, "grad_norm": 2.4745686481428835, "learning_rate": 9.937718852212354e-06, "loss": 1.0293, "step": 2174 }, { "epoch": 0.07883005327824291, "grad_norm": 2.66253735873357, "learning_rate": 9.937626468228128e-06, "loss": 1.1396, "step": 2175 }, { "epoch": 0.07886629698089957, "grad_norm": 2.3668991693100287, "learning_rate": 9.937534016206396e-06, "loss": 1.0001, "step": 2176 }, { "epoch": 0.07890254068355623, "grad_norm": 2.6757094111423063, "learning_rate": 9.93744149614843e-06, "loss": 1.007, "step": 2177 }, { "epoch": 0.07893878438621289, "grad_norm": 2.609940043023765, "learning_rate": 9.937348908055507e-06, "loss": 0.9208, "step": 2178 }, { "epoch": 0.07897502808886955, "grad_norm": 2.414033058538829, "learning_rate": 9.937256251928902e-06, "loss": 0.926, "step": 2179 }, { "epoch": 0.07901127179152623, "grad_norm": 2.7481505359607104, "learning_rate": 9.937163527769893e-06, "loss": 1.2385, "step": 2180 }, { "epoch": 0.07904751549418289, "grad_norm": 2.3584012132134125, "learning_rate": 9.937070735579755e-06, "loss": 1.0821, "step": 2181 }, { "epoch": 0.07908375919683955, "grad_norm": 2.443524745854296, "learning_rate": 9.936977875359767e-06, "loss": 1.2581, "step": 2182 }, { "epoch": 0.07912000289949621, "grad_norm": 2.492314310177437, "learning_rate": 9.936884947111212e-06, "loss": 0.958, "step": 2183 }, { "epoch": 0.07915624660215288, "grad_norm": 2.632898459949441, "learning_rate": 9.936791950835368e-06, "loss": 0.9883, "step": 2184 }, { "epoch": 0.07919249030480954, "grad_norm": 2.732576854782792, "learning_rate": 9.936698886533514e-06, "loss": 0.9631, "step": 2185 }, { "epoch": 0.0792287340074662, "grad_norm": 2.5325000400725197, "learning_rate": 9.936605754206938e-06, "loss": 0.8352, "step": 2186 }, { "epoch": 0.07926497771012286, "grad_norm": 2.7419598285055176, "learning_rate": 9.93651255385692e-06, "loss": 1.039, "step": 2187 }, { "epoch": 0.07930122141277952, "grad_norm": 2.324583810211272, "learning_rate": 9.936419285484742e-06, "loss": 0.8859, "step": 2188 }, { "epoch": 0.0793374651154362, "grad_norm": 2.5319478133757336, "learning_rate": 9.936325949091695e-06, "loss": 0.9671, "step": 2189 }, { "epoch": 0.07937370881809286, "grad_norm": 1.8853405184347847, "learning_rate": 9.936232544679059e-06, "loss": 1.033, "step": 2190 }, { "epoch": 0.07940995252074952, "grad_norm": 2.373770721468508, "learning_rate": 9.936139072248124e-06, "loss": 1.01, "step": 2191 }, { "epoch": 0.07944619622340618, "grad_norm": 2.438491223849755, "learning_rate": 9.936045531800179e-06, "loss": 1.1362, "step": 2192 }, { "epoch": 0.07948243992606285, "grad_norm": 2.179757203718046, "learning_rate": 9.935951923336511e-06, "loss": 0.7769, "step": 2193 }, { "epoch": 0.07951868362871951, "grad_norm": 2.6752510810916847, "learning_rate": 9.935858246858412e-06, "loss": 1.1004, "step": 2194 }, { "epoch": 0.07955492733137617, "grad_norm": 2.439733355035189, "learning_rate": 9.935764502367171e-06, "loss": 0.8393, "step": 2195 }, { "epoch": 0.07959117103403283, "grad_norm": 2.6169290793186106, "learning_rate": 9.93567068986408e-06, "loss": 0.8743, "step": 2196 }, { "epoch": 0.0796274147366895, "grad_norm": 2.328796959283676, "learning_rate": 9.935576809350431e-06, "loss": 1.1798, "step": 2197 }, { "epoch": 0.07966365843934617, "grad_norm": 2.2913459956020463, "learning_rate": 9.93548286082752e-06, "loss": 1.0516, "step": 2198 }, { "epoch": 0.07969990214200283, "grad_norm": 2.817842000987872, "learning_rate": 9.93538884429664e-06, "loss": 0.9585, "step": 2199 }, { "epoch": 0.07973614584465949, "grad_norm": 2.338106032008898, "learning_rate": 9.935294759759087e-06, "loss": 0.9616, "step": 2200 }, { "epoch": 0.07977238954731615, "grad_norm": 2.5245996788906075, "learning_rate": 9.935200607216154e-06, "loss": 1.0436, "step": 2201 }, { "epoch": 0.07980863324997282, "grad_norm": 2.6688719433249415, "learning_rate": 9.935106386669145e-06, "loss": 0.9563, "step": 2202 }, { "epoch": 0.07984487695262948, "grad_norm": 2.475808327917855, "learning_rate": 9.935012098119351e-06, "loss": 1.1209, "step": 2203 }, { "epoch": 0.07988112065528614, "grad_norm": 2.414287834719353, "learning_rate": 9.934917741568077e-06, "loss": 0.9482, "step": 2204 }, { "epoch": 0.0799173643579428, "grad_norm": 2.503593778620403, "learning_rate": 9.934823317016622e-06, "loss": 0.9628, "step": 2205 }, { "epoch": 0.07995360806059947, "grad_norm": 2.2548777269048124, "learning_rate": 9.934728824466283e-06, "loss": 1.1711, "step": 2206 }, { "epoch": 0.07998985176325614, "grad_norm": 2.3233496955348194, "learning_rate": 9.93463426391837e-06, "loss": 0.9387, "step": 2207 }, { "epoch": 0.0800260954659128, "grad_norm": 2.9523912816755944, "learning_rate": 9.934539635374177e-06, "loss": 1.0617, "step": 2208 }, { "epoch": 0.08006233916856946, "grad_norm": 2.6114337577853766, "learning_rate": 9.934444938835015e-06, "loss": 0.779, "step": 2209 }, { "epoch": 0.08009858287122612, "grad_norm": 2.7973236593292676, "learning_rate": 9.934350174302185e-06, "loss": 1.1088, "step": 2210 }, { "epoch": 0.0801348265738828, "grad_norm": 2.246633424352738, "learning_rate": 9.934255341776992e-06, "loss": 0.8812, "step": 2211 }, { "epoch": 0.08017107027653946, "grad_norm": 2.5175507246934474, "learning_rate": 9.934160441260748e-06, "loss": 0.9315, "step": 2212 }, { "epoch": 0.08020731397919612, "grad_norm": 2.566553012513911, "learning_rate": 9.934065472754754e-06, "loss": 0.9838, "step": 2213 }, { "epoch": 0.08024355768185278, "grad_norm": 2.425359008646604, "learning_rate": 9.933970436260323e-06, "loss": 0.994, "step": 2214 }, { "epoch": 0.08027980138450944, "grad_norm": 2.352173764140118, "learning_rate": 9.933875331778763e-06, "loss": 1.1245, "step": 2215 }, { "epoch": 0.08031604508716611, "grad_norm": 2.5246479399342534, "learning_rate": 9.933780159311386e-06, "loss": 0.8757, "step": 2216 }, { "epoch": 0.08035228878982277, "grad_norm": 2.7403583126948328, "learning_rate": 9.9336849188595e-06, "loss": 1.1644, "step": 2217 }, { "epoch": 0.08038853249247943, "grad_norm": 2.508999239177527, "learning_rate": 9.933589610424421e-06, "loss": 1.0301, "step": 2218 }, { "epoch": 0.08042477619513609, "grad_norm": 2.5214251838188497, "learning_rate": 9.93349423400746e-06, "loss": 1.0431, "step": 2219 }, { "epoch": 0.08046101989779277, "grad_norm": 2.3093621777925524, "learning_rate": 9.933398789609934e-06, "loss": 0.981, "step": 2220 }, { "epoch": 0.08049726360044943, "grad_norm": 2.556631172886141, "learning_rate": 9.933303277233157e-06, "loss": 0.9638, "step": 2221 }, { "epoch": 0.08053350730310609, "grad_norm": 2.518358342275103, "learning_rate": 9.933207696878442e-06, "loss": 1.0113, "step": 2222 }, { "epoch": 0.08056975100576275, "grad_norm": 2.4756249010693088, "learning_rate": 9.933112048547108e-06, "loss": 0.8408, "step": 2223 }, { "epoch": 0.0806059947084194, "grad_norm": 2.1470028478940955, "learning_rate": 9.933016332240478e-06, "loss": 0.8718, "step": 2224 }, { "epoch": 0.08064223841107608, "grad_norm": 2.7988429246704634, "learning_rate": 9.932920547959861e-06, "loss": 1.0596, "step": 2225 }, { "epoch": 0.08067848211373274, "grad_norm": 2.392069671007185, "learning_rate": 9.932824695706584e-06, "loss": 1.0233, "step": 2226 }, { "epoch": 0.0807147258163894, "grad_norm": 2.7888907541448993, "learning_rate": 9.932728775481967e-06, "loss": 0.9777, "step": 2227 }, { "epoch": 0.08075096951904606, "grad_norm": 2.6000118637077154, "learning_rate": 9.932632787287329e-06, "loss": 0.9326, "step": 2228 }, { "epoch": 0.08078721322170272, "grad_norm": 2.3789452828052435, "learning_rate": 9.932536731123997e-06, "loss": 0.9763, "step": 2229 }, { "epoch": 0.0808234569243594, "grad_norm": 2.8756477922490915, "learning_rate": 9.93244060699329e-06, "loss": 1.1047, "step": 2230 }, { "epoch": 0.08085970062701606, "grad_norm": 2.4720917597515406, "learning_rate": 9.932344414896533e-06, "loss": 1.1729, "step": 2231 }, { "epoch": 0.08089594432967272, "grad_norm": 2.4740049334685983, "learning_rate": 9.932248154835055e-06, "loss": 1.2176, "step": 2232 }, { "epoch": 0.08093218803232938, "grad_norm": 2.7467788967785665, "learning_rate": 9.932151826810182e-06, "loss": 1.0872, "step": 2233 }, { "epoch": 0.08096843173498605, "grad_norm": 2.84344466315475, "learning_rate": 9.932055430823237e-06, "loss": 1.1592, "step": 2234 }, { "epoch": 0.08100467543764271, "grad_norm": 2.759666273898018, "learning_rate": 9.93195896687555e-06, "loss": 1.1525, "step": 2235 }, { "epoch": 0.08104091914029937, "grad_norm": 2.7557415204344173, "learning_rate": 9.931862434968454e-06, "loss": 0.9942, "step": 2236 }, { "epoch": 0.08107716284295603, "grad_norm": 2.5102019069233195, "learning_rate": 9.931765835103276e-06, "loss": 0.8971, "step": 2237 }, { "epoch": 0.08111340654561269, "grad_norm": 2.6976024755332126, "learning_rate": 9.931669167281345e-06, "loss": 1.1576, "step": 2238 }, { "epoch": 0.08114965024826937, "grad_norm": 2.5458523918076814, "learning_rate": 9.931572431503998e-06, "loss": 1.156, "step": 2239 }, { "epoch": 0.08118589395092603, "grad_norm": 2.569897157332778, "learning_rate": 9.931475627772565e-06, "loss": 1.0014, "step": 2240 }, { "epoch": 0.08122213765358269, "grad_norm": 2.416150574883883, "learning_rate": 9.93137875608838e-06, "loss": 1.161, "step": 2241 }, { "epoch": 0.08125838135623935, "grad_norm": 2.416085187979108, "learning_rate": 9.931281816452777e-06, "loss": 0.906, "step": 2242 }, { "epoch": 0.08129462505889602, "grad_norm": 3.4643952819281076, "learning_rate": 9.931184808867093e-06, "loss": 0.9839, "step": 2243 }, { "epoch": 0.08133086876155268, "grad_norm": 2.680288939854586, "learning_rate": 9.931087733332665e-06, "loss": 1.1211, "step": 2244 }, { "epoch": 0.08136711246420934, "grad_norm": 2.6024896821818393, "learning_rate": 9.930990589850832e-06, "loss": 1.1058, "step": 2245 }, { "epoch": 0.081403356166866, "grad_norm": 2.5522617027349717, "learning_rate": 9.93089337842293e-06, "loss": 1.0224, "step": 2246 }, { "epoch": 0.08143959986952266, "grad_norm": 2.311584963500685, "learning_rate": 9.930796099050299e-06, "loss": 1.0499, "step": 2247 }, { "epoch": 0.08147584357217934, "grad_norm": 2.3915140970486606, "learning_rate": 9.93069875173428e-06, "loss": 0.9797, "step": 2248 }, { "epoch": 0.081512087274836, "grad_norm": 2.6839258037413436, "learning_rate": 9.930601336476214e-06, "loss": 0.891, "step": 2249 }, { "epoch": 0.08154833097749266, "grad_norm": 2.4191143235770673, "learning_rate": 9.930503853277443e-06, "loss": 0.9379, "step": 2250 }, { "epoch": 0.08158457468014932, "grad_norm": 2.3694773239774913, "learning_rate": 9.93040630213931e-06, "loss": 1.0114, "step": 2251 }, { "epoch": 0.08162081838280599, "grad_norm": 1.9067805964168503, "learning_rate": 9.930308683063161e-06, "loss": 0.7483, "step": 2252 }, { "epoch": 0.08165706208546265, "grad_norm": 2.690353187360814, "learning_rate": 9.930210996050342e-06, "loss": 1.0977, "step": 2253 }, { "epoch": 0.08169330578811931, "grad_norm": 2.2874729354534775, "learning_rate": 9.930113241102195e-06, "loss": 0.9156, "step": 2254 }, { "epoch": 0.08172954949077597, "grad_norm": 2.6108697741641125, "learning_rate": 9.930015418220069e-06, "loss": 1.0359, "step": 2255 }, { "epoch": 0.08176579319343263, "grad_norm": 2.4228067212672633, "learning_rate": 9.929917527405313e-06, "loss": 1.1289, "step": 2256 }, { "epoch": 0.08180203689608931, "grad_norm": 2.765657776267964, "learning_rate": 9.929819568659275e-06, "loss": 1.1022, "step": 2257 }, { "epoch": 0.08183828059874597, "grad_norm": 2.3207427011119086, "learning_rate": 9.929721541983305e-06, "loss": 1.1637, "step": 2258 }, { "epoch": 0.08187452430140263, "grad_norm": 2.29749616341382, "learning_rate": 9.929623447378752e-06, "loss": 1.0278, "step": 2259 }, { "epoch": 0.08191076800405929, "grad_norm": 2.5804034753728478, "learning_rate": 9.92952528484697e-06, "loss": 0.8776, "step": 2260 }, { "epoch": 0.08194701170671596, "grad_norm": 2.630323334111654, "learning_rate": 9.929427054389311e-06, "loss": 1.2521, "step": 2261 }, { "epoch": 0.08198325540937262, "grad_norm": 2.5694068272815884, "learning_rate": 9.929328756007129e-06, "loss": 0.9693, "step": 2262 }, { "epoch": 0.08201949911202928, "grad_norm": 2.459025329716011, "learning_rate": 9.929230389701778e-06, "loss": 1.0853, "step": 2263 }, { "epoch": 0.08205574281468594, "grad_norm": 2.63130434211009, "learning_rate": 9.929131955474612e-06, "loss": 1.1032, "step": 2264 }, { "epoch": 0.0820919865173426, "grad_norm": 2.6132296070389978, "learning_rate": 9.92903345332699e-06, "loss": 1.1441, "step": 2265 }, { "epoch": 0.08212823021999928, "grad_norm": 2.6341909476716054, "learning_rate": 9.928934883260269e-06, "loss": 1.1557, "step": 2266 }, { "epoch": 0.08216447392265594, "grad_norm": 2.605723881414564, "learning_rate": 9.928836245275804e-06, "loss": 0.9276, "step": 2267 }, { "epoch": 0.0822007176253126, "grad_norm": 2.287777264476196, "learning_rate": 9.928737539374956e-06, "loss": 1.1162, "step": 2268 }, { "epoch": 0.08223696132796926, "grad_norm": 2.870699688468729, "learning_rate": 9.928638765559087e-06, "loss": 1.0892, "step": 2269 }, { "epoch": 0.08227320503062593, "grad_norm": 2.4618612579876347, "learning_rate": 9.928539923829557e-06, "loss": 1.1502, "step": 2270 }, { "epoch": 0.0823094487332826, "grad_norm": 2.5274564499555456, "learning_rate": 9.928441014187726e-06, "loss": 1.0339, "step": 2271 }, { "epoch": 0.08234569243593926, "grad_norm": 2.7713322859603053, "learning_rate": 9.928342036634959e-06, "loss": 1.1181, "step": 2272 }, { "epoch": 0.08238193613859592, "grad_norm": 2.699287712742314, "learning_rate": 9.92824299117262e-06, "loss": 0.9691, "step": 2273 }, { "epoch": 0.08241817984125258, "grad_norm": 2.5755588856946754, "learning_rate": 9.928143877802073e-06, "loss": 0.8456, "step": 2274 }, { "epoch": 0.08245442354390925, "grad_norm": 2.598300368340022, "learning_rate": 9.928044696524684e-06, "loss": 0.8232, "step": 2275 }, { "epoch": 0.08249066724656591, "grad_norm": 2.410453296965579, "learning_rate": 9.927945447341818e-06, "loss": 1.0195, "step": 2276 }, { "epoch": 0.08252691094922257, "grad_norm": 2.5535126254329112, "learning_rate": 9.927846130254844e-06, "loss": 0.9792, "step": 2277 }, { "epoch": 0.08256315465187923, "grad_norm": 2.638166984374711, "learning_rate": 9.927746745265133e-06, "loss": 0.8953, "step": 2278 }, { "epoch": 0.0825993983545359, "grad_norm": 2.2792186591612458, "learning_rate": 9.92764729237405e-06, "loss": 0.8915, "step": 2279 }, { "epoch": 0.08263564205719257, "grad_norm": 2.658690108419861, "learning_rate": 9.927547771582969e-06, "loss": 1.2026, "step": 2280 }, { "epoch": 0.08267188575984923, "grad_norm": 2.500268637684792, "learning_rate": 9.927448182893259e-06, "loss": 1.0609, "step": 2281 }, { "epoch": 0.08270812946250589, "grad_norm": 2.47784122582398, "learning_rate": 9.927348526306292e-06, "loss": 0.9863, "step": 2282 }, { "epoch": 0.08274437316516255, "grad_norm": 2.55274191343437, "learning_rate": 9.927248801823443e-06, "loss": 1.0804, "step": 2283 }, { "epoch": 0.08278061686781922, "grad_norm": 2.332750656199557, "learning_rate": 9.927149009446085e-06, "loss": 1.0807, "step": 2284 }, { "epoch": 0.08281686057047588, "grad_norm": 2.443412847635185, "learning_rate": 9.927049149175594e-06, "loss": 0.9972, "step": 2285 }, { "epoch": 0.08285310427313254, "grad_norm": 2.8550202304791608, "learning_rate": 9.926949221013346e-06, "loss": 1.0613, "step": 2286 }, { "epoch": 0.0828893479757892, "grad_norm": 2.6054513845457907, "learning_rate": 9.926849224960718e-06, "loss": 0.9381, "step": 2287 }, { "epoch": 0.08292559167844588, "grad_norm": 2.3323907158255093, "learning_rate": 9.926749161019085e-06, "loss": 1.1071, "step": 2288 }, { "epoch": 0.08296183538110254, "grad_norm": 2.162903137539264, "learning_rate": 9.92664902918983e-06, "loss": 0.9233, "step": 2289 }, { "epoch": 0.0829980790837592, "grad_norm": 2.6115497219653485, "learning_rate": 9.92654882947433e-06, "loss": 0.9652, "step": 2290 }, { "epoch": 0.08303432278641586, "grad_norm": 2.6346559194237287, "learning_rate": 9.926448561873968e-06, "loss": 1.0115, "step": 2291 }, { "epoch": 0.08307056648907252, "grad_norm": 2.404936878992064, "learning_rate": 9.926348226390123e-06, "loss": 0.9538, "step": 2292 }, { "epoch": 0.08310681019172919, "grad_norm": 2.6342213777807646, "learning_rate": 9.92624782302418e-06, "loss": 1.1906, "step": 2293 }, { "epoch": 0.08314305389438585, "grad_norm": 2.3243508628504763, "learning_rate": 9.92614735177752e-06, "loss": 0.97, "step": 2294 }, { "epoch": 0.08317929759704251, "grad_norm": 2.573403869964521, "learning_rate": 9.926046812651528e-06, "loss": 1.0641, "step": 2295 }, { "epoch": 0.08321554129969917, "grad_norm": 2.4506993446208765, "learning_rate": 9.925946205647592e-06, "loss": 1.0127, "step": 2296 }, { "epoch": 0.08325178500235585, "grad_norm": 2.4112303164797995, "learning_rate": 9.925845530767095e-06, "loss": 1.0026, "step": 2297 }, { "epoch": 0.0832880287050125, "grad_norm": 2.666259832880138, "learning_rate": 9.925744788011426e-06, "loss": 1.0716, "step": 2298 }, { "epoch": 0.08332427240766917, "grad_norm": 2.317586457093397, "learning_rate": 9.925643977381973e-06, "loss": 0.9868, "step": 2299 }, { "epoch": 0.08336051611032583, "grad_norm": 2.546889537391047, "learning_rate": 9.925543098880125e-06, "loss": 1.0506, "step": 2300 }, { "epoch": 0.08339675981298249, "grad_norm": 2.3582414671444822, "learning_rate": 9.925442152507272e-06, "loss": 1.0675, "step": 2301 }, { "epoch": 0.08343300351563916, "grad_norm": 2.4676989553216733, "learning_rate": 9.925341138264805e-06, "loss": 1.0212, "step": 2302 }, { "epoch": 0.08346924721829582, "grad_norm": 2.5050146433157097, "learning_rate": 9.925240056154118e-06, "loss": 1.0155, "step": 2303 }, { "epoch": 0.08350549092095248, "grad_norm": 2.591264141839035, "learning_rate": 9.925138906176599e-06, "loss": 1.0056, "step": 2304 }, { "epoch": 0.08354173462360914, "grad_norm": 2.5025069544586453, "learning_rate": 9.925037688333646e-06, "loss": 1.0815, "step": 2305 }, { "epoch": 0.08357797832626582, "grad_norm": 2.3710358146382773, "learning_rate": 9.92493640262665e-06, "loss": 0.9997, "step": 2306 }, { "epoch": 0.08361422202892248, "grad_norm": 2.4955771615807283, "learning_rate": 9.92483504905701e-06, "loss": 1.0515, "step": 2307 }, { "epoch": 0.08365046573157914, "grad_norm": 2.8520677941839208, "learning_rate": 9.924733627626122e-06, "loss": 1.0859, "step": 2308 }, { "epoch": 0.0836867094342358, "grad_norm": 2.6497593509898607, "learning_rate": 9.924632138335382e-06, "loss": 0.899, "step": 2309 }, { "epoch": 0.08372295313689246, "grad_norm": 2.398667349824609, "learning_rate": 9.92453058118619e-06, "loss": 1.1711, "step": 2310 }, { "epoch": 0.08375919683954913, "grad_norm": 2.7524115311514192, "learning_rate": 9.924428956179945e-06, "loss": 0.9552, "step": 2311 }, { "epoch": 0.08379544054220579, "grad_norm": 2.8998807070632258, "learning_rate": 9.924327263318047e-06, "loss": 1.1753, "step": 2312 }, { "epoch": 0.08383168424486245, "grad_norm": 2.288997220420434, "learning_rate": 9.924225502601898e-06, "loss": 1.0562, "step": 2313 }, { "epoch": 0.08386792794751911, "grad_norm": 2.9866665618996477, "learning_rate": 9.9241236740329e-06, "loss": 0.9951, "step": 2314 }, { "epoch": 0.08390417165017579, "grad_norm": 2.4107101982179358, "learning_rate": 9.924021777612454e-06, "loss": 1.1248, "step": 2315 }, { "epoch": 0.08394041535283245, "grad_norm": 2.8697161054384144, "learning_rate": 9.923919813341966e-06, "loss": 1.185, "step": 2316 }, { "epoch": 0.08397665905548911, "grad_norm": 2.383730473833038, "learning_rate": 9.923817781222842e-06, "loss": 0.9842, "step": 2317 }, { "epoch": 0.08401290275814577, "grad_norm": 2.301071189354383, "learning_rate": 9.923715681256487e-06, "loss": 0.9639, "step": 2318 }, { "epoch": 0.08404914646080243, "grad_norm": 2.767041304238905, "learning_rate": 9.923613513444308e-06, "loss": 0.9021, "step": 2319 }, { "epoch": 0.0840853901634591, "grad_norm": 2.3338455349214864, "learning_rate": 9.92351127778771e-06, "loss": 0.9998, "step": 2320 }, { "epoch": 0.08412163386611576, "grad_norm": 2.539945324815012, "learning_rate": 9.923408974288107e-06, "loss": 1.0666, "step": 2321 }, { "epoch": 0.08415787756877242, "grad_norm": 2.734888730000749, "learning_rate": 9.923306602946905e-06, "loss": 1.047, "step": 2322 }, { "epoch": 0.08419412127142908, "grad_norm": 2.3068233254065102, "learning_rate": 9.923204163765514e-06, "loss": 0.9653, "step": 2323 }, { "epoch": 0.08423036497408576, "grad_norm": 2.812987277226077, "learning_rate": 9.923101656745348e-06, "loss": 1.0392, "step": 2324 }, { "epoch": 0.08426660867674242, "grad_norm": 2.3216390729010064, "learning_rate": 9.92299908188782e-06, "loss": 0.8687, "step": 2325 }, { "epoch": 0.08430285237939908, "grad_norm": 2.3806894297687906, "learning_rate": 9.922896439194342e-06, "loss": 0.9938, "step": 2326 }, { "epoch": 0.08433909608205574, "grad_norm": 2.661695181705108, "learning_rate": 9.922793728666327e-06, "loss": 1.0001, "step": 2327 }, { "epoch": 0.0843753397847124, "grad_norm": 2.6146953655319045, "learning_rate": 9.922690950305191e-06, "loss": 0.8426, "step": 2328 }, { "epoch": 0.08441158348736907, "grad_norm": 2.31037632523907, "learning_rate": 9.922588104112351e-06, "loss": 1.0323, "step": 2329 }, { "epoch": 0.08444782719002573, "grad_norm": 2.623898158651432, "learning_rate": 9.922485190089226e-06, "loss": 0.9613, "step": 2330 }, { "epoch": 0.0844840708926824, "grad_norm": 2.5254602157858526, "learning_rate": 9.922382208237232e-06, "loss": 1.0695, "step": 2331 }, { "epoch": 0.08452031459533905, "grad_norm": 2.831397145128254, "learning_rate": 9.922279158557785e-06, "loss": 1.0437, "step": 2332 }, { "epoch": 0.08455655829799573, "grad_norm": 2.218459215435316, "learning_rate": 9.922176041052311e-06, "loss": 1.0636, "step": 2333 }, { "epoch": 0.08459280200065239, "grad_norm": 2.4727832614654983, "learning_rate": 9.922072855722227e-06, "loss": 1.0342, "step": 2334 }, { "epoch": 0.08462904570330905, "grad_norm": 2.348484041557049, "learning_rate": 9.921969602568957e-06, "loss": 0.9061, "step": 2335 }, { "epoch": 0.08466528940596571, "grad_norm": 2.736508571093142, "learning_rate": 9.921866281593922e-06, "loss": 0.9898, "step": 2336 }, { "epoch": 0.08470153310862237, "grad_norm": 2.4782177001585, "learning_rate": 9.921762892798545e-06, "loss": 0.8955, "step": 2337 }, { "epoch": 0.08473777681127904, "grad_norm": 2.3339785615837902, "learning_rate": 9.921659436184253e-06, "loss": 1.0979, "step": 2338 }, { "epoch": 0.0847740205139357, "grad_norm": 2.462939640092608, "learning_rate": 9.92155591175247e-06, "loss": 1.0947, "step": 2339 }, { "epoch": 0.08481026421659237, "grad_norm": 2.463977497952275, "learning_rate": 9.921452319504623e-06, "loss": 1.0538, "step": 2340 }, { "epoch": 0.08484650791924903, "grad_norm": 2.6604139802815814, "learning_rate": 9.92134865944214e-06, "loss": 1.1055, "step": 2341 }, { "epoch": 0.0848827516219057, "grad_norm": 2.3831848442487114, "learning_rate": 9.921244931566449e-06, "loss": 0.9201, "step": 2342 }, { "epoch": 0.08491899532456236, "grad_norm": 2.5447450084546115, "learning_rate": 9.921141135878978e-06, "loss": 1.1878, "step": 2343 }, { "epoch": 0.08495523902721902, "grad_norm": 2.4913779953594632, "learning_rate": 9.92103727238116e-06, "loss": 0.9998, "step": 2344 }, { "epoch": 0.08499148272987568, "grad_norm": 2.509031811890937, "learning_rate": 9.920933341074424e-06, "loss": 1.0609, "step": 2345 }, { "epoch": 0.08502772643253234, "grad_norm": 2.281526682312867, "learning_rate": 9.9208293419602e-06, "loss": 0.9748, "step": 2346 }, { "epoch": 0.08506397013518902, "grad_norm": 2.462373060894886, "learning_rate": 9.920725275039926e-06, "loss": 1.0441, "step": 2347 }, { "epoch": 0.08510021383784568, "grad_norm": 2.4658702984421335, "learning_rate": 9.920621140315035e-06, "loss": 1.0679, "step": 2348 }, { "epoch": 0.08513645754050234, "grad_norm": 2.562845454373311, "learning_rate": 9.920516937786959e-06, "loss": 1.0554, "step": 2349 }, { "epoch": 0.085172701243159, "grad_norm": 2.4262088606777437, "learning_rate": 9.920412667457136e-06, "loss": 1.062, "step": 2350 }, { "epoch": 0.08520894494581567, "grad_norm": 2.397279903877801, "learning_rate": 9.920308329327e-06, "loss": 1.0266, "step": 2351 }, { "epoch": 0.08524518864847233, "grad_norm": 2.6089207260569465, "learning_rate": 9.920203923397992e-06, "loss": 0.8829, "step": 2352 }, { "epoch": 0.08528143235112899, "grad_norm": 2.4684434071198855, "learning_rate": 9.920099449671552e-06, "loss": 0.9745, "step": 2353 }, { "epoch": 0.08531767605378565, "grad_norm": 2.5266629799994065, "learning_rate": 9.919994908149116e-06, "loss": 0.9768, "step": 2354 }, { "epoch": 0.08535391975644231, "grad_norm": 2.3878750354633946, "learning_rate": 9.919890298832125e-06, "loss": 1.0584, "step": 2355 }, { "epoch": 0.08539016345909899, "grad_norm": 2.1482489773890494, "learning_rate": 9.91978562172202e-06, "loss": 0.8189, "step": 2356 }, { "epoch": 0.08542640716175565, "grad_norm": 2.6537946906391814, "learning_rate": 9.919680876820246e-06, "loss": 0.7908, "step": 2357 }, { "epoch": 0.0854626508644123, "grad_norm": 2.655639874201575, "learning_rate": 9.919576064128244e-06, "loss": 1.0802, "step": 2358 }, { "epoch": 0.08549889456706897, "grad_norm": 2.4807658717773693, "learning_rate": 9.91947118364746e-06, "loss": 1.0234, "step": 2359 }, { "epoch": 0.08553513826972564, "grad_norm": 2.4501208491979685, "learning_rate": 9.919366235379338e-06, "loss": 1.1985, "step": 2360 }, { "epoch": 0.0855713819723823, "grad_norm": 2.5171101731213943, "learning_rate": 9.919261219325324e-06, "loss": 0.9363, "step": 2361 }, { "epoch": 0.08560762567503896, "grad_norm": 2.699394684929645, "learning_rate": 9.919156135486867e-06, "loss": 1.0371, "step": 2362 }, { "epoch": 0.08564386937769562, "grad_norm": 2.3957731847488875, "learning_rate": 9.919050983865412e-06, "loss": 0.8886, "step": 2363 }, { "epoch": 0.08568011308035228, "grad_norm": 2.5597632567655, "learning_rate": 9.91894576446241e-06, "loss": 1.0591, "step": 2364 }, { "epoch": 0.08571635678300896, "grad_norm": 2.899958464664698, "learning_rate": 9.91884047727931e-06, "loss": 0.9714, "step": 2365 }, { "epoch": 0.08575260048566562, "grad_norm": 2.445122891773546, "learning_rate": 9.918735122317563e-06, "loss": 1.001, "step": 2366 }, { "epoch": 0.08578884418832228, "grad_norm": 2.3403328119691524, "learning_rate": 9.91862969957862e-06, "loss": 0.9767, "step": 2367 }, { "epoch": 0.08582508789097894, "grad_norm": 2.2447354129890567, "learning_rate": 9.918524209063936e-06, "loss": 0.8453, "step": 2368 }, { "epoch": 0.08586133159363561, "grad_norm": 2.523725649209135, "learning_rate": 9.918418650774962e-06, "loss": 1.0426, "step": 2369 }, { "epoch": 0.08589757529629227, "grad_norm": 2.4726951598107845, "learning_rate": 9.918313024713154e-06, "loss": 1.1554, "step": 2370 }, { "epoch": 0.08593381899894893, "grad_norm": 2.3446553620712094, "learning_rate": 9.918207330879967e-06, "loss": 0.9451, "step": 2371 }, { "epoch": 0.08597006270160559, "grad_norm": 2.679727511679394, "learning_rate": 9.918101569276856e-06, "loss": 0.8813, "step": 2372 }, { "epoch": 0.08600630640426225, "grad_norm": 2.452607941290233, "learning_rate": 9.917995739905282e-06, "loss": 1.0128, "step": 2373 }, { "epoch": 0.08604255010691893, "grad_norm": 2.5605699135841964, "learning_rate": 9.9178898427667e-06, "loss": 0.9201, "step": 2374 }, { "epoch": 0.08607879380957559, "grad_norm": 2.4662920287483865, "learning_rate": 9.91778387786257e-06, "loss": 0.9284, "step": 2375 }, { "epoch": 0.08611503751223225, "grad_norm": 2.4544593085282997, "learning_rate": 9.917677845194354e-06, "loss": 0.973, "step": 2376 }, { "epoch": 0.08615128121488891, "grad_norm": 2.493504054473984, "learning_rate": 9.91757174476351e-06, "loss": 0.9817, "step": 2377 }, { "epoch": 0.08618752491754558, "grad_norm": 2.888367480261037, "learning_rate": 9.9174655765715e-06, "loss": 1.0204, "step": 2378 }, { "epoch": 0.08622376862020224, "grad_norm": 2.5205797021863066, "learning_rate": 9.91735934061979e-06, "loss": 1.1886, "step": 2379 }, { "epoch": 0.0862600123228589, "grad_norm": 2.528456839462121, "learning_rate": 9.917253036909842e-06, "loss": 1.0587, "step": 2380 }, { "epoch": 0.08629625602551556, "grad_norm": 2.3305258807119897, "learning_rate": 9.917146665443122e-06, "loss": 1.1577, "step": 2381 }, { "epoch": 0.08633249972817222, "grad_norm": 2.5815773187269597, "learning_rate": 9.917040226221093e-06, "loss": 0.9021, "step": 2382 }, { "epoch": 0.0863687434308289, "grad_norm": 2.5326772934126374, "learning_rate": 9.916933719245224e-06, "loss": 1.1673, "step": 2383 }, { "epoch": 0.08640498713348556, "grad_norm": 2.5070129728699726, "learning_rate": 9.916827144516982e-06, "loss": 1.0847, "step": 2384 }, { "epoch": 0.08644123083614222, "grad_norm": 2.4515960578310687, "learning_rate": 9.916720502037835e-06, "loss": 0.93, "step": 2385 }, { "epoch": 0.08647747453879888, "grad_norm": 2.48324109528191, "learning_rate": 9.916613791809255e-06, "loss": 1.04, "step": 2386 }, { "epoch": 0.08651371824145555, "grad_norm": 2.769289207535868, "learning_rate": 9.916507013832709e-06, "loss": 1.1426, "step": 2387 }, { "epoch": 0.08654996194411221, "grad_norm": 2.7561501781651856, "learning_rate": 9.91640016810967e-06, "loss": 1.0447, "step": 2388 }, { "epoch": 0.08658620564676887, "grad_norm": 2.3061679040346137, "learning_rate": 9.916293254641609e-06, "loss": 1.0061, "step": 2389 }, { "epoch": 0.08662244934942553, "grad_norm": 2.4338069889580494, "learning_rate": 9.916186273430001e-06, "loss": 1.2534, "step": 2390 }, { "epoch": 0.0866586930520822, "grad_norm": 2.6407332248955457, "learning_rate": 9.91607922447632e-06, "loss": 0.9609, "step": 2391 }, { "epoch": 0.08669493675473887, "grad_norm": 2.74562193474882, "learning_rate": 9.915972107782039e-06, "loss": 1.0882, "step": 2392 }, { "epoch": 0.08673118045739553, "grad_norm": 2.322497719670539, "learning_rate": 9.915864923348636e-06, "loss": 0.8908, "step": 2393 }, { "epoch": 0.08676742416005219, "grad_norm": 2.2983339075071747, "learning_rate": 9.91575767117759e-06, "loss": 0.9991, "step": 2394 }, { "epoch": 0.08680366786270885, "grad_norm": 2.5231498813631017, "learning_rate": 9.915650351270374e-06, "loss": 0.8833, "step": 2395 }, { "epoch": 0.08683991156536552, "grad_norm": 2.5720061871517585, "learning_rate": 9.915542963628469e-06, "loss": 1.0624, "step": 2396 }, { "epoch": 0.08687615526802218, "grad_norm": 2.62939955193302, "learning_rate": 9.915435508253356e-06, "loss": 0.9957, "step": 2397 }, { "epoch": 0.08691239897067884, "grad_norm": 2.502740195930256, "learning_rate": 9.915327985146511e-06, "loss": 1.1199, "step": 2398 }, { "epoch": 0.0869486426733355, "grad_norm": 2.513803617246907, "learning_rate": 9.915220394309422e-06, "loss": 0.9379, "step": 2399 }, { "epoch": 0.08698488637599217, "grad_norm": 2.6802079101424834, "learning_rate": 9.915112735743569e-06, "loss": 1.014, "step": 2400 }, { "epoch": 0.08702113007864884, "grad_norm": 2.477630878598541, "learning_rate": 9.915005009450432e-06, "loss": 1.0092, "step": 2401 }, { "epoch": 0.0870573737813055, "grad_norm": 2.188633891348841, "learning_rate": 9.914897215431501e-06, "loss": 1.1296, "step": 2402 }, { "epoch": 0.08709361748396216, "grad_norm": 2.683760732491508, "learning_rate": 9.914789353688258e-06, "loss": 1.1479, "step": 2403 }, { "epoch": 0.08712986118661882, "grad_norm": 2.306940398047011, "learning_rate": 9.91468142422219e-06, "loss": 0.9549, "step": 2404 }, { "epoch": 0.0871661048892755, "grad_norm": 2.5407621127383275, "learning_rate": 9.914573427034785e-06, "loss": 0.9718, "step": 2405 }, { "epoch": 0.08720234859193216, "grad_norm": 2.4833124349807827, "learning_rate": 9.91446536212753e-06, "loss": 0.8745, "step": 2406 }, { "epoch": 0.08723859229458882, "grad_norm": 2.2863415823625286, "learning_rate": 9.914357229501915e-06, "loss": 0.9986, "step": 2407 }, { "epoch": 0.08727483599724548, "grad_norm": 2.393937806754512, "learning_rate": 9.914249029159429e-06, "loss": 0.8958, "step": 2408 }, { "epoch": 0.08731107969990214, "grad_norm": 2.5618377962976906, "learning_rate": 9.914140761101563e-06, "loss": 0.996, "step": 2409 }, { "epoch": 0.08734732340255881, "grad_norm": 2.403219697222228, "learning_rate": 9.91403242532981e-06, "loss": 0.9964, "step": 2410 }, { "epoch": 0.08738356710521547, "grad_norm": 2.361329804690371, "learning_rate": 9.913924021845661e-06, "loss": 1.1691, "step": 2411 }, { "epoch": 0.08741981080787213, "grad_norm": 2.85272065010679, "learning_rate": 9.913815550650612e-06, "loss": 1.1162, "step": 2412 }, { "epoch": 0.08745605451052879, "grad_norm": 2.64958874721369, "learning_rate": 9.913707011746157e-06, "loss": 1.097, "step": 2413 }, { "epoch": 0.08749229821318547, "grad_norm": 2.6695540777753526, "learning_rate": 9.91359840513379e-06, "loss": 1.0903, "step": 2414 }, { "epoch": 0.08752854191584213, "grad_norm": 2.487516326892678, "learning_rate": 9.91348973081501e-06, "loss": 0.8559, "step": 2415 }, { "epoch": 0.08756478561849879, "grad_norm": 2.394061884388645, "learning_rate": 9.91338098879131e-06, "loss": 0.8318, "step": 2416 }, { "epoch": 0.08760102932115545, "grad_norm": 2.8165574719967514, "learning_rate": 9.913272179064195e-06, "loss": 1.0874, "step": 2417 }, { "epoch": 0.0876372730238121, "grad_norm": 2.568781474830152, "learning_rate": 9.913163301635159e-06, "loss": 1.1342, "step": 2418 }, { "epoch": 0.08767351672646878, "grad_norm": 2.339849801936353, "learning_rate": 9.913054356505704e-06, "loss": 1.1934, "step": 2419 }, { "epoch": 0.08770976042912544, "grad_norm": 2.505255191524472, "learning_rate": 9.912945343677331e-06, "loss": 1.0476, "step": 2420 }, { "epoch": 0.0877460041317821, "grad_norm": 2.473154927393435, "learning_rate": 9.912836263151544e-06, "loss": 1.0, "step": 2421 }, { "epoch": 0.08778224783443876, "grad_norm": 2.485911433267911, "learning_rate": 9.912727114929843e-06, "loss": 0.94, "step": 2422 }, { "epoch": 0.08781849153709544, "grad_norm": 2.341026193279751, "learning_rate": 9.912617899013735e-06, "loss": 1.1077, "step": 2423 }, { "epoch": 0.0878547352397521, "grad_norm": 2.3711485281374096, "learning_rate": 9.91250861540472e-06, "loss": 0.9689, "step": 2424 }, { "epoch": 0.08789097894240876, "grad_norm": 2.5858471101922595, "learning_rate": 9.912399264104311e-06, "loss": 1.0301, "step": 2425 }, { "epoch": 0.08792722264506542, "grad_norm": 2.478461539846077, "learning_rate": 9.912289845114011e-06, "loss": 0.9898, "step": 2426 }, { "epoch": 0.08796346634772208, "grad_norm": 2.6329385434164605, "learning_rate": 9.912180358435326e-06, "loss": 1.0567, "step": 2427 }, { "epoch": 0.08799971005037875, "grad_norm": 2.3919858352754386, "learning_rate": 9.912070804069767e-06, "loss": 0.8985, "step": 2428 }, { "epoch": 0.08803595375303541, "grad_norm": 2.6365154778796023, "learning_rate": 9.911961182018843e-06, "loss": 1.12, "step": 2429 }, { "epoch": 0.08807219745569207, "grad_norm": 2.4154876920296426, "learning_rate": 9.911851492284065e-06, "loss": 0.9664, "step": 2430 }, { "epoch": 0.08810844115834873, "grad_norm": 2.693613369941016, "learning_rate": 9.911741734866943e-06, "loss": 1.0743, "step": 2431 }, { "epoch": 0.0881446848610054, "grad_norm": 2.433896317870715, "learning_rate": 9.911631909768991e-06, "loss": 1.0209, "step": 2432 }, { "epoch": 0.08818092856366207, "grad_norm": 2.6318466353811063, "learning_rate": 9.911522016991722e-06, "loss": 1.0721, "step": 2433 }, { "epoch": 0.08821717226631873, "grad_norm": 2.5293649828509337, "learning_rate": 9.91141205653665e-06, "loss": 1.0415, "step": 2434 }, { "epoch": 0.08825341596897539, "grad_norm": 2.7239343200772685, "learning_rate": 9.91130202840529e-06, "loss": 1.0867, "step": 2435 }, { "epoch": 0.08828965967163205, "grad_norm": 2.582355342685725, "learning_rate": 9.911191932599158e-06, "loss": 1.0067, "step": 2436 }, { "epoch": 0.08832590337428872, "grad_norm": 2.519095760514785, "learning_rate": 9.911081769119772e-06, "loss": 1.1723, "step": 2437 }, { "epoch": 0.08836214707694538, "grad_norm": 2.391909434703511, "learning_rate": 9.910971537968649e-06, "loss": 0.7709, "step": 2438 }, { "epoch": 0.08839839077960204, "grad_norm": 2.388364314515443, "learning_rate": 9.910861239147308e-06, "loss": 0.9474, "step": 2439 }, { "epoch": 0.0884346344822587, "grad_norm": 2.6751085637046628, "learning_rate": 9.91075087265727e-06, "loss": 1.0191, "step": 2440 }, { "epoch": 0.08847087818491538, "grad_norm": 2.5245980627621303, "learning_rate": 9.910640438500053e-06, "loss": 1.0022, "step": 2441 }, { "epoch": 0.08850712188757204, "grad_norm": 2.6407466839384632, "learning_rate": 9.910529936677182e-06, "loss": 1.0213, "step": 2442 }, { "epoch": 0.0885433655902287, "grad_norm": 2.3267069161609832, "learning_rate": 9.91041936719018e-06, "loss": 0.9425, "step": 2443 }, { "epoch": 0.08857960929288536, "grad_norm": 2.2522880082964893, "learning_rate": 9.910308730040566e-06, "loss": 1.0997, "step": 2444 }, { "epoch": 0.08861585299554202, "grad_norm": 2.3985563813072353, "learning_rate": 9.910198025229869e-06, "loss": 0.974, "step": 2445 }, { "epoch": 0.08865209669819869, "grad_norm": 2.387472804117574, "learning_rate": 9.910087252759613e-06, "loss": 1.0685, "step": 2446 }, { "epoch": 0.08868834040085535, "grad_norm": 2.353363852933166, "learning_rate": 9.909976412631322e-06, "loss": 0.9435, "step": 2447 }, { "epoch": 0.08872458410351201, "grad_norm": 2.5273166782358625, "learning_rate": 9.909865504846528e-06, "loss": 0.8349, "step": 2448 }, { "epoch": 0.08876082780616867, "grad_norm": 2.384229824581587, "learning_rate": 9.909754529406755e-06, "loss": 1.0331, "step": 2449 }, { "epoch": 0.08879707150882535, "grad_norm": 2.6134168633411217, "learning_rate": 9.909643486313533e-06, "loss": 0.9577, "step": 2450 }, { "epoch": 0.08883331521148201, "grad_norm": 2.162444210698601, "learning_rate": 9.909532375568396e-06, "loss": 0.9762, "step": 2451 }, { "epoch": 0.08886955891413867, "grad_norm": 2.3334070396421476, "learning_rate": 9.909421197172871e-06, "loss": 0.9366, "step": 2452 }, { "epoch": 0.08890580261679533, "grad_norm": 2.502534175651754, "learning_rate": 9.90930995112849e-06, "loss": 0.8854, "step": 2453 }, { "epoch": 0.08894204631945199, "grad_norm": 2.4005900858972677, "learning_rate": 9.909198637436787e-06, "loss": 0.9486, "step": 2454 }, { "epoch": 0.08897829002210866, "grad_norm": 2.212471045942603, "learning_rate": 9.909087256099296e-06, "loss": 1.1407, "step": 2455 }, { "epoch": 0.08901453372476532, "grad_norm": 2.2478541255761635, "learning_rate": 9.908975807117554e-06, "loss": 0.9972, "step": 2456 }, { "epoch": 0.08905077742742198, "grad_norm": 2.4690101320403484, "learning_rate": 9.908864290493091e-06, "loss": 1.0037, "step": 2457 }, { "epoch": 0.08908702113007864, "grad_norm": 2.865111612629102, "learning_rate": 9.908752706227448e-06, "loss": 0.9069, "step": 2458 }, { "epoch": 0.08912326483273532, "grad_norm": 2.6261185516425205, "learning_rate": 9.908641054322162e-06, "loss": 1.0621, "step": 2459 }, { "epoch": 0.08915950853539198, "grad_norm": 2.7324303748354564, "learning_rate": 9.908529334778769e-06, "loss": 1.2117, "step": 2460 }, { "epoch": 0.08919575223804864, "grad_norm": 2.6875644891315242, "learning_rate": 9.908417547598813e-06, "loss": 0.9854, "step": 2461 }, { "epoch": 0.0892319959407053, "grad_norm": 2.4983160810570384, "learning_rate": 9.90830569278383e-06, "loss": 1.1929, "step": 2462 }, { "epoch": 0.08926823964336196, "grad_norm": 2.5607725147442424, "learning_rate": 9.908193770335364e-06, "loss": 0.9727, "step": 2463 }, { "epoch": 0.08930448334601863, "grad_norm": 2.7135012243456638, "learning_rate": 9.908081780254954e-06, "loss": 1.0683, "step": 2464 }, { "epoch": 0.0893407270486753, "grad_norm": 2.7707711017868704, "learning_rate": 9.907969722544147e-06, "loss": 0.922, "step": 2465 }, { "epoch": 0.08937697075133196, "grad_norm": 2.6672575702109325, "learning_rate": 9.907857597204487e-06, "loss": 0.8545, "step": 2466 }, { "epoch": 0.08941321445398862, "grad_norm": 2.4451754102383303, "learning_rate": 9.907745404237514e-06, "loss": 1.0642, "step": 2467 }, { "epoch": 0.08944945815664529, "grad_norm": 2.523811181689826, "learning_rate": 9.90763314364478e-06, "loss": 1.1098, "step": 2468 }, { "epoch": 0.08948570185930195, "grad_norm": 2.448631843364883, "learning_rate": 9.90752081542783e-06, "loss": 1.0998, "step": 2469 }, { "epoch": 0.08952194556195861, "grad_norm": 2.563897082548651, "learning_rate": 9.907408419588209e-06, "loss": 0.9862, "step": 2470 }, { "epoch": 0.08955818926461527, "grad_norm": 2.66370798860378, "learning_rate": 9.907295956127468e-06, "loss": 0.9043, "step": 2471 }, { "epoch": 0.08959443296727193, "grad_norm": 2.4154978960292306, "learning_rate": 9.907183425047158e-06, "loss": 1.0553, "step": 2472 }, { "epoch": 0.0896306766699286, "grad_norm": 2.5056489887336264, "learning_rate": 9.907070826348826e-06, "loss": 1.1276, "step": 2473 }, { "epoch": 0.08966692037258527, "grad_norm": 2.4617419196272308, "learning_rate": 9.906958160034028e-06, "loss": 1.0492, "step": 2474 }, { "epoch": 0.08970316407524193, "grad_norm": 2.1297805251299518, "learning_rate": 9.906845426104313e-06, "loss": 0.9978, "step": 2475 }, { "epoch": 0.08973940777789859, "grad_norm": 2.4049469457398938, "learning_rate": 9.906732624561237e-06, "loss": 1.1253, "step": 2476 }, { "epoch": 0.08977565148055526, "grad_norm": 2.3426097202195986, "learning_rate": 9.906619755406352e-06, "loss": 0.8233, "step": 2477 }, { "epoch": 0.08981189518321192, "grad_norm": 2.4153220871377576, "learning_rate": 9.906506818641213e-06, "loss": 1.0535, "step": 2478 }, { "epoch": 0.08984813888586858, "grad_norm": 2.542457780178735, "learning_rate": 9.906393814267378e-06, "loss": 0.9496, "step": 2479 }, { "epoch": 0.08988438258852524, "grad_norm": 2.305437070517996, "learning_rate": 9.906280742286406e-06, "loss": 1.0278, "step": 2480 }, { "epoch": 0.0899206262911819, "grad_norm": 2.7161050980502153, "learning_rate": 9.90616760269985e-06, "loss": 1.0251, "step": 2481 }, { "epoch": 0.08995686999383858, "grad_norm": 2.503575539504295, "learning_rate": 9.906054395509272e-06, "loss": 1.0055, "step": 2482 }, { "epoch": 0.08999311369649524, "grad_norm": 2.5763744373853568, "learning_rate": 9.905941120716232e-06, "loss": 0.9384, "step": 2483 }, { "epoch": 0.0900293573991519, "grad_norm": 2.366397054204619, "learning_rate": 9.90582777832229e-06, "loss": 1.0361, "step": 2484 }, { "epoch": 0.09006560110180856, "grad_norm": 2.3534024173013135, "learning_rate": 9.905714368329007e-06, "loss": 1.1054, "step": 2485 }, { "epoch": 0.09010184480446523, "grad_norm": 2.2930899182694757, "learning_rate": 9.905600890737949e-06, "loss": 0.9903, "step": 2486 }, { "epoch": 0.09013808850712189, "grad_norm": 2.7046296189837786, "learning_rate": 9.905487345550677e-06, "loss": 1.1708, "step": 2487 }, { "epoch": 0.09017433220977855, "grad_norm": 2.5990782166683055, "learning_rate": 9.905373732768754e-06, "loss": 0.9449, "step": 2488 }, { "epoch": 0.09021057591243521, "grad_norm": 2.865203242983483, "learning_rate": 9.90526005239375e-06, "loss": 1.2304, "step": 2489 }, { "epoch": 0.09024681961509187, "grad_norm": 2.1595774358691844, "learning_rate": 9.90514630442723e-06, "loss": 0.9675, "step": 2490 }, { "epoch": 0.09028306331774855, "grad_norm": 2.418706411300181, "learning_rate": 9.905032488870758e-06, "loss": 1.0092, "step": 2491 }, { "epoch": 0.0903193070204052, "grad_norm": 2.5859933692273813, "learning_rate": 9.904918605725907e-06, "loss": 1.097, "step": 2492 }, { "epoch": 0.09035555072306187, "grad_norm": 2.336210881680596, "learning_rate": 9.904804654994242e-06, "loss": 1.2549, "step": 2493 }, { "epoch": 0.09039179442571853, "grad_norm": 2.217674078671512, "learning_rate": 9.904690636677336e-06, "loss": 1.0485, "step": 2494 }, { "epoch": 0.0904280381283752, "grad_norm": 2.539770619684468, "learning_rate": 9.90457655077676e-06, "loss": 1.0402, "step": 2495 }, { "epoch": 0.09046428183103186, "grad_norm": 2.344100492266446, "learning_rate": 9.904462397294083e-06, "loss": 0.7883, "step": 2496 }, { "epoch": 0.09050052553368852, "grad_norm": 2.5273391243113323, "learning_rate": 9.904348176230882e-06, "loss": 0.9453, "step": 2497 }, { "epoch": 0.09053676923634518, "grad_norm": 2.2550215079036793, "learning_rate": 9.904233887588728e-06, "loss": 1.0096, "step": 2498 }, { "epoch": 0.09057301293900184, "grad_norm": 2.183366508027122, "learning_rate": 9.904119531369198e-06, "loss": 1.0719, "step": 2499 }, { "epoch": 0.09060925664165852, "grad_norm": 2.3515001709339614, "learning_rate": 9.904005107573868e-06, "loss": 1.061, "step": 2500 }, { "epoch": 0.09064550034431518, "grad_norm": 2.2864768112197402, "learning_rate": 9.903890616204312e-06, "loss": 1.0915, "step": 2501 }, { "epoch": 0.09068174404697184, "grad_norm": 2.5929340308548703, "learning_rate": 9.903776057262108e-06, "loss": 1.0745, "step": 2502 }, { "epoch": 0.0907179877496285, "grad_norm": 2.2845338025852557, "learning_rate": 9.903661430748839e-06, "loss": 1.039, "step": 2503 }, { "epoch": 0.09075423145228517, "grad_norm": 2.5091490323522714, "learning_rate": 9.903546736666079e-06, "loss": 1.1997, "step": 2504 }, { "epoch": 0.09079047515494183, "grad_norm": 2.6342677006600987, "learning_rate": 9.903431975015411e-06, "loss": 0.982, "step": 2505 }, { "epoch": 0.09082671885759849, "grad_norm": 2.24673654321082, "learning_rate": 9.903317145798414e-06, "loss": 0.9895, "step": 2506 }, { "epoch": 0.09086296256025515, "grad_norm": 2.2267507104224986, "learning_rate": 9.903202249016674e-06, "loss": 0.8759, "step": 2507 }, { "epoch": 0.09089920626291181, "grad_norm": 2.7416842807946935, "learning_rate": 9.903087284671773e-06, "loss": 0.9954, "step": 2508 }, { "epoch": 0.09093544996556849, "grad_norm": 2.5489089682296107, "learning_rate": 9.902972252765292e-06, "loss": 1.0904, "step": 2509 }, { "epoch": 0.09097169366822515, "grad_norm": 2.9292695214063236, "learning_rate": 9.902857153298822e-06, "loss": 1.0712, "step": 2510 }, { "epoch": 0.09100793737088181, "grad_norm": 2.322768438191238, "learning_rate": 9.902741986273942e-06, "loss": 0.9565, "step": 2511 }, { "epoch": 0.09104418107353847, "grad_norm": 2.22746862021083, "learning_rate": 9.902626751692245e-06, "loss": 0.8995, "step": 2512 }, { "epoch": 0.09108042477619514, "grad_norm": 2.521764879315463, "learning_rate": 9.902511449555317e-06, "loss": 1.243, "step": 2513 }, { "epoch": 0.0911166684788518, "grad_norm": 2.644036059887665, "learning_rate": 9.902396079864744e-06, "loss": 0.9312, "step": 2514 }, { "epoch": 0.09115291218150846, "grad_norm": 2.7159191100187203, "learning_rate": 9.902280642622119e-06, "loss": 1.0342, "step": 2515 }, { "epoch": 0.09118915588416512, "grad_norm": 2.759128609199014, "learning_rate": 9.902165137829033e-06, "loss": 0.9446, "step": 2516 }, { "epoch": 0.09122539958682178, "grad_norm": 2.4623325679340806, "learning_rate": 9.902049565487073e-06, "loss": 0.9827, "step": 2517 }, { "epoch": 0.09126164328947846, "grad_norm": 2.5730733089309243, "learning_rate": 9.901933925597836e-06, "loss": 0.9653, "step": 2518 }, { "epoch": 0.09129788699213512, "grad_norm": 2.536370012518588, "learning_rate": 9.901818218162914e-06, "loss": 1.0982, "step": 2519 }, { "epoch": 0.09133413069479178, "grad_norm": 2.0168894728974722, "learning_rate": 9.9017024431839e-06, "loss": 1.0219, "step": 2520 }, { "epoch": 0.09137037439744844, "grad_norm": 2.425948379796514, "learning_rate": 9.901586600662393e-06, "loss": 0.9973, "step": 2521 }, { "epoch": 0.0914066181001051, "grad_norm": 2.379114948563996, "learning_rate": 9.901470690599987e-06, "loss": 0.9685, "step": 2522 }, { "epoch": 0.09144286180276177, "grad_norm": 2.325074138645938, "learning_rate": 9.90135471299828e-06, "loss": 0.9053, "step": 2523 }, { "epoch": 0.09147910550541843, "grad_norm": 2.729136601797715, "learning_rate": 9.901238667858867e-06, "loss": 0.8563, "step": 2524 }, { "epoch": 0.0915153492080751, "grad_norm": 2.3087988224102998, "learning_rate": 9.901122555183348e-06, "loss": 1.1905, "step": 2525 }, { "epoch": 0.09155159291073175, "grad_norm": 2.536229577943709, "learning_rate": 9.901006374973327e-06, "loss": 0.8817, "step": 2526 }, { "epoch": 0.09158783661338843, "grad_norm": 2.3573214608354833, "learning_rate": 9.900890127230403e-06, "loss": 1.136, "step": 2527 }, { "epoch": 0.09162408031604509, "grad_norm": 2.5647865271969503, "learning_rate": 9.900773811956176e-06, "loss": 0.9276, "step": 2528 }, { "epoch": 0.09166032401870175, "grad_norm": 2.59808820827578, "learning_rate": 9.900657429152249e-06, "loss": 1.0109, "step": 2529 }, { "epoch": 0.09169656772135841, "grad_norm": 2.570370681962346, "learning_rate": 9.900540978820227e-06, "loss": 0.9762, "step": 2530 }, { "epoch": 0.09173281142401507, "grad_norm": 2.47211444260158, "learning_rate": 9.900424460961715e-06, "loss": 1.0649, "step": 2531 }, { "epoch": 0.09176905512667174, "grad_norm": 2.565038063521453, "learning_rate": 9.900307875578317e-06, "loss": 0.9023, "step": 2532 }, { "epoch": 0.0918052988293284, "grad_norm": 2.58931625342361, "learning_rate": 9.900191222671637e-06, "loss": 1.1077, "step": 2533 }, { "epoch": 0.09184154253198507, "grad_norm": 2.4957562318425204, "learning_rate": 9.90007450224329e-06, "loss": 1.1056, "step": 2534 }, { "epoch": 0.09187778623464173, "grad_norm": 2.4222943811217768, "learning_rate": 9.899957714294877e-06, "loss": 1.0257, "step": 2535 }, { "epoch": 0.0919140299372984, "grad_norm": 2.3474128011613256, "learning_rate": 9.899840858828012e-06, "loss": 0.9472, "step": 2536 }, { "epoch": 0.09195027363995506, "grad_norm": 2.7461026526725902, "learning_rate": 9.899723935844303e-06, "loss": 1.0071, "step": 2537 }, { "epoch": 0.09198651734261172, "grad_norm": 2.367867974690444, "learning_rate": 9.89960694534536e-06, "loss": 0.8914, "step": 2538 }, { "epoch": 0.09202276104526838, "grad_norm": 2.0325944629667174, "learning_rate": 9.899489887332798e-06, "loss": 1.0372, "step": 2539 }, { "epoch": 0.09205900474792504, "grad_norm": 2.334694554963021, "learning_rate": 9.899372761808228e-06, "loss": 1.1092, "step": 2540 }, { "epoch": 0.09209524845058172, "grad_norm": 2.3974538796802634, "learning_rate": 9.899255568773263e-06, "loss": 1.0244, "step": 2541 }, { "epoch": 0.09213149215323838, "grad_norm": 2.3203751247088675, "learning_rate": 9.899138308229522e-06, "loss": 0.9475, "step": 2542 }, { "epoch": 0.09216773585589504, "grad_norm": 2.1314687617230375, "learning_rate": 9.899020980178615e-06, "loss": 1.053, "step": 2543 }, { "epoch": 0.0922039795585517, "grad_norm": 14.446280490761456, "learning_rate": 9.898903584622163e-06, "loss": 2.3397, "step": 2544 }, { "epoch": 0.09224022326120837, "grad_norm": 2.472689709619726, "learning_rate": 9.898786121561784e-06, "loss": 1.1692, "step": 2545 }, { "epoch": 0.09227646696386503, "grad_norm": 2.7365780596312206, "learning_rate": 9.898668590999092e-06, "loss": 0.9056, "step": 2546 }, { "epoch": 0.09231271066652169, "grad_norm": 2.6110763090303277, "learning_rate": 9.898550992935711e-06, "loss": 0.8401, "step": 2547 }, { "epoch": 0.09234895436917835, "grad_norm": 2.4471519909821726, "learning_rate": 9.89843332737326e-06, "loss": 1.0485, "step": 2548 }, { "epoch": 0.09238519807183501, "grad_norm": 2.7981495588555645, "learning_rate": 9.898315594313362e-06, "loss": 1.2135, "step": 2549 }, { "epoch": 0.09242144177449169, "grad_norm": 2.059420380254247, "learning_rate": 9.898197793757634e-06, "loss": 0.7571, "step": 2550 }, { "epoch": 0.09245768547714835, "grad_norm": 2.3440199778044097, "learning_rate": 9.898079925707704e-06, "loss": 0.8561, "step": 2551 }, { "epoch": 0.092493929179805, "grad_norm": 2.5150010940653362, "learning_rate": 9.897961990165195e-06, "loss": 0.7683, "step": 2552 }, { "epoch": 0.09253017288246167, "grad_norm": 2.8892649783106368, "learning_rate": 9.897843987131732e-06, "loss": 0.8736, "step": 2553 }, { "epoch": 0.09256641658511834, "grad_norm": 2.5180209270044656, "learning_rate": 9.897725916608942e-06, "loss": 1.014, "step": 2554 }, { "epoch": 0.092602660287775, "grad_norm": 2.573026782626648, "learning_rate": 9.89760777859845e-06, "loss": 0.9093, "step": 2555 }, { "epoch": 0.09263890399043166, "grad_norm": 2.3989690449504057, "learning_rate": 9.897489573101884e-06, "loss": 0.972, "step": 2556 }, { "epoch": 0.09267514769308832, "grad_norm": 2.613083052274946, "learning_rate": 9.897371300120875e-06, "loss": 1.0884, "step": 2557 }, { "epoch": 0.09271139139574498, "grad_norm": 2.608441807312118, "learning_rate": 9.897252959657051e-06, "loss": 1.0577, "step": 2558 }, { "epoch": 0.09274763509840166, "grad_norm": 2.5862276097077626, "learning_rate": 9.897134551712042e-06, "loss": 0.9553, "step": 2559 }, { "epoch": 0.09278387880105832, "grad_norm": 2.5572743578172603, "learning_rate": 9.897016076287483e-06, "loss": 1.0214, "step": 2560 }, { "epoch": 0.09282012250371498, "grad_norm": 2.4621319148958825, "learning_rate": 9.896897533385002e-06, "loss": 0.85, "step": 2561 }, { "epoch": 0.09285636620637164, "grad_norm": 2.8665801629545618, "learning_rate": 9.896778923006234e-06, "loss": 1.0341, "step": 2562 }, { "epoch": 0.09289260990902831, "grad_norm": 12.947939608795233, "learning_rate": 9.896660245152814e-06, "loss": 1.848, "step": 2563 }, { "epoch": 0.09292885361168497, "grad_norm": 2.4805852910747075, "learning_rate": 9.896541499826378e-06, "loss": 0.9049, "step": 2564 }, { "epoch": 0.09296509731434163, "grad_norm": 2.346902420773655, "learning_rate": 9.896422687028562e-06, "loss": 0.926, "step": 2565 }, { "epoch": 0.09300134101699829, "grad_norm": 2.620047687174687, "learning_rate": 9.896303806761003e-06, "loss": 0.9193, "step": 2566 }, { "epoch": 0.09303758471965495, "grad_norm": 2.605041667715644, "learning_rate": 9.896184859025338e-06, "loss": 1.0019, "step": 2567 }, { "epoch": 0.09307382842231163, "grad_norm": 2.7459776709594164, "learning_rate": 9.896065843823207e-06, "loss": 0.9236, "step": 2568 }, { "epoch": 0.09311007212496829, "grad_norm": 2.2288452525795273, "learning_rate": 9.895946761156248e-06, "loss": 0.936, "step": 2569 }, { "epoch": 0.09314631582762495, "grad_norm": 2.5459593882484213, "learning_rate": 9.895827611026105e-06, "loss": 0.9027, "step": 2570 }, { "epoch": 0.09318255953028161, "grad_norm": 2.5961930380330567, "learning_rate": 9.895708393434418e-06, "loss": 0.8991, "step": 2571 }, { "epoch": 0.09321880323293828, "grad_norm": 2.591546833920167, "learning_rate": 9.89558910838283e-06, "loss": 1.0133, "step": 2572 }, { "epoch": 0.09325504693559494, "grad_norm": 2.209744707958113, "learning_rate": 9.895469755872987e-06, "loss": 0.9659, "step": 2573 }, { "epoch": 0.0932912906382516, "grad_norm": 2.5614068299734827, "learning_rate": 9.895350335906528e-06, "loss": 0.9674, "step": 2574 }, { "epoch": 0.09332753434090826, "grad_norm": 2.3177235127838443, "learning_rate": 9.895230848485104e-06, "loss": 1.0563, "step": 2575 }, { "epoch": 0.09336377804356492, "grad_norm": 2.503025996781394, "learning_rate": 9.89511129361036e-06, "loss": 1.0834, "step": 2576 }, { "epoch": 0.0934000217462216, "grad_norm": 2.5668818736740104, "learning_rate": 9.894991671283942e-06, "loss": 0.9734, "step": 2577 }, { "epoch": 0.09343626544887826, "grad_norm": 2.2306947834527264, "learning_rate": 9.8948719815075e-06, "loss": 0.8725, "step": 2578 }, { "epoch": 0.09347250915153492, "grad_norm": 2.518887558169204, "learning_rate": 9.89475222428268e-06, "loss": 0.9614, "step": 2579 }, { "epoch": 0.09350875285419158, "grad_norm": 2.275877963011143, "learning_rate": 9.894632399611136e-06, "loss": 0.9729, "step": 2580 }, { "epoch": 0.09354499655684825, "grad_norm": 2.521246830921767, "learning_rate": 9.89451250749452e-06, "loss": 1.088, "step": 2581 }, { "epoch": 0.09358124025950491, "grad_norm": 2.429980114402359, "learning_rate": 9.89439254793448e-06, "loss": 0.8632, "step": 2582 }, { "epoch": 0.09361748396216157, "grad_norm": 2.384667107748348, "learning_rate": 9.89427252093267e-06, "loss": 1.0228, "step": 2583 }, { "epoch": 0.09365372766481823, "grad_norm": 2.3903680197014054, "learning_rate": 9.894152426490745e-06, "loss": 1.033, "step": 2584 }, { "epoch": 0.0936899713674749, "grad_norm": 2.2997685727569026, "learning_rate": 9.89403226461036e-06, "loss": 0.8755, "step": 2585 }, { "epoch": 0.09372621507013157, "grad_norm": 2.6837235074658894, "learning_rate": 9.893912035293169e-06, "loss": 0.8677, "step": 2586 }, { "epoch": 0.09376245877278823, "grad_norm": 2.4511110876115767, "learning_rate": 9.893791738540831e-06, "loss": 1.0389, "step": 2587 }, { "epoch": 0.09379870247544489, "grad_norm": 2.33484823126765, "learning_rate": 9.893671374355002e-06, "loss": 1.0329, "step": 2588 }, { "epoch": 0.09383494617810155, "grad_norm": 2.5243262042357792, "learning_rate": 9.893550942737341e-06, "loss": 1.0104, "step": 2589 }, { "epoch": 0.09387118988075822, "grad_norm": 2.454022182300016, "learning_rate": 9.893430443689508e-06, "loss": 1.2123, "step": 2590 }, { "epoch": 0.09390743358341488, "grad_norm": 2.682548171071434, "learning_rate": 9.893309877213162e-06, "loss": 1.0685, "step": 2591 }, { "epoch": 0.09394367728607154, "grad_norm": 2.336790097523007, "learning_rate": 9.893189243309967e-06, "loss": 1.1298, "step": 2592 }, { "epoch": 0.0939799209887282, "grad_norm": 2.330918472016424, "learning_rate": 9.893068541981583e-06, "loss": 1.1618, "step": 2593 }, { "epoch": 0.09401616469138487, "grad_norm": 2.6703981050865644, "learning_rate": 9.892947773229672e-06, "loss": 1.0392, "step": 2594 }, { "epoch": 0.09405240839404154, "grad_norm": 2.50154064680346, "learning_rate": 9.892826937055901e-06, "loss": 1.0786, "step": 2595 }, { "epoch": 0.0940886520966982, "grad_norm": 2.146517959048565, "learning_rate": 9.892706033461934e-06, "loss": 1.0075, "step": 2596 }, { "epoch": 0.09412489579935486, "grad_norm": 2.3237222227809697, "learning_rate": 9.892585062449438e-06, "loss": 0.9884, "step": 2597 }, { "epoch": 0.09416113950201152, "grad_norm": 2.633188010295999, "learning_rate": 9.892464024020076e-06, "loss": 1.1564, "step": 2598 }, { "epoch": 0.0941973832046682, "grad_norm": 2.306384985497603, "learning_rate": 9.892342918175522e-06, "loss": 0.9081, "step": 2599 }, { "epoch": 0.09423362690732486, "grad_norm": 2.399641951442896, "learning_rate": 9.89222174491744e-06, "loss": 0.9007, "step": 2600 }, { "epoch": 0.09426987060998152, "grad_norm": 2.4980676809971407, "learning_rate": 9.892100504247499e-06, "loss": 1.0835, "step": 2601 }, { "epoch": 0.09430611431263818, "grad_norm": 2.4920542965236288, "learning_rate": 9.891979196167374e-06, "loss": 0.9382, "step": 2602 }, { "epoch": 0.09434235801529484, "grad_norm": 2.4534891761692954, "learning_rate": 9.891857820678733e-06, "loss": 1.1008, "step": 2603 }, { "epoch": 0.09437860171795151, "grad_norm": 2.489380864778069, "learning_rate": 9.891736377783251e-06, "loss": 1.0773, "step": 2604 }, { "epoch": 0.09441484542060817, "grad_norm": 2.301428554381275, "learning_rate": 9.8916148674826e-06, "loss": 1.1341, "step": 2605 }, { "epoch": 0.09445108912326483, "grad_norm": 2.725076276265779, "learning_rate": 9.891493289778453e-06, "loss": 1.1409, "step": 2606 }, { "epoch": 0.09448733282592149, "grad_norm": 2.2950664156260316, "learning_rate": 9.891371644672489e-06, "loss": 1.1056, "step": 2607 }, { "epoch": 0.09452357652857817, "grad_norm": 2.5619072175618878, "learning_rate": 9.89124993216638e-06, "loss": 1.0286, "step": 2608 }, { "epoch": 0.09455982023123483, "grad_norm": 2.4773566933147357, "learning_rate": 9.891128152261807e-06, "loss": 1.0684, "step": 2609 }, { "epoch": 0.09459606393389149, "grad_norm": 2.7979557013351486, "learning_rate": 9.891006304960444e-06, "loss": 1.0785, "step": 2610 }, { "epoch": 0.09463230763654815, "grad_norm": 2.2564380217152675, "learning_rate": 9.890884390263973e-06, "loss": 1.1408, "step": 2611 }, { "epoch": 0.0946685513392048, "grad_norm": 3.291487221025246, "learning_rate": 9.890762408174072e-06, "loss": 0.969, "step": 2612 }, { "epoch": 0.09470479504186148, "grad_norm": 2.225546502448945, "learning_rate": 9.890640358692425e-06, "loss": 0.8578, "step": 2613 }, { "epoch": 0.09474103874451814, "grad_norm": 2.587682661807931, "learning_rate": 9.89051824182071e-06, "loss": 1.1617, "step": 2614 }, { "epoch": 0.0947772824471748, "grad_norm": 2.800344439827746, "learning_rate": 9.890396057560612e-06, "loss": 0.9827, "step": 2615 }, { "epoch": 0.09481352614983146, "grad_norm": 2.4131940518125603, "learning_rate": 9.890273805913814e-06, "loss": 1.1548, "step": 2616 }, { "epoch": 0.09484976985248814, "grad_norm": 2.2749874939511927, "learning_rate": 9.890151486882e-06, "loss": 0.9948, "step": 2617 }, { "epoch": 0.0948860135551448, "grad_norm": 2.670949346294758, "learning_rate": 9.890029100466855e-06, "loss": 1.0766, "step": 2618 }, { "epoch": 0.09492225725780146, "grad_norm": 2.3618174841503654, "learning_rate": 9.889906646670068e-06, "loss": 0.8397, "step": 2619 }, { "epoch": 0.09495850096045812, "grad_norm": 2.4629113775351446, "learning_rate": 9.889784125493324e-06, "loss": 1.1078, "step": 2620 }, { "epoch": 0.09499474466311478, "grad_norm": 2.387607199606812, "learning_rate": 9.889661536938313e-06, "loss": 0.9527, "step": 2621 }, { "epoch": 0.09503098836577145, "grad_norm": 2.18741626947098, "learning_rate": 9.889538881006724e-06, "loss": 0.8516, "step": 2622 }, { "epoch": 0.09506723206842811, "grad_norm": 2.329683282768065, "learning_rate": 9.889416157700243e-06, "loss": 0.9975, "step": 2623 }, { "epoch": 0.09510347577108477, "grad_norm": 2.3087906323410294, "learning_rate": 9.889293367020566e-06, "loss": 0.9833, "step": 2624 }, { "epoch": 0.09513971947374143, "grad_norm": 2.576451091698671, "learning_rate": 9.889170508969384e-06, "loss": 1.1873, "step": 2625 }, { "epoch": 0.0951759631763981, "grad_norm": 2.44484285855936, "learning_rate": 9.889047583548389e-06, "loss": 1.1268, "step": 2626 }, { "epoch": 0.09521220687905477, "grad_norm": 2.3597246788268222, "learning_rate": 9.888924590759275e-06, "loss": 1.087, "step": 2627 }, { "epoch": 0.09524845058171143, "grad_norm": 2.3033732240968843, "learning_rate": 9.888801530603737e-06, "loss": 1.0905, "step": 2628 }, { "epoch": 0.09528469428436809, "grad_norm": 2.4883670882633053, "learning_rate": 9.88867840308347e-06, "loss": 1.0241, "step": 2629 }, { "epoch": 0.09532093798702475, "grad_norm": 2.4763615796378486, "learning_rate": 9.888555208200172e-06, "loss": 1.1001, "step": 2630 }, { "epoch": 0.09535718168968142, "grad_norm": 2.4715079680873013, "learning_rate": 9.888431945955538e-06, "loss": 1.0168, "step": 2631 }, { "epoch": 0.09539342539233808, "grad_norm": 2.444235905193475, "learning_rate": 9.88830861635127e-06, "loss": 1.12, "step": 2632 }, { "epoch": 0.09542966909499474, "grad_norm": 2.5918820682407318, "learning_rate": 9.888185219389065e-06, "loss": 1.0124, "step": 2633 }, { "epoch": 0.0954659127976514, "grad_norm": 2.4832403284697246, "learning_rate": 9.888061755070625e-06, "loss": 0.8863, "step": 2634 }, { "epoch": 0.09550215650030808, "grad_norm": 2.77309124163432, "learning_rate": 9.887938223397648e-06, "loss": 0.9695, "step": 2635 }, { "epoch": 0.09553840020296474, "grad_norm": 2.4780963887616134, "learning_rate": 9.887814624371842e-06, "loss": 1.08, "step": 2636 }, { "epoch": 0.0955746439056214, "grad_norm": 2.929660678174642, "learning_rate": 9.887690957994903e-06, "loss": 1.031, "step": 2637 }, { "epoch": 0.09561088760827806, "grad_norm": 2.501597591042024, "learning_rate": 9.887567224268539e-06, "loss": 0.9393, "step": 2638 }, { "epoch": 0.09564713131093472, "grad_norm": 2.397391916157252, "learning_rate": 9.887443423194456e-06, "loss": 1.0837, "step": 2639 }, { "epoch": 0.09568337501359139, "grad_norm": 2.419204756880285, "learning_rate": 9.887319554774357e-06, "loss": 0.9947, "step": 2640 }, { "epoch": 0.09571961871624805, "grad_norm": 2.7600360030335658, "learning_rate": 9.887195619009952e-06, "loss": 0.9223, "step": 2641 }, { "epoch": 0.09575586241890471, "grad_norm": 2.4077859112928204, "learning_rate": 9.887071615902946e-06, "loss": 0.9717, "step": 2642 }, { "epoch": 0.09579210612156137, "grad_norm": 2.7218674492644395, "learning_rate": 9.886947545455049e-06, "loss": 1.0631, "step": 2643 }, { "epoch": 0.09582834982421805, "grad_norm": 2.265128563861742, "learning_rate": 9.88682340766797e-06, "loss": 0.8279, "step": 2644 }, { "epoch": 0.09586459352687471, "grad_norm": 2.3618969335702378, "learning_rate": 9.88669920254342e-06, "loss": 1.0217, "step": 2645 }, { "epoch": 0.09590083722953137, "grad_norm": 2.4749428303533887, "learning_rate": 9.88657493008311e-06, "loss": 0.9489, "step": 2646 }, { "epoch": 0.09593708093218803, "grad_norm": 2.553818874540416, "learning_rate": 9.886450590288754e-06, "loss": 0.9462, "step": 2647 }, { "epoch": 0.09597332463484469, "grad_norm": 2.549461035192039, "learning_rate": 9.886326183162064e-06, "loss": 1.1053, "step": 2648 }, { "epoch": 0.09600956833750136, "grad_norm": 2.2240556763316204, "learning_rate": 9.886201708704754e-06, "loss": 1.0615, "step": 2649 }, { "epoch": 0.09604581204015802, "grad_norm": 2.576465738487123, "learning_rate": 9.886077166918539e-06, "loss": 1.1355, "step": 2650 }, { "epoch": 0.09608205574281468, "grad_norm": 2.4800741509451143, "learning_rate": 9.885952557805137e-06, "loss": 0.9655, "step": 2651 }, { "epoch": 0.09611829944547134, "grad_norm": 2.4891027542029702, "learning_rate": 9.88582788136626e-06, "loss": 1.03, "step": 2652 }, { "epoch": 0.09615454314812802, "grad_norm": 2.5512176272174316, "learning_rate": 9.885703137603634e-06, "loss": 1.0828, "step": 2653 }, { "epoch": 0.09619078685078468, "grad_norm": 2.4389015915285075, "learning_rate": 9.885578326518972e-06, "loss": 0.9394, "step": 2654 }, { "epoch": 0.09622703055344134, "grad_norm": 2.2416939993455447, "learning_rate": 9.885453448113996e-06, "loss": 0.9458, "step": 2655 }, { "epoch": 0.096263274256098, "grad_norm": 2.6034056084225394, "learning_rate": 9.885328502390423e-06, "loss": 1.1952, "step": 2656 }, { "epoch": 0.09629951795875466, "grad_norm": 2.5113042811414554, "learning_rate": 9.885203489349981e-06, "loss": 1.2242, "step": 2657 }, { "epoch": 0.09633576166141133, "grad_norm": 2.458018559598928, "learning_rate": 9.885078408994388e-06, "loss": 0.8024, "step": 2658 }, { "epoch": 0.096372005364068, "grad_norm": 2.5682050066952415, "learning_rate": 9.884953261325369e-06, "loss": 1.0723, "step": 2659 }, { "epoch": 0.09640824906672465, "grad_norm": 2.537859598474175, "learning_rate": 9.88482804634465e-06, "loss": 0.776, "step": 2660 }, { "epoch": 0.09644449276938132, "grad_norm": 2.747197409129893, "learning_rate": 9.884702764053953e-06, "loss": 1.0471, "step": 2661 }, { "epoch": 0.09648073647203799, "grad_norm": 2.5143834589187097, "learning_rate": 9.884577414455007e-06, "loss": 1.1586, "step": 2662 }, { "epoch": 0.09651698017469465, "grad_norm": 2.783634167499166, "learning_rate": 9.884451997549537e-06, "loss": 1.062, "step": 2663 }, { "epoch": 0.09655322387735131, "grad_norm": 2.7572512297241185, "learning_rate": 9.884326513339273e-06, "loss": 1.1775, "step": 2664 }, { "epoch": 0.09658946758000797, "grad_norm": 2.4797273441381575, "learning_rate": 9.884200961825945e-06, "loss": 1.0037, "step": 2665 }, { "epoch": 0.09662571128266463, "grad_norm": 2.4197010353221207, "learning_rate": 9.884075343011282e-06, "loss": 0.924, "step": 2666 }, { "epoch": 0.0966619549853213, "grad_norm": 2.533809013136917, "learning_rate": 9.883949656897011e-06, "loss": 1.023, "step": 2667 }, { "epoch": 0.09669819868797797, "grad_norm": 2.3587148941852862, "learning_rate": 9.88382390348487e-06, "loss": 1.0182, "step": 2668 }, { "epoch": 0.09673444239063463, "grad_norm": 2.374986657258967, "learning_rate": 9.883698082776589e-06, "loss": 0.9456, "step": 2669 }, { "epoch": 0.09677068609329129, "grad_norm": 2.6757355775952996, "learning_rate": 9.883572194773903e-06, "loss": 1.0131, "step": 2670 }, { "epoch": 0.09680692979594796, "grad_norm": 2.395986214001317, "learning_rate": 9.883446239478545e-06, "loss": 1.0028, "step": 2671 }, { "epoch": 0.09684317349860462, "grad_norm": 2.611588059379401, "learning_rate": 9.88332021689225e-06, "loss": 0.9941, "step": 2672 }, { "epoch": 0.09687941720126128, "grad_norm": 2.6520402586995147, "learning_rate": 9.883194127016757e-06, "loss": 1.0386, "step": 2673 }, { "epoch": 0.09691566090391794, "grad_norm": 2.705578058661124, "learning_rate": 9.883067969853802e-06, "loss": 0.9754, "step": 2674 }, { "epoch": 0.0969519046065746, "grad_norm": 2.3691315279185035, "learning_rate": 9.882941745405123e-06, "loss": 0.8897, "step": 2675 }, { "epoch": 0.09698814830923128, "grad_norm": 2.2320772304502037, "learning_rate": 9.88281545367246e-06, "loss": 0.9263, "step": 2676 }, { "epoch": 0.09702439201188794, "grad_norm": 2.3889681582869793, "learning_rate": 9.882689094657552e-06, "loss": 0.8365, "step": 2677 }, { "epoch": 0.0970606357145446, "grad_norm": 2.5097357853602356, "learning_rate": 9.882562668362144e-06, "loss": 1.0478, "step": 2678 }, { "epoch": 0.09709687941720126, "grad_norm": 2.3554773766787194, "learning_rate": 9.882436174787973e-06, "loss": 0.9287, "step": 2679 }, { "epoch": 0.09713312311985793, "grad_norm": 2.5521832378071316, "learning_rate": 9.882309613936785e-06, "loss": 1.2896, "step": 2680 }, { "epoch": 0.09716936682251459, "grad_norm": 2.277051838274846, "learning_rate": 9.882182985810323e-06, "loss": 0.8877, "step": 2681 }, { "epoch": 0.09720561052517125, "grad_norm": 2.5306451567756323, "learning_rate": 9.882056290410333e-06, "loss": 1.0806, "step": 2682 }, { "epoch": 0.09724185422782791, "grad_norm": 2.3985923261974746, "learning_rate": 9.881929527738559e-06, "loss": 1.0168, "step": 2683 }, { "epoch": 0.09727809793048457, "grad_norm": 2.581474520305003, "learning_rate": 9.88180269779675e-06, "loss": 0.8077, "step": 2684 }, { "epoch": 0.09731434163314125, "grad_norm": 2.263687249045011, "learning_rate": 9.88167580058665e-06, "loss": 0.8034, "step": 2685 }, { "epoch": 0.0973505853357979, "grad_norm": 2.552778369043009, "learning_rate": 9.881548836110012e-06, "loss": 1.0309, "step": 2686 }, { "epoch": 0.09738682903845457, "grad_norm": 2.6453225025768163, "learning_rate": 9.881421804368581e-06, "loss": 0.948, "step": 2687 }, { "epoch": 0.09742307274111123, "grad_norm": 2.419808252974668, "learning_rate": 9.88129470536411e-06, "loss": 1.1582, "step": 2688 }, { "epoch": 0.0974593164437679, "grad_norm": 2.371418474104759, "learning_rate": 9.881167539098354e-06, "loss": 0.9088, "step": 2689 }, { "epoch": 0.09749556014642456, "grad_norm": 2.536062275216077, "learning_rate": 9.881040305573057e-06, "loss": 0.8042, "step": 2690 }, { "epoch": 0.09753180384908122, "grad_norm": 2.6176658511331006, "learning_rate": 9.880913004789978e-06, "loss": 1.1088, "step": 2691 }, { "epoch": 0.09756804755173788, "grad_norm": 2.5132246055344014, "learning_rate": 9.88078563675087e-06, "loss": 1.0778, "step": 2692 }, { "epoch": 0.09760429125439454, "grad_norm": 2.522379958526149, "learning_rate": 9.880658201457486e-06, "loss": 0.9569, "step": 2693 }, { "epoch": 0.09764053495705122, "grad_norm": 2.6210678278064226, "learning_rate": 9.880530698911586e-06, "loss": 0.975, "step": 2694 }, { "epoch": 0.09767677865970788, "grad_norm": 2.673307413173413, "learning_rate": 9.880403129114924e-06, "loss": 0.849, "step": 2695 }, { "epoch": 0.09771302236236454, "grad_norm": 2.254200482309341, "learning_rate": 9.880275492069258e-06, "loss": 0.933, "step": 2696 }, { "epoch": 0.0977492660650212, "grad_norm": 2.398889476160121, "learning_rate": 9.880147787776348e-06, "loss": 1.002, "step": 2697 }, { "epoch": 0.09778550976767787, "grad_norm": 2.3479581312790847, "learning_rate": 9.880020016237952e-06, "loss": 1.2384, "step": 2698 }, { "epoch": 0.09782175347033453, "grad_norm": 2.3192574426100867, "learning_rate": 9.879892177455832e-06, "loss": 1.0209, "step": 2699 }, { "epoch": 0.09785799717299119, "grad_norm": 2.7217517850561688, "learning_rate": 9.879764271431748e-06, "loss": 1.0143, "step": 2700 }, { "epoch": 0.09789424087564785, "grad_norm": 2.3776196178480165, "learning_rate": 9.879636298167466e-06, "loss": 0.9767, "step": 2701 }, { "epoch": 0.09793048457830451, "grad_norm": 2.647795734104665, "learning_rate": 9.879508257664744e-06, "loss": 1.1152, "step": 2702 }, { "epoch": 0.09796672828096119, "grad_norm": 2.287238066609932, "learning_rate": 9.879380149925351e-06, "loss": 0.9975, "step": 2703 }, { "epoch": 0.09800297198361785, "grad_norm": 2.5095004462265895, "learning_rate": 9.87925197495105e-06, "loss": 1.2106, "step": 2704 }, { "epoch": 0.09803921568627451, "grad_norm": 2.689401718068999, "learning_rate": 9.879123732743608e-06, "loss": 1.1471, "step": 2705 }, { "epoch": 0.09807545938893117, "grad_norm": 2.407320883859778, "learning_rate": 9.878995423304791e-06, "loss": 0.9956, "step": 2706 }, { "epoch": 0.09811170309158784, "grad_norm": 2.4340415233122146, "learning_rate": 9.878867046636368e-06, "loss": 1.0049, "step": 2707 }, { "epoch": 0.0981479467942445, "grad_norm": 2.5103269953393843, "learning_rate": 9.878738602740109e-06, "loss": 1.0534, "step": 2708 }, { "epoch": 0.09818419049690116, "grad_norm": 2.8969597441334543, "learning_rate": 9.87861009161778e-06, "loss": 0.9699, "step": 2709 }, { "epoch": 0.09822043419955782, "grad_norm": 2.5391846557535827, "learning_rate": 9.878481513271156e-06, "loss": 1.1249, "step": 2710 }, { "epoch": 0.09825667790221448, "grad_norm": 2.6869770141303952, "learning_rate": 9.878352867702009e-06, "loss": 1.0582, "step": 2711 }, { "epoch": 0.09829292160487116, "grad_norm": 2.5908278468244417, "learning_rate": 9.878224154912106e-06, "loss": 0.9975, "step": 2712 }, { "epoch": 0.09832916530752782, "grad_norm": 2.4080322025580125, "learning_rate": 9.878095374903228e-06, "loss": 0.9132, "step": 2713 }, { "epoch": 0.09836540901018448, "grad_norm": 2.4055280844053026, "learning_rate": 9.877966527677143e-06, "loss": 1.1559, "step": 2714 }, { "epoch": 0.09840165271284114, "grad_norm": 2.3004548739645836, "learning_rate": 9.877837613235632e-06, "loss": 1.0292, "step": 2715 }, { "epoch": 0.09843789641549781, "grad_norm": 3.0473748348772074, "learning_rate": 9.877708631580467e-06, "loss": 0.8567, "step": 2716 }, { "epoch": 0.09847414011815447, "grad_norm": 2.5506908618036985, "learning_rate": 9.877579582713427e-06, "loss": 0.8512, "step": 2717 }, { "epoch": 0.09851038382081113, "grad_norm": 2.731787337917695, "learning_rate": 9.877450466636292e-06, "loss": 1.1787, "step": 2718 }, { "epoch": 0.0985466275234678, "grad_norm": 2.838685101091237, "learning_rate": 9.877321283350836e-06, "loss": 1.047, "step": 2719 }, { "epoch": 0.09858287122612445, "grad_norm": 2.58390424110328, "learning_rate": 9.877192032858845e-06, "loss": 0.9162, "step": 2720 }, { "epoch": 0.09861911492878113, "grad_norm": 2.5024153728663, "learning_rate": 9.877062715162096e-06, "loss": 1.0742, "step": 2721 }, { "epoch": 0.09865535863143779, "grad_norm": 2.1607092966423793, "learning_rate": 9.876933330262373e-06, "loss": 0.9156, "step": 2722 }, { "epoch": 0.09869160233409445, "grad_norm": 2.7543831071285307, "learning_rate": 9.876803878161459e-06, "loss": 0.9234, "step": 2723 }, { "epoch": 0.09872784603675111, "grad_norm": 2.664587465519395, "learning_rate": 9.876674358861136e-06, "loss": 0.9542, "step": 2724 }, { "epoch": 0.09876408973940778, "grad_norm": 2.597093918641471, "learning_rate": 9.87654477236319e-06, "loss": 0.8576, "step": 2725 }, { "epoch": 0.09880033344206444, "grad_norm": 2.7821258073568975, "learning_rate": 9.876415118669408e-06, "loss": 1.0564, "step": 2726 }, { "epoch": 0.0988365771447211, "grad_norm": 2.4455164273840615, "learning_rate": 9.876285397781571e-06, "loss": 1.0582, "step": 2727 }, { "epoch": 0.09887282084737777, "grad_norm": 2.445367911772637, "learning_rate": 9.876155609701474e-06, "loss": 1.2199, "step": 2728 }, { "epoch": 0.09890906455003443, "grad_norm": 2.6057289663112306, "learning_rate": 9.8760257544309e-06, "loss": 1.0503, "step": 2729 }, { "epoch": 0.0989453082526911, "grad_norm": 2.4623558415198383, "learning_rate": 9.875895831971639e-06, "loss": 1.0366, "step": 2730 }, { "epoch": 0.09898155195534776, "grad_norm": 2.472782288306329, "learning_rate": 9.875765842325483e-06, "loss": 1.0565, "step": 2731 }, { "epoch": 0.09901779565800442, "grad_norm": 2.470630029339906, "learning_rate": 9.875635785494223e-06, "loss": 1.1139, "step": 2732 }, { "epoch": 0.09905403936066108, "grad_norm": 2.3888051805883683, "learning_rate": 9.875505661479651e-06, "loss": 0.9973, "step": 2733 }, { "epoch": 0.09909028306331776, "grad_norm": 2.4589609097446106, "learning_rate": 9.875375470283558e-06, "loss": 1.1174, "step": 2734 }, { "epoch": 0.09912652676597442, "grad_norm": 2.231984533224004, "learning_rate": 9.875245211907741e-06, "loss": 0.932, "step": 2735 }, { "epoch": 0.09916277046863108, "grad_norm": 2.5213760948162682, "learning_rate": 9.875114886353993e-06, "loss": 0.9969, "step": 2736 }, { "epoch": 0.09919901417128774, "grad_norm": 2.651476774635738, "learning_rate": 9.87498449362411e-06, "loss": 1.1193, "step": 2737 }, { "epoch": 0.0992352578739444, "grad_norm": 2.5966947170941816, "learning_rate": 9.874854033719889e-06, "loss": 1.0369, "step": 2738 }, { "epoch": 0.09927150157660107, "grad_norm": 2.2852561339795776, "learning_rate": 9.874723506643128e-06, "loss": 0.9291, "step": 2739 }, { "epoch": 0.09930774527925773, "grad_norm": 2.602838086563272, "learning_rate": 9.874592912395625e-06, "loss": 1.182, "step": 2740 }, { "epoch": 0.09934398898191439, "grad_norm": 2.37224139417929, "learning_rate": 9.87446225097918e-06, "loss": 1.0756, "step": 2741 }, { "epoch": 0.09938023268457105, "grad_norm": 2.472716727846383, "learning_rate": 9.874331522395593e-06, "loss": 1.1603, "step": 2742 }, { "epoch": 0.09941647638722773, "grad_norm": 2.3360616470047275, "learning_rate": 9.874200726646666e-06, "loss": 1.1297, "step": 2743 }, { "epoch": 0.09945272008988439, "grad_norm": 2.309174360300823, "learning_rate": 9.874069863734199e-06, "loss": 0.9366, "step": 2744 }, { "epoch": 0.09948896379254105, "grad_norm": 2.184697701489846, "learning_rate": 9.873938933659999e-06, "loss": 0.9101, "step": 2745 }, { "epoch": 0.0995252074951977, "grad_norm": 2.606652564562078, "learning_rate": 9.873807936425868e-06, "loss": 0.932, "step": 2746 }, { "epoch": 0.09956145119785437, "grad_norm": 2.5050052196099215, "learning_rate": 9.87367687203361e-06, "loss": 0.9753, "step": 2747 }, { "epoch": 0.09959769490051104, "grad_norm": 2.9461883461055316, "learning_rate": 9.873545740485033e-06, "loss": 1.0262, "step": 2748 }, { "epoch": 0.0996339386031677, "grad_norm": 2.372230503044988, "learning_rate": 9.873414541781942e-06, "loss": 1.1822, "step": 2749 }, { "epoch": 0.09967018230582436, "grad_norm": 2.662974053465216, "learning_rate": 9.873283275926147e-06, "loss": 1.145, "step": 2750 }, { "epoch": 0.09970642600848102, "grad_norm": 2.329153587877564, "learning_rate": 9.873151942919457e-06, "loss": 1.019, "step": 2751 }, { "epoch": 0.0997426697111377, "grad_norm": 2.484300455341191, "learning_rate": 9.873020542763678e-06, "loss": 1.1137, "step": 2752 }, { "epoch": 0.09977891341379436, "grad_norm": 2.2318706299870636, "learning_rate": 9.872889075460625e-06, "loss": 0.9692, "step": 2753 }, { "epoch": 0.09981515711645102, "grad_norm": 2.4105749908390153, "learning_rate": 9.872757541012106e-06, "loss": 0.8359, "step": 2754 }, { "epoch": 0.09985140081910768, "grad_norm": 2.875393598159641, "learning_rate": 9.872625939419935e-06, "loss": 1.14, "step": 2755 }, { "epoch": 0.09988764452176434, "grad_norm": 2.462330310366274, "learning_rate": 9.872494270685927e-06, "loss": 0.9242, "step": 2756 }, { "epoch": 0.09992388822442101, "grad_norm": 2.8535259533159465, "learning_rate": 9.872362534811895e-06, "loss": 1.087, "step": 2757 }, { "epoch": 0.09996013192707767, "grad_norm": 2.76065047578255, "learning_rate": 9.872230731799653e-06, "loss": 1.009, "step": 2758 }, { "epoch": 0.09999637562973433, "grad_norm": 2.410442548951788, "learning_rate": 9.872098861651018e-06, "loss": 1.0098, "step": 2759 }, { "epoch": 0.10003261933239099, "grad_norm": 2.5016287825779617, "learning_rate": 9.871966924367806e-06, "loss": 0.9617, "step": 2760 }, { "epoch": 0.10006886303504767, "grad_norm": 2.284713833679559, "learning_rate": 9.871834919951837e-06, "loss": 0.8968, "step": 2761 }, { "epoch": 0.10010510673770433, "grad_norm": 2.871321785979573, "learning_rate": 9.87170284840493e-06, "loss": 0.983, "step": 2762 }, { "epoch": 0.10014135044036099, "grad_norm": 2.6648261780896623, "learning_rate": 9.871570709728903e-06, "loss": 1.2402, "step": 2763 }, { "epoch": 0.10017759414301765, "grad_norm": 2.511653478583299, "learning_rate": 9.87143850392558e-06, "loss": 0.9972, "step": 2764 }, { "epoch": 0.10021383784567431, "grad_norm": 2.481961295072375, "learning_rate": 9.871306230996779e-06, "loss": 0.9773, "step": 2765 }, { "epoch": 0.10025008154833098, "grad_norm": 2.319162348636369, "learning_rate": 9.871173890944325e-06, "loss": 1.1326, "step": 2766 }, { "epoch": 0.10028632525098764, "grad_norm": 2.489372577654017, "learning_rate": 9.871041483770038e-06, "loss": 0.9706, "step": 2767 }, { "epoch": 0.1003225689536443, "grad_norm": 2.5172822905801286, "learning_rate": 9.87090900947575e-06, "loss": 0.8663, "step": 2768 }, { "epoch": 0.10035881265630096, "grad_norm": 2.39058476358855, "learning_rate": 9.870776468063278e-06, "loss": 1.1487, "step": 2769 }, { "epoch": 0.10039505635895764, "grad_norm": 2.537784863679688, "learning_rate": 9.870643859534451e-06, "loss": 0.6917, "step": 2770 }, { "epoch": 0.1004313000616143, "grad_norm": 2.392125054529515, "learning_rate": 9.870511183891102e-06, "loss": 0.9126, "step": 2771 }, { "epoch": 0.10046754376427096, "grad_norm": 2.321456698908924, "learning_rate": 9.870378441135051e-06, "loss": 1.0815, "step": 2772 }, { "epoch": 0.10050378746692762, "grad_norm": 2.5929413146954317, "learning_rate": 9.870245631268132e-06, "loss": 1.0258, "step": 2773 }, { "epoch": 0.10054003116958428, "grad_norm": 2.4392838640138277, "learning_rate": 9.870112754292172e-06, "loss": 0.9849, "step": 2774 }, { "epoch": 0.10057627487224095, "grad_norm": 2.5274228028141428, "learning_rate": 9.869979810209004e-06, "loss": 1.0449, "step": 2775 }, { "epoch": 0.10061251857489761, "grad_norm": 2.3649123740224147, "learning_rate": 9.869846799020461e-06, "loss": 0.9922, "step": 2776 }, { "epoch": 0.10064876227755427, "grad_norm": 2.4737742342999542, "learning_rate": 9.869713720728375e-06, "loss": 1.0227, "step": 2777 }, { "epoch": 0.10068500598021093, "grad_norm": 2.5330053181755066, "learning_rate": 9.869580575334576e-06, "loss": 1.039, "step": 2778 }, { "epoch": 0.10072124968286761, "grad_norm": 2.934757640706496, "learning_rate": 9.869447362840904e-06, "loss": 1.1811, "step": 2779 }, { "epoch": 0.10075749338552427, "grad_norm": 2.642072609415457, "learning_rate": 9.86931408324919e-06, "loss": 0.9392, "step": 2780 }, { "epoch": 0.10079373708818093, "grad_norm": 2.262488176106167, "learning_rate": 9.869180736561276e-06, "loss": 1.1234, "step": 2781 }, { "epoch": 0.10082998079083759, "grad_norm": 2.4137473031318963, "learning_rate": 9.869047322778994e-06, "loss": 1.0294, "step": 2782 }, { "epoch": 0.10086622449349425, "grad_norm": 2.5722501247900285, "learning_rate": 9.868913841904185e-06, "loss": 0.878, "step": 2783 }, { "epoch": 0.10090246819615092, "grad_norm": 2.5920890933205576, "learning_rate": 9.86878029393869e-06, "loss": 0.9125, "step": 2784 }, { "epoch": 0.10093871189880758, "grad_norm": 2.615606224401394, "learning_rate": 9.868646678884344e-06, "loss": 1.0217, "step": 2785 }, { "epoch": 0.10097495560146424, "grad_norm": 2.2110263299371393, "learning_rate": 9.868512996742992e-06, "loss": 1.2678, "step": 2786 }, { "epoch": 0.1010111993041209, "grad_norm": 2.6545247902231752, "learning_rate": 9.868379247516476e-06, "loss": 1.4029, "step": 2787 }, { "epoch": 0.10104744300677758, "grad_norm": 2.8904737429317002, "learning_rate": 9.868245431206638e-06, "loss": 0.8993, "step": 2788 }, { "epoch": 0.10108368670943424, "grad_norm": 2.5056818116557236, "learning_rate": 9.868111547815323e-06, "loss": 1.1355, "step": 2789 }, { "epoch": 0.1011199304120909, "grad_norm": 2.2049347108406416, "learning_rate": 9.867977597344373e-06, "loss": 0.9822, "step": 2790 }, { "epoch": 0.10115617411474756, "grad_norm": 2.4955187289134235, "learning_rate": 9.867843579795638e-06, "loss": 1.0803, "step": 2791 }, { "epoch": 0.10119241781740422, "grad_norm": 2.539787025412157, "learning_rate": 9.86770949517096e-06, "loss": 0.991, "step": 2792 }, { "epoch": 0.1012286615200609, "grad_norm": 2.5563171758135215, "learning_rate": 9.86757534347219e-06, "loss": 1.0024, "step": 2793 }, { "epoch": 0.10126490522271756, "grad_norm": 2.2313009066857443, "learning_rate": 9.867441124701177e-06, "loss": 0.8371, "step": 2794 }, { "epoch": 0.10130114892537422, "grad_norm": 2.7248281862895856, "learning_rate": 9.867306838859766e-06, "loss": 1.0706, "step": 2795 }, { "epoch": 0.10133739262803088, "grad_norm": 2.2786056163036967, "learning_rate": 9.867172485949815e-06, "loss": 0.9919, "step": 2796 }, { "epoch": 0.10137363633068755, "grad_norm": 2.8910970976902957, "learning_rate": 9.867038065973166e-06, "loss": 1.0395, "step": 2797 }, { "epoch": 0.10140988003334421, "grad_norm": 2.941414900371056, "learning_rate": 9.86690357893168e-06, "loss": 0.923, "step": 2798 }, { "epoch": 0.10144612373600087, "grad_norm": 2.4072290111664567, "learning_rate": 9.866769024827203e-06, "loss": 1.0434, "step": 2799 }, { "epoch": 0.10148236743865753, "grad_norm": 2.2657884212391393, "learning_rate": 9.866634403661592e-06, "loss": 0.9963, "step": 2800 }, { "epoch": 0.10151861114131419, "grad_norm": 2.4592120081918556, "learning_rate": 9.866499715436704e-06, "loss": 0.9943, "step": 2801 }, { "epoch": 0.10155485484397087, "grad_norm": 2.5258613322516172, "learning_rate": 9.866364960154393e-06, "loss": 1.0433, "step": 2802 }, { "epoch": 0.10159109854662753, "grad_norm": 2.436203222305094, "learning_rate": 9.866230137816515e-06, "loss": 0.8912, "step": 2803 }, { "epoch": 0.10162734224928419, "grad_norm": 2.4795085757809607, "learning_rate": 9.86609524842493e-06, "loss": 0.8312, "step": 2804 }, { "epoch": 0.10166358595194085, "grad_norm": 2.471008635400409, "learning_rate": 9.865960291981495e-06, "loss": 1.3422, "step": 2805 }, { "epoch": 0.10169982965459752, "grad_norm": 2.732982913533407, "learning_rate": 9.86582526848807e-06, "loss": 1.1528, "step": 2806 }, { "epoch": 0.10173607335725418, "grad_norm": 2.639328057042969, "learning_rate": 9.865690177946515e-06, "loss": 1.1019, "step": 2807 }, { "epoch": 0.10177231705991084, "grad_norm": 2.6625463085177787, "learning_rate": 9.865555020358692e-06, "loss": 1.1186, "step": 2808 }, { "epoch": 0.1018085607625675, "grad_norm": 2.6429925597948003, "learning_rate": 9.865419795726463e-06, "loss": 1.257, "step": 2809 }, { "epoch": 0.10184480446522416, "grad_norm": 2.340788988349507, "learning_rate": 9.865284504051692e-06, "loss": 0.9946, "step": 2810 }, { "epoch": 0.10188104816788084, "grad_norm": 2.4932294900092895, "learning_rate": 9.865149145336243e-06, "loss": 1.1175, "step": 2811 }, { "epoch": 0.1019172918705375, "grad_norm": 3.040427377161674, "learning_rate": 9.865013719581982e-06, "loss": 1.1251, "step": 2812 }, { "epoch": 0.10195353557319416, "grad_norm": 2.567768289433707, "learning_rate": 9.864878226790772e-06, "loss": 1.0424, "step": 2813 }, { "epoch": 0.10198977927585082, "grad_norm": 2.4644861541442475, "learning_rate": 9.864742666964485e-06, "loss": 0.8887, "step": 2814 }, { "epoch": 0.10202602297850749, "grad_norm": 2.376046355933816, "learning_rate": 9.864607040104985e-06, "loss": 0.9869, "step": 2815 }, { "epoch": 0.10206226668116415, "grad_norm": 2.344249474987388, "learning_rate": 9.864471346214142e-06, "loss": 1.0166, "step": 2816 }, { "epoch": 0.10209851038382081, "grad_norm": 2.573495658553683, "learning_rate": 9.864335585293826e-06, "loss": 0.9191, "step": 2817 }, { "epoch": 0.10213475408647747, "grad_norm": 2.46308335023783, "learning_rate": 9.864199757345907e-06, "loss": 0.9427, "step": 2818 }, { "epoch": 0.10217099778913413, "grad_norm": 2.1778960370397975, "learning_rate": 9.864063862372258e-06, "loss": 0.7627, "step": 2819 }, { "epoch": 0.1022072414917908, "grad_norm": 2.6232204443077975, "learning_rate": 9.86392790037475e-06, "loss": 1.0389, "step": 2820 }, { "epoch": 0.10224348519444747, "grad_norm": 2.642355260700506, "learning_rate": 9.863791871355257e-06, "loss": 0.8904, "step": 2821 }, { "epoch": 0.10227972889710413, "grad_norm": 2.491910227803728, "learning_rate": 9.863655775315654e-06, "loss": 0.8654, "step": 2822 }, { "epoch": 0.10231597259976079, "grad_norm": 2.969106722932252, "learning_rate": 9.863519612257815e-06, "loss": 0.9316, "step": 2823 }, { "epoch": 0.10235221630241745, "grad_norm": 2.285630892314391, "learning_rate": 9.863383382183618e-06, "loss": 0.9189, "step": 2824 }, { "epoch": 0.10238846000507412, "grad_norm": 2.8290978616214995, "learning_rate": 9.863247085094938e-06, "loss": 1.0696, "step": 2825 }, { "epoch": 0.10242470370773078, "grad_norm": 2.663355822507382, "learning_rate": 9.863110720993656e-06, "loss": 0.9211, "step": 2826 }, { "epoch": 0.10246094741038744, "grad_norm": 2.2347962920526543, "learning_rate": 9.86297428988165e-06, "loss": 0.9742, "step": 2827 }, { "epoch": 0.1024971911130441, "grad_norm": 2.363653621147964, "learning_rate": 9.862837791760798e-06, "loss": 1.0059, "step": 2828 }, { "epoch": 0.10253343481570078, "grad_norm": 2.5170524619052297, "learning_rate": 9.862701226632981e-06, "loss": 1.0585, "step": 2829 }, { "epoch": 0.10256967851835744, "grad_norm": 2.3965938414335333, "learning_rate": 9.862564594500084e-06, "loss": 0.9381, "step": 2830 }, { "epoch": 0.1026059222210141, "grad_norm": 2.0887958801650433, "learning_rate": 9.862427895363986e-06, "loss": 1.0452, "step": 2831 }, { "epoch": 0.10264216592367076, "grad_norm": 2.3732365153279895, "learning_rate": 9.862291129226573e-06, "loss": 0.9706, "step": 2832 }, { "epoch": 0.10267840962632742, "grad_norm": 2.5676935263218086, "learning_rate": 9.86215429608973e-06, "loss": 0.9389, "step": 2833 }, { "epoch": 0.10271465332898409, "grad_norm": 2.9728587032719442, "learning_rate": 9.86201739595534e-06, "loss": 1.2092, "step": 2834 }, { "epoch": 0.10275089703164075, "grad_norm": 2.59161509640294, "learning_rate": 9.861880428825291e-06, "loss": 1.0553, "step": 2835 }, { "epoch": 0.10278714073429741, "grad_norm": 2.795836145224977, "learning_rate": 9.86174339470147e-06, "loss": 1.0566, "step": 2836 }, { "epoch": 0.10282338443695407, "grad_norm": 2.3516099642930004, "learning_rate": 9.861606293585765e-06, "loss": 0.9901, "step": 2837 }, { "epoch": 0.10285962813961075, "grad_norm": 2.6517442673651983, "learning_rate": 9.861469125480068e-06, "loss": 0.9369, "step": 2838 }, { "epoch": 0.10289587184226741, "grad_norm": 2.6217741399426227, "learning_rate": 9.861331890386263e-06, "loss": 1.187, "step": 2839 }, { "epoch": 0.10293211554492407, "grad_norm": 2.4410457621563544, "learning_rate": 9.861194588306247e-06, "loss": 1.1014, "step": 2840 }, { "epoch": 0.10296835924758073, "grad_norm": 2.4092016947402515, "learning_rate": 9.861057219241907e-06, "loss": 0.8855, "step": 2841 }, { "epoch": 0.10300460295023739, "grad_norm": 2.8086326082752238, "learning_rate": 9.860919783195141e-06, "loss": 0.8682, "step": 2842 }, { "epoch": 0.10304084665289406, "grad_norm": 2.4706821594889954, "learning_rate": 9.86078228016784e-06, "loss": 1.1121, "step": 2843 }, { "epoch": 0.10307709035555072, "grad_norm": 2.5541881823883172, "learning_rate": 9.860644710161897e-06, "loss": 0.9325, "step": 2844 }, { "epoch": 0.10311333405820738, "grad_norm": 2.4406489240318257, "learning_rate": 9.86050707317921e-06, "loss": 1.0479, "step": 2845 }, { "epoch": 0.10314957776086404, "grad_norm": 2.583540962525619, "learning_rate": 9.860369369221677e-06, "loss": 0.9852, "step": 2846 }, { "epoch": 0.10318582146352072, "grad_norm": 2.3556759835276373, "learning_rate": 9.860231598291193e-06, "loss": 1.0364, "step": 2847 }, { "epoch": 0.10322206516617738, "grad_norm": 2.303751730107934, "learning_rate": 9.860093760389656e-06, "loss": 0.8983, "step": 2848 }, { "epoch": 0.10325830886883404, "grad_norm": 2.4173966818556494, "learning_rate": 9.859955855518966e-06, "loss": 0.9699, "step": 2849 }, { "epoch": 0.1032945525714907, "grad_norm": 2.533810350187643, "learning_rate": 9.859817883681024e-06, "loss": 0.9996, "step": 2850 }, { "epoch": 0.10333079627414736, "grad_norm": 2.3722250239741407, "learning_rate": 9.85967984487773e-06, "loss": 1.0816, "step": 2851 }, { "epoch": 0.10336703997680403, "grad_norm": 2.4556965137854205, "learning_rate": 9.859541739110987e-06, "loss": 0.9465, "step": 2852 }, { "epoch": 0.1034032836794607, "grad_norm": 2.298153349256547, "learning_rate": 9.8594035663827e-06, "loss": 0.9046, "step": 2853 }, { "epoch": 0.10343952738211735, "grad_norm": 2.6496469067191883, "learning_rate": 9.859265326694768e-06, "loss": 0.9009, "step": 2854 }, { "epoch": 0.10347577108477402, "grad_norm": 2.5951783995030433, "learning_rate": 9.8591270200491e-06, "loss": 1.0275, "step": 2855 }, { "epoch": 0.10351201478743069, "grad_norm": 2.4910971755836355, "learning_rate": 9.858988646447599e-06, "loss": 0.9822, "step": 2856 }, { "epoch": 0.10354825849008735, "grad_norm": 2.6830965292408107, "learning_rate": 9.858850205892174e-06, "loss": 0.9721, "step": 2857 }, { "epoch": 0.10358450219274401, "grad_norm": 2.452597386763993, "learning_rate": 9.858711698384733e-06, "loss": 1.2355, "step": 2858 }, { "epoch": 0.10362074589540067, "grad_norm": 2.8581067677048604, "learning_rate": 9.858573123927182e-06, "loss": 1.1396, "step": 2859 }, { "epoch": 0.10365698959805733, "grad_norm": 2.5402762683838636, "learning_rate": 9.85843448252143e-06, "loss": 0.9954, "step": 2860 }, { "epoch": 0.103693233300714, "grad_norm": 2.1561491819670713, "learning_rate": 9.85829577416939e-06, "loss": 0.8756, "step": 2861 }, { "epoch": 0.10372947700337067, "grad_norm": 2.6072919114830015, "learning_rate": 9.858156998872974e-06, "loss": 1.0328, "step": 2862 }, { "epoch": 0.10376572070602733, "grad_norm": 2.511284902599985, "learning_rate": 9.858018156634092e-06, "loss": 1.061, "step": 2863 }, { "epoch": 0.10380196440868399, "grad_norm": 2.448255861105818, "learning_rate": 9.857879247454658e-06, "loss": 1.01, "step": 2864 }, { "epoch": 0.10383820811134066, "grad_norm": 2.5992293107465967, "learning_rate": 9.857740271336586e-06, "loss": 0.9921, "step": 2865 }, { "epoch": 0.10387445181399732, "grad_norm": 2.1609178327772725, "learning_rate": 9.85760122828179e-06, "loss": 0.9405, "step": 2866 }, { "epoch": 0.10391069551665398, "grad_norm": 2.4331611199563494, "learning_rate": 9.857462118292187e-06, "loss": 0.8676, "step": 2867 }, { "epoch": 0.10394693921931064, "grad_norm": 2.2981273846317527, "learning_rate": 9.857322941369697e-06, "loss": 1.0541, "step": 2868 }, { "epoch": 0.1039831829219673, "grad_norm": 2.40806576484381, "learning_rate": 9.857183697516231e-06, "loss": 1.0002, "step": 2869 }, { "epoch": 0.10401942662462398, "grad_norm": 2.5768904312791268, "learning_rate": 9.857044386733711e-06, "loss": 1.0694, "step": 2870 }, { "epoch": 0.10405567032728064, "grad_norm": 2.86892878559355, "learning_rate": 9.856905009024058e-06, "loss": 1.0513, "step": 2871 }, { "epoch": 0.1040919140299373, "grad_norm": 2.4361388287854586, "learning_rate": 9.85676556438919e-06, "loss": 0.9736, "step": 2872 }, { "epoch": 0.10412815773259396, "grad_norm": 2.3578149581722263, "learning_rate": 9.856626052831031e-06, "loss": 1.0354, "step": 2873 }, { "epoch": 0.10416440143525063, "grad_norm": 2.5013203387584984, "learning_rate": 9.856486474351503e-06, "loss": 0.9919, "step": 2874 }, { "epoch": 0.10420064513790729, "grad_norm": 2.44032948279087, "learning_rate": 9.856346828952526e-06, "loss": 0.9267, "step": 2875 }, { "epoch": 0.10423688884056395, "grad_norm": 2.0907633632711664, "learning_rate": 9.856207116636028e-06, "loss": 0.9538, "step": 2876 }, { "epoch": 0.10427313254322061, "grad_norm": 2.4509857344869115, "learning_rate": 9.856067337403935e-06, "loss": 1.025, "step": 2877 }, { "epoch": 0.10430937624587727, "grad_norm": 2.2335954851700843, "learning_rate": 9.855927491258168e-06, "loss": 0.9066, "step": 2878 }, { "epoch": 0.10434561994853395, "grad_norm": 2.3440272330016425, "learning_rate": 9.855787578200657e-06, "loss": 0.9112, "step": 2879 }, { "epoch": 0.1043818636511906, "grad_norm": 2.2519384148373103, "learning_rate": 9.85564759823333e-06, "loss": 0.9894, "step": 2880 }, { "epoch": 0.10441810735384727, "grad_norm": 2.56454892457789, "learning_rate": 9.855507551358118e-06, "loss": 0.9669, "step": 2881 }, { "epoch": 0.10445435105650393, "grad_norm": 2.439545240057663, "learning_rate": 9.855367437576946e-06, "loss": 1.0961, "step": 2882 }, { "epoch": 0.1044905947591606, "grad_norm": 2.456477348801409, "learning_rate": 9.855227256891748e-06, "loss": 0.8453, "step": 2883 }, { "epoch": 0.10452683846181726, "grad_norm": 2.260593197695956, "learning_rate": 9.855087009304456e-06, "loss": 1.0621, "step": 2884 }, { "epoch": 0.10456308216447392, "grad_norm": 2.623492550075407, "learning_rate": 9.854946694816998e-06, "loss": 1.1968, "step": 2885 }, { "epoch": 0.10459932586713058, "grad_norm": 2.4583169633278286, "learning_rate": 9.854806313431314e-06, "loss": 1.0222, "step": 2886 }, { "epoch": 0.10463556956978724, "grad_norm": 2.5490921351758913, "learning_rate": 9.854665865149334e-06, "loss": 1.0739, "step": 2887 }, { "epoch": 0.10467181327244392, "grad_norm": 2.4618753334467667, "learning_rate": 9.854525349972994e-06, "loss": 0.9346, "step": 2888 }, { "epoch": 0.10470805697510058, "grad_norm": 2.769195557328729, "learning_rate": 9.854384767904232e-06, "loss": 1.1204, "step": 2889 }, { "epoch": 0.10474430067775724, "grad_norm": 2.5816613750697983, "learning_rate": 9.854244118944983e-06, "loss": 1.0189, "step": 2890 }, { "epoch": 0.1047805443804139, "grad_norm": 2.497524781942941, "learning_rate": 9.854103403097183e-06, "loss": 1.0739, "step": 2891 }, { "epoch": 0.10481678808307057, "grad_norm": 14.781325103868037, "learning_rate": 9.853962620362779e-06, "loss": 1.9576, "step": 2892 }, { "epoch": 0.10485303178572723, "grad_norm": 2.306095280599642, "learning_rate": 9.853821770743701e-06, "loss": 1.0403, "step": 2893 }, { "epoch": 0.10488927548838389, "grad_norm": 2.462632963897426, "learning_rate": 9.853680854241896e-06, "loss": 0.9853, "step": 2894 }, { "epoch": 0.10492551919104055, "grad_norm": 2.4409310630410257, "learning_rate": 9.853539870859306e-06, "loss": 1.109, "step": 2895 }, { "epoch": 0.10496176289369721, "grad_norm": 2.427046695181752, "learning_rate": 9.85339882059787e-06, "loss": 0.8204, "step": 2896 }, { "epoch": 0.10499800659635389, "grad_norm": 2.5260664842398075, "learning_rate": 9.853257703459533e-06, "loss": 1.0245, "step": 2897 }, { "epoch": 0.10503425029901055, "grad_norm": 2.565719515752278, "learning_rate": 9.853116519446242e-06, "loss": 1.1053, "step": 2898 }, { "epoch": 0.10507049400166721, "grad_norm": 2.5636256772967343, "learning_rate": 9.85297526855994e-06, "loss": 1.1072, "step": 2899 }, { "epoch": 0.10510673770432387, "grad_norm": 2.5216614766715804, "learning_rate": 9.852833950802573e-06, "loss": 1.0546, "step": 2900 }, { "epoch": 0.10514298140698054, "grad_norm": 2.3390194000439077, "learning_rate": 9.85269256617609e-06, "loss": 0.8524, "step": 2901 }, { "epoch": 0.1051792251096372, "grad_norm": 2.906039829524625, "learning_rate": 9.852551114682436e-06, "loss": 0.8771, "step": 2902 }, { "epoch": 0.10521546881229386, "grad_norm": 2.469054583117515, "learning_rate": 9.852409596323564e-06, "loss": 1.0065, "step": 2903 }, { "epoch": 0.10525171251495052, "grad_norm": 2.3629611318432997, "learning_rate": 9.852268011101422e-06, "loss": 0.9325, "step": 2904 }, { "epoch": 0.10528795621760718, "grad_norm": 2.6513051251277853, "learning_rate": 9.852126359017962e-06, "loss": 1.2082, "step": 2905 }, { "epoch": 0.10532419992026386, "grad_norm": 2.5035671433086275, "learning_rate": 9.851984640075135e-06, "loss": 1.0482, "step": 2906 }, { "epoch": 0.10536044362292052, "grad_norm": 2.253924250830244, "learning_rate": 9.851842854274894e-06, "loss": 0.9541, "step": 2907 }, { "epoch": 0.10539668732557718, "grad_norm": 2.3168498370341815, "learning_rate": 9.851701001619194e-06, "loss": 0.9945, "step": 2908 }, { "epoch": 0.10543293102823384, "grad_norm": 2.5036468382296286, "learning_rate": 9.851559082109987e-06, "loss": 1.0915, "step": 2909 }, { "epoch": 0.10546917473089051, "grad_norm": 2.346554443353211, "learning_rate": 9.85141709574923e-06, "loss": 0.8685, "step": 2910 }, { "epoch": 0.10550541843354717, "grad_norm": 2.665669232814521, "learning_rate": 9.851275042538879e-06, "loss": 1.102, "step": 2911 }, { "epoch": 0.10554166213620383, "grad_norm": 2.5874444085516086, "learning_rate": 9.851132922480893e-06, "loss": 1.1544, "step": 2912 }, { "epoch": 0.1055779058388605, "grad_norm": 2.4937864214950327, "learning_rate": 9.850990735577229e-06, "loss": 1.0813, "step": 2913 }, { "epoch": 0.10561414954151715, "grad_norm": 2.526874944216866, "learning_rate": 9.850848481829844e-06, "loss": 1.2034, "step": 2914 }, { "epoch": 0.10565039324417383, "grad_norm": 2.2519867583178925, "learning_rate": 9.850706161240704e-06, "loss": 0.9103, "step": 2915 }, { "epoch": 0.10568663694683049, "grad_norm": 2.4637523080951866, "learning_rate": 9.850563773811766e-06, "loss": 1.0097, "step": 2916 }, { "epoch": 0.10572288064948715, "grad_norm": 2.806871875253426, "learning_rate": 9.85042131954499e-06, "loss": 1.3123, "step": 2917 }, { "epoch": 0.10575912435214381, "grad_norm": 2.2874621258164947, "learning_rate": 9.850278798442346e-06, "loss": 0.95, "step": 2918 }, { "epoch": 0.10579536805480048, "grad_norm": 2.595206002039954, "learning_rate": 9.850136210505791e-06, "loss": 0.857, "step": 2919 }, { "epoch": 0.10583161175745714, "grad_norm": 2.3691500079968715, "learning_rate": 9.849993555737292e-06, "loss": 0.9117, "step": 2920 }, { "epoch": 0.1058678554601138, "grad_norm": 2.1836817393744012, "learning_rate": 9.849850834138816e-06, "loss": 0.9149, "step": 2921 }, { "epoch": 0.10590409916277047, "grad_norm": 2.595585221020016, "learning_rate": 9.849708045712327e-06, "loss": 1.0935, "step": 2922 }, { "epoch": 0.10594034286542713, "grad_norm": 2.492195210788596, "learning_rate": 9.849565190459796e-06, "loss": 1.07, "step": 2923 }, { "epoch": 0.1059765865680838, "grad_norm": 3.0627153838144734, "learning_rate": 9.849422268383188e-06, "loss": 0.9237, "step": 2924 }, { "epoch": 0.10601283027074046, "grad_norm": 2.745881095796821, "learning_rate": 9.849279279484476e-06, "loss": 0.9455, "step": 2925 }, { "epoch": 0.10604907397339712, "grad_norm": 2.5546338506036608, "learning_rate": 9.849136223765627e-06, "loss": 1.1057, "step": 2926 }, { "epoch": 0.10608531767605378, "grad_norm": 2.4910220459715298, "learning_rate": 9.848993101228614e-06, "loss": 0.853, "step": 2927 }, { "epoch": 0.10612156137871046, "grad_norm": 2.4769010681124284, "learning_rate": 9.848849911875408e-06, "loss": 0.9187, "step": 2928 }, { "epoch": 0.10615780508136712, "grad_norm": 2.479037968402131, "learning_rate": 9.848706655707984e-06, "loss": 0.9986, "step": 2929 }, { "epoch": 0.10619404878402378, "grad_norm": 2.518702528053859, "learning_rate": 9.848563332728314e-06, "loss": 1.1789, "step": 2930 }, { "epoch": 0.10623029248668044, "grad_norm": 2.64829873556839, "learning_rate": 9.848419942938372e-06, "loss": 1.0492, "step": 2931 }, { "epoch": 0.1062665361893371, "grad_norm": 2.3649552492288675, "learning_rate": 9.848276486340138e-06, "loss": 0.9126, "step": 2932 }, { "epoch": 0.10630277989199377, "grad_norm": 2.40200790198687, "learning_rate": 9.848132962935583e-06, "loss": 1.002, "step": 2933 }, { "epoch": 0.10633902359465043, "grad_norm": 2.647281411803883, "learning_rate": 9.84798937272669e-06, "loss": 0.9203, "step": 2934 }, { "epoch": 0.10637526729730709, "grad_norm": 2.408031848844238, "learning_rate": 9.847845715715436e-06, "loss": 0.9498, "step": 2935 }, { "epoch": 0.10641151099996375, "grad_norm": 2.4210729735692573, "learning_rate": 9.8477019919038e-06, "loss": 0.9397, "step": 2936 }, { "epoch": 0.10644775470262043, "grad_norm": 2.4138606018431545, "learning_rate": 9.84755820129376e-06, "loss": 0.8789, "step": 2937 }, { "epoch": 0.10648399840527709, "grad_norm": 2.678560137008487, "learning_rate": 9.847414343887301e-06, "loss": 1.2005, "step": 2938 }, { "epoch": 0.10652024210793375, "grad_norm": 2.809986786762087, "learning_rate": 9.847270419686405e-06, "loss": 0.9335, "step": 2939 }, { "epoch": 0.1065564858105904, "grad_norm": 2.6554840164627826, "learning_rate": 9.847126428693052e-06, "loss": 1.1853, "step": 2940 }, { "epoch": 0.10659272951324707, "grad_norm": 2.6966031299803404, "learning_rate": 9.84698237090923e-06, "loss": 1.0071, "step": 2941 }, { "epoch": 0.10662897321590374, "grad_norm": 2.2832132393116074, "learning_rate": 9.846838246336923e-06, "loss": 1.0543, "step": 2942 }, { "epoch": 0.1066652169185604, "grad_norm": 2.520785458670515, "learning_rate": 9.846694054978114e-06, "loss": 1.1024, "step": 2943 }, { "epoch": 0.10670146062121706, "grad_norm": 2.728955172818686, "learning_rate": 9.846549796834794e-06, "loss": 0.9339, "step": 2944 }, { "epoch": 0.10673770432387372, "grad_norm": 2.29909883778952, "learning_rate": 9.846405471908948e-06, "loss": 1.046, "step": 2945 }, { "epoch": 0.1067739480265304, "grad_norm": 2.375829812656196, "learning_rate": 9.846261080202564e-06, "loss": 0.9707, "step": 2946 }, { "epoch": 0.10681019172918706, "grad_norm": 2.4638457816337973, "learning_rate": 9.846116621717636e-06, "loss": 0.8762, "step": 2947 }, { "epoch": 0.10684643543184372, "grad_norm": 2.5628755725970263, "learning_rate": 9.845972096456152e-06, "loss": 1.0169, "step": 2948 }, { "epoch": 0.10688267913450038, "grad_norm": 2.5979652371462407, "learning_rate": 9.845827504420101e-06, "loss": 1.0026, "step": 2949 }, { "epoch": 0.10691892283715704, "grad_norm": 2.504602736758642, "learning_rate": 9.845682845611477e-06, "loss": 1.1126, "step": 2950 }, { "epoch": 0.10695516653981371, "grad_norm": 2.460189947579088, "learning_rate": 9.845538120032276e-06, "loss": 0.9333, "step": 2951 }, { "epoch": 0.10699141024247037, "grad_norm": 2.610168304033374, "learning_rate": 9.84539332768449e-06, "loss": 1.1637, "step": 2952 }, { "epoch": 0.10702765394512703, "grad_norm": 2.368509194193472, "learning_rate": 9.845248468570114e-06, "loss": 0.938, "step": 2953 }, { "epoch": 0.10706389764778369, "grad_norm": 2.18272452366593, "learning_rate": 9.845103542691146e-06, "loss": 1.0162, "step": 2954 }, { "epoch": 0.10710014135044037, "grad_norm": 2.424192272447972, "learning_rate": 9.844958550049579e-06, "loss": 0.9091, "step": 2955 }, { "epoch": 0.10713638505309703, "grad_norm": 2.593457620838242, "learning_rate": 9.844813490647414e-06, "loss": 1.0412, "step": 2956 }, { "epoch": 0.10717262875575369, "grad_norm": 2.262179130158608, "learning_rate": 9.84466836448665e-06, "loss": 1.0072, "step": 2957 }, { "epoch": 0.10720887245841035, "grad_norm": 2.5113736334492884, "learning_rate": 9.844523171569285e-06, "loss": 1.0112, "step": 2958 }, { "epoch": 0.10724511616106701, "grad_norm": 2.581866239810852, "learning_rate": 9.84437791189732e-06, "loss": 0.9777, "step": 2959 }, { "epoch": 0.10728135986372368, "grad_norm": 2.6750589943636838, "learning_rate": 9.844232585472758e-06, "loss": 1.1174, "step": 2960 }, { "epoch": 0.10731760356638034, "grad_norm": 2.498935926842868, "learning_rate": 9.844087192297601e-06, "loss": 0.9516, "step": 2961 }, { "epoch": 0.107353847269037, "grad_norm": 2.509151103153637, "learning_rate": 9.843941732373852e-06, "loss": 0.9982, "step": 2962 }, { "epoch": 0.10739009097169366, "grad_norm": 2.704096608996696, "learning_rate": 9.843796205703514e-06, "loss": 1.0486, "step": 2963 }, { "epoch": 0.10742633467435034, "grad_norm": 2.7514088345334353, "learning_rate": 9.843650612288596e-06, "loss": 1.2342, "step": 2964 }, { "epoch": 0.107462578377007, "grad_norm": 2.64477316453597, "learning_rate": 9.8435049521311e-06, "loss": 1.1594, "step": 2965 }, { "epoch": 0.10749882207966366, "grad_norm": 2.437471996495375, "learning_rate": 9.843359225233036e-06, "loss": 1.019, "step": 2966 }, { "epoch": 0.10753506578232032, "grad_norm": 2.0709086533454304, "learning_rate": 9.843213431596412e-06, "loss": 0.8932, "step": 2967 }, { "epoch": 0.10757130948497698, "grad_norm": 2.847452676248944, "learning_rate": 9.843067571223235e-06, "loss": 1.0021, "step": 2968 }, { "epoch": 0.10760755318763365, "grad_norm": 2.437471114737287, "learning_rate": 9.842921644115515e-06, "loss": 1.0074, "step": 2969 }, { "epoch": 0.10764379689029031, "grad_norm": 2.4837546417070784, "learning_rate": 9.842775650275264e-06, "loss": 0.8935, "step": 2970 }, { "epoch": 0.10768004059294697, "grad_norm": 2.517129594710162, "learning_rate": 9.842629589704494e-06, "loss": 0.935, "step": 2971 }, { "epoch": 0.10771628429560363, "grad_norm": 2.3814157535533416, "learning_rate": 9.842483462405216e-06, "loss": 1.1018, "step": 2972 }, { "epoch": 0.10775252799826031, "grad_norm": 2.1574324814248995, "learning_rate": 9.842337268379447e-06, "loss": 0.9011, "step": 2973 }, { "epoch": 0.10778877170091697, "grad_norm": 2.5309883724499223, "learning_rate": 9.842191007629196e-06, "loss": 1.0343, "step": 2974 }, { "epoch": 0.10782501540357363, "grad_norm": 2.431343479159773, "learning_rate": 9.842044680156485e-06, "loss": 0.9533, "step": 2975 }, { "epoch": 0.10786125910623029, "grad_norm": 2.5437055758145033, "learning_rate": 9.841898285963323e-06, "loss": 1.0726, "step": 2976 }, { "epoch": 0.10789750280888695, "grad_norm": 2.582166275819155, "learning_rate": 9.841751825051731e-06, "loss": 1.0281, "step": 2977 }, { "epoch": 0.10793374651154362, "grad_norm": 2.260410633532109, "learning_rate": 9.841605297423728e-06, "loss": 0.8366, "step": 2978 }, { "epoch": 0.10796999021420028, "grad_norm": 2.7419144202504904, "learning_rate": 9.841458703081333e-06, "loss": 1.0871, "step": 2979 }, { "epoch": 0.10800623391685694, "grad_norm": 2.7164715128200325, "learning_rate": 9.841312042026566e-06, "loss": 1.022, "step": 2980 }, { "epoch": 0.1080424776195136, "grad_norm": 2.661022144563065, "learning_rate": 9.841165314261445e-06, "loss": 0.9028, "step": 2981 }, { "epoch": 0.10807872132217028, "grad_norm": 2.4048456216555514, "learning_rate": 9.841018519787994e-06, "loss": 0.7369, "step": 2982 }, { "epoch": 0.10811496502482694, "grad_norm": 2.5095334651713403, "learning_rate": 9.840871658608235e-06, "loss": 1.0286, "step": 2983 }, { "epoch": 0.1081512087274836, "grad_norm": 2.595410744200633, "learning_rate": 9.840724730724193e-06, "loss": 0.9272, "step": 2984 }, { "epoch": 0.10818745243014026, "grad_norm": 2.875491103610128, "learning_rate": 9.840577736137894e-06, "loss": 0.9805, "step": 2985 }, { "epoch": 0.10822369613279692, "grad_norm": 2.4981692048735997, "learning_rate": 9.840430674851358e-06, "loss": 0.97, "step": 2986 }, { "epoch": 0.1082599398354536, "grad_norm": 2.4444345126852682, "learning_rate": 9.840283546866615e-06, "loss": 0.9108, "step": 2987 }, { "epoch": 0.10829618353811025, "grad_norm": 2.123593396193642, "learning_rate": 9.840136352185694e-06, "loss": 0.9241, "step": 2988 }, { "epoch": 0.10833242724076692, "grad_norm": 2.70084807233759, "learning_rate": 9.83998909081062e-06, "loss": 0.9887, "step": 2989 }, { "epoch": 0.10836867094342358, "grad_norm": 2.2573201270176595, "learning_rate": 9.839841762743422e-06, "loss": 0.8442, "step": 2990 }, { "epoch": 0.10840491464608025, "grad_norm": 2.459944529327437, "learning_rate": 9.839694367986134e-06, "loss": 1.0315, "step": 2991 }, { "epoch": 0.10844115834873691, "grad_norm": 2.7009576862488944, "learning_rate": 9.839546906540785e-06, "loss": 1.0856, "step": 2992 }, { "epoch": 0.10847740205139357, "grad_norm": 2.1622120646732834, "learning_rate": 9.839399378409404e-06, "loss": 1.0668, "step": 2993 }, { "epoch": 0.10851364575405023, "grad_norm": 2.8606821730051415, "learning_rate": 9.839251783594029e-06, "loss": 1.1353, "step": 2994 }, { "epoch": 0.10854988945670689, "grad_norm": 2.440180191518769, "learning_rate": 9.839104122096689e-06, "loss": 1.1318, "step": 2995 }, { "epoch": 0.10858613315936357, "grad_norm": 2.4729690859240914, "learning_rate": 9.838956393919421e-06, "loss": 0.7763, "step": 2996 }, { "epoch": 0.10862237686202023, "grad_norm": 2.1907868362700227, "learning_rate": 9.83880859906426e-06, "loss": 0.8934, "step": 2997 }, { "epoch": 0.10865862056467689, "grad_norm": 2.6617822314968596, "learning_rate": 9.838660737533244e-06, "loss": 1.1614, "step": 2998 }, { "epoch": 0.10869486426733355, "grad_norm": 2.281337042147485, "learning_rate": 9.83851280932841e-06, "loss": 0.8678, "step": 2999 }, { "epoch": 0.10873110796999022, "grad_norm": 2.2925920802644217, "learning_rate": 9.838364814451795e-06, "loss": 0.9538, "step": 3000 }, { "epoch": 0.10876735167264688, "grad_norm": 2.6426297037563895, "learning_rate": 9.838216752905439e-06, "loss": 1.1124, "step": 3001 }, { "epoch": 0.10880359537530354, "grad_norm": 2.4661222673755554, "learning_rate": 9.838068624691382e-06, "loss": 1.1312, "step": 3002 }, { "epoch": 0.1088398390779602, "grad_norm": 2.475453048690614, "learning_rate": 9.837920429811666e-06, "loss": 0.991, "step": 3003 }, { "epoch": 0.10887608278061686, "grad_norm": 2.2562480041251933, "learning_rate": 9.83777216826833e-06, "loss": 1.0301, "step": 3004 }, { "epoch": 0.10891232648327354, "grad_norm": 2.4673593881017934, "learning_rate": 9.837623840063421e-06, "loss": 1.0302, "step": 3005 }, { "epoch": 0.1089485701859302, "grad_norm": 2.428640279743277, "learning_rate": 9.837475445198982e-06, "loss": 0.871, "step": 3006 }, { "epoch": 0.10898481388858686, "grad_norm": 2.2481934710553846, "learning_rate": 9.837326983677055e-06, "loss": 1.1825, "step": 3007 }, { "epoch": 0.10902105759124352, "grad_norm": 3.047641622493412, "learning_rate": 9.83717845549969e-06, "loss": 0.9206, "step": 3008 }, { "epoch": 0.10905730129390019, "grad_norm": 2.695736402925965, "learning_rate": 9.83702986066893e-06, "loss": 0.9426, "step": 3009 }, { "epoch": 0.10909354499655685, "grad_norm": 2.6693963101144726, "learning_rate": 9.836881199186825e-06, "loss": 1.0797, "step": 3010 }, { "epoch": 0.10912978869921351, "grad_norm": 2.488567532511577, "learning_rate": 9.836732471055421e-06, "loss": 1.1474, "step": 3011 }, { "epoch": 0.10916603240187017, "grad_norm": 2.7076687369800614, "learning_rate": 9.83658367627677e-06, "loss": 1.0122, "step": 3012 }, { "epoch": 0.10920227610452683, "grad_norm": 2.3732694365569946, "learning_rate": 9.83643481485292e-06, "loss": 0.9995, "step": 3013 }, { "epoch": 0.1092385198071835, "grad_norm": 2.5803225585926657, "learning_rate": 9.836285886785923e-06, "loss": 1.134, "step": 3014 }, { "epoch": 0.10927476350984017, "grad_norm": 2.398707549922695, "learning_rate": 9.836136892077831e-06, "loss": 1.1392, "step": 3015 }, { "epoch": 0.10931100721249683, "grad_norm": 2.236530868549398, "learning_rate": 9.835987830730699e-06, "loss": 1.0579, "step": 3016 }, { "epoch": 0.10934725091515349, "grad_norm": 2.4312009750785757, "learning_rate": 9.835838702746579e-06, "loss": 1.0606, "step": 3017 }, { "epoch": 0.10938349461781016, "grad_norm": 2.6838631380629057, "learning_rate": 9.835689508127524e-06, "loss": 0.9357, "step": 3018 }, { "epoch": 0.10941973832046682, "grad_norm": 2.388895168660569, "learning_rate": 9.835540246875595e-06, "loss": 1.064, "step": 3019 }, { "epoch": 0.10945598202312348, "grad_norm": 2.532203457074078, "learning_rate": 9.835390918992845e-06, "loss": 1.0395, "step": 3020 }, { "epoch": 0.10949222572578014, "grad_norm": 2.4057585326828606, "learning_rate": 9.835241524481331e-06, "loss": 1.0816, "step": 3021 }, { "epoch": 0.1095284694284368, "grad_norm": 2.611079920895509, "learning_rate": 9.835092063343116e-06, "loss": 1.0469, "step": 3022 }, { "epoch": 0.10956471313109348, "grad_norm": 2.338977944867413, "learning_rate": 9.834942535580253e-06, "loss": 0.927, "step": 3023 }, { "epoch": 0.10960095683375014, "grad_norm": 2.270557278915616, "learning_rate": 9.834792941194809e-06, "loss": 1.0541, "step": 3024 }, { "epoch": 0.1096372005364068, "grad_norm": 2.9369254219087173, "learning_rate": 9.834643280188841e-06, "loss": 0.9766, "step": 3025 }, { "epoch": 0.10967344423906346, "grad_norm": 2.4218190661952788, "learning_rate": 9.834493552564413e-06, "loss": 1.1015, "step": 3026 }, { "epoch": 0.10970968794172013, "grad_norm": 2.8402965918273986, "learning_rate": 9.834343758323587e-06, "loss": 1.0502, "step": 3027 }, { "epoch": 0.10974593164437679, "grad_norm": 2.38085502728043, "learning_rate": 9.834193897468429e-06, "loss": 0.9343, "step": 3028 }, { "epoch": 0.10978217534703345, "grad_norm": 2.7425773760525782, "learning_rate": 9.834043970001001e-06, "loss": 1.1043, "step": 3029 }, { "epoch": 0.10981841904969011, "grad_norm": 2.41323151538093, "learning_rate": 9.833893975923372e-06, "loss": 0.9382, "step": 3030 }, { "epoch": 0.10985466275234677, "grad_norm": 2.605236189757305, "learning_rate": 9.833743915237605e-06, "loss": 1.2276, "step": 3031 }, { "epoch": 0.10989090645500345, "grad_norm": 2.6004983609976047, "learning_rate": 9.833593787945773e-06, "loss": 1.0884, "step": 3032 }, { "epoch": 0.10992715015766011, "grad_norm": 2.6199375787076584, "learning_rate": 9.833443594049941e-06, "loss": 1.1204, "step": 3033 }, { "epoch": 0.10996339386031677, "grad_norm": 2.3594411382994953, "learning_rate": 9.833293333552178e-06, "loss": 0.8726, "step": 3034 }, { "epoch": 0.10999963756297343, "grad_norm": 2.6976083381545792, "learning_rate": 9.833143006454559e-06, "loss": 1.0406, "step": 3035 }, { "epoch": 0.1100358812656301, "grad_norm": 2.596390691927461, "learning_rate": 9.83299261275915e-06, "loss": 0.7084, "step": 3036 }, { "epoch": 0.11007212496828676, "grad_norm": 2.48040589922695, "learning_rate": 9.832842152468026e-06, "loss": 0.9712, "step": 3037 }, { "epoch": 0.11010836867094342, "grad_norm": 2.2758650360946917, "learning_rate": 9.832691625583261e-06, "loss": 0.8985, "step": 3038 }, { "epoch": 0.11014461237360008, "grad_norm": 2.3967091435689705, "learning_rate": 9.832541032106928e-06, "loss": 0.9499, "step": 3039 }, { "epoch": 0.11018085607625674, "grad_norm": 2.61154930552439, "learning_rate": 9.832390372041101e-06, "loss": 1.0424, "step": 3040 }, { "epoch": 0.11021709977891342, "grad_norm": 2.2034675977837734, "learning_rate": 9.832239645387857e-06, "loss": 1.0528, "step": 3041 }, { "epoch": 0.11025334348157008, "grad_norm": 2.201506046170325, "learning_rate": 9.832088852149274e-06, "loss": 1.0655, "step": 3042 }, { "epoch": 0.11028958718422674, "grad_norm": 2.59334831809314, "learning_rate": 9.831937992327429e-06, "loss": 1.0194, "step": 3043 }, { "epoch": 0.1103258308868834, "grad_norm": 2.2789493855159897, "learning_rate": 9.8317870659244e-06, "loss": 1.1699, "step": 3044 }, { "epoch": 0.11036207458954007, "grad_norm": 2.5350875510592306, "learning_rate": 9.831636072942269e-06, "loss": 0.9023, "step": 3045 }, { "epoch": 0.11039831829219673, "grad_norm": 2.276878654318682, "learning_rate": 9.831485013383113e-06, "loss": 1.0744, "step": 3046 }, { "epoch": 0.1104345619948534, "grad_norm": 2.547834991650293, "learning_rate": 9.831333887249017e-06, "loss": 1.1648, "step": 3047 }, { "epoch": 0.11047080569751005, "grad_norm": 2.458206131768338, "learning_rate": 9.831182694542062e-06, "loss": 1.1204, "step": 3048 }, { "epoch": 0.11050704940016672, "grad_norm": 2.4330219611681327, "learning_rate": 9.83103143526433e-06, "loss": 0.9564, "step": 3049 }, { "epoch": 0.11054329310282339, "grad_norm": 2.3091555571668425, "learning_rate": 9.830880109417908e-06, "loss": 0.957, "step": 3050 }, { "epoch": 0.11057953680548005, "grad_norm": 2.7362572909762073, "learning_rate": 9.830728717004878e-06, "loss": 1.1166, "step": 3051 }, { "epoch": 0.11061578050813671, "grad_norm": 2.194011216178649, "learning_rate": 9.83057725802733e-06, "loss": 0.7546, "step": 3052 }, { "epoch": 0.11065202421079337, "grad_norm": 2.0855982364674635, "learning_rate": 9.830425732487347e-06, "loss": 0.9246, "step": 3053 }, { "epoch": 0.11068826791345004, "grad_norm": 2.977375112172049, "learning_rate": 9.830274140387019e-06, "loss": 1.0326, "step": 3054 }, { "epoch": 0.1107245116161067, "grad_norm": 2.4661736877139813, "learning_rate": 9.830122481728436e-06, "loss": 0.9886, "step": 3055 }, { "epoch": 0.11076075531876337, "grad_norm": 2.3635366280322057, "learning_rate": 9.829970756513684e-06, "loss": 0.8029, "step": 3056 }, { "epoch": 0.11079699902142003, "grad_norm": 2.4126154479897166, "learning_rate": 9.829818964744858e-06, "loss": 0.9518, "step": 3057 }, { "epoch": 0.11083324272407669, "grad_norm": 2.342463299083177, "learning_rate": 9.829667106424048e-06, "loss": 1.1204, "step": 3058 }, { "epoch": 0.11086948642673336, "grad_norm": 2.8085480352952783, "learning_rate": 9.829515181553344e-06, "loss": 0.9111, "step": 3059 }, { "epoch": 0.11090573012939002, "grad_norm": 2.6268541906239196, "learning_rate": 9.829363190134844e-06, "loss": 1.086, "step": 3060 }, { "epoch": 0.11094197383204668, "grad_norm": 2.4228346240192358, "learning_rate": 9.829211132170637e-06, "loss": 1.0111, "step": 3061 }, { "epoch": 0.11097821753470334, "grad_norm": 2.1748993574765927, "learning_rate": 9.829059007662824e-06, "loss": 0.9088, "step": 3062 }, { "epoch": 0.11101446123736002, "grad_norm": 2.7018505671626047, "learning_rate": 9.828906816613497e-06, "loss": 0.9345, "step": 3063 }, { "epoch": 0.11105070494001668, "grad_norm": 2.2333369015165885, "learning_rate": 9.828754559024756e-06, "loss": 0.9464, "step": 3064 }, { "epoch": 0.11108694864267334, "grad_norm": 2.3485584746780743, "learning_rate": 9.828602234898696e-06, "loss": 1.0562, "step": 3065 }, { "epoch": 0.11112319234533, "grad_norm": 2.5750756540672732, "learning_rate": 9.828449844237418e-06, "loss": 1.0836, "step": 3066 }, { "epoch": 0.11115943604798666, "grad_norm": 2.527580544568506, "learning_rate": 9.82829738704302e-06, "loss": 1.1279, "step": 3067 }, { "epoch": 0.11119567975064333, "grad_norm": 2.4291548077150313, "learning_rate": 9.828144863317605e-06, "loss": 1.1151, "step": 3068 }, { "epoch": 0.11123192345329999, "grad_norm": 2.4131167931652597, "learning_rate": 9.827992273063274e-06, "loss": 1.0446, "step": 3069 }, { "epoch": 0.11126816715595665, "grad_norm": 2.6162271076979065, "learning_rate": 9.82783961628213e-06, "loss": 1.2424, "step": 3070 }, { "epoch": 0.11130441085861331, "grad_norm": 2.4082588216268443, "learning_rate": 9.827686892976274e-06, "loss": 1.0022, "step": 3071 }, { "epoch": 0.11134065456126999, "grad_norm": 2.6378809580316593, "learning_rate": 9.827534103147815e-06, "loss": 1.0916, "step": 3072 }, { "epoch": 0.11137689826392665, "grad_norm": 2.47496316048143, "learning_rate": 9.827381246798853e-06, "loss": 1.0609, "step": 3073 }, { "epoch": 0.1114131419665833, "grad_norm": 2.514499137041745, "learning_rate": 9.827228323931499e-06, "loss": 1.0497, "step": 3074 }, { "epoch": 0.11144938566923997, "grad_norm": 2.7008444874203446, "learning_rate": 9.827075334547858e-06, "loss": 1.0068, "step": 3075 }, { "epoch": 0.11148562937189663, "grad_norm": 2.4754524477211035, "learning_rate": 9.826922278650036e-06, "loss": 1.0115, "step": 3076 }, { "epoch": 0.1115218730745533, "grad_norm": 2.441151851180351, "learning_rate": 9.826769156240146e-06, "loss": 1.0339, "step": 3077 }, { "epoch": 0.11155811677720996, "grad_norm": 2.4898240097695834, "learning_rate": 9.826615967320295e-06, "loss": 1.1589, "step": 3078 }, { "epoch": 0.11159436047986662, "grad_norm": 2.3026017926198494, "learning_rate": 9.826462711892597e-06, "loss": 0.7902, "step": 3079 }, { "epoch": 0.11163060418252328, "grad_norm": 2.3565566551329895, "learning_rate": 9.82630938995916e-06, "loss": 0.9429, "step": 3080 }, { "epoch": 0.11166684788517996, "grad_norm": 2.641434557324622, "learning_rate": 9.826156001522098e-06, "loss": 0.9758, "step": 3081 }, { "epoch": 0.11170309158783662, "grad_norm": 2.7288450005300553, "learning_rate": 9.826002546583526e-06, "loss": 1.138, "step": 3082 }, { "epoch": 0.11173933529049328, "grad_norm": 2.3426422197385164, "learning_rate": 9.825849025145558e-06, "loss": 1.0934, "step": 3083 }, { "epoch": 0.11177557899314994, "grad_norm": 2.4544027462032365, "learning_rate": 9.825695437210309e-06, "loss": 1.0174, "step": 3084 }, { "epoch": 0.1118118226958066, "grad_norm": 2.7919168559862793, "learning_rate": 9.825541782779894e-06, "loss": 0.931, "step": 3085 }, { "epoch": 0.11184806639846327, "grad_norm": 2.2385693360802517, "learning_rate": 9.825388061856434e-06, "loss": 0.8091, "step": 3086 }, { "epoch": 0.11188431010111993, "grad_norm": 2.3141874951679524, "learning_rate": 9.825234274442044e-06, "loss": 1.3317, "step": 3087 }, { "epoch": 0.11192055380377659, "grad_norm": 2.484405368972479, "learning_rate": 9.825080420538843e-06, "loss": 1.1032, "step": 3088 }, { "epoch": 0.11195679750643325, "grad_norm": 2.4766136349103456, "learning_rate": 9.824926500148953e-06, "loss": 0.9765, "step": 3089 }, { "epoch": 0.11199304120908993, "grad_norm": 2.605071150130522, "learning_rate": 9.824772513274491e-06, "loss": 1.086, "step": 3090 }, { "epoch": 0.11202928491174659, "grad_norm": 2.4935025676027123, "learning_rate": 9.824618459917586e-06, "loss": 1.1715, "step": 3091 }, { "epoch": 0.11206552861440325, "grad_norm": 2.610282929442396, "learning_rate": 9.824464340080352e-06, "loss": 1.1961, "step": 3092 }, { "epoch": 0.11210177231705991, "grad_norm": 2.787497319856102, "learning_rate": 9.82431015376492e-06, "loss": 1.1275, "step": 3093 }, { "epoch": 0.11213801601971657, "grad_norm": 3.0882689287340925, "learning_rate": 9.824155900973412e-06, "loss": 0.907, "step": 3094 }, { "epoch": 0.11217425972237324, "grad_norm": 2.4278390399912664, "learning_rate": 9.824001581707951e-06, "loss": 1.2136, "step": 3095 }, { "epoch": 0.1122105034250299, "grad_norm": 2.3582249525540737, "learning_rate": 9.823847195970668e-06, "loss": 0.9932, "step": 3096 }, { "epoch": 0.11224674712768656, "grad_norm": 2.304426282016966, "learning_rate": 9.823692743763687e-06, "loss": 1.2775, "step": 3097 }, { "epoch": 0.11228299083034322, "grad_norm": 2.5655533590170934, "learning_rate": 9.823538225089137e-06, "loss": 1.0682, "step": 3098 }, { "epoch": 0.1123192345329999, "grad_norm": 2.5651884296070975, "learning_rate": 9.823383639949148e-06, "loss": 1.0465, "step": 3099 }, { "epoch": 0.11235547823565656, "grad_norm": 2.322555572251309, "learning_rate": 9.82322898834585e-06, "loss": 1.0649, "step": 3100 }, { "epoch": 0.11239172193831322, "grad_norm": 2.552824714683274, "learning_rate": 9.823074270281373e-06, "loss": 1.4235, "step": 3101 }, { "epoch": 0.11242796564096988, "grad_norm": 2.629842770197334, "learning_rate": 9.82291948575785e-06, "loss": 0.9645, "step": 3102 }, { "epoch": 0.11246420934362654, "grad_norm": 2.500175178515147, "learning_rate": 9.822764634777412e-06, "loss": 1.0886, "step": 3103 }, { "epoch": 0.11250045304628321, "grad_norm": 2.5344346022661792, "learning_rate": 9.822609717342194e-06, "loss": 1.0772, "step": 3104 }, { "epoch": 0.11253669674893987, "grad_norm": 2.616331917153848, "learning_rate": 9.822454733454332e-06, "loss": 1.0814, "step": 3105 }, { "epoch": 0.11257294045159653, "grad_norm": 2.6477949248318136, "learning_rate": 9.82229968311596e-06, "loss": 1.2071, "step": 3106 }, { "epoch": 0.1126091841542532, "grad_norm": 3.1919527315141174, "learning_rate": 9.822144566329214e-06, "loss": 0.9574, "step": 3107 }, { "epoch": 0.11264542785690987, "grad_norm": 2.591565248259666, "learning_rate": 9.821989383096233e-06, "loss": 0.8729, "step": 3108 }, { "epoch": 0.11268167155956653, "grad_norm": 2.300280716291474, "learning_rate": 9.821834133419154e-06, "loss": 1.0033, "step": 3109 }, { "epoch": 0.11271791526222319, "grad_norm": 2.586090568351543, "learning_rate": 9.821678817300118e-06, "loss": 0.915, "step": 3110 }, { "epoch": 0.11275415896487985, "grad_norm": 2.2195696195082686, "learning_rate": 9.821523434741263e-06, "loss": 0.956, "step": 3111 }, { "epoch": 0.11279040266753651, "grad_norm": 2.138764619943318, "learning_rate": 9.82136798574473e-06, "loss": 0.9705, "step": 3112 }, { "epoch": 0.11282664637019318, "grad_norm": 2.489694802693575, "learning_rate": 9.821212470312662e-06, "loss": 0.9912, "step": 3113 }, { "epoch": 0.11286289007284984, "grad_norm": 2.614764737290176, "learning_rate": 9.821056888447205e-06, "loss": 1.0579, "step": 3114 }, { "epoch": 0.1128991337755065, "grad_norm": 2.77649700420378, "learning_rate": 9.820901240150497e-06, "loss": 1.2394, "step": 3115 }, { "epoch": 0.11293537747816317, "grad_norm": 2.0820687823237733, "learning_rate": 9.820745525424687e-06, "loss": 0.8471, "step": 3116 }, { "epoch": 0.11297162118081983, "grad_norm": 2.3800099103782895, "learning_rate": 9.820589744271918e-06, "loss": 0.958, "step": 3117 }, { "epoch": 0.1130078648834765, "grad_norm": 2.49416584448912, "learning_rate": 9.820433896694337e-06, "loss": 1.1049, "step": 3118 }, { "epoch": 0.11304410858613316, "grad_norm": 2.5202776399231563, "learning_rate": 9.820277982694093e-06, "loss": 0.9998, "step": 3119 }, { "epoch": 0.11308035228878982, "grad_norm": 2.5303679147575404, "learning_rate": 9.820122002273334e-06, "loss": 1.0017, "step": 3120 }, { "epoch": 0.11311659599144648, "grad_norm": 2.374256164120273, "learning_rate": 9.81996595543421e-06, "loss": 1.0101, "step": 3121 }, { "epoch": 0.11315283969410316, "grad_norm": 2.5548419796825756, "learning_rate": 9.819809842178869e-06, "loss": 1.1284, "step": 3122 }, { "epoch": 0.11318908339675982, "grad_norm": 2.3515129782376265, "learning_rate": 9.819653662509462e-06, "loss": 1.0647, "step": 3123 }, { "epoch": 0.11322532709941648, "grad_norm": 2.2064917004913007, "learning_rate": 9.819497416428144e-06, "loss": 0.873, "step": 3124 }, { "epoch": 0.11326157080207314, "grad_norm": 2.452410522689421, "learning_rate": 9.819341103937066e-06, "loss": 1.065, "step": 3125 }, { "epoch": 0.1132978145047298, "grad_norm": 2.713042500873751, "learning_rate": 9.819184725038382e-06, "loss": 1.1231, "step": 3126 }, { "epoch": 0.11333405820738647, "grad_norm": 2.514121265102087, "learning_rate": 9.819028279734249e-06, "loss": 1.1015, "step": 3127 }, { "epoch": 0.11337030191004313, "grad_norm": 2.5096350257193936, "learning_rate": 9.818871768026819e-06, "loss": 1.0567, "step": 3128 }, { "epoch": 0.11340654561269979, "grad_norm": 2.3341049935706573, "learning_rate": 9.818715189918249e-06, "loss": 0.9563, "step": 3129 }, { "epoch": 0.11344278931535645, "grad_norm": 2.4221590045115513, "learning_rate": 9.8185585454107e-06, "loss": 1.0989, "step": 3130 }, { "epoch": 0.11347903301801313, "grad_norm": 2.1344060236772817, "learning_rate": 9.818401834506328e-06, "loss": 0.9252, "step": 3131 }, { "epoch": 0.11351527672066979, "grad_norm": 2.6823543772694407, "learning_rate": 9.818245057207292e-06, "loss": 1.0866, "step": 3132 }, { "epoch": 0.11355152042332645, "grad_norm": 2.43393525934988, "learning_rate": 9.818088213515755e-06, "loss": 1.0289, "step": 3133 }, { "epoch": 0.1135877641259831, "grad_norm": 2.4155750557854234, "learning_rate": 9.817931303433874e-06, "loss": 1.0431, "step": 3134 }, { "epoch": 0.11362400782863977, "grad_norm": 2.5395004098733467, "learning_rate": 9.817774326963813e-06, "loss": 1.056, "step": 3135 }, { "epoch": 0.11366025153129644, "grad_norm": 4.325111663516541, "learning_rate": 9.81761728410774e-06, "loss": 0.9871, "step": 3136 }, { "epoch": 0.1136964952339531, "grad_norm": 2.5572601553963903, "learning_rate": 9.81746017486781e-06, "loss": 1.069, "step": 3137 }, { "epoch": 0.11373273893660976, "grad_norm": 2.4863398641378125, "learning_rate": 9.817302999246193e-06, "loss": 1.154, "step": 3138 }, { "epoch": 0.11376898263926642, "grad_norm": 2.6465572224670164, "learning_rate": 9.817145757245055e-06, "loss": 0.9819, "step": 3139 }, { "epoch": 0.1138052263419231, "grad_norm": 2.2828381761687484, "learning_rate": 9.816988448866562e-06, "loss": 0.839, "step": 3140 }, { "epoch": 0.11384147004457976, "grad_norm": 2.5398623795650126, "learning_rate": 9.81683107411288e-06, "loss": 1.0662, "step": 3141 }, { "epoch": 0.11387771374723642, "grad_norm": 2.556046280644409, "learning_rate": 9.81667363298618e-06, "loss": 0.9543, "step": 3142 }, { "epoch": 0.11391395744989308, "grad_norm": 2.3450792770337516, "learning_rate": 9.816516125488631e-06, "loss": 1.1893, "step": 3143 }, { "epoch": 0.11395020115254974, "grad_norm": 2.412281639004543, "learning_rate": 9.816358551622402e-06, "loss": 0.9436, "step": 3144 }, { "epoch": 0.11398644485520641, "grad_norm": 2.322900227197759, "learning_rate": 9.816200911389665e-06, "loss": 1.0033, "step": 3145 }, { "epoch": 0.11402268855786307, "grad_norm": 2.453023442659105, "learning_rate": 9.816043204792592e-06, "loss": 1.0578, "step": 3146 }, { "epoch": 0.11405893226051973, "grad_norm": 2.3027723006467418, "learning_rate": 9.815885431833356e-06, "loss": 0.9082, "step": 3147 }, { "epoch": 0.11409517596317639, "grad_norm": 2.2886271930201656, "learning_rate": 9.815727592514131e-06, "loss": 0.9779, "step": 3148 }, { "epoch": 0.11413141966583307, "grad_norm": 2.3659423568992297, "learning_rate": 9.815569686837094e-06, "loss": 1.0305, "step": 3149 }, { "epoch": 0.11416766336848973, "grad_norm": 2.515492684609756, "learning_rate": 9.815411714804417e-06, "loss": 1.0187, "step": 3150 }, { "epoch": 0.11420390707114639, "grad_norm": 2.6237743769651596, "learning_rate": 9.81525367641828e-06, "loss": 1.1576, "step": 3151 }, { "epoch": 0.11424015077380305, "grad_norm": 2.525206180393596, "learning_rate": 9.815095571680859e-06, "loss": 1.0144, "step": 3152 }, { "epoch": 0.11427639447645971, "grad_norm": 2.641961946921828, "learning_rate": 9.814937400594333e-06, "loss": 1.0665, "step": 3153 }, { "epoch": 0.11431263817911638, "grad_norm": 2.385575090991574, "learning_rate": 9.814779163160884e-06, "loss": 1.0797, "step": 3154 }, { "epoch": 0.11434888188177304, "grad_norm": 2.490763487212159, "learning_rate": 9.814620859382687e-06, "loss": 1.1252, "step": 3155 }, { "epoch": 0.1143851255844297, "grad_norm": 2.244951746989253, "learning_rate": 9.814462489261926e-06, "loss": 0.9179, "step": 3156 }, { "epoch": 0.11442136928708636, "grad_norm": 2.9442379352858956, "learning_rate": 9.814304052800788e-06, "loss": 1.1202, "step": 3157 }, { "epoch": 0.11445761298974304, "grad_norm": 2.450578936929257, "learning_rate": 9.814145550001447e-06, "loss": 1.0126, "step": 3158 }, { "epoch": 0.1144938566923997, "grad_norm": 2.3964270907670002, "learning_rate": 9.813986980866094e-06, "loss": 0.9477, "step": 3159 }, { "epoch": 0.11453010039505636, "grad_norm": 2.395488312737252, "learning_rate": 9.813828345396913e-06, "loss": 1.0541, "step": 3160 }, { "epoch": 0.11456634409771302, "grad_norm": 2.3704946041848527, "learning_rate": 9.813669643596085e-06, "loss": 0.8046, "step": 3161 }, { "epoch": 0.11460258780036968, "grad_norm": 2.4759575193081163, "learning_rate": 9.813510875465803e-06, "loss": 1.1319, "step": 3162 }, { "epoch": 0.11463883150302635, "grad_norm": 2.2304020083298686, "learning_rate": 9.813352041008252e-06, "loss": 1.0511, "step": 3163 }, { "epoch": 0.11467507520568301, "grad_norm": 2.4178139471317124, "learning_rate": 9.81319314022562e-06, "loss": 0.9429, "step": 3164 }, { "epoch": 0.11471131890833967, "grad_norm": 2.6682565231745983, "learning_rate": 9.8130341731201e-06, "loss": 0.9826, "step": 3165 }, { "epoch": 0.11474756261099633, "grad_norm": 2.5548954328177933, "learning_rate": 9.812875139693875e-06, "loss": 1.1758, "step": 3166 }, { "epoch": 0.11478380631365301, "grad_norm": 2.4892298380738467, "learning_rate": 9.812716039949146e-06, "loss": 1.0864, "step": 3167 }, { "epoch": 0.11482005001630967, "grad_norm": 2.5131916668082837, "learning_rate": 9.8125568738881e-06, "loss": 0.9583, "step": 3168 }, { "epoch": 0.11485629371896633, "grad_norm": 2.194943881651093, "learning_rate": 9.81239764151293e-06, "loss": 0.9827, "step": 3169 }, { "epoch": 0.11489253742162299, "grad_norm": 2.683832294220626, "learning_rate": 9.812238342825831e-06, "loss": 0.9916, "step": 3170 }, { "epoch": 0.11492878112427965, "grad_norm": 2.219407629087036, "learning_rate": 9.812078977828995e-06, "loss": 1.1171, "step": 3171 }, { "epoch": 0.11496502482693632, "grad_norm": 2.4231624065997415, "learning_rate": 9.811919546524625e-06, "loss": 0.8484, "step": 3172 }, { "epoch": 0.11500126852959298, "grad_norm": 2.425492074900525, "learning_rate": 9.811760048914913e-06, "loss": 1.0251, "step": 3173 }, { "epoch": 0.11503751223224964, "grad_norm": 2.318298497072729, "learning_rate": 9.811600485002058e-06, "loss": 1.0225, "step": 3174 }, { "epoch": 0.1150737559349063, "grad_norm": 2.877860534562318, "learning_rate": 9.811440854788257e-06, "loss": 1.0672, "step": 3175 }, { "epoch": 0.11510999963756298, "grad_norm": 2.303878939687597, "learning_rate": 9.811281158275711e-06, "loss": 0.9396, "step": 3176 }, { "epoch": 0.11514624334021964, "grad_norm": 2.657495583926983, "learning_rate": 9.811121395466621e-06, "loss": 1.2208, "step": 3177 }, { "epoch": 0.1151824870428763, "grad_norm": 2.528498738593935, "learning_rate": 9.810961566363188e-06, "loss": 1.0008, "step": 3178 }, { "epoch": 0.11521873074553296, "grad_norm": 2.612319153949046, "learning_rate": 9.810801670967614e-06, "loss": 1.1327, "step": 3179 }, { "epoch": 0.11525497444818962, "grad_norm": 2.894847078462113, "learning_rate": 9.810641709282101e-06, "loss": 0.9769, "step": 3180 }, { "epoch": 0.1152912181508463, "grad_norm": 2.8090829655495604, "learning_rate": 9.810481681308857e-06, "loss": 1.0, "step": 3181 }, { "epoch": 0.11532746185350295, "grad_norm": 2.6816390703281874, "learning_rate": 9.810321587050083e-06, "loss": 0.9222, "step": 3182 }, { "epoch": 0.11536370555615962, "grad_norm": 2.332581688840255, "learning_rate": 9.810161426507987e-06, "loss": 0.8174, "step": 3183 }, { "epoch": 0.11539994925881628, "grad_norm": 2.324345318934738, "learning_rate": 9.810001199684775e-06, "loss": 0.9419, "step": 3184 }, { "epoch": 0.11543619296147295, "grad_norm": 2.419201676684401, "learning_rate": 9.809840906582657e-06, "loss": 1.0568, "step": 3185 }, { "epoch": 0.11547243666412961, "grad_norm": 2.3674892946357646, "learning_rate": 9.809680547203839e-06, "loss": 0.9146, "step": 3186 }, { "epoch": 0.11550868036678627, "grad_norm": 2.435765730804536, "learning_rate": 9.809520121550534e-06, "loss": 1.0209, "step": 3187 }, { "epoch": 0.11554492406944293, "grad_norm": 2.7913671922460357, "learning_rate": 9.809359629624949e-06, "loss": 1.1394, "step": 3188 }, { "epoch": 0.11558116777209959, "grad_norm": 2.4912604514308394, "learning_rate": 9.809199071429296e-06, "loss": 1.0387, "step": 3189 }, { "epoch": 0.11561741147475627, "grad_norm": 2.5711436458750625, "learning_rate": 9.809038446965789e-06, "loss": 0.9039, "step": 3190 }, { "epoch": 0.11565365517741293, "grad_norm": 2.4003985926729334, "learning_rate": 9.80887775623664e-06, "loss": 0.7591, "step": 3191 }, { "epoch": 0.11568989888006959, "grad_norm": 2.700279300580718, "learning_rate": 9.808716999244065e-06, "loss": 0.9223, "step": 3192 }, { "epoch": 0.11572614258272625, "grad_norm": 2.6663075665659646, "learning_rate": 9.808556175990278e-06, "loss": 0.9207, "step": 3193 }, { "epoch": 0.11576238628538292, "grad_norm": 2.5509801631185836, "learning_rate": 9.808395286477494e-06, "loss": 1.0337, "step": 3194 }, { "epoch": 0.11579862998803958, "grad_norm": 2.2909505240489785, "learning_rate": 9.80823433070793e-06, "loss": 0.8858, "step": 3195 }, { "epoch": 0.11583487369069624, "grad_norm": 2.574664052202455, "learning_rate": 9.808073308683808e-06, "loss": 1.1211, "step": 3196 }, { "epoch": 0.1158711173933529, "grad_norm": 2.3511399766962144, "learning_rate": 9.807912220407342e-06, "loss": 1.0709, "step": 3197 }, { "epoch": 0.11590736109600956, "grad_norm": 2.6265081247652917, "learning_rate": 9.807751065880753e-06, "loss": 0.9614, "step": 3198 }, { "epoch": 0.11594360479866624, "grad_norm": 2.5046117554790315, "learning_rate": 9.807589845106263e-06, "loss": 1.0273, "step": 3199 }, { "epoch": 0.1159798485013229, "grad_norm": 2.4581300800676735, "learning_rate": 9.807428558086092e-06, "loss": 0.9842, "step": 3200 }, { "epoch": 0.11601609220397956, "grad_norm": 2.6209308179296467, "learning_rate": 9.807267204822462e-06, "loss": 1.0262, "step": 3201 }, { "epoch": 0.11605233590663622, "grad_norm": 2.866121422523838, "learning_rate": 9.807105785317598e-06, "loss": 1.1147, "step": 3202 }, { "epoch": 0.11608857960929289, "grad_norm": 2.5706491635517312, "learning_rate": 9.806944299573723e-06, "loss": 1.0997, "step": 3203 }, { "epoch": 0.11612482331194955, "grad_norm": 2.3782715926809828, "learning_rate": 9.806782747593063e-06, "loss": 1.1113, "step": 3204 }, { "epoch": 0.11616106701460621, "grad_norm": 2.473975101621384, "learning_rate": 9.806621129377845e-06, "loss": 0.9096, "step": 3205 }, { "epoch": 0.11619731071726287, "grad_norm": 2.259216333069971, "learning_rate": 9.806459444930293e-06, "loss": 0.9387, "step": 3206 }, { "epoch": 0.11623355441991953, "grad_norm": 2.2973532546590683, "learning_rate": 9.806297694252639e-06, "loss": 0.9522, "step": 3207 }, { "epoch": 0.1162697981225762, "grad_norm": 2.3773667346066842, "learning_rate": 9.806135877347106e-06, "loss": 1.0671, "step": 3208 }, { "epoch": 0.11630604182523287, "grad_norm": 2.261821299665119, "learning_rate": 9.80597399421593e-06, "loss": 0.9978, "step": 3209 }, { "epoch": 0.11634228552788953, "grad_norm": 2.6424686959484314, "learning_rate": 9.805812044861338e-06, "loss": 1.1439, "step": 3210 }, { "epoch": 0.11637852923054619, "grad_norm": 2.7517050281756594, "learning_rate": 9.805650029285563e-06, "loss": 1.1148, "step": 3211 }, { "epoch": 0.11641477293320286, "grad_norm": 2.237569915314343, "learning_rate": 9.805487947490835e-06, "loss": 1.0011, "step": 3212 }, { "epoch": 0.11645101663585952, "grad_norm": 2.690598033843307, "learning_rate": 9.805325799479392e-06, "loss": 0.9582, "step": 3213 }, { "epoch": 0.11648726033851618, "grad_norm": 2.361447925033729, "learning_rate": 9.805163585253464e-06, "loss": 1.1367, "step": 3214 }, { "epoch": 0.11652350404117284, "grad_norm": 2.107071730482869, "learning_rate": 9.805001304815288e-06, "loss": 1.1128, "step": 3215 }, { "epoch": 0.1165597477438295, "grad_norm": 2.3719949561163105, "learning_rate": 9.8048389581671e-06, "loss": 0.9087, "step": 3216 }, { "epoch": 0.11659599144648618, "grad_norm": 2.605647407536304, "learning_rate": 9.804676545311138e-06, "loss": 1.0238, "step": 3217 }, { "epoch": 0.11663223514914284, "grad_norm": 2.2246022247496526, "learning_rate": 9.804514066249639e-06, "loss": 0.8897, "step": 3218 }, { "epoch": 0.1166684788517995, "grad_norm": 2.8303453885630607, "learning_rate": 9.804351520984841e-06, "loss": 0.8915, "step": 3219 }, { "epoch": 0.11670472255445616, "grad_norm": 2.4155258597446663, "learning_rate": 9.804188909518985e-06, "loss": 1.0445, "step": 3220 }, { "epoch": 0.11674096625711283, "grad_norm": 2.575740568778207, "learning_rate": 9.804026231854311e-06, "loss": 1.2572, "step": 3221 }, { "epoch": 0.11677720995976949, "grad_norm": 2.465561805153875, "learning_rate": 9.803863487993062e-06, "loss": 1.0546, "step": 3222 }, { "epoch": 0.11681345366242615, "grad_norm": 2.448642851339055, "learning_rate": 9.803700677937477e-06, "loss": 1.0891, "step": 3223 }, { "epoch": 0.11684969736508281, "grad_norm": 2.750438831098924, "learning_rate": 9.803537801689805e-06, "loss": 0.9243, "step": 3224 }, { "epoch": 0.11688594106773947, "grad_norm": 2.586514228302436, "learning_rate": 9.803374859252284e-06, "loss": 0.7377, "step": 3225 }, { "epoch": 0.11692218477039615, "grad_norm": 2.56456677447679, "learning_rate": 9.803211850627165e-06, "loss": 0.9373, "step": 3226 }, { "epoch": 0.11695842847305281, "grad_norm": 2.5299214126815226, "learning_rate": 9.803048775816691e-06, "loss": 1.0836, "step": 3227 }, { "epoch": 0.11699467217570947, "grad_norm": 2.7423882038354477, "learning_rate": 9.80288563482311e-06, "loss": 0.9173, "step": 3228 }, { "epoch": 0.11703091587836613, "grad_norm": 2.6604133158643726, "learning_rate": 9.802722427648668e-06, "loss": 1.0237, "step": 3229 }, { "epoch": 0.1170671595810228, "grad_norm": 3.350060735510157, "learning_rate": 9.802559154295616e-06, "loss": 0.9581, "step": 3230 }, { "epoch": 0.11710340328367946, "grad_norm": 2.456429111829794, "learning_rate": 9.802395814766204e-06, "loss": 0.9186, "step": 3231 }, { "epoch": 0.11713964698633612, "grad_norm": 2.5053859699485477, "learning_rate": 9.802232409062681e-06, "loss": 0.914, "step": 3232 }, { "epoch": 0.11717589068899278, "grad_norm": 2.343329486155213, "learning_rate": 9.802068937187302e-06, "loss": 0.9841, "step": 3233 }, { "epoch": 0.11721213439164944, "grad_norm": 2.3606810419573376, "learning_rate": 9.801905399142315e-06, "loss": 0.9465, "step": 3234 }, { "epoch": 0.11724837809430612, "grad_norm": 2.432756227425197, "learning_rate": 9.801741794929977e-06, "loss": 1.0526, "step": 3235 }, { "epoch": 0.11728462179696278, "grad_norm": 2.3816424818346773, "learning_rate": 9.80157812455254e-06, "loss": 0.9555, "step": 3236 }, { "epoch": 0.11732086549961944, "grad_norm": 2.540513197832313, "learning_rate": 9.80141438801226e-06, "loss": 0.9969, "step": 3237 }, { "epoch": 0.1173571092022761, "grad_norm": 2.6555090517586777, "learning_rate": 9.801250585311395e-06, "loss": 1.1222, "step": 3238 }, { "epoch": 0.11739335290493277, "grad_norm": 2.6213197354331412, "learning_rate": 9.8010867164522e-06, "loss": 0.9553, "step": 3239 }, { "epoch": 0.11742959660758943, "grad_norm": 2.470112549851542, "learning_rate": 9.800922781436935e-06, "loss": 0.9952, "step": 3240 }, { "epoch": 0.1174658403102461, "grad_norm": 2.398328252512517, "learning_rate": 9.800758780267855e-06, "loss": 0.975, "step": 3241 }, { "epoch": 0.11750208401290275, "grad_norm": 2.154999372229205, "learning_rate": 9.800594712947224e-06, "loss": 0.8554, "step": 3242 }, { "epoch": 0.11753832771555942, "grad_norm": 2.6365840982072104, "learning_rate": 9.8004305794773e-06, "loss": 0.9585, "step": 3243 }, { "epoch": 0.11757457141821609, "grad_norm": 2.317962089454698, "learning_rate": 9.800266379860348e-06, "loss": 0.9183, "step": 3244 }, { "epoch": 0.11761081512087275, "grad_norm": 2.356824260962341, "learning_rate": 9.800102114098626e-06, "loss": 0.9954, "step": 3245 }, { "epoch": 0.11764705882352941, "grad_norm": 2.313304587511923, "learning_rate": 9.799937782194402e-06, "loss": 0.9879, "step": 3246 }, { "epoch": 0.11768330252618607, "grad_norm": 2.3781860170176947, "learning_rate": 9.799773384149936e-06, "loss": 0.8905, "step": 3247 }, { "epoch": 0.11771954622884274, "grad_norm": 2.6111401524462736, "learning_rate": 9.799608919967499e-06, "loss": 0.9226, "step": 3248 }, { "epoch": 0.1177557899314994, "grad_norm": 2.527331249309803, "learning_rate": 9.79944438964935e-06, "loss": 1.1114, "step": 3249 }, { "epoch": 0.11779203363415607, "grad_norm": 2.5751165616564227, "learning_rate": 9.799279793197761e-06, "loss": 0.8738, "step": 3250 }, { "epoch": 0.11782827733681273, "grad_norm": 2.6034830371445112, "learning_rate": 9.799115130615e-06, "loss": 1.0946, "step": 3251 }, { "epoch": 0.11786452103946939, "grad_norm": 2.6397692769550476, "learning_rate": 9.798950401903334e-06, "loss": 0.9773, "step": 3252 }, { "epoch": 0.11790076474212606, "grad_norm": 2.4945306256262425, "learning_rate": 9.798785607065034e-06, "loss": 0.9851, "step": 3253 }, { "epoch": 0.11793700844478272, "grad_norm": 2.39605464283869, "learning_rate": 9.798620746102373e-06, "loss": 1.0508, "step": 3254 }, { "epoch": 0.11797325214743938, "grad_norm": 2.3725847157866893, "learning_rate": 9.798455819017617e-06, "loss": 0.9866, "step": 3255 }, { "epoch": 0.11800949585009604, "grad_norm": 2.3568884236775323, "learning_rate": 9.798290825813042e-06, "loss": 1.0173, "step": 3256 }, { "epoch": 0.11804573955275272, "grad_norm": 2.448654972653967, "learning_rate": 9.798125766490922e-06, "loss": 0.9323, "step": 3257 }, { "epoch": 0.11808198325540938, "grad_norm": 2.532231603940282, "learning_rate": 9.79796064105353e-06, "loss": 0.9232, "step": 3258 }, { "epoch": 0.11811822695806604, "grad_norm": 2.3994686268783947, "learning_rate": 9.797795449503144e-06, "loss": 1.0109, "step": 3259 }, { "epoch": 0.1181544706607227, "grad_norm": 2.475696070565335, "learning_rate": 9.797630191842037e-06, "loss": 1.1438, "step": 3260 }, { "epoch": 0.11819071436337936, "grad_norm": 2.4700395054177346, "learning_rate": 9.797464868072489e-06, "loss": 1.036, "step": 3261 }, { "epoch": 0.11822695806603603, "grad_norm": 2.2003550148597237, "learning_rate": 9.797299478196773e-06, "loss": 0.9079, "step": 3262 }, { "epoch": 0.11826320176869269, "grad_norm": 2.765315486264649, "learning_rate": 9.797134022217174e-06, "loss": 0.9451, "step": 3263 }, { "epoch": 0.11829944547134935, "grad_norm": 2.512222811802396, "learning_rate": 9.796968500135971e-06, "loss": 0.9134, "step": 3264 }, { "epoch": 0.11833568917400601, "grad_norm": 2.3885755624358533, "learning_rate": 9.79680291195544e-06, "loss": 0.9718, "step": 3265 }, { "epoch": 0.11837193287666269, "grad_norm": 2.1687457513866115, "learning_rate": 9.796637257677868e-06, "loss": 1.0816, "step": 3266 }, { "epoch": 0.11840817657931935, "grad_norm": 2.556361410707698, "learning_rate": 9.796471537305534e-06, "loss": 0.9957, "step": 3267 }, { "epoch": 0.118444420281976, "grad_norm": 2.1994297790569757, "learning_rate": 9.796305750840725e-06, "loss": 0.935, "step": 3268 }, { "epoch": 0.11848066398463267, "grad_norm": 2.6304629535717865, "learning_rate": 9.796139898285721e-06, "loss": 1.3359, "step": 3269 }, { "epoch": 0.11851690768728933, "grad_norm": 2.0432301868734615, "learning_rate": 9.795973979642811e-06, "loss": 0.8807, "step": 3270 }, { "epoch": 0.118553151389946, "grad_norm": 2.3043556487039587, "learning_rate": 9.795807994914281e-06, "loss": 0.9511, "step": 3271 }, { "epoch": 0.11858939509260266, "grad_norm": 2.3745540672445364, "learning_rate": 9.795641944102415e-06, "loss": 1.0521, "step": 3272 }, { "epoch": 0.11862563879525932, "grad_norm": 2.34690824822032, "learning_rate": 9.795475827209504e-06, "loss": 1.0118, "step": 3273 }, { "epoch": 0.11866188249791598, "grad_norm": 2.3561102353979275, "learning_rate": 9.795309644237837e-06, "loss": 0.8889, "step": 3274 }, { "epoch": 0.11869812620057266, "grad_norm": 2.434221758044916, "learning_rate": 9.795143395189704e-06, "loss": 0.9857, "step": 3275 }, { "epoch": 0.11873436990322932, "grad_norm": 2.532387968651236, "learning_rate": 9.794977080067394e-06, "loss": 0.9516, "step": 3276 }, { "epoch": 0.11877061360588598, "grad_norm": 2.5287285591333206, "learning_rate": 9.7948106988732e-06, "loss": 0.9427, "step": 3277 }, { "epoch": 0.11880685730854264, "grad_norm": 2.472199561098872, "learning_rate": 9.794644251609416e-06, "loss": 1.1779, "step": 3278 }, { "epoch": 0.1188431010111993, "grad_norm": 2.343463113815774, "learning_rate": 9.794477738278331e-06, "loss": 1.05, "step": 3279 }, { "epoch": 0.11887934471385597, "grad_norm": 2.5703374937877412, "learning_rate": 9.794311158882245e-06, "loss": 1.098, "step": 3280 }, { "epoch": 0.11891558841651263, "grad_norm": 2.4984979622151684, "learning_rate": 9.794144513423448e-06, "loss": 1.0692, "step": 3281 }, { "epoch": 0.11895183211916929, "grad_norm": 2.8083908827688804, "learning_rate": 9.793977801904242e-06, "loss": 0.9711, "step": 3282 }, { "epoch": 0.11898807582182595, "grad_norm": 2.3001883497682223, "learning_rate": 9.793811024326918e-06, "loss": 1.0303, "step": 3283 }, { "epoch": 0.11902431952448263, "grad_norm": 2.2381432248650466, "learning_rate": 9.793644180693779e-06, "loss": 1.0293, "step": 3284 }, { "epoch": 0.11906056322713929, "grad_norm": 2.6575972110234924, "learning_rate": 9.793477271007122e-06, "loss": 1.0533, "step": 3285 }, { "epoch": 0.11909680692979595, "grad_norm": 2.54546138193916, "learning_rate": 9.793310295269247e-06, "loss": 0.9306, "step": 3286 }, { "epoch": 0.11913305063245261, "grad_norm": 2.8399469111937035, "learning_rate": 9.793143253482455e-06, "loss": 1.0106, "step": 3287 }, { "epoch": 0.11916929433510927, "grad_norm": 2.7086276062440375, "learning_rate": 9.792976145649049e-06, "loss": 0.8778, "step": 3288 }, { "epoch": 0.11920553803776594, "grad_norm": 2.3670488851586504, "learning_rate": 9.792808971771331e-06, "loss": 0.9232, "step": 3289 }, { "epoch": 0.1192417817404226, "grad_norm": 2.752161684749589, "learning_rate": 9.792641731851602e-06, "loss": 0.8754, "step": 3290 }, { "epoch": 0.11927802544307926, "grad_norm": 2.619924508524329, "learning_rate": 9.792474425892168e-06, "loss": 0.8639, "step": 3291 }, { "epoch": 0.11931426914573592, "grad_norm": 2.4592629653974742, "learning_rate": 9.792307053895336e-06, "loss": 0.9306, "step": 3292 }, { "epoch": 0.1193505128483926, "grad_norm": 2.6447027128251848, "learning_rate": 9.792139615863411e-06, "loss": 1.0492, "step": 3293 }, { "epoch": 0.11938675655104926, "grad_norm": 2.3268992275311144, "learning_rate": 9.7919721117987e-06, "loss": 0.9339, "step": 3294 }, { "epoch": 0.11942300025370592, "grad_norm": 2.4651804358891267, "learning_rate": 9.791804541703512e-06, "loss": 0.9016, "step": 3295 }, { "epoch": 0.11945924395636258, "grad_norm": 2.4373315694822923, "learning_rate": 9.791636905580154e-06, "loss": 0.9379, "step": 3296 }, { "epoch": 0.11949548765901924, "grad_norm": 2.3449945140643598, "learning_rate": 9.791469203430939e-06, "loss": 0.8922, "step": 3297 }, { "epoch": 0.11953173136167591, "grad_norm": 2.288720171460877, "learning_rate": 9.791301435258176e-06, "loss": 0.9452, "step": 3298 }, { "epoch": 0.11956797506433257, "grad_norm": 2.3751228409949037, "learning_rate": 9.791133601064175e-06, "loss": 0.9576, "step": 3299 }, { "epoch": 0.11960421876698923, "grad_norm": 2.5198910778794823, "learning_rate": 9.790965700851253e-06, "loss": 1.0922, "step": 3300 }, { "epoch": 0.1196404624696459, "grad_norm": 2.591733975462382, "learning_rate": 9.79079773462172e-06, "loss": 1.0155, "step": 3301 }, { "epoch": 0.11967670617230257, "grad_norm": 2.4495699661246, "learning_rate": 9.790629702377892e-06, "loss": 0.8938, "step": 3302 }, { "epoch": 0.11971294987495923, "grad_norm": 2.488733656184867, "learning_rate": 9.790461604122084e-06, "loss": 0.9685, "step": 3303 }, { "epoch": 0.11974919357761589, "grad_norm": 2.4180647317526107, "learning_rate": 9.790293439856611e-06, "loss": 1.0875, "step": 3304 }, { "epoch": 0.11978543728027255, "grad_norm": 2.2658430632891107, "learning_rate": 9.790125209583791e-06, "loss": 0.9542, "step": 3305 }, { "epoch": 0.11982168098292921, "grad_norm": 2.3130350994560387, "learning_rate": 9.789956913305944e-06, "loss": 0.8524, "step": 3306 }, { "epoch": 0.11985792468558588, "grad_norm": 2.418285977331069, "learning_rate": 9.78978855102539e-06, "loss": 1.0142, "step": 3307 }, { "epoch": 0.11989416838824254, "grad_norm": 2.311238407732837, "learning_rate": 9.789620122744444e-06, "loss": 0.9656, "step": 3308 }, { "epoch": 0.1199304120908992, "grad_norm": 2.665704669009442, "learning_rate": 9.789451628465429e-06, "loss": 1.0167, "step": 3309 }, { "epoch": 0.11996665579355587, "grad_norm": 2.2455803588747294, "learning_rate": 9.789283068190667e-06, "loss": 1.1409, "step": 3310 }, { "epoch": 0.12000289949621254, "grad_norm": 2.39150023892238, "learning_rate": 9.78911444192248e-06, "loss": 1.0193, "step": 3311 }, { "epoch": 0.1200391431988692, "grad_norm": 2.415424945925963, "learning_rate": 9.788945749663195e-06, "loss": 0.9289, "step": 3312 }, { "epoch": 0.12007538690152586, "grad_norm": 2.092458456156085, "learning_rate": 9.788776991415132e-06, "loss": 0.7234, "step": 3313 }, { "epoch": 0.12011163060418252, "grad_norm": 2.6601429925640674, "learning_rate": 9.78860816718062e-06, "loss": 1.017, "step": 3314 }, { "epoch": 0.12014787430683918, "grad_norm": 2.31754998083001, "learning_rate": 9.788439276961982e-06, "loss": 0.9171, "step": 3315 }, { "epoch": 0.12018411800949585, "grad_norm": 2.8242325501072276, "learning_rate": 9.788270320761545e-06, "loss": 1.0421, "step": 3316 }, { "epoch": 0.12022036171215252, "grad_norm": 2.651510454273088, "learning_rate": 9.788101298581643e-06, "loss": 0.9983, "step": 3317 }, { "epoch": 0.12025660541480918, "grad_norm": 2.5120386478921155, "learning_rate": 9.787932210424598e-06, "loss": 1.0989, "step": 3318 }, { "epoch": 0.12029284911746584, "grad_norm": 2.448979029217924, "learning_rate": 9.787763056292744e-06, "loss": 1.1012, "step": 3319 }, { "epoch": 0.12032909282012251, "grad_norm": 2.66645021919756, "learning_rate": 9.78759383618841e-06, "loss": 1.0545, "step": 3320 }, { "epoch": 0.12036533652277917, "grad_norm": 2.46256243396999, "learning_rate": 9.78742455011393e-06, "loss": 0.9761, "step": 3321 }, { "epoch": 0.12040158022543583, "grad_norm": 2.4587795809637716, "learning_rate": 9.787255198071632e-06, "loss": 0.7995, "step": 3322 }, { "epoch": 0.12043782392809249, "grad_norm": 2.19023441152803, "learning_rate": 9.787085780063855e-06, "loss": 1.1052, "step": 3323 }, { "epoch": 0.12047406763074915, "grad_norm": 2.3938312589910122, "learning_rate": 9.78691629609293e-06, "loss": 0.8048, "step": 3324 }, { "epoch": 0.12051031133340583, "grad_norm": 2.410742425358353, "learning_rate": 9.786746746161193e-06, "loss": 0.9092, "step": 3325 }, { "epoch": 0.12054655503606249, "grad_norm": 2.3052676722555474, "learning_rate": 9.786577130270981e-06, "loss": 0.9128, "step": 3326 }, { "epoch": 0.12058279873871915, "grad_norm": 2.402340241939197, "learning_rate": 9.786407448424632e-06, "loss": 0.9062, "step": 3327 }, { "epoch": 0.1206190424413758, "grad_norm": 2.6767825381019694, "learning_rate": 9.786237700624484e-06, "loss": 1.158, "step": 3328 }, { "epoch": 0.12065528614403248, "grad_norm": 2.6975072965954494, "learning_rate": 9.786067886872872e-06, "loss": 1.1048, "step": 3329 }, { "epoch": 0.12069152984668914, "grad_norm": 2.645906950322652, "learning_rate": 9.785898007172142e-06, "loss": 0.9537, "step": 3330 }, { "epoch": 0.1207277735493458, "grad_norm": 2.7605376231012597, "learning_rate": 9.78572806152463e-06, "loss": 0.9867, "step": 3331 }, { "epoch": 0.12076401725200246, "grad_norm": 2.6849173945238283, "learning_rate": 9.78555804993268e-06, "loss": 0.9858, "step": 3332 }, { "epoch": 0.12080026095465912, "grad_norm": 2.9626876197845156, "learning_rate": 9.785387972398635e-06, "loss": 0.9898, "step": 3333 }, { "epoch": 0.1208365046573158, "grad_norm": 2.5879494074110445, "learning_rate": 9.785217828924837e-06, "loss": 1.0902, "step": 3334 }, { "epoch": 0.12087274835997246, "grad_norm": 2.2528684009093434, "learning_rate": 9.785047619513632e-06, "loss": 0.7842, "step": 3335 }, { "epoch": 0.12090899206262912, "grad_norm": 2.4621536957744667, "learning_rate": 9.784877344167364e-06, "loss": 0.9383, "step": 3336 }, { "epoch": 0.12094523576528578, "grad_norm": 2.5791338844963394, "learning_rate": 9.784707002888381e-06, "loss": 1.1081, "step": 3337 }, { "epoch": 0.12098147946794245, "grad_norm": 2.8543582131338296, "learning_rate": 9.784536595679028e-06, "loss": 1.0934, "step": 3338 }, { "epoch": 0.12101772317059911, "grad_norm": 2.272707370449864, "learning_rate": 9.784366122541655e-06, "loss": 0.919, "step": 3339 }, { "epoch": 0.12105396687325577, "grad_norm": 2.44388695152838, "learning_rate": 9.78419558347861e-06, "loss": 1.0576, "step": 3340 }, { "epoch": 0.12109021057591243, "grad_norm": 2.465032499974194, "learning_rate": 9.784024978492244e-06, "loss": 0.9427, "step": 3341 }, { "epoch": 0.12112645427856909, "grad_norm": 2.6293997721812654, "learning_rate": 9.783854307584906e-06, "loss": 1.1165, "step": 3342 }, { "epoch": 0.12116269798122577, "grad_norm": 2.3696057597665137, "learning_rate": 9.78368357075895e-06, "loss": 1.013, "step": 3343 }, { "epoch": 0.12119894168388243, "grad_norm": 2.794777687407408, "learning_rate": 9.783512768016725e-06, "loss": 1.0072, "step": 3344 }, { "epoch": 0.12123518538653909, "grad_norm": 2.6600638458233457, "learning_rate": 9.78334189936059e-06, "loss": 0.9774, "step": 3345 }, { "epoch": 0.12127142908919575, "grad_norm": 2.5152267809827236, "learning_rate": 9.783170964792895e-06, "loss": 0.8295, "step": 3346 }, { "epoch": 0.12130767279185242, "grad_norm": 2.501917637031212, "learning_rate": 9.782999964315997e-06, "loss": 1.0984, "step": 3347 }, { "epoch": 0.12134391649450908, "grad_norm": 2.3451276892923847, "learning_rate": 9.782828897932252e-06, "loss": 0.9197, "step": 3348 }, { "epoch": 0.12138016019716574, "grad_norm": 2.7374578131355363, "learning_rate": 9.782657765644019e-06, "loss": 1.1294, "step": 3349 }, { "epoch": 0.1214164038998224, "grad_norm": 2.5310643560287094, "learning_rate": 9.782486567453653e-06, "loss": 0.9272, "step": 3350 }, { "epoch": 0.12145264760247906, "grad_norm": 2.595981886457358, "learning_rate": 9.782315303363514e-06, "loss": 1.0396, "step": 3351 }, { "epoch": 0.12148889130513574, "grad_norm": 2.2190895182536607, "learning_rate": 9.782143973375963e-06, "loss": 0.9443, "step": 3352 }, { "epoch": 0.1215251350077924, "grad_norm": 2.3443264061154485, "learning_rate": 9.781972577493362e-06, "loss": 1.0484, "step": 3353 }, { "epoch": 0.12156137871044906, "grad_norm": 2.5893090701759722, "learning_rate": 9.781801115718068e-06, "loss": 1.0695, "step": 3354 }, { "epoch": 0.12159762241310572, "grad_norm": 2.5323046194924053, "learning_rate": 9.781629588052448e-06, "loss": 0.8801, "step": 3355 }, { "epoch": 0.12163386611576239, "grad_norm": 2.616035286351726, "learning_rate": 9.781457994498864e-06, "loss": 0.9827, "step": 3356 }, { "epoch": 0.12167010981841905, "grad_norm": 2.417559061286531, "learning_rate": 9.78128633505968e-06, "loss": 0.8974, "step": 3357 }, { "epoch": 0.12170635352107571, "grad_norm": 2.230990278699807, "learning_rate": 9.781114609737262e-06, "loss": 0.8022, "step": 3358 }, { "epoch": 0.12174259722373237, "grad_norm": 2.4464954294610988, "learning_rate": 9.780942818533976e-06, "loss": 0.95, "step": 3359 }, { "epoch": 0.12177884092638903, "grad_norm": 2.2604941521215562, "learning_rate": 9.78077096145219e-06, "loss": 0.9351, "step": 3360 }, { "epoch": 0.12181508462904571, "grad_norm": 2.5166545240030596, "learning_rate": 9.780599038494271e-06, "loss": 0.9151, "step": 3361 }, { "epoch": 0.12185132833170237, "grad_norm": 2.3817805450170373, "learning_rate": 9.780427049662591e-06, "loss": 1.0679, "step": 3362 }, { "epoch": 0.12188757203435903, "grad_norm": 2.401797987040334, "learning_rate": 9.780254994959516e-06, "loss": 1.0928, "step": 3363 }, { "epoch": 0.12192381573701569, "grad_norm": 2.2169959462786313, "learning_rate": 9.780082874387416e-06, "loss": 0.8716, "step": 3364 }, { "epoch": 0.12196005943967236, "grad_norm": 2.476372228839054, "learning_rate": 9.779910687948666e-06, "loss": 1.0018, "step": 3365 }, { "epoch": 0.12199630314232902, "grad_norm": 2.240164745842011, "learning_rate": 9.779738435645638e-06, "loss": 0.8901, "step": 3366 }, { "epoch": 0.12203254684498568, "grad_norm": 2.295736097276923, "learning_rate": 9.779566117480704e-06, "loss": 1.1487, "step": 3367 }, { "epoch": 0.12206879054764234, "grad_norm": 2.340931273043393, "learning_rate": 9.779393733456242e-06, "loss": 1.1788, "step": 3368 }, { "epoch": 0.122105034250299, "grad_norm": 2.6259994996820746, "learning_rate": 9.779221283574622e-06, "loss": 1.2814, "step": 3369 }, { "epoch": 0.12214127795295568, "grad_norm": 2.8822945513019156, "learning_rate": 9.779048767838223e-06, "loss": 1.0405, "step": 3370 }, { "epoch": 0.12217752165561234, "grad_norm": 2.573202156400143, "learning_rate": 9.778876186249424e-06, "loss": 0.9907, "step": 3371 }, { "epoch": 0.122213765358269, "grad_norm": 2.3900406037862467, "learning_rate": 9.7787035388106e-06, "loss": 0.961, "step": 3372 }, { "epoch": 0.12225000906092566, "grad_norm": 2.453185708253859, "learning_rate": 9.778530825524135e-06, "loss": 0.9402, "step": 3373 }, { "epoch": 0.12228625276358233, "grad_norm": 2.5978602424937844, "learning_rate": 9.7783580463924e-06, "loss": 1.003, "step": 3374 }, { "epoch": 0.122322496466239, "grad_norm": 2.5936271843626395, "learning_rate": 9.778185201417785e-06, "loss": 1.0098, "step": 3375 }, { "epoch": 0.12235874016889565, "grad_norm": 2.546662004635534, "learning_rate": 9.778012290602665e-06, "loss": 1.1585, "step": 3376 }, { "epoch": 0.12239498387155232, "grad_norm": 2.2554665010544244, "learning_rate": 9.777839313949426e-06, "loss": 0.8897, "step": 3377 }, { "epoch": 0.12243122757420898, "grad_norm": 2.4711851388721082, "learning_rate": 9.777666271460453e-06, "loss": 1.0194, "step": 3378 }, { "epoch": 0.12246747127686565, "grad_norm": 2.5423499364544577, "learning_rate": 9.777493163138126e-06, "loss": 0.9371, "step": 3379 }, { "epoch": 0.12250371497952231, "grad_norm": 2.6543520048651588, "learning_rate": 9.777319988984833e-06, "loss": 1.0202, "step": 3380 }, { "epoch": 0.12253995868217897, "grad_norm": 2.678846159134499, "learning_rate": 9.777146749002959e-06, "loss": 1.0587, "step": 3381 }, { "epoch": 0.12257620238483563, "grad_norm": 2.48475103370259, "learning_rate": 9.776973443194893e-06, "loss": 1.0993, "step": 3382 }, { "epoch": 0.1226124460874923, "grad_norm": 2.6120890814485014, "learning_rate": 9.776800071563021e-06, "loss": 0.8307, "step": 3383 }, { "epoch": 0.12264868979014897, "grad_norm": 2.3703355198176954, "learning_rate": 9.776626634109734e-06, "loss": 0.8447, "step": 3384 }, { "epoch": 0.12268493349280563, "grad_norm": 2.053582542392546, "learning_rate": 9.77645313083742e-06, "loss": 1.0883, "step": 3385 }, { "epoch": 0.12272117719546229, "grad_norm": 2.557218030277873, "learning_rate": 9.77627956174847e-06, "loss": 0.9581, "step": 3386 }, { "epoch": 0.12275742089811895, "grad_norm": 2.6587608302626324, "learning_rate": 9.776105926845277e-06, "loss": 0.9711, "step": 3387 }, { "epoch": 0.12279366460077562, "grad_norm": 2.2734699910217038, "learning_rate": 9.775932226130234e-06, "loss": 0.8395, "step": 3388 }, { "epoch": 0.12282990830343228, "grad_norm": 2.3426633320522785, "learning_rate": 9.775758459605731e-06, "loss": 1.0486, "step": 3389 }, { "epoch": 0.12286615200608894, "grad_norm": 2.4887677313637213, "learning_rate": 9.775584627274165e-06, "loss": 1.0207, "step": 3390 }, { "epoch": 0.1229023957087456, "grad_norm": 2.523360254288466, "learning_rate": 9.775410729137933e-06, "loss": 1.0568, "step": 3391 }, { "epoch": 0.12293863941140228, "grad_norm": 2.1038570162969283, "learning_rate": 9.775236765199428e-06, "loss": 0.9891, "step": 3392 }, { "epoch": 0.12297488311405894, "grad_norm": 2.490123468035462, "learning_rate": 9.77506273546105e-06, "loss": 1.0414, "step": 3393 }, { "epoch": 0.1230111268167156, "grad_norm": 2.7359592316262638, "learning_rate": 9.774888639925193e-06, "loss": 1.0812, "step": 3394 }, { "epoch": 0.12304737051937226, "grad_norm": 2.3876314613123286, "learning_rate": 9.774714478594259e-06, "loss": 0.8741, "step": 3395 }, { "epoch": 0.12308361422202892, "grad_norm": 2.1620643021431767, "learning_rate": 9.774540251470649e-06, "loss": 1.0639, "step": 3396 }, { "epoch": 0.12311985792468559, "grad_norm": 2.4164872790843703, "learning_rate": 9.77436595855676e-06, "loss": 1.0304, "step": 3397 }, { "epoch": 0.12315610162734225, "grad_norm": 2.685815928663855, "learning_rate": 9.774191599854993e-06, "loss": 1.0435, "step": 3398 }, { "epoch": 0.12319234532999891, "grad_norm": 2.335849867826849, "learning_rate": 9.774017175367756e-06, "loss": 0.9111, "step": 3399 }, { "epoch": 0.12322858903265557, "grad_norm": 2.4451713732284217, "learning_rate": 9.77384268509745e-06, "loss": 0.8061, "step": 3400 }, { "epoch": 0.12326483273531225, "grad_norm": 2.3303497112935325, "learning_rate": 9.77366812904648e-06, "loss": 0.8055, "step": 3401 }, { "epoch": 0.1233010764379689, "grad_norm": 2.3660403384612443, "learning_rate": 9.773493507217247e-06, "loss": 1.0532, "step": 3402 }, { "epoch": 0.12333732014062557, "grad_norm": 2.2373103357640014, "learning_rate": 9.773318819612162e-06, "loss": 0.9434, "step": 3403 }, { "epoch": 0.12337356384328223, "grad_norm": 2.623466564417369, "learning_rate": 9.773144066233631e-06, "loss": 0.9879, "step": 3404 }, { "epoch": 0.12340980754593889, "grad_norm": 2.2695284208315054, "learning_rate": 9.77296924708406e-06, "loss": 0.9698, "step": 3405 }, { "epoch": 0.12344605124859556, "grad_norm": 2.169380385499687, "learning_rate": 9.772794362165861e-06, "loss": 0.7375, "step": 3406 }, { "epoch": 0.12348229495125222, "grad_norm": 2.606244784053737, "learning_rate": 9.77261941148144e-06, "loss": 0.8981, "step": 3407 }, { "epoch": 0.12351853865390888, "grad_norm": 2.3708748767066, "learning_rate": 9.772444395033212e-06, "loss": 0.9249, "step": 3408 }, { "epoch": 0.12355478235656554, "grad_norm": 2.78316394421294, "learning_rate": 9.772269312823585e-06, "loss": 0.9448, "step": 3409 }, { "epoch": 0.12359102605922222, "grad_norm": 2.1311169023794845, "learning_rate": 9.772094164854974e-06, "loss": 0.8354, "step": 3410 }, { "epoch": 0.12362726976187888, "grad_norm": 2.542927599989035, "learning_rate": 9.771918951129793e-06, "loss": 1.1393, "step": 3411 }, { "epoch": 0.12366351346453554, "grad_norm": 2.3950366737132613, "learning_rate": 9.771743671650453e-06, "loss": 0.9744, "step": 3412 }, { "epoch": 0.1236997571671922, "grad_norm": 2.2588198194647036, "learning_rate": 9.771568326419371e-06, "loss": 0.9934, "step": 3413 }, { "epoch": 0.12373600086984886, "grad_norm": 2.6167230642528403, "learning_rate": 9.771392915438963e-06, "loss": 0.8285, "step": 3414 }, { "epoch": 0.12377224457250553, "grad_norm": 2.787115754424881, "learning_rate": 9.771217438711647e-06, "loss": 0.9268, "step": 3415 }, { "epoch": 0.12380848827516219, "grad_norm": 2.4402399202629494, "learning_rate": 9.77104189623984e-06, "loss": 0.9378, "step": 3416 }, { "epoch": 0.12384473197781885, "grad_norm": 2.7262095967937086, "learning_rate": 9.770866288025961e-06, "loss": 1.0911, "step": 3417 }, { "epoch": 0.12388097568047551, "grad_norm": 2.848837657694325, "learning_rate": 9.77069061407243e-06, "loss": 1.1285, "step": 3418 }, { "epoch": 0.12391721938313217, "grad_norm": 2.4920945644322448, "learning_rate": 9.770514874381667e-06, "loss": 1.1136, "step": 3419 }, { "epoch": 0.12395346308578885, "grad_norm": 2.45994252501432, "learning_rate": 9.770339068956095e-06, "loss": 1.1414, "step": 3420 }, { "epoch": 0.12398970678844551, "grad_norm": 2.384773918020876, "learning_rate": 9.770163197798135e-06, "loss": 1.1215, "step": 3421 }, { "epoch": 0.12402595049110217, "grad_norm": 2.421567155595338, "learning_rate": 9.769987260910213e-06, "loss": 0.8531, "step": 3422 }, { "epoch": 0.12406219419375883, "grad_norm": 2.3316922092944408, "learning_rate": 9.769811258294748e-06, "loss": 1.1374, "step": 3423 }, { "epoch": 0.1240984378964155, "grad_norm": 2.182158248598343, "learning_rate": 9.76963518995417e-06, "loss": 1.0095, "step": 3424 }, { "epoch": 0.12413468159907216, "grad_norm": 2.6425966315356257, "learning_rate": 9.769459055890905e-06, "loss": 1.0702, "step": 3425 }, { "epoch": 0.12417092530172882, "grad_norm": 2.9613457951123077, "learning_rate": 9.769282856107376e-06, "loss": 0.9533, "step": 3426 }, { "epoch": 0.12420716900438548, "grad_norm": 2.459184615418399, "learning_rate": 9.769106590606017e-06, "loss": 0.9515, "step": 3427 }, { "epoch": 0.12424341270704214, "grad_norm": 2.7537327065131962, "learning_rate": 9.768930259389252e-06, "loss": 1.104, "step": 3428 }, { "epoch": 0.12427965640969882, "grad_norm": 2.5424630555631316, "learning_rate": 9.76875386245951e-06, "loss": 1.0716, "step": 3429 }, { "epoch": 0.12431590011235548, "grad_norm": 2.3182910089445814, "learning_rate": 9.768577399819226e-06, "loss": 1.0218, "step": 3430 }, { "epoch": 0.12435214381501214, "grad_norm": 2.6459911854119498, "learning_rate": 9.768400871470828e-06, "loss": 0.9121, "step": 3431 }, { "epoch": 0.1243883875176688, "grad_norm": 2.414320619679934, "learning_rate": 9.768224277416751e-06, "loss": 0.9442, "step": 3432 }, { "epoch": 0.12442463122032547, "grad_norm": 2.5267228326179025, "learning_rate": 9.76804761765943e-06, "loss": 0.8922, "step": 3433 }, { "epoch": 0.12446087492298213, "grad_norm": 2.667728828790076, "learning_rate": 9.76787089220129e-06, "loss": 1.013, "step": 3434 }, { "epoch": 0.1244971186256388, "grad_norm": 2.6888854578222134, "learning_rate": 9.767694101044778e-06, "loss": 0.9621, "step": 3435 }, { "epoch": 0.12453336232829545, "grad_norm": 2.5884566026094435, "learning_rate": 9.767517244192322e-06, "loss": 1.0625, "step": 3436 }, { "epoch": 0.12456960603095212, "grad_norm": 2.4311224301013152, "learning_rate": 9.767340321646362e-06, "loss": 0.9723, "step": 3437 }, { "epoch": 0.12460584973360879, "grad_norm": 2.6977434658753543, "learning_rate": 9.767163333409336e-06, "loss": 0.8819, "step": 3438 }, { "epoch": 0.12464209343626545, "grad_norm": 2.3911237362510827, "learning_rate": 9.766986279483683e-06, "loss": 1.1326, "step": 3439 }, { "epoch": 0.12467833713892211, "grad_norm": 2.941000102345814, "learning_rate": 9.766809159871842e-06, "loss": 0.9402, "step": 3440 }, { "epoch": 0.12471458084157877, "grad_norm": 2.579433790427428, "learning_rate": 9.766631974576253e-06, "loss": 1.0032, "step": 3441 }, { "epoch": 0.12475082454423544, "grad_norm": 2.367240196333413, "learning_rate": 9.766454723599359e-06, "loss": 0.7497, "step": 3442 }, { "epoch": 0.1247870682468921, "grad_norm": 2.551844295982835, "learning_rate": 9.7662774069436e-06, "loss": 1.1751, "step": 3443 }, { "epoch": 0.12482331194954877, "grad_norm": 2.752499316576192, "learning_rate": 9.766100024611421e-06, "loss": 0.8903, "step": 3444 }, { "epoch": 0.12485955565220543, "grad_norm": 2.624784129817909, "learning_rate": 9.765922576605267e-06, "loss": 1.0749, "step": 3445 }, { "epoch": 0.12489579935486209, "grad_norm": 2.431507074704549, "learning_rate": 9.765745062927582e-06, "loss": 0.9435, "step": 3446 }, { "epoch": 0.12493204305751876, "grad_norm": 2.863643640996649, "learning_rate": 9.765567483580813e-06, "loss": 1.0769, "step": 3447 }, { "epoch": 0.12496828676017542, "grad_norm": 2.6324165430610154, "learning_rate": 9.765389838567405e-06, "loss": 0.979, "step": 3448 }, { "epoch": 0.12500453046283208, "grad_norm": 2.773859035376425, "learning_rate": 9.765212127889807e-06, "loss": 0.8707, "step": 3449 }, { "epoch": 0.12504077416548876, "grad_norm": 2.5408682197852137, "learning_rate": 9.76503435155047e-06, "loss": 1.1201, "step": 3450 }, { "epoch": 0.1250770178681454, "grad_norm": 2.737440998010062, "learning_rate": 9.76485650955184e-06, "loss": 1.0645, "step": 3451 }, { "epoch": 0.12511326157080208, "grad_norm": 2.574456864677477, "learning_rate": 9.764678601896367e-06, "loss": 1.0117, "step": 3452 }, { "epoch": 0.12514950527345875, "grad_norm": 2.4398543765828973, "learning_rate": 9.764500628586506e-06, "loss": 0.9792, "step": 3453 }, { "epoch": 0.1251857489761154, "grad_norm": 2.6780674449566146, "learning_rate": 9.76432258962471e-06, "loss": 0.9868, "step": 3454 }, { "epoch": 0.12522199267877207, "grad_norm": 2.384498433637413, "learning_rate": 9.764144485013426e-06, "loss": 1.0397, "step": 3455 }, { "epoch": 0.12525823638142872, "grad_norm": 2.6774442810540675, "learning_rate": 9.763966314755114e-06, "loss": 0.8482, "step": 3456 }, { "epoch": 0.1252944800840854, "grad_norm": 2.0958574607984346, "learning_rate": 9.763788078852228e-06, "loss": 0.8852, "step": 3457 }, { "epoch": 0.12533072378674207, "grad_norm": 2.4649699473538114, "learning_rate": 9.763609777307223e-06, "loss": 1.052, "step": 3458 }, { "epoch": 0.1253669674893987, "grad_norm": 2.845068205955083, "learning_rate": 9.763431410122557e-06, "loss": 1.0867, "step": 3459 }, { "epoch": 0.12540321119205539, "grad_norm": 2.277394010768572, "learning_rate": 9.763252977300686e-06, "loss": 0.8582, "step": 3460 }, { "epoch": 0.12543945489471203, "grad_norm": 2.543586639343776, "learning_rate": 9.76307447884407e-06, "loss": 1.0935, "step": 3461 }, { "epoch": 0.1254756985973687, "grad_norm": 2.685943770315524, "learning_rate": 9.762895914755168e-06, "loss": 1.079, "step": 3462 }, { "epoch": 0.12551194230002538, "grad_norm": 3.183224054335252, "learning_rate": 9.76271728503644e-06, "loss": 1.1461, "step": 3463 }, { "epoch": 0.12554818600268203, "grad_norm": 2.39698836980876, "learning_rate": 9.76253858969035e-06, "loss": 0.8022, "step": 3464 }, { "epoch": 0.1255844297053387, "grad_norm": 2.186068232579191, "learning_rate": 9.762359828719357e-06, "loss": 1.0438, "step": 3465 }, { "epoch": 0.12562067340799535, "grad_norm": 2.3609279503254985, "learning_rate": 9.762181002125928e-06, "loss": 0.9622, "step": 3466 }, { "epoch": 0.12565691711065202, "grad_norm": 2.521602509426675, "learning_rate": 9.762002109912523e-06, "loss": 1.0951, "step": 3467 }, { "epoch": 0.1256931608133087, "grad_norm": 2.4259436512332244, "learning_rate": 9.76182315208161e-06, "loss": 0.8794, "step": 3468 }, { "epoch": 0.12572940451596534, "grad_norm": 2.357065903475354, "learning_rate": 9.761644128635651e-06, "loss": 0.827, "step": 3469 }, { "epoch": 0.12576564821862202, "grad_norm": 2.7477325671370543, "learning_rate": 9.761465039577119e-06, "loss": 1.0416, "step": 3470 }, { "epoch": 0.1258018919212787, "grad_norm": 2.819635798211245, "learning_rate": 9.761285884908478e-06, "loss": 1.0722, "step": 3471 }, { "epoch": 0.12583813562393534, "grad_norm": 2.3545928506831015, "learning_rate": 9.761106664632195e-06, "loss": 0.9236, "step": 3472 }, { "epoch": 0.125874379326592, "grad_norm": 2.2557733401978775, "learning_rate": 9.760927378750742e-06, "loss": 0.9269, "step": 3473 }, { "epoch": 0.12591062302924866, "grad_norm": 2.617976183847747, "learning_rate": 9.760748027266591e-06, "loss": 1.0605, "step": 3474 }, { "epoch": 0.12594686673190533, "grad_norm": 2.523594236220076, "learning_rate": 9.76056861018221e-06, "loss": 1.0872, "step": 3475 }, { "epoch": 0.125983110434562, "grad_norm": 2.545979528514368, "learning_rate": 9.760389127500072e-06, "loss": 0.903, "step": 3476 }, { "epoch": 0.12601935413721865, "grad_norm": 2.132345074728341, "learning_rate": 9.760209579222652e-06, "loss": 0.9621, "step": 3477 }, { "epoch": 0.12605559783987533, "grad_norm": 2.6064561437377702, "learning_rate": 9.760029965352422e-06, "loss": 1.0429, "step": 3478 }, { "epoch": 0.12609184154253197, "grad_norm": 2.324158583116803, "learning_rate": 9.759850285891859e-06, "loss": 0.9873, "step": 3479 }, { "epoch": 0.12612808524518865, "grad_norm": 2.4459829445374877, "learning_rate": 9.759670540843437e-06, "loss": 1.1522, "step": 3480 }, { "epoch": 0.12616432894784532, "grad_norm": 2.35633569295755, "learning_rate": 9.759490730209633e-06, "loss": 0.9224, "step": 3481 }, { "epoch": 0.12620057265050197, "grad_norm": 2.418630817248673, "learning_rate": 9.759310853992928e-06, "loss": 0.9421, "step": 3482 }, { "epoch": 0.12623681635315864, "grad_norm": 2.5338131982806114, "learning_rate": 9.759130912195793e-06, "loss": 1.2258, "step": 3483 }, { "epoch": 0.1262730600558153, "grad_norm": 2.286698824231014, "learning_rate": 9.758950904820717e-06, "loss": 1.1644, "step": 3484 }, { "epoch": 0.12630930375847196, "grad_norm": 2.558969367227642, "learning_rate": 9.758770831870172e-06, "loss": 0.9727, "step": 3485 }, { "epoch": 0.12634554746112864, "grad_norm": 2.122233717017616, "learning_rate": 9.758590693346645e-06, "loss": 1.0496, "step": 3486 }, { "epoch": 0.12638179116378528, "grad_norm": 2.340136666337251, "learning_rate": 9.758410489252614e-06, "loss": 0.9645, "step": 3487 }, { "epoch": 0.12641803486644196, "grad_norm": 2.5108555342135754, "learning_rate": 9.758230219590567e-06, "loss": 0.9082, "step": 3488 }, { "epoch": 0.12645427856909863, "grad_norm": 2.4645322019134626, "learning_rate": 9.758049884362983e-06, "loss": 1.1339, "step": 3489 }, { "epoch": 0.12649052227175528, "grad_norm": 2.5780976965805755, "learning_rate": 9.75786948357235e-06, "loss": 1.1115, "step": 3490 }, { "epoch": 0.12652676597441195, "grad_norm": 2.6038476194993088, "learning_rate": 9.757689017221155e-06, "loss": 1.0635, "step": 3491 }, { "epoch": 0.1265630096770686, "grad_norm": 2.3156325947766345, "learning_rate": 9.75750848531188e-06, "loss": 0.8693, "step": 3492 }, { "epoch": 0.12659925337972527, "grad_norm": 2.78157505233727, "learning_rate": 9.757327887847017e-06, "loss": 0.9116, "step": 3493 }, { "epoch": 0.12663549708238195, "grad_norm": 2.6483335976626177, "learning_rate": 9.757147224829052e-06, "loss": 0.9289, "step": 3494 }, { "epoch": 0.1266717407850386, "grad_norm": 2.4147794983877215, "learning_rate": 9.756966496260475e-06, "loss": 1.0526, "step": 3495 }, { "epoch": 0.12670798448769527, "grad_norm": 2.287300311214583, "learning_rate": 9.756785702143777e-06, "loss": 0.9153, "step": 3496 }, { "epoch": 0.12674422819035192, "grad_norm": 2.2201089399747684, "learning_rate": 9.75660484248145e-06, "loss": 0.961, "step": 3497 }, { "epoch": 0.1267804718930086, "grad_norm": 2.3256818456077504, "learning_rate": 9.756423917275983e-06, "loss": 0.9103, "step": 3498 }, { "epoch": 0.12681671559566526, "grad_norm": 2.306023508447613, "learning_rate": 9.75624292652987e-06, "loss": 1.1173, "step": 3499 }, { "epoch": 0.1268529592983219, "grad_norm": 2.535687361127439, "learning_rate": 9.75606187024561e-06, "loss": 0.9583, "step": 3500 }, { "epoch": 0.12688920300097858, "grad_norm": 2.5148733029013335, "learning_rate": 9.755880748425691e-06, "loss": 1.0545, "step": 3501 }, { "epoch": 0.12692544670363523, "grad_norm": 2.2596040234257213, "learning_rate": 9.755699561072611e-06, "loss": 0.7375, "step": 3502 }, { "epoch": 0.1269616904062919, "grad_norm": 2.6216337354369403, "learning_rate": 9.75551830818887e-06, "loss": 0.9345, "step": 3503 }, { "epoch": 0.12699793410894858, "grad_norm": 2.504688787289754, "learning_rate": 9.75533698977696e-06, "loss": 1.1494, "step": 3504 }, { "epoch": 0.12703417781160523, "grad_norm": 2.4529448319869167, "learning_rate": 9.755155605839383e-06, "loss": 0.9628, "step": 3505 }, { "epoch": 0.1270704215142619, "grad_norm": 2.580744491011932, "learning_rate": 9.754974156378638e-06, "loss": 1.0367, "step": 3506 }, { "epoch": 0.12710666521691857, "grad_norm": 2.301925668776572, "learning_rate": 9.754792641397225e-06, "loss": 1.1052, "step": 3507 }, { "epoch": 0.12714290891957522, "grad_norm": 2.409269198518183, "learning_rate": 9.754611060897645e-06, "loss": 1.1578, "step": 3508 }, { "epoch": 0.1271791526222319, "grad_norm": 2.855937001267216, "learning_rate": 9.754429414882401e-06, "loss": 0.8907, "step": 3509 }, { "epoch": 0.12721539632488854, "grad_norm": 2.2772907656708457, "learning_rate": 9.754247703353992e-06, "loss": 1.1163, "step": 3510 }, { "epoch": 0.12725164002754522, "grad_norm": 2.357035811829922, "learning_rate": 9.754065926314927e-06, "loss": 1.0301, "step": 3511 }, { "epoch": 0.1272878837302019, "grad_norm": 2.534624777790899, "learning_rate": 9.753884083767709e-06, "loss": 1.0287, "step": 3512 }, { "epoch": 0.12732412743285854, "grad_norm": 2.619987597755066, "learning_rate": 9.753702175714844e-06, "loss": 1.0192, "step": 3513 }, { "epoch": 0.1273603711355152, "grad_norm": 2.7158545783898, "learning_rate": 9.753520202158835e-06, "loss": 0.9994, "step": 3514 }, { "epoch": 0.12739661483817186, "grad_norm": 2.5373766812166494, "learning_rate": 9.753338163102195e-06, "loss": 0.9626, "step": 3515 }, { "epoch": 0.12743285854082853, "grad_norm": 2.388931786092909, "learning_rate": 9.753156058547429e-06, "loss": 0.9743, "step": 3516 }, { "epoch": 0.1274691022434852, "grad_norm": 2.5020373773009497, "learning_rate": 9.752973888497047e-06, "loss": 0.9333, "step": 3517 }, { "epoch": 0.12750534594614185, "grad_norm": 2.865593112098345, "learning_rate": 9.75279165295356e-06, "loss": 0.9215, "step": 3518 }, { "epoch": 0.12754158964879853, "grad_norm": 2.415246472127179, "learning_rate": 9.752609351919477e-06, "loss": 1.0468, "step": 3519 }, { "epoch": 0.12757783335145517, "grad_norm": 2.176193449944079, "learning_rate": 9.752426985397313e-06, "loss": 0.8053, "step": 3520 }, { "epoch": 0.12761407705411185, "grad_norm": 2.5989542081370853, "learning_rate": 9.752244553389579e-06, "loss": 0.9997, "step": 3521 }, { "epoch": 0.12765032075676852, "grad_norm": 2.672211334441067, "learning_rate": 9.752062055898788e-06, "loss": 0.9674, "step": 3522 }, { "epoch": 0.12768656445942517, "grad_norm": 2.2085873420275095, "learning_rate": 9.751879492927456e-06, "loss": 0.9535, "step": 3523 }, { "epoch": 0.12772280816208184, "grad_norm": 2.4196545965639618, "learning_rate": 9.7516968644781e-06, "loss": 1.05, "step": 3524 }, { "epoch": 0.12775905186473852, "grad_norm": 2.5350236763485383, "learning_rate": 9.751514170553233e-06, "loss": 0.9403, "step": 3525 }, { "epoch": 0.12779529556739516, "grad_norm": 2.5157438577229687, "learning_rate": 9.751331411155375e-06, "loss": 1.064, "step": 3526 }, { "epoch": 0.12783153927005184, "grad_norm": 2.733856544674858, "learning_rate": 9.751148586287046e-06, "loss": 1.1285, "step": 3527 }, { "epoch": 0.12786778297270848, "grad_norm": 2.2933369838576207, "learning_rate": 9.75096569595076e-06, "loss": 1.0859, "step": 3528 }, { "epoch": 0.12790402667536516, "grad_norm": 2.0862907616281805, "learning_rate": 9.75078274014904e-06, "loss": 0.9299, "step": 3529 }, { "epoch": 0.12794027037802183, "grad_norm": 2.4506928349742227, "learning_rate": 9.75059971888441e-06, "loss": 0.9086, "step": 3530 }, { "epoch": 0.12797651408067848, "grad_norm": 2.5492409346354665, "learning_rate": 9.750416632159387e-06, "loss": 0.9501, "step": 3531 }, { "epoch": 0.12801275778333515, "grad_norm": 2.180469913238981, "learning_rate": 9.750233479976495e-06, "loss": 1.0137, "step": 3532 }, { "epoch": 0.1280490014859918, "grad_norm": 2.5764520238732107, "learning_rate": 9.75005026233826e-06, "loss": 1.1433, "step": 3533 }, { "epoch": 0.12808524518864847, "grad_norm": 2.561775205979588, "learning_rate": 9.749866979247203e-06, "loss": 1.0352, "step": 3534 }, { "epoch": 0.12812148889130515, "grad_norm": 2.61575647538504, "learning_rate": 9.749683630705853e-06, "loss": 1.0546, "step": 3535 }, { "epoch": 0.1281577325939618, "grad_norm": 2.198439873394219, "learning_rate": 9.749500216716736e-06, "loss": 1.0275, "step": 3536 }, { "epoch": 0.12819397629661847, "grad_norm": 2.580320116083599, "learning_rate": 9.749316737282377e-06, "loss": 1.0372, "step": 3537 }, { "epoch": 0.1282302199992751, "grad_norm": 2.1148990360830826, "learning_rate": 9.749133192405307e-06, "loss": 0.8201, "step": 3538 }, { "epoch": 0.1282664637019318, "grad_norm": 2.47072264013799, "learning_rate": 9.748949582088051e-06, "loss": 1.1554, "step": 3539 }, { "epoch": 0.12830270740458846, "grad_norm": 2.6948536305075135, "learning_rate": 9.748765906333144e-06, "loss": 1.1785, "step": 3540 }, { "epoch": 0.1283389511072451, "grad_norm": 2.3183486540869596, "learning_rate": 9.748582165143114e-06, "loss": 1.1446, "step": 3541 }, { "epoch": 0.12837519480990178, "grad_norm": 2.6115735914271245, "learning_rate": 9.748398358520494e-06, "loss": 1.0638, "step": 3542 }, { "epoch": 0.12841143851255846, "grad_norm": 2.710514334628124, "learning_rate": 9.748214486467817e-06, "loss": 0.9659, "step": 3543 }, { "epoch": 0.1284476822152151, "grad_norm": 2.3185145261710973, "learning_rate": 9.748030548987616e-06, "loss": 1.0814, "step": 3544 }, { "epoch": 0.12848392591787178, "grad_norm": 2.5221830375791896, "learning_rate": 9.747846546082425e-06, "loss": 1.0957, "step": 3545 }, { "epoch": 0.12852016962052842, "grad_norm": 2.4438412629269797, "learning_rate": 9.747662477754781e-06, "loss": 1.0938, "step": 3546 }, { "epoch": 0.1285564133231851, "grad_norm": 2.354593891717155, "learning_rate": 9.747478344007218e-06, "loss": 1.0726, "step": 3547 }, { "epoch": 0.12859265702584177, "grad_norm": 2.4432315985612845, "learning_rate": 9.747294144842274e-06, "loss": 1.109, "step": 3548 }, { "epoch": 0.12862890072849842, "grad_norm": 2.408128223394147, "learning_rate": 9.747109880262488e-06, "loss": 1.0027, "step": 3549 }, { "epoch": 0.1286651444311551, "grad_norm": 2.2069170223337142, "learning_rate": 9.7469255502704e-06, "loss": 0.8123, "step": 3550 }, { "epoch": 0.12870138813381174, "grad_norm": 2.23914899721476, "learning_rate": 9.74674115486855e-06, "loss": 0.9447, "step": 3551 }, { "epoch": 0.1287376318364684, "grad_norm": 2.2941173734922238, "learning_rate": 9.746556694059475e-06, "loss": 0.862, "step": 3552 }, { "epoch": 0.1287738755391251, "grad_norm": 2.51527940587255, "learning_rate": 9.74637216784572e-06, "loss": 0.9936, "step": 3553 }, { "epoch": 0.12881011924178173, "grad_norm": 2.304674947213253, "learning_rate": 9.746187576229827e-06, "loss": 1.0083, "step": 3554 }, { "epoch": 0.1288463629444384, "grad_norm": 2.918391370059641, "learning_rate": 9.74600291921434e-06, "loss": 1.1488, "step": 3555 }, { "epoch": 0.12888260664709505, "grad_norm": 2.4765356922203923, "learning_rate": 9.745818196801803e-06, "loss": 1.0382, "step": 3556 }, { "epoch": 0.12891885034975173, "grad_norm": 2.1636054631368316, "learning_rate": 9.745633408994762e-06, "loss": 0.8963, "step": 3557 }, { "epoch": 0.1289550940524084, "grad_norm": 2.722536105908906, "learning_rate": 9.745448555795762e-06, "loss": 1.0559, "step": 3558 }, { "epoch": 0.12899133775506505, "grad_norm": 2.635345189923831, "learning_rate": 9.745263637207352e-06, "loss": 0.9754, "step": 3559 }, { "epoch": 0.12902758145772172, "grad_norm": 2.2147737099798155, "learning_rate": 9.745078653232077e-06, "loss": 0.9755, "step": 3560 }, { "epoch": 0.12906382516037837, "grad_norm": 2.3969096375190393, "learning_rate": 9.74489360387249e-06, "loss": 0.9948, "step": 3561 }, { "epoch": 0.12910006886303504, "grad_norm": 2.430685543597617, "learning_rate": 9.744708489131136e-06, "loss": 0.976, "step": 3562 }, { "epoch": 0.12913631256569172, "grad_norm": 2.548111327693745, "learning_rate": 9.744523309010571e-06, "loss": 0.9764, "step": 3563 }, { "epoch": 0.12917255626834837, "grad_norm": 2.4630744382872964, "learning_rate": 9.744338063513344e-06, "loss": 0.8955, "step": 3564 }, { "epoch": 0.12920879997100504, "grad_norm": 2.404859520752727, "learning_rate": 9.744152752642007e-06, "loss": 1.0058, "step": 3565 }, { "epoch": 0.1292450436736617, "grad_norm": 2.7164247730671214, "learning_rate": 9.743967376399112e-06, "loss": 1.0749, "step": 3566 }, { "epoch": 0.12928128737631836, "grad_norm": 2.467091425991453, "learning_rate": 9.74378193478722e-06, "loss": 0.8129, "step": 3567 }, { "epoch": 0.12931753107897503, "grad_norm": 2.5752383288665452, "learning_rate": 9.74359642780888e-06, "loss": 1.1618, "step": 3568 }, { "epoch": 0.12935377478163168, "grad_norm": 2.413698893779768, "learning_rate": 9.743410855466649e-06, "loss": 1.1513, "step": 3569 }, { "epoch": 0.12939001848428835, "grad_norm": 2.239467312818913, "learning_rate": 9.743225217763086e-06, "loss": 1.1009, "step": 3570 }, { "epoch": 0.12942626218694503, "grad_norm": 2.6783193018686235, "learning_rate": 9.743039514700748e-06, "loss": 1.0719, "step": 3571 }, { "epoch": 0.12946250588960168, "grad_norm": 2.4967445666527155, "learning_rate": 9.742853746282194e-06, "loss": 0.8146, "step": 3572 }, { "epoch": 0.12949874959225835, "grad_norm": 2.501616640533447, "learning_rate": 9.742667912509984e-06, "loss": 1.0074, "step": 3573 }, { "epoch": 0.129534993294915, "grad_norm": 2.6302156986874254, "learning_rate": 9.742482013386678e-06, "loss": 1.1319, "step": 3574 }, { "epoch": 0.12957123699757167, "grad_norm": 1.939763143103716, "learning_rate": 9.742296048914839e-06, "loss": 0.7, "step": 3575 }, { "epoch": 0.12960748070022834, "grad_norm": 2.267651451286205, "learning_rate": 9.742110019097027e-06, "loss": 0.9902, "step": 3576 }, { "epoch": 0.129643724402885, "grad_norm": 2.3981326264287754, "learning_rate": 9.741923923935807e-06, "loss": 0.9941, "step": 3577 }, { "epoch": 0.12967996810554167, "grad_norm": 2.485226909894142, "learning_rate": 9.741737763433745e-06, "loss": 1.1323, "step": 3578 }, { "epoch": 0.1297162118081983, "grad_norm": 2.4176960803436676, "learning_rate": 9.741551537593403e-06, "loss": 1.0284, "step": 3579 }, { "epoch": 0.12975245551085499, "grad_norm": 2.310413384838854, "learning_rate": 9.741365246417348e-06, "loss": 1.0552, "step": 3580 }, { "epoch": 0.12978869921351166, "grad_norm": 2.197234157119454, "learning_rate": 9.741178889908148e-06, "loss": 0.8005, "step": 3581 }, { "epoch": 0.1298249429161683, "grad_norm": 2.828821456948195, "learning_rate": 9.74099246806837e-06, "loss": 1.1083, "step": 3582 }, { "epoch": 0.12986118661882498, "grad_norm": 2.28296304047031, "learning_rate": 9.740805980900584e-06, "loss": 0.9036, "step": 3583 }, { "epoch": 0.12989743032148166, "grad_norm": 2.063167250600407, "learning_rate": 9.740619428407359e-06, "loss": 1.0742, "step": 3584 }, { "epoch": 0.1299336740241383, "grad_norm": 2.2062882849808667, "learning_rate": 9.740432810591262e-06, "loss": 0.6567, "step": 3585 }, { "epoch": 0.12996991772679498, "grad_norm": 2.297744438068852, "learning_rate": 9.740246127454872e-06, "loss": 0.7661, "step": 3586 }, { "epoch": 0.13000616142945162, "grad_norm": 2.3154629387380106, "learning_rate": 9.740059379000755e-06, "loss": 0.7957, "step": 3587 }, { "epoch": 0.1300424051321083, "grad_norm": 2.353960885308848, "learning_rate": 9.739872565231486e-06, "loss": 0.9847, "step": 3588 }, { "epoch": 0.13007864883476497, "grad_norm": 2.8728222170772524, "learning_rate": 9.73968568614964e-06, "loss": 0.998, "step": 3589 }, { "epoch": 0.13011489253742162, "grad_norm": 2.7494498800475884, "learning_rate": 9.739498741757794e-06, "loss": 1.0755, "step": 3590 }, { "epoch": 0.1301511362400783, "grad_norm": 2.3257066477579134, "learning_rate": 9.739311732058518e-06, "loss": 0.9313, "step": 3591 }, { "epoch": 0.13018737994273494, "grad_norm": 2.5077535790300756, "learning_rate": 9.739124657054394e-06, "loss": 0.9843, "step": 3592 }, { "epoch": 0.1302236236453916, "grad_norm": 2.393678915432692, "learning_rate": 9.738937516748e-06, "loss": 0.8859, "step": 3593 }, { "epoch": 0.13025986734804829, "grad_norm": 2.6286658643129166, "learning_rate": 9.73875031114191e-06, "loss": 1.0849, "step": 3594 }, { "epoch": 0.13029611105070493, "grad_norm": 2.5086884183757507, "learning_rate": 9.738563040238709e-06, "loss": 0.9437, "step": 3595 }, { "epoch": 0.1303323547533616, "grad_norm": 3.118357690625637, "learning_rate": 9.738375704040975e-06, "loss": 1.1118, "step": 3596 }, { "epoch": 0.13036859845601825, "grad_norm": 2.765883510945987, "learning_rate": 9.73818830255129e-06, "loss": 0.9294, "step": 3597 }, { "epoch": 0.13040484215867493, "grad_norm": 2.5190700264814225, "learning_rate": 9.738000835772234e-06, "loss": 1.0142, "step": 3598 }, { "epoch": 0.1304410858613316, "grad_norm": 2.3962077049770185, "learning_rate": 9.737813303706393e-06, "loss": 0.8034, "step": 3599 }, { "epoch": 0.13047732956398825, "grad_norm": 2.3654984707983604, "learning_rate": 9.73762570635635e-06, "loss": 1.0983, "step": 3600 }, { "epoch": 0.13051357326664492, "grad_norm": 2.238010625905719, "learning_rate": 9.73743804372469e-06, "loss": 1.0115, "step": 3601 }, { "epoch": 0.1305498169693016, "grad_norm": 2.5713181206094182, "learning_rate": 9.737250315813997e-06, "loss": 1.0604, "step": 3602 }, { "epoch": 0.13058606067195824, "grad_norm": 2.4187408378560002, "learning_rate": 9.737062522626862e-06, "loss": 0.9034, "step": 3603 }, { "epoch": 0.13062230437461492, "grad_norm": 2.5405768026498703, "learning_rate": 9.73687466416587e-06, "loss": 1.0949, "step": 3604 }, { "epoch": 0.13065854807727156, "grad_norm": 2.3995852264751343, "learning_rate": 9.736686740433608e-06, "loss": 0.7969, "step": 3605 }, { "epoch": 0.13069479177992824, "grad_norm": 2.239176523943778, "learning_rate": 9.736498751432669e-06, "loss": 0.9268, "step": 3606 }, { "epoch": 0.1307310354825849, "grad_norm": 2.4408931389587694, "learning_rate": 9.736310697165642e-06, "loss": 0.8798, "step": 3607 }, { "epoch": 0.13076727918524156, "grad_norm": 2.2136738460774694, "learning_rate": 9.736122577635116e-06, "loss": 0.8973, "step": 3608 }, { "epoch": 0.13080352288789823, "grad_norm": 2.3370815303371164, "learning_rate": 9.735934392843689e-06, "loss": 0.9417, "step": 3609 }, { "epoch": 0.13083976659055488, "grad_norm": 2.385866261223521, "learning_rate": 9.735746142793947e-06, "loss": 1.0277, "step": 3610 }, { "epoch": 0.13087601029321155, "grad_norm": 2.520449993338362, "learning_rate": 9.735557827488488e-06, "loss": 0.9829, "step": 3611 }, { "epoch": 0.13091225399586823, "grad_norm": 2.4590692815995374, "learning_rate": 9.735369446929906e-06, "loss": 0.9667, "step": 3612 }, { "epoch": 0.13094849769852487, "grad_norm": 2.233720695200449, "learning_rate": 9.735181001120797e-06, "loss": 0.9844, "step": 3613 }, { "epoch": 0.13098474140118155, "grad_norm": 2.4596060372657225, "learning_rate": 9.73499249006376e-06, "loss": 1.0099, "step": 3614 }, { "epoch": 0.1310209851038382, "grad_norm": 2.543913623146442, "learning_rate": 9.734803913761387e-06, "loss": 0.9228, "step": 3615 }, { "epoch": 0.13105722880649487, "grad_norm": 2.603745714689709, "learning_rate": 9.73461527221628e-06, "loss": 0.9249, "step": 3616 }, { "epoch": 0.13109347250915154, "grad_norm": 2.7611137825290912, "learning_rate": 9.734426565431039e-06, "loss": 0.8774, "step": 3617 }, { "epoch": 0.1311297162118082, "grad_norm": 2.784783913786288, "learning_rate": 9.734237793408262e-06, "loss": 0.9611, "step": 3618 }, { "epoch": 0.13116595991446486, "grad_norm": 2.5244691783147397, "learning_rate": 9.734048956150552e-06, "loss": 0.873, "step": 3619 }, { "epoch": 0.13120220361712154, "grad_norm": 2.6636647584559845, "learning_rate": 9.73386005366051e-06, "loss": 1.0348, "step": 3620 }, { "epoch": 0.13123844731977818, "grad_norm": 2.4910233170514013, "learning_rate": 9.73367108594074e-06, "loss": 1.0041, "step": 3621 }, { "epoch": 0.13127469102243486, "grad_norm": 2.4335087288061987, "learning_rate": 9.733482052993846e-06, "loss": 0.8906, "step": 3622 }, { "epoch": 0.1313109347250915, "grad_norm": 2.4844476619349454, "learning_rate": 9.73329295482243e-06, "loss": 0.9211, "step": 3623 }, { "epoch": 0.13134717842774818, "grad_norm": 2.326547401456058, "learning_rate": 9.733103791429102e-06, "loss": 0.9321, "step": 3624 }, { "epoch": 0.13138342213040485, "grad_norm": 2.5574011128665677, "learning_rate": 9.732914562816464e-06, "loss": 0.7974, "step": 3625 }, { "epoch": 0.1314196658330615, "grad_norm": 2.1905214295126845, "learning_rate": 9.732725268987126e-06, "loss": 1.0381, "step": 3626 }, { "epoch": 0.13145590953571817, "grad_norm": 2.3124768422190383, "learning_rate": 9.732535909943698e-06, "loss": 0.9386, "step": 3627 }, { "epoch": 0.13149215323837482, "grad_norm": 2.7264746984646666, "learning_rate": 9.732346485688786e-06, "loss": 1.0999, "step": 3628 }, { "epoch": 0.1315283969410315, "grad_norm": 2.1547276999286304, "learning_rate": 9.732156996225002e-06, "loss": 0.9309, "step": 3629 }, { "epoch": 0.13156464064368817, "grad_norm": 2.1448795558203475, "learning_rate": 9.731967441554956e-06, "loss": 0.8223, "step": 3630 }, { "epoch": 0.13160088434634482, "grad_norm": 2.7505206791117884, "learning_rate": 9.73177782168126e-06, "loss": 1.0392, "step": 3631 }, { "epoch": 0.1316371280490015, "grad_norm": 2.2145698827342666, "learning_rate": 9.731588136606528e-06, "loss": 0.7961, "step": 3632 }, { "epoch": 0.13167337175165814, "grad_norm": 2.1629546618967406, "learning_rate": 9.731398386333373e-06, "loss": 0.9211, "step": 3633 }, { "epoch": 0.1317096154543148, "grad_norm": 2.5256094394717743, "learning_rate": 9.73120857086441e-06, "loss": 0.935, "step": 3634 }, { "epoch": 0.13174585915697148, "grad_norm": 2.477271761248393, "learning_rate": 9.731018690202254e-06, "loss": 0.9165, "step": 3635 }, { "epoch": 0.13178210285962813, "grad_norm": 2.3068066968033327, "learning_rate": 9.730828744349522e-06, "loss": 1.0553, "step": 3636 }, { "epoch": 0.1318183465622848, "grad_norm": 2.1794481578905662, "learning_rate": 9.73063873330883e-06, "loss": 0.8375, "step": 3637 }, { "epoch": 0.13185459026494148, "grad_norm": 2.5636755947792533, "learning_rate": 9.730448657082799e-06, "loss": 1.1023, "step": 3638 }, { "epoch": 0.13189083396759813, "grad_norm": 2.3739704333058422, "learning_rate": 9.730258515674045e-06, "loss": 1.061, "step": 3639 }, { "epoch": 0.1319270776702548, "grad_norm": 2.462509972377641, "learning_rate": 9.730068309085188e-06, "loss": 1.0637, "step": 3640 }, { "epoch": 0.13196332137291145, "grad_norm": 2.17295242218932, "learning_rate": 9.729878037318853e-06, "loss": 1.0373, "step": 3641 }, { "epoch": 0.13199956507556812, "grad_norm": 2.4581136544573803, "learning_rate": 9.729687700377658e-06, "loss": 1.0259, "step": 3642 }, { "epoch": 0.1320358087782248, "grad_norm": 2.402322920431134, "learning_rate": 9.729497298264228e-06, "loss": 1.312, "step": 3643 }, { "epoch": 0.13207205248088144, "grad_norm": 2.340191949457037, "learning_rate": 9.729306830981183e-06, "loss": 0.9874, "step": 3644 }, { "epoch": 0.13210829618353812, "grad_norm": 2.1346874761860715, "learning_rate": 9.729116298531152e-06, "loss": 0.9308, "step": 3645 }, { "epoch": 0.13214453988619476, "grad_norm": 2.542415175983316, "learning_rate": 9.728925700916758e-06, "loss": 1.1007, "step": 3646 }, { "epoch": 0.13218078358885144, "grad_norm": 2.4110541281877813, "learning_rate": 9.72873503814063e-06, "loss": 0.8874, "step": 3647 }, { "epoch": 0.1322170272915081, "grad_norm": 2.6758084807374085, "learning_rate": 9.72854431020539e-06, "loss": 0.9236, "step": 3648 }, { "epoch": 0.13225327099416476, "grad_norm": 2.763102325014019, "learning_rate": 9.728353517113668e-06, "loss": 1.1823, "step": 3649 }, { "epoch": 0.13228951469682143, "grad_norm": 2.505913715701824, "learning_rate": 9.728162658868097e-06, "loss": 0.9148, "step": 3650 }, { "epoch": 0.13232575839947808, "grad_norm": 2.453241427943895, "learning_rate": 9.727971735471305e-06, "loss": 1.0768, "step": 3651 }, { "epoch": 0.13236200210213475, "grad_norm": 2.6919593771523034, "learning_rate": 9.727780746925918e-06, "loss": 1.2076, "step": 3652 }, { "epoch": 0.13239824580479143, "grad_norm": 2.4518919707943128, "learning_rate": 9.727589693234576e-06, "loss": 1.0594, "step": 3653 }, { "epoch": 0.13243448950744807, "grad_norm": 2.156196723655098, "learning_rate": 9.727398574399905e-06, "loss": 0.9213, "step": 3654 }, { "epoch": 0.13247073321010475, "grad_norm": 2.921903681803554, "learning_rate": 9.72720739042454e-06, "loss": 1.0962, "step": 3655 }, { "epoch": 0.13250697691276142, "grad_norm": 2.2665558596653836, "learning_rate": 9.727016141311117e-06, "loss": 1.0866, "step": 3656 }, { "epoch": 0.13254322061541807, "grad_norm": 2.5016384137474468, "learning_rate": 9.726824827062269e-06, "loss": 1.1476, "step": 3657 }, { "epoch": 0.13257946431807474, "grad_norm": 2.2841806535757803, "learning_rate": 9.726633447680636e-06, "loss": 1.056, "step": 3658 }, { "epoch": 0.1326157080207314, "grad_norm": 2.475501308104161, "learning_rate": 9.72644200316885e-06, "loss": 1.217, "step": 3659 }, { "epoch": 0.13265195172338806, "grad_norm": 2.468757752430954, "learning_rate": 9.726250493529554e-06, "loss": 0.8838, "step": 3660 }, { "epoch": 0.13268819542604474, "grad_norm": 2.318865959076027, "learning_rate": 9.726058918765383e-06, "loss": 1.0757, "step": 3661 }, { "epoch": 0.13272443912870138, "grad_norm": 2.4044237409867653, "learning_rate": 9.72586727887898e-06, "loss": 1.0385, "step": 3662 }, { "epoch": 0.13276068283135806, "grad_norm": 2.5963331329639794, "learning_rate": 9.725675573872981e-06, "loss": 1.2079, "step": 3663 }, { "epoch": 0.1327969265340147, "grad_norm": 2.876355692065606, "learning_rate": 9.725483803750033e-06, "loss": 1.0608, "step": 3664 }, { "epoch": 0.13283317023667138, "grad_norm": 2.378024418202493, "learning_rate": 9.725291968512776e-06, "loss": 0.8851, "step": 3665 }, { "epoch": 0.13286941393932805, "grad_norm": 2.2369770928608372, "learning_rate": 9.725100068163853e-06, "loss": 1.0328, "step": 3666 }, { "epoch": 0.1329056576419847, "grad_norm": 2.4773086754350073, "learning_rate": 9.724908102705909e-06, "loss": 1.175, "step": 3667 }, { "epoch": 0.13294190134464137, "grad_norm": 2.536437805564313, "learning_rate": 9.724716072141588e-06, "loss": 0.8097, "step": 3668 }, { "epoch": 0.13297814504729802, "grad_norm": 2.66360046033445, "learning_rate": 9.724523976473538e-06, "loss": 0.9122, "step": 3669 }, { "epoch": 0.1330143887499547, "grad_norm": 2.4166982629428237, "learning_rate": 9.724331815704405e-06, "loss": 1.0478, "step": 3670 }, { "epoch": 0.13305063245261137, "grad_norm": 2.4469139351320366, "learning_rate": 9.724139589836837e-06, "loss": 0.9897, "step": 3671 }, { "epoch": 0.133086876155268, "grad_norm": 2.468061647248521, "learning_rate": 9.72394729887348e-06, "loss": 1.1264, "step": 3672 }, { "epoch": 0.1331231198579247, "grad_norm": 2.4606173949426657, "learning_rate": 9.723754942816987e-06, "loss": 0.9488, "step": 3673 }, { "epoch": 0.13315936356058136, "grad_norm": 2.6489484696648495, "learning_rate": 9.723562521670008e-06, "loss": 0.9949, "step": 3674 }, { "epoch": 0.133195607263238, "grad_norm": 2.2212133115783534, "learning_rate": 9.723370035435195e-06, "loss": 0.8952, "step": 3675 }, { "epoch": 0.13323185096589468, "grad_norm": 2.629126890074786, "learning_rate": 9.7231774841152e-06, "loss": 1.0039, "step": 3676 }, { "epoch": 0.13326809466855133, "grad_norm": 2.1777440567809183, "learning_rate": 9.722984867712675e-06, "loss": 0.7692, "step": 3677 }, { "epoch": 0.133304338371208, "grad_norm": 2.4053738492317813, "learning_rate": 9.722792186230276e-06, "loss": 0.8904, "step": 3678 }, { "epoch": 0.13334058207386468, "grad_norm": 2.5689404302693783, "learning_rate": 9.722599439670655e-06, "loss": 0.9553, "step": 3679 }, { "epoch": 0.13337682577652132, "grad_norm": 2.4738545227839066, "learning_rate": 9.722406628036472e-06, "loss": 0.8886, "step": 3680 }, { "epoch": 0.133413069479178, "grad_norm": 2.62939425197084, "learning_rate": 9.722213751330381e-06, "loss": 0.9989, "step": 3681 }, { "epoch": 0.13344931318183464, "grad_norm": 2.531581520483669, "learning_rate": 9.722020809555038e-06, "loss": 0.9984, "step": 3682 }, { "epoch": 0.13348555688449132, "grad_norm": 2.439953476522613, "learning_rate": 9.721827802713107e-06, "loss": 1.0259, "step": 3683 }, { "epoch": 0.133521800587148, "grad_norm": 2.900577433364403, "learning_rate": 9.721634730807243e-06, "loss": 1.2038, "step": 3684 }, { "epoch": 0.13355804428980464, "grad_norm": 2.5947178680279124, "learning_rate": 9.721441593840109e-06, "loss": 0.8773, "step": 3685 }, { "epoch": 0.1335942879924613, "grad_norm": 2.520346128822623, "learning_rate": 9.721248391814364e-06, "loss": 1.0004, "step": 3686 }, { "epoch": 0.13363053169511796, "grad_norm": 2.963292361891109, "learning_rate": 9.721055124732672e-06, "loss": 0.9338, "step": 3687 }, { "epoch": 0.13366677539777463, "grad_norm": 2.294385574735967, "learning_rate": 9.720861792597696e-06, "loss": 0.9745, "step": 3688 }, { "epoch": 0.1337030191004313, "grad_norm": 2.166321594522489, "learning_rate": 9.720668395412099e-06, "loss": 1.0532, "step": 3689 }, { "epoch": 0.13373926280308795, "grad_norm": 2.3917252247800875, "learning_rate": 9.720474933178546e-06, "loss": 0.9575, "step": 3690 }, { "epoch": 0.13377550650574463, "grad_norm": 2.0207782427203815, "learning_rate": 9.720281405899705e-06, "loss": 1.0424, "step": 3691 }, { "epoch": 0.1338117502084013, "grad_norm": 2.653317946114512, "learning_rate": 9.720087813578239e-06, "loss": 0.9023, "step": 3692 }, { "epoch": 0.13384799391105795, "grad_norm": 2.4507657327080006, "learning_rate": 9.71989415621682e-06, "loss": 0.9016, "step": 3693 }, { "epoch": 0.13388423761371462, "grad_norm": 2.5834110100755923, "learning_rate": 9.719700433818112e-06, "loss": 1.1077, "step": 3694 }, { "epoch": 0.13392048131637127, "grad_norm": 2.208940592595283, "learning_rate": 9.719506646384786e-06, "loss": 0.9162, "step": 3695 }, { "epoch": 0.13395672501902794, "grad_norm": 2.5315045883337723, "learning_rate": 9.719312793919514e-06, "loss": 0.8752, "step": 3696 }, { "epoch": 0.13399296872168462, "grad_norm": 2.3505175322111405, "learning_rate": 9.719118876424966e-06, "loss": 0.8435, "step": 3697 }, { "epoch": 0.13402921242434127, "grad_norm": 2.507008064933259, "learning_rate": 9.718924893903814e-06, "loss": 1.0275, "step": 3698 }, { "epoch": 0.13406545612699794, "grad_norm": 2.455500326648167, "learning_rate": 9.71873084635873e-06, "loss": 1.0829, "step": 3699 }, { "epoch": 0.13410169982965459, "grad_norm": 2.532534228136921, "learning_rate": 9.718536733792388e-06, "loss": 0.8898, "step": 3700 }, { "epoch": 0.13413794353231126, "grad_norm": 2.4638825816015006, "learning_rate": 9.718342556207466e-06, "loss": 1.1056, "step": 3701 }, { "epoch": 0.13417418723496793, "grad_norm": 2.4462637868084074, "learning_rate": 9.718148313606634e-06, "loss": 0.9074, "step": 3702 }, { "epoch": 0.13421043093762458, "grad_norm": 2.3024957315117645, "learning_rate": 9.717954005992573e-06, "loss": 0.9812, "step": 3703 }, { "epoch": 0.13424667464028125, "grad_norm": 2.6281894976246383, "learning_rate": 9.71775963336796e-06, "loss": 0.913, "step": 3704 }, { "epoch": 0.1342829183429379, "grad_norm": 2.6672252598530117, "learning_rate": 9.717565195735473e-06, "loss": 1.0859, "step": 3705 }, { "epoch": 0.13431916204559458, "grad_norm": 2.497199397240032, "learning_rate": 9.71737069309779e-06, "loss": 1.026, "step": 3706 }, { "epoch": 0.13435540574825125, "grad_norm": 2.549070421477489, "learning_rate": 9.71717612545759e-06, "loss": 1.0748, "step": 3707 }, { "epoch": 0.1343916494509079, "grad_norm": 2.447651009559579, "learning_rate": 9.716981492817558e-06, "loss": 0.9944, "step": 3708 }, { "epoch": 0.13442789315356457, "grad_norm": 2.6841644112121106, "learning_rate": 9.716786795180373e-06, "loss": 0.9636, "step": 3709 }, { "epoch": 0.13446413685622124, "grad_norm": 2.330550923458987, "learning_rate": 9.716592032548719e-06, "loss": 0.8261, "step": 3710 }, { "epoch": 0.1345003805588779, "grad_norm": 2.6257860629563097, "learning_rate": 9.716397204925279e-06, "loss": 0.8994, "step": 3711 }, { "epoch": 0.13453662426153457, "grad_norm": 2.30805525323135, "learning_rate": 9.716202312312737e-06, "loss": 1.163, "step": 3712 }, { "epoch": 0.1345728679641912, "grad_norm": 2.464372669800063, "learning_rate": 9.716007354713782e-06, "loss": 0.9653, "step": 3713 }, { "epoch": 0.13460911166684789, "grad_norm": 2.3750163364195993, "learning_rate": 9.715812332131094e-06, "loss": 0.8884, "step": 3714 }, { "epoch": 0.13464535536950456, "grad_norm": 13.84603098907347, "learning_rate": 9.715617244567367e-06, "loss": 2.2189, "step": 3715 }, { "epoch": 0.1346815990721612, "grad_norm": 2.3927480961520162, "learning_rate": 9.715422092025285e-06, "loss": 0.9713, "step": 3716 }, { "epoch": 0.13471784277481788, "grad_norm": 2.594086052816912, "learning_rate": 9.71522687450754e-06, "loss": 1.0485, "step": 3717 }, { "epoch": 0.13475408647747453, "grad_norm": 2.576355591715425, "learning_rate": 9.715031592016818e-06, "loss": 1.0677, "step": 3718 }, { "epoch": 0.1347903301801312, "grad_norm": 2.378675084722258, "learning_rate": 9.714836244555814e-06, "loss": 1.1274, "step": 3719 }, { "epoch": 0.13482657388278788, "grad_norm": 2.3400458765181757, "learning_rate": 9.714640832127217e-06, "loss": 0.9014, "step": 3720 }, { "epoch": 0.13486281758544452, "grad_norm": 2.3747352205955687, "learning_rate": 9.714445354733722e-06, "loss": 0.9384, "step": 3721 }, { "epoch": 0.1348990612881012, "grad_norm": 2.290746693184952, "learning_rate": 9.714249812378019e-06, "loss": 1.0527, "step": 3722 }, { "epoch": 0.13493530499075784, "grad_norm": 2.5200997592355385, "learning_rate": 9.714054205062805e-06, "loss": 1.0016, "step": 3723 }, { "epoch": 0.13497154869341452, "grad_norm": 2.1306165571905376, "learning_rate": 9.713858532790777e-06, "loss": 1.0633, "step": 3724 }, { "epoch": 0.1350077923960712, "grad_norm": 2.471985593428337, "learning_rate": 9.71366279556463e-06, "loss": 1.1011, "step": 3725 }, { "epoch": 0.13504403609872784, "grad_norm": 2.462316309128517, "learning_rate": 9.713466993387057e-06, "loss": 0.9415, "step": 3726 }, { "epoch": 0.1350802798013845, "grad_norm": 2.2233422475799096, "learning_rate": 9.713271126260762e-06, "loss": 0.9502, "step": 3727 }, { "epoch": 0.13511652350404119, "grad_norm": 2.1909213821419695, "learning_rate": 9.71307519418844e-06, "loss": 1.074, "step": 3728 }, { "epoch": 0.13515276720669783, "grad_norm": 2.5424214146969017, "learning_rate": 9.712879197172793e-06, "loss": 1.111, "step": 3729 }, { "epoch": 0.1351890109093545, "grad_norm": 2.6640074737160773, "learning_rate": 9.712683135216519e-06, "loss": 1.054, "step": 3730 }, { "epoch": 0.13522525461201115, "grad_norm": 2.3406838570522064, "learning_rate": 9.712487008322324e-06, "loss": 1.031, "step": 3731 }, { "epoch": 0.13526149831466783, "grad_norm": 2.5056049242420095, "learning_rate": 9.712290816492906e-06, "loss": 1.129, "step": 3732 }, { "epoch": 0.1352977420173245, "grad_norm": 2.3840884886592173, "learning_rate": 9.712094559730972e-06, "loss": 0.9516, "step": 3733 }, { "epoch": 0.13533398571998115, "grad_norm": 2.6811689276042494, "learning_rate": 9.711898238039225e-06, "loss": 1.0318, "step": 3734 }, { "epoch": 0.13537022942263782, "grad_norm": 2.190786602456542, "learning_rate": 9.71170185142037e-06, "loss": 0.9797, "step": 3735 }, { "epoch": 0.13540647312529447, "grad_norm": 2.354846523538828, "learning_rate": 9.711505399877111e-06, "loss": 0.9043, "step": 3736 }, { "epoch": 0.13544271682795114, "grad_norm": 2.2344474515091934, "learning_rate": 9.711308883412158e-06, "loss": 0.9024, "step": 3737 }, { "epoch": 0.13547896053060782, "grad_norm": 2.414854913479354, "learning_rate": 9.711112302028219e-06, "loss": 1.1864, "step": 3738 }, { "epoch": 0.13551520423326446, "grad_norm": 2.340718516547624, "learning_rate": 9.710915655728e-06, "loss": 1.0518, "step": 3739 }, { "epoch": 0.13555144793592114, "grad_norm": 2.4730969850979476, "learning_rate": 9.710718944514214e-06, "loss": 1.1678, "step": 3740 }, { "epoch": 0.13558769163857778, "grad_norm": 2.2579993719704983, "learning_rate": 9.710522168389569e-06, "loss": 0.8477, "step": 3741 }, { "epoch": 0.13562393534123446, "grad_norm": 2.2481206534018505, "learning_rate": 9.710325327356778e-06, "loss": 0.89, "step": 3742 }, { "epoch": 0.13566017904389113, "grad_norm": 2.601331390500283, "learning_rate": 9.710128421418551e-06, "loss": 0.9959, "step": 3743 }, { "epoch": 0.13569642274654778, "grad_norm": 2.1043147341991957, "learning_rate": 9.709931450577606e-06, "loss": 0.9507, "step": 3744 }, { "epoch": 0.13573266644920445, "grad_norm": 2.5819908701535605, "learning_rate": 9.709734414836652e-06, "loss": 0.9434, "step": 3745 }, { "epoch": 0.13576891015186113, "grad_norm": 2.1221955353786868, "learning_rate": 9.709537314198405e-06, "loss": 0.7996, "step": 3746 }, { "epoch": 0.13580515385451777, "grad_norm": 2.816395275039814, "learning_rate": 9.709340148665583e-06, "loss": 1.0196, "step": 3747 }, { "epoch": 0.13584139755717445, "grad_norm": 2.2693594305244758, "learning_rate": 9.709142918240903e-06, "loss": 0.9928, "step": 3748 }, { "epoch": 0.1358776412598311, "grad_norm": 2.530291648747502, "learning_rate": 9.70894562292708e-06, "loss": 0.8415, "step": 3749 }, { "epoch": 0.13591388496248777, "grad_norm": 2.5605250032930447, "learning_rate": 9.708748262726835e-06, "loss": 1.1231, "step": 3750 }, { "epoch": 0.13595012866514444, "grad_norm": 2.4831349814256027, "learning_rate": 9.708550837642888e-06, "loss": 0.9196, "step": 3751 }, { "epoch": 0.1359863723678011, "grad_norm": 2.4813952817713574, "learning_rate": 9.708353347677956e-06, "loss": 1.0805, "step": 3752 }, { "epoch": 0.13602261607045776, "grad_norm": 2.3787196700802347, "learning_rate": 9.708155792834763e-06, "loss": 1.0556, "step": 3753 }, { "epoch": 0.1360588597731144, "grad_norm": 2.2922676901094134, "learning_rate": 9.70795817311603e-06, "loss": 1.0873, "step": 3754 }, { "epoch": 0.13609510347577108, "grad_norm": 2.4898926270303834, "learning_rate": 9.707760488524483e-06, "loss": 0.9897, "step": 3755 }, { "epoch": 0.13613134717842776, "grad_norm": 2.6043396657623448, "learning_rate": 9.707562739062841e-06, "loss": 1.0383, "step": 3756 }, { "epoch": 0.1361675908810844, "grad_norm": 2.4534179690298807, "learning_rate": 9.707364924733834e-06, "loss": 0.8963, "step": 3757 }, { "epoch": 0.13620383458374108, "grad_norm": 1.9678483832459803, "learning_rate": 9.707167045540181e-06, "loss": 0.855, "step": 3758 }, { "epoch": 0.13624007828639773, "grad_norm": 2.397435935163874, "learning_rate": 9.706969101484617e-06, "loss": 1.0197, "step": 3759 }, { "epoch": 0.1362763219890544, "grad_norm": 2.5135812138990765, "learning_rate": 9.706771092569865e-06, "loss": 0.9354, "step": 3760 }, { "epoch": 0.13631256569171107, "grad_norm": 2.5642517545965298, "learning_rate": 9.706573018798654e-06, "loss": 1.0667, "step": 3761 }, { "epoch": 0.13634880939436772, "grad_norm": 2.241389340690745, "learning_rate": 9.70637488017371e-06, "loss": 0.9442, "step": 3762 }, { "epoch": 0.1363850530970244, "grad_norm": 2.2364604347671606, "learning_rate": 9.70617667669777e-06, "loss": 1.0902, "step": 3763 }, { "epoch": 0.13642129679968107, "grad_norm": 2.512862700729112, "learning_rate": 9.70597840837356e-06, "loss": 1.1277, "step": 3764 }, { "epoch": 0.13645754050233772, "grad_norm": 2.357399377149529, "learning_rate": 9.705780075203815e-06, "loss": 1.0896, "step": 3765 }, { "epoch": 0.1364937842049944, "grad_norm": 2.494553162346752, "learning_rate": 9.705581677191264e-06, "loss": 1.0677, "step": 3766 }, { "epoch": 0.13653002790765104, "grad_norm": 2.336152823365752, "learning_rate": 9.705383214338647e-06, "loss": 1.0945, "step": 3767 }, { "epoch": 0.1365662716103077, "grad_norm": 2.2094641630472953, "learning_rate": 9.705184686648691e-06, "loss": 1.0522, "step": 3768 }, { "epoch": 0.13660251531296438, "grad_norm": 2.882889430367911, "learning_rate": 9.704986094124139e-06, "loss": 0.9671, "step": 3769 }, { "epoch": 0.13663875901562103, "grad_norm": 2.5193671182297326, "learning_rate": 9.704787436767721e-06, "loss": 0.9982, "step": 3770 }, { "epoch": 0.1366750027182777, "grad_norm": 2.3962047401296966, "learning_rate": 9.70458871458218e-06, "loss": 1.158, "step": 3771 }, { "epoch": 0.13671124642093435, "grad_norm": 2.266011687308356, "learning_rate": 9.70438992757025e-06, "loss": 0.8862, "step": 3772 }, { "epoch": 0.13674749012359103, "grad_norm": 2.7485527383144626, "learning_rate": 9.704191075734674e-06, "loss": 1.0477, "step": 3773 }, { "epoch": 0.1367837338262477, "grad_norm": 2.345493225118905, "learning_rate": 9.703992159078189e-06, "loss": 0.8618, "step": 3774 }, { "epoch": 0.13681997752890435, "grad_norm": 2.6248189208447505, "learning_rate": 9.703793177603538e-06, "loss": 0.8338, "step": 3775 }, { "epoch": 0.13685622123156102, "grad_norm": 2.6466633897062812, "learning_rate": 9.70359413131346e-06, "loss": 1.0166, "step": 3776 }, { "epoch": 0.13689246493421767, "grad_norm": 2.4689292736906894, "learning_rate": 9.7033950202107e-06, "loss": 1.0453, "step": 3777 }, { "epoch": 0.13692870863687434, "grad_norm": 2.636388189131, "learning_rate": 9.703195844298002e-06, "loss": 0.9852, "step": 3778 }, { "epoch": 0.13696495233953102, "grad_norm": 2.867776477575247, "learning_rate": 9.702996603578108e-06, "loss": 0.982, "step": 3779 }, { "epoch": 0.13700119604218766, "grad_norm": 2.7359240503468003, "learning_rate": 9.702797298053765e-06, "loss": 1.0689, "step": 3780 }, { "epoch": 0.13703743974484434, "grad_norm": 2.3220606900269805, "learning_rate": 9.702597927727722e-06, "loss": 1.0569, "step": 3781 }, { "epoch": 0.137073683447501, "grad_norm": 2.3474916446822918, "learning_rate": 9.70239849260272e-06, "loss": 1.1104, "step": 3782 }, { "epoch": 0.13710992715015766, "grad_norm": 2.2039387118921625, "learning_rate": 9.702198992681512e-06, "loss": 1.0418, "step": 3783 }, { "epoch": 0.13714617085281433, "grad_norm": 2.362747714918654, "learning_rate": 9.701999427966848e-06, "loss": 1.0258, "step": 3784 }, { "epoch": 0.13718241455547098, "grad_norm": 2.537597036048529, "learning_rate": 9.701799798461472e-06, "loss": 1.0122, "step": 3785 }, { "epoch": 0.13721865825812765, "grad_norm": 2.308900901156811, "learning_rate": 9.701600104168141e-06, "loss": 0.9344, "step": 3786 }, { "epoch": 0.13725490196078433, "grad_norm": 2.622716588267738, "learning_rate": 9.701400345089605e-06, "loss": 1.1566, "step": 3787 }, { "epoch": 0.13729114566344097, "grad_norm": 2.717366692090556, "learning_rate": 9.701200521228613e-06, "loss": 0.8401, "step": 3788 }, { "epoch": 0.13732738936609765, "grad_norm": 2.4272402149907246, "learning_rate": 9.701000632587921e-06, "loss": 0.9284, "step": 3789 }, { "epoch": 0.1373636330687543, "grad_norm": 2.35903602427578, "learning_rate": 9.700800679170284e-06, "loss": 0.931, "step": 3790 }, { "epoch": 0.13739987677141097, "grad_norm": 2.255489744914091, "learning_rate": 9.700600660978457e-06, "loss": 0.8848, "step": 3791 }, { "epoch": 0.13743612047406764, "grad_norm": 2.5085595683304627, "learning_rate": 9.700400578015196e-06, "loss": 1.0997, "step": 3792 }, { "epoch": 0.1374723641767243, "grad_norm": 2.2543211405694894, "learning_rate": 9.700200430283258e-06, "loss": 1.0216, "step": 3793 }, { "epoch": 0.13750860787938096, "grad_norm": 2.2823388029325478, "learning_rate": 9.700000217785398e-06, "loss": 0.9691, "step": 3794 }, { "epoch": 0.1375448515820376, "grad_norm": 2.517651941585846, "learning_rate": 9.69979994052438e-06, "loss": 0.8892, "step": 3795 }, { "epoch": 0.13758109528469428, "grad_norm": 2.2287488872026073, "learning_rate": 9.69959959850296e-06, "loss": 0.8866, "step": 3796 }, { "epoch": 0.13761733898735096, "grad_norm": 2.688375213088761, "learning_rate": 9.699399191723901e-06, "loss": 1.0463, "step": 3797 }, { "epoch": 0.1376535826900076, "grad_norm": 2.0690924036627276, "learning_rate": 9.699198720189961e-06, "loss": 0.8369, "step": 3798 }, { "epoch": 0.13768982639266428, "grad_norm": 2.5984742423051843, "learning_rate": 9.698998183903907e-06, "loss": 1.2518, "step": 3799 }, { "epoch": 0.13772607009532095, "grad_norm": 2.1432809588561703, "learning_rate": 9.6987975828685e-06, "loss": 0.9819, "step": 3800 }, { "epoch": 0.1377623137979776, "grad_norm": 2.455166694636187, "learning_rate": 9.698596917086502e-06, "loss": 0.9485, "step": 3801 }, { "epoch": 0.13779855750063427, "grad_norm": 2.553684677660536, "learning_rate": 9.698396186560681e-06, "loss": 1.0673, "step": 3802 }, { "epoch": 0.13783480120329092, "grad_norm": 2.470105985166476, "learning_rate": 9.698195391293803e-06, "loss": 1.0043, "step": 3803 }, { "epoch": 0.1378710449059476, "grad_norm": 2.440601542787508, "learning_rate": 9.697994531288632e-06, "loss": 0.9864, "step": 3804 }, { "epoch": 0.13790728860860427, "grad_norm": 2.3176600364085256, "learning_rate": 9.69779360654794e-06, "loss": 0.7974, "step": 3805 }, { "epoch": 0.1379435323112609, "grad_norm": 2.4469546183875908, "learning_rate": 9.697592617074491e-06, "loss": 0.8241, "step": 3806 }, { "epoch": 0.1379797760139176, "grad_norm": 2.7108959651534743, "learning_rate": 9.697391562871058e-06, "loss": 1.0164, "step": 3807 }, { "epoch": 0.13801601971657423, "grad_norm": 2.495319574919084, "learning_rate": 9.69719044394041e-06, "loss": 1.1136, "step": 3808 }, { "epoch": 0.1380522634192309, "grad_norm": 2.5683389485353034, "learning_rate": 9.696989260285319e-06, "loss": 1.0556, "step": 3809 }, { "epoch": 0.13808850712188758, "grad_norm": 2.3864172943310367, "learning_rate": 9.696788011908555e-06, "loss": 0.8999, "step": 3810 }, { "epoch": 0.13812475082454423, "grad_norm": 2.0250024057304254, "learning_rate": 9.696586698812893e-06, "loss": 0.9028, "step": 3811 }, { "epoch": 0.1381609945272009, "grad_norm": 2.41681164313043, "learning_rate": 9.696385321001108e-06, "loss": 1.0072, "step": 3812 }, { "epoch": 0.13819723822985755, "grad_norm": 2.500581531660897, "learning_rate": 9.696183878475973e-06, "loss": 0.7691, "step": 3813 }, { "epoch": 0.13823348193251422, "grad_norm": 2.2225532526878338, "learning_rate": 9.695982371240264e-06, "loss": 0.7178, "step": 3814 }, { "epoch": 0.1382697256351709, "grad_norm": 2.431418855254105, "learning_rate": 9.695780799296759e-06, "loss": 0.999, "step": 3815 }, { "epoch": 0.13830596933782754, "grad_norm": 2.3940754897271956, "learning_rate": 9.695579162648234e-06, "loss": 0.9801, "step": 3816 }, { "epoch": 0.13834221304048422, "grad_norm": 2.718951688526831, "learning_rate": 9.695377461297468e-06, "loss": 1.0702, "step": 3817 }, { "epoch": 0.1383784567431409, "grad_norm": 2.4426220904053024, "learning_rate": 9.695175695247238e-06, "loss": 0.9499, "step": 3818 }, { "epoch": 0.13841470044579754, "grad_norm": 2.2995475586164575, "learning_rate": 9.69497386450033e-06, "loss": 0.9592, "step": 3819 }, { "epoch": 0.1384509441484542, "grad_norm": 2.7890170321606487, "learning_rate": 9.694771969059519e-06, "loss": 1.0502, "step": 3820 }, { "epoch": 0.13848718785111086, "grad_norm": 2.182569503498445, "learning_rate": 9.69457000892759e-06, "loss": 0.8133, "step": 3821 }, { "epoch": 0.13852343155376753, "grad_norm": 2.2532824029221157, "learning_rate": 9.694367984107326e-06, "loss": 0.9429, "step": 3822 }, { "epoch": 0.1385596752564242, "grad_norm": 2.7602599262768144, "learning_rate": 9.69416589460151e-06, "loss": 1.1762, "step": 3823 }, { "epoch": 0.13859591895908085, "grad_norm": 2.3733764107131505, "learning_rate": 9.693963740412929e-06, "loss": 1.0399, "step": 3824 }, { "epoch": 0.13863216266173753, "grad_norm": 2.1919709558929803, "learning_rate": 9.693761521544364e-06, "loss": 1.0872, "step": 3825 }, { "epoch": 0.13866840636439418, "grad_norm": 2.6627603050556896, "learning_rate": 9.693559237998604e-06, "loss": 0.9542, "step": 3826 }, { "epoch": 0.13870465006705085, "grad_norm": 2.4462860969833145, "learning_rate": 9.693356889778437e-06, "loss": 1.0571, "step": 3827 }, { "epoch": 0.13874089376970752, "grad_norm": 2.4713777096334995, "learning_rate": 9.69315447688665e-06, "loss": 0.89, "step": 3828 }, { "epoch": 0.13877713747236417, "grad_norm": 2.3109823726769636, "learning_rate": 9.692951999326032e-06, "loss": 1.0918, "step": 3829 }, { "epoch": 0.13881338117502084, "grad_norm": 2.8473627313964314, "learning_rate": 9.692749457099375e-06, "loss": 0.9785, "step": 3830 }, { "epoch": 0.1388496248776775, "grad_norm": 2.307989492890909, "learning_rate": 9.692546850209468e-06, "loss": 0.9355, "step": 3831 }, { "epoch": 0.13888586858033417, "grad_norm": 2.64241684804098, "learning_rate": 9.692344178659104e-06, "loss": 0.8814, "step": 3832 }, { "epoch": 0.13892211228299084, "grad_norm": 2.8310661204210175, "learning_rate": 9.692141442451076e-06, "loss": 0.9654, "step": 3833 }, { "epoch": 0.13895835598564749, "grad_norm": 2.2082347197397443, "learning_rate": 9.691938641588174e-06, "loss": 0.9931, "step": 3834 }, { "epoch": 0.13899459968830416, "grad_norm": 2.5308364284347777, "learning_rate": 9.691735776073197e-06, "loss": 0.883, "step": 3835 }, { "epoch": 0.13903084339096083, "grad_norm": 2.784602665205731, "learning_rate": 9.691532845908937e-06, "loss": 1.0411, "step": 3836 }, { "epoch": 0.13906708709361748, "grad_norm": 2.256102441961072, "learning_rate": 9.691329851098193e-06, "loss": 1.0962, "step": 3837 }, { "epoch": 0.13910333079627415, "grad_norm": 2.4181279643893707, "learning_rate": 9.691126791643761e-06, "loss": 0.96, "step": 3838 }, { "epoch": 0.1391395744989308, "grad_norm": 2.308489970789059, "learning_rate": 9.690923667548438e-06, "loss": 1.0937, "step": 3839 }, { "epoch": 0.13917581820158748, "grad_norm": 2.678231281934654, "learning_rate": 9.690720478815023e-06, "loss": 1.1574, "step": 3840 }, { "epoch": 0.13921206190424415, "grad_norm": 2.3387084542587044, "learning_rate": 9.69051722544632e-06, "loss": 0.826, "step": 3841 }, { "epoch": 0.1392483056069008, "grad_norm": 2.8152316013933185, "learning_rate": 9.690313907445124e-06, "loss": 1.0894, "step": 3842 }, { "epoch": 0.13928454930955747, "grad_norm": 2.2407831681283024, "learning_rate": 9.690110524814238e-06, "loss": 1.0529, "step": 3843 }, { "epoch": 0.13932079301221412, "grad_norm": 2.4280024026830795, "learning_rate": 9.689907077556467e-06, "loss": 1.0264, "step": 3844 }, { "epoch": 0.1393570367148708, "grad_norm": 2.527649536437719, "learning_rate": 9.689703565674612e-06, "loss": 1.0316, "step": 3845 }, { "epoch": 0.13939328041752747, "grad_norm": 2.4377552714014805, "learning_rate": 9.689499989171478e-06, "loss": 0.9389, "step": 3846 }, { "epoch": 0.1394295241201841, "grad_norm": 2.162191470604198, "learning_rate": 9.689296348049871e-06, "loss": 1.1077, "step": 3847 }, { "epoch": 0.13946576782284079, "grad_norm": 2.412416244082557, "learning_rate": 9.689092642312597e-06, "loss": 1.0876, "step": 3848 }, { "epoch": 0.13950201152549743, "grad_norm": 2.2838687867251837, "learning_rate": 9.68888887196246e-06, "loss": 0.9891, "step": 3849 }, { "epoch": 0.1395382552281541, "grad_norm": 2.642784345344728, "learning_rate": 9.688685037002272e-06, "loss": 0.9901, "step": 3850 }, { "epoch": 0.13957449893081078, "grad_norm": 2.5477428467134233, "learning_rate": 9.68848113743484e-06, "loss": 1.0499, "step": 3851 }, { "epoch": 0.13961074263346743, "grad_norm": 2.543692309826104, "learning_rate": 9.688277173262972e-06, "loss": 1.0163, "step": 3852 }, { "epoch": 0.1396469863361241, "grad_norm": 2.5031820510508287, "learning_rate": 9.688073144489481e-06, "loss": 0.9266, "step": 3853 }, { "epoch": 0.13968323003878078, "grad_norm": 2.5583600692374815, "learning_rate": 9.687869051117177e-06, "loss": 1.0874, "step": 3854 }, { "epoch": 0.13971947374143742, "grad_norm": 2.1416282403247475, "learning_rate": 9.687664893148874e-06, "loss": 0.8212, "step": 3855 }, { "epoch": 0.1397557174440941, "grad_norm": 2.4817743014396174, "learning_rate": 9.68746067058738e-06, "loss": 1.0389, "step": 3856 }, { "epoch": 0.13979196114675074, "grad_norm": 2.134288991673551, "learning_rate": 9.687256383435518e-06, "loss": 0.8892, "step": 3857 }, { "epoch": 0.13982820484940742, "grad_norm": 2.1880123827733398, "learning_rate": 9.687052031696094e-06, "loss": 1.0725, "step": 3858 }, { "epoch": 0.1398644485520641, "grad_norm": 2.693930716748389, "learning_rate": 9.686847615371929e-06, "loss": 0.9281, "step": 3859 }, { "epoch": 0.13990069225472074, "grad_norm": 2.4721752481980737, "learning_rate": 9.686643134465837e-06, "loss": 0.9591, "step": 3860 }, { "epoch": 0.1399369359573774, "grad_norm": 2.467218945278576, "learning_rate": 9.686438588980638e-06, "loss": 0.8795, "step": 3861 }, { "epoch": 0.13997317966003406, "grad_norm": 2.6273536934383928, "learning_rate": 9.686233978919151e-06, "loss": 0.8552, "step": 3862 }, { "epoch": 0.14000942336269073, "grad_norm": 2.3005499416549506, "learning_rate": 9.686029304284192e-06, "loss": 1.0248, "step": 3863 }, { "epoch": 0.1400456670653474, "grad_norm": 2.2711589838943143, "learning_rate": 9.685824565078583e-06, "loss": 0.8405, "step": 3864 }, { "epoch": 0.14008191076800405, "grad_norm": 2.6828863928113167, "learning_rate": 9.685619761305145e-06, "loss": 0.9095, "step": 3865 }, { "epoch": 0.14011815447066073, "grad_norm": 1.9859013787387154, "learning_rate": 9.685414892966702e-06, "loss": 0.9679, "step": 3866 }, { "epoch": 0.14015439817331737, "grad_norm": 2.1657767785668476, "learning_rate": 9.685209960066075e-06, "loss": 0.8073, "step": 3867 }, { "epoch": 0.14019064187597405, "grad_norm": 2.6134009753069023, "learning_rate": 9.685004962606089e-06, "loss": 1.0632, "step": 3868 }, { "epoch": 0.14022688557863072, "grad_norm": 2.716257672847126, "learning_rate": 9.684799900589568e-06, "loss": 0.9641, "step": 3869 }, { "epoch": 0.14026312928128737, "grad_norm": 2.5280891824275917, "learning_rate": 9.684594774019336e-06, "loss": 1.1005, "step": 3870 }, { "epoch": 0.14029937298394404, "grad_norm": 2.622605252959637, "learning_rate": 9.684389582898221e-06, "loss": 1.0675, "step": 3871 }, { "epoch": 0.1403356166866007, "grad_norm": 2.3285803807592806, "learning_rate": 9.684184327229054e-06, "loss": 0.9648, "step": 3872 }, { "epoch": 0.14037186038925736, "grad_norm": 2.580957073060303, "learning_rate": 9.683979007014656e-06, "loss": 0.9618, "step": 3873 }, { "epoch": 0.14040810409191404, "grad_norm": 2.3014797916524667, "learning_rate": 9.683773622257862e-06, "loss": 1.08, "step": 3874 }, { "epoch": 0.14044434779457068, "grad_norm": 2.4616824110941304, "learning_rate": 9.6835681729615e-06, "loss": 0.7012, "step": 3875 }, { "epoch": 0.14048059149722736, "grad_norm": 2.385366327087306, "learning_rate": 9.6833626591284e-06, "loss": 0.9737, "step": 3876 }, { "epoch": 0.14051683519988403, "grad_norm": 2.3808109941769557, "learning_rate": 9.683157080761396e-06, "loss": 0.9305, "step": 3877 }, { "epoch": 0.14055307890254068, "grad_norm": 2.3259076384585473, "learning_rate": 9.682951437863319e-06, "loss": 1.0194, "step": 3878 }, { "epoch": 0.14058932260519735, "grad_norm": 2.1229016637766964, "learning_rate": 9.682745730437005e-06, "loss": 0.9815, "step": 3879 }, { "epoch": 0.140625566307854, "grad_norm": 2.351499129978914, "learning_rate": 9.682539958485286e-06, "loss": 0.9871, "step": 3880 }, { "epoch": 0.14066181001051067, "grad_norm": 2.3497730896376425, "learning_rate": 9.682334122010997e-06, "loss": 1.0169, "step": 3881 }, { "epoch": 0.14069805371316735, "grad_norm": 2.3648578668638534, "learning_rate": 9.682128221016978e-06, "loss": 0.9053, "step": 3882 }, { "epoch": 0.140734297415824, "grad_norm": 2.7193531082100955, "learning_rate": 9.681922255506062e-06, "loss": 0.9115, "step": 3883 }, { "epoch": 0.14077054111848067, "grad_norm": 2.5765595342373717, "learning_rate": 9.681716225481088e-06, "loss": 0.9346, "step": 3884 }, { "epoch": 0.14080678482113732, "grad_norm": 2.5711466477988476, "learning_rate": 9.681510130944898e-06, "loss": 1.042, "step": 3885 }, { "epoch": 0.140843028523794, "grad_norm": 2.573184377469163, "learning_rate": 9.681303971900328e-06, "loss": 1.0791, "step": 3886 }, { "epoch": 0.14087927222645066, "grad_norm": 2.415239216629097, "learning_rate": 9.68109774835022e-06, "loss": 1.0794, "step": 3887 }, { "epoch": 0.1409155159291073, "grad_norm": 2.26131469514168, "learning_rate": 9.680891460297419e-06, "loss": 1.094, "step": 3888 }, { "epoch": 0.14095175963176398, "grad_norm": 2.8252755765583615, "learning_rate": 9.680685107744762e-06, "loss": 1.0091, "step": 3889 }, { "epoch": 0.14098800333442063, "grad_norm": 2.7902729233164405, "learning_rate": 9.680478690695097e-06, "loss": 0.9936, "step": 3890 }, { "epoch": 0.1410242470370773, "grad_norm": 2.4812471808779133, "learning_rate": 9.680272209151265e-06, "loss": 0.9602, "step": 3891 }, { "epoch": 0.14106049073973398, "grad_norm": 2.6021404464442957, "learning_rate": 9.68006566311611e-06, "loss": 0.7032, "step": 3892 }, { "epoch": 0.14109673444239063, "grad_norm": 2.4618446652444033, "learning_rate": 9.679859052592486e-06, "loss": 0.9064, "step": 3893 }, { "epoch": 0.1411329781450473, "grad_norm": 2.32768159470571, "learning_rate": 9.67965237758323e-06, "loss": 0.9215, "step": 3894 }, { "epoch": 0.14116922184770397, "grad_norm": 2.2856967545464566, "learning_rate": 9.679445638091197e-06, "loss": 1.1108, "step": 3895 }, { "epoch": 0.14120546555036062, "grad_norm": 2.0417750983886553, "learning_rate": 9.679238834119233e-06, "loss": 0.8493, "step": 3896 }, { "epoch": 0.1412417092530173, "grad_norm": 2.553032387170253, "learning_rate": 9.679031965670188e-06, "loss": 0.9544, "step": 3897 }, { "epoch": 0.14127795295567394, "grad_norm": 2.1653166106656885, "learning_rate": 9.67882503274691e-06, "loss": 0.9038, "step": 3898 }, { "epoch": 0.14131419665833062, "grad_norm": 2.50937610412465, "learning_rate": 9.678618035352253e-06, "loss": 0.8245, "step": 3899 }, { "epoch": 0.1413504403609873, "grad_norm": 2.417172417720909, "learning_rate": 9.67841097348907e-06, "loss": 1.0714, "step": 3900 }, { "epoch": 0.14138668406364394, "grad_norm": 2.100733345556698, "learning_rate": 9.678203847160215e-06, "loss": 1.0293, "step": 3901 }, { "epoch": 0.1414229277663006, "grad_norm": 2.288785448303611, "learning_rate": 9.677996656368538e-06, "loss": 0.9359, "step": 3902 }, { "epoch": 0.14145917146895726, "grad_norm": 2.578634597649807, "learning_rate": 9.677789401116896e-06, "loss": 1.2017, "step": 3903 }, { "epoch": 0.14149541517161393, "grad_norm": 2.420132457962244, "learning_rate": 9.677582081408146e-06, "loss": 1.0571, "step": 3904 }, { "epoch": 0.1415316588742706, "grad_norm": 2.558826300934455, "learning_rate": 9.677374697245144e-06, "loss": 1.0717, "step": 3905 }, { "epoch": 0.14156790257692725, "grad_norm": 2.475192189274232, "learning_rate": 9.677167248630747e-06, "loss": 0.9501, "step": 3906 }, { "epoch": 0.14160414627958393, "grad_norm": 2.5553380031985853, "learning_rate": 9.676959735567814e-06, "loss": 0.927, "step": 3907 }, { "epoch": 0.14164038998224057, "grad_norm": 2.378545822721385, "learning_rate": 9.676752158059205e-06, "loss": 1.0391, "step": 3908 }, { "epoch": 0.14167663368489725, "grad_norm": 2.3290450839601107, "learning_rate": 9.67654451610778e-06, "loss": 0.9457, "step": 3909 }, { "epoch": 0.14171287738755392, "grad_norm": 2.2397203870827833, "learning_rate": 9.676336809716398e-06, "loss": 0.8322, "step": 3910 }, { "epoch": 0.14174912109021057, "grad_norm": 2.25538123103938, "learning_rate": 9.676129038887924e-06, "loss": 1.1173, "step": 3911 }, { "epoch": 0.14178536479286724, "grad_norm": 2.568820100890948, "learning_rate": 9.67592120362522e-06, "loss": 1.1516, "step": 3912 }, { "epoch": 0.14182160849552392, "grad_norm": 2.4829307801304776, "learning_rate": 9.67571330393115e-06, "loss": 0.9538, "step": 3913 }, { "epoch": 0.14185785219818056, "grad_norm": 2.6395489111917207, "learning_rate": 9.675505339808578e-06, "loss": 0.868, "step": 3914 }, { "epoch": 0.14189409590083724, "grad_norm": 2.273791601651863, "learning_rate": 9.67529731126037e-06, "loss": 0.9031, "step": 3915 }, { "epoch": 0.14193033960349388, "grad_norm": 2.486950869383339, "learning_rate": 9.675089218289393e-06, "loss": 1.0566, "step": 3916 }, { "epoch": 0.14196658330615056, "grad_norm": 2.0386811593187613, "learning_rate": 9.674881060898514e-06, "loss": 1.0171, "step": 3917 }, { "epoch": 0.14200282700880723, "grad_norm": 2.6327409328691997, "learning_rate": 9.6746728390906e-06, "loss": 0.9802, "step": 3918 }, { "epoch": 0.14203907071146388, "grad_norm": 2.5800452378832817, "learning_rate": 9.674464552868522e-06, "loss": 0.7841, "step": 3919 }, { "epoch": 0.14207531441412055, "grad_norm": 2.703116497713802, "learning_rate": 9.67425620223515e-06, "loss": 0.9779, "step": 3920 }, { "epoch": 0.1421115581167772, "grad_norm": 2.452891920495335, "learning_rate": 9.674047787193355e-06, "loss": 1.0158, "step": 3921 }, { "epoch": 0.14214780181943387, "grad_norm": 2.080946330836233, "learning_rate": 9.673839307746006e-06, "loss": 0.955, "step": 3922 }, { "epoch": 0.14218404552209055, "grad_norm": 2.4486919994275302, "learning_rate": 9.67363076389598e-06, "loss": 0.8043, "step": 3923 }, { "epoch": 0.1422202892247472, "grad_norm": 2.379785426350665, "learning_rate": 9.673422155646148e-06, "loss": 0.922, "step": 3924 }, { "epoch": 0.14225653292740387, "grad_norm": 2.6169619946754126, "learning_rate": 9.673213482999386e-06, "loss": 1.0642, "step": 3925 }, { "epoch": 0.1422927766300605, "grad_norm": 2.41286418463574, "learning_rate": 9.673004745958567e-06, "loss": 1.1005, "step": 3926 }, { "epoch": 0.1423290203327172, "grad_norm": 2.6455850965771566, "learning_rate": 9.67279594452657e-06, "loss": 1.0343, "step": 3927 }, { "epoch": 0.14236526403537386, "grad_norm": 2.5785304271776255, "learning_rate": 9.67258707870627e-06, "loss": 1.117, "step": 3928 }, { "epoch": 0.1424015077380305, "grad_norm": 2.386420271437168, "learning_rate": 9.672378148500545e-06, "loss": 0.8172, "step": 3929 }, { "epoch": 0.14243775144068718, "grad_norm": 2.2938196860266835, "learning_rate": 9.672169153912277e-06, "loss": 0.964, "step": 3930 }, { "epoch": 0.14247399514334386, "grad_norm": 2.3822291099812234, "learning_rate": 9.671960094944342e-06, "loss": 0.8359, "step": 3931 }, { "epoch": 0.1425102388460005, "grad_norm": 2.502011606858559, "learning_rate": 9.671750971599624e-06, "loss": 0.9788, "step": 3932 }, { "epoch": 0.14254648254865718, "grad_norm": 2.3017274721055476, "learning_rate": 9.671541783881002e-06, "loss": 0.9381, "step": 3933 }, { "epoch": 0.14258272625131382, "grad_norm": 13.764803379597643, "learning_rate": 9.67133253179136e-06, "loss": 1.9128, "step": 3934 }, { "epoch": 0.1426189699539705, "grad_norm": 2.255361924433702, "learning_rate": 9.67112321533358e-06, "loss": 0.9482, "step": 3935 }, { "epoch": 0.14265521365662717, "grad_norm": 2.31026121994246, "learning_rate": 9.670913834510547e-06, "loss": 0.8378, "step": 3936 }, { "epoch": 0.14269145735928382, "grad_norm": 2.331911867198104, "learning_rate": 9.670704389325148e-06, "loss": 1.2002, "step": 3937 }, { "epoch": 0.1427277010619405, "grad_norm": 2.4178968273740056, "learning_rate": 9.670494879780266e-06, "loss": 1.1347, "step": 3938 }, { "epoch": 0.14276394476459714, "grad_norm": 2.3853480010239196, "learning_rate": 9.67028530587879e-06, "loss": 1.0272, "step": 3939 }, { "epoch": 0.1428001884672538, "grad_norm": 2.534133548191693, "learning_rate": 9.670075667623606e-06, "loss": 0.9332, "step": 3940 }, { "epoch": 0.1428364321699105, "grad_norm": 2.66572910326298, "learning_rate": 9.669865965017604e-06, "loss": 1.0331, "step": 3941 }, { "epoch": 0.14287267587256713, "grad_norm": 2.4822290790944996, "learning_rate": 9.669656198063672e-06, "loss": 0.96, "step": 3942 }, { "epoch": 0.1429089195752238, "grad_norm": 2.6226241867051514, "learning_rate": 9.669446366764704e-06, "loss": 1.1282, "step": 3943 }, { "epoch": 0.14294516327788045, "grad_norm": 2.252559942254382, "learning_rate": 9.669236471123587e-06, "loss": 1.0218, "step": 3944 }, { "epoch": 0.14298140698053713, "grad_norm": 2.30940792609533, "learning_rate": 9.669026511143217e-06, "loss": 0.9993, "step": 3945 }, { "epoch": 0.1430176506831938, "grad_norm": 2.4683635742028227, "learning_rate": 9.668816486826484e-06, "loss": 1.0181, "step": 3946 }, { "epoch": 0.14305389438585045, "grad_norm": 2.280958355039969, "learning_rate": 9.668606398176286e-06, "loss": 0.9638, "step": 3947 }, { "epoch": 0.14309013808850712, "grad_norm": 2.509522842599584, "learning_rate": 9.668396245195512e-06, "loss": 0.9674, "step": 3948 }, { "epoch": 0.1431263817911638, "grad_norm": 2.2007199790752887, "learning_rate": 9.668186027887064e-06, "loss": 0.8327, "step": 3949 }, { "epoch": 0.14316262549382044, "grad_norm": 2.343329249670777, "learning_rate": 9.667975746253832e-06, "loss": 0.7824, "step": 3950 }, { "epoch": 0.14319886919647712, "grad_norm": 2.4637985172118344, "learning_rate": 9.667765400298721e-06, "loss": 1.142, "step": 3951 }, { "epoch": 0.14323511289913377, "grad_norm": 2.4191677087196055, "learning_rate": 9.667554990024624e-06, "loss": 1.1244, "step": 3952 }, { "epoch": 0.14327135660179044, "grad_norm": 2.368571057394126, "learning_rate": 9.667344515434443e-06, "loss": 0.8182, "step": 3953 }, { "epoch": 0.1433076003044471, "grad_norm": 2.619596863159179, "learning_rate": 9.667133976531076e-06, "loss": 0.9843, "step": 3954 }, { "epoch": 0.14334384400710376, "grad_norm": 2.5575344446588177, "learning_rate": 9.666923373317425e-06, "loss": 0.9391, "step": 3955 }, { "epoch": 0.14338008770976043, "grad_norm": 2.5861985366840203, "learning_rate": 9.666712705796395e-06, "loss": 1.0354, "step": 3956 }, { "epoch": 0.14341633141241708, "grad_norm": 2.7087138198098155, "learning_rate": 9.666501973970886e-06, "loss": 1.1285, "step": 3957 }, { "epoch": 0.14345257511507375, "grad_norm": 2.482341550450307, "learning_rate": 9.6662911778438e-06, "loss": 1.0181, "step": 3958 }, { "epoch": 0.14348881881773043, "grad_norm": 2.733604664589295, "learning_rate": 9.666080317418044e-06, "loss": 1.1059, "step": 3959 }, { "epoch": 0.14352506252038708, "grad_norm": 2.5450053080134296, "learning_rate": 9.665869392696523e-06, "loss": 1.1866, "step": 3960 }, { "epoch": 0.14356130622304375, "grad_norm": 2.483672884547795, "learning_rate": 9.665658403682145e-06, "loss": 1.122, "step": 3961 }, { "epoch": 0.1435975499257004, "grad_norm": 2.668494300267249, "learning_rate": 9.665447350377814e-06, "loss": 0.7618, "step": 3962 }, { "epoch": 0.14363379362835707, "grad_norm": 2.8256481837762095, "learning_rate": 9.665236232786442e-06, "loss": 1.1794, "step": 3963 }, { "epoch": 0.14367003733101374, "grad_norm": 2.6064783040340704, "learning_rate": 9.665025050910935e-06, "loss": 0.9275, "step": 3964 }, { "epoch": 0.1437062810336704, "grad_norm": 2.5037526175665294, "learning_rate": 9.664813804754206e-06, "loss": 0.8411, "step": 3965 }, { "epoch": 0.14374252473632707, "grad_norm": 2.381386524470642, "learning_rate": 9.664602494319163e-06, "loss": 0.8188, "step": 3966 }, { "epoch": 0.14377876843898374, "grad_norm": 2.428948504792508, "learning_rate": 9.664391119608717e-06, "loss": 1.0828, "step": 3967 }, { "epoch": 0.14381501214164039, "grad_norm": 2.183557509768795, "learning_rate": 9.664179680625783e-06, "loss": 0.9194, "step": 3968 }, { "epoch": 0.14385125584429706, "grad_norm": 2.531135678033514, "learning_rate": 9.663968177373275e-06, "loss": 1.2107, "step": 3969 }, { "epoch": 0.1438874995469537, "grad_norm": 2.58377728816897, "learning_rate": 9.663756609854105e-06, "loss": 1.015, "step": 3970 }, { "epoch": 0.14392374324961038, "grad_norm": 2.3381036361724545, "learning_rate": 9.663544978071189e-06, "loss": 1.0654, "step": 3971 }, { "epoch": 0.14395998695226705, "grad_norm": 2.2857695425077904, "learning_rate": 9.663333282027444e-06, "loss": 1.0273, "step": 3972 }, { "epoch": 0.1439962306549237, "grad_norm": 2.7592056523817408, "learning_rate": 9.663121521725788e-06, "loss": 1.0891, "step": 3973 }, { "epoch": 0.14403247435758038, "grad_norm": 2.559423058120122, "learning_rate": 9.662909697169136e-06, "loss": 1.0565, "step": 3974 }, { "epoch": 0.14406871806023702, "grad_norm": 2.712455015172437, "learning_rate": 9.662697808360411e-06, "loss": 1.0534, "step": 3975 }, { "epoch": 0.1441049617628937, "grad_norm": 2.358791123792647, "learning_rate": 9.662485855302527e-06, "loss": 1.053, "step": 3976 }, { "epoch": 0.14414120546555037, "grad_norm": 2.7617163996045764, "learning_rate": 9.66227383799841e-06, "loss": 0.9613, "step": 3977 }, { "epoch": 0.14417744916820702, "grad_norm": 2.2956287956899866, "learning_rate": 9.662061756450978e-06, "loss": 0.9685, "step": 3978 }, { "epoch": 0.1442136928708637, "grad_norm": 2.4217642306895573, "learning_rate": 9.661849610663154e-06, "loss": 0.8977, "step": 3979 }, { "epoch": 0.14424993657352034, "grad_norm": 2.418865025737309, "learning_rate": 9.661637400637862e-06, "loss": 1.0916, "step": 3980 }, { "epoch": 0.144286180276177, "grad_norm": 2.5737381603403486, "learning_rate": 9.661425126378026e-06, "loss": 1.3239, "step": 3981 }, { "epoch": 0.14432242397883369, "grad_norm": 2.2518341903407735, "learning_rate": 9.66121278788657e-06, "loss": 1.0335, "step": 3982 }, { "epoch": 0.14435866768149033, "grad_norm": 2.155110738195225, "learning_rate": 9.661000385166423e-06, "loss": 0.8775, "step": 3983 }, { "epoch": 0.144394911384147, "grad_norm": 2.37658442408968, "learning_rate": 9.660787918220508e-06, "loss": 1.0879, "step": 3984 }, { "epoch": 0.14443115508680368, "grad_norm": 2.675320695429557, "learning_rate": 9.660575387051756e-06, "loss": 0.8996, "step": 3985 }, { "epoch": 0.14446739878946033, "grad_norm": 2.5814094335753226, "learning_rate": 9.660362791663092e-06, "loss": 0.9411, "step": 3986 }, { "epoch": 0.144503642492117, "grad_norm": 2.2972350680057154, "learning_rate": 9.660150132057449e-06, "loss": 0.8911, "step": 3987 }, { "epoch": 0.14453988619477365, "grad_norm": 2.6074251662043504, "learning_rate": 9.659937408237754e-06, "loss": 0.9904, "step": 3988 }, { "epoch": 0.14457612989743032, "grad_norm": 2.408752228275277, "learning_rate": 9.65972462020694e-06, "loss": 0.8648, "step": 3989 }, { "epoch": 0.144612373600087, "grad_norm": 3.1660438456017514, "learning_rate": 9.659511767967939e-06, "loss": 0.91, "step": 3990 }, { "epoch": 0.14464861730274364, "grad_norm": 2.4115622363992206, "learning_rate": 9.659298851523684e-06, "loss": 1.1148, "step": 3991 }, { "epoch": 0.14468486100540032, "grad_norm": 2.3959241706005527, "learning_rate": 9.659085870877109e-06, "loss": 1.0245, "step": 3992 }, { "epoch": 0.14472110470805696, "grad_norm": 2.483168331642729, "learning_rate": 9.658872826031147e-06, "loss": 0.9733, "step": 3993 }, { "epoch": 0.14475734841071364, "grad_norm": 2.5172096760728033, "learning_rate": 9.658659716988737e-06, "loss": 1.0286, "step": 3994 }, { "epoch": 0.1447935921133703, "grad_norm": 2.68995817503405, "learning_rate": 9.658446543752812e-06, "loss": 0.9548, "step": 3995 }, { "epoch": 0.14482983581602696, "grad_norm": 2.5410921931353725, "learning_rate": 9.658233306326311e-06, "loss": 1.1235, "step": 3996 }, { "epoch": 0.14486607951868363, "grad_norm": 2.1658945776760516, "learning_rate": 9.658020004712172e-06, "loss": 0.7992, "step": 3997 }, { "epoch": 0.14490232322134028, "grad_norm": 2.6639436632975446, "learning_rate": 9.657806638913336e-06, "loss": 0.9707, "step": 3998 }, { "epoch": 0.14493856692399695, "grad_norm": 2.199382650109513, "learning_rate": 9.657593208932741e-06, "loss": 1.0611, "step": 3999 }, { "epoch": 0.14497481062665363, "grad_norm": 2.8705490126739135, "learning_rate": 9.657379714773327e-06, "loss": 1.0473, "step": 4000 }, { "epoch": 0.14501105432931027, "grad_norm": 2.4649094893360446, "learning_rate": 9.657166156438039e-06, "loss": 1.1173, "step": 4001 }, { "epoch": 0.14504729803196695, "grad_norm": 2.0471653307879616, "learning_rate": 9.656952533929818e-06, "loss": 0.84, "step": 4002 }, { "epoch": 0.14508354173462362, "grad_norm": 2.671341865556314, "learning_rate": 9.656738847251606e-06, "loss": 1.3152, "step": 4003 }, { "epoch": 0.14511978543728027, "grad_norm": 2.7739740723689033, "learning_rate": 9.65652509640635e-06, "loss": 0.9748, "step": 4004 }, { "epoch": 0.14515602913993694, "grad_norm": 2.473498229254919, "learning_rate": 9.656311281396994e-06, "loss": 0.9755, "step": 4005 }, { "epoch": 0.1451922728425936, "grad_norm": 2.3791582904175943, "learning_rate": 9.656097402226486e-06, "loss": 0.9838, "step": 4006 }, { "epoch": 0.14522851654525026, "grad_norm": 2.497528584449659, "learning_rate": 9.655883458897769e-06, "loss": 1.0299, "step": 4007 }, { "epoch": 0.14526476024790694, "grad_norm": 2.512131666346431, "learning_rate": 9.655669451413795e-06, "loss": 1.1257, "step": 4008 }, { "epoch": 0.14530100395056358, "grad_norm": 2.0657787878850895, "learning_rate": 9.655455379777511e-06, "loss": 0.8313, "step": 4009 }, { "epoch": 0.14533724765322026, "grad_norm": 2.3825067516647853, "learning_rate": 9.655241243991868e-06, "loss": 1.0012, "step": 4010 }, { "epoch": 0.1453734913558769, "grad_norm": 2.3571548538197806, "learning_rate": 9.655027044059817e-06, "loss": 0.8463, "step": 4011 }, { "epoch": 0.14540973505853358, "grad_norm": 2.4479422272304956, "learning_rate": 9.654812779984308e-06, "loss": 0.954, "step": 4012 }, { "epoch": 0.14544597876119025, "grad_norm": 2.0049604192437487, "learning_rate": 9.654598451768294e-06, "loss": 0.8618, "step": 4013 }, { "epoch": 0.1454822224638469, "grad_norm": 2.51360017034639, "learning_rate": 9.654384059414728e-06, "loss": 0.9234, "step": 4014 }, { "epoch": 0.14551846616650357, "grad_norm": 2.7878365219794707, "learning_rate": 9.654169602926565e-06, "loss": 1.1372, "step": 4015 }, { "epoch": 0.14555470986916022, "grad_norm": 2.512446546348575, "learning_rate": 9.653955082306759e-06, "loss": 1.0095, "step": 4016 }, { "epoch": 0.1455909535718169, "grad_norm": 2.4717391555456114, "learning_rate": 9.653740497558266e-06, "loss": 0.8801, "step": 4017 }, { "epoch": 0.14562719727447357, "grad_norm": 2.31430369111621, "learning_rate": 9.653525848684045e-06, "loss": 1.0717, "step": 4018 }, { "epoch": 0.14566344097713022, "grad_norm": 2.52786182357984, "learning_rate": 9.653311135687051e-06, "loss": 1.2461, "step": 4019 }, { "epoch": 0.1456996846797869, "grad_norm": 2.3676096878701673, "learning_rate": 9.653096358570244e-06, "loss": 1.0111, "step": 4020 }, { "epoch": 0.14573592838244356, "grad_norm": 2.336772029956674, "learning_rate": 9.652881517336584e-06, "loss": 0.937, "step": 4021 }, { "epoch": 0.1457721720851002, "grad_norm": 2.422383181102217, "learning_rate": 9.65266661198903e-06, "loss": 0.843, "step": 4022 }, { "epoch": 0.14580841578775688, "grad_norm": 2.2378900098830536, "learning_rate": 9.652451642530544e-06, "loss": 0.9831, "step": 4023 }, { "epoch": 0.14584465949041353, "grad_norm": 2.110643570448263, "learning_rate": 9.652236608964089e-06, "loss": 0.9736, "step": 4024 }, { "epoch": 0.1458809031930702, "grad_norm": 2.315120342983129, "learning_rate": 9.652021511292627e-06, "loss": 0.7616, "step": 4025 }, { "epoch": 0.14591714689572688, "grad_norm": 2.97069450462403, "learning_rate": 9.651806349519119e-06, "loss": 1.1551, "step": 4026 }, { "epoch": 0.14595339059838353, "grad_norm": 2.2255688341924893, "learning_rate": 9.651591123646537e-06, "loss": 0.955, "step": 4027 }, { "epoch": 0.1459896343010402, "grad_norm": 2.6924975138825875, "learning_rate": 9.65137583367784e-06, "loss": 1.0669, "step": 4028 }, { "epoch": 0.14602587800369685, "grad_norm": 2.4788045632503293, "learning_rate": 9.651160479615998e-06, "loss": 0.9183, "step": 4029 }, { "epoch": 0.14606212170635352, "grad_norm": 2.3792521817792442, "learning_rate": 9.650945061463976e-06, "loss": 1.0224, "step": 4030 }, { "epoch": 0.1460983654090102, "grad_norm": 2.4527258817553226, "learning_rate": 9.650729579224746e-06, "loss": 1.0808, "step": 4031 }, { "epoch": 0.14613460911166684, "grad_norm": 2.324772629831342, "learning_rate": 9.650514032901276e-06, "loss": 1.0758, "step": 4032 }, { "epoch": 0.14617085281432352, "grad_norm": 2.626459326585518, "learning_rate": 9.650298422496532e-06, "loss": 1.1381, "step": 4033 }, { "epoch": 0.14620709651698016, "grad_norm": 2.576129412889366, "learning_rate": 9.65008274801349e-06, "loss": 1.003, "step": 4034 }, { "epoch": 0.14624334021963684, "grad_norm": 2.4268049778505407, "learning_rate": 9.649867009455119e-06, "loss": 1.0013, "step": 4035 }, { "epoch": 0.1462795839222935, "grad_norm": 2.4169491128073646, "learning_rate": 9.649651206824393e-06, "loss": 1.0978, "step": 4036 }, { "epoch": 0.14631582762495016, "grad_norm": 2.508030682441506, "learning_rate": 9.649435340124286e-06, "loss": 1.0009, "step": 4037 }, { "epoch": 0.14635207132760683, "grad_norm": 2.383627762990016, "learning_rate": 9.64921940935777e-06, "loss": 0.907, "step": 4038 }, { "epoch": 0.1463883150302635, "grad_norm": 2.21603913314357, "learning_rate": 9.649003414527826e-06, "loss": 1.0061, "step": 4039 }, { "epoch": 0.14642455873292015, "grad_norm": 2.259026701724148, "learning_rate": 9.648787355637423e-06, "loss": 1.0543, "step": 4040 }, { "epoch": 0.14646080243557683, "grad_norm": 2.6264249303369938, "learning_rate": 9.648571232689544e-06, "loss": 1.0208, "step": 4041 }, { "epoch": 0.14649704613823347, "grad_norm": 2.323100727455089, "learning_rate": 9.648355045687164e-06, "loss": 0.9827, "step": 4042 }, { "epoch": 0.14653328984089015, "grad_norm": 2.5038959867891086, "learning_rate": 9.648138794633262e-06, "loss": 0.891, "step": 4043 }, { "epoch": 0.14656953354354682, "grad_norm": 2.3415495445469023, "learning_rate": 9.64792247953082e-06, "loss": 0.9926, "step": 4044 }, { "epoch": 0.14660577724620347, "grad_norm": 2.551378704498946, "learning_rate": 9.647706100382817e-06, "loss": 0.9816, "step": 4045 }, { "epoch": 0.14664202094886014, "grad_norm": 2.506988539233555, "learning_rate": 9.647489657192232e-06, "loss": 1.0775, "step": 4046 }, { "epoch": 0.1466782646515168, "grad_norm": 2.576488295285267, "learning_rate": 9.647273149962053e-06, "loss": 0.9736, "step": 4047 }, { "epoch": 0.14671450835417346, "grad_norm": 2.30586026344885, "learning_rate": 9.64705657869526e-06, "loss": 0.923, "step": 4048 }, { "epoch": 0.14675075205683014, "grad_norm": 2.4252545439321285, "learning_rate": 9.64683994339484e-06, "loss": 0.9496, "step": 4049 }, { "epoch": 0.14678699575948678, "grad_norm": 2.2877720263142454, "learning_rate": 9.646623244063772e-06, "loss": 1.0177, "step": 4050 }, { "epoch": 0.14682323946214346, "grad_norm": 2.068409278654557, "learning_rate": 9.646406480705048e-06, "loss": 1.0195, "step": 4051 }, { "epoch": 0.1468594831648001, "grad_norm": 2.6056091590918933, "learning_rate": 9.646189653321651e-06, "loss": 1.0255, "step": 4052 }, { "epoch": 0.14689572686745678, "grad_norm": 2.310105162087885, "learning_rate": 9.645972761916574e-06, "loss": 0.9535, "step": 4053 }, { "epoch": 0.14693197057011345, "grad_norm": 2.3369416032620474, "learning_rate": 9.645755806492799e-06, "loss": 1.0139, "step": 4054 }, { "epoch": 0.1469682142727701, "grad_norm": 2.2666855606313088, "learning_rate": 9.64553878705332e-06, "loss": 0.9888, "step": 4055 }, { "epoch": 0.14700445797542677, "grad_norm": 2.43616621346824, "learning_rate": 9.645321703601124e-06, "loss": 0.9441, "step": 4056 }, { "epoch": 0.14704070167808345, "grad_norm": 2.5185497301417725, "learning_rate": 9.645104556139207e-06, "loss": 0.9796, "step": 4057 }, { "epoch": 0.1470769453807401, "grad_norm": 2.4539351871088693, "learning_rate": 9.644887344670557e-06, "loss": 1.0597, "step": 4058 }, { "epoch": 0.14711318908339677, "grad_norm": 2.2641418391543313, "learning_rate": 9.64467006919817e-06, "loss": 1.0065, "step": 4059 }, { "epoch": 0.1471494327860534, "grad_norm": 2.588843606514394, "learning_rate": 9.644452729725036e-06, "loss": 0.8799, "step": 4060 }, { "epoch": 0.1471856764887101, "grad_norm": 2.338277851435541, "learning_rate": 9.644235326254154e-06, "loss": 1.0289, "step": 4061 }, { "epoch": 0.14722192019136676, "grad_norm": 2.3572307317297465, "learning_rate": 9.644017858788519e-06, "loss": 0.9127, "step": 4062 }, { "epoch": 0.1472581638940234, "grad_norm": 2.5152485436840593, "learning_rate": 9.643800327331125e-06, "loss": 1.0161, "step": 4063 }, { "epoch": 0.14729440759668008, "grad_norm": 2.231831440377057, "learning_rate": 9.64358273188497e-06, "loss": 0.8152, "step": 4064 }, { "epoch": 0.14733065129933673, "grad_norm": 2.4355668683933316, "learning_rate": 9.643365072453054e-06, "loss": 1.0539, "step": 4065 }, { "epoch": 0.1473668950019934, "grad_norm": 2.2439706635279055, "learning_rate": 9.643147349038377e-06, "loss": 0.7955, "step": 4066 }, { "epoch": 0.14740313870465008, "grad_norm": 2.0255508168863967, "learning_rate": 9.642929561643937e-06, "loss": 0.6978, "step": 4067 }, { "epoch": 0.14743938240730672, "grad_norm": 2.5120947572181764, "learning_rate": 9.642711710272734e-06, "loss": 1.0678, "step": 4068 }, { "epoch": 0.1474756261099634, "grad_norm": 2.4718822416712234, "learning_rate": 9.642493794927774e-06, "loss": 0.9615, "step": 4069 }, { "epoch": 0.14751186981262004, "grad_norm": 2.411221040760026, "learning_rate": 9.642275815612056e-06, "loss": 0.9153, "step": 4070 }, { "epoch": 0.14754811351527672, "grad_norm": 2.247702686645873, "learning_rate": 9.642057772328583e-06, "loss": 0.834, "step": 4071 }, { "epoch": 0.1475843572179334, "grad_norm": 2.5239727408196577, "learning_rate": 9.641839665080363e-06, "loss": 1.0026, "step": 4072 }, { "epoch": 0.14762060092059004, "grad_norm": 2.4724670162515405, "learning_rate": 9.641621493870402e-06, "loss": 0.9196, "step": 4073 }, { "epoch": 0.1476568446232467, "grad_norm": 2.6450805221930813, "learning_rate": 9.641403258701701e-06, "loss": 1.0688, "step": 4074 }, { "epoch": 0.1476930883259034, "grad_norm": 2.473779638663325, "learning_rate": 9.64118495957727e-06, "loss": 0.8618, "step": 4075 }, { "epoch": 0.14772933202856003, "grad_norm": 2.5916673262108527, "learning_rate": 9.640966596500119e-06, "loss": 1.0247, "step": 4076 }, { "epoch": 0.1477655757312167, "grad_norm": 2.358193671743941, "learning_rate": 9.640748169473253e-06, "loss": 1.0009, "step": 4077 }, { "epoch": 0.14780181943387335, "grad_norm": 2.7064175229263925, "learning_rate": 9.640529678499686e-06, "loss": 1.1275, "step": 4078 }, { "epoch": 0.14783806313653003, "grad_norm": 2.353216837920093, "learning_rate": 9.640311123582424e-06, "loss": 0.8686, "step": 4079 }, { "epoch": 0.1478743068391867, "grad_norm": 2.5300901886818927, "learning_rate": 9.640092504724483e-06, "loss": 1.031, "step": 4080 }, { "epoch": 0.14791055054184335, "grad_norm": 2.1965744524956476, "learning_rate": 9.639873821928873e-06, "loss": 0.8925, "step": 4081 }, { "epoch": 0.14794679424450002, "grad_norm": 2.3298657694945835, "learning_rate": 9.639655075198608e-06, "loss": 0.9355, "step": 4082 }, { "epoch": 0.14798303794715667, "grad_norm": 2.443932101376307, "learning_rate": 9.639436264536702e-06, "loss": 0.9112, "step": 4083 }, { "epoch": 0.14801928164981334, "grad_norm": 2.61206594175167, "learning_rate": 9.63921738994617e-06, "loss": 1.058, "step": 4084 }, { "epoch": 0.14805552535247002, "grad_norm": 2.260905237527549, "learning_rate": 9.638998451430028e-06, "loss": 0.8659, "step": 4085 }, { "epoch": 0.14809176905512667, "grad_norm": 2.373055041144865, "learning_rate": 9.638779448991292e-06, "loss": 0.9063, "step": 4086 }, { "epoch": 0.14812801275778334, "grad_norm": 2.2771103174913168, "learning_rate": 9.638560382632982e-06, "loss": 0.9055, "step": 4087 }, { "epoch": 0.14816425646043999, "grad_norm": 2.793945071144265, "learning_rate": 9.638341252358114e-06, "loss": 1.1922, "step": 4088 }, { "epoch": 0.14820050016309666, "grad_norm": 2.345824181238382, "learning_rate": 9.63812205816971e-06, "loss": 1.2526, "step": 4089 }, { "epoch": 0.14823674386575333, "grad_norm": 2.299739465843703, "learning_rate": 9.637902800070788e-06, "loss": 0.9736, "step": 4090 }, { "epoch": 0.14827298756840998, "grad_norm": 2.38052317331351, "learning_rate": 9.637683478064372e-06, "loss": 1.0154, "step": 4091 }, { "epoch": 0.14830923127106665, "grad_norm": 2.478847178897023, "learning_rate": 9.637464092153482e-06, "loss": 1.0234, "step": 4092 }, { "epoch": 0.14834547497372333, "grad_norm": 2.208775577179247, "learning_rate": 9.63724464234114e-06, "loss": 1.0634, "step": 4093 }, { "epoch": 0.14838171867637998, "grad_norm": 2.4959677851521005, "learning_rate": 9.637025128630374e-06, "loss": 0.957, "step": 4094 }, { "epoch": 0.14841796237903665, "grad_norm": 2.454173454738541, "learning_rate": 9.636805551024203e-06, "loss": 0.8896, "step": 4095 }, { "epoch": 0.1484542060816933, "grad_norm": 2.2911642788773365, "learning_rate": 9.636585909525658e-06, "loss": 1.0337, "step": 4096 }, { "epoch": 0.14849044978434997, "grad_norm": 2.3952714748054156, "learning_rate": 9.636366204137761e-06, "loss": 0.9496, "step": 4097 }, { "epoch": 0.14852669348700664, "grad_norm": 2.6197634352698933, "learning_rate": 9.636146434863544e-06, "loss": 0.9326, "step": 4098 }, { "epoch": 0.1485629371896633, "grad_norm": 2.542133391840709, "learning_rate": 9.635926601706032e-06, "loss": 0.8723, "step": 4099 }, { "epoch": 0.14859918089231997, "grad_norm": 2.2527406645880084, "learning_rate": 9.635706704668255e-06, "loss": 1.0036, "step": 4100 }, { "epoch": 0.1486354245949766, "grad_norm": 2.4851846799479747, "learning_rate": 9.635486743753244e-06, "loss": 0.9162, "step": 4101 }, { "epoch": 0.14867166829763329, "grad_norm": 2.064883314137887, "learning_rate": 9.635266718964028e-06, "loss": 0.9638, "step": 4102 }, { "epoch": 0.14870791200028996, "grad_norm": 1.9649544970615227, "learning_rate": 9.635046630303641e-06, "loss": 0.6999, "step": 4103 }, { "epoch": 0.1487441557029466, "grad_norm": 2.488405776181444, "learning_rate": 9.634826477775113e-06, "loss": 0.8312, "step": 4104 }, { "epoch": 0.14878039940560328, "grad_norm": 2.2499905856571116, "learning_rate": 9.634606261381481e-06, "loss": 1.1248, "step": 4105 }, { "epoch": 0.14881664310825993, "grad_norm": 2.386209463580149, "learning_rate": 9.634385981125777e-06, "loss": 0.8233, "step": 4106 }, { "epoch": 0.1488528868109166, "grad_norm": 2.3978985942129607, "learning_rate": 9.634165637011037e-06, "loss": 0.902, "step": 4107 }, { "epoch": 0.14888913051357328, "grad_norm": 2.53818836944744, "learning_rate": 9.633945229040297e-06, "loss": 1.1695, "step": 4108 }, { "epoch": 0.14892537421622992, "grad_norm": 2.800879192222072, "learning_rate": 9.633724757216594e-06, "loss": 1.0956, "step": 4109 }, { "epoch": 0.1489616179188866, "grad_norm": 2.5190918081921665, "learning_rate": 9.633504221542966e-06, "loss": 1.0484, "step": 4110 }, { "epoch": 0.14899786162154327, "grad_norm": 2.2301316077846627, "learning_rate": 9.633283622022453e-06, "loss": 0.8403, "step": 4111 }, { "epoch": 0.14903410532419992, "grad_norm": 2.386408634651445, "learning_rate": 9.633062958658094e-06, "loss": 0.9476, "step": 4112 }, { "epoch": 0.1490703490268566, "grad_norm": 2.482190932155949, "learning_rate": 9.632842231452929e-06, "loss": 0.8859, "step": 4113 }, { "epoch": 0.14910659272951324, "grad_norm": 2.640998666397078, "learning_rate": 9.63262144041e-06, "loss": 1.0634, "step": 4114 }, { "epoch": 0.1491428364321699, "grad_norm": 2.545122135807725, "learning_rate": 9.63240058553235e-06, "loss": 1.1307, "step": 4115 }, { "epoch": 0.14917908013482659, "grad_norm": 2.745798045058993, "learning_rate": 9.632179666823018e-06, "loss": 0.9338, "step": 4116 }, { "epoch": 0.14921532383748323, "grad_norm": 2.411290541034093, "learning_rate": 9.631958684285054e-06, "loss": 0.9796, "step": 4117 }, { "epoch": 0.1492515675401399, "grad_norm": 2.2238302080064116, "learning_rate": 9.631737637921503e-06, "loss": 1.0345, "step": 4118 }, { "epoch": 0.14928781124279655, "grad_norm": 2.383469957482857, "learning_rate": 9.631516527735405e-06, "loss": 1.0495, "step": 4119 }, { "epoch": 0.14932405494545323, "grad_norm": 2.524013001308468, "learning_rate": 9.631295353729812e-06, "loss": 0.9373, "step": 4120 }, { "epoch": 0.1493602986481099, "grad_norm": 2.5762240005086774, "learning_rate": 9.63107411590777e-06, "loss": 0.9772, "step": 4121 }, { "epoch": 0.14939654235076655, "grad_norm": 2.621592960553944, "learning_rate": 9.630852814272327e-06, "loss": 1.0533, "step": 4122 }, { "epoch": 0.14943278605342322, "grad_norm": 2.525011738835819, "learning_rate": 9.630631448826533e-06, "loss": 0.8912, "step": 4123 }, { "epoch": 0.14946902975607987, "grad_norm": 2.338041774259056, "learning_rate": 9.630410019573438e-06, "loss": 0.9569, "step": 4124 }, { "epoch": 0.14950527345873654, "grad_norm": 2.373471107582467, "learning_rate": 9.630188526516094e-06, "loss": 0.9721, "step": 4125 }, { "epoch": 0.14954151716139322, "grad_norm": 2.5710398238937757, "learning_rate": 9.629966969657551e-06, "loss": 1.1386, "step": 4126 }, { "epoch": 0.14957776086404986, "grad_norm": 2.3269307391729463, "learning_rate": 9.629745349000864e-06, "loss": 0.8835, "step": 4127 }, { "epoch": 0.14961400456670654, "grad_norm": 2.5760806462084096, "learning_rate": 9.629523664549087e-06, "loss": 0.9066, "step": 4128 }, { "epoch": 0.1496502482693632, "grad_norm": 2.298298245383035, "learning_rate": 9.62930191630527e-06, "loss": 0.9899, "step": 4129 }, { "epoch": 0.14968649197201986, "grad_norm": 2.537676734992473, "learning_rate": 9.629080104272477e-06, "loss": 1.0847, "step": 4130 }, { "epoch": 0.14972273567467653, "grad_norm": 2.3405656968569346, "learning_rate": 9.62885822845376e-06, "loss": 1.058, "step": 4131 }, { "epoch": 0.14975897937733318, "grad_norm": 2.691989319647806, "learning_rate": 9.628636288852172e-06, "loss": 0.9341, "step": 4132 }, { "epoch": 0.14979522307998985, "grad_norm": 2.8270939182410633, "learning_rate": 9.628414285470777e-06, "loss": 0.9411, "step": 4133 }, { "epoch": 0.14983146678264653, "grad_norm": 2.2792140307672657, "learning_rate": 9.628192218312634e-06, "loss": 0.9816, "step": 4134 }, { "epoch": 0.14986771048530317, "grad_norm": 2.212160133126011, "learning_rate": 9.6279700873808e-06, "loss": 1.0672, "step": 4135 }, { "epoch": 0.14990395418795985, "grad_norm": 2.4201393957330346, "learning_rate": 9.627747892678336e-06, "loss": 0.8654, "step": 4136 }, { "epoch": 0.1499401978906165, "grad_norm": 2.4176992509134343, "learning_rate": 9.627525634208305e-06, "loss": 0.9975, "step": 4137 }, { "epoch": 0.14997644159327317, "grad_norm": 2.200129550306175, "learning_rate": 9.62730331197377e-06, "loss": 0.9016, "step": 4138 }, { "epoch": 0.15001268529592984, "grad_norm": 2.250773435161406, "learning_rate": 9.627080925977794e-06, "loss": 0.8672, "step": 4139 }, { "epoch": 0.1500489289985865, "grad_norm": 2.283114517286335, "learning_rate": 9.626858476223442e-06, "loss": 0.8983, "step": 4140 }, { "epoch": 0.15008517270124316, "grad_norm": 2.380808194618915, "learning_rate": 9.626635962713778e-06, "loss": 0.889, "step": 4141 }, { "epoch": 0.1501214164038998, "grad_norm": 2.2960672957776223, "learning_rate": 9.626413385451867e-06, "loss": 0.804, "step": 4142 }, { "epoch": 0.15015766010655648, "grad_norm": 2.4635211921156923, "learning_rate": 9.62619074444078e-06, "loss": 0.9051, "step": 4143 }, { "epoch": 0.15019390380921316, "grad_norm": 2.548286857064705, "learning_rate": 9.62596803968358e-06, "loss": 1.0885, "step": 4144 }, { "epoch": 0.1502301475118698, "grad_norm": 2.2737263644741117, "learning_rate": 9.62574527118334e-06, "loss": 0.9904, "step": 4145 }, { "epoch": 0.15026639121452648, "grad_norm": 2.2348078150998893, "learning_rate": 9.625522438943127e-06, "loss": 0.9703, "step": 4146 }, { "epoch": 0.15030263491718315, "grad_norm": 2.50654445404124, "learning_rate": 9.625299542966012e-06, "loss": 0.9747, "step": 4147 }, { "epoch": 0.1503388786198398, "grad_norm": 2.4081873379364014, "learning_rate": 9.625076583255066e-06, "loss": 0.9498, "step": 4148 }, { "epoch": 0.15037512232249647, "grad_norm": 2.390526024615607, "learning_rate": 9.624853559813363e-06, "loss": 1.1916, "step": 4149 }, { "epoch": 0.15041136602515312, "grad_norm": 2.1894192824117504, "learning_rate": 9.624630472643974e-06, "loss": 1.1373, "step": 4150 }, { "epoch": 0.1504476097278098, "grad_norm": 2.316765310056039, "learning_rate": 9.624407321749974e-06, "loss": 0.9225, "step": 4151 }, { "epoch": 0.15048385343046647, "grad_norm": 2.40168053401489, "learning_rate": 9.62418410713444e-06, "loss": 0.9285, "step": 4152 }, { "epoch": 0.15052009713312312, "grad_norm": 2.277231677577263, "learning_rate": 9.623960828800443e-06, "loss": 1.1048, "step": 4153 }, { "epoch": 0.1505563408357798, "grad_norm": 2.698389595835166, "learning_rate": 9.623737486751063e-06, "loss": 1.0991, "step": 4154 }, { "epoch": 0.15059258453843644, "grad_norm": 2.581648566570003, "learning_rate": 9.623514080989377e-06, "loss": 0.9399, "step": 4155 }, { "epoch": 0.1506288282410931, "grad_norm": 2.421428701010095, "learning_rate": 9.623290611518463e-06, "loss": 0.9337, "step": 4156 }, { "epoch": 0.15066507194374978, "grad_norm": 2.571642382875976, "learning_rate": 9.623067078341402e-06, "loss": 1.0617, "step": 4157 }, { "epoch": 0.15070131564640643, "grad_norm": 2.2804843852604253, "learning_rate": 9.62284348146127e-06, "loss": 1.1961, "step": 4158 }, { "epoch": 0.1507375593490631, "grad_norm": 2.4527055759851857, "learning_rate": 9.622619820881151e-06, "loss": 1.0682, "step": 4159 }, { "epoch": 0.15077380305171975, "grad_norm": 2.5014265475451523, "learning_rate": 9.622396096604128e-06, "loss": 1.0855, "step": 4160 }, { "epoch": 0.15081004675437643, "grad_norm": 2.246104459335184, "learning_rate": 9.622172308633282e-06, "loss": 1.0741, "step": 4161 }, { "epoch": 0.1508462904570331, "grad_norm": 2.494134885829058, "learning_rate": 9.621948456971696e-06, "loss": 0.9623, "step": 4162 }, { "epoch": 0.15088253415968975, "grad_norm": 2.406333605867496, "learning_rate": 9.621724541622457e-06, "loss": 0.9741, "step": 4163 }, { "epoch": 0.15091877786234642, "grad_norm": 2.6446575492148225, "learning_rate": 9.621500562588647e-06, "loss": 0.8704, "step": 4164 }, { "epoch": 0.15095502156500307, "grad_norm": 2.6172674276646544, "learning_rate": 9.621276519873358e-06, "loss": 0.9574, "step": 4165 }, { "epoch": 0.15099126526765974, "grad_norm": 2.3947813441099375, "learning_rate": 9.62105241347967e-06, "loss": 0.961, "step": 4166 }, { "epoch": 0.15102750897031642, "grad_norm": 2.7145574703227755, "learning_rate": 9.620828243410675e-06, "loss": 1.106, "step": 4167 }, { "epoch": 0.15106375267297306, "grad_norm": 2.6364831206652575, "learning_rate": 9.62060400966946e-06, "loss": 0.9509, "step": 4168 }, { "epoch": 0.15109999637562974, "grad_norm": 2.2190415878664336, "learning_rate": 9.62037971225912e-06, "loss": 1.0338, "step": 4169 }, { "epoch": 0.1511362400782864, "grad_norm": 2.450004229411529, "learning_rate": 9.62015535118274e-06, "loss": 0.8829, "step": 4170 }, { "epoch": 0.15117248378094306, "grad_norm": 2.4503187935639827, "learning_rate": 9.619930926443412e-06, "loss": 1.0404, "step": 4171 }, { "epoch": 0.15120872748359973, "grad_norm": 2.6428047898258984, "learning_rate": 9.619706438044232e-06, "loss": 0.9378, "step": 4172 }, { "epoch": 0.15124497118625638, "grad_norm": 2.3467078555625798, "learning_rate": 9.61948188598829e-06, "loss": 1.1708, "step": 4173 }, { "epoch": 0.15128121488891305, "grad_norm": 2.6839423979271846, "learning_rate": 9.619257270278681e-06, "loss": 0.8056, "step": 4174 }, { "epoch": 0.15131745859156973, "grad_norm": 2.578744177940457, "learning_rate": 9.619032590918502e-06, "loss": 1.2442, "step": 4175 }, { "epoch": 0.15135370229422637, "grad_norm": 2.555563875976829, "learning_rate": 9.618807847910847e-06, "loss": 1.1167, "step": 4176 }, { "epoch": 0.15138994599688305, "grad_norm": 2.6076204318135243, "learning_rate": 9.618583041258812e-06, "loss": 1.1136, "step": 4177 }, { "epoch": 0.1514261896995397, "grad_norm": 2.453890050659726, "learning_rate": 9.618358170965495e-06, "loss": 1.1358, "step": 4178 }, { "epoch": 0.15146243340219637, "grad_norm": 2.439058461226216, "learning_rate": 9.618133237033999e-06, "loss": 1.131, "step": 4179 }, { "epoch": 0.15149867710485304, "grad_norm": 2.4708507765762477, "learning_rate": 9.617908239467417e-06, "loss": 1.0018, "step": 4180 }, { "epoch": 0.1515349208075097, "grad_norm": 2.4700385918476955, "learning_rate": 9.617683178268856e-06, "loss": 0.8438, "step": 4181 }, { "epoch": 0.15157116451016636, "grad_norm": 2.5140806703282395, "learning_rate": 9.617458053441408e-06, "loss": 1.1996, "step": 4182 }, { "epoch": 0.151607408212823, "grad_norm": 2.347652607261771, "learning_rate": 9.617232864988185e-06, "loss": 1.0361, "step": 4183 }, { "epoch": 0.15164365191547968, "grad_norm": 2.7409989288136667, "learning_rate": 9.617007612912284e-06, "loss": 1.0675, "step": 4184 }, { "epoch": 0.15167989561813636, "grad_norm": 2.384037499976585, "learning_rate": 9.61678229721681e-06, "loss": 1.076, "step": 4185 }, { "epoch": 0.151716139320793, "grad_norm": 2.3953573509064188, "learning_rate": 9.616556917904868e-06, "loss": 1.1521, "step": 4186 }, { "epoch": 0.15175238302344968, "grad_norm": 2.3654735045723023, "learning_rate": 9.616331474979564e-06, "loss": 1.0691, "step": 4187 }, { "epoch": 0.15178862672610635, "grad_norm": 2.3221656633198204, "learning_rate": 9.616105968444005e-06, "loss": 0.9836, "step": 4188 }, { "epoch": 0.151824870428763, "grad_norm": 2.495825388528475, "learning_rate": 9.615880398301296e-06, "loss": 1.2063, "step": 4189 }, { "epoch": 0.15186111413141967, "grad_norm": 2.345435999659085, "learning_rate": 9.615654764554546e-06, "loss": 0.8439, "step": 4190 }, { "epoch": 0.15189735783407632, "grad_norm": 2.5443207810000965, "learning_rate": 9.615429067206869e-06, "loss": 1.0996, "step": 4191 }, { "epoch": 0.151933601536733, "grad_norm": 2.6642991446554145, "learning_rate": 9.615203306261366e-06, "loss": 0.9185, "step": 4192 }, { "epoch": 0.15196984523938967, "grad_norm": 2.5770626559212655, "learning_rate": 9.614977481721156e-06, "loss": 1.0145, "step": 4193 }, { "epoch": 0.1520060889420463, "grad_norm": 2.421329972587747, "learning_rate": 9.614751593589347e-06, "loss": 0.8944, "step": 4194 }, { "epoch": 0.152042332644703, "grad_norm": 2.128837107514122, "learning_rate": 9.614525641869051e-06, "loss": 0.8189, "step": 4195 }, { "epoch": 0.15207857634735963, "grad_norm": 2.7582465212800424, "learning_rate": 9.61429962656338e-06, "loss": 0.8347, "step": 4196 }, { "epoch": 0.1521148200500163, "grad_norm": 2.401881484525323, "learning_rate": 9.614073547675455e-06, "loss": 0.8419, "step": 4197 }, { "epoch": 0.15215106375267298, "grad_norm": 2.4083593328046358, "learning_rate": 9.613847405208385e-06, "loss": 0.9382, "step": 4198 }, { "epoch": 0.15218730745532963, "grad_norm": 2.3733511319739113, "learning_rate": 9.613621199165288e-06, "loss": 0.9763, "step": 4199 }, { "epoch": 0.1522235511579863, "grad_norm": 2.4368015874569835, "learning_rate": 9.613394929549283e-06, "loss": 0.9676, "step": 4200 }, { "epoch": 0.15225979486064295, "grad_norm": 2.509842278687637, "learning_rate": 9.613168596363483e-06, "loss": 1.1076, "step": 4201 }, { "epoch": 0.15229603856329962, "grad_norm": 2.3834940205188038, "learning_rate": 9.61294219961101e-06, "loss": 0.8911, "step": 4202 }, { "epoch": 0.1523322822659563, "grad_norm": 2.3850624046987443, "learning_rate": 9.612715739294986e-06, "loss": 0.8171, "step": 4203 }, { "epoch": 0.15236852596861294, "grad_norm": 2.143603026667142, "learning_rate": 9.612489215418525e-06, "loss": 0.7467, "step": 4204 }, { "epoch": 0.15240476967126962, "grad_norm": 2.4574999908213058, "learning_rate": 9.612262627984756e-06, "loss": 0.9364, "step": 4205 }, { "epoch": 0.1524410133739263, "grad_norm": 2.7417235436615983, "learning_rate": 9.612035976996795e-06, "loss": 0.8535, "step": 4206 }, { "epoch": 0.15247725707658294, "grad_norm": 2.6025828624334597, "learning_rate": 9.611809262457768e-06, "loss": 1.0646, "step": 4207 }, { "epoch": 0.1525135007792396, "grad_norm": 2.3202538700702853, "learning_rate": 9.611582484370799e-06, "loss": 1.124, "step": 4208 }, { "epoch": 0.15254974448189626, "grad_norm": 2.5586336045356144, "learning_rate": 9.611355642739013e-06, "loss": 0.9627, "step": 4209 }, { "epoch": 0.15258598818455293, "grad_norm": 2.271086955866381, "learning_rate": 9.611128737565533e-06, "loss": 0.8518, "step": 4210 }, { "epoch": 0.1526222318872096, "grad_norm": 2.4184201933666882, "learning_rate": 9.61090176885349e-06, "loss": 0.9676, "step": 4211 }, { "epoch": 0.15265847558986625, "grad_norm": 2.4228042052216576, "learning_rate": 9.610674736606007e-06, "loss": 1.0098, "step": 4212 }, { "epoch": 0.15269471929252293, "grad_norm": 2.336119371183706, "learning_rate": 9.610447640826217e-06, "loss": 1.0415, "step": 4213 }, { "epoch": 0.15273096299517958, "grad_norm": 2.179254738938991, "learning_rate": 9.610220481517246e-06, "loss": 0.9176, "step": 4214 }, { "epoch": 0.15276720669783625, "grad_norm": 2.402500348484144, "learning_rate": 9.609993258682224e-06, "loss": 0.958, "step": 4215 }, { "epoch": 0.15280345040049292, "grad_norm": 2.4147038756375734, "learning_rate": 9.609765972324284e-06, "loss": 1.1431, "step": 4216 }, { "epoch": 0.15283969410314957, "grad_norm": 2.343730924792008, "learning_rate": 9.609538622446555e-06, "loss": 1.1118, "step": 4217 }, { "epoch": 0.15287593780580624, "grad_norm": 2.4368883079558574, "learning_rate": 9.609311209052173e-06, "loss": 1.0542, "step": 4218 }, { "epoch": 0.1529121815084629, "grad_norm": 2.4859372207352237, "learning_rate": 9.609083732144269e-06, "loss": 0.9101, "step": 4219 }, { "epoch": 0.15294842521111957, "grad_norm": 2.6715781187041623, "learning_rate": 9.60885619172598e-06, "loss": 0.9789, "step": 4220 }, { "epoch": 0.15298466891377624, "grad_norm": 2.255313673996115, "learning_rate": 9.60862858780044e-06, "loss": 0.9614, "step": 4221 }, { "epoch": 0.15302091261643289, "grad_norm": 2.530781167184049, "learning_rate": 9.608400920370784e-06, "loss": 1.1739, "step": 4222 }, { "epoch": 0.15305715631908956, "grad_norm": 2.293166695431805, "learning_rate": 9.608173189440152e-06, "loss": 1.1949, "step": 4223 }, { "epoch": 0.15309340002174623, "grad_norm": 2.6029510162693814, "learning_rate": 9.60794539501168e-06, "loss": 0.8859, "step": 4224 }, { "epoch": 0.15312964372440288, "grad_norm": 2.4407717372116666, "learning_rate": 9.607717537088506e-06, "loss": 1.0814, "step": 4225 }, { "epoch": 0.15316588742705955, "grad_norm": 2.3781851645304717, "learning_rate": 9.607489615673772e-06, "loss": 1.1254, "step": 4226 }, { "epoch": 0.1532021311297162, "grad_norm": 2.352041118222795, "learning_rate": 9.607261630770618e-06, "loss": 0.8625, "step": 4227 }, { "epoch": 0.15323837483237288, "grad_norm": 2.498028787854593, "learning_rate": 9.607033582382184e-06, "loss": 1.0377, "step": 4228 }, { "epoch": 0.15327461853502955, "grad_norm": 2.4305974760745612, "learning_rate": 9.606805470511614e-06, "loss": 1.0502, "step": 4229 }, { "epoch": 0.1533108622376862, "grad_norm": 2.5884773655013995, "learning_rate": 9.606577295162052e-06, "loss": 1.0139, "step": 4230 }, { "epoch": 0.15334710594034287, "grad_norm": 2.5201565171607885, "learning_rate": 9.60634905633664e-06, "loss": 0.9929, "step": 4231 }, { "epoch": 0.15338334964299952, "grad_norm": 2.4187890517904083, "learning_rate": 9.606120754038522e-06, "loss": 1.0081, "step": 4232 }, { "epoch": 0.1534195933456562, "grad_norm": 2.2484121236597288, "learning_rate": 9.605892388270847e-06, "loss": 0.827, "step": 4233 }, { "epoch": 0.15345583704831287, "grad_norm": 2.544349656934815, "learning_rate": 9.605663959036762e-06, "loss": 0.982, "step": 4234 }, { "epoch": 0.1534920807509695, "grad_norm": 2.428612140429336, "learning_rate": 9.60543546633941e-06, "loss": 1.109, "step": 4235 }, { "epoch": 0.15352832445362619, "grad_norm": 2.141147381304505, "learning_rate": 9.605206910181946e-06, "loss": 0.7976, "step": 4236 }, { "epoch": 0.15356456815628283, "grad_norm": 2.468847759577265, "learning_rate": 9.604978290567512e-06, "loss": 0.7096, "step": 4237 }, { "epoch": 0.1536008118589395, "grad_norm": 2.282383466269271, "learning_rate": 9.604749607499266e-06, "loss": 0.8565, "step": 4238 }, { "epoch": 0.15363705556159618, "grad_norm": 2.2810530888582363, "learning_rate": 9.604520860980353e-06, "loss": 0.9322, "step": 4239 }, { "epoch": 0.15367329926425283, "grad_norm": 2.296064562431278, "learning_rate": 9.604292051013928e-06, "loss": 1.0056, "step": 4240 }, { "epoch": 0.1537095429669095, "grad_norm": 2.599748738293614, "learning_rate": 9.604063177603143e-06, "loss": 1.2352, "step": 4241 }, { "epoch": 0.15374578666956618, "grad_norm": 2.3521160426585177, "learning_rate": 9.60383424075115e-06, "loss": 1.0249, "step": 4242 }, { "epoch": 0.15378203037222282, "grad_norm": 2.4111455054700413, "learning_rate": 9.60360524046111e-06, "loss": 1.1165, "step": 4243 }, { "epoch": 0.1538182740748795, "grad_norm": 2.165754037179731, "learning_rate": 9.60337617673617e-06, "loss": 1.0102, "step": 4244 }, { "epoch": 0.15385451777753614, "grad_norm": 2.604549204536233, "learning_rate": 9.603147049579491e-06, "loss": 1.0375, "step": 4245 }, { "epoch": 0.15389076148019282, "grad_norm": 3.0345387942429656, "learning_rate": 9.60291785899423e-06, "loss": 0.9315, "step": 4246 }, { "epoch": 0.1539270051828495, "grad_norm": 2.31177113284856, "learning_rate": 9.602688604983546e-06, "loss": 0.835, "step": 4247 }, { "epoch": 0.15396324888550614, "grad_norm": 2.3884713797365142, "learning_rate": 9.602459287550595e-06, "loss": 0.8994, "step": 4248 }, { "epoch": 0.1539994925881628, "grad_norm": 2.9015200437531674, "learning_rate": 9.60222990669854e-06, "loss": 1.0441, "step": 4249 }, { "epoch": 0.15403573629081946, "grad_norm": 2.708368307566757, "learning_rate": 9.602000462430538e-06, "loss": 1.0739, "step": 4250 }, { "epoch": 0.15407197999347613, "grad_norm": 2.376579036495288, "learning_rate": 9.601770954749754e-06, "loss": 0.7834, "step": 4251 }, { "epoch": 0.1541082236961328, "grad_norm": 2.424385519659881, "learning_rate": 9.60154138365935e-06, "loss": 0.9224, "step": 4252 }, { "epoch": 0.15414446739878945, "grad_norm": 1.9695828684355454, "learning_rate": 9.601311749162487e-06, "loss": 0.9853, "step": 4253 }, { "epoch": 0.15418071110144613, "grad_norm": 2.5911065786668166, "learning_rate": 9.601082051262332e-06, "loss": 0.9053, "step": 4254 }, { "epoch": 0.15421695480410277, "grad_norm": 2.6838455201627878, "learning_rate": 9.600852289962048e-06, "loss": 0.9206, "step": 4255 }, { "epoch": 0.15425319850675945, "grad_norm": 2.4265980291485123, "learning_rate": 9.600622465264804e-06, "loss": 0.9494, "step": 4256 }, { "epoch": 0.15428944220941612, "grad_norm": 2.038927946833038, "learning_rate": 9.600392577173763e-06, "loss": 0.8758, "step": 4257 }, { "epoch": 0.15432568591207277, "grad_norm": 2.3665294128066816, "learning_rate": 9.600162625692096e-06, "loss": 0.964, "step": 4258 }, { "epoch": 0.15436192961472944, "grad_norm": 2.4437987801461243, "learning_rate": 9.599932610822968e-06, "loss": 1.046, "step": 4259 }, { "epoch": 0.15439817331738612, "grad_norm": 2.185414343814344, "learning_rate": 9.599702532569553e-06, "loss": 1.1624, "step": 4260 }, { "epoch": 0.15443441702004276, "grad_norm": 2.0517158693256543, "learning_rate": 9.599472390935016e-06, "loss": 1.052, "step": 4261 }, { "epoch": 0.15447066072269944, "grad_norm": 2.642734863829772, "learning_rate": 9.599242185922532e-06, "loss": 0.9853, "step": 4262 }, { "epoch": 0.15450690442535608, "grad_norm": 2.3070100353496286, "learning_rate": 9.599011917535275e-06, "loss": 0.9256, "step": 4263 }, { "epoch": 0.15454314812801276, "grad_norm": 2.176902071735061, "learning_rate": 9.598781585776414e-06, "loss": 1.1001, "step": 4264 }, { "epoch": 0.15457939183066943, "grad_norm": 2.5206893286062564, "learning_rate": 9.598551190649121e-06, "loss": 1.0493, "step": 4265 }, { "epoch": 0.15461563553332608, "grad_norm": 2.1458276989167473, "learning_rate": 9.598320732156576e-06, "loss": 0.8441, "step": 4266 }, { "epoch": 0.15465187923598275, "grad_norm": 2.157190115157514, "learning_rate": 9.598090210301953e-06, "loss": 1.0461, "step": 4267 }, { "epoch": 0.1546881229386394, "grad_norm": 2.2607751859447087, "learning_rate": 9.597859625088426e-06, "loss": 1.0506, "step": 4268 }, { "epoch": 0.15472436664129607, "grad_norm": 2.5006702385348873, "learning_rate": 9.597628976519175e-06, "loss": 1.0807, "step": 4269 }, { "epoch": 0.15476061034395275, "grad_norm": 2.439303114027386, "learning_rate": 9.597398264597377e-06, "loss": 0.9389, "step": 4270 }, { "epoch": 0.1547968540466094, "grad_norm": 2.40964955832883, "learning_rate": 9.597167489326212e-06, "loss": 1.0536, "step": 4271 }, { "epoch": 0.15483309774926607, "grad_norm": 2.5352292533867478, "learning_rate": 9.596936650708858e-06, "loss": 0.9306, "step": 4272 }, { "epoch": 0.15486934145192272, "grad_norm": 2.4387479737107545, "learning_rate": 9.596705748748499e-06, "loss": 0.9014, "step": 4273 }, { "epoch": 0.1549055851545794, "grad_norm": 2.151653623145191, "learning_rate": 9.596474783448313e-06, "loss": 0.9878, "step": 4274 }, { "epoch": 0.15494182885723606, "grad_norm": 2.325168093166863, "learning_rate": 9.596243754811485e-06, "loss": 1.1028, "step": 4275 }, { "epoch": 0.1549780725598927, "grad_norm": 2.5522027673061, "learning_rate": 9.596012662841198e-06, "loss": 1.0214, "step": 4276 }, { "epoch": 0.15501431626254938, "grad_norm": 2.740278029321143, "learning_rate": 9.595781507540636e-06, "loss": 1.142, "step": 4277 }, { "epoch": 0.15505055996520606, "grad_norm": 2.4795055071100727, "learning_rate": 9.595550288912984e-06, "loss": 0.9419, "step": 4278 }, { "epoch": 0.1550868036678627, "grad_norm": 2.586011374830454, "learning_rate": 9.595319006961428e-06, "loss": 0.9247, "step": 4279 }, { "epoch": 0.15512304737051938, "grad_norm": 2.3566051390126153, "learning_rate": 9.595087661689157e-06, "loss": 0.9912, "step": 4280 }, { "epoch": 0.15515929107317603, "grad_norm": 2.9220384252246703, "learning_rate": 9.594856253099357e-06, "loss": 1.1565, "step": 4281 }, { "epoch": 0.1551955347758327, "grad_norm": 2.4420777738806643, "learning_rate": 9.594624781195214e-06, "loss": 0.9619, "step": 4282 }, { "epoch": 0.15523177847848937, "grad_norm": 2.28391029221144, "learning_rate": 9.594393245979921e-06, "loss": 1.0082, "step": 4283 }, { "epoch": 0.15526802218114602, "grad_norm": 2.4253527796502334, "learning_rate": 9.59416164745667e-06, "loss": 0.9833, "step": 4284 }, { "epoch": 0.1553042658838027, "grad_norm": 2.0084610307391055, "learning_rate": 9.593929985628648e-06, "loss": 0.8104, "step": 4285 }, { "epoch": 0.15534050958645934, "grad_norm": 2.576702169218737, "learning_rate": 9.593698260499049e-06, "loss": 0.9831, "step": 4286 }, { "epoch": 0.15537675328911602, "grad_norm": 2.6602184933024104, "learning_rate": 9.593466472071066e-06, "loss": 1.0624, "step": 4287 }, { "epoch": 0.1554129969917727, "grad_norm": 2.314459385772318, "learning_rate": 9.593234620347894e-06, "loss": 0.9254, "step": 4288 }, { "epoch": 0.15544924069442934, "grad_norm": 2.214055811448602, "learning_rate": 9.593002705332728e-06, "loss": 0.9143, "step": 4289 }, { "epoch": 0.155485484397086, "grad_norm": 2.3755756355336266, "learning_rate": 9.59277072702876e-06, "loss": 1.0853, "step": 4290 }, { "epoch": 0.15552172809974266, "grad_norm": 2.122167779148229, "learning_rate": 9.59253868543919e-06, "loss": 0.8278, "step": 4291 }, { "epoch": 0.15555797180239933, "grad_norm": 2.588870788684429, "learning_rate": 9.592306580567215e-06, "loss": 1.0399, "step": 4292 }, { "epoch": 0.155594215505056, "grad_norm": 2.7093054994868804, "learning_rate": 9.59207441241603e-06, "loss": 0.9689, "step": 4293 }, { "epoch": 0.15563045920771265, "grad_norm": 2.4009728652570312, "learning_rate": 9.59184218098884e-06, "loss": 1.0589, "step": 4294 }, { "epoch": 0.15566670291036933, "grad_norm": 2.461431386968651, "learning_rate": 9.59160988628884e-06, "loss": 0.9649, "step": 4295 }, { "epoch": 0.155702946613026, "grad_norm": 2.0819850125142074, "learning_rate": 9.591377528319233e-06, "loss": 0.9594, "step": 4296 }, { "epoch": 0.15573919031568265, "grad_norm": 2.176103522285555, "learning_rate": 9.591145107083222e-06, "loss": 1.0359, "step": 4297 }, { "epoch": 0.15577543401833932, "grad_norm": 2.4380997915664646, "learning_rate": 9.590912622584005e-06, "loss": 0.937, "step": 4298 }, { "epoch": 0.15581167772099597, "grad_norm": 2.211105529063482, "learning_rate": 9.590680074824791e-06, "loss": 1.1442, "step": 4299 }, { "epoch": 0.15584792142365264, "grad_norm": 2.5910989053212363, "learning_rate": 9.590447463808781e-06, "loss": 0.7631, "step": 4300 }, { "epoch": 0.15588416512630932, "grad_norm": 2.483304212948095, "learning_rate": 9.590214789539183e-06, "loss": 1.0608, "step": 4301 }, { "epoch": 0.15592040882896596, "grad_norm": 2.3524483173718163, "learning_rate": 9.5899820520192e-06, "loss": 0.8895, "step": 4302 }, { "epoch": 0.15595665253162264, "grad_norm": 2.524428113371419, "learning_rate": 9.589749251252039e-06, "loss": 1.1289, "step": 4303 }, { "epoch": 0.15599289623427928, "grad_norm": 1.9886851103476018, "learning_rate": 9.589516387240911e-06, "loss": 0.8693, "step": 4304 }, { "epoch": 0.15602913993693596, "grad_norm": 2.6117333592485696, "learning_rate": 9.589283459989021e-06, "loss": 1.0461, "step": 4305 }, { "epoch": 0.15606538363959263, "grad_norm": 2.465348252599644, "learning_rate": 9.589050469499581e-06, "loss": 1.0551, "step": 4306 }, { "epoch": 0.15610162734224928, "grad_norm": 2.3969499590854264, "learning_rate": 9.588817415775802e-06, "loss": 0.8134, "step": 4307 }, { "epoch": 0.15613787104490595, "grad_norm": 2.168453524633256, "learning_rate": 9.588584298820894e-06, "loss": 0.8905, "step": 4308 }, { "epoch": 0.1561741147475626, "grad_norm": 2.415464042054438, "learning_rate": 9.588351118638068e-06, "loss": 1.0924, "step": 4309 }, { "epoch": 0.15621035845021927, "grad_norm": 2.6635732562902192, "learning_rate": 9.58811787523054e-06, "loss": 1.1533, "step": 4310 }, { "epoch": 0.15624660215287595, "grad_norm": 2.300948503424313, "learning_rate": 9.587884568601521e-06, "loss": 1.0803, "step": 4311 }, { "epoch": 0.1562828458555326, "grad_norm": 2.6454908412087157, "learning_rate": 9.587651198754229e-06, "loss": 1.0613, "step": 4312 }, { "epoch": 0.15631908955818927, "grad_norm": 2.495595433695284, "learning_rate": 9.587417765691878e-06, "loss": 0.9817, "step": 4313 }, { "epoch": 0.15635533326084594, "grad_norm": 2.4185093802413755, "learning_rate": 9.587184269417684e-06, "loss": 0.9248, "step": 4314 }, { "epoch": 0.1563915769635026, "grad_norm": 2.3316132933458396, "learning_rate": 9.586950709934864e-06, "loss": 0.8527, "step": 4315 }, { "epoch": 0.15642782066615926, "grad_norm": 2.42933551736442, "learning_rate": 9.586717087246637e-06, "loss": 1.118, "step": 4316 }, { "epoch": 0.1564640643688159, "grad_norm": 2.273161905408612, "learning_rate": 9.586483401356225e-06, "loss": 0.9677, "step": 4317 }, { "epoch": 0.15650030807147258, "grad_norm": 2.8889152533675246, "learning_rate": 9.586249652266844e-06, "loss": 0.8683, "step": 4318 }, { "epoch": 0.15653655177412926, "grad_norm": 2.4140897377657216, "learning_rate": 9.586015839981717e-06, "loss": 0.938, "step": 4319 }, { "epoch": 0.1565727954767859, "grad_norm": 2.218206463558617, "learning_rate": 9.585781964504064e-06, "loss": 0.8628, "step": 4320 }, { "epoch": 0.15660903917944258, "grad_norm": 2.304896049196385, "learning_rate": 9.58554802583711e-06, "loss": 0.828, "step": 4321 }, { "epoch": 0.15664528288209922, "grad_norm": 2.6202634199987567, "learning_rate": 9.585314023984078e-06, "loss": 0.8654, "step": 4322 }, { "epoch": 0.1566815265847559, "grad_norm": 2.391736242597148, "learning_rate": 9.585079958948192e-06, "loss": 1.0384, "step": 4323 }, { "epoch": 0.15671777028741257, "grad_norm": 2.8692979782863697, "learning_rate": 9.584845830732676e-06, "loss": 0.9804, "step": 4324 }, { "epoch": 0.15675401399006922, "grad_norm": 2.305842604723293, "learning_rate": 9.584611639340758e-06, "loss": 1.1368, "step": 4325 }, { "epoch": 0.1567902576927259, "grad_norm": 2.4282576751040565, "learning_rate": 9.584377384775663e-06, "loss": 0.9832, "step": 4326 }, { "epoch": 0.15682650139538254, "grad_norm": 2.3040285110972327, "learning_rate": 9.584143067040621e-06, "loss": 0.9875, "step": 4327 }, { "epoch": 0.1568627450980392, "grad_norm": 2.3927114809876233, "learning_rate": 9.58390868613886e-06, "loss": 0.8325, "step": 4328 }, { "epoch": 0.1568989888006959, "grad_norm": 2.47029440304702, "learning_rate": 9.58367424207361e-06, "loss": 0.9793, "step": 4329 }, { "epoch": 0.15693523250335253, "grad_norm": 2.4722858885122445, "learning_rate": 9.5834397348481e-06, "loss": 0.9313, "step": 4330 }, { "epoch": 0.1569714762060092, "grad_norm": 2.2665990135083787, "learning_rate": 9.583205164465564e-06, "loss": 0.7465, "step": 4331 }, { "epoch": 0.15700771990866588, "grad_norm": 2.500209837046908, "learning_rate": 9.582970530929229e-06, "loss": 0.9138, "step": 4332 }, { "epoch": 0.15704396361132253, "grad_norm": 2.63277017519429, "learning_rate": 9.582735834242336e-06, "loss": 0.9853, "step": 4333 }, { "epoch": 0.1570802073139792, "grad_norm": 2.4524945818933817, "learning_rate": 9.582501074408112e-06, "loss": 1.0673, "step": 4334 }, { "epoch": 0.15711645101663585, "grad_norm": 2.3583436999084046, "learning_rate": 9.582266251429797e-06, "loss": 1.1462, "step": 4335 }, { "epoch": 0.15715269471929252, "grad_norm": 2.5592985029227044, "learning_rate": 9.582031365310623e-06, "loss": 0.9319, "step": 4336 }, { "epoch": 0.1571889384219492, "grad_norm": 2.551552643191824, "learning_rate": 9.581796416053828e-06, "loss": 1.064, "step": 4337 }, { "epoch": 0.15722518212460584, "grad_norm": 2.4551308448464892, "learning_rate": 9.58156140366265e-06, "loss": 0.9733, "step": 4338 }, { "epoch": 0.15726142582726252, "grad_norm": 2.7661879132726517, "learning_rate": 9.581326328140325e-06, "loss": 0.8346, "step": 4339 }, { "epoch": 0.15729766952991917, "grad_norm": 2.2645491901953734, "learning_rate": 9.581091189490096e-06, "loss": 0.8943, "step": 4340 }, { "epoch": 0.15733391323257584, "grad_norm": 2.473308675834626, "learning_rate": 9.580855987715201e-06, "loss": 0.9667, "step": 4341 }, { "epoch": 0.1573701569352325, "grad_norm": 2.3495894371595227, "learning_rate": 9.580620722818878e-06, "loss": 0.8873, "step": 4342 }, { "epoch": 0.15740640063788916, "grad_norm": 2.3124133757039815, "learning_rate": 9.580385394804374e-06, "loss": 0.8567, "step": 4343 }, { "epoch": 0.15744264434054583, "grad_norm": 2.2122458967257996, "learning_rate": 9.580150003674929e-06, "loss": 0.8772, "step": 4344 }, { "epoch": 0.15747888804320248, "grad_norm": 2.6791392711421143, "learning_rate": 9.579914549433787e-06, "loss": 0.9762, "step": 4345 }, { "epoch": 0.15751513174585915, "grad_norm": 2.3657991497345146, "learning_rate": 9.579679032084192e-06, "loss": 0.9553, "step": 4346 }, { "epoch": 0.15755137544851583, "grad_norm": 2.5516383513823295, "learning_rate": 9.579443451629392e-06, "loss": 0.8999, "step": 4347 }, { "epoch": 0.15758761915117248, "grad_norm": 2.2204028194318917, "learning_rate": 9.579207808072629e-06, "loss": 0.8799, "step": 4348 }, { "epoch": 0.15762386285382915, "grad_norm": 2.422349984590579, "learning_rate": 9.57897210141715e-06, "loss": 1.0384, "step": 4349 }, { "epoch": 0.15766010655648582, "grad_norm": 2.628594485837924, "learning_rate": 9.578736331666207e-06, "loss": 1.0292, "step": 4350 }, { "epoch": 0.15769635025914247, "grad_norm": 2.3568090957741292, "learning_rate": 9.578500498823044e-06, "loss": 0.8446, "step": 4351 }, { "epoch": 0.15773259396179914, "grad_norm": 2.297740790704879, "learning_rate": 9.578264602890917e-06, "loss": 1.0745, "step": 4352 }, { "epoch": 0.1577688376644558, "grad_norm": 2.6417602399054183, "learning_rate": 9.57802864387307e-06, "loss": 1.0978, "step": 4353 }, { "epoch": 0.15780508136711247, "grad_norm": 2.43315607085532, "learning_rate": 9.577792621772756e-06, "loss": 0.9395, "step": 4354 }, { "epoch": 0.15784132506976914, "grad_norm": 2.300124253451395, "learning_rate": 9.577556536593232e-06, "loss": 0.9721, "step": 4355 }, { "epoch": 0.15787756877242579, "grad_norm": 2.3726919473733705, "learning_rate": 9.577320388337744e-06, "loss": 0.9143, "step": 4356 }, { "epoch": 0.15791381247508246, "grad_norm": 2.254527798848656, "learning_rate": 9.57708417700955e-06, "loss": 0.9791, "step": 4357 }, { "epoch": 0.1579500561777391, "grad_norm": 2.2051835222316747, "learning_rate": 9.576847902611905e-06, "loss": 0.9242, "step": 4358 }, { "epoch": 0.15798629988039578, "grad_norm": 2.2288448924770474, "learning_rate": 9.576611565148062e-06, "loss": 0.9541, "step": 4359 }, { "epoch": 0.15802254358305245, "grad_norm": 2.465819831477286, "learning_rate": 9.57637516462128e-06, "loss": 0.9397, "step": 4360 }, { "epoch": 0.1580587872857091, "grad_norm": 2.3778395819177662, "learning_rate": 9.576138701034818e-06, "loss": 0.8588, "step": 4361 }, { "epoch": 0.15809503098836578, "grad_norm": 1.989899439443672, "learning_rate": 9.57590217439193e-06, "loss": 0.9726, "step": 4362 }, { "epoch": 0.15813127469102242, "grad_norm": 2.341184864997742, "learning_rate": 9.575665584695881e-06, "loss": 0.7598, "step": 4363 }, { "epoch": 0.1581675183936791, "grad_norm": 2.5798125744292473, "learning_rate": 9.575428931949924e-06, "loss": 1.0174, "step": 4364 }, { "epoch": 0.15820376209633577, "grad_norm": 2.3710068292452964, "learning_rate": 9.575192216157325e-06, "loss": 0.9554, "step": 4365 }, { "epoch": 0.15824000579899242, "grad_norm": 2.3470908325359288, "learning_rate": 9.574955437321345e-06, "loss": 1.0707, "step": 4366 }, { "epoch": 0.1582762495016491, "grad_norm": 2.6579842514105017, "learning_rate": 9.574718595445242e-06, "loss": 1.194, "step": 4367 }, { "epoch": 0.15831249320430577, "grad_norm": 2.5498929301235553, "learning_rate": 9.574481690532286e-06, "loss": 0.9746, "step": 4368 }, { "epoch": 0.1583487369069624, "grad_norm": 2.367347760161516, "learning_rate": 9.574244722585741e-06, "loss": 1.0018, "step": 4369 }, { "epoch": 0.15838498060961909, "grad_norm": 2.541510865910282, "learning_rate": 9.574007691608867e-06, "loss": 0.9042, "step": 4370 }, { "epoch": 0.15842122431227573, "grad_norm": 2.417116252314016, "learning_rate": 9.573770597604934e-06, "loss": 0.762, "step": 4371 }, { "epoch": 0.1584574680149324, "grad_norm": 2.121258165438082, "learning_rate": 9.573533440577209e-06, "loss": 0.705, "step": 4372 }, { "epoch": 0.15849371171758908, "grad_norm": 2.8090083773945183, "learning_rate": 9.573296220528959e-06, "loss": 0.9211, "step": 4373 }, { "epoch": 0.15852995542024573, "grad_norm": 3.6494224085341376, "learning_rate": 9.573058937463454e-06, "loss": 1.0006, "step": 4374 }, { "epoch": 0.1585661991229024, "grad_norm": 2.5766806700586784, "learning_rate": 9.57282159138396e-06, "loss": 0.9546, "step": 4375 }, { "epoch": 0.15860244282555905, "grad_norm": 2.658956169942518, "learning_rate": 9.572584182293752e-06, "loss": 1.0144, "step": 4376 }, { "epoch": 0.15863868652821572, "grad_norm": 2.4703856779228786, "learning_rate": 9.572346710196098e-06, "loss": 0.9828, "step": 4377 }, { "epoch": 0.1586749302308724, "grad_norm": 2.6574834405313794, "learning_rate": 9.572109175094272e-06, "loss": 0.9441, "step": 4378 }, { "epoch": 0.15871117393352904, "grad_norm": 2.429534619663885, "learning_rate": 9.571871576991549e-06, "loss": 1.0255, "step": 4379 }, { "epoch": 0.15874741763618572, "grad_norm": 2.3502589601531936, "learning_rate": 9.571633915891198e-06, "loss": 0.9097, "step": 4380 }, { "epoch": 0.15878366133884236, "grad_norm": 2.4879602227912367, "learning_rate": 9.571396191796496e-06, "loss": 1.1063, "step": 4381 }, { "epoch": 0.15881990504149904, "grad_norm": 2.5464993615881863, "learning_rate": 9.57115840471072e-06, "loss": 1.0133, "step": 4382 }, { "epoch": 0.1588561487441557, "grad_norm": 2.421117528861622, "learning_rate": 9.570920554637148e-06, "loss": 1.0337, "step": 4383 }, { "epoch": 0.15889239244681236, "grad_norm": 2.6377037814371516, "learning_rate": 9.570682641579052e-06, "loss": 0.9366, "step": 4384 }, { "epoch": 0.15892863614946903, "grad_norm": 2.6847882925358495, "learning_rate": 9.570444665539715e-06, "loss": 1.0534, "step": 4385 }, { "epoch": 0.1589648798521257, "grad_norm": 2.3849285693759548, "learning_rate": 9.570206626522414e-06, "loss": 1.0954, "step": 4386 }, { "epoch": 0.15900112355478235, "grad_norm": 2.8627608043840755, "learning_rate": 9.56996852453043e-06, "loss": 0.9557, "step": 4387 }, { "epoch": 0.15903736725743903, "grad_norm": 2.427275670365786, "learning_rate": 9.569730359567044e-06, "loss": 0.8458, "step": 4388 }, { "epoch": 0.15907361096009567, "grad_norm": 2.792541529349663, "learning_rate": 9.569492131635537e-06, "loss": 1.096, "step": 4389 }, { "epoch": 0.15910985466275235, "grad_norm": 2.6914690943139106, "learning_rate": 9.569253840739192e-06, "loss": 0.9501, "step": 4390 }, { "epoch": 0.15914609836540902, "grad_norm": 2.4972757771524674, "learning_rate": 9.569015486881293e-06, "loss": 0.9153, "step": 4391 }, { "epoch": 0.15918234206806567, "grad_norm": 2.5617376988798934, "learning_rate": 9.568777070065123e-06, "loss": 0.985, "step": 4392 }, { "epoch": 0.15921858577072234, "grad_norm": 2.339246336318868, "learning_rate": 9.568538590293968e-06, "loss": 1.0441, "step": 4393 }, { "epoch": 0.159254829473379, "grad_norm": 2.594319701716235, "learning_rate": 9.568300047571115e-06, "loss": 0.9686, "step": 4394 }, { "epoch": 0.15929107317603566, "grad_norm": 2.380117745800825, "learning_rate": 9.568061441899849e-06, "loss": 1.0223, "step": 4395 }, { "epoch": 0.15932731687869234, "grad_norm": 2.3442437978720827, "learning_rate": 9.56782277328346e-06, "loss": 1.0529, "step": 4396 }, { "epoch": 0.15936356058134898, "grad_norm": 2.2985058400563334, "learning_rate": 9.567584041725234e-06, "loss": 0.7886, "step": 4397 }, { "epoch": 0.15939980428400566, "grad_norm": 2.5588659392751767, "learning_rate": 9.567345247228464e-06, "loss": 0.9632, "step": 4398 }, { "epoch": 0.1594360479866623, "grad_norm": 2.247989798215105, "learning_rate": 9.56710638979644e-06, "loss": 0.999, "step": 4399 }, { "epoch": 0.15947229168931898, "grad_norm": 2.501107528103679, "learning_rate": 9.566867469432449e-06, "loss": 0.9446, "step": 4400 }, { "epoch": 0.15950853539197565, "grad_norm": 2.4687208175085966, "learning_rate": 9.566628486139787e-06, "loss": 1.0281, "step": 4401 }, { "epoch": 0.1595447790946323, "grad_norm": 2.4278564988996982, "learning_rate": 9.566389439921747e-06, "loss": 0.8623, "step": 4402 }, { "epoch": 0.15958102279728897, "grad_norm": 2.233568405163934, "learning_rate": 9.566150330781623e-06, "loss": 0.8603, "step": 4403 }, { "epoch": 0.15961726649994565, "grad_norm": 2.502979115358813, "learning_rate": 9.565911158722707e-06, "loss": 0.987, "step": 4404 }, { "epoch": 0.1596535102026023, "grad_norm": 2.6466891465706466, "learning_rate": 9.565671923748297e-06, "loss": 1.016, "step": 4405 }, { "epoch": 0.15968975390525897, "grad_norm": 2.630166762320374, "learning_rate": 9.56543262586169e-06, "loss": 1.1575, "step": 4406 }, { "epoch": 0.15972599760791562, "grad_norm": 2.247822159700105, "learning_rate": 9.565193265066183e-06, "loss": 0.9295, "step": 4407 }, { "epoch": 0.1597622413105723, "grad_norm": 2.61633133222356, "learning_rate": 9.564953841365073e-06, "loss": 1.054, "step": 4408 }, { "epoch": 0.15979848501322896, "grad_norm": 2.5577647229567533, "learning_rate": 9.564714354761659e-06, "loss": 1.0231, "step": 4409 }, { "epoch": 0.1598347287158856, "grad_norm": 2.6559972871543547, "learning_rate": 9.564474805259243e-06, "loss": 1.1532, "step": 4410 }, { "epoch": 0.15987097241854228, "grad_norm": 2.7777257620561686, "learning_rate": 9.564235192861125e-06, "loss": 1.0472, "step": 4411 }, { "epoch": 0.15990721612119893, "grad_norm": 2.213093671471563, "learning_rate": 9.563995517570605e-06, "loss": 0.8401, "step": 4412 }, { "epoch": 0.1599434598238556, "grad_norm": 2.1929886151480935, "learning_rate": 9.563755779390988e-06, "loss": 1.0536, "step": 4413 }, { "epoch": 0.15997970352651228, "grad_norm": 2.415494376803704, "learning_rate": 9.563515978325577e-06, "loss": 0.9481, "step": 4414 }, { "epoch": 0.16001594722916893, "grad_norm": 2.402535469040004, "learning_rate": 9.563276114377674e-06, "loss": 1.0033, "step": 4415 }, { "epoch": 0.1600521909318256, "grad_norm": 2.3091734657563934, "learning_rate": 9.563036187550587e-06, "loss": 0.7563, "step": 4416 }, { "epoch": 0.16008843463448225, "grad_norm": 2.496255957107422, "learning_rate": 9.562796197847621e-06, "loss": 1.0271, "step": 4417 }, { "epoch": 0.16012467833713892, "grad_norm": 2.4127307612054394, "learning_rate": 9.562556145272083e-06, "loss": 1.0093, "step": 4418 }, { "epoch": 0.1601609220397956, "grad_norm": 2.4199707526924743, "learning_rate": 9.56231602982728e-06, "loss": 1.1776, "step": 4419 }, { "epoch": 0.16019716574245224, "grad_norm": 2.666603244341595, "learning_rate": 9.56207585151652e-06, "loss": 0.9735, "step": 4420 }, { "epoch": 0.16023340944510892, "grad_norm": 2.271557154838773, "learning_rate": 9.561835610343117e-06, "loss": 1.0536, "step": 4421 }, { "epoch": 0.1602696531477656, "grad_norm": 2.1062782496308885, "learning_rate": 9.561595306310378e-06, "loss": 0.9435, "step": 4422 }, { "epoch": 0.16030589685042224, "grad_norm": 2.387457785501034, "learning_rate": 9.561354939421611e-06, "loss": 1.0533, "step": 4423 }, { "epoch": 0.1603421405530789, "grad_norm": 2.5509943774541317, "learning_rate": 9.561114509680135e-06, "loss": 1.1212, "step": 4424 }, { "epoch": 0.16037838425573556, "grad_norm": 2.5412169388249617, "learning_rate": 9.560874017089258e-06, "loss": 0.9719, "step": 4425 }, { "epoch": 0.16041462795839223, "grad_norm": 2.330650536238558, "learning_rate": 9.560633461652296e-06, "loss": 0.8999, "step": 4426 }, { "epoch": 0.1604508716610489, "grad_norm": 2.700464806290623, "learning_rate": 9.560392843372563e-06, "loss": 1.0033, "step": 4427 }, { "epoch": 0.16048711536370555, "grad_norm": 2.7807004405808415, "learning_rate": 9.560152162253373e-06, "loss": 1.1124, "step": 4428 }, { "epoch": 0.16052335906636223, "grad_norm": 2.695277795213939, "learning_rate": 9.559911418298047e-06, "loss": 0.9887, "step": 4429 }, { "epoch": 0.16055960276901887, "grad_norm": 2.432953041915562, "learning_rate": 9.559670611509897e-06, "loss": 1.0712, "step": 4430 }, { "epoch": 0.16059584647167555, "grad_norm": 2.1938808573746673, "learning_rate": 9.559429741892245e-06, "loss": 0.8895, "step": 4431 }, { "epoch": 0.16063209017433222, "grad_norm": 2.335023692155837, "learning_rate": 9.559188809448407e-06, "loss": 1.1152, "step": 4432 }, { "epoch": 0.16066833387698887, "grad_norm": 2.211568646785095, "learning_rate": 9.558947814181706e-06, "loss": 0.922, "step": 4433 }, { "epoch": 0.16070457757964554, "grad_norm": 2.588530104468592, "learning_rate": 9.558706756095461e-06, "loss": 0.8755, "step": 4434 }, { "epoch": 0.1607408212823022, "grad_norm": 2.5565906126615796, "learning_rate": 9.558465635192993e-06, "loss": 1.0188, "step": 4435 }, { "epoch": 0.16077706498495886, "grad_norm": 2.3440032411256935, "learning_rate": 9.558224451477628e-06, "loss": 0.9035, "step": 4436 }, { "epoch": 0.16081330868761554, "grad_norm": 2.19139897058604, "learning_rate": 9.557983204952684e-06, "loss": 0.7876, "step": 4437 }, { "epoch": 0.16084955239027218, "grad_norm": 2.1620764372978516, "learning_rate": 9.557741895621488e-06, "loss": 0.9862, "step": 4438 }, { "epoch": 0.16088579609292886, "grad_norm": 2.4961451312319114, "learning_rate": 9.557500523487366e-06, "loss": 1.0314, "step": 4439 }, { "epoch": 0.16092203979558553, "grad_norm": 2.2023202820413275, "learning_rate": 9.557259088553644e-06, "loss": 0.9488, "step": 4440 }, { "epoch": 0.16095828349824218, "grad_norm": 2.423229455205093, "learning_rate": 9.557017590823648e-06, "loss": 1.0616, "step": 4441 }, { "epoch": 0.16099452720089885, "grad_norm": 2.3499172205917183, "learning_rate": 9.556776030300704e-06, "loss": 0.9865, "step": 4442 }, { "epoch": 0.1610307709035555, "grad_norm": 2.3883388152687717, "learning_rate": 9.556534406988142e-06, "loss": 0.9488, "step": 4443 }, { "epoch": 0.16106701460621217, "grad_norm": 2.7280120267848984, "learning_rate": 9.556292720889292e-06, "loss": 1.2442, "step": 4444 }, { "epoch": 0.16110325830886885, "grad_norm": 2.766187442393475, "learning_rate": 9.556050972007485e-06, "loss": 1.0546, "step": 4445 }, { "epoch": 0.1611395020115255, "grad_norm": 2.669806016700927, "learning_rate": 9.55580916034605e-06, "loss": 0.9669, "step": 4446 }, { "epoch": 0.16117574571418217, "grad_norm": 2.280388771835581, "learning_rate": 9.555567285908321e-06, "loss": 0.9491, "step": 4447 }, { "epoch": 0.1612119894168388, "grad_norm": 2.4639717570712762, "learning_rate": 9.555325348697628e-06, "loss": 1.0405, "step": 4448 }, { "epoch": 0.1612482331194955, "grad_norm": 2.475173888699165, "learning_rate": 9.55508334871731e-06, "loss": 0.9142, "step": 4449 }, { "epoch": 0.16128447682215216, "grad_norm": 2.6605901318401677, "learning_rate": 9.554841285970696e-06, "loss": 0.9778, "step": 4450 }, { "epoch": 0.1613207205248088, "grad_norm": 2.3805757860803265, "learning_rate": 9.554599160461123e-06, "loss": 1.0957, "step": 4451 }, { "epoch": 0.16135696422746548, "grad_norm": 2.394686837385637, "learning_rate": 9.554356972191929e-06, "loss": 0.9812, "step": 4452 }, { "epoch": 0.16139320793012213, "grad_norm": 2.232328805930231, "learning_rate": 9.55411472116645e-06, "loss": 0.9627, "step": 4453 }, { "epoch": 0.1614294516327788, "grad_norm": 2.714470595788359, "learning_rate": 9.553872407388025e-06, "loss": 1.0982, "step": 4454 }, { "epoch": 0.16146569533543548, "grad_norm": 2.4818548338890496, "learning_rate": 9.553630030859992e-06, "loss": 0.8863, "step": 4455 }, { "epoch": 0.16150193903809212, "grad_norm": 2.4632911025233333, "learning_rate": 9.553387591585691e-06, "loss": 1.0985, "step": 4456 }, { "epoch": 0.1615381827407488, "grad_norm": 2.6338129000855504, "learning_rate": 9.553145089568464e-06, "loss": 1.1719, "step": 4457 }, { "epoch": 0.16157442644340544, "grad_norm": 2.5351521300005877, "learning_rate": 9.55290252481165e-06, "loss": 1.0323, "step": 4458 }, { "epoch": 0.16161067014606212, "grad_norm": 2.40247155190087, "learning_rate": 9.552659897318592e-06, "loss": 1.1271, "step": 4459 }, { "epoch": 0.1616469138487188, "grad_norm": 2.078423379365874, "learning_rate": 9.552417207092635e-06, "loss": 0.7887, "step": 4460 }, { "epoch": 0.16168315755137544, "grad_norm": 2.5706228602029477, "learning_rate": 9.552174454137122e-06, "loss": 1.0518, "step": 4461 }, { "epoch": 0.1617194012540321, "grad_norm": 2.9134004270419434, "learning_rate": 9.551931638455398e-06, "loss": 1.2656, "step": 4462 }, { "epoch": 0.1617556449566888, "grad_norm": 2.353795765962007, "learning_rate": 9.55168876005081e-06, "loss": 1.0392, "step": 4463 }, { "epoch": 0.16179188865934543, "grad_norm": 2.6983211176357758, "learning_rate": 9.551445818926703e-06, "loss": 0.9218, "step": 4464 }, { "epoch": 0.1618281323620021, "grad_norm": 2.0086098194103834, "learning_rate": 9.551202815086425e-06, "loss": 0.8638, "step": 4465 }, { "epoch": 0.16186437606465875, "grad_norm": 2.293046723501377, "learning_rate": 9.550959748533324e-06, "loss": 0.8894, "step": 4466 }, { "epoch": 0.16190061976731543, "grad_norm": 2.2848335090872385, "learning_rate": 9.55071661927075e-06, "loss": 1.0914, "step": 4467 }, { "epoch": 0.1619368634699721, "grad_norm": 2.40065857881588, "learning_rate": 9.550473427302054e-06, "loss": 0.8348, "step": 4468 }, { "epoch": 0.16197310717262875, "grad_norm": 2.2233509566257954, "learning_rate": 9.550230172630587e-06, "loss": 1.1838, "step": 4469 }, { "epoch": 0.16200935087528542, "grad_norm": 2.3745445783199375, "learning_rate": 9.549986855259699e-06, "loss": 1.002, "step": 4470 }, { "epoch": 0.16204559457794207, "grad_norm": 2.6964402244355914, "learning_rate": 9.549743475192743e-06, "loss": 0.9121, "step": 4471 }, { "epoch": 0.16208183828059874, "grad_norm": 2.545002928941248, "learning_rate": 9.549500032433075e-06, "loss": 1.0891, "step": 4472 }, { "epoch": 0.16211808198325542, "grad_norm": 2.5907344748185994, "learning_rate": 9.549256526984048e-06, "loss": 1.1234, "step": 4473 }, { "epoch": 0.16215432568591207, "grad_norm": 2.7018423379798024, "learning_rate": 9.549012958849017e-06, "loss": 0.968, "step": 4474 }, { "epoch": 0.16219056938856874, "grad_norm": 2.648652454638237, "learning_rate": 9.548769328031338e-06, "loss": 0.9968, "step": 4475 }, { "epoch": 0.16222681309122539, "grad_norm": 2.113421351826365, "learning_rate": 9.548525634534368e-06, "loss": 0.8497, "step": 4476 }, { "epoch": 0.16226305679388206, "grad_norm": 2.305946999554332, "learning_rate": 9.548281878361468e-06, "loss": 0.9782, "step": 4477 }, { "epoch": 0.16229930049653873, "grad_norm": 2.3130166386365283, "learning_rate": 9.548038059515994e-06, "loss": 0.859, "step": 4478 }, { "epoch": 0.16233554419919538, "grad_norm": 2.3849648212949166, "learning_rate": 9.547794178001305e-06, "loss": 0.8765, "step": 4479 }, { "epoch": 0.16237178790185205, "grad_norm": 2.5306736486528116, "learning_rate": 9.547550233820763e-06, "loss": 1.0179, "step": 4480 }, { "epoch": 0.16240803160450873, "grad_norm": 2.317071481434883, "learning_rate": 9.547306226977729e-06, "loss": 0.7844, "step": 4481 }, { "epoch": 0.16244427530716538, "grad_norm": 2.381276954373809, "learning_rate": 9.547062157475564e-06, "loss": 1.0214, "step": 4482 }, { "epoch": 0.16248051900982205, "grad_norm": 2.022255235371993, "learning_rate": 9.546818025317634e-06, "loss": 0.9501, "step": 4483 }, { "epoch": 0.1625167627124787, "grad_norm": 2.558258261332055, "learning_rate": 9.546573830507301e-06, "loss": 1.0942, "step": 4484 }, { "epoch": 0.16255300641513537, "grad_norm": 2.2412741044028737, "learning_rate": 9.546329573047933e-06, "loss": 0.9658, "step": 4485 }, { "epoch": 0.16258925011779204, "grad_norm": 2.6205873916106808, "learning_rate": 9.546085252942889e-06, "loss": 1.045, "step": 4486 }, { "epoch": 0.1626254938204487, "grad_norm": 2.554834991822844, "learning_rate": 9.545840870195541e-06, "loss": 0.8738, "step": 4487 }, { "epoch": 0.16266173752310537, "grad_norm": 2.216630295258278, "learning_rate": 9.545596424809256e-06, "loss": 0.9222, "step": 4488 }, { "epoch": 0.162697981225762, "grad_norm": 2.5707869814744053, "learning_rate": 9.545351916787399e-06, "loss": 0.9794, "step": 4489 }, { "epoch": 0.16273422492841869, "grad_norm": 2.3490021328529225, "learning_rate": 9.545107346133345e-06, "loss": 1.0557, "step": 4490 }, { "epoch": 0.16277046863107536, "grad_norm": 2.4619384078148867, "learning_rate": 9.544862712850458e-06, "loss": 0.9091, "step": 4491 }, { "epoch": 0.162806712333732, "grad_norm": 2.299149230250271, "learning_rate": 9.544618016942112e-06, "loss": 0.8341, "step": 4492 }, { "epoch": 0.16284295603638868, "grad_norm": 2.3126282123426827, "learning_rate": 9.544373258411679e-06, "loss": 0.8787, "step": 4493 }, { "epoch": 0.16287919973904533, "grad_norm": 2.542209713988156, "learning_rate": 9.54412843726253e-06, "loss": 0.9719, "step": 4494 }, { "epoch": 0.162915443441702, "grad_norm": 2.542247327892064, "learning_rate": 9.543883553498039e-06, "loss": 1.0123, "step": 4495 }, { "epoch": 0.16295168714435868, "grad_norm": 2.340186355139744, "learning_rate": 9.543638607121582e-06, "loss": 1.1011, "step": 4496 }, { "epoch": 0.16298793084701532, "grad_norm": 2.5296457303072786, "learning_rate": 9.54339359813653e-06, "loss": 0.893, "step": 4497 }, { "epoch": 0.163024174549672, "grad_norm": 2.3063433478048565, "learning_rate": 9.543148526546264e-06, "loss": 0.9853, "step": 4498 }, { "epoch": 0.16306041825232867, "grad_norm": 2.754822420058806, "learning_rate": 9.54290339235416e-06, "loss": 0.8819, "step": 4499 }, { "epoch": 0.16309666195498532, "grad_norm": 2.262777820804812, "learning_rate": 9.542658195563593e-06, "loss": 0.879, "step": 4500 }, { "epoch": 0.163132905657642, "grad_norm": 2.2171815474172027, "learning_rate": 9.542412936177945e-06, "loss": 0.991, "step": 4501 }, { "epoch": 0.16316914936029864, "grad_norm": 2.4393978884350807, "learning_rate": 9.542167614200593e-06, "loss": 0.9058, "step": 4502 }, { "epoch": 0.1632053930629553, "grad_norm": 2.65381496042779, "learning_rate": 9.541922229634917e-06, "loss": 0.994, "step": 4503 }, { "epoch": 0.16324163676561199, "grad_norm": 2.3486836744513306, "learning_rate": 9.541676782484302e-06, "loss": 0.8041, "step": 4504 }, { "epoch": 0.16327788046826863, "grad_norm": 2.5248411174884082, "learning_rate": 9.541431272752127e-06, "loss": 0.9812, "step": 4505 }, { "epoch": 0.1633141241709253, "grad_norm": 2.523087701134529, "learning_rate": 9.541185700441775e-06, "loss": 1.0094, "step": 4506 }, { "epoch": 0.16335036787358195, "grad_norm": 2.364138079391874, "learning_rate": 9.54094006555663e-06, "loss": 1.035, "step": 4507 }, { "epoch": 0.16338661157623863, "grad_norm": 2.687007470611462, "learning_rate": 9.540694368100077e-06, "loss": 1.0298, "step": 4508 }, { "epoch": 0.1634228552788953, "grad_norm": 2.297277236715246, "learning_rate": 9.540448608075504e-06, "loss": 0.9346, "step": 4509 }, { "epoch": 0.16345909898155195, "grad_norm": 2.79798240873564, "learning_rate": 9.540202785486293e-06, "loss": 0.8668, "step": 4510 }, { "epoch": 0.16349534268420862, "grad_norm": 2.3295879403971584, "learning_rate": 9.539956900335834e-06, "loss": 0.8374, "step": 4511 }, { "epoch": 0.16353158638686527, "grad_norm": 2.1986304729963244, "learning_rate": 9.539710952627515e-06, "loss": 1.0029, "step": 4512 }, { "epoch": 0.16356783008952194, "grad_norm": 2.3668213276391077, "learning_rate": 9.539464942364724e-06, "loss": 1.019, "step": 4513 }, { "epoch": 0.16360407379217862, "grad_norm": 2.4257399432639093, "learning_rate": 9.53921886955085e-06, "loss": 0.9497, "step": 4514 }, { "epoch": 0.16364031749483526, "grad_norm": 2.2547550930774847, "learning_rate": 9.538972734189288e-06, "loss": 0.7809, "step": 4515 }, { "epoch": 0.16367656119749194, "grad_norm": 2.273090666735518, "learning_rate": 9.538726536283425e-06, "loss": 0.9924, "step": 4516 }, { "epoch": 0.1637128049001486, "grad_norm": 2.47088356990492, "learning_rate": 9.538480275836658e-06, "loss": 0.9508, "step": 4517 }, { "epoch": 0.16374904860280526, "grad_norm": 2.2822791853737203, "learning_rate": 9.538233952852373e-06, "loss": 1.0709, "step": 4518 }, { "epoch": 0.16378529230546193, "grad_norm": 2.6601669023765546, "learning_rate": 9.537987567333973e-06, "loss": 1.0618, "step": 4519 }, { "epoch": 0.16382153600811858, "grad_norm": 2.5037341407654314, "learning_rate": 9.537741119284847e-06, "loss": 1.0657, "step": 4520 }, { "epoch": 0.16385777971077525, "grad_norm": 2.3817345310738847, "learning_rate": 9.537494608708393e-06, "loss": 0.9771, "step": 4521 }, { "epoch": 0.16389402341343193, "grad_norm": 2.5731834666339424, "learning_rate": 9.537248035608007e-06, "loss": 1.0947, "step": 4522 }, { "epoch": 0.16393026711608857, "grad_norm": 2.4356552376050815, "learning_rate": 9.537001399987087e-06, "loss": 0.7966, "step": 4523 }, { "epoch": 0.16396651081874525, "grad_norm": 2.5525875152242437, "learning_rate": 9.536754701849031e-06, "loss": 0.9198, "step": 4524 }, { "epoch": 0.1640027545214019, "grad_norm": 2.676103412926838, "learning_rate": 9.536507941197241e-06, "loss": 0.9624, "step": 4525 }, { "epoch": 0.16403899822405857, "grad_norm": 2.2749680538956274, "learning_rate": 9.536261118035116e-06, "loss": 1.0674, "step": 4526 }, { "epoch": 0.16407524192671524, "grad_norm": 2.2964983388090054, "learning_rate": 9.536014232366054e-06, "loss": 0.7396, "step": 4527 }, { "epoch": 0.1641114856293719, "grad_norm": 2.3707069525572777, "learning_rate": 9.535767284193458e-06, "loss": 1.0001, "step": 4528 }, { "epoch": 0.16414772933202856, "grad_norm": 2.5723685218392545, "learning_rate": 9.535520273520734e-06, "loss": 1.0213, "step": 4529 }, { "epoch": 0.1641839730346852, "grad_norm": 2.217287428871081, "learning_rate": 9.535273200351283e-06, "loss": 0.9182, "step": 4530 }, { "epoch": 0.16422021673734188, "grad_norm": 2.1826974530972754, "learning_rate": 9.535026064688511e-06, "loss": 0.7767, "step": 4531 }, { "epoch": 0.16425646043999856, "grad_norm": 2.2971055199673507, "learning_rate": 9.53477886653582e-06, "loss": 0.9242, "step": 4532 }, { "epoch": 0.1642927041426552, "grad_norm": 2.663365647843566, "learning_rate": 9.534531605896623e-06, "loss": 1.0713, "step": 4533 }, { "epoch": 0.16432894784531188, "grad_norm": 2.66759661779134, "learning_rate": 9.534284282774318e-06, "loss": 1.0408, "step": 4534 }, { "epoch": 0.16436519154796855, "grad_norm": 2.277324303959888, "learning_rate": 9.534036897172321e-06, "loss": 1.0511, "step": 4535 }, { "epoch": 0.1644014352506252, "grad_norm": 2.532078174672626, "learning_rate": 9.533789449094038e-06, "loss": 0.9806, "step": 4536 }, { "epoch": 0.16443767895328187, "grad_norm": 2.494318392462673, "learning_rate": 9.533541938542877e-06, "loss": 0.9967, "step": 4537 }, { "epoch": 0.16447392265593852, "grad_norm": 2.367975909440141, "learning_rate": 9.53329436552225e-06, "loss": 0.9692, "step": 4538 }, { "epoch": 0.1645101663585952, "grad_norm": 2.2760740328910893, "learning_rate": 9.533046730035569e-06, "loss": 1.0644, "step": 4539 }, { "epoch": 0.16454641006125187, "grad_norm": 2.470359390244511, "learning_rate": 9.532799032086244e-06, "loss": 0.7727, "step": 4540 }, { "epoch": 0.16458265376390852, "grad_norm": 2.297649514632865, "learning_rate": 9.53255127167769e-06, "loss": 0.8954, "step": 4541 }, { "epoch": 0.1646188974665652, "grad_norm": 2.270535967091179, "learning_rate": 9.532303448813323e-06, "loss": 0.8857, "step": 4542 }, { "epoch": 0.16465514116922184, "grad_norm": 2.405794089473025, "learning_rate": 9.532055563496554e-06, "loss": 0.9274, "step": 4543 }, { "epoch": 0.1646913848718785, "grad_norm": 2.6330215677217077, "learning_rate": 9.531807615730799e-06, "loss": 1.1208, "step": 4544 }, { "epoch": 0.16472762857453518, "grad_norm": 2.343514975831308, "learning_rate": 9.531559605519476e-06, "loss": 0.993, "step": 4545 }, { "epoch": 0.16476387227719183, "grad_norm": 3.020896634310854, "learning_rate": 9.531311532866004e-06, "loss": 0.9973, "step": 4546 }, { "epoch": 0.1648001159798485, "grad_norm": 2.546261919370404, "learning_rate": 9.531063397773799e-06, "loss": 1.0431, "step": 4547 }, { "epoch": 0.16483635968250515, "grad_norm": 2.4682190417661367, "learning_rate": 9.530815200246281e-06, "loss": 0.889, "step": 4548 }, { "epoch": 0.16487260338516183, "grad_norm": 2.465087855907219, "learning_rate": 9.530566940286869e-06, "loss": 0.9151, "step": 4549 }, { "epoch": 0.1649088470878185, "grad_norm": 2.431268572901253, "learning_rate": 9.530318617898983e-06, "loss": 0.9159, "step": 4550 }, { "epoch": 0.16494509079047515, "grad_norm": 2.6362547864927226, "learning_rate": 9.530070233086049e-06, "loss": 0.8795, "step": 4551 }, { "epoch": 0.16498133449313182, "grad_norm": 2.326519009866201, "learning_rate": 9.529821785851485e-06, "loss": 1.0386, "step": 4552 }, { "epoch": 0.1650175781957885, "grad_norm": 2.4233322903945904, "learning_rate": 9.529573276198718e-06, "loss": 0.9254, "step": 4553 }, { "epoch": 0.16505382189844514, "grad_norm": 2.394215951763281, "learning_rate": 9.52932470413117e-06, "loss": 0.9487, "step": 4554 }, { "epoch": 0.16509006560110182, "grad_norm": 2.812860234575083, "learning_rate": 9.529076069652267e-06, "loss": 1.247, "step": 4555 }, { "epoch": 0.16512630930375846, "grad_norm": 2.8713087936637236, "learning_rate": 9.528827372765434e-06, "loss": 0.9549, "step": 4556 }, { "epoch": 0.16516255300641514, "grad_norm": 2.7666944532825237, "learning_rate": 9.5285786134741e-06, "loss": 1.0891, "step": 4557 }, { "epoch": 0.1651987967090718, "grad_norm": 2.3380392782918036, "learning_rate": 9.52832979178169e-06, "loss": 0.9222, "step": 4558 }, { "epoch": 0.16523504041172846, "grad_norm": 2.542843243475879, "learning_rate": 9.528080907691634e-06, "loss": 1.0057, "step": 4559 }, { "epoch": 0.16527128411438513, "grad_norm": 2.4465590921888487, "learning_rate": 9.527831961207363e-06, "loss": 1.0189, "step": 4560 }, { "epoch": 0.16530752781704178, "grad_norm": 2.8577120416213155, "learning_rate": 9.527582952332306e-06, "loss": 1.0037, "step": 4561 }, { "epoch": 0.16534377151969845, "grad_norm": 2.273524597204244, "learning_rate": 9.527333881069894e-06, "loss": 0.8964, "step": 4562 }, { "epoch": 0.16538001522235513, "grad_norm": 2.317273093164928, "learning_rate": 9.527084747423558e-06, "loss": 1.0747, "step": 4563 }, { "epoch": 0.16541625892501177, "grad_norm": 2.2669332456033677, "learning_rate": 9.526835551396733e-06, "loss": 1.0083, "step": 4564 }, { "epoch": 0.16545250262766845, "grad_norm": 2.494061890068194, "learning_rate": 9.526586292992852e-06, "loss": 0.9224, "step": 4565 }, { "epoch": 0.1654887463303251, "grad_norm": 2.708681823782093, "learning_rate": 9.52633697221535e-06, "loss": 1.1956, "step": 4566 }, { "epoch": 0.16552499003298177, "grad_norm": 2.584498827256706, "learning_rate": 9.526087589067658e-06, "loss": 0.9614, "step": 4567 }, { "epoch": 0.16556123373563844, "grad_norm": 2.512631474731318, "learning_rate": 9.525838143553221e-06, "loss": 1.0212, "step": 4568 }, { "epoch": 0.1655974774382951, "grad_norm": 2.416463017060429, "learning_rate": 9.525588635675468e-06, "loss": 0.9685, "step": 4569 }, { "epoch": 0.16563372114095176, "grad_norm": 2.4527092079659805, "learning_rate": 9.525339065437844e-06, "loss": 0.9578, "step": 4570 }, { "epoch": 0.16566996484360844, "grad_norm": 2.3627791493703647, "learning_rate": 9.525089432843783e-06, "loss": 0.8879, "step": 4571 }, { "epoch": 0.16570620854626508, "grad_norm": 2.6500531127773392, "learning_rate": 9.524839737896725e-06, "loss": 1.1701, "step": 4572 }, { "epoch": 0.16574245224892176, "grad_norm": 2.2757787761152115, "learning_rate": 9.524589980600114e-06, "loss": 0.8073, "step": 4573 }, { "epoch": 0.1657786959515784, "grad_norm": 2.6994132067569034, "learning_rate": 9.524340160957389e-06, "loss": 0.983, "step": 4574 }, { "epoch": 0.16581493965423508, "grad_norm": 2.2300097883819014, "learning_rate": 9.524090278971992e-06, "loss": 1.0137, "step": 4575 }, { "epoch": 0.16585118335689175, "grad_norm": 2.346062498426884, "learning_rate": 9.523840334647367e-06, "loss": 1.0696, "step": 4576 }, { "epoch": 0.1658874270595484, "grad_norm": 2.380551234556601, "learning_rate": 9.523590327986958e-06, "loss": 1.0757, "step": 4577 }, { "epoch": 0.16592367076220507, "grad_norm": 2.692879765028872, "learning_rate": 9.523340258994212e-06, "loss": 1.0332, "step": 4578 }, { "epoch": 0.16595991446486172, "grad_norm": 2.734126031944526, "learning_rate": 9.523090127672571e-06, "loss": 0.9679, "step": 4579 }, { "epoch": 0.1659961581675184, "grad_norm": 2.7390126251997113, "learning_rate": 9.522839934025485e-06, "loss": 0.9299, "step": 4580 }, { "epoch": 0.16603240187017507, "grad_norm": 2.3752688992829825, "learning_rate": 9.522589678056399e-06, "loss": 0.972, "step": 4581 }, { "epoch": 0.1660686455728317, "grad_norm": 2.330809188291074, "learning_rate": 9.522339359768762e-06, "loss": 0.8884, "step": 4582 }, { "epoch": 0.1661048892754884, "grad_norm": 2.2971346563958353, "learning_rate": 9.522088979166025e-06, "loss": 0.8609, "step": 4583 }, { "epoch": 0.16614113297814503, "grad_norm": 2.595931839422188, "learning_rate": 9.521838536251636e-06, "loss": 0.8942, "step": 4584 }, { "epoch": 0.1661773766808017, "grad_norm": 2.467628131937555, "learning_rate": 9.521588031029046e-06, "loss": 1.1028, "step": 4585 }, { "epoch": 0.16621362038345838, "grad_norm": 2.4542366397235345, "learning_rate": 9.521337463501707e-06, "loss": 0.9976, "step": 4586 }, { "epoch": 0.16624986408611503, "grad_norm": 2.087529070486771, "learning_rate": 9.521086833673074e-06, "loss": 1.0071, "step": 4587 }, { "epoch": 0.1662861077887717, "grad_norm": 2.3752418489762803, "learning_rate": 9.520836141546597e-06, "loss": 0.9944, "step": 4588 }, { "epoch": 0.16632235149142838, "grad_norm": 2.569874119073244, "learning_rate": 9.520585387125733e-06, "loss": 0.9823, "step": 4589 }, { "epoch": 0.16635859519408502, "grad_norm": 2.140392906671451, "learning_rate": 9.520334570413937e-06, "loss": 0.8081, "step": 4590 }, { "epoch": 0.1663948388967417, "grad_norm": 2.49923660774691, "learning_rate": 9.520083691414664e-06, "loss": 1.0913, "step": 4591 }, { "epoch": 0.16643108259939834, "grad_norm": 2.4058970849403747, "learning_rate": 9.519832750131371e-06, "loss": 0.9629, "step": 4592 }, { "epoch": 0.16646732630205502, "grad_norm": 2.4291543393781927, "learning_rate": 9.519581746567517e-06, "loss": 1.0424, "step": 4593 }, { "epoch": 0.1665035700047117, "grad_norm": 2.4271343563538332, "learning_rate": 9.51933068072656e-06, "loss": 0.9384, "step": 4594 }, { "epoch": 0.16653981370736834, "grad_norm": 2.4389010450493562, "learning_rate": 9.519079552611958e-06, "loss": 0.922, "step": 4595 }, { "epoch": 0.166576057410025, "grad_norm": 2.0870691791317846, "learning_rate": 9.518828362227173e-06, "loss": 0.9226, "step": 4596 }, { "epoch": 0.16661230111268166, "grad_norm": 2.628311993027648, "learning_rate": 9.518577109575666e-06, "loss": 0.9781, "step": 4597 }, { "epoch": 0.16664854481533833, "grad_norm": 2.4739022638121693, "learning_rate": 9.5183257946609e-06, "loss": 0.9884, "step": 4598 }, { "epoch": 0.166684788517995, "grad_norm": 2.3056205662614846, "learning_rate": 9.518074417486338e-06, "loss": 0.9735, "step": 4599 }, { "epoch": 0.16672103222065165, "grad_norm": 2.5166268775433496, "learning_rate": 9.517822978055442e-06, "loss": 1.2489, "step": 4600 }, { "epoch": 0.16675727592330833, "grad_norm": 2.465255850619028, "learning_rate": 9.517571476371676e-06, "loss": 0.8497, "step": 4601 }, { "epoch": 0.16679351962596498, "grad_norm": 2.5043383200847775, "learning_rate": 9.51731991243851e-06, "loss": 0.9208, "step": 4602 }, { "epoch": 0.16682976332862165, "grad_norm": 2.448279776819358, "learning_rate": 9.517068286259406e-06, "loss": 0.8281, "step": 4603 }, { "epoch": 0.16686600703127832, "grad_norm": 2.3808159283845187, "learning_rate": 9.516816597837832e-06, "loss": 0.9448, "step": 4604 }, { "epoch": 0.16690225073393497, "grad_norm": 2.948374453247955, "learning_rate": 9.516564847177259e-06, "loss": 0.9086, "step": 4605 }, { "epoch": 0.16693849443659164, "grad_norm": 2.2623728348363383, "learning_rate": 9.516313034281153e-06, "loss": 0.893, "step": 4606 }, { "epoch": 0.16697473813924832, "grad_norm": 2.2089039783127076, "learning_rate": 9.516061159152985e-06, "loss": 0.7418, "step": 4607 }, { "epoch": 0.16701098184190497, "grad_norm": 2.2119843505494683, "learning_rate": 9.515809221796225e-06, "loss": 0.9476, "step": 4608 }, { "epoch": 0.16704722554456164, "grad_norm": 2.34648324173587, "learning_rate": 9.515557222214345e-06, "loss": 0.903, "step": 4609 }, { "epoch": 0.16708346924721829, "grad_norm": 2.591769252307976, "learning_rate": 9.515305160410819e-06, "loss": 0.9332, "step": 4610 }, { "epoch": 0.16711971294987496, "grad_norm": 2.2862700677575787, "learning_rate": 9.515053036389118e-06, "loss": 0.9324, "step": 4611 }, { "epoch": 0.16715595665253163, "grad_norm": 2.7187327433135207, "learning_rate": 9.514800850152714e-06, "loss": 1.0348, "step": 4612 }, { "epoch": 0.16719220035518828, "grad_norm": 1.993404793207771, "learning_rate": 9.514548601705087e-06, "loss": 0.8154, "step": 4613 }, { "epoch": 0.16722844405784495, "grad_norm": 2.2791659226504413, "learning_rate": 9.51429629104971e-06, "loss": 0.9142, "step": 4614 }, { "epoch": 0.1672646877605016, "grad_norm": 2.244727239919798, "learning_rate": 9.51404391819006e-06, "loss": 1.0164, "step": 4615 }, { "epoch": 0.16730093146315828, "grad_norm": 2.2743593385431438, "learning_rate": 9.513791483129618e-06, "loss": 1.03, "step": 4616 }, { "epoch": 0.16733717516581495, "grad_norm": 2.4057059817890813, "learning_rate": 9.513538985871856e-06, "loss": 1.0131, "step": 4617 }, { "epoch": 0.1673734188684716, "grad_norm": 2.6086256319201673, "learning_rate": 9.513286426420256e-06, "loss": 0.9747, "step": 4618 }, { "epoch": 0.16740966257112827, "grad_norm": 2.5731363455128875, "learning_rate": 9.5130338047783e-06, "loss": 1.117, "step": 4619 }, { "epoch": 0.16744590627378492, "grad_norm": 2.636700969225966, "learning_rate": 9.512781120949469e-06, "loss": 0.9433, "step": 4620 }, { "epoch": 0.1674821499764416, "grad_norm": 2.3554613920282574, "learning_rate": 9.512528374937241e-06, "loss": 1.0315, "step": 4621 }, { "epoch": 0.16751839367909827, "grad_norm": 2.606790061259277, "learning_rate": 9.512275566745102e-06, "loss": 1.121, "step": 4622 }, { "epoch": 0.1675546373817549, "grad_norm": 2.452104561059293, "learning_rate": 9.512022696376535e-06, "loss": 0.8824, "step": 4623 }, { "epoch": 0.16759088108441159, "grad_norm": 2.6717438702988443, "learning_rate": 9.511769763835023e-06, "loss": 0.9942, "step": 4624 }, { "epoch": 0.16762712478706826, "grad_norm": 2.043544819243406, "learning_rate": 9.511516769124054e-06, "loss": 0.8926, "step": 4625 }, { "epoch": 0.1676633684897249, "grad_norm": 2.5289709920438654, "learning_rate": 9.51126371224711e-06, "loss": 1.0925, "step": 4626 }, { "epoch": 0.16769961219238158, "grad_norm": 2.340417599628167, "learning_rate": 9.511010593207681e-06, "loss": 0.8736, "step": 4627 }, { "epoch": 0.16773585589503823, "grad_norm": 2.399536534651461, "learning_rate": 9.510757412009254e-06, "loss": 0.8815, "step": 4628 }, { "epoch": 0.1677720995976949, "grad_norm": 2.922220444470166, "learning_rate": 9.51050416865532e-06, "loss": 1.076, "step": 4629 }, { "epoch": 0.16780834330035158, "grad_norm": 2.437387482616963, "learning_rate": 9.510250863149364e-06, "loss": 1.0283, "step": 4630 }, { "epoch": 0.16784458700300822, "grad_norm": 2.6890036206233296, "learning_rate": 9.509997495494881e-06, "loss": 0.9954, "step": 4631 }, { "epoch": 0.1678808307056649, "grad_norm": 2.348921034938062, "learning_rate": 9.509744065695359e-06, "loss": 0.9607, "step": 4632 }, { "epoch": 0.16791707440832154, "grad_norm": 2.6109996639847424, "learning_rate": 9.509490573754291e-06, "loss": 0.9268, "step": 4633 }, { "epoch": 0.16795331811097822, "grad_norm": 2.593563844413245, "learning_rate": 9.509237019675171e-06, "loss": 1.1802, "step": 4634 }, { "epoch": 0.1679895618136349, "grad_norm": 2.562904288900895, "learning_rate": 9.50898340346149e-06, "loss": 0.9065, "step": 4635 }, { "epoch": 0.16802580551629154, "grad_norm": 2.7457511389684384, "learning_rate": 9.508729725116746e-06, "loss": 0.9216, "step": 4636 }, { "epoch": 0.1680620492189482, "grad_norm": 2.3756213456257367, "learning_rate": 9.508475984644433e-06, "loss": 1.2101, "step": 4637 }, { "epoch": 0.16809829292160486, "grad_norm": 2.50441653944505, "learning_rate": 9.508222182048048e-06, "loss": 0.7545, "step": 4638 }, { "epoch": 0.16813453662426153, "grad_norm": 2.407740080974176, "learning_rate": 9.507968317331087e-06, "loss": 1.0672, "step": 4639 }, { "epoch": 0.1681707803269182, "grad_norm": 2.1650013075701766, "learning_rate": 9.50771439049705e-06, "loss": 0.9072, "step": 4640 }, { "epoch": 0.16820702402957485, "grad_norm": 2.42687876145448, "learning_rate": 9.507460401549434e-06, "loss": 0.9577, "step": 4641 }, { "epoch": 0.16824326773223153, "grad_norm": 2.3744589307509356, "learning_rate": 9.50720635049174e-06, "loss": 0.9297, "step": 4642 }, { "epoch": 0.1682795114348882, "grad_norm": 2.7100573021700365, "learning_rate": 9.506952237327469e-06, "loss": 0.8767, "step": 4643 }, { "epoch": 0.16831575513754485, "grad_norm": 2.288312868959025, "learning_rate": 9.50669806206012e-06, "loss": 1.0882, "step": 4644 }, { "epoch": 0.16835199884020152, "grad_norm": 1.909533801865764, "learning_rate": 9.506443824693199e-06, "loss": 0.9395, "step": 4645 }, { "epoch": 0.16838824254285817, "grad_norm": 2.4681323209339654, "learning_rate": 9.506189525230208e-06, "loss": 0.9362, "step": 4646 }, { "epoch": 0.16842448624551484, "grad_norm": 2.278612433312867, "learning_rate": 9.505935163674649e-06, "loss": 0.9946, "step": 4647 }, { "epoch": 0.16846072994817152, "grad_norm": 2.3778157483907396, "learning_rate": 9.50568074003003e-06, "loss": 0.9818, "step": 4648 }, { "epoch": 0.16849697365082816, "grad_norm": 2.1999590104330453, "learning_rate": 9.505426254299853e-06, "loss": 0.8904, "step": 4649 }, { "epoch": 0.16853321735348484, "grad_norm": 2.8317191338159153, "learning_rate": 9.505171706487627e-06, "loss": 1.1421, "step": 4650 }, { "epoch": 0.16856946105614148, "grad_norm": 2.474264649542442, "learning_rate": 9.504917096596861e-06, "loss": 1.0663, "step": 4651 }, { "epoch": 0.16860570475879816, "grad_norm": 2.2244007379379305, "learning_rate": 9.504662424631064e-06, "loss": 0.9036, "step": 4652 }, { "epoch": 0.16864194846145483, "grad_norm": 2.2206706992537026, "learning_rate": 9.50440769059374e-06, "loss": 0.9659, "step": 4653 }, { "epoch": 0.16867819216411148, "grad_norm": 2.1958198455532902, "learning_rate": 9.504152894488403e-06, "loss": 1.0334, "step": 4654 }, { "epoch": 0.16871443586676815, "grad_norm": 2.7206097651437027, "learning_rate": 9.503898036318564e-06, "loss": 1.1144, "step": 4655 }, { "epoch": 0.1687506795694248, "grad_norm": 2.5298368741849715, "learning_rate": 9.503643116087733e-06, "loss": 0.9998, "step": 4656 }, { "epoch": 0.16878692327208147, "grad_norm": 2.257139137366199, "learning_rate": 9.503388133799425e-06, "loss": 0.8475, "step": 4657 }, { "epoch": 0.16882316697473815, "grad_norm": 2.6294839113146726, "learning_rate": 9.503133089457151e-06, "loss": 1.0881, "step": 4658 }, { "epoch": 0.1688594106773948, "grad_norm": 2.372827164804985, "learning_rate": 9.502877983064427e-06, "loss": 0.8963, "step": 4659 }, { "epoch": 0.16889565438005147, "grad_norm": 2.696911228459441, "learning_rate": 9.502622814624765e-06, "loss": 0.9908, "step": 4660 }, { "epoch": 0.16893189808270814, "grad_norm": 2.389702901259307, "learning_rate": 9.502367584141687e-06, "loss": 0.9958, "step": 4661 }, { "epoch": 0.1689681417853648, "grad_norm": 2.3499690274578557, "learning_rate": 9.502112291618705e-06, "loss": 0.8966, "step": 4662 }, { "epoch": 0.16900438548802146, "grad_norm": 2.34490267045685, "learning_rate": 9.501856937059338e-06, "loss": 0.8573, "step": 4663 }, { "epoch": 0.1690406291906781, "grad_norm": 2.4941429379945763, "learning_rate": 9.501601520467106e-06, "loss": 1.1135, "step": 4664 }, { "epoch": 0.16907687289333478, "grad_norm": 2.4264755811239054, "learning_rate": 9.501346041845527e-06, "loss": 0.9437, "step": 4665 }, { "epoch": 0.16911311659599146, "grad_norm": 2.3295574553024143, "learning_rate": 9.501090501198123e-06, "loss": 1.0579, "step": 4666 }, { "epoch": 0.1691493602986481, "grad_norm": 2.3627334957937567, "learning_rate": 9.500834898528412e-06, "loss": 0.9492, "step": 4667 }, { "epoch": 0.16918560400130478, "grad_norm": 2.3458731649848934, "learning_rate": 9.500579233839921e-06, "loss": 1.0306, "step": 4668 }, { "epoch": 0.16922184770396143, "grad_norm": 2.1735084405459917, "learning_rate": 9.500323507136168e-06, "loss": 0.87, "step": 4669 }, { "epoch": 0.1692580914066181, "grad_norm": 2.3261189708528187, "learning_rate": 9.50006771842068e-06, "loss": 0.9913, "step": 4670 }, { "epoch": 0.16929433510927477, "grad_norm": 2.3767074504932926, "learning_rate": 9.49981186769698e-06, "loss": 0.8795, "step": 4671 }, { "epoch": 0.16933057881193142, "grad_norm": 2.282683355698862, "learning_rate": 9.499555954968592e-06, "loss": 0.9097, "step": 4672 }, { "epoch": 0.1693668225145881, "grad_norm": 2.102102039721528, "learning_rate": 9.499299980239046e-06, "loss": 0.9059, "step": 4673 }, { "epoch": 0.16940306621724474, "grad_norm": 2.4552427013633453, "learning_rate": 9.499043943511868e-06, "loss": 1.0669, "step": 4674 }, { "epoch": 0.16943930991990142, "grad_norm": 2.273072711519824, "learning_rate": 9.498787844790584e-06, "loss": 0.7888, "step": 4675 }, { "epoch": 0.1694755536225581, "grad_norm": 2.358421717764828, "learning_rate": 9.498531684078725e-06, "loss": 0.8643, "step": 4676 }, { "epoch": 0.16951179732521474, "grad_norm": 2.481622105481607, "learning_rate": 9.498275461379819e-06, "loss": 0.9476, "step": 4677 }, { "epoch": 0.1695480410278714, "grad_norm": 2.376859454655569, "learning_rate": 9.498019176697398e-06, "loss": 0.9873, "step": 4678 }, { "epoch": 0.16958428473052808, "grad_norm": 2.464896793747158, "learning_rate": 9.497762830034993e-06, "loss": 1.1199, "step": 4679 }, { "epoch": 0.16962052843318473, "grad_norm": 2.4876816672885265, "learning_rate": 9.497506421396137e-06, "loss": 0.9478, "step": 4680 }, { "epoch": 0.1696567721358414, "grad_norm": 2.427793627999957, "learning_rate": 9.497249950784361e-06, "loss": 0.9792, "step": 4681 }, { "epoch": 0.16969301583849805, "grad_norm": 2.403314767734754, "learning_rate": 9.496993418203204e-06, "loss": 0.9565, "step": 4682 }, { "epoch": 0.16972925954115473, "grad_norm": 2.362456863205187, "learning_rate": 9.496736823656193e-06, "loss": 1.2944, "step": 4683 }, { "epoch": 0.1697655032438114, "grad_norm": 2.2332609528452227, "learning_rate": 9.496480167146871e-06, "loss": 1.0733, "step": 4684 }, { "epoch": 0.16980174694646805, "grad_norm": 2.11205692369904, "learning_rate": 9.496223448678772e-06, "loss": 0.9076, "step": 4685 }, { "epoch": 0.16983799064912472, "grad_norm": 2.245729515664774, "learning_rate": 9.495966668255431e-06, "loss": 1.0295, "step": 4686 }, { "epoch": 0.16987423435178137, "grad_norm": 2.26146633813655, "learning_rate": 9.49570982588039e-06, "loss": 1.0593, "step": 4687 }, { "epoch": 0.16991047805443804, "grad_norm": 2.252647187848546, "learning_rate": 9.495452921557187e-06, "loss": 1.0053, "step": 4688 }, { "epoch": 0.16994672175709472, "grad_norm": 2.429073546489585, "learning_rate": 9.49519595528936e-06, "loss": 1.0358, "step": 4689 }, { "epoch": 0.16998296545975136, "grad_norm": 2.510358371993552, "learning_rate": 9.494938927080451e-06, "loss": 1.1338, "step": 4690 }, { "epoch": 0.17001920916240804, "grad_norm": 2.3931246163833237, "learning_rate": 9.494681836934003e-06, "loss": 1.086, "step": 4691 }, { "epoch": 0.17005545286506468, "grad_norm": 2.280904578011019, "learning_rate": 9.494424684853559e-06, "loss": 1.0592, "step": 4692 }, { "epoch": 0.17009169656772136, "grad_norm": 2.444078730922941, "learning_rate": 9.494167470842658e-06, "loss": 0.9969, "step": 4693 }, { "epoch": 0.17012794027037803, "grad_norm": 2.327131889038875, "learning_rate": 9.493910194904849e-06, "loss": 0.9838, "step": 4694 }, { "epoch": 0.17016418397303468, "grad_norm": 4.140918899886444, "learning_rate": 9.493652857043675e-06, "loss": 0.9391, "step": 4695 }, { "epoch": 0.17020042767569135, "grad_norm": 2.3230860148085597, "learning_rate": 9.493395457262682e-06, "loss": 0.9902, "step": 4696 }, { "epoch": 0.17023667137834803, "grad_norm": 2.081451400893518, "learning_rate": 9.493137995565418e-06, "loss": 0.854, "step": 4697 }, { "epoch": 0.17027291508100467, "grad_norm": 2.0915260063864363, "learning_rate": 9.492880471955429e-06, "loss": 0.9115, "step": 4698 }, { "epoch": 0.17030915878366135, "grad_norm": 2.204881104031669, "learning_rate": 9.492622886436264e-06, "loss": 1.0562, "step": 4699 }, { "epoch": 0.170345402486318, "grad_norm": 2.250016209198473, "learning_rate": 9.492365239011475e-06, "loss": 0.9558, "step": 4700 }, { "epoch": 0.17038164618897467, "grad_norm": 2.7212557502598944, "learning_rate": 9.492107529684607e-06, "loss": 1.1069, "step": 4701 }, { "epoch": 0.17041788989163134, "grad_norm": 2.5514102816862563, "learning_rate": 9.491849758459215e-06, "loss": 1.1148, "step": 4702 }, { "epoch": 0.170454133594288, "grad_norm": 2.360881061607966, "learning_rate": 9.49159192533885e-06, "loss": 1.0413, "step": 4703 }, { "epoch": 0.17049037729694466, "grad_norm": 2.3143349572215817, "learning_rate": 9.491334030327064e-06, "loss": 0.8453, "step": 4704 }, { "epoch": 0.1705266209996013, "grad_norm": 2.3364054866244746, "learning_rate": 9.491076073427412e-06, "loss": 1.1306, "step": 4705 }, { "epoch": 0.17056286470225798, "grad_norm": 2.5659984719567586, "learning_rate": 9.490818054643449e-06, "loss": 1.2096, "step": 4706 }, { "epoch": 0.17059910840491466, "grad_norm": 2.4456371601777542, "learning_rate": 9.490559973978729e-06, "loss": 0.9444, "step": 4707 }, { "epoch": 0.1706353521075713, "grad_norm": 2.303646259235002, "learning_rate": 9.490301831436806e-06, "loss": 0.8853, "step": 4708 }, { "epoch": 0.17067159581022798, "grad_norm": 2.610666127820157, "learning_rate": 9.49004362702124e-06, "loss": 0.98, "step": 4709 }, { "epoch": 0.17070783951288462, "grad_norm": 2.454566657092995, "learning_rate": 9.48978536073559e-06, "loss": 0.814, "step": 4710 }, { "epoch": 0.1707440832155413, "grad_norm": 2.8205306428824266, "learning_rate": 9.489527032583413e-06, "loss": 0.8879, "step": 4711 }, { "epoch": 0.17078032691819797, "grad_norm": 2.394176076141765, "learning_rate": 9.489268642568267e-06, "loss": 1.0297, "step": 4712 }, { "epoch": 0.17081657062085462, "grad_norm": 2.5256413444404977, "learning_rate": 9.489010190693714e-06, "loss": 0.9845, "step": 4713 }, { "epoch": 0.1708528143235113, "grad_norm": 2.583935893444497, "learning_rate": 9.488751676963316e-06, "loss": 0.9031, "step": 4714 }, { "epoch": 0.17088905802616797, "grad_norm": 2.58866878825815, "learning_rate": 9.488493101380636e-06, "loss": 0.9538, "step": 4715 }, { "epoch": 0.1709253017288246, "grad_norm": 2.0770394836593176, "learning_rate": 9.488234463949235e-06, "loss": 1.057, "step": 4716 }, { "epoch": 0.1709615454314813, "grad_norm": 2.2786268029093075, "learning_rate": 9.487975764672677e-06, "loss": 0.9268, "step": 4717 }, { "epoch": 0.17099778913413793, "grad_norm": 2.392551870491659, "learning_rate": 9.487717003554527e-06, "loss": 0.9685, "step": 4718 }, { "epoch": 0.1710340328367946, "grad_norm": 2.36493780868628, "learning_rate": 9.48745818059835e-06, "loss": 1.0907, "step": 4719 }, { "epoch": 0.17107027653945128, "grad_norm": 2.3616446358234175, "learning_rate": 9.487199295807715e-06, "loss": 0.8462, "step": 4720 }, { "epoch": 0.17110652024210793, "grad_norm": 2.2924547127238997, "learning_rate": 9.486940349186187e-06, "loss": 0.8341, "step": 4721 }, { "epoch": 0.1711427639447646, "grad_norm": 2.5679410197012995, "learning_rate": 9.486681340737334e-06, "loss": 0.9632, "step": 4722 }, { "epoch": 0.17117900764742125, "grad_norm": 2.5634990133272764, "learning_rate": 9.486422270464726e-06, "loss": 1.0286, "step": 4723 }, { "epoch": 0.17121525135007792, "grad_norm": 2.4297606496318567, "learning_rate": 9.486163138371934e-06, "loss": 0.873, "step": 4724 }, { "epoch": 0.1712514950527346, "grad_norm": 2.0534023679821103, "learning_rate": 9.485903944462524e-06, "loss": 0.8589, "step": 4725 }, { "epoch": 0.17128773875539124, "grad_norm": 2.629687221188024, "learning_rate": 9.485644688740072e-06, "loss": 1.1643, "step": 4726 }, { "epoch": 0.17132398245804792, "grad_norm": 2.6868464215845655, "learning_rate": 9.485385371208148e-06, "loss": 1.0486, "step": 4727 }, { "epoch": 0.17136022616070457, "grad_norm": 2.3777442134628406, "learning_rate": 9.485125991870328e-06, "loss": 0.9791, "step": 4728 }, { "epoch": 0.17139646986336124, "grad_norm": 2.5207500408232946, "learning_rate": 9.484866550730184e-06, "loss": 0.8281, "step": 4729 }, { "epoch": 0.1714327135660179, "grad_norm": 2.5116590896239464, "learning_rate": 9.484607047791293e-06, "loss": 0.9504, "step": 4730 }, { "epoch": 0.17146895726867456, "grad_norm": 2.3210389019304434, "learning_rate": 9.484347483057225e-06, "loss": 0.8423, "step": 4731 }, { "epoch": 0.17150520097133123, "grad_norm": 2.7288071321401848, "learning_rate": 9.484087856531564e-06, "loss": 0.9228, "step": 4732 }, { "epoch": 0.1715414446739879, "grad_norm": 2.4061488364532795, "learning_rate": 9.483828168217883e-06, "loss": 0.9772, "step": 4733 }, { "epoch": 0.17157768837664455, "grad_norm": 2.3972538537519656, "learning_rate": 9.48356841811976e-06, "loss": 0.8666, "step": 4734 }, { "epoch": 0.17161393207930123, "grad_norm": 2.372891341355698, "learning_rate": 9.483308606240777e-06, "loss": 1.0659, "step": 4735 }, { "epoch": 0.17165017578195788, "grad_norm": 2.5081958108021922, "learning_rate": 9.483048732584515e-06, "loss": 0.9465, "step": 4736 }, { "epoch": 0.17168641948461455, "grad_norm": 2.506267070738672, "learning_rate": 9.48278879715455e-06, "loss": 1.0281, "step": 4737 }, { "epoch": 0.17172266318727122, "grad_norm": 2.0838097042723773, "learning_rate": 9.482528799954467e-06, "loss": 1.0125, "step": 4738 }, { "epoch": 0.17175890688992787, "grad_norm": 2.4037178980273417, "learning_rate": 9.48226874098785e-06, "loss": 0.9136, "step": 4739 }, { "epoch": 0.17179515059258454, "grad_norm": 2.3594966422654875, "learning_rate": 9.482008620258279e-06, "loss": 1.0279, "step": 4740 }, { "epoch": 0.1718313942952412, "grad_norm": 2.2109459638515303, "learning_rate": 9.481748437769338e-06, "loss": 0.8451, "step": 4741 }, { "epoch": 0.17186763799789787, "grad_norm": 2.4153683700591726, "learning_rate": 9.481488193524617e-06, "loss": 1.1336, "step": 4742 }, { "epoch": 0.17190388170055454, "grad_norm": 2.2365684285283947, "learning_rate": 9.481227887527697e-06, "loss": 1.0309, "step": 4743 }, { "epoch": 0.17194012540321119, "grad_norm": 2.5776416637779778, "learning_rate": 9.480967519782169e-06, "loss": 0.9668, "step": 4744 }, { "epoch": 0.17197636910586786, "grad_norm": 2.499490308942671, "learning_rate": 9.480707090291616e-06, "loss": 1.0856, "step": 4745 }, { "epoch": 0.1720126128085245, "grad_norm": 2.578083800280594, "learning_rate": 9.480446599059631e-06, "loss": 0.8942, "step": 4746 }, { "epoch": 0.17204885651118118, "grad_norm": 2.410760569635043, "learning_rate": 9.480186046089801e-06, "loss": 0.7705, "step": 4747 }, { "epoch": 0.17208510021383785, "grad_norm": 2.2904886494733376, "learning_rate": 9.479925431385717e-06, "loss": 0.9634, "step": 4748 }, { "epoch": 0.1721213439164945, "grad_norm": 2.735498992631743, "learning_rate": 9.47966475495097e-06, "loss": 0.955, "step": 4749 }, { "epoch": 0.17215758761915118, "grad_norm": 2.457037396041435, "learning_rate": 9.479404016789152e-06, "loss": 0.8708, "step": 4750 }, { "epoch": 0.17219383132180782, "grad_norm": 2.3951731396137634, "learning_rate": 9.479143216903855e-06, "loss": 0.8995, "step": 4751 }, { "epoch": 0.1722300750244645, "grad_norm": 2.487574261309284, "learning_rate": 9.478882355298674e-06, "loss": 1.0254, "step": 4752 }, { "epoch": 0.17226631872712117, "grad_norm": 2.336678479850815, "learning_rate": 9.478621431977203e-06, "loss": 0.9497, "step": 4753 }, { "epoch": 0.17230256242977782, "grad_norm": 2.3428507621424655, "learning_rate": 9.478360446943036e-06, "loss": 1.0635, "step": 4754 }, { "epoch": 0.1723388061324345, "grad_norm": 2.4876127971849056, "learning_rate": 9.478099400199771e-06, "loss": 0.8506, "step": 4755 }, { "epoch": 0.17237504983509117, "grad_norm": 3.042371342587797, "learning_rate": 9.477838291751007e-06, "loss": 1.0382, "step": 4756 }, { "epoch": 0.1724112935377478, "grad_norm": 2.0497318354260576, "learning_rate": 9.477577121600337e-06, "loss": 0.8627, "step": 4757 }, { "epoch": 0.17244753724040449, "grad_norm": 2.344263042853756, "learning_rate": 9.477315889751363e-06, "loss": 0.8956, "step": 4758 }, { "epoch": 0.17248378094306113, "grad_norm": 2.319609732846436, "learning_rate": 9.477054596207683e-06, "loss": 0.8934, "step": 4759 }, { "epoch": 0.1725200246457178, "grad_norm": 2.5351772459412105, "learning_rate": 9.476793240972897e-06, "loss": 0.9532, "step": 4760 }, { "epoch": 0.17255626834837448, "grad_norm": 2.5862021489248517, "learning_rate": 9.47653182405061e-06, "loss": 1.0352, "step": 4761 }, { "epoch": 0.17259251205103113, "grad_norm": 2.3133888959745286, "learning_rate": 9.476270345444421e-06, "loss": 1.1593, "step": 4762 }, { "epoch": 0.1726287557536878, "grad_norm": 2.220645255931956, "learning_rate": 9.476008805157934e-06, "loss": 0.9248, "step": 4763 }, { "epoch": 0.17266499945634445, "grad_norm": 2.5357931387842156, "learning_rate": 9.475747203194752e-06, "loss": 0.8978, "step": 4764 }, { "epoch": 0.17270124315900112, "grad_norm": 2.8661753881207783, "learning_rate": 9.475485539558481e-06, "loss": 0.9574, "step": 4765 }, { "epoch": 0.1727374868616578, "grad_norm": 2.2968314653805852, "learning_rate": 9.475223814252726e-06, "loss": 0.9422, "step": 4766 }, { "epoch": 0.17277373056431444, "grad_norm": 2.4162771677911237, "learning_rate": 9.474962027281094e-06, "loss": 1.0004, "step": 4767 }, { "epoch": 0.17280997426697112, "grad_norm": 3.05484663816504, "learning_rate": 9.474700178647191e-06, "loss": 0.9335, "step": 4768 }, { "epoch": 0.17284621796962776, "grad_norm": 2.0398533564378716, "learning_rate": 9.474438268354626e-06, "loss": 0.8663, "step": 4769 }, { "epoch": 0.17288246167228444, "grad_norm": 2.0795847537910217, "learning_rate": 9.474176296407007e-06, "loss": 0.8161, "step": 4770 }, { "epoch": 0.1729187053749411, "grad_norm": 2.227336477480959, "learning_rate": 9.473914262807944e-06, "loss": 0.8748, "step": 4771 }, { "epoch": 0.17295494907759776, "grad_norm": 2.616401546645186, "learning_rate": 9.47365216756105e-06, "loss": 0.915, "step": 4772 }, { "epoch": 0.17299119278025443, "grad_norm": 2.550182535161653, "learning_rate": 9.473390010669934e-06, "loss": 1.0954, "step": 4773 }, { "epoch": 0.1730274364829111, "grad_norm": 2.4295006609236074, "learning_rate": 9.473127792138209e-06, "loss": 1.2314, "step": 4774 }, { "epoch": 0.17306368018556775, "grad_norm": 2.4658913349351, "learning_rate": 9.472865511969486e-06, "loss": 1.0059, "step": 4775 }, { "epoch": 0.17309992388822443, "grad_norm": 2.599400269808497, "learning_rate": 9.472603170167386e-06, "loss": 1.3106, "step": 4776 }, { "epoch": 0.17313616759088107, "grad_norm": 2.2854001447742966, "learning_rate": 9.472340766735515e-06, "loss": 1.0765, "step": 4777 }, { "epoch": 0.17317241129353775, "grad_norm": 2.3512738191935143, "learning_rate": 9.472078301677495e-06, "loss": 0.9077, "step": 4778 }, { "epoch": 0.17320865499619442, "grad_norm": 2.5449217370162955, "learning_rate": 9.471815774996941e-06, "loss": 0.7645, "step": 4779 }, { "epoch": 0.17324489869885107, "grad_norm": 2.5453358094022027, "learning_rate": 9.471553186697468e-06, "loss": 0.7397, "step": 4780 }, { "epoch": 0.17328114240150774, "grad_norm": 2.455332525561816, "learning_rate": 9.4712905367827e-06, "loss": 0.9159, "step": 4781 }, { "epoch": 0.1733173861041644, "grad_norm": 2.3819597842338616, "learning_rate": 9.471027825256249e-06, "loss": 1.0078, "step": 4782 }, { "epoch": 0.17335362980682106, "grad_norm": 2.3442963384942956, "learning_rate": 9.47076505212174e-06, "loss": 1.0905, "step": 4783 }, { "epoch": 0.17338987350947774, "grad_norm": 2.5913987273256103, "learning_rate": 9.470502217382794e-06, "loss": 1.1454, "step": 4784 }, { "epoch": 0.17342611721213438, "grad_norm": 2.25271648676354, "learning_rate": 9.470239321043029e-06, "loss": 1.0772, "step": 4785 }, { "epoch": 0.17346236091479106, "grad_norm": 2.399948348829502, "learning_rate": 9.46997636310607e-06, "loss": 1.0565, "step": 4786 }, { "epoch": 0.1734986046174477, "grad_norm": 2.563805483053579, "learning_rate": 9.46971334357554e-06, "loss": 1.0857, "step": 4787 }, { "epoch": 0.17353484832010438, "grad_norm": 2.496422777595982, "learning_rate": 9.469450262455064e-06, "loss": 0.9402, "step": 4788 }, { "epoch": 0.17357109202276105, "grad_norm": 2.3951011312162076, "learning_rate": 9.469187119748265e-06, "loss": 1.0233, "step": 4789 }, { "epoch": 0.1736073357254177, "grad_norm": 2.555506050757777, "learning_rate": 9.468923915458772e-06, "loss": 1.0011, "step": 4790 }, { "epoch": 0.17364357942807437, "grad_norm": 2.6945785128987856, "learning_rate": 9.46866064959021e-06, "loss": 1.0515, "step": 4791 }, { "epoch": 0.17367982313073105, "grad_norm": 2.4817043547071274, "learning_rate": 9.468397322146206e-06, "loss": 1.0204, "step": 4792 }, { "epoch": 0.1737160668333877, "grad_norm": 2.1379274467194738, "learning_rate": 9.46813393313039e-06, "loss": 0.9395, "step": 4793 }, { "epoch": 0.17375231053604437, "grad_norm": 2.5146313706092, "learning_rate": 9.46787048254639e-06, "loss": 0.9996, "step": 4794 }, { "epoch": 0.17378855423870102, "grad_norm": 2.2239127446775613, "learning_rate": 9.467606970397838e-06, "loss": 0.8579, "step": 4795 }, { "epoch": 0.1738247979413577, "grad_norm": 2.352734431880633, "learning_rate": 9.467343396688363e-06, "loss": 0.9742, "step": 4796 }, { "epoch": 0.17386104164401436, "grad_norm": 2.6025130468634705, "learning_rate": 9.467079761421596e-06, "loss": 1.101, "step": 4797 }, { "epoch": 0.173897285346671, "grad_norm": 2.2459785945072688, "learning_rate": 9.466816064601174e-06, "loss": 1.0082, "step": 4798 }, { "epoch": 0.17393352904932768, "grad_norm": 2.7454234514633677, "learning_rate": 9.466552306230727e-06, "loss": 0.9719, "step": 4799 }, { "epoch": 0.17396977275198433, "grad_norm": 2.5348197882378867, "learning_rate": 9.466288486313892e-06, "loss": 1.1687, "step": 4800 }, { "epoch": 0.174006016454641, "grad_norm": 2.299637364704144, "learning_rate": 9.466024604854301e-06, "loss": 1.0037, "step": 4801 }, { "epoch": 0.17404226015729768, "grad_norm": 2.7953114191994004, "learning_rate": 9.465760661855593e-06, "loss": 1.1701, "step": 4802 }, { "epoch": 0.17407850385995433, "grad_norm": 2.377114133165073, "learning_rate": 9.465496657321403e-06, "loss": 0.9896, "step": 4803 }, { "epoch": 0.174114747562611, "grad_norm": 2.6801179838262326, "learning_rate": 9.46523259125537e-06, "loss": 1.025, "step": 4804 }, { "epoch": 0.17415099126526765, "grad_norm": 2.3978134623755403, "learning_rate": 9.464968463661133e-06, "loss": 1.0837, "step": 4805 }, { "epoch": 0.17418723496792432, "grad_norm": 2.0703657940802103, "learning_rate": 9.46470427454233e-06, "loss": 0.7805, "step": 4806 }, { "epoch": 0.174223478670581, "grad_norm": 2.5083499709791375, "learning_rate": 9.464440023902602e-06, "loss": 0.9428, "step": 4807 }, { "epoch": 0.17425972237323764, "grad_norm": 2.466268349191303, "learning_rate": 9.464175711745592e-06, "loss": 0.9851, "step": 4808 }, { "epoch": 0.17429596607589432, "grad_norm": 2.6277707781062465, "learning_rate": 9.46391133807494e-06, "loss": 0.8233, "step": 4809 }, { "epoch": 0.174332209778551, "grad_norm": 2.5522949898851115, "learning_rate": 9.463646902894288e-06, "loss": 0.9773, "step": 4810 }, { "epoch": 0.17436845348120764, "grad_norm": 2.172856201414037, "learning_rate": 9.463382406207281e-06, "loss": 1.0082, "step": 4811 }, { "epoch": 0.1744046971838643, "grad_norm": 2.1523038599579323, "learning_rate": 9.463117848017566e-06, "loss": 0.8945, "step": 4812 }, { "epoch": 0.17444094088652096, "grad_norm": 2.655458746354755, "learning_rate": 9.462853228328785e-06, "loss": 0.9589, "step": 4813 }, { "epoch": 0.17447718458917763, "grad_norm": 2.4011519171376303, "learning_rate": 9.462588547144587e-06, "loss": 0.9404, "step": 4814 }, { "epoch": 0.1745134282918343, "grad_norm": 2.2555561425809922, "learning_rate": 9.462323804468617e-06, "loss": 1.0165, "step": 4815 }, { "epoch": 0.17454967199449095, "grad_norm": 2.639140990718592, "learning_rate": 9.462059000304524e-06, "loss": 0.9763, "step": 4816 }, { "epoch": 0.17458591569714763, "grad_norm": 2.524846587562158, "learning_rate": 9.461794134655955e-06, "loss": 0.7802, "step": 4817 }, { "epoch": 0.17462215939980427, "grad_norm": 2.418847543225884, "learning_rate": 9.461529207526564e-06, "loss": 0.9256, "step": 4818 }, { "epoch": 0.17465840310246095, "grad_norm": 2.632381631469106, "learning_rate": 9.461264218919995e-06, "loss": 1.1393, "step": 4819 }, { "epoch": 0.17469464680511762, "grad_norm": 2.60733394134742, "learning_rate": 9.460999168839906e-06, "loss": 0.9862, "step": 4820 }, { "epoch": 0.17473089050777427, "grad_norm": 2.3500928791617843, "learning_rate": 9.460734057289945e-06, "loss": 1.0249, "step": 4821 }, { "epoch": 0.17476713421043094, "grad_norm": 2.513228166753596, "learning_rate": 9.460468884273767e-06, "loss": 0.8506, "step": 4822 }, { "epoch": 0.1748033779130876, "grad_norm": 2.524064346941296, "learning_rate": 9.460203649795025e-06, "loss": 1.0803, "step": 4823 }, { "epoch": 0.17483962161574426, "grad_norm": 2.3529895009166526, "learning_rate": 9.459938353857375e-06, "loss": 0.9378, "step": 4824 }, { "epoch": 0.17487586531840094, "grad_norm": 2.1993025301736746, "learning_rate": 9.45967299646447e-06, "loss": 0.844, "step": 4825 }, { "epoch": 0.17491210902105758, "grad_norm": 2.4104197666084555, "learning_rate": 9.45940757761997e-06, "loss": 0.9173, "step": 4826 }, { "epoch": 0.17494835272371426, "grad_norm": 2.448510512767658, "learning_rate": 9.45914209732753e-06, "loss": 0.9402, "step": 4827 }, { "epoch": 0.17498459642637093, "grad_norm": 2.461030090593397, "learning_rate": 9.458876555590809e-06, "loss": 0.9523, "step": 4828 }, { "epoch": 0.17502084012902758, "grad_norm": 2.5980373437653608, "learning_rate": 9.458610952413465e-06, "loss": 0.9396, "step": 4829 }, { "epoch": 0.17505708383168425, "grad_norm": 2.43735452381177, "learning_rate": 9.458345287799158e-06, "loss": 1.0505, "step": 4830 }, { "epoch": 0.1750933275343409, "grad_norm": 2.8673513095091394, "learning_rate": 9.458079561751552e-06, "loss": 1.0942, "step": 4831 }, { "epoch": 0.17512957123699757, "grad_norm": 2.4470873331322744, "learning_rate": 9.457813774274303e-06, "loss": 0.9841, "step": 4832 }, { "epoch": 0.17516581493965425, "grad_norm": 2.2530136856078595, "learning_rate": 9.457547925371078e-06, "loss": 0.6749, "step": 4833 }, { "epoch": 0.1752020586423109, "grad_norm": 2.4847492866978778, "learning_rate": 9.457282015045537e-06, "loss": 0.9962, "step": 4834 }, { "epoch": 0.17523830234496757, "grad_norm": 2.5170356299314984, "learning_rate": 9.457016043301346e-06, "loss": 0.9035, "step": 4835 }, { "epoch": 0.1752745460476242, "grad_norm": 2.6024779543376884, "learning_rate": 9.45675001014217e-06, "loss": 1.0508, "step": 4836 }, { "epoch": 0.1753107897502809, "grad_norm": 2.4742399712441445, "learning_rate": 9.456483915571672e-06, "loss": 0.9972, "step": 4837 }, { "epoch": 0.17534703345293756, "grad_norm": 2.287436248171129, "learning_rate": 9.456217759593525e-06, "loss": 0.9997, "step": 4838 }, { "epoch": 0.1753832771555942, "grad_norm": 2.434013517451368, "learning_rate": 9.455951542211388e-06, "loss": 0.9764, "step": 4839 }, { "epoch": 0.17541952085825088, "grad_norm": 2.3650798561693898, "learning_rate": 9.455685263428936e-06, "loss": 1.1357, "step": 4840 }, { "epoch": 0.17545576456090753, "grad_norm": 2.4874140460560996, "learning_rate": 9.455418923249834e-06, "loss": 1.032, "step": 4841 }, { "epoch": 0.1754920082635642, "grad_norm": 2.2892332200241707, "learning_rate": 9.455152521677757e-06, "loss": 0.8985, "step": 4842 }, { "epoch": 0.17552825196622088, "grad_norm": 2.3622244251058673, "learning_rate": 9.454886058716371e-06, "loss": 0.9991, "step": 4843 }, { "epoch": 0.17556449566887752, "grad_norm": 2.5065283136140715, "learning_rate": 9.454619534369349e-06, "loss": 1.0141, "step": 4844 }, { "epoch": 0.1756007393715342, "grad_norm": 2.7129527855434885, "learning_rate": 9.454352948640364e-06, "loss": 0.977, "step": 4845 }, { "epoch": 0.17563698307419087, "grad_norm": 2.350068807345908, "learning_rate": 9.45408630153309e-06, "loss": 0.9541, "step": 4846 }, { "epoch": 0.17567322677684752, "grad_norm": 2.3149464189345674, "learning_rate": 9.453819593051198e-06, "loss": 0.9746, "step": 4847 }, { "epoch": 0.1757094704795042, "grad_norm": 2.4656852474940227, "learning_rate": 9.45355282319837e-06, "loss": 1.0605, "step": 4848 }, { "epoch": 0.17574571418216084, "grad_norm": 2.5924104459561708, "learning_rate": 9.453285991978273e-06, "loss": 0.9341, "step": 4849 }, { "epoch": 0.1757819578848175, "grad_norm": 2.2081290187874383, "learning_rate": 9.45301909939459e-06, "loss": 0.9279, "step": 4850 }, { "epoch": 0.1758182015874742, "grad_norm": 2.3179970891919983, "learning_rate": 9.452752145450996e-06, "loss": 0.8656, "step": 4851 }, { "epoch": 0.17585444529013083, "grad_norm": 2.3653627989050543, "learning_rate": 9.452485130151171e-06, "loss": 0.9955, "step": 4852 }, { "epoch": 0.1758906889927875, "grad_norm": 2.3628173489068485, "learning_rate": 9.452218053498796e-06, "loss": 0.9926, "step": 4853 }, { "epoch": 0.17592693269544415, "grad_norm": 2.4612355868463367, "learning_rate": 9.451950915497547e-06, "loss": 0.9722, "step": 4854 }, { "epoch": 0.17596317639810083, "grad_norm": 2.5425630761853397, "learning_rate": 9.451683716151105e-06, "loss": 0.9745, "step": 4855 }, { "epoch": 0.1759994201007575, "grad_norm": 2.3967475970297856, "learning_rate": 9.451416455463156e-06, "loss": 0.8783, "step": 4856 }, { "epoch": 0.17603566380341415, "grad_norm": 2.339136051409043, "learning_rate": 9.45114913343738e-06, "loss": 1.0234, "step": 4857 }, { "epoch": 0.17607190750607082, "grad_norm": 2.1001691142602463, "learning_rate": 9.45088175007746e-06, "loss": 1.0442, "step": 4858 }, { "epoch": 0.17610815120872747, "grad_norm": 2.4612526282266582, "learning_rate": 9.450614305387084e-06, "loss": 1.0686, "step": 4859 }, { "epoch": 0.17614439491138414, "grad_norm": 2.3686864796935656, "learning_rate": 9.450346799369931e-06, "loss": 1.0597, "step": 4860 }, { "epoch": 0.17618063861404082, "grad_norm": 2.612217840627723, "learning_rate": 9.450079232029692e-06, "loss": 1.0749, "step": 4861 }, { "epoch": 0.17621688231669747, "grad_norm": 2.201084518951876, "learning_rate": 9.449811603370053e-06, "loss": 0.9726, "step": 4862 }, { "epoch": 0.17625312601935414, "grad_norm": 2.759473918962026, "learning_rate": 9.449543913394702e-06, "loss": 1.0694, "step": 4863 }, { "epoch": 0.1762893697220108, "grad_norm": 2.525911307834982, "learning_rate": 9.449276162107326e-06, "loss": 0.9066, "step": 4864 }, { "epoch": 0.17632561342466746, "grad_norm": 2.7268391745712783, "learning_rate": 9.449008349511614e-06, "loss": 0.9724, "step": 4865 }, { "epoch": 0.17636185712732413, "grad_norm": 2.4731953797272, "learning_rate": 9.44874047561126e-06, "loss": 0.8558, "step": 4866 }, { "epoch": 0.17639810082998078, "grad_norm": 2.2402986159291394, "learning_rate": 9.44847254040995e-06, "loss": 0.9922, "step": 4867 }, { "epoch": 0.17643434453263745, "grad_norm": 2.4811622064063914, "learning_rate": 9.448204543911382e-06, "loss": 0.8091, "step": 4868 }, { "epoch": 0.17647058823529413, "grad_norm": 2.190645135361709, "learning_rate": 9.447936486119242e-06, "loss": 0.8632, "step": 4869 }, { "epoch": 0.17650683193795078, "grad_norm": 2.498414802392145, "learning_rate": 9.44766836703723e-06, "loss": 1.0739, "step": 4870 }, { "epoch": 0.17654307564060745, "grad_norm": 2.321652300696742, "learning_rate": 9.447400186669037e-06, "loss": 0.8663, "step": 4871 }, { "epoch": 0.1765793193432641, "grad_norm": 2.2467221549841665, "learning_rate": 9.44713194501836e-06, "loss": 0.8558, "step": 4872 }, { "epoch": 0.17661556304592077, "grad_norm": 2.1785239801824665, "learning_rate": 9.446863642088892e-06, "loss": 0.9649, "step": 4873 }, { "epoch": 0.17665180674857744, "grad_norm": 2.308768687318889, "learning_rate": 9.446595277884334e-06, "loss": 0.8888, "step": 4874 }, { "epoch": 0.1766880504512341, "grad_norm": 2.349258285017432, "learning_rate": 9.446326852408382e-06, "loss": 0.9669, "step": 4875 }, { "epoch": 0.17672429415389077, "grad_norm": 2.5699278427415417, "learning_rate": 9.446058365664734e-06, "loss": 0.9451, "step": 4876 }, { "epoch": 0.1767605378565474, "grad_norm": 2.652614655840818, "learning_rate": 9.445789817657091e-06, "loss": 1.1353, "step": 4877 }, { "epoch": 0.17679678155920409, "grad_norm": 2.360821030673543, "learning_rate": 9.445521208389155e-06, "loss": 0.7657, "step": 4878 }, { "epoch": 0.17683302526186076, "grad_norm": 2.2812854824635336, "learning_rate": 9.445252537864625e-06, "loss": 1.193, "step": 4879 }, { "epoch": 0.1768692689645174, "grad_norm": 2.6891632258651943, "learning_rate": 9.444983806087202e-06, "loss": 1.0743, "step": 4880 }, { "epoch": 0.17690551266717408, "grad_norm": 2.4675276263819113, "learning_rate": 9.44471501306059e-06, "loss": 0.9694, "step": 4881 }, { "epoch": 0.17694175636983075, "grad_norm": 2.4816556991095786, "learning_rate": 9.444446158788494e-06, "loss": 0.895, "step": 4882 }, { "epoch": 0.1769780000724874, "grad_norm": 2.4694613355690773, "learning_rate": 9.444177243274619e-06, "loss": 1.0026, "step": 4883 }, { "epoch": 0.17701424377514408, "grad_norm": 2.460791512847466, "learning_rate": 9.443908266522667e-06, "loss": 1.0024, "step": 4884 }, { "epoch": 0.17705048747780072, "grad_norm": 2.4362509573451723, "learning_rate": 9.443639228536347e-06, "loss": 0.8334, "step": 4885 }, { "epoch": 0.1770867311804574, "grad_norm": 2.3748226087019355, "learning_rate": 9.443370129319367e-06, "loss": 0.9316, "step": 4886 }, { "epoch": 0.17712297488311407, "grad_norm": 2.3093950915640167, "learning_rate": 9.443100968875434e-06, "loss": 1.0771, "step": 4887 }, { "epoch": 0.17715921858577072, "grad_norm": 2.739044693867813, "learning_rate": 9.442831747208256e-06, "loss": 0.8236, "step": 4888 }, { "epoch": 0.1771954622884274, "grad_norm": 2.4710688493473243, "learning_rate": 9.442562464321545e-06, "loss": 1.0554, "step": 4889 }, { "epoch": 0.17723170599108404, "grad_norm": 2.4165129111935157, "learning_rate": 9.442293120219009e-06, "loss": 0.9695, "step": 4890 }, { "epoch": 0.1772679496937407, "grad_norm": 2.0549958726618938, "learning_rate": 9.44202371490436e-06, "loss": 0.8446, "step": 4891 }, { "epoch": 0.17730419339639739, "grad_norm": 2.64424585380184, "learning_rate": 9.441754248381311e-06, "loss": 1.0606, "step": 4892 }, { "epoch": 0.17734043709905403, "grad_norm": 2.597810526291576, "learning_rate": 9.441484720653574e-06, "loss": 0.9441, "step": 4893 }, { "epoch": 0.1773766808017107, "grad_norm": 2.6101922691451622, "learning_rate": 9.441215131724865e-06, "loss": 0.9168, "step": 4894 }, { "epoch": 0.17741292450436735, "grad_norm": 2.1376520283903555, "learning_rate": 9.440945481598896e-06, "loss": 1.0191, "step": 4895 }, { "epoch": 0.17744916820702403, "grad_norm": 2.8321658912503005, "learning_rate": 9.440675770279385e-06, "loss": 0.998, "step": 4896 }, { "epoch": 0.1774854119096807, "grad_norm": 2.513788534352133, "learning_rate": 9.440405997770049e-06, "loss": 0.9163, "step": 4897 }, { "epoch": 0.17752165561233735, "grad_norm": 2.2926817740619074, "learning_rate": 9.4401361640746e-06, "loss": 0.9028, "step": 4898 }, { "epoch": 0.17755789931499402, "grad_norm": 2.437096794817932, "learning_rate": 9.439866269196762e-06, "loss": 1.0386, "step": 4899 }, { "epoch": 0.1775941430176507, "grad_norm": 2.147097206465993, "learning_rate": 9.439596313140253e-06, "loss": 0.9946, "step": 4900 }, { "epoch": 0.17763038672030734, "grad_norm": 2.371413965141103, "learning_rate": 9.43932629590879e-06, "loss": 1.1693, "step": 4901 }, { "epoch": 0.17766663042296402, "grad_norm": 2.3202622186265702, "learning_rate": 9.439056217506098e-06, "loss": 0.8935, "step": 4902 }, { "epoch": 0.17770287412562066, "grad_norm": 2.8748257833808295, "learning_rate": 9.438786077935893e-06, "loss": 1.0228, "step": 4903 }, { "epoch": 0.17773911782827734, "grad_norm": 2.542761692950645, "learning_rate": 9.438515877201902e-06, "loss": 1.026, "step": 4904 }, { "epoch": 0.177775361530934, "grad_norm": 2.6438239361157767, "learning_rate": 9.438245615307845e-06, "loss": 0.938, "step": 4905 }, { "epoch": 0.17781160523359066, "grad_norm": 2.3862562001691505, "learning_rate": 9.437975292257451e-06, "loss": 0.9581, "step": 4906 }, { "epoch": 0.17784784893624733, "grad_norm": 2.6661581619775783, "learning_rate": 9.437704908054438e-06, "loss": 1.0869, "step": 4907 }, { "epoch": 0.17788409263890398, "grad_norm": 2.5528025882927525, "learning_rate": 9.437434462702537e-06, "loss": 0.9342, "step": 4908 }, { "epoch": 0.17792033634156065, "grad_norm": 2.545197388385528, "learning_rate": 9.437163956205473e-06, "loss": 0.9835, "step": 4909 }, { "epoch": 0.17795658004421733, "grad_norm": 2.332924346419421, "learning_rate": 9.436893388566971e-06, "loss": 0.9396, "step": 4910 }, { "epoch": 0.17799282374687397, "grad_norm": 2.3624865313103305, "learning_rate": 9.436622759790763e-06, "loss": 0.9881, "step": 4911 }, { "epoch": 0.17802906744953065, "grad_norm": 2.4573223998883806, "learning_rate": 9.436352069880576e-06, "loss": 0.8941, "step": 4912 }, { "epoch": 0.1780653111521873, "grad_norm": 2.459100363437247, "learning_rate": 9.436081318840143e-06, "loss": 0.9098, "step": 4913 }, { "epoch": 0.17810155485484397, "grad_norm": 2.3230234095021824, "learning_rate": 9.435810506673189e-06, "loss": 1.153, "step": 4914 }, { "epoch": 0.17813779855750064, "grad_norm": 2.4967763202812043, "learning_rate": 9.43553963338345e-06, "loss": 0.9375, "step": 4915 }, { "epoch": 0.1781740422601573, "grad_norm": 2.0565678048272855, "learning_rate": 9.435268698974658e-06, "loss": 0.8785, "step": 4916 }, { "epoch": 0.17821028596281396, "grad_norm": 2.5154884569913176, "learning_rate": 9.434997703450545e-06, "loss": 0.9204, "step": 4917 }, { "epoch": 0.17824652966547064, "grad_norm": 2.507005370766508, "learning_rate": 9.434726646814846e-06, "loss": 0.9541, "step": 4918 }, { "epoch": 0.17828277336812728, "grad_norm": 2.254684242476841, "learning_rate": 9.434455529071296e-06, "loss": 0.9066, "step": 4919 }, { "epoch": 0.17831901707078396, "grad_norm": 2.3989199659360785, "learning_rate": 9.434184350223629e-06, "loss": 0.8766, "step": 4920 }, { "epoch": 0.1783552607734406, "grad_norm": 2.3554984082449404, "learning_rate": 9.433913110275585e-06, "loss": 0.9631, "step": 4921 }, { "epoch": 0.17839150447609728, "grad_norm": 2.312963049090756, "learning_rate": 9.4336418092309e-06, "loss": 0.8749, "step": 4922 }, { "epoch": 0.17842774817875395, "grad_norm": 2.5472162692717792, "learning_rate": 9.433370447093314e-06, "loss": 1.0302, "step": 4923 }, { "epoch": 0.1784639918814106, "grad_norm": 2.425921106701315, "learning_rate": 9.433099023866561e-06, "loss": 0.8339, "step": 4924 }, { "epoch": 0.17850023558406727, "grad_norm": 2.2049961017896997, "learning_rate": 9.432827539554386e-06, "loss": 0.713, "step": 4925 }, { "epoch": 0.17853647928672392, "grad_norm": 2.104898404230483, "learning_rate": 9.432555994160528e-06, "loss": 0.9984, "step": 4926 }, { "epoch": 0.1785727229893806, "grad_norm": 2.2530214881521142, "learning_rate": 9.43228438768873e-06, "loss": 0.9692, "step": 4927 }, { "epoch": 0.17860896669203727, "grad_norm": 2.4695480599391244, "learning_rate": 9.432012720142734e-06, "loss": 1.0654, "step": 4928 }, { "epoch": 0.17864521039469392, "grad_norm": 2.1680433132722614, "learning_rate": 9.431740991526281e-06, "loss": 0.8008, "step": 4929 }, { "epoch": 0.1786814540973506, "grad_norm": 2.473288113686125, "learning_rate": 9.43146920184312e-06, "loss": 1.0095, "step": 4930 }, { "epoch": 0.17871769780000724, "grad_norm": 2.3921757224483784, "learning_rate": 9.431197351096992e-06, "loss": 0.8421, "step": 4931 }, { "epoch": 0.1787539415026639, "grad_norm": 2.4443922262161193, "learning_rate": 9.430925439291646e-06, "loss": 0.8747, "step": 4932 }, { "epoch": 0.17879018520532058, "grad_norm": 2.4628561061905425, "learning_rate": 9.430653466430824e-06, "loss": 0.9912, "step": 4933 }, { "epoch": 0.17882642890797723, "grad_norm": 2.531233222938724, "learning_rate": 9.430381432518279e-06, "loss": 0.8558, "step": 4934 }, { "epoch": 0.1788626726106339, "grad_norm": 2.256492832914117, "learning_rate": 9.430109337557757e-06, "loss": 1.0407, "step": 4935 }, { "epoch": 0.17889891631329058, "grad_norm": 2.2589842433340395, "learning_rate": 9.429837181553008e-06, "loss": 0.9027, "step": 4936 }, { "epoch": 0.17893516001594723, "grad_norm": 2.5654087433052126, "learning_rate": 9.429564964507781e-06, "loss": 1.0288, "step": 4937 }, { "epoch": 0.1789714037186039, "grad_norm": 2.6396915751779613, "learning_rate": 9.429292686425827e-06, "loss": 1.0983, "step": 4938 }, { "epoch": 0.17900764742126055, "grad_norm": 2.3970324845943676, "learning_rate": 9.4290203473109e-06, "loss": 0.9215, "step": 4939 }, { "epoch": 0.17904389112391722, "grad_norm": 2.485252714617327, "learning_rate": 9.42874794716675e-06, "loss": 1.1529, "step": 4940 }, { "epoch": 0.1790801348265739, "grad_norm": 2.1539743625119643, "learning_rate": 9.428475485997133e-06, "loss": 0.8301, "step": 4941 }, { "epoch": 0.17911637852923054, "grad_norm": 2.650979438002955, "learning_rate": 9.4282029638058e-06, "loss": 1.0889, "step": 4942 }, { "epoch": 0.17915262223188722, "grad_norm": 2.3240330997325174, "learning_rate": 9.42793038059651e-06, "loss": 1.2851, "step": 4943 }, { "epoch": 0.17918886593454386, "grad_norm": 2.507850312380765, "learning_rate": 9.427657736373018e-06, "loss": 1.1098, "step": 4944 }, { "epoch": 0.17922510963720054, "grad_norm": 2.391807055460278, "learning_rate": 9.427385031139079e-06, "loss": 0.7156, "step": 4945 }, { "epoch": 0.1792613533398572, "grad_norm": 2.268800905723293, "learning_rate": 9.42711226489845e-06, "loss": 1.1702, "step": 4946 }, { "epoch": 0.17929759704251386, "grad_norm": 2.696937211425745, "learning_rate": 9.426839437654894e-06, "loss": 1.0349, "step": 4947 }, { "epoch": 0.17933384074517053, "grad_norm": 2.3363919027943534, "learning_rate": 9.426566549412167e-06, "loss": 0.9275, "step": 4948 }, { "epoch": 0.17937008444782718, "grad_norm": 2.3347395403181634, "learning_rate": 9.42629360017403e-06, "loss": 0.8168, "step": 4949 }, { "epoch": 0.17940632815048385, "grad_norm": 2.395189237922319, "learning_rate": 9.426020589944245e-06, "loss": 1.0401, "step": 4950 }, { "epoch": 0.17944257185314053, "grad_norm": 2.7522214979357087, "learning_rate": 9.425747518726572e-06, "loss": 1.0698, "step": 4951 }, { "epoch": 0.17947881555579717, "grad_norm": 2.158275693869205, "learning_rate": 9.425474386524775e-06, "loss": 0.9016, "step": 4952 }, { "epoch": 0.17951505925845385, "grad_norm": 2.1681377526067704, "learning_rate": 9.425201193342618e-06, "loss": 1.1212, "step": 4953 }, { "epoch": 0.17955130296111052, "grad_norm": 2.358542972016477, "learning_rate": 9.424927939183865e-06, "loss": 1.09, "step": 4954 }, { "epoch": 0.17958754666376717, "grad_norm": 2.368939109785572, "learning_rate": 9.42465462405228e-06, "loss": 1.1022, "step": 4955 }, { "epoch": 0.17962379036642384, "grad_norm": 2.169761952806023, "learning_rate": 9.42438124795163e-06, "loss": 0.9089, "step": 4956 }, { "epoch": 0.1796600340690805, "grad_norm": 2.4990957521004207, "learning_rate": 9.424107810885683e-06, "loss": 1.1819, "step": 4957 }, { "epoch": 0.17969627777173716, "grad_norm": 2.4353373286228077, "learning_rate": 9.423834312858206e-06, "loss": 0.8493, "step": 4958 }, { "epoch": 0.17973252147439384, "grad_norm": 2.5735507564629345, "learning_rate": 9.423560753872968e-06, "loss": 0.9244, "step": 4959 }, { "epoch": 0.17976876517705048, "grad_norm": 2.536479208869804, "learning_rate": 9.423287133933737e-06, "loss": 1.0549, "step": 4960 }, { "epoch": 0.17980500887970716, "grad_norm": 2.44336291977672, "learning_rate": 9.423013453044284e-06, "loss": 0.8436, "step": 4961 }, { "epoch": 0.1798412525823638, "grad_norm": 2.343691770799506, "learning_rate": 9.422739711208382e-06, "loss": 0.9796, "step": 4962 }, { "epoch": 0.17987749628502048, "grad_norm": 2.429123151568174, "learning_rate": 9.422465908429803e-06, "loss": 1.0622, "step": 4963 }, { "epoch": 0.17991373998767715, "grad_norm": 2.611535790149344, "learning_rate": 9.422192044712315e-06, "loss": 0.9852, "step": 4964 }, { "epoch": 0.1799499836903338, "grad_norm": 2.4178947639885005, "learning_rate": 9.421918120059696e-06, "loss": 1.1749, "step": 4965 }, { "epoch": 0.17998622739299047, "grad_norm": 2.4195554485799575, "learning_rate": 9.421644134475722e-06, "loss": 1.0258, "step": 4966 }, { "epoch": 0.18002247109564712, "grad_norm": 2.4131504731976627, "learning_rate": 9.421370087964164e-06, "loss": 0.9374, "step": 4967 }, { "epoch": 0.1800587147983038, "grad_norm": 2.368069155819112, "learning_rate": 9.4210959805288e-06, "loss": 0.9574, "step": 4968 }, { "epoch": 0.18009495850096047, "grad_norm": 3.002511577631293, "learning_rate": 9.420821812173407e-06, "loss": 0.9575, "step": 4969 }, { "epoch": 0.1801312022036171, "grad_norm": 2.627926738383108, "learning_rate": 9.420547582901765e-06, "loss": 1.0185, "step": 4970 }, { "epoch": 0.1801674459062738, "grad_norm": 2.3385196169847933, "learning_rate": 9.420273292717647e-06, "loss": 1.0047, "step": 4971 }, { "epoch": 0.18020368960893046, "grad_norm": 2.740245101929277, "learning_rate": 9.419998941624839e-06, "loss": 1.0497, "step": 4972 }, { "epoch": 0.1802399333115871, "grad_norm": 2.7017067623823134, "learning_rate": 9.41972452962712e-06, "loss": 1.0518, "step": 4973 }, { "epoch": 0.18027617701424378, "grad_norm": 2.3805329473316297, "learning_rate": 9.419450056728268e-06, "loss": 0.9262, "step": 4974 }, { "epoch": 0.18031242071690043, "grad_norm": 2.3229403709145413, "learning_rate": 9.419175522932068e-06, "loss": 0.8512, "step": 4975 }, { "epoch": 0.1803486644195571, "grad_norm": 2.404382363405895, "learning_rate": 9.418900928242301e-06, "loss": 0.9805, "step": 4976 }, { "epoch": 0.18038490812221378, "grad_norm": 2.2843152595765193, "learning_rate": 9.418626272662751e-06, "loss": 0.9633, "step": 4977 }, { "epoch": 0.18042115182487042, "grad_norm": 2.667721960767252, "learning_rate": 9.418351556197206e-06, "loss": 1.2131, "step": 4978 }, { "epoch": 0.1804573955275271, "grad_norm": 2.3823877712788026, "learning_rate": 9.418076778849446e-06, "loss": 1.0046, "step": 4979 }, { "epoch": 0.18049363923018374, "grad_norm": 2.2938953749735775, "learning_rate": 9.41780194062326e-06, "loss": 0.9015, "step": 4980 }, { "epoch": 0.18052988293284042, "grad_norm": 2.4889265007626427, "learning_rate": 9.417527041522436e-06, "loss": 0.8721, "step": 4981 }, { "epoch": 0.1805661266354971, "grad_norm": 2.342666534346483, "learning_rate": 9.417252081550761e-06, "loss": 0.9048, "step": 4982 }, { "epoch": 0.18060237033815374, "grad_norm": 2.2725395246952727, "learning_rate": 9.416977060712024e-06, "loss": 0.9022, "step": 4983 }, { "epoch": 0.1806386140408104, "grad_norm": 2.6814130225993473, "learning_rate": 9.416701979010013e-06, "loss": 1.0243, "step": 4984 }, { "epoch": 0.18067485774346706, "grad_norm": 2.545940535950309, "learning_rate": 9.416426836448521e-06, "loss": 1.0989, "step": 4985 }, { "epoch": 0.18071110144612373, "grad_norm": 2.3527321909789642, "learning_rate": 9.41615163303134e-06, "loss": 1.1348, "step": 4986 }, { "epoch": 0.1807473451487804, "grad_norm": 2.248701847531816, "learning_rate": 9.415876368762257e-06, "loss": 0.7414, "step": 4987 }, { "epoch": 0.18078358885143705, "grad_norm": 2.3919110655163687, "learning_rate": 9.41560104364507e-06, "loss": 0.9224, "step": 4988 }, { "epoch": 0.18081983255409373, "grad_norm": 2.3459761897150457, "learning_rate": 9.415325657683571e-06, "loss": 0.9611, "step": 4989 }, { "epoch": 0.1808560762567504, "grad_norm": 2.7004393789755516, "learning_rate": 9.415050210881553e-06, "loss": 0.8899, "step": 4990 }, { "epoch": 0.18089231995940705, "grad_norm": 2.188306816575438, "learning_rate": 9.414774703242815e-06, "loss": 0.9824, "step": 4991 }, { "epoch": 0.18092856366206372, "grad_norm": 2.2201019320291455, "learning_rate": 9.414499134771151e-06, "loss": 0.9766, "step": 4992 }, { "epoch": 0.18096480736472037, "grad_norm": 2.4636219518563514, "learning_rate": 9.41422350547036e-06, "loss": 1.0415, "step": 4993 }, { "epoch": 0.18100105106737704, "grad_norm": 2.5092177314161925, "learning_rate": 9.413947815344237e-06, "loss": 0.9716, "step": 4994 }, { "epoch": 0.18103729477003372, "grad_norm": 2.454363681609279, "learning_rate": 9.413672064396584e-06, "loss": 0.968, "step": 4995 }, { "epoch": 0.18107353847269037, "grad_norm": 2.7562844191247593, "learning_rate": 9.4133962526312e-06, "loss": 0.9199, "step": 4996 }, { "epoch": 0.18110978217534704, "grad_norm": 2.5462400827074183, "learning_rate": 9.413120380051882e-06, "loss": 1.0026, "step": 4997 }, { "epoch": 0.18114602587800369, "grad_norm": 2.45247158382829, "learning_rate": 9.412844446662436e-06, "loss": 0.915, "step": 4998 }, { "epoch": 0.18118226958066036, "grad_norm": 2.209303055045045, "learning_rate": 9.412568452466662e-06, "loss": 0.9613, "step": 4999 }, { "epoch": 0.18121851328331703, "grad_norm": 2.129081023839988, "learning_rate": 9.412292397468363e-06, "loss": 0.7629, "step": 5000 }, { "epoch": 0.18125475698597368, "grad_norm": 2.249655994820793, "learning_rate": 9.412016281671343e-06, "loss": 1.028, "step": 5001 }, { "epoch": 0.18129100068863035, "grad_norm": 2.1093243638810066, "learning_rate": 9.411740105079407e-06, "loss": 0.8073, "step": 5002 }, { "epoch": 0.181327244391287, "grad_norm": 2.4786097725279967, "learning_rate": 9.411463867696362e-06, "loss": 1.1295, "step": 5003 }, { "epoch": 0.18136348809394368, "grad_norm": 2.446638018043648, "learning_rate": 9.41118756952601e-06, "loss": 0.8312, "step": 5004 }, { "epoch": 0.18139973179660035, "grad_norm": 2.4337555318072392, "learning_rate": 9.410911210572164e-06, "loss": 0.972, "step": 5005 }, { "epoch": 0.181435975499257, "grad_norm": 2.5269130884852715, "learning_rate": 9.410634790838627e-06, "loss": 1.1884, "step": 5006 }, { "epoch": 0.18147221920191367, "grad_norm": 2.067651629317634, "learning_rate": 9.410358310329212e-06, "loss": 0.7747, "step": 5007 }, { "epoch": 0.18150846290457034, "grad_norm": 2.2952685219565008, "learning_rate": 9.410081769047725e-06, "loss": 0.9683, "step": 5008 }, { "epoch": 0.181544706607227, "grad_norm": 2.2905132582442134, "learning_rate": 9.40980516699798e-06, "loss": 0.8526, "step": 5009 }, { "epoch": 0.18158095030988367, "grad_norm": 2.171107272562952, "learning_rate": 9.409528504183785e-06, "loss": 0.8983, "step": 5010 }, { "epoch": 0.1816171940125403, "grad_norm": 2.301988234982242, "learning_rate": 9.409251780608956e-06, "loss": 1.0368, "step": 5011 }, { "epoch": 0.18165343771519699, "grad_norm": 2.549817696955815, "learning_rate": 9.408974996277303e-06, "loss": 0.9492, "step": 5012 }, { "epoch": 0.18168968141785366, "grad_norm": 2.2126397089581142, "learning_rate": 9.408698151192642e-06, "loss": 0.8878, "step": 5013 }, { "epoch": 0.1817259251205103, "grad_norm": 2.333876712457622, "learning_rate": 9.408421245358784e-06, "loss": 0.9883, "step": 5014 }, { "epoch": 0.18176216882316698, "grad_norm": 2.696492730303022, "learning_rate": 9.40814427877955e-06, "loss": 1.0019, "step": 5015 }, { "epoch": 0.18179841252582363, "grad_norm": 2.441878294688217, "learning_rate": 9.407867251458754e-06, "loss": 0.9832, "step": 5016 }, { "epoch": 0.1818346562284803, "grad_norm": 2.1197816902161075, "learning_rate": 9.407590163400212e-06, "loss": 0.8929, "step": 5017 }, { "epoch": 0.18187089993113698, "grad_norm": 2.4359421547408195, "learning_rate": 9.407313014607742e-06, "loss": 0.9676, "step": 5018 }, { "epoch": 0.18190714363379362, "grad_norm": 2.2261180449525093, "learning_rate": 9.407035805085166e-06, "loss": 1.1596, "step": 5019 }, { "epoch": 0.1819433873364503, "grad_norm": 2.9004473632700374, "learning_rate": 9.4067585348363e-06, "loss": 1.053, "step": 5020 }, { "epoch": 0.18197963103910694, "grad_norm": 2.403497002164185, "learning_rate": 9.406481203864967e-06, "loss": 1.0723, "step": 5021 }, { "epoch": 0.18201587474176362, "grad_norm": 2.1902267359541954, "learning_rate": 9.40620381217499e-06, "loss": 0.9111, "step": 5022 }, { "epoch": 0.1820521184444203, "grad_norm": 2.2438123207659904, "learning_rate": 9.405926359770187e-06, "loss": 0.8929, "step": 5023 }, { "epoch": 0.18208836214707694, "grad_norm": 2.4801881786250672, "learning_rate": 9.405648846654384e-06, "loss": 0.8598, "step": 5024 }, { "epoch": 0.1821246058497336, "grad_norm": 2.404041764994692, "learning_rate": 9.405371272831405e-06, "loss": 0.9261, "step": 5025 }, { "epoch": 0.18216084955239029, "grad_norm": 2.5811630988901637, "learning_rate": 9.405093638305074e-06, "loss": 0.8856, "step": 5026 }, { "epoch": 0.18219709325504693, "grad_norm": 2.431959928766364, "learning_rate": 9.404815943079217e-06, "loss": 0.897, "step": 5027 }, { "epoch": 0.1822333369577036, "grad_norm": 2.277092798118511, "learning_rate": 9.404538187157659e-06, "loss": 1.1053, "step": 5028 }, { "epoch": 0.18226958066036025, "grad_norm": 2.4807885412098085, "learning_rate": 9.40426037054423e-06, "loss": 1.1084, "step": 5029 }, { "epoch": 0.18230582436301693, "grad_norm": 2.3117589597223085, "learning_rate": 9.403982493242756e-06, "loss": 1.1617, "step": 5030 }, { "epoch": 0.1823420680656736, "grad_norm": 2.089801166613318, "learning_rate": 9.403704555257067e-06, "loss": 0.9098, "step": 5031 }, { "epoch": 0.18237831176833025, "grad_norm": 2.7655046294396977, "learning_rate": 9.403426556590992e-06, "loss": 0.875, "step": 5032 }, { "epoch": 0.18241455547098692, "grad_norm": 2.2310034181956153, "learning_rate": 9.403148497248362e-06, "loss": 1.0103, "step": 5033 }, { "epoch": 0.18245079917364357, "grad_norm": 2.2002346644692463, "learning_rate": 9.40287037723301e-06, "loss": 0.9241, "step": 5034 }, { "epoch": 0.18248704287630024, "grad_norm": 2.385811467512419, "learning_rate": 9.402592196548766e-06, "loss": 0.9875, "step": 5035 }, { "epoch": 0.18252328657895692, "grad_norm": 2.1821299567811154, "learning_rate": 9.402313955199465e-06, "loss": 0.7347, "step": 5036 }, { "epoch": 0.18255953028161356, "grad_norm": 2.7206198984384846, "learning_rate": 9.402035653188939e-06, "loss": 1.1268, "step": 5037 }, { "epoch": 0.18259577398427024, "grad_norm": 2.463008038023946, "learning_rate": 9.401757290521023e-06, "loss": 0.9561, "step": 5038 }, { "epoch": 0.18263201768692688, "grad_norm": 2.46386785117043, "learning_rate": 9.401478867199556e-06, "loss": 1.0049, "step": 5039 }, { "epoch": 0.18266826138958356, "grad_norm": 2.374385423750132, "learning_rate": 9.40120038322837e-06, "loss": 0.9671, "step": 5040 }, { "epoch": 0.18270450509224023, "grad_norm": 2.323680280438303, "learning_rate": 9.400921838611306e-06, "loss": 0.9774, "step": 5041 }, { "epoch": 0.18274074879489688, "grad_norm": 2.566365725761854, "learning_rate": 9.4006432333522e-06, "loss": 1.0492, "step": 5042 }, { "epoch": 0.18277699249755355, "grad_norm": 2.62052877830976, "learning_rate": 9.400364567454892e-06, "loss": 0.7514, "step": 5043 }, { "epoch": 0.1828132362002102, "grad_norm": 2.4209235013682293, "learning_rate": 9.400085840923221e-06, "loss": 0.9804, "step": 5044 }, { "epoch": 0.18284947990286687, "grad_norm": 2.381755224455073, "learning_rate": 9.39980705376103e-06, "loss": 0.9247, "step": 5045 }, { "epoch": 0.18288572360552355, "grad_norm": 2.3764606879332453, "learning_rate": 9.399528205972157e-06, "loss": 0.8854, "step": 5046 }, { "epoch": 0.1829219673081802, "grad_norm": 2.1974545595487185, "learning_rate": 9.399249297560447e-06, "loss": 0.8212, "step": 5047 }, { "epoch": 0.18295821101083687, "grad_norm": 2.6453576285665585, "learning_rate": 9.398970328529742e-06, "loss": 1.0187, "step": 5048 }, { "epoch": 0.18299445471349354, "grad_norm": 2.6999914398585037, "learning_rate": 9.398691298883886e-06, "loss": 0.9111, "step": 5049 }, { "epoch": 0.1830306984161502, "grad_norm": 2.7803747990326344, "learning_rate": 9.398412208626723e-06, "loss": 1.0318, "step": 5050 }, { "epoch": 0.18306694211880686, "grad_norm": 2.4116112489222545, "learning_rate": 9.398133057762102e-06, "loss": 0.8824, "step": 5051 }, { "epoch": 0.1831031858214635, "grad_norm": 2.6601547333733366, "learning_rate": 9.397853846293865e-06, "loss": 0.9616, "step": 5052 }, { "epoch": 0.18313942952412018, "grad_norm": 2.333991572582969, "learning_rate": 9.397574574225863e-06, "loss": 1.0553, "step": 5053 }, { "epoch": 0.18317567322677686, "grad_norm": 2.544968359249499, "learning_rate": 9.397295241561943e-06, "loss": 0.8984, "step": 5054 }, { "epoch": 0.1832119169294335, "grad_norm": 2.6368018868435557, "learning_rate": 9.397015848305954e-06, "loss": 1.0458, "step": 5055 }, { "epoch": 0.18324816063209018, "grad_norm": 2.14256022798709, "learning_rate": 9.396736394461745e-06, "loss": 0.8133, "step": 5056 }, { "epoch": 0.18328440433474683, "grad_norm": 2.35729085006206, "learning_rate": 9.39645688003317e-06, "loss": 1.1285, "step": 5057 }, { "epoch": 0.1833206480374035, "grad_norm": 2.638172471875908, "learning_rate": 9.396177305024076e-06, "loss": 0.9539, "step": 5058 }, { "epoch": 0.18335689174006017, "grad_norm": 2.4370135574850456, "learning_rate": 9.395897669438317e-06, "loss": 1.1266, "step": 5059 }, { "epoch": 0.18339313544271682, "grad_norm": 2.2853684786958803, "learning_rate": 9.395617973279749e-06, "loss": 0.917, "step": 5060 }, { "epoch": 0.1834293791453735, "grad_norm": 2.3915184704101664, "learning_rate": 9.395338216552222e-06, "loss": 0.9781, "step": 5061 }, { "epoch": 0.18346562284803014, "grad_norm": 2.222958349550842, "learning_rate": 9.395058399259592e-06, "loss": 0.9295, "step": 5062 }, { "epoch": 0.18350186655068682, "grad_norm": 2.018020827095469, "learning_rate": 9.394778521405717e-06, "loss": 0.9466, "step": 5063 }, { "epoch": 0.1835381102533435, "grad_norm": 2.413564707151491, "learning_rate": 9.39449858299445e-06, "loss": 1.0967, "step": 5064 }, { "epoch": 0.18357435395600014, "grad_norm": 2.5833194101793118, "learning_rate": 9.39421858402965e-06, "loss": 0.9873, "step": 5065 }, { "epoch": 0.1836105976586568, "grad_norm": 2.5907960711952978, "learning_rate": 9.393938524515179e-06, "loss": 0.8876, "step": 5066 }, { "epoch": 0.18364684136131348, "grad_norm": 2.697449039150722, "learning_rate": 9.39365840445489e-06, "loss": 1.1865, "step": 5067 }, { "epoch": 0.18368308506397013, "grad_norm": 2.595214414019949, "learning_rate": 9.393378223852645e-06, "loss": 1.1075, "step": 5068 }, { "epoch": 0.1837193287666268, "grad_norm": 2.3429863152184023, "learning_rate": 9.393097982712306e-06, "loss": 1.0067, "step": 5069 }, { "epoch": 0.18375557246928345, "grad_norm": 2.582349790198505, "learning_rate": 9.392817681037733e-06, "loss": 1.0151, "step": 5070 }, { "epoch": 0.18379181617194013, "grad_norm": 2.503637598270108, "learning_rate": 9.39253731883279e-06, "loss": 0.9304, "step": 5071 }, { "epoch": 0.1838280598745968, "grad_norm": 2.4877229533801675, "learning_rate": 9.392256896101337e-06, "loss": 1.0405, "step": 5072 }, { "epoch": 0.18386430357725345, "grad_norm": 2.499868008263258, "learning_rate": 9.391976412847243e-06, "loss": 1.079, "step": 5073 }, { "epoch": 0.18390054727991012, "grad_norm": 2.8036228777287584, "learning_rate": 9.391695869074369e-06, "loss": 0.8315, "step": 5074 }, { "epoch": 0.18393679098256677, "grad_norm": 2.3574787454408277, "learning_rate": 9.391415264786583e-06, "loss": 0.9121, "step": 5075 }, { "epoch": 0.18397303468522344, "grad_norm": 2.39274907243817, "learning_rate": 9.391134599987748e-06, "loss": 0.8604, "step": 5076 }, { "epoch": 0.18400927838788012, "grad_norm": 2.470062991874146, "learning_rate": 9.390853874681736e-06, "loss": 1.1532, "step": 5077 }, { "epoch": 0.18404552209053676, "grad_norm": 2.4536092119822306, "learning_rate": 9.390573088872412e-06, "loss": 1.101, "step": 5078 }, { "epoch": 0.18408176579319344, "grad_norm": 2.4050846138158883, "learning_rate": 9.390292242563648e-06, "loss": 0.8638, "step": 5079 }, { "epoch": 0.18411800949585008, "grad_norm": 2.41126861475657, "learning_rate": 9.39001133575931e-06, "loss": 1.0315, "step": 5080 }, { "epoch": 0.18415425319850676, "grad_norm": 2.4489337054986935, "learning_rate": 9.389730368463272e-06, "loss": 0.944, "step": 5081 }, { "epoch": 0.18419049690116343, "grad_norm": 2.606967917264336, "learning_rate": 9.389449340679404e-06, "loss": 0.9516, "step": 5082 }, { "epoch": 0.18422674060382008, "grad_norm": 2.463265603274811, "learning_rate": 9.389168252411579e-06, "loss": 0.9415, "step": 5083 }, { "epoch": 0.18426298430647675, "grad_norm": 2.6048791064724015, "learning_rate": 9.38888710366367e-06, "loss": 0.8538, "step": 5084 }, { "epoch": 0.18429922800913343, "grad_norm": 2.4132666549984547, "learning_rate": 9.38860589443955e-06, "loss": 1.042, "step": 5085 }, { "epoch": 0.18433547171179007, "grad_norm": 2.512966210008813, "learning_rate": 9.388324624743096e-06, "loss": 0.8867, "step": 5086 }, { "epoch": 0.18437171541444675, "grad_norm": 2.594400210311495, "learning_rate": 9.388043294578182e-06, "loss": 1.03, "step": 5087 }, { "epoch": 0.1844079591171034, "grad_norm": 2.4226123119235714, "learning_rate": 9.387761903948686e-06, "loss": 0.8279, "step": 5088 }, { "epoch": 0.18444420281976007, "grad_norm": 2.4838514060579073, "learning_rate": 9.387480452858484e-06, "loss": 1.042, "step": 5089 }, { "epoch": 0.18448044652241674, "grad_norm": 2.468061203916601, "learning_rate": 9.387198941311455e-06, "loss": 1.0683, "step": 5090 }, { "epoch": 0.1845166902250734, "grad_norm": 2.4731236537542505, "learning_rate": 9.386917369311477e-06, "loss": 0.9693, "step": 5091 }, { "epoch": 0.18455293392773006, "grad_norm": 2.3320745544311765, "learning_rate": 9.386635736862431e-06, "loss": 1.0038, "step": 5092 }, { "epoch": 0.1845891776303867, "grad_norm": 2.5033705190158733, "learning_rate": 9.3863540439682e-06, "loss": 0.9044, "step": 5093 }, { "epoch": 0.18462542133304338, "grad_norm": 2.654038201141114, "learning_rate": 9.386072290632659e-06, "loss": 1.1671, "step": 5094 }, { "epoch": 0.18466166503570006, "grad_norm": 2.569538314321349, "learning_rate": 9.385790476859697e-06, "loss": 1.0114, "step": 5095 }, { "epoch": 0.1846979087383567, "grad_norm": 2.20406409078486, "learning_rate": 9.385508602653195e-06, "loss": 0.9127, "step": 5096 }, { "epoch": 0.18473415244101338, "grad_norm": 2.409370638791936, "learning_rate": 9.385226668017036e-06, "loss": 1.0908, "step": 5097 }, { "epoch": 0.18477039614367002, "grad_norm": 2.5558022392834645, "learning_rate": 9.384944672955106e-06, "loss": 0.7467, "step": 5098 }, { "epoch": 0.1848066398463267, "grad_norm": 2.1402440386460713, "learning_rate": 9.38466261747129e-06, "loss": 1.0099, "step": 5099 }, { "epoch": 0.18484288354898337, "grad_norm": 2.3987784614066547, "learning_rate": 9.384380501569475e-06, "loss": 0.881, "step": 5100 }, { "epoch": 0.18487912725164002, "grad_norm": 2.4283934409451757, "learning_rate": 9.384098325253547e-06, "loss": 1.1357, "step": 5101 }, { "epoch": 0.1849153709542967, "grad_norm": 2.394892084980908, "learning_rate": 9.383816088527396e-06, "loss": 0.8703, "step": 5102 }, { "epoch": 0.18495161465695337, "grad_norm": 2.3802573683211943, "learning_rate": 9.383533791394912e-06, "loss": 0.7744, "step": 5103 }, { "epoch": 0.18498785835961, "grad_norm": 2.253500137194248, "learning_rate": 9.383251433859982e-06, "loss": 1.0035, "step": 5104 }, { "epoch": 0.1850241020622667, "grad_norm": 2.116633648921101, "learning_rate": 9.382969015926498e-06, "loss": 0.7671, "step": 5105 }, { "epoch": 0.18506034576492333, "grad_norm": 2.819427150984733, "learning_rate": 9.382686537598353e-06, "loss": 1.1431, "step": 5106 }, { "epoch": 0.18509658946758, "grad_norm": 2.1162127118383793, "learning_rate": 9.382403998879438e-06, "loss": 0.7948, "step": 5107 }, { "epoch": 0.18513283317023668, "grad_norm": 2.4753708185779724, "learning_rate": 9.382121399773644e-06, "loss": 1.0265, "step": 5108 }, { "epoch": 0.18516907687289333, "grad_norm": 2.646965951999691, "learning_rate": 9.381838740284869e-06, "loss": 1.1078, "step": 5109 }, { "epoch": 0.18520532057555, "grad_norm": 2.553938947060244, "learning_rate": 9.381556020417007e-06, "loss": 1.1891, "step": 5110 }, { "epoch": 0.18524156427820665, "grad_norm": 2.5727788431544143, "learning_rate": 9.381273240173953e-06, "loss": 1.1368, "step": 5111 }, { "epoch": 0.18527780798086332, "grad_norm": 2.065736150517828, "learning_rate": 9.380990399559602e-06, "loss": 0.9902, "step": 5112 }, { "epoch": 0.18531405168352, "grad_norm": 2.2914322754487677, "learning_rate": 9.380707498577854e-06, "loss": 1.0547, "step": 5113 }, { "epoch": 0.18535029538617664, "grad_norm": 2.597279123484716, "learning_rate": 9.380424537232605e-06, "loss": 0.901, "step": 5114 }, { "epoch": 0.18538653908883332, "grad_norm": 2.4581650815684233, "learning_rate": 9.380141515527756e-06, "loss": 1.0543, "step": 5115 }, { "epoch": 0.18542278279148997, "grad_norm": 2.2534128664585027, "learning_rate": 9.379858433467206e-06, "loss": 0.8742, "step": 5116 }, { "epoch": 0.18545902649414664, "grad_norm": 2.32295472599761, "learning_rate": 9.379575291054855e-06, "loss": 0.8456, "step": 5117 }, { "epoch": 0.1854952701968033, "grad_norm": 2.140611355814331, "learning_rate": 9.379292088294604e-06, "loss": 0.7103, "step": 5118 }, { "epoch": 0.18553151389945996, "grad_norm": 2.390537698038569, "learning_rate": 9.37900882519036e-06, "loss": 1.1185, "step": 5119 }, { "epoch": 0.18556775760211663, "grad_norm": 2.56372713335777, "learning_rate": 9.37872550174602e-06, "loss": 0.8959, "step": 5120 }, { "epoch": 0.1856040013047733, "grad_norm": 2.159099987296167, "learning_rate": 9.378442117965492e-06, "loss": 0.9005, "step": 5121 }, { "epoch": 0.18564024500742995, "grad_norm": 2.3320167616988314, "learning_rate": 9.378158673852678e-06, "loss": 1.1025, "step": 5122 }, { "epoch": 0.18567648871008663, "grad_norm": 2.221063032228333, "learning_rate": 9.377875169411486e-06, "loss": 1.1084, "step": 5123 }, { "epoch": 0.18571273241274328, "grad_norm": 2.562990225916262, "learning_rate": 9.377591604645823e-06, "loss": 0.9754, "step": 5124 }, { "epoch": 0.18574897611539995, "grad_norm": 2.3231648055608627, "learning_rate": 9.377307979559593e-06, "loss": 0.8342, "step": 5125 }, { "epoch": 0.18578521981805662, "grad_norm": 2.5084659428816716, "learning_rate": 9.377024294156708e-06, "loss": 0.976, "step": 5126 }, { "epoch": 0.18582146352071327, "grad_norm": 2.5663159598507694, "learning_rate": 9.376740548441076e-06, "loss": 1.0719, "step": 5127 }, { "epoch": 0.18585770722336994, "grad_norm": 2.419832153648333, "learning_rate": 9.376456742416605e-06, "loss": 1.1336, "step": 5128 }, { "epoch": 0.1858939509260266, "grad_norm": 2.4177294833445586, "learning_rate": 9.376172876087206e-06, "loss": 0.9904, "step": 5129 }, { "epoch": 0.18593019462868327, "grad_norm": 2.46866317500054, "learning_rate": 9.375888949456793e-06, "loss": 0.9456, "step": 5130 }, { "epoch": 0.18596643833133994, "grad_norm": 2.471350471914432, "learning_rate": 9.375604962529276e-06, "loss": 0.8435, "step": 5131 }, { "epoch": 0.18600268203399659, "grad_norm": 2.1543052589945004, "learning_rate": 9.37532091530857e-06, "loss": 0.9184, "step": 5132 }, { "epoch": 0.18603892573665326, "grad_norm": 2.434347001627845, "learning_rate": 9.375036807798587e-06, "loss": 0.9768, "step": 5133 }, { "epoch": 0.1860751694393099, "grad_norm": 2.297458655482551, "learning_rate": 9.374752640003242e-06, "loss": 0.9441, "step": 5134 }, { "epoch": 0.18611141314196658, "grad_norm": 2.215667350658219, "learning_rate": 9.374468411926451e-06, "loss": 0.8797, "step": 5135 }, { "epoch": 0.18614765684462325, "grad_norm": 2.258355663275976, "learning_rate": 9.374184123572133e-06, "loss": 1.0459, "step": 5136 }, { "epoch": 0.1861839005472799, "grad_norm": 2.448516801144207, "learning_rate": 9.373899774944202e-06, "loss": 0.9123, "step": 5137 }, { "epoch": 0.18622014424993658, "grad_norm": 2.2793863337792772, "learning_rate": 9.373615366046578e-06, "loss": 0.9342, "step": 5138 }, { "epoch": 0.18625638795259325, "grad_norm": 2.1175482024575203, "learning_rate": 9.37333089688318e-06, "loss": 1.0079, "step": 5139 }, { "epoch": 0.1862926316552499, "grad_norm": 2.3361323827559155, "learning_rate": 9.373046367457925e-06, "loss": 0.9612, "step": 5140 }, { "epoch": 0.18632887535790657, "grad_norm": 2.4565539187930305, "learning_rate": 9.372761777774737e-06, "loss": 0.9659, "step": 5141 }, { "epoch": 0.18636511906056322, "grad_norm": 2.495488804301315, "learning_rate": 9.372477127837535e-06, "loss": 0.993, "step": 5142 }, { "epoch": 0.1864013627632199, "grad_norm": 2.3592897212050685, "learning_rate": 9.372192417650245e-06, "loss": 0.9504, "step": 5143 }, { "epoch": 0.18643760646587657, "grad_norm": 2.1099790673449754, "learning_rate": 9.371907647216786e-06, "loss": 0.848, "step": 5144 }, { "epoch": 0.1864738501685332, "grad_norm": 2.4000787611927183, "learning_rate": 9.371622816541085e-06, "loss": 0.9293, "step": 5145 }, { "epoch": 0.18651009387118989, "grad_norm": 2.3352977547011187, "learning_rate": 9.371337925627063e-06, "loss": 1.0335, "step": 5146 }, { "epoch": 0.18654633757384653, "grad_norm": 2.3741353850147697, "learning_rate": 9.371052974478649e-06, "loss": 1.067, "step": 5147 }, { "epoch": 0.1865825812765032, "grad_norm": 2.38319596351613, "learning_rate": 9.37076796309977e-06, "loss": 1.0183, "step": 5148 }, { "epoch": 0.18661882497915988, "grad_norm": 2.2927154410346993, "learning_rate": 9.37048289149435e-06, "loss": 1.1305, "step": 5149 }, { "epoch": 0.18665506868181653, "grad_norm": 2.6209237968047505, "learning_rate": 9.37019775966632e-06, "loss": 0.9154, "step": 5150 }, { "epoch": 0.1866913123844732, "grad_norm": 2.647203013204668, "learning_rate": 9.369912567619608e-06, "loss": 0.969, "step": 5151 }, { "epoch": 0.18672755608712985, "grad_norm": 2.3578438946968348, "learning_rate": 9.369627315358143e-06, "loss": 1.0194, "step": 5152 }, { "epoch": 0.18676379978978652, "grad_norm": 2.681352910110141, "learning_rate": 9.369342002885857e-06, "loss": 1.0963, "step": 5153 }, { "epoch": 0.1868000434924432, "grad_norm": 2.3263783200178008, "learning_rate": 9.369056630206681e-06, "loss": 1.1303, "step": 5154 }, { "epoch": 0.18683628719509984, "grad_norm": 2.388899025471475, "learning_rate": 9.368771197324546e-06, "loss": 1.1579, "step": 5155 }, { "epoch": 0.18687253089775652, "grad_norm": 2.1371129694679936, "learning_rate": 9.368485704243386e-06, "loss": 1.1413, "step": 5156 }, { "epoch": 0.1869087746004132, "grad_norm": 2.306177674646797, "learning_rate": 9.368200150967136e-06, "loss": 0.9002, "step": 5157 }, { "epoch": 0.18694501830306984, "grad_norm": 2.3482753670908636, "learning_rate": 9.36791453749973e-06, "loss": 0.9896, "step": 5158 }, { "epoch": 0.1869812620057265, "grad_norm": 2.230001216874599, "learning_rate": 9.367628863845103e-06, "loss": 0.9785, "step": 5159 }, { "epoch": 0.18701750570838316, "grad_norm": 2.2039440257398057, "learning_rate": 9.36734313000719e-06, "loss": 0.8653, "step": 5160 }, { "epoch": 0.18705374941103983, "grad_norm": 2.217523961197757, "learning_rate": 9.367057335989931e-06, "loss": 0.9968, "step": 5161 }, { "epoch": 0.1870899931136965, "grad_norm": 2.3429204535797585, "learning_rate": 9.366771481797264e-06, "loss": 0.7895, "step": 5162 }, { "epoch": 0.18712623681635315, "grad_norm": 2.628887880748698, "learning_rate": 9.366485567433124e-06, "loss": 1.0368, "step": 5163 }, { "epoch": 0.18716248051900983, "grad_norm": 2.5755158866883905, "learning_rate": 9.366199592901456e-06, "loss": 0.9113, "step": 5164 }, { "epoch": 0.18719872422166647, "grad_norm": 2.300694254798719, "learning_rate": 9.365913558206198e-06, "loss": 1.0427, "step": 5165 }, { "epoch": 0.18723496792432315, "grad_norm": 2.7744340094988855, "learning_rate": 9.36562746335129e-06, "loss": 0.9854, "step": 5166 }, { "epoch": 0.18727121162697982, "grad_norm": 2.326552571064703, "learning_rate": 9.365341308340678e-06, "loss": 0.9599, "step": 5167 }, { "epoch": 0.18730745532963647, "grad_norm": 2.281590077926475, "learning_rate": 9.365055093178303e-06, "loss": 1.0586, "step": 5168 }, { "epoch": 0.18734369903229314, "grad_norm": 2.443816354934426, "learning_rate": 9.364768817868107e-06, "loss": 1.0428, "step": 5169 }, { "epoch": 0.1873799427349498, "grad_norm": 2.26708073153385, "learning_rate": 9.364482482414036e-06, "loss": 0.8331, "step": 5170 }, { "epoch": 0.18741618643760646, "grad_norm": 2.4945995556113387, "learning_rate": 9.364196086820038e-06, "loss": 1.1013, "step": 5171 }, { "epoch": 0.18745243014026314, "grad_norm": 2.3398017606646118, "learning_rate": 9.363909631090058e-06, "loss": 0.934, "step": 5172 }, { "epoch": 0.18748867384291978, "grad_norm": 2.2487708527909636, "learning_rate": 9.36362311522804e-06, "loss": 0.9484, "step": 5173 }, { "epoch": 0.18752491754557646, "grad_norm": 2.4279008999742606, "learning_rate": 9.363336539237934e-06, "loss": 1.1472, "step": 5174 }, { "epoch": 0.18756116124823313, "grad_norm": 2.537948595749144, "learning_rate": 9.363049903123692e-06, "loss": 1.1466, "step": 5175 }, { "epoch": 0.18759740495088978, "grad_norm": 2.0937386174195534, "learning_rate": 9.362763206889261e-06, "loss": 1.012, "step": 5176 }, { "epoch": 0.18763364865354645, "grad_norm": 2.5278332202269804, "learning_rate": 9.36247645053859e-06, "loss": 1.0898, "step": 5177 }, { "epoch": 0.1876698923562031, "grad_norm": 2.214934270726287, "learning_rate": 9.362189634075632e-06, "loss": 0.9717, "step": 5178 }, { "epoch": 0.18770613605885977, "grad_norm": 2.616158088796257, "learning_rate": 9.36190275750434e-06, "loss": 1.0786, "step": 5179 }, { "epoch": 0.18774237976151645, "grad_norm": 2.505455317293217, "learning_rate": 9.361615820828665e-06, "loss": 1.0371, "step": 5180 }, { "epoch": 0.1877786234641731, "grad_norm": 2.423805482821082, "learning_rate": 9.36132882405256e-06, "loss": 0.9139, "step": 5181 }, { "epoch": 0.18781486716682977, "grad_norm": 2.3762491566051565, "learning_rate": 9.361041767179983e-06, "loss": 1.0735, "step": 5182 }, { "epoch": 0.18785111086948642, "grad_norm": 2.401756396790113, "learning_rate": 9.360754650214889e-06, "loss": 1.0082, "step": 5183 }, { "epoch": 0.1878873545721431, "grad_norm": 2.6807939011159276, "learning_rate": 9.360467473161233e-06, "loss": 1.0721, "step": 5184 }, { "epoch": 0.18792359827479976, "grad_norm": 2.3803206542597173, "learning_rate": 9.360180236022972e-06, "loss": 1.2165, "step": 5185 }, { "epoch": 0.1879598419774564, "grad_norm": 2.4687489640140003, "learning_rate": 9.359892938804064e-06, "loss": 1.1892, "step": 5186 }, { "epoch": 0.18799608568011308, "grad_norm": 2.525076071553485, "learning_rate": 9.359605581508469e-06, "loss": 0.9641, "step": 5187 }, { "epoch": 0.18803232938276973, "grad_norm": 2.7083966068946115, "learning_rate": 9.359318164140145e-06, "loss": 1.0615, "step": 5188 }, { "epoch": 0.1880685730854264, "grad_norm": 2.07062180055785, "learning_rate": 9.359030686703053e-06, "loss": 0.9686, "step": 5189 }, { "epoch": 0.18810481678808308, "grad_norm": 2.633930078768958, "learning_rate": 9.358743149201156e-06, "loss": 1.0578, "step": 5190 }, { "epoch": 0.18814106049073973, "grad_norm": 2.4916545661921163, "learning_rate": 9.358455551638412e-06, "loss": 0.9853, "step": 5191 }, { "epoch": 0.1881773041933964, "grad_norm": 2.564682868056523, "learning_rate": 9.358167894018787e-06, "loss": 1.061, "step": 5192 }, { "epoch": 0.18821354789605307, "grad_norm": 2.3964160218502935, "learning_rate": 9.357880176346245e-06, "loss": 0.9008, "step": 5193 }, { "epoch": 0.18824979159870972, "grad_norm": 2.3544057644252434, "learning_rate": 9.357592398624748e-06, "loss": 1.0066, "step": 5194 }, { "epoch": 0.1882860353013664, "grad_norm": 2.359267715228137, "learning_rate": 9.357304560858265e-06, "loss": 0.8393, "step": 5195 }, { "epoch": 0.18832227900402304, "grad_norm": 2.5329631564065975, "learning_rate": 9.35701666305076e-06, "loss": 0.9411, "step": 5196 }, { "epoch": 0.18835852270667972, "grad_norm": 2.459871072194624, "learning_rate": 9.356728705206199e-06, "loss": 1.0842, "step": 5197 }, { "epoch": 0.1883947664093364, "grad_norm": 2.295464184317298, "learning_rate": 9.356440687328555e-06, "loss": 0.9383, "step": 5198 }, { "epoch": 0.18843101011199304, "grad_norm": 2.1891535891517373, "learning_rate": 9.356152609421788e-06, "loss": 1.1566, "step": 5199 }, { "epoch": 0.1884672538146497, "grad_norm": 2.2384140006717868, "learning_rate": 9.355864471489876e-06, "loss": 1.004, "step": 5200 }, { "epoch": 0.18850349751730636, "grad_norm": 2.543608302007271, "learning_rate": 9.355576273536786e-06, "loss": 0.9675, "step": 5201 }, { "epoch": 0.18853974121996303, "grad_norm": 2.5831019858358957, "learning_rate": 9.355288015566487e-06, "loss": 1.0632, "step": 5202 }, { "epoch": 0.1885759849226197, "grad_norm": 2.297560580983659, "learning_rate": 9.354999697582955e-06, "loss": 0.7815, "step": 5203 }, { "epoch": 0.18861222862527635, "grad_norm": 2.4705153725887214, "learning_rate": 9.354711319590161e-06, "loss": 0.9193, "step": 5204 }, { "epoch": 0.18864847232793303, "grad_norm": 2.3754763508440457, "learning_rate": 9.354422881592078e-06, "loss": 0.9104, "step": 5205 }, { "epoch": 0.18868471603058967, "grad_norm": 2.427887262024602, "learning_rate": 9.35413438359268e-06, "loss": 0.9811, "step": 5206 }, { "epoch": 0.18872095973324635, "grad_norm": 2.675368289420584, "learning_rate": 9.353845825595946e-06, "loss": 1.1577, "step": 5207 }, { "epoch": 0.18875720343590302, "grad_norm": 2.1541555825002137, "learning_rate": 9.353557207605848e-06, "loss": 1.0354, "step": 5208 }, { "epoch": 0.18879344713855967, "grad_norm": 2.534585588759319, "learning_rate": 9.353268529626364e-06, "loss": 1.0203, "step": 5209 }, { "epoch": 0.18882969084121634, "grad_norm": 2.245706410041882, "learning_rate": 9.352979791661474e-06, "loss": 0.7826, "step": 5210 }, { "epoch": 0.18886593454387302, "grad_norm": 2.5666603481926886, "learning_rate": 9.352690993715154e-06, "loss": 0.8323, "step": 5211 }, { "epoch": 0.18890217824652966, "grad_norm": 1.684939170679628, "learning_rate": 9.352402135791385e-06, "loss": 0.847, "step": 5212 }, { "epoch": 0.18893842194918634, "grad_norm": 2.4271201705003076, "learning_rate": 9.352113217894147e-06, "loss": 1.1751, "step": 5213 }, { "epoch": 0.18897466565184298, "grad_norm": 2.3831425037073752, "learning_rate": 9.35182424002742e-06, "loss": 1.0434, "step": 5214 }, { "epoch": 0.18901090935449966, "grad_norm": 2.382109199027419, "learning_rate": 9.351535202195188e-06, "loss": 0.8244, "step": 5215 }, { "epoch": 0.18904715305715633, "grad_norm": 2.4794913665936185, "learning_rate": 9.351246104401433e-06, "loss": 0.9457, "step": 5216 }, { "epoch": 0.18908339675981298, "grad_norm": 2.4256408494785235, "learning_rate": 9.350956946650136e-06, "loss": 1.1062, "step": 5217 }, { "epoch": 0.18911964046246965, "grad_norm": 2.494540032074563, "learning_rate": 9.350667728945285e-06, "loss": 0.8984, "step": 5218 }, { "epoch": 0.1891558841651263, "grad_norm": 2.605004743213346, "learning_rate": 9.350378451290864e-06, "loss": 1.0488, "step": 5219 }, { "epoch": 0.18919212786778297, "grad_norm": 2.3109654460683395, "learning_rate": 9.350089113690858e-06, "loss": 1.0058, "step": 5220 }, { "epoch": 0.18922837157043965, "grad_norm": 2.2889517713071705, "learning_rate": 9.349799716149257e-06, "loss": 0.8934, "step": 5221 }, { "epoch": 0.1892646152730963, "grad_norm": 2.499813367231614, "learning_rate": 9.349510258670045e-06, "loss": 1.0555, "step": 5222 }, { "epoch": 0.18930085897575297, "grad_norm": 2.3709407212112166, "learning_rate": 9.349220741257213e-06, "loss": 0.9307, "step": 5223 }, { "epoch": 0.1893371026784096, "grad_norm": 2.4175315326903015, "learning_rate": 9.348931163914749e-06, "loss": 0.9035, "step": 5224 }, { "epoch": 0.1893733463810663, "grad_norm": 2.4348837010113837, "learning_rate": 9.348641526646644e-06, "loss": 0.9925, "step": 5225 }, { "epoch": 0.18940959008372296, "grad_norm": 2.15914146520297, "learning_rate": 9.348351829456888e-06, "loss": 0.9359, "step": 5226 }, { "epoch": 0.1894458337863796, "grad_norm": 1.8716270175272662, "learning_rate": 9.348062072349474e-06, "loss": 0.8239, "step": 5227 }, { "epoch": 0.18948207748903628, "grad_norm": 2.419636837842311, "learning_rate": 9.347772255328394e-06, "loss": 0.9522, "step": 5228 }, { "epoch": 0.18951832119169296, "grad_norm": 2.434886093736823, "learning_rate": 9.347482378397643e-06, "loss": 1.0626, "step": 5229 }, { "epoch": 0.1895545648943496, "grad_norm": 2.159583915428832, "learning_rate": 9.347192441561212e-06, "loss": 0.9783, "step": 5230 }, { "epoch": 0.18959080859700628, "grad_norm": 2.452352540564991, "learning_rate": 9.3469024448231e-06, "loss": 0.8973, "step": 5231 }, { "epoch": 0.18962705229966292, "grad_norm": 2.598692225768166, "learning_rate": 9.346612388187301e-06, "loss": 0.946, "step": 5232 }, { "epoch": 0.1896632960023196, "grad_norm": 2.265657662512516, "learning_rate": 9.34632227165781e-06, "loss": 0.9996, "step": 5233 }, { "epoch": 0.18969953970497627, "grad_norm": 2.45128800085588, "learning_rate": 9.34603209523863e-06, "loss": 0.9085, "step": 5234 }, { "epoch": 0.18973578340763292, "grad_norm": 2.620202650148301, "learning_rate": 9.345741858933755e-06, "loss": 1.0158, "step": 5235 }, { "epoch": 0.1897720271102896, "grad_norm": 2.226324236381455, "learning_rate": 9.345451562747184e-06, "loss": 1.0791, "step": 5236 }, { "epoch": 0.18980827081294624, "grad_norm": 2.390878278640537, "learning_rate": 9.34516120668292e-06, "loss": 0.9279, "step": 5237 }, { "epoch": 0.1898445145156029, "grad_norm": 2.045732234814393, "learning_rate": 9.344870790744962e-06, "loss": 0.8196, "step": 5238 }, { "epoch": 0.1898807582182596, "grad_norm": 2.6547797847564536, "learning_rate": 9.344580314937311e-06, "loss": 0.8948, "step": 5239 }, { "epoch": 0.18991700192091623, "grad_norm": 2.471403273164791, "learning_rate": 9.344289779263974e-06, "loss": 1.0559, "step": 5240 }, { "epoch": 0.1899532456235729, "grad_norm": 2.103288001922468, "learning_rate": 9.343999183728947e-06, "loss": 0.8952, "step": 5241 }, { "epoch": 0.18998948932622955, "grad_norm": 2.4310070220308737, "learning_rate": 9.34370852833624e-06, "loss": 0.8741, "step": 5242 }, { "epoch": 0.19002573302888623, "grad_norm": 2.3195645578352497, "learning_rate": 9.343417813089858e-06, "loss": 0.8909, "step": 5243 }, { "epoch": 0.1900619767315429, "grad_norm": 2.422012300497557, "learning_rate": 9.343127037993804e-06, "loss": 0.9849, "step": 5244 }, { "epoch": 0.19009822043419955, "grad_norm": 2.3620786794590822, "learning_rate": 9.342836203052086e-06, "loss": 1.1241, "step": 5245 }, { "epoch": 0.19013446413685622, "grad_norm": 2.3260860141090145, "learning_rate": 9.342545308268712e-06, "loss": 0.8663, "step": 5246 }, { "epoch": 0.1901707078395129, "grad_norm": 2.545291547949198, "learning_rate": 9.34225435364769e-06, "loss": 1.0272, "step": 5247 }, { "epoch": 0.19020695154216954, "grad_norm": 2.355712350945455, "learning_rate": 9.34196333919303e-06, "loss": 0.8725, "step": 5248 }, { "epoch": 0.19024319524482622, "grad_norm": 2.0955883617403104, "learning_rate": 9.341672264908738e-06, "loss": 0.7693, "step": 5249 }, { "epoch": 0.19027943894748287, "grad_norm": 2.32852417032295, "learning_rate": 9.34138113079883e-06, "loss": 0.9558, "step": 5250 }, { "epoch": 0.19031568265013954, "grad_norm": 2.2676828238829705, "learning_rate": 9.341089936867316e-06, "loss": 0.9654, "step": 5251 }, { "epoch": 0.1903519263527962, "grad_norm": 2.6329093205913927, "learning_rate": 9.340798683118208e-06, "loss": 0.9992, "step": 5252 }, { "epoch": 0.19038817005545286, "grad_norm": 2.457243757551954, "learning_rate": 9.340507369555519e-06, "loss": 1.0539, "step": 5253 }, { "epoch": 0.19042441375810953, "grad_norm": 2.44578382186525, "learning_rate": 9.340215996183264e-06, "loss": 0.898, "step": 5254 }, { "epoch": 0.19046065746076618, "grad_norm": 2.2671035082110778, "learning_rate": 9.339924563005457e-06, "loss": 1.0073, "step": 5255 }, { "epoch": 0.19049690116342285, "grad_norm": 2.4349509036010097, "learning_rate": 9.339633070026114e-06, "loss": 1.0031, "step": 5256 }, { "epoch": 0.19053314486607953, "grad_norm": 2.7092894256009195, "learning_rate": 9.339341517249252e-06, "loss": 1.1005, "step": 5257 }, { "epoch": 0.19056938856873618, "grad_norm": 2.6326034397128266, "learning_rate": 9.339049904678888e-06, "loss": 1.0295, "step": 5258 }, { "epoch": 0.19060563227139285, "grad_norm": 2.5585339520419907, "learning_rate": 9.338758232319041e-06, "loss": 1.1445, "step": 5259 }, { "epoch": 0.1906418759740495, "grad_norm": 2.5844039791574964, "learning_rate": 9.33846650017373e-06, "loss": 0.877, "step": 5260 }, { "epoch": 0.19067811967670617, "grad_norm": 2.0942712560647303, "learning_rate": 9.338174708246974e-06, "loss": 0.8256, "step": 5261 }, { "epoch": 0.19071436337936284, "grad_norm": 2.6109156138408633, "learning_rate": 9.337882856542794e-06, "loss": 1.0391, "step": 5262 }, { "epoch": 0.1907506070820195, "grad_norm": 2.1097946191563905, "learning_rate": 9.337590945065211e-06, "loss": 1.0384, "step": 5263 }, { "epoch": 0.19078685078467617, "grad_norm": 2.3067055374965872, "learning_rate": 9.337298973818249e-06, "loss": 1.0195, "step": 5264 }, { "epoch": 0.19082309448733284, "grad_norm": 2.657403388301006, "learning_rate": 9.33700694280593e-06, "loss": 1.0842, "step": 5265 }, { "epoch": 0.19085933818998949, "grad_norm": 2.7518763557006705, "learning_rate": 9.336714852032278e-06, "loss": 0.8589, "step": 5266 }, { "epoch": 0.19089558189264616, "grad_norm": 2.2680196817660665, "learning_rate": 9.33642270150132e-06, "loss": 0.9816, "step": 5267 }, { "epoch": 0.1909318255953028, "grad_norm": 2.269412391791382, "learning_rate": 9.336130491217077e-06, "loss": 0.9897, "step": 5268 }, { "epoch": 0.19096806929795948, "grad_norm": 2.4949847102277833, "learning_rate": 9.33583822118358e-06, "loss": 1.0658, "step": 5269 }, { "epoch": 0.19100431300061615, "grad_norm": 2.3192132763687154, "learning_rate": 9.335545891404852e-06, "loss": 1.1798, "step": 5270 }, { "epoch": 0.1910405567032728, "grad_norm": 2.3469970896520964, "learning_rate": 9.335253501884926e-06, "loss": 0.8263, "step": 5271 }, { "epoch": 0.19107680040592948, "grad_norm": 2.6886131728218174, "learning_rate": 9.334961052627829e-06, "loss": 0.8337, "step": 5272 }, { "epoch": 0.19111304410858612, "grad_norm": 2.4348241671441215, "learning_rate": 9.334668543637588e-06, "loss": 0.8996, "step": 5273 }, { "epoch": 0.1911492878112428, "grad_norm": 2.1104770060647366, "learning_rate": 9.334375974918239e-06, "loss": 0.8918, "step": 5274 }, { "epoch": 0.19118553151389947, "grad_norm": 2.4406234910285627, "learning_rate": 9.334083346473809e-06, "loss": 0.8959, "step": 5275 }, { "epoch": 0.19122177521655612, "grad_norm": 2.2101516533375234, "learning_rate": 9.333790658308331e-06, "loss": 0.9968, "step": 5276 }, { "epoch": 0.1912580189192128, "grad_norm": 2.3983962874640006, "learning_rate": 9.33349791042584e-06, "loss": 0.8538, "step": 5277 }, { "epoch": 0.19129426262186944, "grad_norm": 2.7378489086111863, "learning_rate": 9.333205102830367e-06, "loss": 1.083, "step": 5278 }, { "epoch": 0.1913305063245261, "grad_norm": 2.3655720176926414, "learning_rate": 9.33291223552595e-06, "loss": 0.9759, "step": 5279 }, { "epoch": 0.19136675002718279, "grad_norm": 2.5960597337308364, "learning_rate": 9.332619308516621e-06, "loss": 0.9788, "step": 5280 }, { "epoch": 0.19140299372983943, "grad_norm": 2.1385416247972784, "learning_rate": 9.33232632180642e-06, "loss": 0.9907, "step": 5281 }, { "epoch": 0.1914392374324961, "grad_norm": 2.3971099331947987, "learning_rate": 9.332033275399383e-06, "loss": 0.8994, "step": 5282 }, { "epoch": 0.19147548113515278, "grad_norm": 2.3435975051822155, "learning_rate": 9.331740169299545e-06, "loss": 0.9476, "step": 5283 }, { "epoch": 0.19151172483780943, "grad_norm": 2.435072903278092, "learning_rate": 9.33144700351095e-06, "loss": 1.0823, "step": 5284 }, { "epoch": 0.1915479685404661, "grad_norm": 2.467646028647776, "learning_rate": 9.331153778037632e-06, "loss": 0.8861, "step": 5285 }, { "epoch": 0.19158421224312275, "grad_norm": 2.500345326099224, "learning_rate": 9.330860492883635e-06, "loss": 1.0436, "step": 5286 }, { "epoch": 0.19162045594577942, "grad_norm": 2.285695604889875, "learning_rate": 9.330567148053e-06, "loss": 0.9552, "step": 5287 }, { "epoch": 0.1916566996484361, "grad_norm": 2.3064331301156993, "learning_rate": 9.330273743549769e-06, "loss": 0.9554, "step": 5288 }, { "epoch": 0.19169294335109274, "grad_norm": 2.739466886694969, "learning_rate": 9.329980279377986e-06, "loss": 1.03, "step": 5289 }, { "epoch": 0.19172918705374942, "grad_norm": 2.2569536658843847, "learning_rate": 9.329686755541691e-06, "loss": 0.9331, "step": 5290 }, { "epoch": 0.19176543075640606, "grad_norm": 2.2075967161715577, "learning_rate": 9.329393172044933e-06, "loss": 0.882, "step": 5291 }, { "epoch": 0.19180167445906274, "grad_norm": 2.2436016201073055, "learning_rate": 9.329099528891754e-06, "loss": 0.9509, "step": 5292 }, { "epoch": 0.1918379181617194, "grad_norm": 2.189141333915077, "learning_rate": 9.328805826086202e-06, "loss": 0.8988, "step": 5293 }, { "epoch": 0.19187416186437606, "grad_norm": 2.3684681453414456, "learning_rate": 9.328512063632325e-06, "loss": 1.0027, "step": 5294 }, { "epoch": 0.19191040556703273, "grad_norm": 2.4607126295153856, "learning_rate": 9.328218241534166e-06, "loss": 1.0165, "step": 5295 }, { "epoch": 0.19194664926968938, "grad_norm": 2.3604529422111473, "learning_rate": 9.32792435979578e-06, "loss": 0.9052, "step": 5296 }, { "epoch": 0.19198289297234605, "grad_norm": 2.5606057111869513, "learning_rate": 9.327630418421214e-06, "loss": 0.9294, "step": 5297 }, { "epoch": 0.19201913667500273, "grad_norm": 2.6115448624004696, "learning_rate": 9.32733641741452e-06, "loss": 0.8993, "step": 5298 }, { "epoch": 0.19205538037765937, "grad_norm": 2.3478710516444252, "learning_rate": 9.327042356779743e-06, "loss": 0.9491, "step": 5299 }, { "epoch": 0.19209162408031605, "grad_norm": 2.2715488256551777, "learning_rate": 9.326748236520941e-06, "loss": 1.0906, "step": 5300 }, { "epoch": 0.19212786778297272, "grad_norm": 2.2818994145970355, "learning_rate": 9.326454056642164e-06, "loss": 0.9183, "step": 5301 }, { "epoch": 0.19216411148562937, "grad_norm": 2.6870072934492493, "learning_rate": 9.326159817147468e-06, "loss": 1.0096, "step": 5302 }, { "epoch": 0.19220035518828604, "grad_norm": 2.373320939111144, "learning_rate": 9.325865518040906e-06, "loss": 0.8734, "step": 5303 }, { "epoch": 0.1922365988909427, "grad_norm": 2.3114320765249134, "learning_rate": 9.325571159326535e-06, "loss": 0.9177, "step": 5304 }, { "epoch": 0.19227284259359936, "grad_norm": 2.464309495319376, "learning_rate": 9.325276741008406e-06, "loss": 1.0421, "step": 5305 }, { "epoch": 0.19230908629625604, "grad_norm": 2.7418885584208486, "learning_rate": 9.32498226309058e-06, "loss": 1.1402, "step": 5306 }, { "epoch": 0.19234532999891268, "grad_norm": 2.075377427172653, "learning_rate": 9.324687725577117e-06, "loss": 0.9333, "step": 5307 }, { "epoch": 0.19238157370156936, "grad_norm": 2.5730125216865076, "learning_rate": 9.32439312847207e-06, "loss": 0.9001, "step": 5308 }, { "epoch": 0.192417817404226, "grad_norm": 2.529052782012289, "learning_rate": 9.324098471779502e-06, "loss": 1.0589, "step": 5309 }, { "epoch": 0.19245406110688268, "grad_norm": 2.3843922439560656, "learning_rate": 9.323803755503472e-06, "loss": 1.1901, "step": 5310 }, { "epoch": 0.19249030480953935, "grad_norm": 2.6307564653650193, "learning_rate": 9.323508979648042e-06, "loss": 0.9097, "step": 5311 }, { "epoch": 0.192526548512196, "grad_norm": 2.331137890304551, "learning_rate": 9.32321414421727e-06, "loss": 0.9339, "step": 5312 }, { "epoch": 0.19256279221485267, "grad_norm": 2.4867491125226664, "learning_rate": 9.322919249215225e-06, "loss": 1.2379, "step": 5313 }, { "epoch": 0.19259903591750932, "grad_norm": 2.503688452211324, "learning_rate": 9.322624294645966e-06, "loss": 0.9301, "step": 5314 }, { "epoch": 0.192635279620166, "grad_norm": 2.409554606949312, "learning_rate": 9.322329280513558e-06, "loss": 1.0993, "step": 5315 }, { "epoch": 0.19267152332282267, "grad_norm": 2.386981225069107, "learning_rate": 9.322034206822067e-06, "loss": 0.8853, "step": 5316 }, { "epoch": 0.19270776702547932, "grad_norm": 2.7669459479880874, "learning_rate": 9.321739073575558e-06, "loss": 1.1525, "step": 5317 }, { "epoch": 0.192744010728136, "grad_norm": 2.3061021393031167, "learning_rate": 9.321443880778099e-06, "loss": 0.9439, "step": 5318 }, { "epoch": 0.19278025443079266, "grad_norm": 2.3074509372810654, "learning_rate": 9.321148628433756e-06, "loss": 1.0753, "step": 5319 }, { "epoch": 0.1928164981334493, "grad_norm": 2.570574637913335, "learning_rate": 9.3208533165466e-06, "loss": 1.0661, "step": 5320 }, { "epoch": 0.19285274183610598, "grad_norm": 2.4175487838196084, "learning_rate": 9.320557945120697e-06, "loss": 1.0325, "step": 5321 }, { "epoch": 0.19288898553876263, "grad_norm": 2.6015292215829597, "learning_rate": 9.32026251416012e-06, "loss": 0.9059, "step": 5322 }, { "epoch": 0.1929252292414193, "grad_norm": 2.3151846319587137, "learning_rate": 9.319967023668937e-06, "loss": 0.8632, "step": 5323 }, { "epoch": 0.19296147294407598, "grad_norm": 2.7409581289706777, "learning_rate": 9.319671473651221e-06, "loss": 1.0536, "step": 5324 }, { "epoch": 0.19299771664673263, "grad_norm": 2.169511850116151, "learning_rate": 9.319375864111045e-06, "loss": 0.8154, "step": 5325 }, { "epoch": 0.1930339603493893, "grad_norm": 2.4975310041293803, "learning_rate": 9.319080195052481e-06, "loss": 1.115, "step": 5326 }, { "epoch": 0.19307020405204595, "grad_norm": 2.153910342082454, "learning_rate": 9.318784466479605e-06, "loss": 0.9102, "step": 5327 }, { "epoch": 0.19310644775470262, "grad_norm": 2.610314931455223, "learning_rate": 9.31848867839649e-06, "loss": 1.1174, "step": 5328 }, { "epoch": 0.1931426914573593, "grad_norm": 2.651126988920226, "learning_rate": 9.318192830807213e-06, "loss": 0.9542, "step": 5329 }, { "epoch": 0.19317893516001594, "grad_norm": 2.4827176660857346, "learning_rate": 9.317896923715853e-06, "loss": 1.1272, "step": 5330 }, { "epoch": 0.19321517886267262, "grad_norm": 2.1496232042697665, "learning_rate": 9.31760095712648e-06, "loss": 0.7095, "step": 5331 }, { "epoch": 0.19325142256532926, "grad_norm": 2.2781644238411003, "learning_rate": 9.31730493104318e-06, "loss": 1.0545, "step": 5332 }, { "epoch": 0.19328766626798594, "grad_norm": 2.4057251327079716, "learning_rate": 9.317008845470028e-06, "loss": 0.9007, "step": 5333 }, { "epoch": 0.1933239099706426, "grad_norm": 2.4560321353496133, "learning_rate": 9.316712700411107e-06, "loss": 1.116, "step": 5334 }, { "epoch": 0.19336015367329926, "grad_norm": 2.3867214076157164, "learning_rate": 9.316416495870494e-06, "loss": 0.8936, "step": 5335 }, { "epoch": 0.19339639737595593, "grad_norm": 2.154093268241101, "learning_rate": 9.316120231852273e-06, "loss": 0.9179, "step": 5336 }, { "epoch": 0.1934326410786126, "grad_norm": 2.7679200604788496, "learning_rate": 9.315823908360524e-06, "loss": 1.1005, "step": 5337 }, { "epoch": 0.19346888478126925, "grad_norm": 2.6235546773261733, "learning_rate": 9.315527525399333e-06, "loss": 1.1228, "step": 5338 }, { "epoch": 0.19350512848392593, "grad_norm": 2.1915655367706877, "learning_rate": 9.315231082972783e-06, "loss": 1.021, "step": 5339 }, { "epoch": 0.19354137218658257, "grad_norm": 2.258648379815747, "learning_rate": 9.314934581084957e-06, "loss": 1.0082, "step": 5340 }, { "epoch": 0.19357761588923925, "grad_norm": 2.9194580547878446, "learning_rate": 9.314638019739941e-06, "loss": 1.0605, "step": 5341 }, { "epoch": 0.19361385959189592, "grad_norm": 2.128707362151793, "learning_rate": 9.314341398941826e-06, "loss": 1.0893, "step": 5342 }, { "epoch": 0.19365010329455257, "grad_norm": 2.6162930007392458, "learning_rate": 9.314044718694692e-06, "loss": 0.9599, "step": 5343 }, { "epoch": 0.19368634699720924, "grad_norm": 2.6975035236213065, "learning_rate": 9.313747979002634e-06, "loss": 1.0157, "step": 5344 }, { "epoch": 0.1937225906998659, "grad_norm": 2.096851211331336, "learning_rate": 9.313451179869738e-06, "loss": 0.896, "step": 5345 }, { "epoch": 0.19375883440252256, "grad_norm": 2.5987286535099887, "learning_rate": 9.31315432130009e-06, "loss": 1.0515, "step": 5346 }, { "epoch": 0.19379507810517924, "grad_norm": 2.5732469967684173, "learning_rate": 9.312857403297786e-06, "loss": 0.8283, "step": 5347 }, { "epoch": 0.19383132180783588, "grad_norm": 2.36130983073605, "learning_rate": 9.312560425866916e-06, "loss": 1.0144, "step": 5348 }, { "epoch": 0.19386756551049256, "grad_norm": 2.4152546780377113, "learning_rate": 9.312263389011571e-06, "loss": 1.0074, "step": 5349 }, { "epoch": 0.1939038092131492, "grad_norm": 2.237972132176981, "learning_rate": 9.311966292735844e-06, "loss": 0.9282, "step": 5350 }, { "epoch": 0.19394005291580588, "grad_norm": 2.6513627813590825, "learning_rate": 9.311669137043828e-06, "loss": 0.9036, "step": 5351 }, { "epoch": 0.19397629661846255, "grad_norm": 2.3687587179838436, "learning_rate": 9.311371921939621e-06, "loss": 1.0973, "step": 5352 }, { "epoch": 0.1940125403211192, "grad_norm": 2.308089357695537, "learning_rate": 9.311074647427316e-06, "loss": 0.974, "step": 5353 }, { "epoch": 0.19404878402377587, "grad_norm": 2.176739866848914, "learning_rate": 9.31077731351101e-06, "loss": 0.8182, "step": 5354 }, { "epoch": 0.19408502772643252, "grad_norm": 2.488930021043764, "learning_rate": 9.310479920194799e-06, "loss": 1.0842, "step": 5355 }, { "epoch": 0.1941212714290892, "grad_norm": 2.397207053696935, "learning_rate": 9.310182467482781e-06, "loss": 0.959, "step": 5356 }, { "epoch": 0.19415751513174587, "grad_norm": 2.3179087586113987, "learning_rate": 9.309884955379056e-06, "loss": 0.9759, "step": 5357 }, { "epoch": 0.1941937588344025, "grad_norm": 2.5742312989351417, "learning_rate": 9.309587383887723e-06, "loss": 0.896, "step": 5358 }, { "epoch": 0.1942300025370592, "grad_norm": 2.4766271373730575, "learning_rate": 9.309289753012882e-06, "loss": 1.0089, "step": 5359 }, { "epoch": 0.19426624623971586, "grad_norm": 2.5677140685131117, "learning_rate": 9.308992062758634e-06, "loss": 1.0096, "step": 5360 }, { "epoch": 0.1943024899423725, "grad_norm": 2.4416840579929495, "learning_rate": 9.308694313129081e-06, "loss": 0.9174, "step": 5361 }, { "epoch": 0.19433873364502918, "grad_norm": 2.101185584967564, "learning_rate": 9.308396504128328e-06, "loss": 0.8283, "step": 5362 }, { "epoch": 0.19437497734768583, "grad_norm": 2.5383449499106807, "learning_rate": 9.308098635760475e-06, "loss": 0.9406, "step": 5363 }, { "epoch": 0.1944112210503425, "grad_norm": 2.489796505382893, "learning_rate": 9.307800708029629e-06, "loss": 0.8593, "step": 5364 }, { "epoch": 0.19444746475299918, "grad_norm": 2.547704276720103, "learning_rate": 9.307502720939895e-06, "loss": 1.0131, "step": 5365 }, { "epoch": 0.19448370845565582, "grad_norm": 2.266762981888309, "learning_rate": 9.307204674495377e-06, "loss": 0.9691, "step": 5366 }, { "epoch": 0.1945199521583125, "grad_norm": 2.555973317258878, "learning_rate": 9.306906568700186e-06, "loss": 0.9708, "step": 5367 }, { "epoch": 0.19455619586096914, "grad_norm": 2.3892763683723803, "learning_rate": 9.306608403558426e-06, "loss": 1.0407, "step": 5368 }, { "epoch": 0.19459243956362582, "grad_norm": 2.7441545125016558, "learning_rate": 9.306310179074207e-06, "loss": 1.0081, "step": 5369 }, { "epoch": 0.1946286832662825, "grad_norm": 2.455303764439154, "learning_rate": 9.306011895251637e-06, "loss": 0.9349, "step": 5370 }, { "epoch": 0.19466492696893914, "grad_norm": 2.560870266867471, "learning_rate": 9.305713552094828e-06, "loss": 0.986, "step": 5371 }, { "epoch": 0.1947011706715958, "grad_norm": 2.382070491265631, "learning_rate": 9.30541514960789e-06, "loss": 0.8991, "step": 5372 }, { "epoch": 0.19473741437425246, "grad_norm": 2.3537632620370776, "learning_rate": 9.305116687794935e-06, "loss": 1.1223, "step": 5373 }, { "epoch": 0.19477365807690913, "grad_norm": 2.295547324682327, "learning_rate": 9.304818166660077e-06, "loss": 1.0442, "step": 5374 }, { "epoch": 0.1948099017795658, "grad_norm": 2.247752021554279, "learning_rate": 9.304519586207426e-06, "loss": 0.8838, "step": 5375 }, { "epoch": 0.19484614548222245, "grad_norm": 2.41618407750146, "learning_rate": 9.304220946441101e-06, "loss": 1.1056, "step": 5376 }, { "epoch": 0.19488238918487913, "grad_norm": 2.411970642021393, "learning_rate": 9.303922247365211e-06, "loss": 1.0513, "step": 5377 }, { "epoch": 0.1949186328875358, "grad_norm": 2.658454827484053, "learning_rate": 9.303623488983879e-06, "loss": 1.0366, "step": 5378 }, { "epoch": 0.19495487659019245, "grad_norm": 2.2169684606788094, "learning_rate": 9.303324671301216e-06, "loss": 1.0033, "step": 5379 }, { "epoch": 0.19499112029284912, "grad_norm": 2.4933422886531456, "learning_rate": 9.30302579432134e-06, "loss": 0.8149, "step": 5380 }, { "epoch": 0.19502736399550577, "grad_norm": 2.530268840182911, "learning_rate": 9.302726858048375e-06, "loss": 0.9015, "step": 5381 }, { "epoch": 0.19506360769816244, "grad_norm": 2.663896448812345, "learning_rate": 9.302427862486435e-06, "loss": 1.1539, "step": 5382 }, { "epoch": 0.19509985140081912, "grad_norm": 2.482891000181203, "learning_rate": 9.302128807639639e-06, "loss": 1.0627, "step": 5383 }, { "epoch": 0.19513609510347577, "grad_norm": 2.5474731895589398, "learning_rate": 9.30182969351211e-06, "loss": 1.0342, "step": 5384 }, { "epoch": 0.19517233880613244, "grad_norm": 2.544365233131096, "learning_rate": 9.301530520107972e-06, "loss": 0.9651, "step": 5385 }, { "epoch": 0.19520858250878909, "grad_norm": 2.441037677008101, "learning_rate": 9.301231287431342e-06, "loss": 0.9492, "step": 5386 }, { "epoch": 0.19524482621144576, "grad_norm": 2.324091628846524, "learning_rate": 9.300931995486348e-06, "loss": 1.0451, "step": 5387 }, { "epoch": 0.19528106991410243, "grad_norm": 2.4471027841234343, "learning_rate": 9.300632644277112e-06, "loss": 1.1377, "step": 5388 }, { "epoch": 0.19531731361675908, "grad_norm": 2.210076288382214, "learning_rate": 9.30033323380776e-06, "loss": 0.9293, "step": 5389 }, { "epoch": 0.19535355731941575, "grad_norm": 2.624542262810147, "learning_rate": 9.300033764082414e-06, "loss": 0.9555, "step": 5390 }, { "epoch": 0.1953898010220724, "grad_norm": 2.307667913670196, "learning_rate": 9.299734235105206e-06, "loss": 0.9459, "step": 5391 }, { "epoch": 0.19542604472472908, "grad_norm": 2.4515215250712825, "learning_rate": 9.299434646880259e-06, "loss": 0.8536, "step": 5392 }, { "epoch": 0.19546228842738575, "grad_norm": 2.3446905394063533, "learning_rate": 9.299134999411703e-06, "loss": 0.9863, "step": 5393 }, { "epoch": 0.1954985321300424, "grad_norm": 2.3321600429946754, "learning_rate": 9.298835292703667e-06, "loss": 0.9429, "step": 5394 }, { "epoch": 0.19553477583269907, "grad_norm": 2.4450946808789076, "learning_rate": 9.298535526760281e-06, "loss": 0.9708, "step": 5395 }, { "epoch": 0.19557101953535574, "grad_norm": 2.555304299254709, "learning_rate": 9.298235701585675e-06, "loss": 1.1318, "step": 5396 }, { "epoch": 0.1956072632380124, "grad_norm": 2.1422148421369234, "learning_rate": 9.29793581718398e-06, "loss": 0.8964, "step": 5397 }, { "epoch": 0.19564350694066907, "grad_norm": 2.436572410673081, "learning_rate": 9.29763587355933e-06, "loss": 0.8777, "step": 5398 }, { "epoch": 0.1956797506433257, "grad_norm": 2.6642016477120842, "learning_rate": 9.297335870715855e-06, "loss": 0.896, "step": 5399 }, { "epoch": 0.19571599434598239, "grad_norm": 2.454102701387985, "learning_rate": 9.297035808657691e-06, "loss": 1.1175, "step": 5400 }, { "epoch": 0.19575223804863906, "grad_norm": 2.3773313859517793, "learning_rate": 9.296735687388974e-06, "loss": 1.0135, "step": 5401 }, { "epoch": 0.1957884817512957, "grad_norm": 2.3802196178494324, "learning_rate": 9.296435506913837e-06, "loss": 1.2487, "step": 5402 }, { "epoch": 0.19582472545395238, "grad_norm": 2.329638600483754, "learning_rate": 9.296135267236416e-06, "loss": 0.884, "step": 5403 }, { "epoch": 0.19586096915660903, "grad_norm": 2.2790978348476387, "learning_rate": 9.29583496836085e-06, "loss": 0.9491, "step": 5404 }, { "epoch": 0.1958972128592657, "grad_norm": 2.5174746024016903, "learning_rate": 9.295534610291276e-06, "loss": 0.8202, "step": 5405 }, { "epoch": 0.19593345656192238, "grad_norm": 2.4436581544806724, "learning_rate": 9.295234193031833e-06, "loss": 0.7475, "step": 5406 }, { "epoch": 0.19596970026457902, "grad_norm": 2.277148450702117, "learning_rate": 9.29493371658666e-06, "loss": 1.0383, "step": 5407 }, { "epoch": 0.1960059439672357, "grad_norm": 2.173790840137131, "learning_rate": 9.2946331809599e-06, "loss": 0.8577, "step": 5408 }, { "epoch": 0.19604218766989234, "grad_norm": 2.6081567220441757, "learning_rate": 9.29433258615569e-06, "loss": 0.9902, "step": 5409 }, { "epoch": 0.19607843137254902, "grad_norm": 2.398074855169584, "learning_rate": 9.294031932178174e-06, "loss": 0.9737, "step": 5410 }, { "epoch": 0.1961146750752057, "grad_norm": 2.1483927462176444, "learning_rate": 9.293731219031496e-06, "loss": 0.9992, "step": 5411 }, { "epoch": 0.19615091877786234, "grad_norm": 2.1740876865105694, "learning_rate": 9.293430446719798e-06, "loss": 0.9669, "step": 5412 }, { "epoch": 0.196187162480519, "grad_norm": 2.5720400635217597, "learning_rate": 9.293129615247225e-06, "loss": 0.9904, "step": 5413 }, { "epoch": 0.19622340618317569, "grad_norm": 2.7785583300016796, "learning_rate": 9.292828724617921e-06, "loss": 1.1247, "step": 5414 }, { "epoch": 0.19625964988583233, "grad_norm": 2.463613873849602, "learning_rate": 9.292527774836035e-06, "loss": 1.1749, "step": 5415 }, { "epoch": 0.196295893588489, "grad_norm": 2.0800140735310317, "learning_rate": 9.292226765905714e-06, "loss": 0.9483, "step": 5416 }, { "epoch": 0.19633213729114565, "grad_norm": 2.149466265787852, "learning_rate": 9.2919256978311e-06, "loss": 1.1621, "step": 5417 }, { "epoch": 0.19636838099380233, "grad_norm": 2.6866086788513166, "learning_rate": 9.291624570616349e-06, "loss": 0.974, "step": 5418 }, { "epoch": 0.196404624696459, "grad_norm": 2.449746988775646, "learning_rate": 9.291323384265605e-06, "loss": 0.8617, "step": 5419 }, { "epoch": 0.19644086839911565, "grad_norm": 2.4837100153805354, "learning_rate": 9.291022138783021e-06, "loss": 1.0375, "step": 5420 }, { "epoch": 0.19647711210177232, "grad_norm": 2.154421751778152, "learning_rate": 9.290720834172748e-06, "loss": 0.9366, "step": 5421 }, { "epoch": 0.19651335580442897, "grad_norm": 2.613050104987707, "learning_rate": 9.290419470438935e-06, "loss": 0.8877, "step": 5422 }, { "epoch": 0.19654959950708564, "grad_norm": 2.311438684965666, "learning_rate": 9.290118047585737e-06, "loss": 0.9386, "step": 5423 }, { "epoch": 0.19658584320974232, "grad_norm": 2.319126730433157, "learning_rate": 9.289816565617307e-06, "loss": 0.7798, "step": 5424 }, { "epoch": 0.19662208691239896, "grad_norm": 2.420720830417439, "learning_rate": 9.289515024537798e-06, "loss": 0.9617, "step": 5425 }, { "epoch": 0.19665833061505564, "grad_norm": 2.3686156905015463, "learning_rate": 9.289213424351368e-06, "loss": 1.009, "step": 5426 }, { "epoch": 0.19669457431771228, "grad_norm": 2.3669019450299444, "learning_rate": 9.28891176506217e-06, "loss": 1.0011, "step": 5427 }, { "epoch": 0.19673081802036896, "grad_norm": 2.112015364383687, "learning_rate": 9.288610046674362e-06, "loss": 0.9431, "step": 5428 }, { "epoch": 0.19676706172302563, "grad_norm": 2.2620424786040703, "learning_rate": 9.288308269192101e-06, "loss": 0.7903, "step": 5429 }, { "epoch": 0.19680330542568228, "grad_norm": 2.479401937147214, "learning_rate": 9.288006432619545e-06, "loss": 0.9217, "step": 5430 }, { "epoch": 0.19683954912833895, "grad_norm": 2.4719609436388987, "learning_rate": 9.287704536960855e-06, "loss": 1.0961, "step": 5431 }, { "epoch": 0.19687579283099563, "grad_norm": 2.393464331865627, "learning_rate": 9.28740258222019e-06, "loss": 1.033, "step": 5432 }, { "epoch": 0.19691203653365227, "grad_norm": 2.449277061751199, "learning_rate": 9.28710056840171e-06, "loss": 1.1636, "step": 5433 }, { "epoch": 0.19694828023630895, "grad_norm": 2.4638721720575782, "learning_rate": 9.286798495509575e-06, "loss": 0.8479, "step": 5434 }, { "epoch": 0.1969845239389656, "grad_norm": 2.4275137988138997, "learning_rate": 9.28649636354795e-06, "loss": 0.9834, "step": 5435 }, { "epoch": 0.19702076764162227, "grad_norm": 2.756365568838535, "learning_rate": 9.286194172520999e-06, "loss": 0.9463, "step": 5436 }, { "epoch": 0.19705701134427894, "grad_norm": 2.9023775053493575, "learning_rate": 9.285891922432883e-06, "loss": 0.8678, "step": 5437 }, { "epoch": 0.1970932550469356, "grad_norm": 2.34770671676331, "learning_rate": 9.28558961328777e-06, "loss": 0.8183, "step": 5438 }, { "epoch": 0.19712949874959226, "grad_norm": 2.5011362405209074, "learning_rate": 9.285287245089822e-06, "loss": 1.0705, "step": 5439 }, { "epoch": 0.1971657424522489, "grad_norm": 2.4088825281222523, "learning_rate": 9.284984817843209e-06, "loss": 1.0452, "step": 5440 }, { "epoch": 0.19720198615490558, "grad_norm": 2.452968500722293, "learning_rate": 9.284682331552096e-06, "loss": 0.9295, "step": 5441 }, { "epoch": 0.19723822985756226, "grad_norm": 2.404589369844872, "learning_rate": 9.284379786220652e-06, "loss": 0.8467, "step": 5442 }, { "epoch": 0.1972744735602189, "grad_norm": 2.579118824416557, "learning_rate": 9.284077181853044e-06, "loss": 1.0509, "step": 5443 }, { "epoch": 0.19731071726287558, "grad_norm": 2.154243841932733, "learning_rate": 9.283774518453446e-06, "loss": 0.9002, "step": 5444 }, { "epoch": 0.19734696096553223, "grad_norm": 2.4605974824891272, "learning_rate": 9.283471796026025e-06, "loss": 1.04, "step": 5445 }, { "epoch": 0.1973832046681889, "grad_norm": 2.721985080849628, "learning_rate": 9.283169014574953e-06, "loss": 1.0228, "step": 5446 }, { "epoch": 0.19741944837084557, "grad_norm": 2.1620357140312847, "learning_rate": 9.282866174104401e-06, "loss": 0.8863, "step": 5447 }, { "epoch": 0.19745569207350222, "grad_norm": 2.1066727428454466, "learning_rate": 9.282563274618546e-06, "loss": 0.8794, "step": 5448 }, { "epoch": 0.1974919357761589, "grad_norm": 2.5028401126443005, "learning_rate": 9.282260316121556e-06, "loss": 0.9575, "step": 5449 }, { "epoch": 0.19752817947881557, "grad_norm": 2.435277514531445, "learning_rate": 9.28195729861761e-06, "loss": 1.0666, "step": 5450 }, { "epoch": 0.19756442318147222, "grad_norm": 2.3670296855382498, "learning_rate": 9.281654222110884e-06, "loss": 0.9186, "step": 5451 }, { "epoch": 0.1976006668841289, "grad_norm": 2.420749846792263, "learning_rate": 9.28135108660555e-06, "loss": 1.0498, "step": 5452 }, { "epoch": 0.19763691058678554, "grad_norm": 2.4786749789561866, "learning_rate": 9.281047892105788e-06, "loss": 1.0726, "step": 5453 }, { "epoch": 0.1976731542894422, "grad_norm": 2.5902719570622357, "learning_rate": 9.280744638615775e-06, "loss": 0.8635, "step": 5454 }, { "epoch": 0.19770939799209888, "grad_norm": 2.8158149515124755, "learning_rate": 9.280441326139691e-06, "loss": 0.9456, "step": 5455 }, { "epoch": 0.19774564169475553, "grad_norm": 2.2894542862766953, "learning_rate": 9.280137954681713e-06, "loss": 0.9577, "step": 5456 }, { "epoch": 0.1977818853974122, "grad_norm": 2.5195318304233933, "learning_rate": 9.279834524246023e-06, "loss": 1.088, "step": 5457 }, { "epoch": 0.19781812910006885, "grad_norm": 2.692745300660131, "learning_rate": 9.279531034836801e-06, "loss": 1.0032, "step": 5458 }, { "epoch": 0.19785437280272553, "grad_norm": 2.279875440452133, "learning_rate": 9.27922748645823e-06, "loss": 0.8803, "step": 5459 }, { "epoch": 0.1978906165053822, "grad_norm": 2.1258907833085505, "learning_rate": 9.278923879114493e-06, "loss": 1.0596, "step": 5460 }, { "epoch": 0.19792686020803885, "grad_norm": 2.3634344740112674, "learning_rate": 9.27862021280977e-06, "loss": 0.8218, "step": 5461 }, { "epoch": 0.19796310391069552, "grad_norm": 2.2579701811927775, "learning_rate": 9.27831648754825e-06, "loss": 0.8572, "step": 5462 }, { "epoch": 0.19799934761335217, "grad_norm": 2.5008733095981426, "learning_rate": 9.278012703334115e-06, "loss": 0.916, "step": 5463 }, { "epoch": 0.19803559131600884, "grad_norm": 2.341272941364868, "learning_rate": 9.277708860171554e-06, "loss": 0.9972, "step": 5464 }, { "epoch": 0.19807183501866552, "grad_norm": 2.502919739125161, "learning_rate": 9.277404958064753e-06, "loss": 0.8997, "step": 5465 }, { "epoch": 0.19810807872132216, "grad_norm": 2.3065475619979323, "learning_rate": 9.277100997017896e-06, "loss": 0.7503, "step": 5466 }, { "epoch": 0.19814432242397884, "grad_norm": 2.5902345761760333, "learning_rate": 9.276796977035176e-06, "loss": 0.9007, "step": 5467 }, { "epoch": 0.1981805661266355, "grad_norm": 2.6529520736061456, "learning_rate": 9.276492898120779e-06, "loss": 1.0761, "step": 5468 }, { "epoch": 0.19821680982929216, "grad_norm": 2.4910650931111866, "learning_rate": 9.276188760278894e-06, "loss": 0.8637, "step": 5469 }, { "epoch": 0.19825305353194883, "grad_norm": 2.479515243093042, "learning_rate": 9.275884563513717e-06, "loss": 0.7954, "step": 5470 }, { "epoch": 0.19828929723460548, "grad_norm": 2.397326866457031, "learning_rate": 9.275580307829437e-06, "loss": 0.9299, "step": 5471 }, { "epoch": 0.19832554093726215, "grad_norm": 2.2157201778767295, "learning_rate": 9.275275993230245e-06, "loss": 1.0507, "step": 5472 }, { "epoch": 0.19836178463991883, "grad_norm": 2.2460999666875976, "learning_rate": 9.274971619720335e-06, "loss": 1.1052, "step": 5473 }, { "epoch": 0.19839802834257547, "grad_norm": 2.708156119604731, "learning_rate": 9.274667187303904e-06, "loss": 0.9069, "step": 5474 }, { "epoch": 0.19843427204523215, "grad_norm": 2.279194131777823, "learning_rate": 9.274362695985141e-06, "loss": 1.0161, "step": 5475 }, { "epoch": 0.1984705157478888, "grad_norm": 2.4196647264145446, "learning_rate": 9.274058145768246e-06, "loss": 0.9785, "step": 5476 }, { "epoch": 0.19850675945054547, "grad_norm": 2.2690124260363818, "learning_rate": 9.273753536657416e-06, "loss": 1.036, "step": 5477 }, { "epoch": 0.19854300315320214, "grad_norm": 2.3322320609818945, "learning_rate": 9.273448868656846e-06, "loss": 1.0202, "step": 5478 }, { "epoch": 0.1985792468558588, "grad_norm": 2.270905712658038, "learning_rate": 9.273144141770737e-06, "loss": 1.0021, "step": 5479 }, { "epoch": 0.19861549055851546, "grad_norm": 2.595173623037012, "learning_rate": 9.272839356003283e-06, "loss": 0.9312, "step": 5480 }, { "epoch": 0.1986517342611721, "grad_norm": 2.428584699917811, "learning_rate": 9.272534511358687e-06, "loss": 0.8964, "step": 5481 }, { "epoch": 0.19868797796382878, "grad_norm": 2.4782119415111477, "learning_rate": 9.272229607841151e-06, "loss": 0.79, "step": 5482 }, { "epoch": 0.19872422166648546, "grad_norm": 2.460817766667154, "learning_rate": 9.271924645454873e-06, "loss": 0.887, "step": 5483 }, { "epoch": 0.1987604653691421, "grad_norm": 2.203110970385544, "learning_rate": 9.271619624204059e-06, "loss": 0.8438, "step": 5484 }, { "epoch": 0.19879670907179878, "grad_norm": 2.4941579119627217, "learning_rate": 9.27131454409291e-06, "loss": 1.0285, "step": 5485 }, { "epoch": 0.19883295277445545, "grad_norm": 2.361798363582361, "learning_rate": 9.271009405125628e-06, "loss": 0.9622, "step": 5486 }, { "epoch": 0.1988691964771121, "grad_norm": 2.5311963329369003, "learning_rate": 9.27070420730642e-06, "loss": 0.7693, "step": 5487 }, { "epoch": 0.19890544017976877, "grad_norm": 2.3757267769188806, "learning_rate": 9.270398950639491e-06, "loss": 0.8736, "step": 5488 }, { "epoch": 0.19894168388242542, "grad_norm": 2.474909790591335, "learning_rate": 9.270093635129048e-06, "loss": 0.7836, "step": 5489 }, { "epoch": 0.1989779275850821, "grad_norm": 1.9655796832669874, "learning_rate": 9.269788260779297e-06, "loss": 0.8348, "step": 5490 }, { "epoch": 0.19901417128773877, "grad_norm": 2.690778352302211, "learning_rate": 9.269482827594445e-06, "loss": 0.9849, "step": 5491 }, { "epoch": 0.1990504149903954, "grad_norm": 2.434332601734033, "learning_rate": 9.269177335578704e-06, "loss": 0.9794, "step": 5492 }, { "epoch": 0.1990866586930521, "grad_norm": 2.502784899341436, "learning_rate": 9.268871784736277e-06, "loss": 0.9785, "step": 5493 }, { "epoch": 0.19912290239570873, "grad_norm": 2.549454694428103, "learning_rate": 9.268566175071383e-06, "loss": 0.9128, "step": 5494 }, { "epoch": 0.1991591460983654, "grad_norm": 2.291214566227521, "learning_rate": 9.268260506588227e-06, "loss": 0.7554, "step": 5495 }, { "epoch": 0.19919538980102208, "grad_norm": 2.051796795029409, "learning_rate": 9.267954779291022e-06, "loss": 0.8968, "step": 5496 }, { "epoch": 0.19923163350367873, "grad_norm": 2.4116719859447318, "learning_rate": 9.267648993183982e-06, "loss": 1.0256, "step": 5497 }, { "epoch": 0.1992678772063354, "grad_norm": 2.426968960063452, "learning_rate": 9.267343148271318e-06, "loss": 1.0914, "step": 5498 }, { "epoch": 0.19930412090899205, "grad_norm": 2.1410927089129994, "learning_rate": 9.267037244557249e-06, "loss": 0.8804, "step": 5499 }, { "epoch": 0.19934036461164872, "grad_norm": 2.3831107807290675, "learning_rate": 9.266731282045985e-06, "loss": 0.9524, "step": 5500 }, { "epoch": 0.1993766083143054, "grad_norm": 2.4451237643794066, "learning_rate": 9.266425260741746e-06, "loss": 1.0174, "step": 5501 }, { "epoch": 0.19941285201696204, "grad_norm": 2.367401213858372, "learning_rate": 9.266119180648747e-06, "loss": 0.955, "step": 5502 }, { "epoch": 0.19944909571961872, "grad_norm": 2.270938242879503, "learning_rate": 9.265813041771204e-06, "loss": 0.8713, "step": 5503 }, { "epoch": 0.1994853394222754, "grad_norm": 2.2673804321614845, "learning_rate": 9.26550684411334e-06, "loss": 1.111, "step": 5504 }, { "epoch": 0.19952158312493204, "grad_norm": 2.0740580729534828, "learning_rate": 9.265200587679369e-06, "loss": 0.8596, "step": 5505 }, { "epoch": 0.1995578268275887, "grad_norm": 2.508703242385435, "learning_rate": 9.264894272473515e-06, "loss": 0.9791, "step": 5506 }, { "epoch": 0.19959407053024536, "grad_norm": 2.1973411835859973, "learning_rate": 9.264587898499996e-06, "loss": 1.1085, "step": 5507 }, { "epoch": 0.19963031423290203, "grad_norm": 2.7692373907637906, "learning_rate": 9.264281465763036e-06, "loss": 1.0751, "step": 5508 }, { "epoch": 0.1996665579355587, "grad_norm": 2.3670010389254807, "learning_rate": 9.263974974266856e-06, "loss": 0.9883, "step": 5509 }, { "epoch": 0.19970280163821535, "grad_norm": 2.4778577162147264, "learning_rate": 9.26366842401568e-06, "loss": 1.0241, "step": 5510 }, { "epoch": 0.19973904534087203, "grad_norm": 2.3042749736460966, "learning_rate": 9.26336181501373e-06, "loss": 1.0319, "step": 5511 }, { "epoch": 0.19977528904352868, "grad_norm": 2.686273454247506, "learning_rate": 9.263055147265234e-06, "loss": 0.9517, "step": 5512 }, { "epoch": 0.19981153274618535, "grad_norm": 2.6333817112468156, "learning_rate": 9.262748420774416e-06, "loss": 0.9764, "step": 5513 }, { "epoch": 0.19984777644884202, "grad_norm": 2.3859590827881787, "learning_rate": 9.262441635545502e-06, "loss": 0.8597, "step": 5514 }, { "epoch": 0.19988402015149867, "grad_norm": 2.442629778713423, "learning_rate": 9.262134791582722e-06, "loss": 1.065, "step": 5515 }, { "epoch": 0.19992026385415534, "grad_norm": 2.4044376592594703, "learning_rate": 9.2618278888903e-06, "loss": 1.0062, "step": 5516 }, { "epoch": 0.199956507556812, "grad_norm": 2.4678262304744494, "learning_rate": 9.261520927472469e-06, "loss": 0.994, "step": 5517 }, { "epoch": 0.19999275125946867, "grad_norm": 2.60875781653288, "learning_rate": 9.261213907333457e-06, "loss": 0.9894, "step": 5518 }, { "epoch": 0.20002899496212534, "grad_norm": 2.5949369485384235, "learning_rate": 9.260906828477494e-06, "loss": 0.9162, "step": 5519 }, { "epoch": 0.20006523866478199, "grad_norm": 2.4290972535434516, "learning_rate": 9.260599690908812e-06, "loss": 0.8677, "step": 5520 }, { "epoch": 0.20010148236743866, "grad_norm": 2.2430340934612656, "learning_rate": 9.26029249463164e-06, "loss": 1.1282, "step": 5521 }, { "epoch": 0.20013772607009533, "grad_norm": 2.6407093564487596, "learning_rate": 9.259985239650217e-06, "loss": 0.9066, "step": 5522 }, { "epoch": 0.20017396977275198, "grad_norm": 2.3376058900474326, "learning_rate": 9.259677925968774e-06, "loss": 1.0753, "step": 5523 }, { "epoch": 0.20021021347540865, "grad_norm": 2.347591497673065, "learning_rate": 9.259370553591545e-06, "loss": 1.0124, "step": 5524 }, { "epoch": 0.2002464571780653, "grad_norm": 2.532385159695746, "learning_rate": 9.259063122522765e-06, "loss": 1.0523, "step": 5525 }, { "epoch": 0.20028270088072198, "grad_norm": 2.2476723001103833, "learning_rate": 9.258755632766672e-06, "loss": 0.85, "step": 5526 }, { "epoch": 0.20031894458337865, "grad_norm": 2.5639112387506486, "learning_rate": 9.2584480843275e-06, "loss": 1.1464, "step": 5527 }, { "epoch": 0.2003551882860353, "grad_norm": 2.1390188721865613, "learning_rate": 9.25814047720949e-06, "loss": 0.7925, "step": 5528 }, { "epoch": 0.20039143198869197, "grad_norm": 2.2215460587308105, "learning_rate": 9.257832811416881e-06, "loss": 0.9894, "step": 5529 }, { "epoch": 0.20042767569134862, "grad_norm": 3.2324983944494403, "learning_rate": 9.25752508695391e-06, "loss": 0.8502, "step": 5530 }, { "epoch": 0.2004639193940053, "grad_norm": 2.347203374502992, "learning_rate": 9.257217303824816e-06, "loss": 0.9285, "step": 5531 }, { "epoch": 0.20050016309666197, "grad_norm": 2.716486423457673, "learning_rate": 9.256909462033844e-06, "loss": 0.9883, "step": 5532 }, { "epoch": 0.2005364067993186, "grad_norm": 2.2805204016834675, "learning_rate": 9.256601561585234e-06, "loss": 0.9875, "step": 5533 }, { "epoch": 0.20057265050197529, "grad_norm": 2.464429590857931, "learning_rate": 9.256293602483228e-06, "loss": 0.8902, "step": 5534 }, { "epoch": 0.20060889420463193, "grad_norm": 2.363026801809358, "learning_rate": 9.255985584732073e-06, "loss": 0.9832, "step": 5535 }, { "epoch": 0.2006451379072886, "grad_norm": 2.334693963592649, "learning_rate": 9.255677508336007e-06, "loss": 0.946, "step": 5536 }, { "epoch": 0.20068138160994528, "grad_norm": 2.2875066443726157, "learning_rate": 9.255369373299281e-06, "loss": 1.0933, "step": 5537 }, { "epoch": 0.20071762531260193, "grad_norm": 2.602269800067794, "learning_rate": 9.255061179626137e-06, "loss": 0.9022, "step": 5538 }, { "epoch": 0.2007538690152586, "grad_norm": 2.2992019567622175, "learning_rate": 9.254752927320826e-06, "loss": 0.8447, "step": 5539 }, { "epoch": 0.20079011271791528, "grad_norm": 2.487647805963802, "learning_rate": 9.25444461638759e-06, "loss": 1.1093, "step": 5540 }, { "epoch": 0.20082635642057192, "grad_norm": 2.2181208184236914, "learning_rate": 9.254136246830681e-06, "loss": 0.8857, "step": 5541 }, { "epoch": 0.2008626001232286, "grad_norm": 2.4213299503679298, "learning_rate": 9.253827818654348e-06, "loss": 0.879, "step": 5542 }, { "epoch": 0.20089884382588524, "grad_norm": 2.5697621435471745, "learning_rate": 9.25351933186284e-06, "loss": 0.963, "step": 5543 }, { "epoch": 0.20093508752854192, "grad_norm": 2.341719631672521, "learning_rate": 9.253210786460409e-06, "loss": 0.7734, "step": 5544 }, { "epoch": 0.2009713312311986, "grad_norm": 2.207002383521942, "learning_rate": 9.252902182451305e-06, "loss": 0.9142, "step": 5545 }, { "epoch": 0.20100757493385524, "grad_norm": 2.500347014258434, "learning_rate": 9.252593519839779e-06, "loss": 0.8125, "step": 5546 }, { "epoch": 0.2010438186365119, "grad_norm": 2.57558030175937, "learning_rate": 9.252284798630089e-06, "loss": 0.8582, "step": 5547 }, { "epoch": 0.20108006233916856, "grad_norm": 2.424826774987576, "learning_rate": 9.251976018826484e-06, "loss": 0.7323, "step": 5548 }, { "epoch": 0.20111630604182523, "grad_norm": 2.3185856223832064, "learning_rate": 9.251667180433223e-06, "loss": 0.8754, "step": 5549 }, { "epoch": 0.2011525497444819, "grad_norm": 2.2697527662360057, "learning_rate": 9.251358283454557e-06, "loss": 0.8774, "step": 5550 }, { "epoch": 0.20118879344713855, "grad_norm": 2.1779496567469163, "learning_rate": 9.251049327894747e-06, "loss": 1.2236, "step": 5551 }, { "epoch": 0.20122503714979523, "grad_norm": 2.3417284231067317, "learning_rate": 9.250740313758048e-06, "loss": 1.0874, "step": 5552 }, { "epoch": 0.20126128085245187, "grad_norm": 2.178160014107487, "learning_rate": 9.250431241048718e-06, "loss": 0.8515, "step": 5553 }, { "epoch": 0.20129752455510855, "grad_norm": 2.449343150808474, "learning_rate": 9.250122109771016e-06, "loss": 0.9365, "step": 5554 }, { "epoch": 0.20133376825776522, "grad_norm": 2.519286113241556, "learning_rate": 9.249812919929203e-06, "loss": 0.874, "step": 5555 }, { "epoch": 0.20137001196042187, "grad_norm": 2.483663149436272, "learning_rate": 9.249503671527536e-06, "loss": 1.0136, "step": 5556 }, { "epoch": 0.20140625566307854, "grad_norm": 2.456175473362847, "learning_rate": 9.24919436457028e-06, "loss": 1.1416, "step": 5557 }, { "epoch": 0.20144249936573522, "grad_norm": 2.2877329216079443, "learning_rate": 9.248884999061694e-06, "loss": 0.8719, "step": 5558 }, { "epoch": 0.20147874306839186, "grad_norm": 2.125969458388955, "learning_rate": 9.248575575006044e-06, "loss": 0.8304, "step": 5559 }, { "epoch": 0.20151498677104854, "grad_norm": 2.4396026410705645, "learning_rate": 9.24826609240759e-06, "loss": 0.872, "step": 5560 }, { "epoch": 0.20155123047370518, "grad_norm": 2.1705039514987505, "learning_rate": 9.2479565512706e-06, "loss": 0.9348, "step": 5561 }, { "epoch": 0.20158747417636186, "grad_norm": 2.4396163357331213, "learning_rate": 9.247646951599338e-06, "loss": 1.0957, "step": 5562 }, { "epoch": 0.20162371787901853, "grad_norm": 2.5367781655522452, "learning_rate": 9.247337293398069e-06, "loss": 0.8804, "step": 5563 }, { "epoch": 0.20165996158167518, "grad_norm": 2.4354032860140626, "learning_rate": 9.24702757667106e-06, "loss": 0.7955, "step": 5564 }, { "epoch": 0.20169620528433185, "grad_norm": 2.619174592110256, "learning_rate": 9.246717801422582e-06, "loss": 1.0232, "step": 5565 }, { "epoch": 0.2017324489869885, "grad_norm": 2.30318167838612, "learning_rate": 9.246407967656901e-06, "loss": 0.888, "step": 5566 }, { "epoch": 0.20176869268964517, "grad_norm": 2.7814885830758866, "learning_rate": 9.246098075378284e-06, "loss": 0.8563, "step": 5567 }, { "epoch": 0.20180493639230185, "grad_norm": 2.401607157120523, "learning_rate": 9.245788124591006e-06, "loss": 0.9376, "step": 5568 }, { "epoch": 0.2018411800949585, "grad_norm": 2.3465555045398823, "learning_rate": 9.245478115299334e-06, "loss": 0.9526, "step": 5569 }, { "epoch": 0.20187742379761517, "grad_norm": 2.377507623598463, "learning_rate": 9.24516804750754e-06, "loss": 0.8695, "step": 5570 }, { "epoch": 0.20191366750027182, "grad_norm": 2.643466269465984, "learning_rate": 9.244857921219899e-06, "loss": 1.0482, "step": 5571 }, { "epoch": 0.2019499112029285, "grad_norm": 2.232376807908484, "learning_rate": 9.244547736440683e-06, "loss": 0.9276, "step": 5572 }, { "epoch": 0.20198615490558516, "grad_norm": 2.4498258233439825, "learning_rate": 9.244237493174166e-06, "loss": 0.9865, "step": 5573 }, { "epoch": 0.2020223986082418, "grad_norm": 2.3838393712018426, "learning_rate": 9.243927191424625e-06, "loss": 0.8635, "step": 5574 }, { "epoch": 0.20205864231089848, "grad_norm": 2.018557053959743, "learning_rate": 9.243616831196331e-06, "loss": 0.8991, "step": 5575 }, { "epoch": 0.20209488601355516, "grad_norm": 2.8431523632159923, "learning_rate": 9.243306412493564e-06, "loss": 1.0988, "step": 5576 }, { "epoch": 0.2021311297162118, "grad_norm": 2.5852277170178404, "learning_rate": 9.242995935320602e-06, "loss": 1.0498, "step": 5577 }, { "epoch": 0.20216737341886848, "grad_norm": 2.398075304961278, "learning_rate": 9.242685399681723e-06, "loss": 1.2289, "step": 5578 }, { "epoch": 0.20220361712152513, "grad_norm": 2.583623792122305, "learning_rate": 9.242374805581203e-06, "loss": 1.0415, "step": 5579 }, { "epoch": 0.2022398608241818, "grad_norm": 2.3607357751580427, "learning_rate": 9.242064153023325e-06, "loss": 0.9147, "step": 5580 }, { "epoch": 0.20227610452683847, "grad_norm": 2.167517766026473, "learning_rate": 9.24175344201237e-06, "loss": 0.9334, "step": 5581 }, { "epoch": 0.20231234822949512, "grad_norm": 2.4999375687328365, "learning_rate": 9.241442672552614e-06, "loss": 0.9227, "step": 5582 }, { "epoch": 0.2023485919321518, "grad_norm": 2.6453662757631253, "learning_rate": 9.241131844648346e-06, "loss": 0.8858, "step": 5583 }, { "epoch": 0.20238483563480844, "grad_norm": 2.8028121427156423, "learning_rate": 9.240820958303845e-06, "loss": 0.9275, "step": 5584 }, { "epoch": 0.20242107933746512, "grad_norm": 2.391456371584975, "learning_rate": 9.240510013523396e-06, "loss": 1.097, "step": 5585 }, { "epoch": 0.2024573230401218, "grad_norm": 2.191960469189596, "learning_rate": 9.240199010311285e-06, "loss": 0.8269, "step": 5586 }, { "epoch": 0.20249356674277844, "grad_norm": 2.658023974488635, "learning_rate": 9.239887948671796e-06, "loss": 0.9791, "step": 5587 }, { "epoch": 0.2025298104454351, "grad_norm": 2.146340588273181, "learning_rate": 9.239576828609213e-06, "loss": 0.9, "step": 5588 }, { "epoch": 0.20256605414809176, "grad_norm": 2.564958623066531, "learning_rate": 9.239265650127827e-06, "loss": 1.0133, "step": 5589 }, { "epoch": 0.20260229785074843, "grad_norm": 2.2781531499015077, "learning_rate": 9.238954413231923e-06, "loss": 0.8811, "step": 5590 }, { "epoch": 0.2026385415534051, "grad_norm": 2.5135309640233316, "learning_rate": 9.238643117925791e-06, "loss": 1.0092, "step": 5591 }, { "epoch": 0.20267478525606175, "grad_norm": 2.3319940185427863, "learning_rate": 9.238331764213722e-06, "loss": 0.9949, "step": 5592 }, { "epoch": 0.20271102895871843, "grad_norm": 2.4965562962393215, "learning_rate": 9.238020352100004e-06, "loss": 0.9977, "step": 5593 }, { "epoch": 0.2027472726613751, "grad_norm": 2.4578704674767247, "learning_rate": 9.237708881588927e-06, "loss": 0.9053, "step": 5594 }, { "epoch": 0.20278351636403175, "grad_norm": 2.5015553580778946, "learning_rate": 9.237397352684786e-06, "loss": 0.8649, "step": 5595 }, { "epoch": 0.20281976006668842, "grad_norm": 2.218930681623873, "learning_rate": 9.23708576539187e-06, "loss": 0.8716, "step": 5596 }, { "epoch": 0.20285600376934507, "grad_norm": 2.20553590229896, "learning_rate": 9.236774119714478e-06, "loss": 0.8162, "step": 5597 }, { "epoch": 0.20289224747200174, "grad_norm": 2.4372626966466067, "learning_rate": 9.236462415656899e-06, "loss": 0.9937, "step": 5598 }, { "epoch": 0.20292849117465842, "grad_norm": 2.2727007025720174, "learning_rate": 9.23615065322343e-06, "loss": 0.9682, "step": 5599 }, { "epoch": 0.20296473487731506, "grad_norm": 2.5131622843577155, "learning_rate": 9.235838832418367e-06, "loss": 0.9882, "step": 5600 }, { "epoch": 0.20300097857997174, "grad_norm": 2.320909242256444, "learning_rate": 9.235526953246008e-06, "loss": 0.9845, "step": 5601 }, { "epoch": 0.20303722228262838, "grad_norm": 2.3361022065918697, "learning_rate": 9.235215015710647e-06, "loss": 0.9058, "step": 5602 }, { "epoch": 0.20307346598528506, "grad_norm": 2.652139744017771, "learning_rate": 9.234903019816587e-06, "loss": 1.0336, "step": 5603 }, { "epoch": 0.20310970968794173, "grad_norm": 2.3667154605903638, "learning_rate": 9.234590965568123e-06, "loss": 0.8333, "step": 5604 }, { "epoch": 0.20314595339059838, "grad_norm": 2.7246922661887854, "learning_rate": 9.234278852969556e-06, "loss": 0.9778, "step": 5605 }, { "epoch": 0.20318219709325505, "grad_norm": 2.6444784863710566, "learning_rate": 9.233966682025188e-06, "loss": 1.0395, "step": 5606 }, { "epoch": 0.2032184407959117, "grad_norm": 2.482428969665402, "learning_rate": 9.23365445273932e-06, "loss": 0.908, "step": 5607 }, { "epoch": 0.20325468449856837, "grad_norm": 2.4428217484629657, "learning_rate": 9.233342165116254e-06, "loss": 0.9662, "step": 5608 }, { "epoch": 0.20329092820122505, "grad_norm": 2.7868714519759923, "learning_rate": 9.233029819160294e-06, "loss": 1.0885, "step": 5609 }, { "epoch": 0.2033271719038817, "grad_norm": 2.4207680497498085, "learning_rate": 9.23271741487574e-06, "loss": 0.927, "step": 5610 }, { "epoch": 0.20336341560653837, "grad_norm": 2.37740333129758, "learning_rate": 9.232404952266902e-06, "loss": 0.8413, "step": 5611 }, { "epoch": 0.20339965930919504, "grad_norm": 2.3904997998266815, "learning_rate": 9.232092431338085e-06, "loss": 1.1355, "step": 5612 }, { "epoch": 0.2034359030118517, "grad_norm": 2.762131527584303, "learning_rate": 9.23177985209359e-06, "loss": 1.1097, "step": 5613 }, { "epoch": 0.20347214671450836, "grad_norm": 2.1809991145393495, "learning_rate": 9.23146721453773e-06, "loss": 0.9689, "step": 5614 }, { "epoch": 0.203508390417165, "grad_norm": 2.6671220941792044, "learning_rate": 9.231154518674813e-06, "loss": 0.9006, "step": 5615 }, { "epoch": 0.20354463411982168, "grad_norm": 2.4399386444941404, "learning_rate": 9.230841764509141e-06, "loss": 0.9429, "step": 5616 }, { "epoch": 0.20358087782247836, "grad_norm": 2.578861616090362, "learning_rate": 9.230528952045033e-06, "loss": 1.0183, "step": 5617 }, { "epoch": 0.203617121525135, "grad_norm": 2.3024030910975135, "learning_rate": 9.230216081286791e-06, "loss": 0.9827, "step": 5618 }, { "epoch": 0.20365336522779168, "grad_norm": 2.3125823903245903, "learning_rate": 9.229903152238732e-06, "loss": 0.8329, "step": 5619 }, { "epoch": 0.20368960893044832, "grad_norm": 2.3883757574102593, "learning_rate": 9.229590164905165e-06, "loss": 1.1129, "step": 5620 }, { "epoch": 0.203725852633105, "grad_norm": 2.63645549919112, "learning_rate": 9.229277119290403e-06, "loss": 0.9994, "step": 5621 }, { "epoch": 0.20376209633576167, "grad_norm": 2.685577216567973, "learning_rate": 9.22896401539876e-06, "loss": 0.9385, "step": 5622 }, { "epoch": 0.20379834003841832, "grad_norm": 2.4490969922156616, "learning_rate": 9.228650853234552e-06, "loss": 1.1297, "step": 5623 }, { "epoch": 0.203834583741075, "grad_norm": 2.6114460574171536, "learning_rate": 9.228337632802091e-06, "loss": 1.0777, "step": 5624 }, { "epoch": 0.20387082744373164, "grad_norm": 2.247418864668117, "learning_rate": 9.228024354105696e-06, "loss": 0.988, "step": 5625 }, { "epoch": 0.2039070711463883, "grad_norm": 2.4632384447837223, "learning_rate": 9.227711017149681e-06, "loss": 1.0735, "step": 5626 }, { "epoch": 0.203943314849045, "grad_norm": 2.1839384286660755, "learning_rate": 9.227397621938366e-06, "loss": 0.8721, "step": 5627 }, { "epoch": 0.20397955855170163, "grad_norm": 2.821517550536971, "learning_rate": 9.227084168476068e-06, "loss": 0.9017, "step": 5628 }, { "epoch": 0.2040158022543583, "grad_norm": 2.3730546994171005, "learning_rate": 9.226770656767105e-06, "loss": 1.1773, "step": 5629 }, { "epoch": 0.20405204595701498, "grad_norm": 2.5205504054570276, "learning_rate": 9.226457086815801e-06, "loss": 0.9198, "step": 5630 }, { "epoch": 0.20408828965967163, "grad_norm": 2.7000610726903633, "learning_rate": 9.226143458626474e-06, "loss": 1.09, "step": 5631 }, { "epoch": 0.2041245333623283, "grad_norm": 2.1782972323745096, "learning_rate": 9.225829772203446e-06, "loss": 0.9259, "step": 5632 }, { "epoch": 0.20416077706498495, "grad_norm": 2.669916204291486, "learning_rate": 9.22551602755104e-06, "loss": 1.1662, "step": 5633 }, { "epoch": 0.20419702076764162, "grad_norm": 2.472795378090797, "learning_rate": 9.225202224673576e-06, "loss": 0.9301, "step": 5634 }, { "epoch": 0.2042332644702983, "grad_norm": 2.611709803109538, "learning_rate": 9.224888363575383e-06, "loss": 1.0654, "step": 5635 }, { "epoch": 0.20426950817295494, "grad_norm": 2.292353580969346, "learning_rate": 9.224574444260783e-06, "loss": 0.8326, "step": 5636 }, { "epoch": 0.20430575187561162, "grad_norm": 2.200109479087053, "learning_rate": 9.224260466734103e-06, "loss": 0.9582, "step": 5637 }, { "epoch": 0.20434199557826827, "grad_norm": 2.2162344406625882, "learning_rate": 9.223946430999667e-06, "loss": 0.8459, "step": 5638 }, { "epoch": 0.20437823928092494, "grad_norm": 2.311930576902758, "learning_rate": 9.223632337061805e-06, "loss": 1.0605, "step": 5639 }, { "epoch": 0.2044144829835816, "grad_norm": 1.86073165236177, "learning_rate": 9.223318184924842e-06, "loss": 0.7953, "step": 5640 }, { "epoch": 0.20445072668623826, "grad_norm": 2.1444865163030054, "learning_rate": 9.22300397459311e-06, "loss": 0.8822, "step": 5641 }, { "epoch": 0.20448697038889493, "grad_norm": 2.359393758217347, "learning_rate": 9.222689706070938e-06, "loss": 0.9272, "step": 5642 }, { "epoch": 0.20452321409155158, "grad_norm": 2.4126019902522073, "learning_rate": 9.222375379362653e-06, "loss": 0.8862, "step": 5643 }, { "epoch": 0.20455945779420825, "grad_norm": 2.3525538249635103, "learning_rate": 9.222060994472593e-06, "loss": 0.9074, "step": 5644 }, { "epoch": 0.20459570149686493, "grad_norm": 2.3607378216439145, "learning_rate": 9.221746551405082e-06, "loss": 0.9429, "step": 5645 }, { "epoch": 0.20463194519952158, "grad_norm": 2.542796297891375, "learning_rate": 9.221432050164458e-06, "loss": 1.015, "step": 5646 }, { "epoch": 0.20466818890217825, "grad_norm": 2.4640978216383536, "learning_rate": 9.221117490755054e-06, "loss": 0.8222, "step": 5647 }, { "epoch": 0.2047044326048349, "grad_norm": 2.301131328344268, "learning_rate": 9.220802873181202e-06, "loss": 0.796, "step": 5648 }, { "epoch": 0.20474067630749157, "grad_norm": 2.3985263138951596, "learning_rate": 9.220488197447242e-06, "loss": 1.1196, "step": 5649 }, { "epoch": 0.20477692001014824, "grad_norm": 2.2362355880999254, "learning_rate": 9.220173463557505e-06, "loss": 1.0342, "step": 5650 }, { "epoch": 0.2048131637128049, "grad_norm": 2.4187986494835227, "learning_rate": 9.219858671516331e-06, "loss": 0.7344, "step": 5651 }, { "epoch": 0.20484940741546157, "grad_norm": 2.137925904026399, "learning_rate": 9.219543821328057e-06, "loss": 0.8276, "step": 5652 }, { "epoch": 0.20488565111811824, "grad_norm": 2.5221241694532597, "learning_rate": 9.219228912997018e-06, "loss": 0.7955, "step": 5653 }, { "epoch": 0.20492189482077489, "grad_norm": 2.422041860521989, "learning_rate": 9.21891394652756e-06, "loss": 1.0077, "step": 5654 }, { "epoch": 0.20495813852343156, "grad_norm": 2.474114717216781, "learning_rate": 9.218598921924017e-06, "loss": 0.8193, "step": 5655 }, { "epoch": 0.2049943822260882, "grad_norm": 2.2111512723953473, "learning_rate": 9.218283839190733e-06, "loss": 1.0008, "step": 5656 }, { "epoch": 0.20503062592874488, "grad_norm": 2.53404332354361, "learning_rate": 9.217968698332049e-06, "loss": 0.8762, "step": 5657 }, { "epoch": 0.20506686963140155, "grad_norm": 2.610608322737811, "learning_rate": 9.217653499352305e-06, "loss": 1.0104, "step": 5658 }, { "epoch": 0.2051031133340582, "grad_norm": 2.563833134543702, "learning_rate": 9.21733824225585e-06, "loss": 0.9, "step": 5659 }, { "epoch": 0.20513935703671488, "grad_norm": 2.2687255046202055, "learning_rate": 9.217022927047024e-06, "loss": 0.8473, "step": 5660 }, { "epoch": 0.20517560073937152, "grad_norm": 2.2283159688563807, "learning_rate": 9.216707553730171e-06, "loss": 1.0879, "step": 5661 }, { "epoch": 0.2052118444420282, "grad_norm": 2.2030716975046993, "learning_rate": 9.216392122309638e-06, "loss": 0.8991, "step": 5662 }, { "epoch": 0.20524808814468487, "grad_norm": 2.3550959443086974, "learning_rate": 9.216076632789772e-06, "loss": 0.9637, "step": 5663 }, { "epoch": 0.20528433184734152, "grad_norm": 2.2203975692401836, "learning_rate": 9.215761085174922e-06, "loss": 1.0331, "step": 5664 }, { "epoch": 0.2053205755499982, "grad_norm": 2.205390359894026, "learning_rate": 9.215445479469432e-06, "loss": 0.9981, "step": 5665 }, { "epoch": 0.20535681925265484, "grad_norm": 2.1583941849862027, "learning_rate": 9.215129815677651e-06, "loss": 0.9014, "step": 5666 }, { "epoch": 0.2053930629553115, "grad_norm": 2.231126969593664, "learning_rate": 9.214814093803932e-06, "loss": 0.9971, "step": 5667 }, { "epoch": 0.20542930665796819, "grad_norm": 2.395494101820043, "learning_rate": 9.214498313852625e-06, "loss": 0.8974, "step": 5668 }, { "epoch": 0.20546555036062483, "grad_norm": 2.4486620439903755, "learning_rate": 9.214182475828078e-06, "loss": 0.8579, "step": 5669 }, { "epoch": 0.2055017940632815, "grad_norm": 2.3352053817806073, "learning_rate": 9.213866579734645e-06, "loss": 0.8845, "step": 5670 }, { "epoch": 0.20553803776593818, "grad_norm": 2.5405300718656387, "learning_rate": 9.213550625576682e-06, "loss": 1.0264, "step": 5671 }, { "epoch": 0.20557428146859483, "grad_norm": 2.2558219449896186, "learning_rate": 9.213234613358537e-06, "loss": 1.0449, "step": 5672 }, { "epoch": 0.2056105251712515, "grad_norm": 2.644211800268396, "learning_rate": 9.212918543084568e-06, "loss": 1.1468, "step": 5673 }, { "epoch": 0.20564676887390815, "grad_norm": 2.1066306357017726, "learning_rate": 9.212602414759128e-06, "loss": 1.0368, "step": 5674 }, { "epoch": 0.20568301257656482, "grad_norm": 2.498779558161803, "learning_rate": 9.212286228386576e-06, "loss": 0.9408, "step": 5675 }, { "epoch": 0.2057192562792215, "grad_norm": 2.3769958264757913, "learning_rate": 9.211969983971267e-06, "loss": 1.0466, "step": 5676 }, { "epoch": 0.20575549998187814, "grad_norm": 2.4839157247641945, "learning_rate": 9.211653681517558e-06, "loss": 0.8939, "step": 5677 }, { "epoch": 0.20579174368453482, "grad_norm": 2.412740907980515, "learning_rate": 9.21133732102981e-06, "loss": 0.7966, "step": 5678 }, { "epoch": 0.20582798738719146, "grad_norm": 2.602538875410482, "learning_rate": 9.21102090251238e-06, "loss": 0.9467, "step": 5679 }, { "epoch": 0.20586423108984814, "grad_norm": 2.3652960610328453, "learning_rate": 9.210704425969629e-06, "loss": 0.9393, "step": 5680 }, { "epoch": 0.2059004747925048, "grad_norm": 2.3765265696393825, "learning_rate": 9.210387891405917e-06, "loss": 0.9698, "step": 5681 }, { "epoch": 0.20593671849516146, "grad_norm": 2.468067287662241, "learning_rate": 9.210071298825606e-06, "loss": 1.0134, "step": 5682 }, { "epoch": 0.20597296219781813, "grad_norm": 2.4557034599937504, "learning_rate": 9.20975464823306e-06, "loss": 0.977, "step": 5683 }, { "epoch": 0.20600920590047478, "grad_norm": 2.5233001555997165, "learning_rate": 9.209437939632639e-06, "loss": 0.9158, "step": 5684 }, { "epoch": 0.20604544960313145, "grad_norm": 2.1724353254899396, "learning_rate": 9.20912117302871e-06, "loss": 0.8463, "step": 5685 }, { "epoch": 0.20608169330578813, "grad_norm": 2.559452678590783, "learning_rate": 9.208804348425637e-06, "loss": 1.0437, "step": 5686 }, { "epoch": 0.20611793700844477, "grad_norm": 2.162108830746476, "learning_rate": 9.208487465827785e-06, "loss": 0.8795, "step": 5687 }, { "epoch": 0.20615418071110145, "grad_norm": 2.411355017703764, "learning_rate": 9.208170525239522e-06, "loss": 0.898, "step": 5688 }, { "epoch": 0.20619042441375812, "grad_norm": 2.5858939261836196, "learning_rate": 9.207853526665212e-06, "loss": 1.3035, "step": 5689 }, { "epoch": 0.20622666811641477, "grad_norm": 2.0833666817281715, "learning_rate": 9.207536470109226e-06, "loss": 0.6685, "step": 5690 }, { "epoch": 0.20626291181907144, "grad_norm": 2.489752502936403, "learning_rate": 9.207219355575933e-06, "loss": 0.9476, "step": 5691 }, { "epoch": 0.2062991555217281, "grad_norm": 2.416323087341093, "learning_rate": 9.206902183069702e-06, "loss": 1.2223, "step": 5692 }, { "epoch": 0.20633539922438476, "grad_norm": 2.6179687646301146, "learning_rate": 9.206584952594903e-06, "loss": 0.8463, "step": 5693 }, { "epoch": 0.20637164292704144, "grad_norm": 2.8241500634535224, "learning_rate": 9.206267664155906e-06, "loss": 0.9703, "step": 5694 }, { "epoch": 0.20640788662969808, "grad_norm": 2.4700874847930168, "learning_rate": 9.205950317757086e-06, "loss": 0.9723, "step": 5695 }, { "epoch": 0.20644413033235476, "grad_norm": 2.3674731565045812, "learning_rate": 9.205632913402813e-06, "loss": 1.07, "step": 5696 }, { "epoch": 0.2064803740350114, "grad_norm": 2.580511984390157, "learning_rate": 9.205315451097463e-06, "loss": 1.0021, "step": 5697 }, { "epoch": 0.20651661773766808, "grad_norm": 2.4321862765693862, "learning_rate": 9.20499793084541e-06, "loss": 0.9788, "step": 5698 }, { "epoch": 0.20655286144032475, "grad_norm": 2.4827535315233087, "learning_rate": 9.204680352651028e-06, "loss": 1.1249, "step": 5699 }, { "epoch": 0.2065891051429814, "grad_norm": 2.5445710230963416, "learning_rate": 9.204362716518693e-06, "loss": 0.9912, "step": 5700 }, { "epoch": 0.20662534884563807, "grad_norm": 2.841606259520187, "learning_rate": 9.204045022452782e-06, "loss": 1.1008, "step": 5701 }, { "epoch": 0.20666159254829472, "grad_norm": 2.4306679162241935, "learning_rate": 9.203727270457674e-06, "loss": 1.0056, "step": 5702 }, { "epoch": 0.2066978362509514, "grad_norm": 2.4198085383978403, "learning_rate": 9.203409460537746e-06, "loss": 0.9421, "step": 5703 }, { "epoch": 0.20673407995360807, "grad_norm": 2.3272677600257445, "learning_rate": 9.203091592697378e-06, "loss": 1.0184, "step": 5704 }, { "epoch": 0.20677032365626472, "grad_norm": 2.0726462759606723, "learning_rate": 9.202773666940951e-06, "loss": 0.966, "step": 5705 }, { "epoch": 0.2068065673589214, "grad_norm": 2.1339026980713434, "learning_rate": 9.202455683272844e-06, "loss": 0.9094, "step": 5706 }, { "epoch": 0.20684281106157806, "grad_norm": 2.4859093296747647, "learning_rate": 9.202137641697438e-06, "loss": 0.9751, "step": 5707 }, { "epoch": 0.2068790547642347, "grad_norm": 2.1125455286195542, "learning_rate": 9.201819542219118e-06, "loss": 0.952, "step": 5708 }, { "epoch": 0.20691529846689138, "grad_norm": 2.342860980947324, "learning_rate": 9.201501384842265e-06, "loss": 0.8786, "step": 5709 }, { "epoch": 0.20695154216954803, "grad_norm": 2.5571394794069304, "learning_rate": 9.201183169571264e-06, "loss": 0.9512, "step": 5710 }, { "epoch": 0.2069877858722047, "grad_norm": 2.52967818806211, "learning_rate": 9.200864896410501e-06, "loss": 1.0203, "step": 5711 }, { "epoch": 0.20702402957486138, "grad_norm": 2.2361682234602176, "learning_rate": 9.200546565364359e-06, "loss": 1.079, "step": 5712 }, { "epoch": 0.20706027327751803, "grad_norm": 2.3080285696272598, "learning_rate": 9.200228176437228e-06, "loss": 1.0088, "step": 5713 }, { "epoch": 0.2070965169801747, "grad_norm": 2.2704956094906277, "learning_rate": 9.199909729633489e-06, "loss": 1.0132, "step": 5714 }, { "epoch": 0.20713276068283135, "grad_norm": 2.377875475316263, "learning_rate": 9.199591224957536e-06, "loss": 0.8633, "step": 5715 }, { "epoch": 0.20716900438548802, "grad_norm": 2.2864293454969276, "learning_rate": 9.199272662413756e-06, "loss": 0.9725, "step": 5716 }, { "epoch": 0.2072052480881447, "grad_norm": 2.0495142636277315, "learning_rate": 9.198954042006538e-06, "loss": 0.8922, "step": 5717 }, { "epoch": 0.20724149179080134, "grad_norm": 2.0460567752662553, "learning_rate": 9.198635363740274e-06, "loss": 0.9223, "step": 5718 }, { "epoch": 0.20727773549345802, "grad_norm": 2.3068695706738422, "learning_rate": 9.198316627619352e-06, "loss": 1.0091, "step": 5719 }, { "epoch": 0.20731397919611466, "grad_norm": 2.314700596413906, "learning_rate": 9.197997833648167e-06, "loss": 1.0787, "step": 5720 }, { "epoch": 0.20735022289877134, "grad_norm": 2.3877782114689294, "learning_rate": 9.19767898183111e-06, "loss": 0.9008, "step": 5721 }, { "epoch": 0.207386466601428, "grad_norm": 2.1050873512712975, "learning_rate": 9.197360072172576e-06, "loss": 0.9588, "step": 5722 }, { "epoch": 0.20742271030408466, "grad_norm": 2.135113668815313, "learning_rate": 9.197041104676957e-06, "loss": 0.7156, "step": 5723 }, { "epoch": 0.20745895400674133, "grad_norm": 2.3336364960191553, "learning_rate": 9.196722079348652e-06, "loss": 0.9951, "step": 5724 }, { "epoch": 0.207495197709398, "grad_norm": 2.6856890238699274, "learning_rate": 9.196402996192055e-06, "loss": 1.0416, "step": 5725 }, { "epoch": 0.20753144141205465, "grad_norm": 2.032866830072236, "learning_rate": 9.196083855211562e-06, "loss": 0.957, "step": 5726 }, { "epoch": 0.20756768511471133, "grad_norm": 2.2319211350522696, "learning_rate": 9.195764656411571e-06, "loss": 0.905, "step": 5727 }, { "epoch": 0.20760392881736797, "grad_norm": 2.1659499529066446, "learning_rate": 9.195445399796481e-06, "loss": 0.9215, "step": 5728 }, { "epoch": 0.20764017252002465, "grad_norm": 2.4183636607451153, "learning_rate": 9.19512608537069e-06, "loss": 0.8765, "step": 5729 }, { "epoch": 0.20767641622268132, "grad_norm": 2.77212438588486, "learning_rate": 9.194806713138597e-06, "loss": 0.9298, "step": 5730 }, { "epoch": 0.20771265992533797, "grad_norm": 2.6354943687464645, "learning_rate": 9.194487283104607e-06, "loss": 0.8961, "step": 5731 }, { "epoch": 0.20774890362799464, "grad_norm": 2.439266183563154, "learning_rate": 9.19416779527312e-06, "loss": 0.943, "step": 5732 }, { "epoch": 0.2077851473306513, "grad_norm": 2.3242169070610537, "learning_rate": 9.193848249648534e-06, "loss": 1.0713, "step": 5733 }, { "epoch": 0.20782139103330796, "grad_norm": 2.5948706354089666, "learning_rate": 9.193528646235258e-06, "loss": 1.0222, "step": 5734 }, { "epoch": 0.20785763473596464, "grad_norm": 2.4500692092210112, "learning_rate": 9.193208985037693e-06, "loss": 0.9154, "step": 5735 }, { "epoch": 0.20789387843862128, "grad_norm": 2.523535968178017, "learning_rate": 9.192889266060243e-06, "loss": 0.9433, "step": 5736 }, { "epoch": 0.20793012214127796, "grad_norm": 2.278517619874692, "learning_rate": 9.192569489307316e-06, "loss": 0.9584, "step": 5737 }, { "epoch": 0.2079663658439346, "grad_norm": 2.592797925379216, "learning_rate": 9.192249654783316e-06, "loss": 0.9868, "step": 5738 }, { "epoch": 0.20800260954659128, "grad_norm": 2.481413289023713, "learning_rate": 9.191929762492652e-06, "loss": 0.9334, "step": 5739 }, { "epoch": 0.20803885324924795, "grad_norm": 2.409622519520648, "learning_rate": 9.19160981243973e-06, "loss": 0.9587, "step": 5740 }, { "epoch": 0.2080750969519046, "grad_norm": 2.2979815919931936, "learning_rate": 9.191289804628961e-06, "loss": 1.0865, "step": 5741 }, { "epoch": 0.20811134065456127, "grad_norm": 2.355015511245772, "learning_rate": 9.190969739064752e-06, "loss": 0.7997, "step": 5742 }, { "epoch": 0.20814758435721795, "grad_norm": 2.364978768526205, "learning_rate": 9.190649615751516e-06, "loss": 0.997, "step": 5743 }, { "epoch": 0.2081838280598746, "grad_norm": 2.411747250547676, "learning_rate": 9.190329434693661e-06, "loss": 1.0035, "step": 5744 }, { "epoch": 0.20822007176253127, "grad_norm": 2.3519136414827395, "learning_rate": 9.190009195895601e-06, "loss": 0.9941, "step": 5745 }, { "epoch": 0.2082563154651879, "grad_norm": 2.3837331572848535, "learning_rate": 9.18968889936175e-06, "loss": 0.9225, "step": 5746 }, { "epoch": 0.2082925591678446, "grad_norm": 2.566029293714549, "learning_rate": 9.18936854509652e-06, "loss": 1.0261, "step": 5747 }, { "epoch": 0.20832880287050126, "grad_norm": 2.4520066973011554, "learning_rate": 9.189048133104324e-06, "loss": 1.0432, "step": 5748 }, { "epoch": 0.2083650465731579, "grad_norm": 2.100718067053377, "learning_rate": 9.188727663389577e-06, "loss": 0.8924, "step": 5749 }, { "epoch": 0.20840129027581458, "grad_norm": 2.0449832643133097, "learning_rate": 9.188407135956697e-06, "loss": 0.741, "step": 5750 }, { "epoch": 0.20843753397847123, "grad_norm": 2.5747131847516234, "learning_rate": 9.1880865508101e-06, "loss": 1.0736, "step": 5751 }, { "epoch": 0.2084737776811279, "grad_norm": 2.2058873255358855, "learning_rate": 9.187765907954204e-06, "loss": 0.9807, "step": 5752 }, { "epoch": 0.20851002138378458, "grad_norm": 2.275738235028919, "learning_rate": 9.187445207393424e-06, "loss": 0.904, "step": 5753 }, { "epoch": 0.20854626508644122, "grad_norm": 2.310509963505087, "learning_rate": 9.187124449132183e-06, "loss": 0.8881, "step": 5754 }, { "epoch": 0.2085825087890979, "grad_norm": 2.674596141518984, "learning_rate": 9.1868036331749e-06, "loss": 0.8744, "step": 5755 }, { "epoch": 0.20861875249175454, "grad_norm": 2.448138987157216, "learning_rate": 9.186482759525995e-06, "loss": 1.0432, "step": 5756 }, { "epoch": 0.20865499619441122, "grad_norm": 2.211099077355861, "learning_rate": 9.18616182818989e-06, "loss": 0.8127, "step": 5757 }, { "epoch": 0.2086912398970679, "grad_norm": 2.4822826042485957, "learning_rate": 9.185840839171005e-06, "loss": 1.0612, "step": 5758 }, { "epoch": 0.20872748359972454, "grad_norm": 2.4956325418119083, "learning_rate": 9.185519792473765e-06, "loss": 0.9411, "step": 5759 }, { "epoch": 0.2087637273023812, "grad_norm": 2.7131376018621958, "learning_rate": 9.185198688102594e-06, "loss": 1.0606, "step": 5760 }, { "epoch": 0.2087999710050379, "grad_norm": 2.3606884881738432, "learning_rate": 9.184877526061917e-06, "loss": 1.003, "step": 5761 }, { "epoch": 0.20883621470769453, "grad_norm": 2.1692960754628747, "learning_rate": 9.184556306356157e-06, "loss": 0.968, "step": 5762 }, { "epoch": 0.2088724584103512, "grad_norm": 2.532397029752321, "learning_rate": 9.184235028989743e-06, "loss": 1.1372, "step": 5763 }, { "epoch": 0.20890870211300785, "grad_norm": 2.4967794578442386, "learning_rate": 9.1839136939671e-06, "loss": 0.9263, "step": 5764 }, { "epoch": 0.20894494581566453, "grad_norm": 2.4981295251121804, "learning_rate": 9.183592301292658e-06, "loss": 1.036, "step": 5765 }, { "epoch": 0.2089811895183212, "grad_norm": 2.2189473858968203, "learning_rate": 9.183270850970843e-06, "loss": 0.8092, "step": 5766 }, { "epoch": 0.20901743322097785, "grad_norm": 2.5406051518865476, "learning_rate": 9.182949343006087e-06, "loss": 0.9724, "step": 5767 }, { "epoch": 0.20905367692363452, "grad_norm": 2.2688446384067227, "learning_rate": 9.182627777402817e-06, "loss": 0.9645, "step": 5768 }, { "epoch": 0.20908992062629117, "grad_norm": 2.3608301740339237, "learning_rate": 9.182306154165466e-06, "loss": 1.0908, "step": 5769 }, { "epoch": 0.20912616432894784, "grad_norm": 2.247185624321754, "learning_rate": 9.181984473298468e-06, "loss": 0.9126, "step": 5770 }, { "epoch": 0.20916240803160452, "grad_norm": 2.3083842772285497, "learning_rate": 9.18166273480625e-06, "loss": 0.9087, "step": 5771 }, { "epoch": 0.20919865173426117, "grad_norm": 2.259429754133756, "learning_rate": 9.181340938693251e-06, "loss": 1.0062, "step": 5772 }, { "epoch": 0.20923489543691784, "grad_norm": 2.4399521932411905, "learning_rate": 9.1810190849639e-06, "loss": 0.9655, "step": 5773 }, { "epoch": 0.20927113913957449, "grad_norm": 2.6979028129586693, "learning_rate": 9.180697173622637e-06, "loss": 0.9993, "step": 5774 }, { "epoch": 0.20930738284223116, "grad_norm": 2.227083896712747, "learning_rate": 9.180375204673895e-06, "loss": 0.9873, "step": 5775 }, { "epoch": 0.20934362654488783, "grad_norm": 2.5303264458489214, "learning_rate": 9.18005317812211e-06, "loss": 1.0037, "step": 5776 }, { "epoch": 0.20937987024754448, "grad_norm": 2.3708753444333883, "learning_rate": 9.179731093971722e-06, "loss": 0.8601, "step": 5777 }, { "epoch": 0.20941611395020115, "grad_norm": 2.499877832153476, "learning_rate": 9.179408952227164e-06, "loss": 1.0001, "step": 5778 }, { "epoch": 0.20945235765285783, "grad_norm": 2.4240682556795012, "learning_rate": 9.17908675289288e-06, "loss": 0.8421, "step": 5779 }, { "epoch": 0.20948860135551448, "grad_norm": 2.548556908732074, "learning_rate": 9.17876449597331e-06, "loss": 1.0452, "step": 5780 }, { "epoch": 0.20952484505817115, "grad_norm": 2.717468838295375, "learning_rate": 9.17844218147289e-06, "loss": 1.0087, "step": 5781 }, { "epoch": 0.2095610887608278, "grad_norm": 2.4311717020809533, "learning_rate": 9.178119809396065e-06, "loss": 1.0083, "step": 5782 }, { "epoch": 0.20959733246348447, "grad_norm": 2.234940090262702, "learning_rate": 9.177797379747275e-06, "loss": 0.929, "step": 5783 }, { "epoch": 0.20963357616614114, "grad_norm": 2.3301825761786725, "learning_rate": 9.177474892530965e-06, "loss": 0.8089, "step": 5784 }, { "epoch": 0.2096698198687978, "grad_norm": 2.7441313375609586, "learning_rate": 9.177152347751575e-06, "loss": 0.8738, "step": 5785 }, { "epoch": 0.20970606357145447, "grad_norm": 2.374102543286558, "learning_rate": 9.176829745413553e-06, "loss": 0.9947, "step": 5786 }, { "epoch": 0.2097423072741111, "grad_norm": 2.435211211235625, "learning_rate": 9.176507085521344e-06, "loss": 0.9919, "step": 5787 }, { "epoch": 0.20977855097676779, "grad_norm": 2.351304588962146, "learning_rate": 9.176184368079392e-06, "loss": 0.8641, "step": 5788 }, { "epoch": 0.20981479467942446, "grad_norm": 2.3682277034611556, "learning_rate": 9.175861593092147e-06, "loss": 0.9848, "step": 5789 }, { "epoch": 0.2098510383820811, "grad_norm": 2.4621420117418134, "learning_rate": 9.175538760564054e-06, "loss": 0.9716, "step": 5790 }, { "epoch": 0.20988728208473778, "grad_norm": 2.3868623852364323, "learning_rate": 9.175215870499562e-06, "loss": 1.0113, "step": 5791 }, { "epoch": 0.20992352578739443, "grad_norm": 2.077509143060554, "learning_rate": 9.17489292290312e-06, "loss": 1.1671, "step": 5792 }, { "epoch": 0.2099597694900511, "grad_norm": 2.4348417821310435, "learning_rate": 9.174569917779179e-06, "loss": 0.9615, "step": 5793 }, { "epoch": 0.20999601319270778, "grad_norm": 2.4152834922750643, "learning_rate": 9.17424685513219e-06, "loss": 1.0127, "step": 5794 }, { "epoch": 0.21003225689536442, "grad_norm": 2.694773321907568, "learning_rate": 9.1739237349666e-06, "loss": 1.0667, "step": 5795 }, { "epoch": 0.2100685005980211, "grad_norm": 2.6638349251699363, "learning_rate": 9.17360055728687e-06, "loss": 1.1626, "step": 5796 }, { "epoch": 0.21010474430067777, "grad_norm": 2.719069572183638, "learning_rate": 9.173277322097446e-06, "loss": 0.8571, "step": 5797 }, { "epoch": 0.21014098800333442, "grad_norm": 2.3671942309284253, "learning_rate": 9.172954029402785e-06, "loss": 1.1769, "step": 5798 }, { "epoch": 0.2101772317059911, "grad_norm": 2.3893202047530373, "learning_rate": 9.172630679207341e-06, "loss": 1.0742, "step": 5799 }, { "epoch": 0.21021347540864774, "grad_norm": 2.5317983208631616, "learning_rate": 9.17230727151557e-06, "loss": 0.981, "step": 5800 }, { "epoch": 0.2102497191113044, "grad_norm": 2.208667733098994, "learning_rate": 9.171983806331928e-06, "loss": 0.9387, "step": 5801 }, { "epoch": 0.21028596281396109, "grad_norm": 2.400865148614437, "learning_rate": 9.171660283660871e-06, "loss": 0.9638, "step": 5802 }, { "epoch": 0.21032220651661773, "grad_norm": 2.3887214184108276, "learning_rate": 9.17133670350686e-06, "loss": 0.9544, "step": 5803 }, { "epoch": 0.2103584502192744, "grad_norm": 2.257564283730197, "learning_rate": 9.171013065874352e-06, "loss": 1.0249, "step": 5804 }, { "epoch": 0.21039469392193105, "grad_norm": 2.3770834983906544, "learning_rate": 9.170689370767805e-06, "loss": 0.9335, "step": 5805 }, { "epoch": 0.21043093762458773, "grad_norm": 2.533103163889627, "learning_rate": 9.170365618191683e-06, "loss": 1.1165, "step": 5806 }, { "epoch": 0.2104671813272444, "grad_norm": 2.2664104611660485, "learning_rate": 9.170041808150443e-06, "loss": 0.6727, "step": 5807 }, { "epoch": 0.21050342502990105, "grad_norm": 2.57155889497576, "learning_rate": 9.16971794064855e-06, "loss": 1.1558, "step": 5808 }, { "epoch": 0.21053966873255772, "grad_norm": 2.391090579936194, "learning_rate": 9.169394015690465e-06, "loss": 0.8848, "step": 5809 }, { "epoch": 0.21057591243521437, "grad_norm": 2.4207720020593633, "learning_rate": 9.169070033280651e-06, "loss": 0.9916, "step": 5810 }, { "epoch": 0.21061215613787104, "grad_norm": 2.333032882521577, "learning_rate": 9.168745993423575e-06, "loss": 1.0049, "step": 5811 }, { "epoch": 0.21064839984052772, "grad_norm": 2.4775989183476383, "learning_rate": 9.1684218961237e-06, "loss": 1.0761, "step": 5812 }, { "epoch": 0.21068464354318436, "grad_norm": 2.359614607242951, "learning_rate": 9.16809774138549e-06, "loss": 0.9425, "step": 5813 }, { "epoch": 0.21072088724584104, "grad_norm": 2.8933305883981237, "learning_rate": 9.167773529213417e-06, "loss": 1.1222, "step": 5814 }, { "epoch": 0.2107571309484977, "grad_norm": 2.5744241365742644, "learning_rate": 9.167449259611945e-06, "loss": 0.864, "step": 5815 }, { "epoch": 0.21079337465115436, "grad_norm": 2.08212275756574, "learning_rate": 9.167124932585541e-06, "loss": 0.9357, "step": 5816 }, { "epoch": 0.21082961835381103, "grad_norm": 2.2637072086970855, "learning_rate": 9.166800548138676e-06, "loss": 1.0728, "step": 5817 }, { "epoch": 0.21086586205646768, "grad_norm": 2.3577456258580844, "learning_rate": 9.166476106275822e-06, "loss": 0.8822, "step": 5818 }, { "epoch": 0.21090210575912435, "grad_norm": 2.7556458724598922, "learning_rate": 9.166151607001445e-06, "loss": 1.0301, "step": 5819 }, { "epoch": 0.21093834946178103, "grad_norm": 2.3886943476846265, "learning_rate": 9.165827050320017e-06, "loss": 1.0816, "step": 5820 }, { "epoch": 0.21097459316443767, "grad_norm": 2.4531660486229017, "learning_rate": 9.165502436236013e-06, "loss": 0.9493, "step": 5821 }, { "epoch": 0.21101083686709435, "grad_norm": 2.378647271909785, "learning_rate": 9.165177764753905e-06, "loss": 0.9092, "step": 5822 }, { "epoch": 0.211047080569751, "grad_norm": 2.3813929645185192, "learning_rate": 9.164853035878167e-06, "loss": 0.793, "step": 5823 }, { "epoch": 0.21108332427240767, "grad_norm": 2.068956550401325, "learning_rate": 9.164528249613271e-06, "loss": 0.7263, "step": 5824 }, { "epoch": 0.21111956797506434, "grad_norm": 2.6277100338862676, "learning_rate": 9.164203405963697e-06, "loss": 0.8049, "step": 5825 }, { "epoch": 0.211155811677721, "grad_norm": 2.276665907886203, "learning_rate": 9.163878504933916e-06, "loss": 1.0333, "step": 5826 }, { "epoch": 0.21119205538037766, "grad_norm": 2.0825409546701223, "learning_rate": 9.163553546528408e-06, "loss": 0.9617, "step": 5827 }, { "epoch": 0.2112282990830343, "grad_norm": 2.4623511989764424, "learning_rate": 9.163228530751652e-06, "loss": 1.0442, "step": 5828 }, { "epoch": 0.21126454278569098, "grad_norm": 2.2700185648035087, "learning_rate": 9.162903457608123e-06, "loss": 0.8326, "step": 5829 }, { "epoch": 0.21130078648834766, "grad_norm": 2.4125033134548777, "learning_rate": 9.162578327102302e-06, "loss": 0.8805, "step": 5830 }, { "epoch": 0.2113370301910043, "grad_norm": 2.057464673731765, "learning_rate": 9.16225313923867e-06, "loss": 0.7138, "step": 5831 }, { "epoch": 0.21137327389366098, "grad_norm": 2.099229563435334, "learning_rate": 9.161927894021705e-06, "loss": 0.9097, "step": 5832 }, { "epoch": 0.21140951759631765, "grad_norm": 2.28820265851776, "learning_rate": 9.161602591455893e-06, "loss": 1.0408, "step": 5833 }, { "epoch": 0.2114457612989743, "grad_norm": 2.392255547601563, "learning_rate": 9.161277231545713e-06, "loss": 0.8971, "step": 5834 }, { "epoch": 0.21148200500163097, "grad_norm": 2.285812915350443, "learning_rate": 9.16095181429565e-06, "loss": 0.8942, "step": 5835 }, { "epoch": 0.21151824870428762, "grad_norm": 2.2904257606566114, "learning_rate": 9.160626339710186e-06, "loss": 0.9753, "step": 5836 }, { "epoch": 0.2115544924069443, "grad_norm": 2.5132653947051327, "learning_rate": 9.16030080779381e-06, "loss": 0.9732, "step": 5837 }, { "epoch": 0.21159073610960097, "grad_norm": 2.485349698449036, "learning_rate": 9.159975218551003e-06, "loss": 0.8667, "step": 5838 }, { "epoch": 0.21162697981225762, "grad_norm": 2.3585210023387533, "learning_rate": 9.159649571986254e-06, "loss": 0.8184, "step": 5839 }, { "epoch": 0.2116632235149143, "grad_norm": 2.4734042687990394, "learning_rate": 9.159323868104049e-06, "loss": 0.9483, "step": 5840 }, { "epoch": 0.21169946721757094, "grad_norm": 2.167968373246517, "learning_rate": 9.158998106908876e-06, "loss": 0.8804, "step": 5841 }, { "epoch": 0.2117357109202276, "grad_norm": 2.2179890540359324, "learning_rate": 9.158672288405226e-06, "loss": 0.9661, "step": 5842 }, { "epoch": 0.21177195462288428, "grad_norm": 2.4925951353603906, "learning_rate": 9.158346412597586e-06, "loss": 1.1103, "step": 5843 }, { "epoch": 0.21180819832554093, "grad_norm": 2.4136474335684293, "learning_rate": 9.158020479490448e-06, "loss": 0.941, "step": 5844 }, { "epoch": 0.2118444420281976, "grad_norm": 2.229139785979508, "learning_rate": 9.157694489088302e-06, "loss": 1.0242, "step": 5845 }, { "epoch": 0.21188068573085425, "grad_norm": 2.3379692487920254, "learning_rate": 9.15736844139564e-06, "loss": 0.768, "step": 5846 }, { "epoch": 0.21191692943351093, "grad_norm": 2.2672997660234824, "learning_rate": 9.157042336416955e-06, "loss": 0.8399, "step": 5847 }, { "epoch": 0.2119531731361676, "grad_norm": 2.236510244245393, "learning_rate": 9.15671617415674e-06, "loss": 1.1894, "step": 5848 }, { "epoch": 0.21198941683882425, "grad_norm": 2.313449566123127, "learning_rate": 9.156389954619494e-06, "loss": 0.9116, "step": 5849 }, { "epoch": 0.21202566054148092, "grad_norm": 2.500362404808437, "learning_rate": 9.156063677809704e-06, "loss": 0.8114, "step": 5850 }, { "epoch": 0.2120619042441376, "grad_norm": 2.328437908593726, "learning_rate": 9.155737343731873e-06, "loss": 0.91, "step": 5851 }, { "epoch": 0.21209814794679424, "grad_norm": 2.4235378183949328, "learning_rate": 9.155410952390493e-06, "loss": 1.0006, "step": 5852 }, { "epoch": 0.21213439164945092, "grad_norm": 2.3519229167179407, "learning_rate": 9.155084503790063e-06, "loss": 0.8169, "step": 5853 }, { "epoch": 0.21217063535210756, "grad_norm": 2.691846665311343, "learning_rate": 9.154757997935082e-06, "loss": 0.9235, "step": 5854 }, { "epoch": 0.21220687905476424, "grad_norm": 2.5530422813232727, "learning_rate": 9.154431434830049e-06, "loss": 1.0063, "step": 5855 }, { "epoch": 0.2122431227574209, "grad_norm": 2.063112285615013, "learning_rate": 9.15410481447946e-06, "loss": 0.866, "step": 5856 }, { "epoch": 0.21227936646007756, "grad_norm": 2.554232426867499, "learning_rate": 9.153778136887822e-06, "loss": 0.9909, "step": 5857 }, { "epoch": 0.21231561016273423, "grad_norm": 2.4767432351454914, "learning_rate": 9.153451402059634e-06, "loss": 0.9456, "step": 5858 }, { "epoch": 0.21235185386539088, "grad_norm": 2.490892301699549, "learning_rate": 9.153124609999396e-06, "loss": 1.1114, "step": 5859 }, { "epoch": 0.21238809756804755, "grad_norm": 2.11977200999756, "learning_rate": 9.152797760711613e-06, "loss": 1.0384, "step": 5860 }, { "epoch": 0.21242434127070423, "grad_norm": 2.5648201432816444, "learning_rate": 9.15247085420079e-06, "loss": 0.9391, "step": 5861 }, { "epoch": 0.21246058497336087, "grad_norm": 2.6026577849980037, "learning_rate": 9.152143890471427e-06, "loss": 0.9336, "step": 5862 }, { "epoch": 0.21249682867601755, "grad_norm": 2.321835907504641, "learning_rate": 9.151816869528035e-06, "loss": 0.972, "step": 5863 }, { "epoch": 0.2125330723786742, "grad_norm": 2.3017725477021886, "learning_rate": 9.151489791375117e-06, "loss": 0.8622, "step": 5864 }, { "epoch": 0.21256931608133087, "grad_norm": 2.3181742084944, "learning_rate": 9.15116265601718e-06, "loss": 1.0745, "step": 5865 }, { "epoch": 0.21260555978398754, "grad_norm": 2.1204525884623533, "learning_rate": 9.150835463458732e-06, "loss": 0.8175, "step": 5866 }, { "epoch": 0.2126418034866442, "grad_norm": 2.2384992003399797, "learning_rate": 9.150508213704282e-06, "loss": 1.092, "step": 5867 }, { "epoch": 0.21267804718930086, "grad_norm": 2.687566161954636, "learning_rate": 9.150180906758338e-06, "loss": 0.7904, "step": 5868 }, { "epoch": 0.21271429089195754, "grad_norm": 2.380269041108955, "learning_rate": 9.149853542625412e-06, "loss": 1.1317, "step": 5869 }, { "epoch": 0.21275053459461418, "grad_norm": 2.4439840841101312, "learning_rate": 9.149526121310013e-06, "loss": 1.0187, "step": 5870 }, { "epoch": 0.21278677829727086, "grad_norm": 2.6474311789080254, "learning_rate": 9.149198642816654e-06, "loss": 1.0054, "step": 5871 }, { "epoch": 0.2128230219999275, "grad_norm": 2.084133571237376, "learning_rate": 9.148871107149848e-06, "loss": 0.9373, "step": 5872 }, { "epoch": 0.21285926570258418, "grad_norm": 2.754881713515189, "learning_rate": 9.148543514314106e-06, "loss": 0.9369, "step": 5873 }, { "epoch": 0.21289550940524085, "grad_norm": 2.28887955397446, "learning_rate": 9.148215864313943e-06, "loss": 0.9857, "step": 5874 }, { "epoch": 0.2129317531078975, "grad_norm": 2.7222412984721056, "learning_rate": 9.147888157153876e-06, "loss": 0.8986, "step": 5875 }, { "epoch": 0.21296799681055417, "grad_norm": 2.513556751930475, "learning_rate": 9.147560392838417e-06, "loss": 0.9585, "step": 5876 }, { "epoch": 0.21300424051321082, "grad_norm": 2.385375833835601, "learning_rate": 9.147232571372085e-06, "loss": 0.9681, "step": 5877 }, { "epoch": 0.2130404842158675, "grad_norm": 2.458625105492372, "learning_rate": 9.146904692759397e-06, "loss": 0.9279, "step": 5878 }, { "epoch": 0.21307672791852417, "grad_norm": 2.2996424305328715, "learning_rate": 9.146576757004869e-06, "loss": 0.9167, "step": 5879 }, { "epoch": 0.2131129716211808, "grad_norm": 2.439582924649417, "learning_rate": 9.146248764113021e-06, "loss": 1.1995, "step": 5880 }, { "epoch": 0.2131492153238375, "grad_norm": 2.215350470726309, "learning_rate": 9.145920714088375e-06, "loss": 0.9535, "step": 5881 }, { "epoch": 0.21318545902649413, "grad_norm": 2.286136066960772, "learning_rate": 9.145592606935447e-06, "loss": 1.024, "step": 5882 }, { "epoch": 0.2132217027291508, "grad_norm": 2.2843838662857956, "learning_rate": 9.14526444265876e-06, "loss": 1.0018, "step": 5883 }, { "epoch": 0.21325794643180748, "grad_norm": 2.002121360861707, "learning_rate": 9.144936221262835e-06, "loss": 0.8666, "step": 5884 }, { "epoch": 0.21329419013446413, "grad_norm": 15.137343617932146, "learning_rate": 9.144607942752196e-06, "loss": 1.7284, "step": 5885 }, { "epoch": 0.2133304338371208, "grad_norm": 2.3758236791140126, "learning_rate": 9.144279607131367e-06, "loss": 0.9584, "step": 5886 }, { "epoch": 0.21336667753977748, "grad_norm": 2.2739260423593017, "learning_rate": 9.143951214404873e-06, "loss": 1.0202, "step": 5887 }, { "epoch": 0.21340292124243412, "grad_norm": 2.389273053648149, "learning_rate": 9.143622764577236e-06, "loss": 0.9208, "step": 5888 }, { "epoch": 0.2134391649450908, "grad_norm": 2.348058113863504, "learning_rate": 9.143294257652982e-06, "loss": 0.8092, "step": 5889 }, { "epoch": 0.21347540864774744, "grad_norm": 2.362698437747921, "learning_rate": 9.14296569363664e-06, "loss": 0.9789, "step": 5890 }, { "epoch": 0.21351165235040412, "grad_norm": 2.2842843244539477, "learning_rate": 9.142637072532736e-06, "loss": 0.8653, "step": 5891 }, { "epoch": 0.2135478960530608, "grad_norm": 2.3145275182253826, "learning_rate": 9.1423083943458e-06, "loss": 0.9242, "step": 5892 }, { "epoch": 0.21358413975571744, "grad_norm": 2.3408277243172, "learning_rate": 9.141979659080357e-06, "loss": 0.9691, "step": 5893 }, { "epoch": 0.2136203834583741, "grad_norm": 2.774234894985718, "learning_rate": 9.141650866740942e-06, "loss": 0.9557, "step": 5894 }, { "epoch": 0.21365662716103076, "grad_norm": 2.1938298824520284, "learning_rate": 9.141322017332082e-06, "loss": 0.8515, "step": 5895 }, { "epoch": 0.21369287086368743, "grad_norm": 2.3708928812418804, "learning_rate": 9.140993110858307e-06, "loss": 1.1279, "step": 5896 }, { "epoch": 0.2137291145663441, "grad_norm": 2.1766695968347607, "learning_rate": 9.140664147324154e-06, "loss": 0.9877, "step": 5897 }, { "epoch": 0.21376535826900075, "grad_norm": 2.304033180553705, "learning_rate": 9.140335126734152e-06, "loss": 0.8003, "step": 5898 }, { "epoch": 0.21380160197165743, "grad_norm": 2.1855954355478118, "learning_rate": 9.140006049092836e-06, "loss": 0.8404, "step": 5899 }, { "epoch": 0.21383784567431408, "grad_norm": 2.0545982487320016, "learning_rate": 9.139676914404741e-06, "loss": 0.9197, "step": 5900 }, { "epoch": 0.21387408937697075, "grad_norm": 2.659505531594798, "learning_rate": 9.139347722674402e-06, "loss": 1.0076, "step": 5901 }, { "epoch": 0.21391033307962742, "grad_norm": 2.3976876719639706, "learning_rate": 9.139018473906353e-06, "loss": 1.1986, "step": 5902 }, { "epoch": 0.21394657678228407, "grad_norm": 2.7163468749541595, "learning_rate": 9.138689168105135e-06, "loss": 0.9775, "step": 5903 }, { "epoch": 0.21398282048494074, "grad_norm": 2.599322706641573, "learning_rate": 9.138359805275282e-06, "loss": 1.0846, "step": 5904 }, { "epoch": 0.21401906418759742, "grad_norm": 2.342840239728417, "learning_rate": 9.138030385421333e-06, "loss": 1.0098, "step": 5905 }, { "epoch": 0.21405530789025407, "grad_norm": 2.446875150584367, "learning_rate": 9.137700908547828e-06, "loss": 0.9137, "step": 5906 }, { "epoch": 0.21409155159291074, "grad_norm": 2.3479863173889224, "learning_rate": 9.137371374659307e-06, "loss": 1.0887, "step": 5907 }, { "epoch": 0.21412779529556739, "grad_norm": 2.434530559621618, "learning_rate": 9.137041783760312e-06, "loss": 0.9142, "step": 5908 }, { "epoch": 0.21416403899822406, "grad_norm": 2.2283831578405895, "learning_rate": 9.136712135855382e-06, "loss": 1.078, "step": 5909 }, { "epoch": 0.21420028270088073, "grad_norm": 2.5823177305227443, "learning_rate": 9.136382430949061e-06, "loss": 0.9405, "step": 5910 }, { "epoch": 0.21423652640353738, "grad_norm": 2.195192281558806, "learning_rate": 9.136052669045891e-06, "loss": 0.9488, "step": 5911 }, { "epoch": 0.21427277010619405, "grad_norm": 2.2143922800916314, "learning_rate": 9.135722850150418e-06, "loss": 0.8982, "step": 5912 }, { "epoch": 0.2143090138088507, "grad_norm": 2.5145527939735977, "learning_rate": 9.135392974267185e-06, "loss": 1.0083, "step": 5913 }, { "epoch": 0.21434525751150738, "grad_norm": 2.2262188303444614, "learning_rate": 9.135063041400736e-06, "loss": 0.9834, "step": 5914 }, { "epoch": 0.21438150121416405, "grad_norm": 2.486702522286014, "learning_rate": 9.134733051555622e-06, "loss": 1.0275, "step": 5915 }, { "epoch": 0.2144177449168207, "grad_norm": 2.2823812844776312, "learning_rate": 9.134403004736385e-06, "loss": 0.8271, "step": 5916 }, { "epoch": 0.21445398861947737, "grad_norm": 2.3200244460148807, "learning_rate": 9.134072900947575e-06, "loss": 0.7383, "step": 5917 }, { "epoch": 0.21449023232213402, "grad_norm": 2.7094744447756165, "learning_rate": 9.133742740193741e-06, "loss": 0.9806, "step": 5918 }, { "epoch": 0.2145264760247907, "grad_norm": 2.3660812691579682, "learning_rate": 9.133412522479433e-06, "loss": 0.9691, "step": 5919 }, { "epoch": 0.21456271972744737, "grad_norm": 2.17008404292115, "learning_rate": 9.133082247809199e-06, "loss": 1.0135, "step": 5920 }, { "epoch": 0.214598963430104, "grad_norm": 2.3203537692212595, "learning_rate": 9.132751916187593e-06, "loss": 1.0388, "step": 5921 }, { "epoch": 0.21463520713276069, "grad_norm": 2.3137105746081685, "learning_rate": 9.132421527619164e-06, "loss": 0.9438, "step": 5922 }, { "epoch": 0.21467145083541736, "grad_norm": 2.4175171820538823, "learning_rate": 9.132091082108466e-06, "loss": 0.9956, "step": 5923 }, { "epoch": 0.214707694538074, "grad_norm": 2.4109234206136536, "learning_rate": 9.13176057966005e-06, "loss": 0.9567, "step": 5924 }, { "epoch": 0.21474393824073068, "grad_norm": 2.439833360199531, "learning_rate": 9.131430020278473e-06, "loss": 1.1214, "step": 5925 }, { "epoch": 0.21478018194338733, "grad_norm": 2.3626088748385774, "learning_rate": 9.131099403968288e-06, "loss": 0.948, "step": 5926 }, { "epoch": 0.214816425646044, "grad_norm": 2.513238756343274, "learning_rate": 9.130768730734055e-06, "loss": 1.0977, "step": 5927 }, { "epoch": 0.21485266934870068, "grad_norm": 2.9588280802355365, "learning_rate": 9.130438000580323e-06, "loss": 0.9859, "step": 5928 }, { "epoch": 0.21488891305135732, "grad_norm": 2.3418833571005786, "learning_rate": 9.130107213511656e-06, "loss": 0.9301, "step": 5929 }, { "epoch": 0.214925156754014, "grad_norm": 2.4796914507477887, "learning_rate": 9.129776369532609e-06, "loss": 0.9165, "step": 5930 }, { "epoch": 0.21496140045667064, "grad_norm": 2.3368858742263354, "learning_rate": 9.129445468647742e-06, "loss": 0.8677, "step": 5931 }, { "epoch": 0.21499764415932732, "grad_norm": 2.373537038925192, "learning_rate": 9.129114510861612e-06, "loss": 0.8771, "step": 5932 }, { "epoch": 0.215033887861984, "grad_norm": 2.557430609317819, "learning_rate": 9.128783496178782e-06, "loss": 0.8829, "step": 5933 }, { "epoch": 0.21507013156464064, "grad_norm": 2.544716081572349, "learning_rate": 9.128452424603813e-06, "loss": 1.0431, "step": 5934 }, { "epoch": 0.2151063752672973, "grad_norm": 2.305929905105296, "learning_rate": 9.128121296141266e-06, "loss": 0.9134, "step": 5935 }, { "epoch": 0.21514261896995396, "grad_norm": 1.9914120746310844, "learning_rate": 9.127790110795703e-06, "loss": 0.9472, "step": 5936 }, { "epoch": 0.21517886267261063, "grad_norm": 2.7002592360130317, "learning_rate": 9.127458868571691e-06, "loss": 0.9932, "step": 5937 }, { "epoch": 0.2152151063752673, "grad_norm": 2.3833485505940493, "learning_rate": 9.127127569473791e-06, "loss": 1.0132, "step": 5938 }, { "epoch": 0.21525135007792395, "grad_norm": 2.7013586052449354, "learning_rate": 9.126796213506568e-06, "loss": 0.9402, "step": 5939 }, { "epoch": 0.21528759378058063, "grad_norm": 2.3082196006721007, "learning_rate": 9.12646480067459e-06, "loss": 0.9805, "step": 5940 }, { "epoch": 0.21532383748323727, "grad_norm": 2.543772858396359, "learning_rate": 9.126133330982421e-06, "loss": 1.0237, "step": 5941 }, { "epoch": 0.21536008118589395, "grad_norm": 2.367218748480557, "learning_rate": 9.125801804434633e-06, "loss": 0.8509, "step": 5942 }, { "epoch": 0.21539632488855062, "grad_norm": 2.4748664838305556, "learning_rate": 9.12547022103579e-06, "loss": 0.8399, "step": 5943 }, { "epoch": 0.21543256859120727, "grad_norm": 2.3913063482277925, "learning_rate": 9.125138580790462e-06, "loss": 1.0982, "step": 5944 }, { "epoch": 0.21546881229386394, "grad_norm": 2.634343865724542, "learning_rate": 9.124806883703219e-06, "loss": 1.2812, "step": 5945 }, { "epoch": 0.21550505599652062, "grad_norm": 2.265698108095602, "learning_rate": 9.124475129778631e-06, "loss": 0.8222, "step": 5946 }, { "epoch": 0.21554129969917726, "grad_norm": 2.326605324825717, "learning_rate": 9.124143319021272e-06, "loss": 0.9633, "step": 5947 }, { "epoch": 0.21557754340183394, "grad_norm": 2.3533095450543695, "learning_rate": 9.123811451435712e-06, "loss": 0.9371, "step": 5948 }, { "epoch": 0.21561378710449058, "grad_norm": 2.2934737468057396, "learning_rate": 9.123479527026525e-06, "loss": 0.9919, "step": 5949 }, { "epoch": 0.21565003080714726, "grad_norm": 2.300050049098017, "learning_rate": 9.123147545798281e-06, "loss": 0.8171, "step": 5950 }, { "epoch": 0.21568627450980393, "grad_norm": 2.4769581026850584, "learning_rate": 9.12281550775556e-06, "loss": 0.9892, "step": 5951 }, { "epoch": 0.21572251821246058, "grad_norm": 2.4443968457954273, "learning_rate": 9.122483412902934e-06, "loss": 0.9836, "step": 5952 }, { "epoch": 0.21575876191511725, "grad_norm": 2.157495290206026, "learning_rate": 9.122151261244978e-06, "loss": 1.0488, "step": 5953 }, { "epoch": 0.2157950056177739, "grad_norm": 2.2908493568135304, "learning_rate": 9.121819052786273e-06, "loss": 1.0567, "step": 5954 }, { "epoch": 0.21583124932043057, "grad_norm": 2.0821745979192428, "learning_rate": 9.121486787531392e-06, "loss": 0.9851, "step": 5955 }, { "epoch": 0.21586749302308725, "grad_norm": 2.1165415658868203, "learning_rate": 9.121154465484919e-06, "loss": 0.8373, "step": 5956 }, { "epoch": 0.2159037367257439, "grad_norm": 2.3620594720414605, "learning_rate": 9.120822086651426e-06, "loss": 0.9576, "step": 5957 }, { "epoch": 0.21593998042840057, "grad_norm": 2.2513281895644877, "learning_rate": 9.1204896510355e-06, "loss": 1.0592, "step": 5958 }, { "epoch": 0.21597622413105722, "grad_norm": 2.2173229149345706, "learning_rate": 9.120157158641717e-06, "loss": 1.0198, "step": 5959 }, { "epoch": 0.2160124678337139, "grad_norm": 14.37658756970522, "learning_rate": 9.11982460947466e-06, "loss": 1.8698, "step": 5960 }, { "epoch": 0.21604871153637056, "grad_norm": 2.1664736568610805, "learning_rate": 9.119492003538912e-06, "loss": 0.9501, "step": 5961 }, { "epoch": 0.2160849552390272, "grad_norm": 2.2539406305971306, "learning_rate": 9.119159340839057e-06, "loss": 0.9886, "step": 5962 }, { "epoch": 0.21612119894168388, "grad_norm": 2.4419600051888484, "learning_rate": 9.118826621379675e-06, "loss": 1.0133, "step": 5963 }, { "epoch": 0.21615744264434056, "grad_norm": 2.4792666828151018, "learning_rate": 9.118493845165355e-06, "loss": 0.9843, "step": 5964 }, { "epoch": 0.2161936863469972, "grad_norm": 2.6232241957229117, "learning_rate": 9.11816101220068e-06, "loss": 0.8694, "step": 5965 }, { "epoch": 0.21622993004965388, "grad_norm": 2.3364972168262805, "learning_rate": 9.117828122490235e-06, "loss": 1.1264, "step": 5966 }, { "epoch": 0.21626617375231053, "grad_norm": 2.7076533839047547, "learning_rate": 9.117495176038612e-06, "loss": 1.0318, "step": 5967 }, { "epoch": 0.2163024174549672, "grad_norm": 2.522231313388542, "learning_rate": 9.117162172850394e-06, "loss": 1.15, "step": 5968 }, { "epoch": 0.21633866115762387, "grad_norm": 2.2990075404107233, "learning_rate": 9.116829112930171e-06, "loss": 1.0159, "step": 5969 }, { "epoch": 0.21637490486028052, "grad_norm": 2.7384809948166624, "learning_rate": 9.116495996282532e-06, "loss": 1.0443, "step": 5970 }, { "epoch": 0.2164111485629372, "grad_norm": 2.3670241091062993, "learning_rate": 9.11616282291207e-06, "loss": 1.0188, "step": 5971 }, { "epoch": 0.21644739226559384, "grad_norm": 2.7801287396456478, "learning_rate": 9.115829592823372e-06, "loss": 1.0108, "step": 5972 }, { "epoch": 0.21648363596825052, "grad_norm": 2.7473787321852834, "learning_rate": 9.115496306021032e-06, "loss": 1.0093, "step": 5973 }, { "epoch": 0.2165198796709072, "grad_norm": 2.3961826795971923, "learning_rate": 9.115162962509642e-06, "loss": 1.0072, "step": 5974 }, { "epoch": 0.21655612337356384, "grad_norm": 2.6133744241819237, "learning_rate": 9.114829562293795e-06, "loss": 1.0425, "step": 5975 }, { "epoch": 0.2165923670762205, "grad_norm": 2.386782587495678, "learning_rate": 9.114496105378083e-06, "loss": 0.9474, "step": 5976 }, { "epoch": 0.21662861077887716, "grad_norm": 2.430858253100146, "learning_rate": 9.114162591767106e-06, "loss": 1.09, "step": 5977 }, { "epoch": 0.21666485448153383, "grad_norm": 2.4580519452169485, "learning_rate": 9.113829021465456e-06, "loss": 0.9566, "step": 5978 }, { "epoch": 0.2167010981841905, "grad_norm": 2.274481247821975, "learning_rate": 9.11349539447773e-06, "loss": 1.1297, "step": 5979 }, { "epoch": 0.21673734188684715, "grad_norm": 2.581850501750171, "learning_rate": 9.113161710808524e-06, "loss": 1.0925, "step": 5980 }, { "epoch": 0.21677358558950383, "grad_norm": 2.4759742351282767, "learning_rate": 9.112827970462438e-06, "loss": 0.9584, "step": 5981 }, { "epoch": 0.2168098292921605, "grad_norm": 2.235286977363621, "learning_rate": 9.11249417344407e-06, "loss": 0.9121, "step": 5982 }, { "epoch": 0.21684607299481715, "grad_norm": 2.3979041519794166, "learning_rate": 9.11216031975802e-06, "loss": 0.9165, "step": 5983 }, { "epoch": 0.21688231669747382, "grad_norm": 2.5501313973661683, "learning_rate": 9.111826409408888e-06, "loss": 1.0616, "step": 5984 }, { "epoch": 0.21691856040013047, "grad_norm": 2.37746820412891, "learning_rate": 9.111492442401274e-06, "loss": 0.9509, "step": 5985 }, { "epoch": 0.21695480410278714, "grad_norm": 2.129600647130469, "learning_rate": 9.11115841873978e-06, "loss": 0.8985, "step": 5986 }, { "epoch": 0.21699104780544382, "grad_norm": 2.294821778706645, "learning_rate": 9.11082433842901e-06, "loss": 0.8575, "step": 5987 }, { "epoch": 0.21702729150810046, "grad_norm": 2.269517138628599, "learning_rate": 9.110490201473567e-06, "loss": 1.0089, "step": 5988 }, { "epoch": 0.21706353521075714, "grad_norm": 2.265532625939597, "learning_rate": 9.110156007878054e-06, "loss": 0.9977, "step": 5989 }, { "epoch": 0.21709977891341378, "grad_norm": 2.269123472883458, "learning_rate": 9.10982175764708e-06, "loss": 1.0302, "step": 5990 }, { "epoch": 0.21713602261607046, "grad_norm": 2.581580778281128, "learning_rate": 9.109487450785243e-06, "loss": 1.0272, "step": 5991 }, { "epoch": 0.21717226631872713, "grad_norm": 2.150321987598764, "learning_rate": 9.109153087297158e-06, "loss": 0.9094, "step": 5992 }, { "epoch": 0.21720851002138378, "grad_norm": 2.1747765930244785, "learning_rate": 9.108818667187428e-06, "loss": 1.009, "step": 5993 }, { "epoch": 0.21724475372404045, "grad_norm": 2.3784489491696745, "learning_rate": 9.10848419046066e-06, "loss": 0.9993, "step": 5994 }, { "epoch": 0.2172809974266971, "grad_norm": 2.217541285804461, "learning_rate": 9.108149657121466e-06, "loss": 1.0214, "step": 5995 }, { "epoch": 0.21731724112935377, "grad_norm": 2.3366702809667586, "learning_rate": 9.107815067174455e-06, "loss": 1.2057, "step": 5996 }, { "epoch": 0.21735348483201045, "grad_norm": 2.2808260894670624, "learning_rate": 9.107480420624235e-06, "loss": 1.0218, "step": 5997 }, { "epoch": 0.2173897285346671, "grad_norm": 2.480073657544431, "learning_rate": 9.10714571747542e-06, "loss": 0.9183, "step": 5998 }, { "epoch": 0.21742597223732377, "grad_norm": 2.4066808729308464, "learning_rate": 9.10681095773262e-06, "loss": 0.9768, "step": 5999 }, { "epoch": 0.21746221593998044, "grad_norm": 2.225384487600114, "learning_rate": 9.106476141400449e-06, "loss": 0.7869, "step": 6000 }, { "epoch": 0.2174984596426371, "grad_norm": 2.2049506874514644, "learning_rate": 9.106141268483521e-06, "loss": 1.1289, "step": 6001 }, { "epoch": 0.21753470334529376, "grad_norm": 2.0205654009752094, "learning_rate": 9.10580633898645e-06, "loss": 0.9162, "step": 6002 }, { "epoch": 0.2175709470479504, "grad_norm": 2.130456885557308, "learning_rate": 9.105471352913848e-06, "loss": 0.9784, "step": 6003 }, { "epoch": 0.21760719075060708, "grad_norm": 2.3588663616710748, "learning_rate": 9.105136310270337e-06, "loss": 1.1041, "step": 6004 }, { "epoch": 0.21764343445326376, "grad_norm": 2.549259949026312, "learning_rate": 9.104801211060528e-06, "loss": 1.0831, "step": 6005 }, { "epoch": 0.2176796781559204, "grad_norm": 2.320575702099024, "learning_rate": 9.104466055289043e-06, "loss": 0.9545, "step": 6006 }, { "epoch": 0.21771592185857708, "grad_norm": 2.400125752747799, "learning_rate": 9.104130842960496e-06, "loss": 1.0444, "step": 6007 }, { "epoch": 0.21775216556123372, "grad_norm": 2.279158707937934, "learning_rate": 9.103795574079508e-06, "loss": 1.0346, "step": 6008 }, { "epoch": 0.2177884092638904, "grad_norm": 2.209772812427842, "learning_rate": 9.1034602486507e-06, "loss": 1.0172, "step": 6009 }, { "epoch": 0.21782465296654707, "grad_norm": 2.4070114912071454, "learning_rate": 9.103124866678692e-06, "loss": 0.9289, "step": 6010 }, { "epoch": 0.21786089666920372, "grad_norm": 2.119097878018827, "learning_rate": 9.102789428168104e-06, "loss": 0.9157, "step": 6011 }, { "epoch": 0.2178971403718604, "grad_norm": 2.1681195172282863, "learning_rate": 9.102453933123558e-06, "loss": 0.8928, "step": 6012 }, { "epoch": 0.21793338407451704, "grad_norm": 2.355505135989511, "learning_rate": 9.10211838154968e-06, "loss": 0.8595, "step": 6013 }, { "epoch": 0.2179696277771737, "grad_norm": 2.744555001059398, "learning_rate": 9.10178277345109e-06, "loss": 1.0974, "step": 6014 }, { "epoch": 0.2180058714798304, "grad_norm": 2.375543028371034, "learning_rate": 9.101447108832415e-06, "loss": 0.99, "step": 6015 }, { "epoch": 0.21804211518248703, "grad_norm": 2.5653395873176184, "learning_rate": 9.101111387698278e-06, "loss": 0.9725, "step": 6016 }, { "epoch": 0.2180783588851437, "grad_norm": 2.4075316355780085, "learning_rate": 9.100775610053308e-06, "loss": 0.9221, "step": 6017 }, { "epoch": 0.21811460258780038, "grad_norm": 2.325893585051023, "learning_rate": 9.100439775902131e-06, "loss": 1.0662, "step": 6018 }, { "epoch": 0.21815084629045703, "grad_norm": 2.233688790008292, "learning_rate": 9.10010388524937e-06, "loss": 1.0424, "step": 6019 }, { "epoch": 0.2181870899931137, "grad_norm": 2.200791102854281, "learning_rate": 9.09976793809966e-06, "loss": 0.9007, "step": 6020 }, { "epoch": 0.21822333369577035, "grad_norm": 2.593137181443723, "learning_rate": 9.099431934457626e-06, "loss": 1.1541, "step": 6021 }, { "epoch": 0.21825957739842702, "grad_norm": 2.433515932555522, "learning_rate": 9.0990958743279e-06, "loss": 0.962, "step": 6022 }, { "epoch": 0.2182958211010837, "grad_norm": 2.4923998791390103, "learning_rate": 9.098759757715111e-06, "loss": 1.0524, "step": 6023 }, { "epoch": 0.21833206480374034, "grad_norm": 2.537064253046553, "learning_rate": 9.098423584623892e-06, "loss": 1.002, "step": 6024 }, { "epoch": 0.21836830850639702, "grad_norm": 3.1127448347957025, "learning_rate": 9.098087355058876e-06, "loss": 0.9785, "step": 6025 }, { "epoch": 0.21840455220905367, "grad_norm": 2.2964833715424793, "learning_rate": 9.097751069024693e-06, "loss": 1.2679, "step": 6026 }, { "epoch": 0.21844079591171034, "grad_norm": 2.0644297343830935, "learning_rate": 9.09741472652598e-06, "loss": 0.8741, "step": 6027 }, { "epoch": 0.218477039614367, "grad_norm": 2.4006888076885153, "learning_rate": 9.097078327567368e-06, "loss": 1.0415, "step": 6028 }, { "epoch": 0.21851328331702366, "grad_norm": 2.4463069892754983, "learning_rate": 9.096741872153496e-06, "loss": 1.106, "step": 6029 }, { "epoch": 0.21854952701968033, "grad_norm": 2.496577679829077, "learning_rate": 9.096405360288996e-06, "loss": 1.1527, "step": 6030 }, { "epoch": 0.21858577072233698, "grad_norm": 2.4080623076170076, "learning_rate": 9.096068791978511e-06, "loss": 0.8653, "step": 6031 }, { "epoch": 0.21862201442499365, "grad_norm": 2.333709363549479, "learning_rate": 9.095732167226675e-06, "loss": 1.0408, "step": 6032 }, { "epoch": 0.21865825812765033, "grad_norm": 2.64043198191886, "learning_rate": 9.095395486038124e-06, "loss": 1.006, "step": 6033 }, { "epoch": 0.21869450183030698, "grad_norm": 2.5341589852981756, "learning_rate": 9.095058748417503e-06, "loss": 1.1656, "step": 6034 }, { "epoch": 0.21873074553296365, "grad_norm": 2.4735031541739714, "learning_rate": 9.094721954369447e-06, "loss": 1.1414, "step": 6035 }, { "epoch": 0.21876698923562032, "grad_norm": 2.3881159555816014, "learning_rate": 9.094385103898599e-06, "loss": 1.0086, "step": 6036 }, { "epoch": 0.21880323293827697, "grad_norm": 3.1302661693695946, "learning_rate": 9.094048197009602e-06, "loss": 0.87, "step": 6037 }, { "epoch": 0.21883947664093364, "grad_norm": 2.314738202900744, "learning_rate": 9.093711233707096e-06, "loss": 0.9196, "step": 6038 }, { "epoch": 0.2188757203435903, "grad_norm": 2.3961267588843462, "learning_rate": 9.093374213995724e-06, "loss": 1.0406, "step": 6039 }, { "epoch": 0.21891196404624697, "grad_norm": 2.2370822781453517, "learning_rate": 9.093037137880132e-06, "loss": 1.0392, "step": 6040 }, { "epoch": 0.21894820774890364, "grad_norm": 2.2153146348825734, "learning_rate": 9.092700005364965e-06, "loss": 0.9508, "step": 6041 }, { "epoch": 0.21898445145156029, "grad_norm": 2.3236781338270465, "learning_rate": 9.092362816454865e-06, "loss": 0.9896, "step": 6042 }, { "epoch": 0.21902069515421696, "grad_norm": 2.3083446642608836, "learning_rate": 9.092025571154481e-06, "loss": 1.1105, "step": 6043 }, { "epoch": 0.2190569388568736, "grad_norm": 2.480558139623313, "learning_rate": 9.091688269468459e-06, "loss": 1.2162, "step": 6044 }, { "epoch": 0.21909318255953028, "grad_norm": 1.9319620340577839, "learning_rate": 9.091350911401448e-06, "loss": 0.9753, "step": 6045 }, { "epoch": 0.21912942626218695, "grad_norm": 2.0790491907401387, "learning_rate": 9.091013496958095e-06, "loss": 1.1812, "step": 6046 }, { "epoch": 0.2191656699648436, "grad_norm": 2.498042963628387, "learning_rate": 9.09067602614305e-06, "loss": 0.8763, "step": 6047 }, { "epoch": 0.21920191366750028, "grad_norm": 2.4311652977277203, "learning_rate": 9.090338498960964e-06, "loss": 0.8518, "step": 6048 }, { "epoch": 0.21923815737015692, "grad_norm": 2.4201750573403857, "learning_rate": 9.090000915416488e-06, "loss": 0.9646, "step": 6049 }, { "epoch": 0.2192744010728136, "grad_norm": 2.1666047407847615, "learning_rate": 9.089663275514272e-06, "loss": 1.053, "step": 6050 }, { "epoch": 0.21931064477547027, "grad_norm": 2.4889060454810186, "learning_rate": 9.08932557925897e-06, "loss": 0.9802, "step": 6051 }, { "epoch": 0.21934688847812692, "grad_norm": 2.1560938030030736, "learning_rate": 9.088987826655234e-06, "loss": 0.9551, "step": 6052 }, { "epoch": 0.2193831321807836, "grad_norm": 2.6724911005655696, "learning_rate": 9.088650017707718e-06, "loss": 0.9245, "step": 6053 }, { "epoch": 0.21941937588344027, "grad_norm": 2.748345730711701, "learning_rate": 9.088312152421079e-06, "loss": 1.0459, "step": 6054 }, { "epoch": 0.2194556195860969, "grad_norm": 2.2561746548356587, "learning_rate": 9.08797423079997e-06, "loss": 1.022, "step": 6055 }, { "epoch": 0.21949186328875359, "grad_norm": 2.3936276846312174, "learning_rate": 9.08763625284905e-06, "loss": 0.8106, "step": 6056 }, { "epoch": 0.21952810699141023, "grad_norm": 2.2270280190339817, "learning_rate": 9.087298218572972e-06, "loss": 0.8572, "step": 6057 }, { "epoch": 0.2195643506940669, "grad_norm": 2.1718936871968215, "learning_rate": 9.086960127976399e-06, "loss": 0.7952, "step": 6058 }, { "epoch": 0.21960059439672358, "grad_norm": 2.451723357435114, "learning_rate": 9.086621981063986e-06, "loss": 0.9584, "step": 6059 }, { "epoch": 0.21963683809938023, "grad_norm": 2.361833003926246, "learning_rate": 9.086283777840393e-06, "loss": 0.7895, "step": 6060 }, { "epoch": 0.2196730818020369, "grad_norm": 2.209602967025883, "learning_rate": 9.085945518310282e-06, "loss": 0.9642, "step": 6061 }, { "epoch": 0.21970932550469355, "grad_norm": 2.435990103936312, "learning_rate": 9.085607202478312e-06, "loss": 0.9296, "step": 6062 }, { "epoch": 0.21974556920735022, "grad_norm": 2.33427356110753, "learning_rate": 9.085268830349146e-06, "loss": 1.0391, "step": 6063 }, { "epoch": 0.2197818129100069, "grad_norm": 2.236153192042912, "learning_rate": 9.084930401927446e-06, "loss": 0.9206, "step": 6064 }, { "epoch": 0.21981805661266354, "grad_norm": 2.596019865350317, "learning_rate": 9.084591917217877e-06, "loss": 1.1415, "step": 6065 }, { "epoch": 0.21985430031532022, "grad_norm": 2.4414850415852443, "learning_rate": 9.0842533762251e-06, "loss": 1.0049, "step": 6066 }, { "epoch": 0.21989054401797686, "grad_norm": 2.6543520130351537, "learning_rate": 9.083914778953782e-06, "loss": 1.0743, "step": 6067 }, { "epoch": 0.21992678772063354, "grad_norm": 2.1815915897110902, "learning_rate": 9.083576125408588e-06, "loss": 0.9609, "step": 6068 }, { "epoch": 0.2199630314232902, "grad_norm": 2.445468407209344, "learning_rate": 9.083237415594184e-06, "loss": 0.9792, "step": 6069 }, { "epoch": 0.21999927512594686, "grad_norm": 2.111491268576807, "learning_rate": 9.082898649515239e-06, "loss": 0.803, "step": 6070 }, { "epoch": 0.22003551882860353, "grad_norm": 2.402591651759869, "learning_rate": 9.082559827176421e-06, "loss": 1.0124, "step": 6071 }, { "epoch": 0.2200717625312602, "grad_norm": 2.6176322633845723, "learning_rate": 9.082220948582395e-06, "loss": 0.8681, "step": 6072 }, { "epoch": 0.22010800623391685, "grad_norm": 2.3131968491801826, "learning_rate": 9.081882013737835e-06, "loss": 0.8027, "step": 6073 }, { "epoch": 0.22014424993657353, "grad_norm": 2.367861346443553, "learning_rate": 9.081543022647408e-06, "loss": 0.897, "step": 6074 }, { "epoch": 0.22018049363923017, "grad_norm": 2.471286714722322, "learning_rate": 9.081203975315788e-06, "loss": 1.0631, "step": 6075 }, { "epoch": 0.22021673734188685, "grad_norm": 2.2871296475090537, "learning_rate": 9.080864871747645e-06, "loss": 0.9101, "step": 6076 }, { "epoch": 0.22025298104454352, "grad_norm": 2.463641276943377, "learning_rate": 9.080525711947652e-06, "loss": 1.0909, "step": 6077 }, { "epoch": 0.22028922474720017, "grad_norm": 2.690354780603174, "learning_rate": 9.080186495920482e-06, "loss": 0.898, "step": 6078 }, { "epoch": 0.22032546844985684, "grad_norm": 2.3649701786123924, "learning_rate": 9.079847223670811e-06, "loss": 1.0776, "step": 6079 }, { "epoch": 0.2203617121525135, "grad_norm": 2.4587452710896467, "learning_rate": 9.079507895203312e-06, "loss": 0.9328, "step": 6080 }, { "epoch": 0.22039795585517016, "grad_norm": 2.0589141857071716, "learning_rate": 9.079168510522661e-06, "loss": 0.8075, "step": 6081 }, { "epoch": 0.22043419955782684, "grad_norm": 2.3579941148163113, "learning_rate": 9.078829069633535e-06, "loss": 0.96, "step": 6082 }, { "epoch": 0.22047044326048348, "grad_norm": 2.3819999709266444, "learning_rate": 9.078489572540612e-06, "loss": 0.8166, "step": 6083 }, { "epoch": 0.22050668696314016, "grad_norm": 2.1439162531389693, "learning_rate": 9.078150019248568e-06, "loss": 0.8656, "step": 6084 }, { "epoch": 0.2205429306657968, "grad_norm": 2.4542598562487115, "learning_rate": 9.077810409762084e-06, "loss": 0.9651, "step": 6085 }, { "epoch": 0.22057917436845348, "grad_norm": 2.383816525285918, "learning_rate": 9.077470744085839e-06, "loss": 0.914, "step": 6086 }, { "epoch": 0.22061541807111015, "grad_norm": 2.0676024927478025, "learning_rate": 9.077131022224513e-06, "loss": 0.8638, "step": 6087 }, { "epoch": 0.2206516617737668, "grad_norm": 2.4291342046167173, "learning_rate": 9.076791244182788e-06, "loss": 1.088, "step": 6088 }, { "epoch": 0.22068790547642347, "grad_norm": 2.4511333738686716, "learning_rate": 9.076451409965344e-06, "loss": 0.9043, "step": 6089 }, { "epoch": 0.22072414917908015, "grad_norm": 2.479352590834229, "learning_rate": 9.076111519576865e-06, "loss": 0.8963, "step": 6090 }, { "epoch": 0.2207603928817368, "grad_norm": 2.2568199294364106, "learning_rate": 9.075771573022034e-06, "loss": 0.8262, "step": 6091 }, { "epoch": 0.22079663658439347, "grad_norm": 2.621257247699314, "learning_rate": 9.075431570305536e-06, "loss": 1.0944, "step": 6092 }, { "epoch": 0.22083288028705012, "grad_norm": 2.514079736405648, "learning_rate": 9.075091511432057e-06, "loss": 1.0651, "step": 6093 }, { "epoch": 0.2208691239897068, "grad_norm": 2.2719254891579492, "learning_rate": 9.074751396406279e-06, "loss": 1.0597, "step": 6094 }, { "epoch": 0.22090536769236346, "grad_norm": 2.1427774539587454, "learning_rate": 9.074411225232894e-06, "loss": 0.8835, "step": 6095 }, { "epoch": 0.2209416113950201, "grad_norm": 2.5132269578754642, "learning_rate": 9.074070997916584e-06, "loss": 1.0142, "step": 6096 }, { "epoch": 0.22097785509767678, "grad_norm": 2.385538967736365, "learning_rate": 9.073730714462042e-06, "loss": 1.195, "step": 6097 }, { "epoch": 0.22101409880033343, "grad_norm": 2.1587920517509818, "learning_rate": 9.073390374873951e-06, "loss": 1.0703, "step": 6098 }, { "epoch": 0.2210503425029901, "grad_norm": 2.3729195260637543, "learning_rate": 9.073049979157007e-06, "loss": 1.0091, "step": 6099 }, { "epoch": 0.22108658620564678, "grad_norm": 2.524216517217378, "learning_rate": 9.072709527315897e-06, "loss": 1.0157, "step": 6100 }, { "epoch": 0.22112282990830343, "grad_norm": 2.8754152447025265, "learning_rate": 9.072369019355311e-06, "loss": 1.0908, "step": 6101 }, { "epoch": 0.2211590736109601, "grad_norm": 2.064158853857208, "learning_rate": 9.072028455279945e-06, "loss": 0.8549, "step": 6102 }, { "epoch": 0.22119531731361675, "grad_norm": 2.4226495840791418, "learning_rate": 9.07168783509449e-06, "loss": 0.9934, "step": 6103 }, { "epoch": 0.22123156101627342, "grad_norm": 2.616867070860191, "learning_rate": 9.071347158803639e-06, "loss": 1.0717, "step": 6104 }, { "epoch": 0.2212678047189301, "grad_norm": 2.4492078294889934, "learning_rate": 9.071006426412083e-06, "loss": 1.0122, "step": 6105 }, { "epoch": 0.22130404842158674, "grad_norm": 2.483823546365654, "learning_rate": 9.070665637924525e-06, "loss": 0.9108, "step": 6106 }, { "epoch": 0.22134029212424342, "grad_norm": 2.4503739601316252, "learning_rate": 9.070324793345654e-06, "loss": 0.9868, "step": 6107 }, { "epoch": 0.2213765358269001, "grad_norm": 2.525615837445821, "learning_rate": 9.069983892680169e-06, "loss": 0.9952, "step": 6108 }, { "epoch": 0.22141277952955674, "grad_norm": 2.5971158549018427, "learning_rate": 9.069642935932767e-06, "loss": 0.8929, "step": 6109 }, { "epoch": 0.2214490232322134, "grad_norm": 2.1322075667806897, "learning_rate": 9.069301923108148e-06, "loss": 0.8255, "step": 6110 }, { "epoch": 0.22148526693487006, "grad_norm": 2.358094266750842, "learning_rate": 9.068960854211009e-06, "loss": 0.9305, "step": 6111 }, { "epoch": 0.22152151063752673, "grad_norm": 2.279777426443994, "learning_rate": 9.06861972924605e-06, "loss": 0.7752, "step": 6112 }, { "epoch": 0.2215577543401834, "grad_norm": 2.4665921092850014, "learning_rate": 9.068278548217973e-06, "loss": 1.0313, "step": 6113 }, { "epoch": 0.22159399804284005, "grad_norm": 2.501219054511207, "learning_rate": 9.067937311131476e-06, "loss": 1.0327, "step": 6114 }, { "epoch": 0.22163024174549673, "grad_norm": 2.6484605430834347, "learning_rate": 9.067596017991263e-06, "loss": 0.9424, "step": 6115 }, { "epoch": 0.22166648544815337, "grad_norm": 2.349874469871311, "learning_rate": 9.06725466880204e-06, "loss": 0.9637, "step": 6116 }, { "epoch": 0.22170272915081005, "grad_norm": 2.2582870787080096, "learning_rate": 9.066913263568505e-06, "loss": 0.986, "step": 6117 }, { "epoch": 0.22173897285346672, "grad_norm": 2.4284743357275804, "learning_rate": 9.066571802295363e-06, "loss": 1.0781, "step": 6118 }, { "epoch": 0.22177521655612337, "grad_norm": 2.580040289801774, "learning_rate": 9.066230284987325e-06, "loss": 1.0871, "step": 6119 }, { "epoch": 0.22181146025878004, "grad_norm": 2.441008402400702, "learning_rate": 9.06588871164909e-06, "loss": 1.0471, "step": 6120 }, { "epoch": 0.2218477039614367, "grad_norm": 2.7181190417884653, "learning_rate": 9.065547082285368e-06, "loss": 1.1137, "step": 6121 }, { "epoch": 0.22188394766409336, "grad_norm": 2.0624416465254494, "learning_rate": 9.065205396900867e-06, "loss": 0.9977, "step": 6122 }, { "epoch": 0.22192019136675004, "grad_norm": 2.668448342268396, "learning_rate": 9.064863655500293e-06, "loss": 0.8777, "step": 6123 }, { "epoch": 0.22195643506940668, "grad_norm": 2.4505110097753353, "learning_rate": 9.064521858088357e-06, "loss": 0.9652, "step": 6124 }, { "epoch": 0.22199267877206336, "grad_norm": 2.3406634519334584, "learning_rate": 9.064180004669768e-06, "loss": 0.987, "step": 6125 }, { "epoch": 0.22202892247472003, "grad_norm": 2.5402933981262703, "learning_rate": 9.063838095249236e-06, "loss": 0.9852, "step": 6126 }, { "epoch": 0.22206516617737668, "grad_norm": 2.383168942618362, "learning_rate": 9.063496129831473e-06, "loss": 1.0225, "step": 6127 }, { "epoch": 0.22210140988003335, "grad_norm": 2.317777653062581, "learning_rate": 9.063154108421192e-06, "loss": 0.8311, "step": 6128 }, { "epoch": 0.22213765358269, "grad_norm": 2.3256766429938116, "learning_rate": 9.062812031023102e-06, "loss": 0.8963, "step": 6129 }, { "epoch": 0.22217389728534667, "grad_norm": 2.6115105207792237, "learning_rate": 9.062469897641921e-06, "loss": 0.7786, "step": 6130 }, { "epoch": 0.22221014098800335, "grad_norm": 2.398327967921238, "learning_rate": 9.062127708282362e-06, "loss": 1.0539, "step": 6131 }, { "epoch": 0.22224638469066, "grad_norm": 2.591116994485799, "learning_rate": 9.06178546294914e-06, "loss": 0.9106, "step": 6132 }, { "epoch": 0.22228262839331667, "grad_norm": 2.294335483705057, "learning_rate": 9.06144316164697e-06, "loss": 0.7661, "step": 6133 }, { "epoch": 0.2223188720959733, "grad_norm": 2.7732981975772213, "learning_rate": 9.06110080438057e-06, "loss": 0.9527, "step": 6134 }, { "epoch": 0.22235511579863, "grad_norm": 2.401267237816546, "learning_rate": 9.060758391154656e-06, "loss": 0.9453, "step": 6135 }, { "epoch": 0.22239135950128666, "grad_norm": 2.459494987027926, "learning_rate": 9.060415921973948e-06, "loss": 0.9246, "step": 6136 }, { "epoch": 0.2224276032039433, "grad_norm": 2.464392941464041, "learning_rate": 9.060073396843165e-06, "loss": 0.9924, "step": 6137 }, { "epoch": 0.22246384690659998, "grad_norm": 2.549071650676491, "learning_rate": 9.059730815767024e-06, "loss": 1.0587, "step": 6138 }, { "epoch": 0.22250009060925663, "grad_norm": 2.498555765174897, "learning_rate": 9.05938817875025e-06, "loss": 0.9021, "step": 6139 }, { "epoch": 0.2225363343119133, "grad_norm": 2.203624402449267, "learning_rate": 9.059045485797562e-06, "loss": 0.8765, "step": 6140 }, { "epoch": 0.22257257801456998, "grad_norm": 2.404649253961999, "learning_rate": 9.05870273691368e-06, "loss": 0.966, "step": 6141 }, { "epoch": 0.22260882171722662, "grad_norm": 2.282863101282474, "learning_rate": 9.05835993210333e-06, "loss": 0.807, "step": 6142 }, { "epoch": 0.2226450654198833, "grad_norm": 2.4007242912617, "learning_rate": 9.058017071371235e-06, "loss": 0.9411, "step": 6143 }, { "epoch": 0.22268130912253997, "grad_norm": 2.355846667754482, "learning_rate": 9.057674154722118e-06, "loss": 0.9233, "step": 6144 }, { "epoch": 0.22271755282519662, "grad_norm": 2.492158372857093, "learning_rate": 9.057331182160705e-06, "loss": 1.1025, "step": 6145 }, { "epoch": 0.2227537965278533, "grad_norm": 2.1864408660094083, "learning_rate": 9.056988153691723e-06, "loss": 0.9045, "step": 6146 }, { "epoch": 0.22279004023050994, "grad_norm": 2.07036616063768, "learning_rate": 9.056645069319898e-06, "loss": 0.9897, "step": 6147 }, { "epoch": 0.2228262839331666, "grad_norm": 2.7669819140861587, "learning_rate": 9.056301929049956e-06, "loss": 0.8394, "step": 6148 }, { "epoch": 0.2228625276358233, "grad_norm": 2.7162690865500507, "learning_rate": 9.055958732886628e-06, "loss": 1.024, "step": 6149 }, { "epoch": 0.22289877133847993, "grad_norm": 2.389562509570053, "learning_rate": 9.05561548083464e-06, "loss": 1.0474, "step": 6150 }, { "epoch": 0.2229350150411366, "grad_norm": 2.3333919665873992, "learning_rate": 9.055272172898723e-06, "loss": 0.9111, "step": 6151 }, { "epoch": 0.22297125874379325, "grad_norm": 2.0205767902261105, "learning_rate": 9.05492880908361e-06, "loss": 0.8258, "step": 6152 }, { "epoch": 0.22300750244644993, "grad_norm": 2.2902247887624982, "learning_rate": 9.054585389394031e-06, "loss": 0.7696, "step": 6153 }, { "epoch": 0.2230437461491066, "grad_norm": 2.4215410931760797, "learning_rate": 9.054241913834717e-06, "loss": 1.0789, "step": 6154 }, { "epoch": 0.22307998985176325, "grad_norm": 2.4338223876229588, "learning_rate": 9.0538983824104e-06, "loss": 0.9775, "step": 6155 }, { "epoch": 0.22311623355441992, "grad_norm": 2.2637531396032005, "learning_rate": 9.053554795125815e-06, "loss": 0.9488, "step": 6156 }, { "epoch": 0.22315247725707657, "grad_norm": 2.2231798936958502, "learning_rate": 9.053211151985697e-06, "loss": 0.7814, "step": 6157 }, { "epoch": 0.22318872095973324, "grad_norm": 2.3862105755680916, "learning_rate": 9.05286745299478e-06, "loss": 0.9313, "step": 6158 }, { "epoch": 0.22322496466238992, "grad_norm": 2.1800314650567536, "learning_rate": 9.052523698157801e-06, "loss": 0.802, "step": 6159 }, { "epoch": 0.22326120836504657, "grad_norm": 2.0203845661163498, "learning_rate": 9.052179887479498e-06, "loss": 0.8032, "step": 6160 }, { "epoch": 0.22329745206770324, "grad_norm": 2.4827213056013955, "learning_rate": 9.051836020964605e-06, "loss": 0.9875, "step": 6161 }, { "epoch": 0.2233336957703599, "grad_norm": 2.3327796657286584, "learning_rate": 9.051492098617862e-06, "loss": 0.9337, "step": 6162 }, { "epoch": 0.22336993947301656, "grad_norm": 2.2999559798884213, "learning_rate": 9.05114812044401e-06, "loss": 0.8825, "step": 6163 }, { "epoch": 0.22340618317567323, "grad_norm": 2.426078142671979, "learning_rate": 9.050804086447784e-06, "loss": 0.9712, "step": 6164 }, { "epoch": 0.22344242687832988, "grad_norm": 2.3928548074774545, "learning_rate": 9.05045999663393e-06, "loss": 1.061, "step": 6165 }, { "epoch": 0.22347867058098655, "grad_norm": 2.3458972212783413, "learning_rate": 9.050115851007186e-06, "loss": 0.9992, "step": 6166 }, { "epoch": 0.22351491428364323, "grad_norm": 2.546374350432349, "learning_rate": 9.049771649572295e-06, "loss": 1.0389, "step": 6167 }, { "epoch": 0.22355115798629988, "grad_norm": 2.44111296075598, "learning_rate": 9.049427392334e-06, "loss": 1.0867, "step": 6168 }, { "epoch": 0.22358740168895655, "grad_norm": 2.315021509499791, "learning_rate": 9.049083079297042e-06, "loss": 0.9066, "step": 6169 }, { "epoch": 0.2236236453916132, "grad_norm": 2.2059471384679914, "learning_rate": 9.048738710466172e-06, "loss": 1.0542, "step": 6170 }, { "epoch": 0.22365988909426987, "grad_norm": 2.3228106627784904, "learning_rate": 9.04839428584613e-06, "loss": 1.1126, "step": 6171 }, { "epoch": 0.22369613279692654, "grad_norm": 2.2752467042973583, "learning_rate": 9.048049805441664e-06, "loss": 0.9001, "step": 6172 }, { "epoch": 0.2237323764995832, "grad_norm": 2.2022939890116016, "learning_rate": 9.047705269257517e-06, "loss": 0.9547, "step": 6173 }, { "epoch": 0.22376862020223987, "grad_norm": 2.091218340909765, "learning_rate": 9.04736067729844e-06, "loss": 0.9567, "step": 6174 }, { "epoch": 0.2238048639048965, "grad_norm": 2.343266001428021, "learning_rate": 9.047016029569182e-06, "loss": 0.8391, "step": 6175 }, { "epoch": 0.22384110760755319, "grad_norm": 2.6391943068585872, "learning_rate": 9.046671326074491e-06, "loss": 1.0526, "step": 6176 }, { "epoch": 0.22387735131020986, "grad_norm": 2.355912287140442, "learning_rate": 9.046326566819117e-06, "loss": 1.0748, "step": 6177 }, { "epoch": 0.2239135950128665, "grad_norm": 2.7521433894650675, "learning_rate": 9.045981751807807e-06, "loss": 1.1088, "step": 6178 }, { "epoch": 0.22394983871552318, "grad_norm": 2.671982505544763, "learning_rate": 9.045636881045318e-06, "loss": 0.9489, "step": 6179 }, { "epoch": 0.22398608241817985, "grad_norm": 2.271167124598486, "learning_rate": 9.045291954536399e-06, "loss": 1.1574, "step": 6180 }, { "epoch": 0.2240223261208365, "grad_norm": 2.6502864245107145, "learning_rate": 9.044946972285803e-06, "loss": 0.8517, "step": 6181 }, { "epoch": 0.22405856982349318, "grad_norm": 2.31433118882768, "learning_rate": 9.044601934298284e-06, "loss": 1.0019, "step": 6182 }, { "epoch": 0.22409481352614982, "grad_norm": 2.3297868079627087, "learning_rate": 9.044256840578596e-06, "loss": 0.8378, "step": 6183 }, { "epoch": 0.2241310572288065, "grad_norm": 2.4790713964080844, "learning_rate": 9.043911691131495e-06, "loss": 0.9122, "step": 6184 }, { "epoch": 0.22416730093146317, "grad_norm": 2.486505912322703, "learning_rate": 9.043566485961736e-06, "loss": 0.9812, "step": 6185 }, { "epoch": 0.22420354463411982, "grad_norm": 2.4659471061936444, "learning_rate": 9.043221225074078e-06, "loss": 0.9137, "step": 6186 }, { "epoch": 0.2242397883367765, "grad_norm": 2.584068690010154, "learning_rate": 9.042875908473276e-06, "loss": 0.9994, "step": 6187 }, { "epoch": 0.22427603203943314, "grad_norm": 2.1654231977201253, "learning_rate": 9.042530536164089e-06, "loss": 1.0145, "step": 6188 }, { "epoch": 0.2243122757420898, "grad_norm": 2.700976640300115, "learning_rate": 9.042185108151276e-06, "loss": 0.9565, "step": 6189 }, { "epoch": 0.22434851944474649, "grad_norm": 2.377786625684284, "learning_rate": 9.041839624439598e-06, "loss": 0.8921, "step": 6190 }, { "epoch": 0.22438476314740313, "grad_norm": 2.531942494749707, "learning_rate": 9.041494085033812e-06, "loss": 1.0651, "step": 6191 }, { "epoch": 0.2244210068500598, "grad_norm": 2.413509301251167, "learning_rate": 9.041148489938683e-06, "loss": 1.0946, "step": 6192 }, { "epoch": 0.22445725055271645, "grad_norm": 2.424688585677204, "learning_rate": 9.04080283915897e-06, "loss": 1.0049, "step": 6193 }, { "epoch": 0.22449349425537313, "grad_norm": 2.4846050856517157, "learning_rate": 9.040457132699441e-06, "loss": 0.974, "step": 6194 }, { "epoch": 0.2245297379580298, "grad_norm": 2.1769475609308278, "learning_rate": 9.040111370564855e-06, "loss": 0.7917, "step": 6195 }, { "epoch": 0.22456598166068645, "grad_norm": 2.3551957341440444, "learning_rate": 9.039765552759976e-06, "loss": 1.044, "step": 6196 }, { "epoch": 0.22460222536334312, "grad_norm": 2.264388020685208, "learning_rate": 9.039419679289574e-06, "loss": 0.8771, "step": 6197 }, { "epoch": 0.2246384690659998, "grad_norm": 2.3420148687918925, "learning_rate": 9.03907375015841e-06, "loss": 0.9826, "step": 6198 }, { "epoch": 0.22467471276865644, "grad_norm": 2.2222728727711982, "learning_rate": 9.03872776537125e-06, "loss": 0.979, "step": 6199 }, { "epoch": 0.22471095647131312, "grad_norm": 2.3501594271165147, "learning_rate": 9.038381724932868e-06, "loss": 1.1007, "step": 6200 }, { "epoch": 0.22474720017396976, "grad_norm": 2.3494762725394915, "learning_rate": 9.038035628848027e-06, "loss": 0.9552, "step": 6201 }, { "epoch": 0.22478344387662644, "grad_norm": 2.584022751029124, "learning_rate": 9.037689477121496e-06, "loss": 1.0308, "step": 6202 }, { "epoch": 0.2248196875792831, "grad_norm": 2.1419556721188227, "learning_rate": 9.037343269758047e-06, "loss": 0.7959, "step": 6203 }, { "epoch": 0.22485593128193976, "grad_norm": 2.520645196658495, "learning_rate": 9.036997006762449e-06, "loss": 1.0135, "step": 6204 }, { "epoch": 0.22489217498459643, "grad_norm": 2.3197121563213234, "learning_rate": 9.036650688139474e-06, "loss": 1.0766, "step": 6205 }, { "epoch": 0.22492841868725308, "grad_norm": 2.445837433332088, "learning_rate": 9.036304313893893e-06, "loss": 1.0419, "step": 6206 }, { "epoch": 0.22496466238990975, "grad_norm": 2.294684313554331, "learning_rate": 9.035957884030481e-06, "loss": 0.9012, "step": 6207 }, { "epoch": 0.22500090609256643, "grad_norm": 2.0895257122010564, "learning_rate": 9.03561139855401e-06, "loss": 0.9035, "step": 6208 }, { "epoch": 0.22503714979522307, "grad_norm": 2.1541473774752964, "learning_rate": 9.035264857469253e-06, "loss": 1.0161, "step": 6209 }, { "epoch": 0.22507339349787975, "grad_norm": 1.9982802490869946, "learning_rate": 9.034918260780988e-06, "loss": 0.9141, "step": 6210 }, { "epoch": 0.2251096372005364, "grad_norm": 2.4347642469798587, "learning_rate": 9.034571608493989e-06, "loss": 1.0326, "step": 6211 }, { "epoch": 0.22514588090319307, "grad_norm": 2.2960853325436714, "learning_rate": 9.034224900613034e-06, "loss": 1.0997, "step": 6212 }, { "epoch": 0.22518212460584974, "grad_norm": 2.1328490151608674, "learning_rate": 9.0338781371429e-06, "loss": 0.8229, "step": 6213 }, { "epoch": 0.2252183683085064, "grad_norm": 2.282582053332448, "learning_rate": 9.033531318088364e-06, "loss": 1.0553, "step": 6214 }, { "epoch": 0.22525461201116306, "grad_norm": 2.4705142012185908, "learning_rate": 9.033184443454206e-06, "loss": 0.8207, "step": 6215 }, { "epoch": 0.22529085571381974, "grad_norm": 2.1322957422801565, "learning_rate": 9.032837513245205e-06, "loss": 0.9315, "step": 6216 }, { "epoch": 0.22532709941647638, "grad_norm": 2.487116609670879, "learning_rate": 9.032490527466144e-06, "loss": 1.1121, "step": 6217 }, { "epoch": 0.22536334311913306, "grad_norm": 2.2643809768069345, "learning_rate": 9.0321434861218e-06, "loss": 0.9249, "step": 6218 }, { "epoch": 0.2253995868217897, "grad_norm": 2.303133204444585, "learning_rate": 9.031796389216958e-06, "loss": 1.0305, "step": 6219 }, { "epoch": 0.22543583052444638, "grad_norm": 2.471254538680335, "learning_rate": 9.0314492367564e-06, "loss": 1.1249, "step": 6220 }, { "epoch": 0.22547207422710305, "grad_norm": 2.148754775678757, "learning_rate": 9.031102028744911e-06, "loss": 0.9284, "step": 6221 }, { "epoch": 0.2255083179297597, "grad_norm": 2.4564790147336812, "learning_rate": 9.030754765187273e-06, "loss": 0.8638, "step": 6222 }, { "epoch": 0.22554456163241637, "grad_norm": 2.4411065771691627, "learning_rate": 9.030407446088272e-06, "loss": 0.9371, "step": 6223 }, { "epoch": 0.22558080533507302, "grad_norm": 2.5023871628815186, "learning_rate": 9.030060071452695e-06, "loss": 0.9745, "step": 6224 }, { "epoch": 0.2256170490377297, "grad_norm": 2.211137622718547, "learning_rate": 9.029712641285326e-06, "loss": 0.9451, "step": 6225 }, { "epoch": 0.22565329274038637, "grad_norm": 2.566133565013416, "learning_rate": 9.029365155590955e-06, "loss": 0.9573, "step": 6226 }, { "epoch": 0.22568953644304302, "grad_norm": 2.2941914461910584, "learning_rate": 9.029017614374368e-06, "loss": 0.8425, "step": 6227 }, { "epoch": 0.2257257801456997, "grad_norm": 2.5183923833725643, "learning_rate": 9.028670017640356e-06, "loss": 0.9627, "step": 6228 }, { "epoch": 0.22576202384835634, "grad_norm": 2.268666740017516, "learning_rate": 9.028322365393706e-06, "loss": 0.9787, "step": 6229 }, { "epoch": 0.225798267551013, "grad_norm": 2.403248125629753, "learning_rate": 9.02797465763921e-06, "loss": 0.9781, "step": 6230 }, { "epoch": 0.22583451125366968, "grad_norm": 2.592538957297723, "learning_rate": 9.027626894381663e-06, "loss": 1.0227, "step": 6231 }, { "epoch": 0.22587075495632633, "grad_norm": 2.475994693714603, "learning_rate": 9.02727907562585e-06, "loss": 1.017, "step": 6232 }, { "epoch": 0.225906998658983, "grad_norm": 2.5384300407537674, "learning_rate": 9.026931201376567e-06, "loss": 1.0593, "step": 6233 }, { "epoch": 0.22594324236163965, "grad_norm": 2.561695409897901, "learning_rate": 9.026583271638609e-06, "loss": 1.0449, "step": 6234 }, { "epoch": 0.22597948606429633, "grad_norm": 2.43427582751826, "learning_rate": 9.026235286416767e-06, "loss": 0.8823, "step": 6235 }, { "epoch": 0.226015729766953, "grad_norm": 2.595000032150337, "learning_rate": 9.025887245715839e-06, "loss": 0.9981, "step": 6236 }, { "epoch": 0.22605197346960965, "grad_norm": 2.043071266965665, "learning_rate": 9.025539149540618e-06, "loss": 0.9568, "step": 6237 }, { "epoch": 0.22608821717226632, "grad_norm": 2.1731471813351146, "learning_rate": 9.025190997895902e-06, "loss": 0.946, "step": 6238 }, { "epoch": 0.226124460874923, "grad_norm": 2.6482578835892676, "learning_rate": 9.02484279078649e-06, "loss": 0.9931, "step": 6239 }, { "epoch": 0.22616070457757964, "grad_norm": 2.232449632758378, "learning_rate": 9.024494528217178e-06, "loss": 0.9897, "step": 6240 }, { "epoch": 0.22619694828023632, "grad_norm": 2.251379023176138, "learning_rate": 9.024146210192765e-06, "loss": 0.9808, "step": 6241 }, { "epoch": 0.22623319198289296, "grad_norm": 2.289308509160195, "learning_rate": 9.023797836718053e-06, "loss": 0.9538, "step": 6242 }, { "epoch": 0.22626943568554964, "grad_norm": 2.1417887042280044, "learning_rate": 9.023449407797837e-06, "loss": 0.8702, "step": 6243 }, { "epoch": 0.2263056793882063, "grad_norm": 2.3676155265737107, "learning_rate": 9.023100923436925e-06, "loss": 0.8895, "step": 6244 }, { "epoch": 0.22634192309086296, "grad_norm": 2.359885250843821, "learning_rate": 9.022752383640112e-06, "loss": 0.9956, "step": 6245 }, { "epoch": 0.22637816679351963, "grad_norm": 2.3145125635171735, "learning_rate": 9.022403788412206e-06, "loss": 0.97, "step": 6246 }, { "epoch": 0.22641441049617628, "grad_norm": 2.110635435368423, "learning_rate": 9.022055137758008e-06, "loss": 0.978, "step": 6247 }, { "epoch": 0.22645065419883295, "grad_norm": 2.3617290714050987, "learning_rate": 9.021706431682322e-06, "loss": 1.0109, "step": 6248 }, { "epoch": 0.22648689790148963, "grad_norm": 2.3418779896790385, "learning_rate": 9.021357670189954e-06, "loss": 0.9886, "step": 6249 }, { "epoch": 0.22652314160414627, "grad_norm": 2.1794684248659464, "learning_rate": 9.021008853285711e-06, "loss": 1.0446, "step": 6250 }, { "epoch": 0.22655938530680295, "grad_norm": 2.3493835590172547, "learning_rate": 9.020659980974395e-06, "loss": 1.0835, "step": 6251 }, { "epoch": 0.2265956290094596, "grad_norm": 2.6701204439447976, "learning_rate": 9.020311053260818e-06, "loss": 0.9053, "step": 6252 }, { "epoch": 0.22663187271211627, "grad_norm": 2.233550093117925, "learning_rate": 9.019962070149786e-06, "loss": 0.8354, "step": 6253 }, { "epoch": 0.22666811641477294, "grad_norm": 2.305839591715515, "learning_rate": 9.019613031646108e-06, "loss": 0.9439, "step": 6254 }, { "epoch": 0.2267043601174296, "grad_norm": 2.440803899531916, "learning_rate": 9.019263937754591e-06, "loss": 1.0981, "step": 6255 }, { "epoch": 0.22674060382008626, "grad_norm": 2.2414179539092376, "learning_rate": 9.018914788480051e-06, "loss": 1.0932, "step": 6256 }, { "epoch": 0.22677684752274294, "grad_norm": 2.407785456405904, "learning_rate": 9.018565583827294e-06, "loss": 0.8939, "step": 6257 }, { "epoch": 0.22681309122539958, "grad_norm": 2.0873662526515853, "learning_rate": 9.018216323801134e-06, "loss": 0.8999, "step": 6258 }, { "epoch": 0.22684933492805626, "grad_norm": 2.560430130380615, "learning_rate": 9.017867008406382e-06, "loss": 0.9185, "step": 6259 }, { "epoch": 0.2268855786307129, "grad_norm": 2.188843098988386, "learning_rate": 9.017517637647854e-06, "loss": 0.7448, "step": 6260 }, { "epoch": 0.22692182233336958, "grad_norm": 2.325149993585923, "learning_rate": 9.017168211530363e-06, "loss": 0.9706, "step": 6261 }, { "epoch": 0.22695806603602625, "grad_norm": 2.512274291830374, "learning_rate": 9.016818730058723e-06, "loss": 1.0245, "step": 6262 }, { "epoch": 0.2269943097386829, "grad_norm": 2.688404289535226, "learning_rate": 9.01646919323775e-06, "loss": 0.8535, "step": 6263 }, { "epoch": 0.22703055344133957, "grad_norm": 2.5512927504214877, "learning_rate": 9.016119601072262e-06, "loss": 1.002, "step": 6264 }, { "epoch": 0.22706679714399622, "grad_norm": 2.4909608987793352, "learning_rate": 9.015769953567072e-06, "loss": 0.9189, "step": 6265 }, { "epoch": 0.2271030408466529, "grad_norm": 2.2955036542904264, "learning_rate": 9.015420250727002e-06, "loss": 0.8118, "step": 6266 }, { "epoch": 0.22713928454930957, "grad_norm": 2.7318489425175567, "learning_rate": 9.01507049255687e-06, "loss": 1.0499, "step": 6267 }, { "epoch": 0.2271755282519662, "grad_norm": 2.3248873838819963, "learning_rate": 9.014720679061494e-06, "loss": 0.8763, "step": 6268 }, { "epoch": 0.2272117719546229, "grad_norm": 2.643071632036423, "learning_rate": 9.014370810245696e-06, "loss": 0.9386, "step": 6269 }, { "epoch": 0.22724801565727953, "grad_norm": 2.753645825997022, "learning_rate": 9.014020886114297e-06, "loss": 0.9932, "step": 6270 }, { "epoch": 0.2272842593599362, "grad_norm": 2.2073676883114692, "learning_rate": 9.013670906672115e-06, "loss": 1.0542, "step": 6271 }, { "epoch": 0.22732050306259288, "grad_norm": 2.361823576261924, "learning_rate": 9.013320871923976e-06, "loss": 1.2599, "step": 6272 }, { "epoch": 0.22735674676524953, "grad_norm": 2.557992434958467, "learning_rate": 9.012970781874704e-06, "loss": 1.0443, "step": 6273 }, { "epoch": 0.2273929904679062, "grad_norm": 2.5006184182275843, "learning_rate": 9.012620636529121e-06, "loss": 0.9795, "step": 6274 }, { "epoch": 0.22742923417056288, "grad_norm": 2.0959273931807463, "learning_rate": 9.012270435892052e-06, "loss": 1.0154, "step": 6275 }, { "epoch": 0.22746547787321952, "grad_norm": 2.5853646910820065, "learning_rate": 9.011920179968324e-06, "loss": 1.0548, "step": 6276 }, { "epoch": 0.2275017215758762, "grad_norm": 2.3454849220771474, "learning_rate": 9.01156986876276e-06, "loss": 0.9117, "step": 6277 }, { "epoch": 0.22753796527853284, "grad_norm": 2.6245902178476275, "learning_rate": 9.01121950228019e-06, "loss": 1.0734, "step": 6278 }, { "epoch": 0.22757420898118952, "grad_norm": 2.2072467224538337, "learning_rate": 9.010869080525443e-06, "loss": 0.8914, "step": 6279 }, { "epoch": 0.2276104526838462, "grad_norm": 2.1198816103082194, "learning_rate": 9.010518603503344e-06, "loss": 0.9074, "step": 6280 }, { "epoch": 0.22764669638650284, "grad_norm": 2.4718534895790567, "learning_rate": 9.010168071218726e-06, "loss": 1.0262, "step": 6281 }, { "epoch": 0.2276829400891595, "grad_norm": 2.44127797472215, "learning_rate": 9.009817483676416e-06, "loss": 0.8555, "step": 6282 }, { "epoch": 0.22771918379181616, "grad_norm": 2.625107322706347, "learning_rate": 9.009466840881244e-06, "loss": 1.058, "step": 6283 }, { "epoch": 0.22775542749447283, "grad_norm": 2.3216578002536723, "learning_rate": 9.009116142838046e-06, "loss": 1.0165, "step": 6284 }, { "epoch": 0.2277916711971295, "grad_norm": 2.4971024764876493, "learning_rate": 9.008765389551652e-06, "loss": 1.0119, "step": 6285 }, { "epoch": 0.22782791489978615, "grad_norm": 2.345318594689, "learning_rate": 9.008414581026897e-06, "loss": 0.8956, "step": 6286 }, { "epoch": 0.22786415860244283, "grad_norm": 2.2693892938758684, "learning_rate": 9.008063717268611e-06, "loss": 0.9385, "step": 6287 }, { "epoch": 0.22790040230509948, "grad_norm": 2.3844707653289356, "learning_rate": 9.007712798281632e-06, "loss": 0.8766, "step": 6288 }, { "epoch": 0.22793664600775615, "grad_norm": 2.484238680265828, "learning_rate": 9.007361824070795e-06, "loss": 0.9765, "step": 6289 }, { "epoch": 0.22797288971041282, "grad_norm": 2.222220976659831, "learning_rate": 9.007010794640933e-06, "loss": 0.9112, "step": 6290 }, { "epoch": 0.22800913341306947, "grad_norm": 2.296139182966194, "learning_rate": 9.006659709996888e-06, "loss": 0.9064, "step": 6291 }, { "epoch": 0.22804537711572614, "grad_norm": 2.17293457914546, "learning_rate": 9.006308570143497e-06, "loss": 0.9592, "step": 6292 }, { "epoch": 0.22808162081838282, "grad_norm": 2.572131857600632, "learning_rate": 9.005957375085592e-06, "loss": 1.0077, "step": 6293 }, { "epoch": 0.22811786452103947, "grad_norm": 2.5869880856565035, "learning_rate": 9.00560612482802e-06, "loss": 1.0617, "step": 6294 }, { "epoch": 0.22815410822369614, "grad_norm": 2.3073429667860226, "learning_rate": 9.005254819375618e-06, "loss": 0.9276, "step": 6295 }, { "epoch": 0.22819035192635279, "grad_norm": 2.2263004489966356, "learning_rate": 9.004903458733228e-06, "loss": 0.8719, "step": 6296 }, { "epoch": 0.22822659562900946, "grad_norm": 2.7763634299888365, "learning_rate": 9.004552042905687e-06, "loss": 1.0182, "step": 6297 }, { "epoch": 0.22826283933166613, "grad_norm": 2.4762809425982835, "learning_rate": 9.004200571897843e-06, "loss": 1.1531, "step": 6298 }, { "epoch": 0.22829908303432278, "grad_norm": 2.6650645369205077, "learning_rate": 9.003849045714537e-06, "loss": 0.9736, "step": 6299 }, { "epoch": 0.22833532673697945, "grad_norm": 2.5786367305283067, "learning_rate": 9.003497464360614e-06, "loss": 1.1573, "step": 6300 }, { "epoch": 0.2283715704396361, "grad_norm": 2.3395170255470057, "learning_rate": 9.003145827840914e-06, "loss": 0.9158, "step": 6301 }, { "epoch": 0.22840781414229278, "grad_norm": 2.4398154848108975, "learning_rate": 9.002794136160287e-06, "loss": 1.1075, "step": 6302 }, { "epoch": 0.22844405784494945, "grad_norm": 2.498565425177994, "learning_rate": 9.002442389323578e-06, "loss": 0.9732, "step": 6303 }, { "epoch": 0.2284803015476061, "grad_norm": 2.550684644816408, "learning_rate": 9.002090587335633e-06, "loss": 0.8516, "step": 6304 }, { "epoch": 0.22851654525026277, "grad_norm": 1.8843199013266558, "learning_rate": 9.0017387302013e-06, "loss": 0.8498, "step": 6305 }, { "epoch": 0.22855278895291942, "grad_norm": 2.409292878005279, "learning_rate": 9.001386817925427e-06, "loss": 1.0285, "step": 6306 }, { "epoch": 0.2285890326555761, "grad_norm": 2.4417251233102752, "learning_rate": 9.001034850512864e-06, "loss": 0.8066, "step": 6307 }, { "epoch": 0.22862527635823277, "grad_norm": 2.168547942801484, "learning_rate": 9.00068282796846e-06, "loss": 0.89, "step": 6308 }, { "epoch": 0.2286615200608894, "grad_norm": 2.168404352415893, "learning_rate": 9.000330750297068e-06, "loss": 0.8623, "step": 6309 }, { "epoch": 0.22869776376354609, "grad_norm": 2.5615248732257183, "learning_rate": 8.999978617503536e-06, "loss": 1.0337, "step": 6310 }, { "epoch": 0.22873400746620276, "grad_norm": 2.4953764958217857, "learning_rate": 8.999626429592717e-06, "loss": 1.0269, "step": 6311 }, { "epoch": 0.2287702511688594, "grad_norm": 2.6244293498585485, "learning_rate": 8.999274186569467e-06, "loss": 1.1551, "step": 6312 }, { "epoch": 0.22880649487151608, "grad_norm": 2.490453616518961, "learning_rate": 8.998921888438637e-06, "loss": 1.1385, "step": 6313 }, { "epoch": 0.22884273857417273, "grad_norm": 2.289232401499815, "learning_rate": 8.99856953520508e-06, "loss": 1.0179, "step": 6314 }, { "epoch": 0.2288789822768294, "grad_norm": 2.3709845564755847, "learning_rate": 8.998217126873655e-06, "loss": 0.8717, "step": 6315 }, { "epoch": 0.22891522597948608, "grad_norm": 2.1979074473340625, "learning_rate": 8.997864663449216e-06, "loss": 0.9017, "step": 6316 }, { "epoch": 0.22895146968214272, "grad_norm": 2.318810057770688, "learning_rate": 8.997512144936618e-06, "loss": 0.8351, "step": 6317 }, { "epoch": 0.2289877133847994, "grad_norm": 2.519051016561054, "learning_rate": 8.997159571340722e-06, "loss": 1.0314, "step": 6318 }, { "epoch": 0.22902395708745604, "grad_norm": 2.327925323012611, "learning_rate": 8.996806942666384e-06, "loss": 1.0457, "step": 6319 }, { "epoch": 0.22906020079011272, "grad_norm": 2.5888487783434133, "learning_rate": 8.996454258918462e-06, "loss": 0.915, "step": 6320 }, { "epoch": 0.2290964444927694, "grad_norm": 2.4059983393713225, "learning_rate": 8.996101520101819e-06, "loss": 1.0814, "step": 6321 }, { "epoch": 0.22913268819542604, "grad_norm": 2.370840258435577, "learning_rate": 8.995748726221315e-06, "loss": 0.883, "step": 6322 }, { "epoch": 0.2291689318980827, "grad_norm": 2.7196613194253, "learning_rate": 8.995395877281807e-06, "loss": 0.9159, "step": 6323 }, { "epoch": 0.22920517560073936, "grad_norm": 2.1757964970381725, "learning_rate": 8.995042973288163e-06, "loss": 0.8369, "step": 6324 }, { "epoch": 0.22924141930339603, "grad_norm": 2.304837233625676, "learning_rate": 8.99469001424524e-06, "loss": 0.8422, "step": 6325 }, { "epoch": 0.2292776630060527, "grad_norm": 2.2974249171993435, "learning_rate": 8.994337000157906e-06, "loss": 0.8976, "step": 6326 }, { "epoch": 0.22931390670870935, "grad_norm": 2.3249418023713053, "learning_rate": 8.993983931031023e-06, "loss": 1.126, "step": 6327 }, { "epoch": 0.22935015041136603, "grad_norm": 2.4258970254916057, "learning_rate": 8.993630806869459e-06, "loss": 0.986, "step": 6328 }, { "epoch": 0.2293863941140227, "grad_norm": 2.4841801356347037, "learning_rate": 8.993277627678076e-06, "loss": 1.0117, "step": 6329 }, { "epoch": 0.22942263781667935, "grad_norm": 2.50133809459985, "learning_rate": 8.992924393461745e-06, "loss": 1.0732, "step": 6330 }, { "epoch": 0.22945888151933602, "grad_norm": 2.424770219567302, "learning_rate": 8.992571104225327e-06, "loss": 0.907, "step": 6331 }, { "epoch": 0.22949512522199267, "grad_norm": 2.119402931290144, "learning_rate": 8.992217759973696e-06, "loss": 0.8979, "step": 6332 }, { "epoch": 0.22953136892464934, "grad_norm": 2.261121844511199, "learning_rate": 8.991864360711717e-06, "loss": 1.032, "step": 6333 }, { "epoch": 0.22956761262730602, "grad_norm": 2.646453432712496, "learning_rate": 8.991510906444263e-06, "loss": 1.1538, "step": 6334 }, { "epoch": 0.22960385632996266, "grad_norm": 2.5334069246639284, "learning_rate": 8.991157397176202e-06, "loss": 1.0642, "step": 6335 }, { "epoch": 0.22964010003261934, "grad_norm": 2.164534210187507, "learning_rate": 8.990803832912407e-06, "loss": 1.0383, "step": 6336 }, { "epoch": 0.22967634373527598, "grad_norm": 2.6619043732102434, "learning_rate": 8.990450213657747e-06, "loss": 0.7633, "step": 6337 }, { "epoch": 0.22971258743793266, "grad_norm": 2.587974148688539, "learning_rate": 8.990096539417098e-06, "loss": 0.7773, "step": 6338 }, { "epoch": 0.22974883114058933, "grad_norm": 2.3892972323000907, "learning_rate": 8.98974281019533e-06, "loss": 0.9092, "step": 6339 }, { "epoch": 0.22978507484324598, "grad_norm": 2.328942826783905, "learning_rate": 8.98938902599732e-06, "loss": 1.0851, "step": 6340 }, { "epoch": 0.22982131854590265, "grad_norm": 2.1237216214965895, "learning_rate": 8.989035186827942e-06, "loss": 0.9829, "step": 6341 }, { "epoch": 0.2298575622485593, "grad_norm": 2.1557707952094334, "learning_rate": 8.988681292692071e-06, "loss": 1.0465, "step": 6342 }, { "epoch": 0.22989380595121597, "grad_norm": 2.3607060878059096, "learning_rate": 8.988327343594586e-06, "loss": 0.9584, "step": 6343 }, { "epoch": 0.22993004965387265, "grad_norm": 2.3394728657187693, "learning_rate": 8.98797333954036e-06, "loss": 0.8852, "step": 6344 }, { "epoch": 0.2299662933565293, "grad_norm": 2.605754611879821, "learning_rate": 8.987619280534275e-06, "loss": 0.9436, "step": 6345 }, { "epoch": 0.23000253705918597, "grad_norm": 2.424940005315229, "learning_rate": 8.987265166581206e-06, "loss": 1.1581, "step": 6346 }, { "epoch": 0.23003878076184264, "grad_norm": 2.431152672564522, "learning_rate": 8.986910997686035e-06, "loss": 1.062, "step": 6347 }, { "epoch": 0.2300750244644993, "grad_norm": 2.7370408913470463, "learning_rate": 8.986556773853643e-06, "loss": 1.0508, "step": 6348 }, { "epoch": 0.23011126816715596, "grad_norm": 2.6248391053806577, "learning_rate": 8.986202495088909e-06, "loss": 1.0418, "step": 6349 }, { "epoch": 0.2301475118698126, "grad_norm": 2.07612270590521, "learning_rate": 8.985848161396715e-06, "loss": 0.8128, "step": 6350 }, { "epoch": 0.23018375557246928, "grad_norm": 2.1525929448386916, "learning_rate": 8.985493772781944e-06, "loss": 0.886, "step": 6351 }, { "epoch": 0.23021999927512596, "grad_norm": 2.6036524829055505, "learning_rate": 8.985139329249479e-06, "loss": 1.0227, "step": 6352 }, { "epoch": 0.2302562429777826, "grad_norm": 2.1064332501353507, "learning_rate": 8.984784830804205e-06, "loss": 0.8889, "step": 6353 }, { "epoch": 0.23029248668043928, "grad_norm": 2.087321582257359, "learning_rate": 8.984430277451004e-06, "loss": 0.7138, "step": 6354 }, { "epoch": 0.23032873038309593, "grad_norm": 2.1104376246365972, "learning_rate": 8.984075669194766e-06, "loss": 0.8524, "step": 6355 }, { "epoch": 0.2303649740857526, "grad_norm": 2.390668014539163, "learning_rate": 8.983721006040374e-06, "loss": 0.9358, "step": 6356 }, { "epoch": 0.23040121778840927, "grad_norm": 2.3925502696765313, "learning_rate": 8.983366287992715e-06, "loss": 1.0576, "step": 6357 }, { "epoch": 0.23043746149106592, "grad_norm": 2.492824966295285, "learning_rate": 8.983011515056678e-06, "loss": 0.9164, "step": 6358 }, { "epoch": 0.2304737051937226, "grad_norm": 2.2035195859406036, "learning_rate": 8.98265668723715e-06, "loss": 0.9526, "step": 6359 }, { "epoch": 0.23050994889637924, "grad_norm": 2.11922029424649, "learning_rate": 8.982301804539024e-06, "loss": 0.9415, "step": 6360 }, { "epoch": 0.23054619259903592, "grad_norm": 2.7021529904553625, "learning_rate": 8.981946866967185e-06, "loss": 1.0428, "step": 6361 }, { "epoch": 0.2305824363016926, "grad_norm": 2.447838748051441, "learning_rate": 8.981591874526528e-06, "loss": 0.9908, "step": 6362 }, { "epoch": 0.23061868000434924, "grad_norm": 2.547196491883209, "learning_rate": 8.981236827221942e-06, "loss": 1.0544, "step": 6363 }, { "epoch": 0.2306549237070059, "grad_norm": 2.839849168720179, "learning_rate": 8.980881725058321e-06, "loss": 1.1068, "step": 6364 }, { "epoch": 0.23069116740966258, "grad_norm": 2.3847741099321875, "learning_rate": 8.980526568040558e-06, "loss": 0.9519, "step": 6365 }, { "epoch": 0.23072741111231923, "grad_norm": 2.1855924674626355, "learning_rate": 8.980171356173546e-06, "loss": 0.9525, "step": 6366 }, { "epoch": 0.2307636548149759, "grad_norm": 2.2880074402992556, "learning_rate": 8.979816089462179e-06, "loss": 1.1238, "step": 6367 }, { "epoch": 0.23079989851763255, "grad_norm": 2.1837728816185322, "learning_rate": 8.979460767911354e-06, "loss": 0.8737, "step": 6368 }, { "epoch": 0.23083614222028923, "grad_norm": 2.238072279974069, "learning_rate": 8.979105391525966e-06, "loss": 0.8491, "step": 6369 }, { "epoch": 0.2308723859229459, "grad_norm": 2.4215849598216956, "learning_rate": 8.97874996031091e-06, "loss": 1.1274, "step": 6370 }, { "epoch": 0.23090862962560255, "grad_norm": 2.425408054955892, "learning_rate": 8.97839447427109e-06, "loss": 1.0981, "step": 6371 }, { "epoch": 0.23094487332825922, "grad_norm": 2.1701228125302077, "learning_rate": 8.978038933411397e-06, "loss": 0.6526, "step": 6372 }, { "epoch": 0.23098111703091587, "grad_norm": 2.2766967951597774, "learning_rate": 8.977683337736734e-06, "loss": 1.0051, "step": 6373 }, { "epoch": 0.23101736073357254, "grad_norm": 2.136812262371718, "learning_rate": 8.977327687252001e-06, "loss": 0.8008, "step": 6374 }, { "epoch": 0.23105360443622922, "grad_norm": 2.155794512621101, "learning_rate": 8.976971981962097e-06, "loss": 1.0526, "step": 6375 }, { "epoch": 0.23108984813888586, "grad_norm": 2.621774912546971, "learning_rate": 8.976616221871926e-06, "loss": 1.0845, "step": 6376 }, { "epoch": 0.23112609184154254, "grad_norm": 2.428092944283514, "learning_rate": 8.976260406986386e-06, "loss": 0.9873, "step": 6377 }, { "epoch": 0.23116233554419918, "grad_norm": 2.3939827961410995, "learning_rate": 8.975904537310386e-06, "loss": 0.9959, "step": 6378 }, { "epoch": 0.23119857924685586, "grad_norm": 2.3725083472058093, "learning_rate": 8.975548612848822e-06, "loss": 1.0115, "step": 6379 }, { "epoch": 0.23123482294951253, "grad_norm": 2.4510536951876163, "learning_rate": 8.975192633606604e-06, "loss": 1.0564, "step": 6380 }, { "epoch": 0.23127106665216918, "grad_norm": 2.7650506891448368, "learning_rate": 8.974836599588637e-06, "loss": 0.9941, "step": 6381 }, { "epoch": 0.23130731035482585, "grad_norm": 2.2100722102173296, "learning_rate": 8.974480510799823e-06, "loss": 0.8083, "step": 6382 }, { "epoch": 0.23134355405748253, "grad_norm": 2.628073638300237, "learning_rate": 8.974124367245071e-06, "loss": 0.8852, "step": 6383 }, { "epoch": 0.23137979776013917, "grad_norm": 2.8374021386666266, "learning_rate": 8.973768168929293e-06, "loss": 0.9032, "step": 6384 }, { "epoch": 0.23141604146279585, "grad_norm": 2.3991931868671643, "learning_rate": 8.97341191585739e-06, "loss": 1.0983, "step": 6385 }, { "epoch": 0.2314522851654525, "grad_norm": 2.3076552642995662, "learning_rate": 8.973055608034274e-06, "loss": 1.0519, "step": 6386 }, { "epoch": 0.23148852886810917, "grad_norm": 2.6267519935435533, "learning_rate": 8.972699245464853e-06, "loss": 1.05, "step": 6387 }, { "epoch": 0.23152477257076584, "grad_norm": 2.2647865600779693, "learning_rate": 8.972342828154042e-06, "loss": 0.8906, "step": 6388 }, { "epoch": 0.2315610162734225, "grad_norm": 2.550005898921529, "learning_rate": 8.971986356106747e-06, "loss": 0.9464, "step": 6389 }, { "epoch": 0.23159725997607916, "grad_norm": 2.373762359736169, "learning_rate": 8.971629829327882e-06, "loss": 0.9197, "step": 6390 }, { "epoch": 0.2316335036787358, "grad_norm": 2.3995818704402065, "learning_rate": 8.971273247822361e-06, "loss": 0.8447, "step": 6391 }, { "epoch": 0.23166974738139248, "grad_norm": 2.187413835202255, "learning_rate": 8.970916611595096e-06, "loss": 0.7313, "step": 6392 }, { "epoch": 0.23170599108404916, "grad_norm": 2.618079387420217, "learning_rate": 8.970559920651003e-06, "loss": 1.1331, "step": 6393 }, { "epoch": 0.2317422347867058, "grad_norm": 2.5300739321603136, "learning_rate": 8.970203174994993e-06, "loss": 0.8293, "step": 6394 }, { "epoch": 0.23177847848936248, "grad_norm": 2.3143554742318453, "learning_rate": 8.969846374631987e-06, "loss": 0.8126, "step": 6395 }, { "epoch": 0.23181472219201912, "grad_norm": 2.3687460695754576, "learning_rate": 8.969489519566897e-06, "loss": 0.968, "step": 6396 }, { "epoch": 0.2318509658946758, "grad_norm": 2.1819541914719207, "learning_rate": 8.969132609804643e-06, "loss": 0.8679, "step": 6397 }, { "epoch": 0.23188720959733247, "grad_norm": 2.5238649404000424, "learning_rate": 8.968775645350141e-06, "loss": 1.0391, "step": 6398 }, { "epoch": 0.23192345329998912, "grad_norm": 2.263611541094268, "learning_rate": 8.968418626208313e-06, "loss": 0.917, "step": 6399 }, { "epoch": 0.2319596970026458, "grad_norm": 2.059538307497778, "learning_rate": 8.968061552384076e-06, "loss": 0.8428, "step": 6400 }, { "epoch": 0.23199594070530247, "grad_norm": 2.561604271100288, "learning_rate": 8.96770442388235e-06, "loss": 1.0297, "step": 6401 }, { "epoch": 0.2320321844079591, "grad_norm": 2.6969511932850647, "learning_rate": 8.967347240708056e-06, "loss": 0.856, "step": 6402 }, { "epoch": 0.2320684281106158, "grad_norm": 2.3956010599091124, "learning_rate": 8.966990002866116e-06, "loss": 0.7644, "step": 6403 }, { "epoch": 0.23210467181327243, "grad_norm": 2.57290197938238, "learning_rate": 8.966632710361453e-06, "loss": 1.019, "step": 6404 }, { "epoch": 0.2321409155159291, "grad_norm": 2.2455847199721357, "learning_rate": 8.96627536319899e-06, "loss": 1.1389, "step": 6405 }, { "epoch": 0.23217715921858578, "grad_norm": 2.061822714562297, "learning_rate": 8.965917961383652e-06, "loss": 0.864, "step": 6406 }, { "epoch": 0.23221340292124243, "grad_norm": 2.778613986205169, "learning_rate": 8.965560504920363e-06, "loss": 1.0594, "step": 6407 }, { "epoch": 0.2322496466238991, "grad_norm": 2.199421529346665, "learning_rate": 8.965202993814049e-06, "loss": 1.1267, "step": 6408 }, { "epoch": 0.23228589032655575, "grad_norm": 2.5841150558701016, "learning_rate": 8.964845428069636e-06, "loss": 1.0172, "step": 6409 }, { "epoch": 0.23232213402921242, "grad_norm": 2.293046492086288, "learning_rate": 8.964487807692051e-06, "loss": 0.8689, "step": 6410 }, { "epoch": 0.2323583777318691, "grad_norm": 2.6526230777291895, "learning_rate": 8.96413013268622e-06, "loss": 1.033, "step": 6411 }, { "epoch": 0.23239462143452574, "grad_norm": 2.063974778468226, "learning_rate": 8.963772403057074e-06, "loss": 0.9763, "step": 6412 }, { "epoch": 0.23243086513718242, "grad_norm": 2.3588916333099617, "learning_rate": 8.963414618809541e-06, "loss": 0.9398, "step": 6413 }, { "epoch": 0.23246710883983907, "grad_norm": 2.34201511595647, "learning_rate": 8.963056779948553e-06, "loss": 1.0145, "step": 6414 }, { "epoch": 0.23250335254249574, "grad_norm": 2.542402639051452, "learning_rate": 8.962698886479037e-06, "loss": 0.931, "step": 6415 }, { "epoch": 0.2325395962451524, "grad_norm": 2.458380412862965, "learning_rate": 8.962340938405928e-06, "loss": 0.9045, "step": 6416 }, { "epoch": 0.23257583994780906, "grad_norm": 2.3721158949218095, "learning_rate": 8.961982935734159e-06, "loss": 0.9963, "step": 6417 }, { "epoch": 0.23261208365046573, "grad_norm": 2.044968501704234, "learning_rate": 8.96162487846866e-06, "loss": 0.8214, "step": 6418 }, { "epoch": 0.2326483273531224, "grad_norm": 2.224484644513397, "learning_rate": 8.961266766614366e-06, "loss": 0.9952, "step": 6419 }, { "epoch": 0.23268457105577905, "grad_norm": 2.350212053839673, "learning_rate": 8.96090860017621e-06, "loss": 1.091, "step": 6420 }, { "epoch": 0.23272081475843573, "grad_norm": 2.4036898553962684, "learning_rate": 8.960550379159132e-06, "loss": 0.9962, "step": 6421 }, { "epoch": 0.23275705846109238, "grad_norm": 2.273511222386684, "learning_rate": 8.960192103568064e-06, "loss": 1.0398, "step": 6422 }, { "epoch": 0.23279330216374905, "grad_norm": 2.259533348278372, "learning_rate": 8.959833773407943e-06, "loss": 1.0393, "step": 6423 }, { "epoch": 0.23282954586640572, "grad_norm": 2.833545164637824, "learning_rate": 8.959475388683708e-06, "loss": 0.8933, "step": 6424 }, { "epoch": 0.23286578956906237, "grad_norm": 1.9917325676307194, "learning_rate": 8.959116949400297e-06, "loss": 0.9436, "step": 6425 }, { "epoch": 0.23290203327171904, "grad_norm": 2.6627343311112006, "learning_rate": 8.958758455562649e-06, "loss": 1.0667, "step": 6426 }, { "epoch": 0.2329382769743757, "grad_norm": 2.419198768979547, "learning_rate": 8.958399907175703e-06, "loss": 0.7878, "step": 6427 }, { "epoch": 0.23297452067703237, "grad_norm": 2.2983453548845376, "learning_rate": 8.9580413042444e-06, "loss": 1.0558, "step": 6428 }, { "epoch": 0.23301076437968904, "grad_norm": 2.6227982199491304, "learning_rate": 8.957682646773682e-06, "loss": 0.9054, "step": 6429 }, { "epoch": 0.23304700808234569, "grad_norm": 2.1962377763139234, "learning_rate": 8.957323934768492e-06, "loss": 1.0327, "step": 6430 }, { "epoch": 0.23308325178500236, "grad_norm": 2.545282275608773, "learning_rate": 8.956965168233769e-06, "loss": 1.0466, "step": 6431 }, { "epoch": 0.233119495487659, "grad_norm": 2.416474932294838, "learning_rate": 8.956606347174461e-06, "loss": 1.0841, "step": 6432 }, { "epoch": 0.23315573919031568, "grad_norm": 2.1177910083973797, "learning_rate": 8.95624747159551e-06, "loss": 1.0806, "step": 6433 }, { "epoch": 0.23319198289297235, "grad_norm": 2.1257712499107, "learning_rate": 8.95588854150186e-06, "loss": 0.9097, "step": 6434 }, { "epoch": 0.233228226595629, "grad_norm": 2.1329747284607445, "learning_rate": 8.955529556898459e-06, "loss": 0.9496, "step": 6435 }, { "epoch": 0.23326447029828568, "grad_norm": 2.14797279879211, "learning_rate": 8.955170517790253e-06, "loss": 0.982, "step": 6436 }, { "epoch": 0.23330071400094235, "grad_norm": 2.304522433457325, "learning_rate": 8.95481142418219e-06, "loss": 0.9594, "step": 6437 }, { "epoch": 0.233336957703599, "grad_norm": 2.6410985182878806, "learning_rate": 8.954452276079218e-06, "loss": 0.9455, "step": 6438 }, { "epoch": 0.23337320140625567, "grad_norm": 2.3717681507292325, "learning_rate": 8.954093073486283e-06, "loss": 0.9384, "step": 6439 }, { "epoch": 0.23340944510891232, "grad_norm": 2.7601894330558214, "learning_rate": 8.95373381640834e-06, "loss": 0.9501, "step": 6440 }, { "epoch": 0.233445688811569, "grad_norm": 2.418416022251742, "learning_rate": 8.953374504850334e-06, "loss": 0.8672, "step": 6441 }, { "epoch": 0.23348193251422567, "grad_norm": 2.462774094455237, "learning_rate": 8.953015138817218e-06, "loss": 0.9842, "step": 6442 }, { "epoch": 0.2335181762168823, "grad_norm": 2.4696557619825494, "learning_rate": 8.952655718313945e-06, "loss": 0.9951, "step": 6443 }, { "epoch": 0.23355441991953899, "grad_norm": 2.316114107545154, "learning_rate": 8.952296243345466e-06, "loss": 1.2772, "step": 6444 }, { "epoch": 0.23359066362219563, "grad_norm": 2.1563806776955174, "learning_rate": 8.951936713916736e-06, "loss": 0.9901, "step": 6445 }, { "epoch": 0.2336269073248523, "grad_norm": 2.6985462597494734, "learning_rate": 8.951577130032706e-06, "loss": 0.987, "step": 6446 }, { "epoch": 0.23366315102750898, "grad_norm": 2.3170235400108328, "learning_rate": 8.951217491698335e-06, "loss": 0.9179, "step": 6447 }, { "epoch": 0.23369939473016563, "grad_norm": 2.21965983740233, "learning_rate": 8.950857798918576e-06, "loss": 1.1039, "step": 6448 }, { "epoch": 0.2337356384328223, "grad_norm": 2.5725599180785457, "learning_rate": 8.950498051698386e-06, "loss": 0.8779, "step": 6449 }, { "epoch": 0.23377188213547895, "grad_norm": 2.5322341860153945, "learning_rate": 8.950138250042722e-06, "loss": 0.8014, "step": 6450 }, { "epoch": 0.23380812583813562, "grad_norm": 2.575737305931666, "learning_rate": 8.949778393956542e-06, "loss": 1.0009, "step": 6451 }, { "epoch": 0.2338443695407923, "grad_norm": 2.4794198479255374, "learning_rate": 8.949418483444805e-06, "loss": 1.0507, "step": 6452 }, { "epoch": 0.23388061324344894, "grad_norm": 2.218050980852429, "learning_rate": 8.949058518512468e-06, "loss": 0.8839, "step": 6453 }, { "epoch": 0.23391685694610562, "grad_norm": 2.4172192125859735, "learning_rate": 8.948698499164494e-06, "loss": 0.9823, "step": 6454 }, { "epoch": 0.2339531006487623, "grad_norm": 2.821983585565272, "learning_rate": 8.948338425405844e-06, "loss": 1.0624, "step": 6455 }, { "epoch": 0.23398934435141894, "grad_norm": 2.3982255117682536, "learning_rate": 8.947978297241477e-06, "loss": 1.0435, "step": 6456 }, { "epoch": 0.2340255880540756, "grad_norm": 2.368270000412311, "learning_rate": 8.947618114676357e-06, "loss": 0.7838, "step": 6457 }, { "epoch": 0.23406183175673226, "grad_norm": 2.3471226722586813, "learning_rate": 8.947257877715448e-06, "loss": 0.7482, "step": 6458 }, { "epoch": 0.23409807545938893, "grad_norm": 2.5317369336628404, "learning_rate": 8.94689758636371e-06, "loss": 0.8273, "step": 6459 }, { "epoch": 0.2341343191620456, "grad_norm": 2.0468145254154475, "learning_rate": 8.946537240626112e-06, "loss": 0.8061, "step": 6460 }, { "epoch": 0.23417056286470225, "grad_norm": 2.5042088070095585, "learning_rate": 8.946176840507617e-06, "loss": 0.9414, "step": 6461 }, { "epoch": 0.23420680656735893, "grad_norm": 2.572553818107184, "learning_rate": 8.945816386013192e-06, "loss": 1.1078, "step": 6462 }, { "epoch": 0.23424305027001557, "grad_norm": 2.407724145253044, "learning_rate": 8.945455877147804e-06, "loss": 0.984, "step": 6463 }, { "epoch": 0.23427929397267225, "grad_norm": 2.4060891765312236, "learning_rate": 8.94509531391642e-06, "loss": 1.215, "step": 6464 }, { "epoch": 0.23431553767532892, "grad_norm": 2.3014615348609113, "learning_rate": 8.944734696324008e-06, "loss": 0.9009, "step": 6465 }, { "epoch": 0.23435178137798557, "grad_norm": 15.19525669345048, "learning_rate": 8.944374024375537e-06, "loss": 1.9054, "step": 6466 }, { "epoch": 0.23438802508064224, "grad_norm": 2.0706066303265334, "learning_rate": 8.94401329807598e-06, "loss": 0.9808, "step": 6467 }, { "epoch": 0.2344242687832989, "grad_norm": 2.266527825899576, "learning_rate": 8.943652517430302e-06, "loss": 0.8223, "step": 6468 }, { "epoch": 0.23446051248595556, "grad_norm": 2.2658939584474025, "learning_rate": 8.943291682443478e-06, "loss": 0.9406, "step": 6469 }, { "epoch": 0.23449675618861224, "grad_norm": 2.1624540921528914, "learning_rate": 8.942930793120482e-06, "loss": 0.818, "step": 6470 }, { "epoch": 0.23453299989126888, "grad_norm": 2.2793631439318505, "learning_rate": 8.942569849466281e-06, "loss": 0.9529, "step": 6471 }, { "epoch": 0.23456924359392556, "grad_norm": 2.7695225471177567, "learning_rate": 8.942208851485854e-06, "loss": 0.9457, "step": 6472 }, { "epoch": 0.23460548729658223, "grad_norm": 2.54463056001547, "learning_rate": 8.941847799184173e-06, "loss": 1.0546, "step": 6473 }, { "epoch": 0.23464173099923888, "grad_norm": 2.3445178491647787, "learning_rate": 8.941486692566212e-06, "loss": 1.1612, "step": 6474 }, { "epoch": 0.23467797470189555, "grad_norm": 2.320086976220296, "learning_rate": 8.94112553163695e-06, "loss": 1.1164, "step": 6475 }, { "epoch": 0.2347142184045522, "grad_norm": 2.315011109607885, "learning_rate": 8.94076431640136e-06, "loss": 0.9493, "step": 6476 }, { "epoch": 0.23475046210720887, "grad_norm": 2.308902616849448, "learning_rate": 8.94040304686442e-06, "loss": 0.85, "step": 6477 }, { "epoch": 0.23478670580986555, "grad_norm": 2.4911872648990028, "learning_rate": 8.940041723031111e-06, "loss": 0.883, "step": 6478 }, { "epoch": 0.2348229495125222, "grad_norm": 2.419695522355876, "learning_rate": 8.939680344906409e-06, "loss": 1.0558, "step": 6479 }, { "epoch": 0.23485919321517887, "grad_norm": 2.7036475654073353, "learning_rate": 8.939318912495296e-06, "loss": 0.8629, "step": 6480 }, { "epoch": 0.23489543691783552, "grad_norm": 2.5620174652456136, "learning_rate": 8.93895742580275e-06, "loss": 0.8466, "step": 6481 }, { "epoch": 0.2349316806204922, "grad_norm": 2.3788246237784607, "learning_rate": 8.938595884833752e-06, "loss": 1.0018, "step": 6482 }, { "epoch": 0.23496792432314886, "grad_norm": 2.273568422944184, "learning_rate": 8.938234289593287e-06, "loss": 1.0283, "step": 6483 }, { "epoch": 0.2350041680258055, "grad_norm": 2.514183843572399, "learning_rate": 8.937872640086333e-06, "loss": 0.9837, "step": 6484 }, { "epoch": 0.23504041172846218, "grad_norm": 2.139756503281486, "learning_rate": 8.937510936317876e-06, "loss": 0.8404, "step": 6485 }, { "epoch": 0.23507665543111883, "grad_norm": 2.1435521095863446, "learning_rate": 8.937149178292899e-06, "loss": 0.9521, "step": 6486 }, { "epoch": 0.2351128991337755, "grad_norm": 2.3546351729157338, "learning_rate": 8.936787366016389e-06, "loss": 1.0409, "step": 6487 }, { "epoch": 0.23514914283643218, "grad_norm": 2.3200155834453007, "learning_rate": 8.936425499493328e-06, "loss": 1.2268, "step": 6488 }, { "epoch": 0.23518538653908883, "grad_norm": 2.7089023640790453, "learning_rate": 8.936063578728706e-06, "loss": 1.0726, "step": 6489 }, { "epoch": 0.2352216302417455, "grad_norm": 2.6826454429414235, "learning_rate": 8.93570160372751e-06, "loss": 0.9317, "step": 6490 }, { "epoch": 0.23525787394440217, "grad_norm": 2.2498755303623166, "learning_rate": 8.935339574494724e-06, "loss": 0.8568, "step": 6491 }, { "epoch": 0.23529411764705882, "grad_norm": 2.5755534300082803, "learning_rate": 8.934977491035337e-06, "loss": 1.0047, "step": 6492 }, { "epoch": 0.2353303613497155, "grad_norm": 2.2946295380078894, "learning_rate": 8.934615353354343e-06, "loss": 0.9622, "step": 6493 }, { "epoch": 0.23536660505237214, "grad_norm": 2.7604485481043337, "learning_rate": 8.934253161456728e-06, "loss": 1.0339, "step": 6494 }, { "epoch": 0.23540284875502882, "grad_norm": 2.5492100968349183, "learning_rate": 8.933890915347483e-06, "loss": 0.8487, "step": 6495 }, { "epoch": 0.2354390924576855, "grad_norm": 2.3252141072328403, "learning_rate": 8.9335286150316e-06, "loss": 0.9266, "step": 6496 }, { "epoch": 0.23547533616034214, "grad_norm": 2.2565307745582817, "learning_rate": 8.933166260514073e-06, "loss": 0.9138, "step": 6497 }, { "epoch": 0.2355115798629988, "grad_norm": 2.370697113899531, "learning_rate": 8.932803851799893e-06, "loss": 1.0286, "step": 6498 }, { "epoch": 0.23554782356565546, "grad_norm": 2.3718547033461954, "learning_rate": 8.932441388894054e-06, "loss": 1.1191, "step": 6499 }, { "epoch": 0.23558406726831213, "grad_norm": 2.263667056890289, "learning_rate": 8.932078871801551e-06, "loss": 1.1631, "step": 6500 }, { "epoch": 0.2356203109709688, "grad_norm": 2.4113115181338785, "learning_rate": 8.93171630052738e-06, "loss": 1.1211, "step": 6501 }, { "epoch": 0.23565655467362545, "grad_norm": 2.4796114499397066, "learning_rate": 8.931353675076537e-06, "loss": 1.0294, "step": 6502 }, { "epoch": 0.23569279837628213, "grad_norm": 2.346608173293077, "learning_rate": 8.930990995454017e-06, "loss": 0.7289, "step": 6503 }, { "epoch": 0.23572904207893877, "grad_norm": 2.529139302835978, "learning_rate": 8.930628261664817e-06, "loss": 1.0143, "step": 6504 }, { "epoch": 0.23576528578159545, "grad_norm": 2.3630155067571272, "learning_rate": 8.930265473713939e-06, "loss": 0.8009, "step": 6505 }, { "epoch": 0.23580152948425212, "grad_norm": 2.5494858744996867, "learning_rate": 8.929902631606378e-06, "loss": 0.9023, "step": 6506 }, { "epoch": 0.23583777318690877, "grad_norm": 2.362382507110383, "learning_rate": 8.929539735347137e-06, "loss": 0.8856, "step": 6507 }, { "epoch": 0.23587401688956544, "grad_norm": 2.5637273073661517, "learning_rate": 8.929176784941214e-06, "loss": 0.9213, "step": 6508 }, { "epoch": 0.23591026059222212, "grad_norm": 2.4732648073096226, "learning_rate": 8.92881378039361e-06, "loss": 0.9127, "step": 6509 }, { "epoch": 0.23594650429487876, "grad_norm": 2.2966584371212613, "learning_rate": 8.92845072170933e-06, "loss": 1.0837, "step": 6510 }, { "epoch": 0.23598274799753544, "grad_norm": 2.1742197885470995, "learning_rate": 8.928087608893374e-06, "loss": 0.8086, "step": 6511 }, { "epoch": 0.23601899170019208, "grad_norm": 2.5733486575001883, "learning_rate": 8.927724441950746e-06, "loss": 1.0738, "step": 6512 }, { "epoch": 0.23605523540284876, "grad_norm": 2.384493535955526, "learning_rate": 8.927361220886452e-06, "loss": 0.8286, "step": 6513 }, { "epoch": 0.23609147910550543, "grad_norm": 2.251983544244803, "learning_rate": 8.926997945705495e-06, "loss": 0.925, "step": 6514 }, { "epoch": 0.23612772280816208, "grad_norm": 2.362860851940506, "learning_rate": 8.92663461641288e-06, "loss": 0.8829, "step": 6515 }, { "epoch": 0.23616396651081875, "grad_norm": 2.275956965073207, "learning_rate": 8.926271233013617e-06, "loss": 1.0676, "step": 6516 }, { "epoch": 0.2362002102134754, "grad_norm": 2.4086983034853047, "learning_rate": 8.92590779551271e-06, "loss": 1.0835, "step": 6517 }, { "epoch": 0.23623645391613207, "grad_norm": 2.277861355159851, "learning_rate": 8.925544303915167e-06, "loss": 0.9475, "step": 6518 }, { "epoch": 0.23627269761878875, "grad_norm": 2.4116358950244754, "learning_rate": 8.925180758225999e-06, "loss": 1.0539, "step": 6519 }, { "epoch": 0.2363089413214454, "grad_norm": 2.531243528933213, "learning_rate": 8.924817158450214e-06, "loss": 1.124, "step": 6520 }, { "epoch": 0.23634518502410207, "grad_norm": 2.3789004190512104, "learning_rate": 8.924453504592821e-06, "loss": 1.1302, "step": 6521 }, { "epoch": 0.2363814287267587, "grad_norm": 2.2585119400123905, "learning_rate": 8.924089796658832e-06, "loss": 0.9218, "step": 6522 }, { "epoch": 0.2364176724294154, "grad_norm": 2.5751254891222373, "learning_rate": 8.92372603465326e-06, "loss": 1.1536, "step": 6523 }, { "epoch": 0.23645391613207206, "grad_norm": 2.2859135938570634, "learning_rate": 8.923362218581117e-06, "loss": 0.9603, "step": 6524 }, { "epoch": 0.2364901598347287, "grad_norm": 2.2744722408567646, "learning_rate": 8.922998348447413e-06, "loss": 1.0421, "step": 6525 }, { "epoch": 0.23652640353738538, "grad_norm": 2.4631817187358913, "learning_rate": 8.922634424257166e-06, "loss": 1.0667, "step": 6526 }, { "epoch": 0.23656264724004203, "grad_norm": 2.1955374741331206, "learning_rate": 8.922270446015389e-06, "loss": 0.9747, "step": 6527 }, { "epoch": 0.2365988909426987, "grad_norm": 2.469131828707264, "learning_rate": 8.921906413727097e-06, "loss": 1.0258, "step": 6528 }, { "epoch": 0.23663513464535538, "grad_norm": 2.4623612183671804, "learning_rate": 8.921542327397305e-06, "loss": 1.0134, "step": 6529 }, { "epoch": 0.23667137834801202, "grad_norm": 2.5564315712804375, "learning_rate": 8.921178187031034e-06, "loss": 0.9598, "step": 6530 }, { "epoch": 0.2367076220506687, "grad_norm": 2.603890119447682, "learning_rate": 8.920813992633298e-06, "loss": 0.9899, "step": 6531 }, { "epoch": 0.23674386575332537, "grad_norm": 2.2178916264437705, "learning_rate": 8.920449744209117e-06, "loss": 0.8846, "step": 6532 }, { "epoch": 0.23678010945598202, "grad_norm": 2.457674045522139, "learning_rate": 8.920085441763508e-06, "loss": 0.7164, "step": 6533 }, { "epoch": 0.2368163531586387, "grad_norm": 2.3760946263955156, "learning_rate": 8.919721085301493e-06, "loss": 0.7404, "step": 6534 }, { "epoch": 0.23685259686129534, "grad_norm": 2.205968905598424, "learning_rate": 8.919356674828092e-06, "loss": 1.1321, "step": 6535 }, { "epoch": 0.236888840563952, "grad_norm": 2.3097154094355448, "learning_rate": 8.918992210348327e-06, "loss": 0.9526, "step": 6536 }, { "epoch": 0.2369250842666087, "grad_norm": 2.6660259549068845, "learning_rate": 8.918627691867218e-06, "loss": 1.04, "step": 6537 }, { "epoch": 0.23696132796926533, "grad_norm": 2.1243884727532634, "learning_rate": 8.918263119389791e-06, "loss": 0.8736, "step": 6538 }, { "epoch": 0.236997571671922, "grad_norm": 2.5591635102661705, "learning_rate": 8.917898492921067e-06, "loss": 0.8865, "step": 6539 }, { "epoch": 0.23703381537457865, "grad_norm": 2.3171068740773353, "learning_rate": 8.917533812466072e-06, "loss": 0.9874, "step": 6540 }, { "epoch": 0.23707005907723533, "grad_norm": 2.503678766546792, "learning_rate": 8.917169078029829e-06, "loss": 1.0565, "step": 6541 }, { "epoch": 0.237106302779892, "grad_norm": 2.4328955934617396, "learning_rate": 8.916804289617366e-06, "loss": 0.8841, "step": 6542 }, { "epoch": 0.23714254648254865, "grad_norm": 2.1486228361027413, "learning_rate": 8.91643944723371e-06, "loss": 0.9912, "step": 6543 }, { "epoch": 0.23717879018520532, "grad_norm": 2.4172094257718117, "learning_rate": 8.916074550883884e-06, "loss": 0.9855, "step": 6544 }, { "epoch": 0.23721503388786197, "grad_norm": 2.2972977435726474, "learning_rate": 8.915709600572922e-06, "loss": 0.8617, "step": 6545 }, { "epoch": 0.23725127759051864, "grad_norm": 2.5493688047143177, "learning_rate": 8.915344596305848e-06, "loss": 1.104, "step": 6546 }, { "epoch": 0.23728752129317532, "grad_norm": 2.282947464868713, "learning_rate": 8.914979538087693e-06, "loss": 1.0202, "step": 6547 }, { "epoch": 0.23732376499583197, "grad_norm": 2.141348376059048, "learning_rate": 8.91461442592349e-06, "loss": 0.8057, "step": 6548 }, { "epoch": 0.23736000869848864, "grad_norm": 2.191628926044033, "learning_rate": 8.914249259818265e-06, "loss": 0.8111, "step": 6549 }, { "epoch": 0.2373962524011453, "grad_norm": 2.2474486666818865, "learning_rate": 8.913884039777054e-06, "loss": 1.0453, "step": 6550 }, { "epoch": 0.23743249610380196, "grad_norm": 1.9803943824477992, "learning_rate": 8.913518765804888e-06, "loss": 0.9291, "step": 6551 }, { "epoch": 0.23746873980645863, "grad_norm": 2.5345891869271426, "learning_rate": 8.9131534379068e-06, "loss": 1.004, "step": 6552 }, { "epoch": 0.23750498350911528, "grad_norm": 2.2394711735791146, "learning_rate": 8.912788056087824e-06, "loss": 1.0579, "step": 6553 }, { "epoch": 0.23754122721177195, "grad_norm": 2.4841796897092623, "learning_rate": 8.912422620352995e-06, "loss": 0.9801, "step": 6554 }, { "epoch": 0.23757747091442863, "grad_norm": 2.3314073447035097, "learning_rate": 8.912057130707348e-06, "loss": 0.9322, "step": 6555 }, { "epoch": 0.23761371461708528, "grad_norm": 2.0967265281911556, "learning_rate": 8.91169158715592e-06, "loss": 0.9298, "step": 6556 }, { "epoch": 0.23764995831974195, "grad_norm": 2.501387477975403, "learning_rate": 8.911325989703749e-06, "loss": 1.058, "step": 6557 }, { "epoch": 0.2376862020223986, "grad_norm": 2.5513996582761207, "learning_rate": 8.91096033835587e-06, "loss": 1.0585, "step": 6558 }, { "epoch": 0.23772244572505527, "grad_norm": 2.5618533003077917, "learning_rate": 8.910594633117324e-06, "loss": 0.8841, "step": 6559 }, { "epoch": 0.23775868942771194, "grad_norm": 2.6151917867145063, "learning_rate": 8.910228873993147e-06, "loss": 1.025, "step": 6560 }, { "epoch": 0.2377949331303686, "grad_norm": 2.28892816005416, "learning_rate": 8.909863060988381e-06, "loss": 0.8882, "step": 6561 }, { "epoch": 0.23783117683302527, "grad_norm": 2.142946884168284, "learning_rate": 8.90949719410807e-06, "loss": 0.7888, "step": 6562 }, { "epoch": 0.2378674205356819, "grad_norm": 2.3229659078905938, "learning_rate": 8.909131273357248e-06, "loss": 0.9645, "step": 6563 }, { "epoch": 0.23790366423833859, "grad_norm": 2.563089148312459, "learning_rate": 8.908765298740963e-06, "loss": 0.7815, "step": 6564 }, { "epoch": 0.23793990794099526, "grad_norm": 2.2907670488569436, "learning_rate": 8.908399270264256e-06, "loss": 0.8655, "step": 6565 }, { "epoch": 0.2379761516436519, "grad_norm": 2.4044947230179203, "learning_rate": 8.90803318793217e-06, "loss": 1.0563, "step": 6566 }, { "epoch": 0.23801239534630858, "grad_norm": 2.468195789880593, "learning_rate": 8.907667051749752e-06, "loss": 1.0012, "step": 6567 }, { "epoch": 0.23804863904896525, "grad_norm": 2.2070477000698254, "learning_rate": 8.907300861722044e-06, "loss": 0.8265, "step": 6568 }, { "epoch": 0.2380848827516219, "grad_norm": 2.430807330704444, "learning_rate": 8.906934617854095e-06, "loss": 0.9861, "step": 6569 }, { "epoch": 0.23812112645427858, "grad_norm": 2.3986608306562984, "learning_rate": 8.906568320150947e-06, "loss": 1.0539, "step": 6570 }, { "epoch": 0.23815737015693522, "grad_norm": 2.347340122966851, "learning_rate": 8.906201968617651e-06, "loss": 0.952, "step": 6571 }, { "epoch": 0.2381936138595919, "grad_norm": 2.403755264454337, "learning_rate": 8.905835563259255e-06, "loss": 0.8737, "step": 6572 }, { "epoch": 0.23822985756224857, "grad_norm": 2.1159653197397494, "learning_rate": 8.905469104080808e-06, "loss": 0.8457, "step": 6573 }, { "epoch": 0.23826610126490522, "grad_norm": 2.8146225571471617, "learning_rate": 8.905102591087357e-06, "loss": 1.0153, "step": 6574 }, { "epoch": 0.2383023449675619, "grad_norm": 2.0152747380300826, "learning_rate": 8.904736024283955e-06, "loss": 0.9122, "step": 6575 }, { "epoch": 0.23833858867021854, "grad_norm": 2.2967871565269493, "learning_rate": 8.90436940367565e-06, "loss": 0.8661, "step": 6576 }, { "epoch": 0.2383748323728752, "grad_norm": 2.065715580076563, "learning_rate": 8.904002729267499e-06, "loss": 0.9117, "step": 6577 }, { "epoch": 0.23841107607553189, "grad_norm": 2.533972312945428, "learning_rate": 8.90363600106455e-06, "loss": 1.0563, "step": 6578 }, { "epoch": 0.23844731977818853, "grad_norm": 2.3392919849456906, "learning_rate": 8.903269219071857e-06, "loss": 0.9456, "step": 6579 }, { "epoch": 0.2384835634808452, "grad_norm": 2.2299959025755607, "learning_rate": 8.902902383294477e-06, "loss": 1.1155, "step": 6580 }, { "epoch": 0.23851980718350185, "grad_norm": 2.3743959432647874, "learning_rate": 8.90253549373746e-06, "loss": 1.0569, "step": 6581 }, { "epoch": 0.23855605088615853, "grad_norm": 2.7268102893764388, "learning_rate": 8.902168550405865e-06, "loss": 1.0324, "step": 6582 }, { "epoch": 0.2385922945888152, "grad_norm": 2.234924801991104, "learning_rate": 8.901801553304748e-06, "loss": 0.9202, "step": 6583 }, { "epoch": 0.23862853829147185, "grad_norm": 2.169075343471727, "learning_rate": 8.901434502439164e-06, "loss": 1.0652, "step": 6584 }, { "epoch": 0.23866478199412852, "grad_norm": 2.354304561959407, "learning_rate": 8.901067397814174e-06, "loss": 0.9162, "step": 6585 }, { "epoch": 0.2387010256967852, "grad_norm": 2.3953669640703796, "learning_rate": 8.900700239434833e-06, "loss": 1.0473, "step": 6586 }, { "epoch": 0.23873726939944184, "grad_norm": 2.768953676302795, "learning_rate": 8.900333027306203e-06, "loss": 0.8475, "step": 6587 }, { "epoch": 0.23877351310209852, "grad_norm": 2.990620650690259, "learning_rate": 8.89996576143334e-06, "loss": 0.8917, "step": 6588 }, { "epoch": 0.23880975680475516, "grad_norm": 2.48631932248887, "learning_rate": 8.89959844182131e-06, "loss": 0.8968, "step": 6589 }, { "epoch": 0.23884600050741184, "grad_norm": 2.108449971391101, "learning_rate": 8.899231068475171e-06, "loss": 0.8646, "step": 6590 }, { "epoch": 0.2388822442100685, "grad_norm": 2.4811247050424803, "learning_rate": 8.898863641399986e-06, "loss": 0.96, "step": 6591 }, { "epoch": 0.23891848791272516, "grad_norm": 2.6504957946813588, "learning_rate": 8.898496160600818e-06, "loss": 0.9785, "step": 6592 }, { "epoch": 0.23895473161538183, "grad_norm": 2.2770516182903475, "learning_rate": 8.898128626082731e-06, "loss": 0.9301, "step": 6593 }, { "epoch": 0.23899097531803848, "grad_norm": 2.6235398431555828, "learning_rate": 8.89776103785079e-06, "loss": 1.0966, "step": 6594 }, { "epoch": 0.23902721902069515, "grad_norm": 2.28438404241659, "learning_rate": 8.897393395910059e-06, "loss": 0.8793, "step": 6595 }, { "epoch": 0.23906346272335183, "grad_norm": 2.3780147267096203, "learning_rate": 8.897025700265602e-06, "loss": 0.8375, "step": 6596 }, { "epoch": 0.23909970642600847, "grad_norm": 2.4457477815070394, "learning_rate": 8.89665795092249e-06, "loss": 1.0074, "step": 6597 }, { "epoch": 0.23913595012866515, "grad_norm": 2.2754875860706414, "learning_rate": 8.896290147885787e-06, "loss": 0.8753, "step": 6598 }, { "epoch": 0.2391721938313218, "grad_norm": 2.459916857720315, "learning_rate": 8.895922291160561e-06, "loss": 0.8931, "step": 6599 }, { "epoch": 0.23920843753397847, "grad_norm": 2.5671386793942457, "learning_rate": 8.895554380751883e-06, "loss": 0.9347, "step": 6600 }, { "epoch": 0.23924468123663514, "grad_norm": 2.967852460803568, "learning_rate": 8.895186416664823e-06, "loss": 0.9425, "step": 6601 }, { "epoch": 0.2392809249392918, "grad_norm": 2.3400475105336627, "learning_rate": 8.894818398904448e-06, "loss": 0.9011, "step": 6602 }, { "epoch": 0.23931716864194846, "grad_norm": 2.481940686391892, "learning_rate": 8.894450327475832e-06, "loss": 0.9186, "step": 6603 }, { "epoch": 0.23935341234460514, "grad_norm": 2.3830591155152447, "learning_rate": 8.894082202384044e-06, "loss": 0.9135, "step": 6604 }, { "epoch": 0.23938965604726178, "grad_norm": 2.5427141077845627, "learning_rate": 8.893714023634162e-06, "loss": 1.0375, "step": 6605 }, { "epoch": 0.23942589974991846, "grad_norm": 2.7712959659867242, "learning_rate": 8.893345791231252e-06, "loss": 1.0322, "step": 6606 }, { "epoch": 0.2394621434525751, "grad_norm": 2.311740234597666, "learning_rate": 8.892977505180394e-06, "loss": 0.9935, "step": 6607 }, { "epoch": 0.23949838715523178, "grad_norm": 2.4730152161391836, "learning_rate": 8.892609165486658e-06, "loss": 0.9419, "step": 6608 }, { "epoch": 0.23953463085788845, "grad_norm": 2.336249567138532, "learning_rate": 8.892240772155123e-06, "loss": 0.8936, "step": 6609 }, { "epoch": 0.2395708745605451, "grad_norm": 2.136728727806574, "learning_rate": 8.891872325190865e-06, "loss": 0.8184, "step": 6610 }, { "epoch": 0.23960711826320177, "grad_norm": 2.2828296957697805, "learning_rate": 8.891503824598959e-06, "loss": 0.918, "step": 6611 }, { "epoch": 0.23964336196585842, "grad_norm": 2.3086286867992403, "learning_rate": 8.891135270384485e-06, "loss": 1.1651, "step": 6612 }, { "epoch": 0.2396796056685151, "grad_norm": 2.3105772095495, "learning_rate": 8.890766662552519e-06, "loss": 0.9409, "step": 6613 }, { "epoch": 0.23971584937117177, "grad_norm": 2.2677714603206485, "learning_rate": 8.890398001108143e-06, "loss": 0.6803, "step": 6614 }, { "epoch": 0.23975209307382842, "grad_norm": 2.4134568657194206, "learning_rate": 8.890029286056434e-06, "loss": 0.9738, "step": 6615 }, { "epoch": 0.2397883367764851, "grad_norm": 2.319713314179827, "learning_rate": 8.889660517402474e-06, "loss": 0.991, "step": 6616 }, { "epoch": 0.23982458047914174, "grad_norm": 2.2297399430133167, "learning_rate": 8.889291695151344e-06, "loss": 0.8662, "step": 6617 }, { "epoch": 0.2398608241817984, "grad_norm": 2.466438801929482, "learning_rate": 8.888922819308128e-06, "loss": 1.0129, "step": 6618 }, { "epoch": 0.23989706788445508, "grad_norm": 2.163896557818721, "learning_rate": 8.888553889877906e-06, "loss": 0.7948, "step": 6619 }, { "epoch": 0.23993331158711173, "grad_norm": 2.5210428618853467, "learning_rate": 8.888184906865764e-06, "loss": 1.0308, "step": 6620 }, { "epoch": 0.2399695552897684, "grad_norm": 2.2167394305615176, "learning_rate": 8.887815870276786e-06, "loss": 0.9124, "step": 6621 }, { "epoch": 0.24000579899242508, "grad_norm": 2.3759230793517823, "learning_rate": 8.887446780116055e-06, "loss": 0.9456, "step": 6622 }, { "epoch": 0.24004204269508173, "grad_norm": 2.636850730418287, "learning_rate": 8.88707763638866e-06, "loss": 0.9648, "step": 6623 }, { "epoch": 0.2400782863977384, "grad_norm": 2.6367082561590935, "learning_rate": 8.886708439099685e-06, "loss": 1.0132, "step": 6624 }, { "epoch": 0.24011453010039505, "grad_norm": 2.6138884544763585, "learning_rate": 8.886339188254221e-06, "loss": 1.0879, "step": 6625 }, { "epoch": 0.24015077380305172, "grad_norm": 2.4193099262891895, "learning_rate": 8.88596988385735e-06, "loss": 1.1273, "step": 6626 }, { "epoch": 0.2401870175057084, "grad_norm": 2.7637296729648906, "learning_rate": 8.885600525914166e-06, "loss": 1.0072, "step": 6627 }, { "epoch": 0.24022326120836504, "grad_norm": 2.395932041530787, "learning_rate": 8.885231114429756e-06, "loss": 1.0452, "step": 6628 }, { "epoch": 0.24025950491102172, "grad_norm": 2.084367560772845, "learning_rate": 8.88486164940921e-06, "loss": 0.886, "step": 6629 }, { "epoch": 0.24029574861367836, "grad_norm": 2.5495660360317736, "learning_rate": 8.884492130857623e-06, "loss": 0.904, "step": 6630 }, { "epoch": 0.24033199231633504, "grad_norm": 2.5280164537897365, "learning_rate": 8.88412255878008e-06, "loss": 1.0217, "step": 6631 }, { "epoch": 0.2403682360189917, "grad_norm": 2.351195979713947, "learning_rate": 8.88375293318168e-06, "loss": 1.0554, "step": 6632 }, { "epoch": 0.24040447972164836, "grad_norm": 2.071382976814881, "learning_rate": 8.883383254067511e-06, "loss": 0.9153, "step": 6633 }, { "epoch": 0.24044072342430503, "grad_norm": 2.2539331272061527, "learning_rate": 8.883013521442671e-06, "loss": 0.8682, "step": 6634 }, { "epoch": 0.24047696712696168, "grad_norm": 2.3661343810089606, "learning_rate": 8.882643735312252e-06, "loss": 0.9334, "step": 6635 }, { "epoch": 0.24051321082961835, "grad_norm": 2.558732741654661, "learning_rate": 8.882273895681352e-06, "loss": 0.9039, "step": 6636 }, { "epoch": 0.24054945453227503, "grad_norm": 2.411826391528545, "learning_rate": 8.881904002555064e-06, "loss": 0.9264, "step": 6637 }, { "epoch": 0.24058569823493167, "grad_norm": 2.727182743985726, "learning_rate": 8.881534055938487e-06, "loss": 1.0527, "step": 6638 }, { "epoch": 0.24062194193758835, "grad_norm": 2.3607052804833937, "learning_rate": 8.881164055836719e-06, "loss": 0.9835, "step": 6639 }, { "epoch": 0.24065818564024502, "grad_norm": 2.2752466395947297, "learning_rate": 8.880794002254858e-06, "loss": 0.8867, "step": 6640 }, { "epoch": 0.24069442934290167, "grad_norm": 2.249798220787667, "learning_rate": 8.880423895198002e-06, "loss": 0.9995, "step": 6641 }, { "epoch": 0.24073067304555834, "grad_norm": 2.3216798955846585, "learning_rate": 8.88005373467125e-06, "loss": 1.0641, "step": 6642 }, { "epoch": 0.240766916748215, "grad_norm": 2.444556913197641, "learning_rate": 8.879683520679707e-06, "loss": 0.8276, "step": 6643 }, { "epoch": 0.24080316045087166, "grad_norm": 2.751120472664567, "learning_rate": 8.879313253228469e-06, "loss": 0.9812, "step": 6644 }, { "epoch": 0.24083940415352834, "grad_norm": 2.4119916338897056, "learning_rate": 8.87894293232264e-06, "loss": 0.9461, "step": 6645 }, { "epoch": 0.24087564785618498, "grad_norm": 2.383723276529489, "learning_rate": 8.878572557967325e-06, "loss": 0.844, "step": 6646 }, { "epoch": 0.24091189155884166, "grad_norm": 2.5882770510374518, "learning_rate": 8.878202130167625e-06, "loss": 0.9499, "step": 6647 }, { "epoch": 0.2409481352614983, "grad_norm": 2.413145044099019, "learning_rate": 8.877831648928646e-06, "loss": 0.9104, "step": 6648 }, { "epoch": 0.24098437896415498, "grad_norm": 2.2649344577061283, "learning_rate": 8.877461114255493e-06, "loss": 0.9398, "step": 6649 }, { "epoch": 0.24102062266681165, "grad_norm": 2.4134983754960704, "learning_rate": 8.87709052615327e-06, "loss": 0.9111, "step": 6650 }, { "epoch": 0.2410568663694683, "grad_norm": 2.6900008203977293, "learning_rate": 8.876719884627083e-06, "loss": 0.9083, "step": 6651 }, { "epoch": 0.24109311007212497, "grad_norm": 2.34981712886029, "learning_rate": 8.876349189682042e-06, "loss": 0.9538, "step": 6652 }, { "epoch": 0.24112935377478162, "grad_norm": 2.5851313094277666, "learning_rate": 8.875978441323252e-06, "loss": 1.0442, "step": 6653 }, { "epoch": 0.2411655974774383, "grad_norm": 2.3348560839291173, "learning_rate": 8.875607639555824e-06, "loss": 0.7751, "step": 6654 }, { "epoch": 0.24120184118009497, "grad_norm": 2.5232676810578902, "learning_rate": 8.875236784384867e-06, "loss": 0.7876, "step": 6655 }, { "epoch": 0.2412380848827516, "grad_norm": 2.2794904232586077, "learning_rate": 8.87486587581549e-06, "loss": 0.8723, "step": 6656 }, { "epoch": 0.2412743285854083, "grad_norm": 2.2767989819060306, "learning_rate": 8.874494913852806e-06, "loss": 0.9855, "step": 6657 }, { "epoch": 0.24131057228806496, "grad_norm": 2.4420129029825484, "learning_rate": 8.874123898501923e-06, "loss": 0.9672, "step": 6658 }, { "epoch": 0.2413468159907216, "grad_norm": 2.2283963937796987, "learning_rate": 8.87375282976796e-06, "loss": 0.8821, "step": 6659 }, { "epoch": 0.24138305969337828, "grad_norm": 2.141987931011302, "learning_rate": 8.87338170765602e-06, "loss": 1.0153, "step": 6660 }, { "epoch": 0.24141930339603493, "grad_norm": 2.3312919604796067, "learning_rate": 8.873010532171226e-06, "loss": 0.9835, "step": 6661 }, { "epoch": 0.2414555470986916, "grad_norm": 2.5656665703281676, "learning_rate": 8.87263930331869e-06, "loss": 1.0915, "step": 6662 }, { "epoch": 0.24149179080134828, "grad_norm": 2.255331333371487, "learning_rate": 8.872268021103525e-06, "loss": 0.8713, "step": 6663 }, { "epoch": 0.24152803450400492, "grad_norm": 2.197504055450733, "learning_rate": 8.87189668553085e-06, "loss": 1.0385, "step": 6664 }, { "epoch": 0.2415642782066616, "grad_norm": 2.319941650330603, "learning_rate": 8.87152529660578e-06, "loss": 0.9917, "step": 6665 }, { "epoch": 0.24160052190931824, "grad_norm": 2.4154937987327756, "learning_rate": 8.87115385433343e-06, "loss": 0.9046, "step": 6666 }, { "epoch": 0.24163676561197492, "grad_norm": 2.292079349699587, "learning_rate": 8.870782358718923e-06, "loss": 0.8879, "step": 6667 }, { "epoch": 0.2416730093146316, "grad_norm": 2.2447843933918024, "learning_rate": 8.870410809767375e-06, "loss": 1.0559, "step": 6668 }, { "epoch": 0.24170925301728824, "grad_norm": 2.4726343564700404, "learning_rate": 8.870039207483908e-06, "loss": 0.776, "step": 6669 }, { "epoch": 0.2417454967199449, "grad_norm": 2.1159387054953362, "learning_rate": 8.869667551873641e-06, "loss": 0.976, "step": 6670 }, { "epoch": 0.24178174042260156, "grad_norm": 2.434887161871129, "learning_rate": 8.869295842941696e-06, "loss": 0.7986, "step": 6671 }, { "epoch": 0.24181798412525823, "grad_norm": 2.5580743581926284, "learning_rate": 8.868924080693195e-06, "loss": 0.9471, "step": 6672 }, { "epoch": 0.2418542278279149, "grad_norm": 2.5464828713687315, "learning_rate": 8.86855226513326e-06, "loss": 1.0615, "step": 6673 }, { "epoch": 0.24189047153057155, "grad_norm": 2.341868917976288, "learning_rate": 8.868180396267012e-06, "loss": 0.917, "step": 6674 }, { "epoch": 0.24192671523322823, "grad_norm": 2.31067841471594, "learning_rate": 8.867808474099579e-06, "loss": 1.0178, "step": 6675 }, { "epoch": 0.2419629589358849, "grad_norm": 2.615458862986339, "learning_rate": 8.867436498636085e-06, "loss": 1.0652, "step": 6676 }, { "epoch": 0.24199920263854155, "grad_norm": 2.0877831124697908, "learning_rate": 8.867064469881655e-06, "loss": 0.8835, "step": 6677 }, { "epoch": 0.24203544634119822, "grad_norm": 2.3924908922901134, "learning_rate": 8.866692387841413e-06, "loss": 1.0705, "step": 6678 }, { "epoch": 0.24207169004385487, "grad_norm": 2.480531676757077, "learning_rate": 8.866320252520491e-06, "loss": 0.9786, "step": 6679 }, { "epoch": 0.24210793374651154, "grad_norm": 2.3663144249548527, "learning_rate": 8.865948063924012e-06, "loss": 0.8386, "step": 6680 }, { "epoch": 0.24214417744916822, "grad_norm": 2.567364786207796, "learning_rate": 8.865575822057109e-06, "loss": 0.9039, "step": 6681 }, { "epoch": 0.24218042115182487, "grad_norm": 2.6602628288943517, "learning_rate": 8.865203526924908e-06, "loss": 1.0206, "step": 6682 }, { "epoch": 0.24221666485448154, "grad_norm": 2.2684503270342344, "learning_rate": 8.864831178532541e-06, "loss": 0.8902, "step": 6683 }, { "epoch": 0.24225290855713819, "grad_norm": 2.3649040053822388, "learning_rate": 8.864458776885136e-06, "loss": 0.9015, "step": 6684 }, { "epoch": 0.24228915225979486, "grad_norm": 2.509748044163358, "learning_rate": 8.864086321987825e-06, "loss": 0.9899, "step": 6685 }, { "epoch": 0.24232539596245153, "grad_norm": 2.4509766328315234, "learning_rate": 8.863713813845744e-06, "loss": 0.9814, "step": 6686 }, { "epoch": 0.24236163966510818, "grad_norm": 2.2435658589445473, "learning_rate": 8.863341252464024e-06, "loss": 0.8484, "step": 6687 }, { "epoch": 0.24239788336776485, "grad_norm": 2.055826561740285, "learning_rate": 8.862968637847797e-06, "loss": 0.8892, "step": 6688 }, { "epoch": 0.2424341270704215, "grad_norm": 2.5791068952531226, "learning_rate": 8.862595970002197e-06, "loss": 1.042, "step": 6689 }, { "epoch": 0.24247037077307818, "grad_norm": 2.4705051377391665, "learning_rate": 8.862223248932362e-06, "loss": 1.0499, "step": 6690 }, { "epoch": 0.24250661447573485, "grad_norm": 2.266704012113779, "learning_rate": 8.861850474643425e-06, "loss": 1.0246, "step": 6691 }, { "epoch": 0.2425428581783915, "grad_norm": 2.4757669185021616, "learning_rate": 8.861477647140526e-06, "loss": 1.1067, "step": 6692 }, { "epoch": 0.24257910188104817, "grad_norm": 2.2220346703894327, "learning_rate": 8.861104766428799e-06, "loss": 1.0061, "step": 6693 }, { "epoch": 0.24261534558370484, "grad_norm": 2.3392556915013336, "learning_rate": 8.860731832513386e-06, "loss": 1.1095, "step": 6694 }, { "epoch": 0.2426515892863615, "grad_norm": 2.5749010219271726, "learning_rate": 8.86035884539942e-06, "loss": 1.03, "step": 6695 }, { "epoch": 0.24268783298901817, "grad_norm": 2.433772948796271, "learning_rate": 8.859985805092044e-06, "loss": 1.1284, "step": 6696 }, { "epoch": 0.2427240766916748, "grad_norm": 2.324013119370502, "learning_rate": 8.8596127115964e-06, "loss": 0.9125, "step": 6697 }, { "epoch": 0.24276032039433149, "grad_norm": 2.453219910159199, "learning_rate": 8.859239564917626e-06, "loss": 1.0442, "step": 6698 }, { "epoch": 0.24279656409698816, "grad_norm": 2.5103819155313682, "learning_rate": 8.858866365060866e-06, "loss": 1.1704, "step": 6699 }, { "epoch": 0.2428328077996448, "grad_norm": 2.210213260236591, "learning_rate": 8.858493112031258e-06, "loss": 0.8713, "step": 6700 }, { "epoch": 0.24286905150230148, "grad_norm": 2.415353637736416, "learning_rate": 8.858119805833952e-06, "loss": 1.0202, "step": 6701 }, { "epoch": 0.24290529520495813, "grad_norm": 2.4023157348099904, "learning_rate": 8.857746446474086e-06, "loss": 1.0729, "step": 6702 }, { "epoch": 0.2429415389076148, "grad_norm": 2.5690069784341723, "learning_rate": 8.85737303395681e-06, "loss": 1.0311, "step": 6703 }, { "epoch": 0.24297778261027148, "grad_norm": 2.0640326601986834, "learning_rate": 8.856999568287263e-06, "loss": 1.0026, "step": 6704 }, { "epoch": 0.24301402631292812, "grad_norm": 2.0766840388873047, "learning_rate": 8.856626049470595e-06, "loss": 0.9618, "step": 6705 }, { "epoch": 0.2430502700155848, "grad_norm": 2.3479373470093727, "learning_rate": 8.856252477511955e-06, "loss": 1.1, "step": 6706 }, { "epoch": 0.24308651371824144, "grad_norm": 2.374286215468614, "learning_rate": 8.855878852416486e-06, "loss": 0.9468, "step": 6707 }, { "epoch": 0.24312275742089812, "grad_norm": 2.4902339556605355, "learning_rate": 8.855505174189338e-06, "loss": 0.9879, "step": 6708 }, { "epoch": 0.2431590011235548, "grad_norm": 2.4750846921318437, "learning_rate": 8.855131442835663e-06, "loss": 0.9842, "step": 6709 }, { "epoch": 0.24319524482621144, "grad_norm": 2.4577723250399544, "learning_rate": 8.854757658360606e-06, "loss": 0.967, "step": 6710 }, { "epoch": 0.2432314885288681, "grad_norm": 2.1149059394930934, "learning_rate": 8.85438382076932e-06, "loss": 1.0221, "step": 6711 }, { "epoch": 0.24326773223152479, "grad_norm": 2.1263624680203757, "learning_rate": 8.854009930066955e-06, "loss": 1.0187, "step": 6712 }, { "epoch": 0.24330397593418143, "grad_norm": 2.4577379892795923, "learning_rate": 8.853635986258667e-06, "loss": 1.0678, "step": 6713 }, { "epoch": 0.2433402196368381, "grad_norm": 2.3754323146394976, "learning_rate": 8.853261989349604e-06, "loss": 0.9956, "step": 6714 }, { "epoch": 0.24337646333949475, "grad_norm": 2.3103462113669373, "learning_rate": 8.852887939344921e-06, "loss": 0.8196, "step": 6715 }, { "epoch": 0.24341270704215143, "grad_norm": 2.5361759473996064, "learning_rate": 8.852513836249772e-06, "loss": 1.0506, "step": 6716 }, { "epoch": 0.2434489507448081, "grad_norm": 2.973811864547097, "learning_rate": 8.852139680069314e-06, "loss": 0.8918, "step": 6717 }, { "epoch": 0.24348519444746475, "grad_norm": 2.39133520623263, "learning_rate": 8.8517654708087e-06, "loss": 0.8656, "step": 6718 }, { "epoch": 0.24352143815012142, "grad_norm": 2.6578704234111306, "learning_rate": 8.851391208473086e-06, "loss": 0.9708, "step": 6719 }, { "epoch": 0.24355768185277807, "grad_norm": 2.1065463293368123, "learning_rate": 8.851016893067632e-06, "loss": 0.7598, "step": 6720 }, { "epoch": 0.24359392555543474, "grad_norm": 2.2457959546728743, "learning_rate": 8.850642524597492e-06, "loss": 0.9853, "step": 6721 }, { "epoch": 0.24363016925809142, "grad_norm": 2.0134033416291133, "learning_rate": 8.85026810306783e-06, "loss": 0.9423, "step": 6722 }, { "epoch": 0.24366641296074806, "grad_norm": 2.2573275180899173, "learning_rate": 8.8498936284838e-06, "loss": 0.976, "step": 6723 }, { "epoch": 0.24370265666340474, "grad_norm": 2.3513049414680833, "learning_rate": 8.849519100850566e-06, "loss": 0.9797, "step": 6724 }, { "epoch": 0.24373890036606138, "grad_norm": 2.3881720291227446, "learning_rate": 8.849144520173285e-06, "loss": 0.9891, "step": 6725 }, { "epoch": 0.24377514406871806, "grad_norm": 2.6855366950104314, "learning_rate": 8.848769886457121e-06, "loss": 0.9594, "step": 6726 }, { "epoch": 0.24381138777137473, "grad_norm": 2.5329945062387513, "learning_rate": 8.848395199707236e-06, "loss": 0.9423, "step": 6727 }, { "epoch": 0.24384763147403138, "grad_norm": 2.362688028366539, "learning_rate": 8.84802045992879e-06, "loss": 1.0305, "step": 6728 }, { "epoch": 0.24388387517668805, "grad_norm": 2.202396872678489, "learning_rate": 8.847645667126952e-06, "loss": 0.9261, "step": 6729 }, { "epoch": 0.24392011887934473, "grad_norm": 2.3115480622436517, "learning_rate": 8.847270821306883e-06, "loss": 1.0892, "step": 6730 }, { "epoch": 0.24395636258200137, "grad_norm": 2.687246275764724, "learning_rate": 8.84689592247375e-06, "loss": 1.0136, "step": 6731 }, { "epoch": 0.24399260628465805, "grad_norm": 2.19337295044951, "learning_rate": 8.846520970632717e-06, "loss": 1.0868, "step": 6732 }, { "epoch": 0.2440288499873147, "grad_norm": 2.2755102248577757, "learning_rate": 8.84614596578895e-06, "loss": 0.9943, "step": 6733 }, { "epoch": 0.24406509368997137, "grad_norm": 2.411494435051457, "learning_rate": 8.84577090794762e-06, "loss": 1.0622, "step": 6734 }, { "epoch": 0.24410133739262804, "grad_norm": 2.226969389765505, "learning_rate": 8.845395797113892e-06, "loss": 0.9557, "step": 6735 }, { "epoch": 0.2441375810952847, "grad_norm": 2.2259944025247655, "learning_rate": 8.845020633292933e-06, "loss": 0.7096, "step": 6736 }, { "epoch": 0.24417382479794136, "grad_norm": 2.9689388140869797, "learning_rate": 8.844645416489918e-06, "loss": 0.9043, "step": 6737 }, { "epoch": 0.244210068500598, "grad_norm": 2.650435142685173, "learning_rate": 8.844270146710014e-06, "loss": 1.0373, "step": 6738 }, { "epoch": 0.24424631220325468, "grad_norm": 2.2105724411280496, "learning_rate": 8.843894823958391e-06, "loss": 0.829, "step": 6739 }, { "epoch": 0.24428255590591136, "grad_norm": 2.233116203447474, "learning_rate": 8.843519448240223e-06, "loss": 1.0306, "step": 6740 }, { "epoch": 0.244318799608568, "grad_norm": 2.252183138455895, "learning_rate": 8.843144019560681e-06, "loss": 0.8877, "step": 6741 }, { "epoch": 0.24435504331122468, "grad_norm": 2.199801732306257, "learning_rate": 8.84276853792494e-06, "loss": 0.9588, "step": 6742 }, { "epoch": 0.24439128701388133, "grad_norm": 4.096486369227886, "learning_rate": 8.84239300333817e-06, "loss": 1.0134, "step": 6743 }, { "epoch": 0.244427530716538, "grad_norm": 2.2885089914187486, "learning_rate": 8.84201741580555e-06, "loss": 0.9087, "step": 6744 }, { "epoch": 0.24446377441919467, "grad_norm": 2.7291041617481624, "learning_rate": 8.841641775332254e-06, "loss": 1.06, "step": 6745 }, { "epoch": 0.24450001812185132, "grad_norm": 2.4062209777925347, "learning_rate": 8.841266081923456e-06, "loss": 0.7452, "step": 6746 }, { "epoch": 0.244536261824508, "grad_norm": 2.600818739173469, "learning_rate": 8.840890335584336e-06, "loss": 0.8855, "step": 6747 }, { "epoch": 0.24457250552716467, "grad_norm": 2.447673495270785, "learning_rate": 8.840514536320069e-06, "loss": 0.9095, "step": 6748 }, { "epoch": 0.24460874922982132, "grad_norm": 2.5264226405899066, "learning_rate": 8.840138684135836e-06, "loss": 0.9092, "step": 6749 }, { "epoch": 0.244644992932478, "grad_norm": 2.6588109385488785, "learning_rate": 8.839762779036814e-06, "loss": 0.9616, "step": 6750 }, { "epoch": 0.24468123663513464, "grad_norm": 2.4863361345574337, "learning_rate": 8.839386821028182e-06, "loss": 0.9565, "step": 6751 }, { "epoch": 0.2447174803377913, "grad_norm": 2.5716290923973015, "learning_rate": 8.83901081011512e-06, "loss": 0.9317, "step": 6752 }, { "epoch": 0.24475372404044798, "grad_norm": 2.38092951573978, "learning_rate": 8.838634746302815e-06, "loss": 1.0574, "step": 6753 }, { "epoch": 0.24478996774310463, "grad_norm": 2.6186767738167713, "learning_rate": 8.838258629596442e-06, "loss": 0.8276, "step": 6754 }, { "epoch": 0.2448262114457613, "grad_norm": 2.5084173506819076, "learning_rate": 8.837882460001186e-06, "loss": 0.8994, "step": 6755 }, { "epoch": 0.24486245514841795, "grad_norm": 2.173810984537751, "learning_rate": 8.837506237522232e-06, "loss": 0.8683, "step": 6756 }, { "epoch": 0.24489869885107463, "grad_norm": 2.06132116963083, "learning_rate": 8.837129962164764e-06, "loss": 0.9671, "step": 6757 }, { "epoch": 0.2449349425537313, "grad_norm": 2.221180205649154, "learning_rate": 8.836753633933964e-06, "loss": 1.0389, "step": 6758 }, { "epoch": 0.24497118625638795, "grad_norm": 2.5977484063754184, "learning_rate": 8.83637725283502e-06, "loss": 0.977, "step": 6759 }, { "epoch": 0.24500742995904462, "grad_norm": 2.1441291320176865, "learning_rate": 8.836000818873116e-06, "loss": 0.7654, "step": 6760 }, { "epoch": 0.24504367366170127, "grad_norm": 2.365877299657944, "learning_rate": 8.835624332053442e-06, "loss": 0.8118, "step": 6761 }, { "epoch": 0.24507991736435794, "grad_norm": 2.705539713613227, "learning_rate": 8.835247792381185e-06, "loss": 0.8937, "step": 6762 }, { "epoch": 0.24511616106701462, "grad_norm": 2.58173451093825, "learning_rate": 8.834871199861532e-06, "loss": 0.96, "step": 6763 }, { "epoch": 0.24515240476967126, "grad_norm": 2.4834244547376647, "learning_rate": 8.834494554499673e-06, "loss": 0.8928, "step": 6764 }, { "epoch": 0.24518864847232794, "grad_norm": 2.276080837458732, "learning_rate": 8.8341178563008e-06, "loss": 0.9718, "step": 6765 }, { "epoch": 0.2452248921749846, "grad_norm": 2.194784958802374, "learning_rate": 8.833741105270099e-06, "loss": 0.7714, "step": 6766 }, { "epoch": 0.24526113587764126, "grad_norm": 2.195870623274054, "learning_rate": 8.833364301412765e-06, "loss": 0.8963, "step": 6767 }, { "epoch": 0.24529737958029793, "grad_norm": 2.202764898824602, "learning_rate": 8.83298744473399e-06, "loss": 0.8266, "step": 6768 }, { "epoch": 0.24533362328295458, "grad_norm": 2.4509656378140816, "learning_rate": 8.832610535238965e-06, "loss": 1.0151, "step": 6769 }, { "epoch": 0.24536986698561125, "grad_norm": 2.349197077739174, "learning_rate": 8.832233572932887e-06, "loss": 1.0596, "step": 6770 }, { "epoch": 0.24540611068826793, "grad_norm": 2.2713052412332115, "learning_rate": 8.831856557820946e-06, "loss": 0.9302, "step": 6771 }, { "epoch": 0.24544235439092457, "grad_norm": 2.35956758833958, "learning_rate": 8.831479489908338e-06, "loss": 0.9786, "step": 6772 }, { "epoch": 0.24547859809358125, "grad_norm": 2.125289239314028, "learning_rate": 8.831102369200262e-06, "loss": 0.9285, "step": 6773 }, { "epoch": 0.2455148417962379, "grad_norm": 2.364445116790878, "learning_rate": 8.830725195701913e-06, "loss": 0.8874, "step": 6774 }, { "epoch": 0.24555108549889457, "grad_norm": 2.2987348922185085, "learning_rate": 8.830347969418484e-06, "loss": 0.9618, "step": 6775 }, { "epoch": 0.24558732920155124, "grad_norm": 2.4311233403433072, "learning_rate": 8.82997069035518e-06, "loss": 0.9735, "step": 6776 }, { "epoch": 0.2456235729042079, "grad_norm": 2.268806981853451, "learning_rate": 8.829593358517195e-06, "loss": 0.834, "step": 6777 }, { "epoch": 0.24565981660686456, "grad_norm": 2.198410635553343, "learning_rate": 8.829215973909729e-06, "loss": 1.0951, "step": 6778 }, { "epoch": 0.2456960603095212, "grad_norm": 2.4037221396922863, "learning_rate": 8.828838536537984e-06, "loss": 0.8934, "step": 6779 }, { "epoch": 0.24573230401217788, "grad_norm": 2.653141625483527, "learning_rate": 8.828461046407158e-06, "loss": 0.9767, "step": 6780 }, { "epoch": 0.24576854771483456, "grad_norm": 2.3820612673941683, "learning_rate": 8.828083503522455e-06, "loss": 0.8633, "step": 6781 }, { "epoch": 0.2458047914174912, "grad_norm": 2.0419679652006, "learning_rate": 8.827705907889077e-06, "loss": 0.8182, "step": 6782 }, { "epoch": 0.24584103512014788, "grad_norm": 2.093243922853389, "learning_rate": 8.827328259512226e-06, "loss": 0.8695, "step": 6783 }, { "epoch": 0.24587727882280455, "grad_norm": 2.2958233268326103, "learning_rate": 8.826950558397106e-06, "loss": 0.8607, "step": 6784 }, { "epoch": 0.2459135225254612, "grad_norm": 2.5482735745108758, "learning_rate": 8.826572804548923e-06, "loss": 1.0952, "step": 6785 }, { "epoch": 0.24594976622811787, "grad_norm": 2.2229949670136713, "learning_rate": 8.826194997972879e-06, "loss": 0.9059, "step": 6786 }, { "epoch": 0.24598600993077452, "grad_norm": 2.132315194107727, "learning_rate": 8.825817138674184e-06, "loss": 0.7794, "step": 6787 }, { "epoch": 0.2460222536334312, "grad_norm": 2.5921048381633915, "learning_rate": 8.82543922665804e-06, "loss": 0.9126, "step": 6788 }, { "epoch": 0.24605849733608787, "grad_norm": 2.5404679185239813, "learning_rate": 8.82506126192966e-06, "loss": 0.9973, "step": 6789 }, { "epoch": 0.2460947410387445, "grad_norm": 2.2645125337495893, "learning_rate": 8.824683244494246e-06, "loss": 0.9991, "step": 6790 }, { "epoch": 0.2461309847414012, "grad_norm": 2.3098287876362855, "learning_rate": 8.824305174357012e-06, "loss": 0.9739, "step": 6791 }, { "epoch": 0.24616722844405783, "grad_norm": 2.119026206055236, "learning_rate": 8.823927051523165e-06, "loss": 0.9213, "step": 6792 }, { "epoch": 0.2462034721467145, "grad_norm": 2.3327309308015156, "learning_rate": 8.823548875997916e-06, "loss": 0.9042, "step": 6793 }, { "epoch": 0.24623971584937118, "grad_norm": 2.452496215646352, "learning_rate": 8.823170647786476e-06, "loss": 0.9928, "step": 6794 }, { "epoch": 0.24627595955202783, "grad_norm": 2.330190964647644, "learning_rate": 8.822792366894057e-06, "loss": 0.981, "step": 6795 }, { "epoch": 0.2463122032546845, "grad_norm": 2.2040424281206374, "learning_rate": 8.822414033325871e-06, "loss": 0.9913, "step": 6796 }, { "epoch": 0.24634844695734115, "grad_norm": 2.162344302547022, "learning_rate": 8.82203564708713e-06, "loss": 0.7683, "step": 6797 }, { "epoch": 0.24638469065999782, "grad_norm": 2.3042069252719153, "learning_rate": 8.821657208183053e-06, "loss": 0.8399, "step": 6798 }, { "epoch": 0.2464209343626545, "grad_norm": 2.5390666458973454, "learning_rate": 8.821278716618847e-06, "loss": 1.0653, "step": 6799 }, { "epoch": 0.24645717806531114, "grad_norm": 2.3788874566966403, "learning_rate": 8.820900172399733e-06, "loss": 0.9282, "step": 6800 }, { "epoch": 0.24649342176796782, "grad_norm": 2.414756985133039, "learning_rate": 8.820521575530924e-06, "loss": 0.9441, "step": 6801 }, { "epoch": 0.2465296654706245, "grad_norm": 2.2336417123299124, "learning_rate": 8.82014292601764e-06, "loss": 0.7987, "step": 6802 }, { "epoch": 0.24656590917328114, "grad_norm": 2.328019301465296, "learning_rate": 8.819764223865095e-06, "loss": 0.9661, "step": 6803 }, { "epoch": 0.2466021528759378, "grad_norm": 2.3951006676094533, "learning_rate": 8.819385469078513e-06, "loss": 1.0611, "step": 6804 }, { "epoch": 0.24663839657859446, "grad_norm": 2.382893319855582, "learning_rate": 8.819006661663105e-06, "loss": 0.8936, "step": 6805 }, { "epoch": 0.24667464028125113, "grad_norm": 2.6108634078970754, "learning_rate": 8.818627801624095e-06, "loss": 1.0921, "step": 6806 }, { "epoch": 0.2467108839839078, "grad_norm": 2.6135128254057984, "learning_rate": 8.818248888966705e-06, "loss": 1.0273, "step": 6807 }, { "epoch": 0.24674712768656445, "grad_norm": 2.2994263210141757, "learning_rate": 8.817869923696154e-06, "loss": 0.9748, "step": 6808 }, { "epoch": 0.24678337138922113, "grad_norm": 2.52679676200185, "learning_rate": 8.817490905817665e-06, "loss": 0.9635, "step": 6809 }, { "epoch": 0.24681961509187778, "grad_norm": 2.0786613427915164, "learning_rate": 8.81711183533646e-06, "loss": 0.9841, "step": 6810 }, { "epoch": 0.24685585879453445, "grad_norm": 2.220544722068452, "learning_rate": 8.816732712257761e-06, "loss": 0.9389, "step": 6811 }, { "epoch": 0.24689210249719112, "grad_norm": 2.431655009425089, "learning_rate": 8.816353536586794e-06, "loss": 1.0177, "step": 6812 }, { "epoch": 0.24692834619984777, "grad_norm": 2.41148169942097, "learning_rate": 8.815974308328786e-06, "loss": 1.0071, "step": 6813 }, { "epoch": 0.24696458990250444, "grad_norm": 2.3512861912452023, "learning_rate": 8.815595027488957e-06, "loss": 0.8164, "step": 6814 }, { "epoch": 0.2470008336051611, "grad_norm": 2.281280830359161, "learning_rate": 8.815215694072537e-06, "loss": 0.9464, "step": 6815 }, { "epoch": 0.24703707730781777, "grad_norm": 2.428157555231051, "learning_rate": 8.81483630808475e-06, "loss": 0.8581, "step": 6816 }, { "epoch": 0.24707332101047444, "grad_norm": 2.378594881303204, "learning_rate": 8.81445686953083e-06, "loss": 1.1633, "step": 6817 }, { "epoch": 0.24710956471313109, "grad_norm": 2.5495975430340736, "learning_rate": 8.814077378415999e-06, "loss": 0.902, "step": 6818 }, { "epoch": 0.24714580841578776, "grad_norm": 2.514747246561787, "learning_rate": 8.813697834745488e-06, "loss": 0.9807, "step": 6819 }, { "epoch": 0.24718205211844443, "grad_norm": 2.603980715974964, "learning_rate": 8.813318238524527e-06, "loss": 0.8366, "step": 6820 }, { "epoch": 0.24721829582110108, "grad_norm": 2.6094167453881307, "learning_rate": 8.812938589758349e-06, "loss": 0.9552, "step": 6821 }, { "epoch": 0.24725453952375775, "grad_norm": 2.351425819717844, "learning_rate": 8.81255888845218e-06, "loss": 0.937, "step": 6822 }, { "epoch": 0.2472907832264144, "grad_norm": 2.304130762827074, "learning_rate": 8.812179134611258e-06, "loss": 0.8918, "step": 6823 }, { "epoch": 0.24732702692907108, "grad_norm": 2.1971510633468907, "learning_rate": 8.811799328240812e-06, "loss": 0.771, "step": 6824 }, { "epoch": 0.24736327063172775, "grad_norm": 2.3092688857460235, "learning_rate": 8.811419469346076e-06, "loss": 1.0195, "step": 6825 }, { "epoch": 0.2473995143343844, "grad_norm": 2.387802682619983, "learning_rate": 8.811039557932285e-06, "loss": 0.9432, "step": 6826 }, { "epoch": 0.24743575803704107, "grad_norm": 2.410036894452903, "learning_rate": 8.810659594004676e-06, "loss": 1.2282, "step": 6827 }, { "epoch": 0.24747200173969772, "grad_norm": 2.076327468571007, "learning_rate": 8.81027957756848e-06, "loss": 0.8059, "step": 6828 }, { "epoch": 0.2475082454423544, "grad_norm": 2.5327978739957713, "learning_rate": 8.809899508628937e-06, "loss": 1.1106, "step": 6829 }, { "epoch": 0.24754448914501107, "grad_norm": 2.26826414134794, "learning_rate": 8.809519387191282e-06, "loss": 0.9527, "step": 6830 }, { "epoch": 0.2475807328476677, "grad_norm": 2.6130838436720576, "learning_rate": 8.809139213260753e-06, "loss": 1.0057, "step": 6831 }, { "epoch": 0.24761697655032439, "grad_norm": 2.1216321386986983, "learning_rate": 8.808758986842592e-06, "loss": 1.0212, "step": 6832 }, { "epoch": 0.24765322025298103, "grad_norm": 2.550453034940645, "learning_rate": 8.808378707942033e-06, "loss": 0.8666, "step": 6833 }, { "epoch": 0.2476894639556377, "grad_norm": 2.2152431813347, "learning_rate": 8.80799837656432e-06, "loss": 1.0142, "step": 6834 }, { "epoch": 0.24772570765829438, "grad_norm": 2.362686583020266, "learning_rate": 8.807617992714693e-06, "loss": 1.0134, "step": 6835 }, { "epoch": 0.24776195136095103, "grad_norm": 2.430400051954233, "learning_rate": 8.807237556398391e-06, "loss": 0.9459, "step": 6836 }, { "epoch": 0.2477981950636077, "grad_norm": 2.3371891540751224, "learning_rate": 8.80685706762066e-06, "loss": 1.0572, "step": 6837 }, { "epoch": 0.24783443876626435, "grad_norm": 2.3694249888055348, "learning_rate": 8.80647652638674e-06, "loss": 0.9377, "step": 6838 }, { "epoch": 0.24787068246892102, "grad_norm": 2.2291312137675336, "learning_rate": 8.806095932701875e-06, "loss": 0.959, "step": 6839 }, { "epoch": 0.2479069261715777, "grad_norm": 2.4986851030538917, "learning_rate": 8.805715286571311e-06, "loss": 1.1051, "step": 6840 }, { "epoch": 0.24794316987423434, "grad_norm": 2.3812881611720362, "learning_rate": 8.80533458800029e-06, "loss": 0.9332, "step": 6841 }, { "epoch": 0.24797941357689102, "grad_norm": 2.539025347559773, "learning_rate": 8.804953836994061e-06, "loss": 1.0372, "step": 6842 }, { "epoch": 0.2480156572795477, "grad_norm": 2.098121354102496, "learning_rate": 8.80457303355787e-06, "loss": 1.1718, "step": 6843 }, { "epoch": 0.24805190098220434, "grad_norm": 2.1078273617522982, "learning_rate": 8.804192177696963e-06, "loss": 0.9362, "step": 6844 }, { "epoch": 0.248088144684861, "grad_norm": 2.1433779532613753, "learning_rate": 8.803811269416588e-06, "loss": 1.0915, "step": 6845 }, { "epoch": 0.24812438838751766, "grad_norm": 2.6558429832425867, "learning_rate": 8.803430308721994e-06, "loss": 1.0521, "step": 6846 }, { "epoch": 0.24816063209017433, "grad_norm": 2.601355047009421, "learning_rate": 8.803049295618431e-06, "loss": 0.9244, "step": 6847 }, { "epoch": 0.248196875792831, "grad_norm": 2.2811354447151126, "learning_rate": 8.802668230111149e-06, "loss": 1.0246, "step": 6848 }, { "epoch": 0.24823311949548765, "grad_norm": 2.35427118930841, "learning_rate": 8.802287112205399e-06, "loss": 1.1029, "step": 6849 }, { "epoch": 0.24826936319814433, "grad_norm": 2.442147889577554, "learning_rate": 8.801905941906433e-06, "loss": 0.9348, "step": 6850 }, { "epoch": 0.24830560690080097, "grad_norm": 2.205133548421246, "learning_rate": 8.8015247192195e-06, "loss": 1.2103, "step": 6851 }, { "epoch": 0.24834185060345765, "grad_norm": 14.73316504007833, "learning_rate": 8.801143444149856e-06, "loss": 1.5835, "step": 6852 }, { "epoch": 0.24837809430611432, "grad_norm": 2.2477885806675757, "learning_rate": 8.800762116702754e-06, "loss": 0.8696, "step": 6853 }, { "epoch": 0.24841433800877097, "grad_norm": 2.3446098620473146, "learning_rate": 8.800380736883451e-06, "loss": 0.9835, "step": 6854 }, { "epoch": 0.24845058171142764, "grad_norm": 2.4855960977167237, "learning_rate": 8.799999304697198e-06, "loss": 1.1362, "step": 6855 }, { "epoch": 0.2484868254140843, "grad_norm": 2.3973500119250146, "learning_rate": 8.799617820149253e-06, "loss": 0.8912, "step": 6856 }, { "epoch": 0.24852306911674096, "grad_norm": 2.5253315416352313, "learning_rate": 8.799236283244872e-06, "loss": 1.0018, "step": 6857 }, { "epoch": 0.24855931281939764, "grad_norm": 2.1505775944129923, "learning_rate": 8.798854693989312e-06, "loss": 1.0467, "step": 6858 }, { "epoch": 0.24859555652205428, "grad_norm": 2.5518641278219945, "learning_rate": 8.798473052387834e-06, "loss": 1.0469, "step": 6859 }, { "epoch": 0.24863180022471096, "grad_norm": 2.539787229174833, "learning_rate": 8.798091358445693e-06, "loss": 1.2009, "step": 6860 }, { "epoch": 0.24866804392736763, "grad_norm": 2.591599371072685, "learning_rate": 8.79770961216815e-06, "loss": 0.9666, "step": 6861 }, { "epoch": 0.24870428763002428, "grad_norm": 2.714482005482862, "learning_rate": 8.797327813560465e-06, "loss": 1.1447, "step": 6862 }, { "epoch": 0.24874053133268095, "grad_norm": 2.4925906932145585, "learning_rate": 8.796945962627901e-06, "loss": 0.9334, "step": 6863 }, { "epoch": 0.2487767750353376, "grad_norm": 2.2013979920357674, "learning_rate": 8.796564059375717e-06, "loss": 0.9664, "step": 6864 }, { "epoch": 0.24881301873799427, "grad_norm": 2.6208957510880446, "learning_rate": 8.796182103809176e-06, "loss": 0.8503, "step": 6865 }, { "epoch": 0.24884926244065095, "grad_norm": 2.2606795395663144, "learning_rate": 8.795800095933542e-06, "loss": 0.8834, "step": 6866 }, { "epoch": 0.2488855061433076, "grad_norm": 2.504611146891098, "learning_rate": 8.79541803575408e-06, "loss": 1.0749, "step": 6867 }, { "epoch": 0.24892174984596427, "grad_norm": 2.3528578038080505, "learning_rate": 8.79503592327605e-06, "loss": 0.9402, "step": 6868 }, { "epoch": 0.24895799354862092, "grad_norm": 2.7413368759189076, "learning_rate": 8.794653758504722e-06, "loss": 0.9261, "step": 6869 }, { "epoch": 0.2489942372512776, "grad_norm": 2.1046391476857225, "learning_rate": 8.794271541445358e-06, "loss": 1.0812, "step": 6870 }, { "epoch": 0.24903048095393426, "grad_norm": 2.716221099098462, "learning_rate": 8.79388927210323e-06, "loss": 1.0025, "step": 6871 }, { "epoch": 0.2490667246565909, "grad_norm": 2.064795426055433, "learning_rate": 8.7935069504836e-06, "loss": 0.8838, "step": 6872 }, { "epoch": 0.24910296835924758, "grad_norm": 2.776502855583219, "learning_rate": 8.793124576591739e-06, "loss": 0.9695, "step": 6873 }, { "epoch": 0.24913921206190423, "grad_norm": 2.203109442329449, "learning_rate": 8.792742150432914e-06, "loss": 0.6745, "step": 6874 }, { "epoch": 0.2491754557645609, "grad_norm": 2.584327027327521, "learning_rate": 8.792359672012396e-06, "loss": 0.8599, "step": 6875 }, { "epoch": 0.24921169946721758, "grad_norm": 2.2476196125717207, "learning_rate": 8.791977141335457e-06, "loss": 1.0772, "step": 6876 }, { "epoch": 0.24924794316987423, "grad_norm": 2.328484102442228, "learning_rate": 8.791594558407368e-06, "loss": 0.9714, "step": 6877 }, { "epoch": 0.2492841868725309, "grad_norm": 2.131518558770496, "learning_rate": 8.791211923233396e-06, "loss": 0.997, "step": 6878 }, { "epoch": 0.24932043057518757, "grad_norm": 2.3409372092432092, "learning_rate": 8.790829235818816e-06, "loss": 0.9448, "step": 6879 }, { "epoch": 0.24935667427784422, "grad_norm": 2.305479096212443, "learning_rate": 8.790446496168904e-06, "loss": 0.6927, "step": 6880 }, { "epoch": 0.2493929179805009, "grad_norm": 2.349415644812512, "learning_rate": 8.790063704288932e-06, "loss": 0.9131, "step": 6881 }, { "epoch": 0.24942916168315754, "grad_norm": 2.1232006692129177, "learning_rate": 8.789680860184172e-06, "loss": 0.8294, "step": 6882 }, { "epoch": 0.24946540538581422, "grad_norm": 2.2643967788872197, "learning_rate": 8.789297963859903e-06, "loss": 0.8906, "step": 6883 }, { "epoch": 0.2495016490884709, "grad_norm": 2.5230278932024697, "learning_rate": 8.7889150153214e-06, "loss": 0.9633, "step": 6884 }, { "epoch": 0.24953789279112754, "grad_norm": 2.4580069787395353, "learning_rate": 8.788532014573939e-06, "loss": 1.0651, "step": 6885 }, { "epoch": 0.2495741364937842, "grad_norm": 2.1507432044320987, "learning_rate": 8.788148961622798e-06, "loss": 0.8802, "step": 6886 }, { "epoch": 0.24961038019644086, "grad_norm": 2.5830607976772617, "learning_rate": 8.787765856473257e-06, "loss": 0.9737, "step": 6887 }, { "epoch": 0.24964662389909753, "grad_norm": 2.429380597182143, "learning_rate": 8.78738269913059e-06, "loss": 0.9869, "step": 6888 }, { "epoch": 0.2496828676017542, "grad_norm": 2.2460949025780446, "learning_rate": 8.786999489600081e-06, "loss": 0.9123, "step": 6889 }, { "epoch": 0.24971911130441085, "grad_norm": 2.353327858299624, "learning_rate": 8.78661622788701e-06, "loss": 0.896, "step": 6890 }, { "epoch": 0.24975535500706753, "grad_norm": 2.143934004023279, "learning_rate": 8.786232913996658e-06, "loss": 0.9026, "step": 6891 }, { "epoch": 0.24979159870972417, "grad_norm": 2.084467442435686, "learning_rate": 8.785849547934306e-06, "loss": 1.0273, "step": 6892 }, { "epoch": 0.24982784241238085, "grad_norm": 2.097438597718829, "learning_rate": 8.785466129705237e-06, "loss": 0.8942, "step": 6893 }, { "epoch": 0.24986408611503752, "grad_norm": 2.274477542922059, "learning_rate": 8.785082659314733e-06, "loss": 0.7367, "step": 6894 }, { "epoch": 0.24990032981769417, "grad_norm": 2.1618829769839056, "learning_rate": 8.78469913676808e-06, "loss": 0.7623, "step": 6895 }, { "epoch": 0.24993657352035084, "grad_norm": 2.0841151040441686, "learning_rate": 8.784315562070561e-06, "loss": 0.9995, "step": 6896 }, { "epoch": 0.24997281722300752, "grad_norm": 2.127594284089165, "learning_rate": 8.783931935227463e-06, "loss": 0.8214, "step": 6897 }, { "epoch": 0.25000906092566416, "grad_norm": 2.6397495607230677, "learning_rate": 8.783548256244073e-06, "loss": 0.893, "step": 6898 }, { "epoch": 0.2500453046283208, "grad_norm": 2.4943337128055774, "learning_rate": 8.783164525125674e-06, "loss": 0.903, "step": 6899 }, { "epoch": 0.2500815483309775, "grad_norm": 2.39736851442291, "learning_rate": 8.782780741877556e-06, "loss": 0.8885, "step": 6900 }, { "epoch": 0.25011779203363416, "grad_norm": 2.784805075758564, "learning_rate": 8.782396906505006e-06, "loss": 0.9454, "step": 6901 }, { "epoch": 0.2501540357362908, "grad_norm": 2.5397215131534563, "learning_rate": 8.782013019013315e-06, "loss": 1.0453, "step": 6902 }, { "epoch": 0.2501902794389475, "grad_norm": 2.6158417124393787, "learning_rate": 8.781629079407774e-06, "loss": 1.0714, "step": 6903 }, { "epoch": 0.25022652314160415, "grad_norm": 2.516221058181969, "learning_rate": 8.78124508769367e-06, "loss": 0.936, "step": 6904 }, { "epoch": 0.2502627668442608, "grad_norm": 2.564017020822492, "learning_rate": 8.780861043876296e-06, "loss": 0.9841, "step": 6905 }, { "epoch": 0.2502990105469175, "grad_norm": 2.3514063438988853, "learning_rate": 8.780476947960942e-06, "loss": 1.0661, "step": 6906 }, { "epoch": 0.25033525424957415, "grad_norm": 2.3403468542464028, "learning_rate": 8.780092799952903e-06, "loss": 0.7519, "step": 6907 }, { "epoch": 0.2503714979522308, "grad_norm": 2.5231257008601182, "learning_rate": 8.779708599857471e-06, "loss": 0.9621, "step": 6908 }, { "epoch": 0.25040774165488744, "grad_norm": 2.38424624365495, "learning_rate": 8.77932434767994e-06, "loss": 1.0665, "step": 6909 }, { "epoch": 0.25044398535754414, "grad_norm": 2.1271592979258207, "learning_rate": 8.778940043425605e-06, "loss": 0.8328, "step": 6910 }, { "epoch": 0.2504802290602008, "grad_norm": 2.5156300955624222, "learning_rate": 8.778555687099764e-06, "loss": 1.1112, "step": 6911 }, { "epoch": 0.25051647276285743, "grad_norm": 2.449913293120826, "learning_rate": 8.778171278707708e-06, "loss": 0.8764, "step": 6912 }, { "epoch": 0.25055271646551414, "grad_norm": 2.4718745395976116, "learning_rate": 8.77778681825474e-06, "loss": 0.8687, "step": 6913 }, { "epoch": 0.2505889601681708, "grad_norm": 2.660375517509268, "learning_rate": 8.77740230574615e-06, "loss": 1.2809, "step": 6914 }, { "epoch": 0.25062520387082743, "grad_norm": 2.185831697991456, "learning_rate": 8.777017741187243e-06, "loss": 0.859, "step": 6915 }, { "epoch": 0.25066144757348413, "grad_norm": 2.3381915851518498, "learning_rate": 8.776633124583315e-06, "loss": 0.9535, "step": 6916 }, { "epoch": 0.2506976912761408, "grad_norm": 2.3453300587785013, "learning_rate": 8.776248455939669e-06, "loss": 1.0236, "step": 6917 }, { "epoch": 0.2507339349787974, "grad_norm": 2.5275883673757744, "learning_rate": 8.7758637352616e-06, "loss": 0.8596, "step": 6918 }, { "epoch": 0.25077017868145407, "grad_norm": 2.437128442770772, "learning_rate": 8.775478962554415e-06, "loss": 0.8605, "step": 6919 }, { "epoch": 0.25080642238411077, "grad_norm": 2.457870502487976, "learning_rate": 8.775094137823413e-06, "loss": 1.0643, "step": 6920 }, { "epoch": 0.2508426660867674, "grad_norm": 2.3252529720176427, "learning_rate": 8.774709261073895e-06, "loss": 1.0242, "step": 6921 }, { "epoch": 0.25087890978942406, "grad_norm": 2.022497091941892, "learning_rate": 8.774324332311167e-06, "loss": 0.8473, "step": 6922 }, { "epoch": 0.25091515349208077, "grad_norm": 2.2992398329883676, "learning_rate": 8.773939351540532e-06, "loss": 0.9923, "step": 6923 }, { "epoch": 0.2509513971947374, "grad_norm": 2.441485801627226, "learning_rate": 8.773554318767295e-06, "loss": 0.9477, "step": 6924 }, { "epoch": 0.25098764089739406, "grad_norm": 2.2518764452538504, "learning_rate": 8.773169233996763e-06, "loss": 1.0337, "step": 6925 }, { "epoch": 0.25102388460005076, "grad_norm": 2.6147464517686543, "learning_rate": 8.772784097234239e-06, "loss": 0.8951, "step": 6926 }, { "epoch": 0.2510601283027074, "grad_norm": 2.3534677220296354, "learning_rate": 8.772398908485034e-06, "loss": 0.9281, "step": 6927 }, { "epoch": 0.25109637200536405, "grad_norm": 2.5807192294190684, "learning_rate": 8.772013667754452e-06, "loss": 0.8967, "step": 6928 }, { "epoch": 0.25113261570802076, "grad_norm": 2.1811671513700595, "learning_rate": 8.771628375047802e-06, "loss": 0.8837, "step": 6929 }, { "epoch": 0.2511688594106774, "grad_norm": 2.243254708325192, "learning_rate": 8.771243030370398e-06, "loss": 0.9351, "step": 6930 }, { "epoch": 0.25120510311333405, "grad_norm": 2.5922170602567043, "learning_rate": 8.770857633727542e-06, "loss": 0.8175, "step": 6931 }, { "epoch": 0.2512413468159907, "grad_norm": 2.30573365581376, "learning_rate": 8.77047218512455e-06, "loss": 0.9045, "step": 6932 }, { "epoch": 0.2512775905186474, "grad_norm": 2.1782392171150198, "learning_rate": 8.770086684566732e-06, "loss": 0.8188, "step": 6933 }, { "epoch": 0.25131383422130404, "grad_norm": 2.3818303351803767, "learning_rate": 8.769701132059397e-06, "loss": 0.9775, "step": 6934 }, { "epoch": 0.2513500779239607, "grad_norm": 2.2583620404417437, "learning_rate": 8.769315527607861e-06, "loss": 0.9475, "step": 6935 }, { "epoch": 0.2513863216266174, "grad_norm": 2.525429996199373, "learning_rate": 8.76892987121744e-06, "loss": 0.887, "step": 6936 }, { "epoch": 0.25142256532927404, "grad_norm": 2.818082905200148, "learning_rate": 8.768544162893443e-06, "loss": 1.0595, "step": 6937 }, { "epoch": 0.2514588090319307, "grad_norm": 2.213594543501972, "learning_rate": 8.768158402641184e-06, "loss": 0.9941, "step": 6938 }, { "epoch": 0.2514950527345874, "grad_norm": 2.270681261052689, "learning_rate": 8.767772590465984e-06, "loss": 0.9115, "step": 6939 }, { "epoch": 0.25153129643724403, "grad_norm": 2.2414274031691717, "learning_rate": 8.767386726373155e-06, "loss": 0.9482, "step": 6940 }, { "epoch": 0.2515675401399007, "grad_norm": 2.419537869241751, "learning_rate": 8.767000810368016e-06, "loss": 0.996, "step": 6941 }, { "epoch": 0.2516037838425574, "grad_norm": 2.449568586361568, "learning_rate": 8.766614842455883e-06, "loss": 1.0243, "step": 6942 }, { "epoch": 0.25164002754521403, "grad_norm": 2.344292653889583, "learning_rate": 8.766228822642077e-06, "loss": 0.9757, "step": 6943 }, { "epoch": 0.2516762712478707, "grad_norm": 2.531052352355119, "learning_rate": 8.765842750931914e-06, "loss": 1.0059, "step": 6944 }, { "epoch": 0.2517125149505273, "grad_norm": 2.108601700812559, "learning_rate": 8.765456627330718e-06, "loss": 0.8198, "step": 6945 }, { "epoch": 0.251748758653184, "grad_norm": 2.1957155499783205, "learning_rate": 8.765070451843805e-06, "loss": 0.8504, "step": 6946 }, { "epoch": 0.25178500235584067, "grad_norm": 2.53680857807247, "learning_rate": 8.764684224476496e-06, "loss": 1.0796, "step": 6947 }, { "epoch": 0.2518212460584973, "grad_norm": 2.409729786857409, "learning_rate": 8.764297945234119e-06, "loss": 0.9803, "step": 6948 }, { "epoch": 0.251857489761154, "grad_norm": 2.1865311726465304, "learning_rate": 8.763911614121991e-06, "loss": 0.9697, "step": 6949 }, { "epoch": 0.25189373346381067, "grad_norm": 2.2924087867239127, "learning_rate": 8.763525231145437e-06, "loss": 0.8916, "step": 6950 }, { "epoch": 0.2519299771664673, "grad_norm": 2.3901432002859493, "learning_rate": 8.763138796309782e-06, "loss": 0.9333, "step": 6951 }, { "epoch": 0.251966220869124, "grad_norm": 2.572312512612186, "learning_rate": 8.762752309620352e-06, "loss": 0.8821, "step": 6952 }, { "epoch": 0.25200246457178066, "grad_norm": 2.530793258511988, "learning_rate": 8.762365771082468e-06, "loss": 1.0735, "step": 6953 }, { "epoch": 0.2520387082744373, "grad_norm": 2.433449154118724, "learning_rate": 8.761979180701461e-06, "loss": 0.9061, "step": 6954 }, { "epoch": 0.25207495197709395, "grad_norm": 2.3311093544469177, "learning_rate": 8.761592538482655e-06, "loss": 0.8087, "step": 6955 }, { "epoch": 0.25211119567975065, "grad_norm": 2.1265866017299397, "learning_rate": 8.76120584443138e-06, "loss": 0.8118, "step": 6956 }, { "epoch": 0.2521474393824073, "grad_norm": 2.357031489881474, "learning_rate": 8.760819098552962e-06, "loss": 0.8059, "step": 6957 }, { "epoch": 0.25218368308506395, "grad_norm": 2.4007385111592616, "learning_rate": 8.76043230085273e-06, "loss": 1.0773, "step": 6958 }, { "epoch": 0.25221992678772065, "grad_norm": 2.2904690103233816, "learning_rate": 8.76004545133602e-06, "loss": 1.047, "step": 6959 }, { "epoch": 0.2522561704903773, "grad_norm": 2.3084309750098027, "learning_rate": 8.759658550008153e-06, "loss": 1.0141, "step": 6960 }, { "epoch": 0.25229241419303394, "grad_norm": 2.1723216197901243, "learning_rate": 8.759271596874465e-06, "loss": 0.8267, "step": 6961 }, { "epoch": 0.25232865789569064, "grad_norm": 2.171973529495857, "learning_rate": 8.758884591940291e-06, "loss": 0.8427, "step": 6962 }, { "epoch": 0.2523649015983473, "grad_norm": 2.302597864010036, "learning_rate": 8.758497535210959e-06, "loss": 0.982, "step": 6963 }, { "epoch": 0.25240114530100394, "grad_norm": 1.8718927720082186, "learning_rate": 8.758110426691804e-06, "loss": 0.9389, "step": 6964 }, { "epoch": 0.25243738900366064, "grad_norm": 2.5962343954160736, "learning_rate": 8.757723266388159e-06, "loss": 1.0683, "step": 6965 }, { "epoch": 0.2524736327063173, "grad_norm": 2.454907946623914, "learning_rate": 8.757336054305361e-06, "loss": 1.0667, "step": 6966 }, { "epoch": 0.25250987640897393, "grad_norm": 2.5349159957507834, "learning_rate": 8.756948790448745e-06, "loss": 1.1155, "step": 6967 }, { "epoch": 0.2525461201116306, "grad_norm": 2.4356686394941813, "learning_rate": 8.756561474823646e-06, "loss": 1.0422, "step": 6968 }, { "epoch": 0.2525823638142873, "grad_norm": 2.684082682351612, "learning_rate": 8.756174107435404e-06, "loss": 0.8744, "step": 6969 }, { "epoch": 0.2526186075169439, "grad_norm": 2.400100295015932, "learning_rate": 8.755786688289352e-06, "loss": 0.8567, "step": 6970 }, { "epoch": 0.2526548512196006, "grad_norm": 2.720248913707752, "learning_rate": 8.755399217390831e-06, "loss": 1.0243, "step": 6971 }, { "epoch": 0.2526910949222573, "grad_norm": 2.4119826840231107, "learning_rate": 8.75501169474518e-06, "loss": 0.8486, "step": 6972 }, { "epoch": 0.2527273386249139, "grad_norm": 2.4450748042839345, "learning_rate": 8.75462412035774e-06, "loss": 1.0594, "step": 6973 }, { "epoch": 0.25276358232757057, "grad_norm": 2.559980985130193, "learning_rate": 8.75423649423385e-06, "loss": 0.9306, "step": 6974 }, { "epoch": 0.25279982603022727, "grad_norm": 2.3637999399864063, "learning_rate": 8.75384881637885e-06, "loss": 0.983, "step": 6975 }, { "epoch": 0.2528360697328839, "grad_norm": 2.2780691734940524, "learning_rate": 8.753461086798085e-06, "loss": 0.8977, "step": 6976 }, { "epoch": 0.25287231343554056, "grad_norm": 2.31408732539282, "learning_rate": 8.753073305496897e-06, "loss": 0.9988, "step": 6977 }, { "epoch": 0.25290855713819727, "grad_norm": 2.2244197482846486, "learning_rate": 8.752685472480628e-06, "loss": 0.8282, "step": 6978 }, { "epoch": 0.2529448008408539, "grad_norm": 2.4322990662779156, "learning_rate": 8.752297587754622e-06, "loss": 0.8441, "step": 6979 }, { "epoch": 0.25298104454351056, "grad_norm": 2.715174601407126, "learning_rate": 8.751909651324224e-06, "loss": 0.9688, "step": 6980 }, { "epoch": 0.2530172882461672, "grad_norm": 2.5580644830903885, "learning_rate": 8.751521663194781e-06, "loss": 1.0534, "step": 6981 }, { "epoch": 0.2530535319488239, "grad_norm": 2.413556783908092, "learning_rate": 8.75113362337164e-06, "loss": 1.0347, "step": 6982 }, { "epoch": 0.25308977565148055, "grad_norm": 2.3090936846837034, "learning_rate": 8.750745531860145e-06, "loss": 1.0712, "step": 6983 }, { "epoch": 0.2531260193541372, "grad_norm": 2.494120581463848, "learning_rate": 8.750357388665643e-06, "loss": 1.1144, "step": 6984 }, { "epoch": 0.2531622630567939, "grad_norm": 2.3076500669863127, "learning_rate": 8.749969193793486e-06, "loss": 1.2982, "step": 6985 }, { "epoch": 0.25319850675945055, "grad_norm": 2.386144961530369, "learning_rate": 8.74958094724902e-06, "loss": 1.0246, "step": 6986 }, { "epoch": 0.2532347504621072, "grad_norm": 2.3485723776577987, "learning_rate": 8.7491926490376e-06, "loss": 0.9947, "step": 6987 }, { "epoch": 0.2532709941647639, "grad_norm": 2.393581882022753, "learning_rate": 8.74880429916457e-06, "loss": 0.9103, "step": 6988 }, { "epoch": 0.25330723786742054, "grad_norm": 2.3687334211082502, "learning_rate": 8.748415897635285e-06, "loss": 1.1458, "step": 6989 }, { "epoch": 0.2533434815700772, "grad_norm": 2.422210855760169, "learning_rate": 8.748027444455095e-06, "loss": 0.9274, "step": 6990 }, { "epoch": 0.25337972527273384, "grad_norm": 2.453301707635342, "learning_rate": 8.747638939629352e-06, "loss": 0.9118, "step": 6991 }, { "epoch": 0.25341596897539054, "grad_norm": 2.477433397655646, "learning_rate": 8.747250383163414e-06, "loss": 0.8933, "step": 6992 }, { "epoch": 0.2534522126780472, "grad_norm": 2.3858939087346136, "learning_rate": 8.74686177506263e-06, "loss": 0.908, "step": 6993 }, { "epoch": 0.25348845638070383, "grad_norm": 2.613355685574443, "learning_rate": 8.746473115332358e-06, "loss": 0.9596, "step": 6994 }, { "epoch": 0.25352470008336053, "grad_norm": 2.5372446309805525, "learning_rate": 8.746084403977953e-06, "loss": 0.9593, "step": 6995 }, { "epoch": 0.2535609437860172, "grad_norm": 2.5212605298490494, "learning_rate": 8.745695641004768e-06, "loss": 0.9126, "step": 6996 }, { "epoch": 0.2535971874886738, "grad_norm": 2.5582768404325944, "learning_rate": 8.745306826418163e-06, "loss": 0.9183, "step": 6997 }, { "epoch": 0.2536334311913305, "grad_norm": 2.451866671134445, "learning_rate": 8.744917960223496e-06, "loss": 0.8672, "step": 6998 }, { "epoch": 0.2536696748939872, "grad_norm": 2.2834576145768124, "learning_rate": 8.744529042426124e-06, "loss": 0.9837, "step": 6999 }, { "epoch": 0.2537059185966438, "grad_norm": 2.443842032562659, "learning_rate": 8.744140073031407e-06, "loss": 0.9471, "step": 7000 }, { "epoch": 0.2537421622993005, "grad_norm": 2.3002267063606774, "learning_rate": 8.743751052044703e-06, "loss": 1.0376, "step": 7001 }, { "epoch": 0.25377840600195717, "grad_norm": 2.256987354368647, "learning_rate": 8.743361979471375e-06, "loss": 0.8867, "step": 7002 }, { "epoch": 0.2538146497046138, "grad_norm": 2.2865442568088175, "learning_rate": 8.742972855316781e-06, "loss": 0.888, "step": 7003 }, { "epoch": 0.25385089340727046, "grad_norm": 2.3589902095540896, "learning_rate": 8.742583679586285e-06, "loss": 1.0497, "step": 7004 }, { "epoch": 0.25388713710992716, "grad_norm": 2.4341090604545266, "learning_rate": 8.74219445228525e-06, "loss": 0.8132, "step": 7005 }, { "epoch": 0.2539233808125838, "grad_norm": 2.4961021699885553, "learning_rate": 8.74180517341904e-06, "loss": 0.9922, "step": 7006 }, { "epoch": 0.25395962451524046, "grad_norm": 2.173067768267483, "learning_rate": 8.741415842993017e-06, "loss": 0.9321, "step": 7007 }, { "epoch": 0.25399586821789716, "grad_norm": 2.2837946936452416, "learning_rate": 8.741026461012545e-06, "loss": 0.93, "step": 7008 }, { "epoch": 0.2540321119205538, "grad_norm": 2.25575390653007, "learning_rate": 8.740637027482992e-06, "loss": 1.0142, "step": 7009 }, { "epoch": 0.25406835562321045, "grad_norm": 2.16982798335733, "learning_rate": 8.740247542409723e-06, "loss": 1.0294, "step": 7010 }, { "epoch": 0.25410459932586715, "grad_norm": 2.4896811585866603, "learning_rate": 8.739858005798104e-06, "loss": 0.9775, "step": 7011 }, { "epoch": 0.2541408430285238, "grad_norm": 2.410906137030956, "learning_rate": 8.739468417653505e-06, "loss": 1.1047, "step": 7012 }, { "epoch": 0.25417708673118045, "grad_norm": 2.3397685600017373, "learning_rate": 8.73907877798129e-06, "loss": 0.9979, "step": 7013 }, { "epoch": 0.25421333043383715, "grad_norm": 2.3012134785016465, "learning_rate": 8.738689086786833e-06, "loss": 0.873, "step": 7014 }, { "epoch": 0.2542495741364938, "grad_norm": 2.3427951741280966, "learning_rate": 8.738299344075503e-06, "loss": 0.8537, "step": 7015 }, { "epoch": 0.25428581783915044, "grad_norm": 2.593109128722367, "learning_rate": 8.737909549852665e-06, "loss": 1.0974, "step": 7016 }, { "epoch": 0.2543220615418071, "grad_norm": 2.3124951247281595, "learning_rate": 8.737519704123697e-06, "loss": 0.9009, "step": 7017 }, { "epoch": 0.2543583052444638, "grad_norm": 2.327350346530619, "learning_rate": 8.737129806893968e-06, "loss": 0.9662, "step": 7018 }, { "epoch": 0.25439454894712044, "grad_norm": 2.4108207484664943, "learning_rate": 8.736739858168848e-06, "loss": 0.9498, "step": 7019 }, { "epoch": 0.2544307926497771, "grad_norm": 2.4122478673667795, "learning_rate": 8.736349857953715e-06, "loss": 0.978, "step": 7020 }, { "epoch": 0.2544670363524338, "grad_norm": 2.6412713309629066, "learning_rate": 8.735959806253939e-06, "loss": 1.0547, "step": 7021 }, { "epoch": 0.25450328005509043, "grad_norm": 2.176230006756555, "learning_rate": 8.735569703074897e-06, "loss": 0.9075, "step": 7022 }, { "epoch": 0.2545395237577471, "grad_norm": 2.464771624418149, "learning_rate": 8.735179548421964e-06, "loss": 1.1035, "step": 7023 }, { "epoch": 0.2545757674604038, "grad_norm": 1.9560619962386234, "learning_rate": 8.734789342300513e-06, "loss": 0.8439, "step": 7024 }, { "epoch": 0.2546120111630604, "grad_norm": 2.5830201421014394, "learning_rate": 8.734399084715927e-06, "loss": 0.8678, "step": 7025 }, { "epoch": 0.25464825486571707, "grad_norm": 2.5861077053046455, "learning_rate": 8.734008775673579e-06, "loss": 0.9193, "step": 7026 }, { "epoch": 0.2546844985683737, "grad_norm": 2.417275480315563, "learning_rate": 8.733618415178849e-06, "loss": 1.015, "step": 7027 }, { "epoch": 0.2547207422710304, "grad_norm": 2.3705578807643737, "learning_rate": 8.733228003237114e-06, "loss": 0.9531, "step": 7028 }, { "epoch": 0.25475698597368707, "grad_norm": 2.476351345856957, "learning_rate": 8.732837539853755e-06, "loss": 0.6838, "step": 7029 }, { "epoch": 0.2547932296763437, "grad_norm": 2.906062151679577, "learning_rate": 8.732447025034153e-06, "loss": 1.1363, "step": 7030 }, { "epoch": 0.2548294733790004, "grad_norm": 2.4111664814071365, "learning_rate": 8.732056458783688e-06, "loss": 0.9287, "step": 7031 }, { "epoch": 0.25486571708165706, "grad_norm": 2.2203097637428284, "learning_rate": 8.731665841107744e-06, "loss": 0.8432, "step": 7032 }, { "epoch": 0.2549019607843137, "grad_norm": 2.431517429752857, "learning_rate": 8.731275172011699e-06, "loss": 0.9313, "step": 7033 }, { "epoch": 0.2549382044869704, "grad_norm": 2.3363492536934087, "learning_rate": 8.730884451500939e-06, "loss": 1.1491, "step": 7034 }, { "epoch": 0.25497444818962706, "grad_norm": 2.6305525325538173, "learning_rate": 8.730493679580849e-06, "loss": 1.0302, "step": 7035 }, { "epoch": 0.2550106918922837, "grad_norm": 2.3371633819737414, "learning_rate": 8.730102856256811e-06, "loss": 0.9437, "step": 7036 }, { "epoch": 0.2550469355949404, "grad_norm": 2.7557799558625433, "learning_rate": 8.72971198153421e-06, "loss": 0.9818, "step": 7037 }, { "epoch": 0.25508317929759705, "grad_norm": 2.2897275134840656, "learning_rate": 8.729321055418435e-06, "loss": 0.8593, "step": 7038 }, { "epoch": 0.2551194230002537, "grad_norm": 2.426683436611879, "learning_rate": 8.728930077914874e-06, "loss": 1.0991, "step": 7039 }, { "epoch": 0.25515566670291034, "grad_norm": 2.7049036302641762, "learning_rate": 8.728539049028908e-06, "loss": 1.0143, "step": 7040 }, { "epoch": 0.25519191040556705, "grad_norm": 2.4161019985467753, "learning_rate": 8.728147968765931e-06, "loss": 0.9112, "step": 7041 }, { "epoch": 0.2552281541082237, "grad_norm": 2.29832776438166, "learning_rate": 8.72775683713133e-06, "loss": 1.1649, "step": 7042 }, { "epoch": 0.25526439781088034, "grad_norm": 2.1748155614902016, "learning_rate": 8.727365654130492e-06, "loss": 1.0157, "step": 7043 }, { "epoch": 0.25530064151353704, "grad_norm": 2.2703026918013554, "learning_rate": 8.72697441976881e-06, "loss": 0.8165, "step": 7044 }, { "epoch": 0.2553368852161937, "grad_norm": 2.360597351116518, "learning_rate": 8.726583134051675e-06, "loss": 1.2373, "step": 7045 }, { "epoch": 0.25537312891885033, "grad_norm": 2.557905659643041, "learning_rate": 8.726191796984481e-06, "loss": 0.8656, "step": 7046 }, { "epoch": 0.25540937262150704, "grad_norm": 2.38852651878112, "learning_rate": 8.725800408572614e-06, "loss": 0.8346, "step": 7047 }, { "epoch": 0.2554456163241637, "grad_norm": 2.3849415236487026, "learning_rate": 8.725408968821472e-06, "loss": 1.0745, "step": 7048 }, { "epoch": 0.25548186002682033, "grad_norm": 2.1147346775774905, "learning_rate": 8.725017477736445e-06, "loss": 0.8902, "step": 7049 }, { "epoch": 0.25551810372947703, "grad_norm": 2.262214195062275, "learning_rate": 8.724625935322933e-06, "loss": 0.812, "step": 7050 }, { "epoch": 0.2555543474321337, "grad_norm": 2.058430070660636, "learning_rate": 8.724234341586327e-06, "loss": 0.8616, "step": 7051 }, { "epoch": 0.2555905911347903, "grad_norm": 2.1644063493230163, "learning_rate": 8.723842696532024e-06, "loss": 0.9166, "step": 7052 }, { "epoch": 0.25562683483744697, "grad_norm": 2.5590904607785103, "learning_rate": 8.723451000165422e-06, "loss": 1.0304, "step": 7053 }, { "epoch": 0.25566307854010367, "grad_norm": 2.2776615101197044, "learning_rate": 8.723059252491915e-06, "loss": 0.8363, "step": 7054 }, { "epoch": 0.2556993222427603, "grad_norm": 2.517614487122728, "learning_rate": 8.722667453516905e-06, "loss": 0.8769, "step": 7055 }, { "epoch": 0.25573556594541696, "grad_norm": 2.4442163596730193, "learning_rate": 8.722275603245789e-06, "loss": 1.0426, "step": 7056 }, { "epoch": 0.25577180964807367, "grad_norm": 2.357786620277255, "learning_rate": 8.721883701683964e-06, "loss": 1.0418, "step": 7057 }, { "epoch": 0.2558080533507303, "grad_norm": 2.6235506715850634, "learning_rate": 8.721491748836834e-06, "loss": 0.9168, "step": 7058 }, { "epoch": 0.25584429705338696, "grad_norm": 2.6330369036539327, "learning_rate": 8.7210997447098e-06, "loss": 0.8788, "step": 7059 }, { "epoch": 0.25588054075604366, "grad_norm": 2.083581547731586, "learning_rate": 8.72070768930826e-06, "loss": 0.8157, "step": 7060 }, { "epoch": 0.2559167844587003, "grad_norm": 2.532967290047797, "learning_rate": 8.720315582637618e-06, "loss": 1.1416, "step": 7061 }, { "epoch": 0.25595302816135695, "grad_norm": 2.4817666825108193, "learning_rate": 8.719923424703277e-06, "loss": 0.8635, "step": 7062 }, { "epoch": 0.2559892718640136, "grad_norm": 2.43087370771053, "learning_rate": 8.719531215510644e-06, "loss": 1.1078, "step": 7063 }, { "epoch": 0.2560255155666703, "grad_norm": 2.1571828918090583, "learning_rate": 8.719138955065117e-06, "loss": 0.8373, "step": 7064 }, { "epoch": 0.25606175926932695, "grad_norm": 1.9948610775429234, "learning_rate": 8.718746643372107e-06, "loss": 0.854, "step": 7065 }, { "epoch": 0.2560980029719836, "grad_norm": 2.0941320485516455, "learning_rate": 8.718354280437015e-06, "loss": 0.7509, "step": 7066 }, { "epoch": 0.2561342466746403, "grad_norm": 2.469793063601974, "learning_rate": 8.71796186626525e-06, "loss": 1.0583, "step": 7067 }, { "epoch": 0.25617049037729694, "grad_norm": 2.637287002846094, "learning_rate": 8.71756940086222e-06, "loss": 0.9116, "step": 7068 }, { "epoch": 0.2562067340799536, "grad_norm": 2.275130321168135, "learning_rate": 8.717176884233333e-06, "loss": 0.9234, "step": 7069 }, { "epoch": 0.2562429777826103, "grad_norm": 2.497765341765413, "learning_rate": 8.716784316383994e-06, "loss": 0.8038, "step": 7070 }, { "epoch": 0.25627922148526694, "grad_norm": 2.534617960733712, "learning_rate": 8.716391697319617e-06, "loss": 1.2553, "step": 7071 }, { "epoch": 0.2563154651879236, "grad_norm": 2.5811196268287318, "learning_rate": 8.71599902704561e-06, "loss": 0.6797, "step": 7072 }, { "epoch": 0.2563517088905803, "grad_norm": 2.467435608327177, "learning_rate": 8.715606305567383e-06, "loss": 0.8764, "step": 7073 }, { "epoch": 0.25638795259323693, "grad_norm": 2.2706547553375334, "learning_rate": 8.715213532890348e-06, "loss": 0.9341, "step": 7074 }, { "epoch": 0.2564241962958936, "grad_norm": 2.433265074534032, "learning_rate": 8.714820709019918e-06, "loss": 0.9995, "step": 7075 }, { "epoch": 0.2564604399985502, "grad_norm": 2.070870362467687, "learning_rate": 8.714427833961506e-06, "loss": 0.9213, "step": 7076 }, { "epoch": 0.25649668370120693, "grad_norm": 2.032678272991685, "learning_rate": 8.714034907720523e-06, "loss": 0.9132, "step": 7077 }, { "epoch": 0.2565329274038636, "grad_norm": 2.6015160298105284, "learning_rate": 8.713641930302386e-06, "loss": 1.012, "step": 7078 }, { "epoch": 0.2565691711065202, "grad_norm": 2.3910245905978273, "learning_rate": 8.71324890171251e-06, "loss": 1.117, "step": 7079 }, { "epoch": 0.2566054148091769, "grad_norm": 2.2662066523375803, "learning_rate": 8.71285582195631e-06, "loss": 0.969, "step": 7080 }, { "epoch": 0.25664165851183357, "grad_norm": 2.374492602423842, "learning_rate": 8.7124626910392e-06, "loss": 1.0031, "step": 7081 }, { "epoch": 0.2566779022144902, "grad_norm": 2.4569119647897133, "learning_rate": 8.7120695089666e-06, "loss": 0.9916, "step": 7082 }, { "epoch": 0.2567141459171469, "grad_norm": 2.12533319510815, "learning_rate": 8.711676275743928e-06, "loss": 0.858, "step": 7083 }, { "epoch": 0.25675038961980357, "grad_norm": 2.6802818705957887, "learning_rate": 8.711282991376602e-06, "loss": 0.9495, "step": 7084 }, { "epoch": 0.2567866333224602, "grad_norm": 2.226207333959036, "learning_rate": 8.71088965587004e-06, "loss": 0.9982, "step": 7085 }, { "epoch": 0.2568228770251169, "grad_norm": 2.481726349161446, "learning_rate": 8.710496269229663e-06, "loss": 1.0845, "step": 7086 }, { "epoch": 0.25685912072777356, "grad_norm": 2.4045784114218107, "learning_rate": 8.710102831460892e-06, "loss": 1.0995, "step": 7087 }, { "epoch": 0.2568953644304302, "grad_norm": 2.2984395718581694, "learning_rate": 8.709709342569147e-06, "loss": 0.869, "step": 7088 }, { "epoch": 0.25693160813308685, "grad_norm": 2.1639575394103767, "learning_rate": 8.70931580255985e-06, "loss": 0.9259, "step": 7089 }, { "epoch": 0.25696785183574355, "grad_norm": 2.3033096823587798, "learning_rate": 8.708922211438427e-06, "loss": 0.8145, "step": 7090 }, { "epoch": 0.2570040955384002, "grad_norm": 2.2823963108434704, "learning_rate": 8.708528569210297e-06, "loss": 0.7322, "step": 7091 }, { "epoch": 0.25704033924105685, "grad_norm": 2.4228812479646944, "learning_rate": 8.708134875880887e-06, "loss": 0.8307, "step": 7092 }, { "epoch": 0.25707658294371355, "grad_norm": 2.4227278192311896, "learning_rate": 8.707741131455622e-06, "loss": 0.9776, "step": 7093 }, { "epoch": 0.2571128266463702, "grad_norm": 2.303903407939609, "learning_rate": 8.707347335939925e-06, "loss": 0.9617, "step": 7094 }, { "epoch": 0.25714907034902684, "grad_norm": 2.368778642286123, "learning_rate": 8.706953489339226e-06, "loss": 0.8581, "step": 7095 }, { "epoch": 0.25718531405168354, "grad_norm": 2.3428810959936897, "learning_rate": 8.706559591658948e-06, "loss": 0.947, "step": 7096 }, { "epoch": 0.2572215577543402, "grad_norm": 2.489438175085796, "learning_rate": 8.70616564290452e-06, "loss": 0.9374, "step": 7097 }, { "epoch": 0.25725780145699684, "grad_norm": 2.5696352933825413, "learning_rate": 8.70577164308137e-06, "loss": 0.9658, "step": 7098 }, { "epoch": 0.2572940451596535, "grad_norm": 2.4649667008135605, "learning_rate": 8.70537759219493e-06, "loss": 0.9003, "step": 7099 }, { "epoch": 0.2573302888623102, "grad_norm": 2.857143797634828, "learning_rate": 8.704983490250627e-06, "loss": 0.9658, "step": 7100 }, { "epoch": 0.25736653256496683, "grad_norm": 2.3335941322978924, "learning_rate": 8.704589337253892e-06, "loss": 0.957, "step": 7101 }, { "epoch": 0.2574027762676235, "grad_norm": 2.1333205374788444, "learning_rate": 8.704195133210158e-06, "loss": 1.0246, "step": 7102 }, { "epoch": 0.2574390199702802, "grad_norm": 2.3893945698040624, "learning_rate": 8.703800878124853e-06, "loss": 1.0664, "step": 7103 }, { "epoch": 0.2574752636729368, "grad_norm": 2.3777760545028643, "learning_rate": 8.703406572003414e-06, "loss": 0.9154, "step": 7104 }, { "epoch": 0.2575115073755935, "grad_norm": 2.5393962917432895, "learning_rate": 8.703012214851271e-06, "loss": 1.0508, "step": 7105 }, { "epoch": 0.2575477510782502, "grad_norm": 2.454847455112277, "learning_rate": 8.702617806673859e-06, "loss": 1.0278, "step": 7106 }, { "epoch": 0.2575839947809068, "grad_norm": 2.431757602476433, "learning_rate": 8.702223347476612e-06, "loss": 1.0642, "step": 7107 }, { "epoch": 0.25762023848356347, "grad_norm": 2.272719735637283, "learning_rate": 8.70182883726497e-06, "loss": 0.9214, "step": 7108 }, { "epoch": 0.25765648218622017, "grad_norm": 2.33429695326513, "learning_rate": 8.701434276044362e-06, "loss": 0.754, "step": 7109 }, { "epoch": 0.2576927258888768, "grad_norm": 2.6527015763730066, "learning_rate": 8.70103966382023e-06, "loss": 0.6498, "step": 7110 }, { "epoch": 0.25772896959153346, "grad_norm": 2.121662389682015, "learning_rate": 8.700645000598008e-06, "loss": 0.9205, "step": 7111 }, { "epoch": 0.2577652132941901, "grad_norm": 2.2466585596211477, "learning_rate": 8.700250286383136e-06, "loss": 0.9766, "step": 7112 }, { "epoch": 0.2578014569968468, "grad_norm": 2.538568537618957, "learning_rate": 8.699855521181056e-06, "loss": 0.9191, "step": 7113 }, { "epoch": 0.25783770069950346, "grad_norm": 2.4767240468258263, "learning_rate": 8.699460704997202e-06, "loss": 0.9478, "step": 7114 }, { "epoch": 0.2578739444021601, "grad_norm": 2.340164995278024, "learning_rate": 8.699065837837016e-06, "loss": 0.992, "step": 7115 }, { "epoch": 0.2579101881048168, "grad_norm": 2.38046584924631, "learning_rate": 8.698670919705941e-06, "loss": 1.077, "step": 7116 }, { "epoch": 0.25794643180747345, "grad_norm": 2.1008142921877937, "learning_rate": 8.698275950609418e-06, "loss": 0.6122, "step": 7117 }, { "epoch": 0.2579826755101301, "grad_norm": 2.3900183881361374, "learning_rate": 8.697880930552888e-06, "loss": 0.6887, "step": 7118 }, { "epoch": 0.2580189192127868, "grad_norm": 2.3881477403893063, "learning_rate": 8.697485859541796e-06, "loss": 0.8863, "step": 7119 }, { "epoch": 0.25805516291544345, "grad_norm": 2.802688433649682, "learning_rate": 8.697090737581585e-06, "loss": 0.887, "step": 7120 }, { "epoch": 0.2580914066181001, "grad_norm": 2.558725772828892, "learning_rate": 8.6966955646777e-06, "loss": 0.9176, "step": 7121 }, { "epoch": 0.25812765032075674, "grad_norm": 2.161457565686875, "learning_rate": 8.696300340835585e-06, "loss": 0.9017, "step": 7122 }, { "epoch": 0.25816389402341344, "grad_norm": 2.7371263234334027, "learning_rate": 8.695905066060689e-06, "loss": 0.9595, "step": 7123 }, { "epoch": 0.2582001377260701, "grad_norm": 2.509776212125576, "learning_rate": 8.695509740358453e-06, "loss": 1.0061, "step": 7124 }, { "epoch": 0.25823638142872674, "grad_norm": 2.3993394029112998, "learning_rate": 8.695114363734329e-06, "loss": 0.7335, "step": 7125 }, { "epoch": 0.25827262513138344, "grad_norm": 2.6967045627858606, "learning_rate": 8.694718936193764e-06, "loss": 1.0683, "step": 7126 }, { "epoch": 0.2583088688340401, "grad_norm": 2.5901654245709427, "learning_rate": 8.694323457742206e-06, "loss": 0.837, "step": 7127 }, { "epoch": 0.25834511253669673, "grad_norm": 2.1475465462930177, "learning_rate": 8.693927928385106e-06, "loss": 0.909, "step": 7128 }, { "epoch": 0.25838135623935343, "grad_norm": 2.3257367174546406, "learning_rate": 8.693532348127914e-06, "loss": 0.7837, "step": 7129 }, { "epoch": 0.2584175999420101, "grad_norm": 2.0780810118690716, "learning_rate": 8.693136716976078e-06, "loss": 0.9804, "step": 7130 }, { "epoch": 0.2584538436446667, "grad_norm": 2.306824480413975, "learning_rate": 8.692741034935053e-06, "loss": 0.9848, "step": 7131 }, { "epoch": 0.2584900873473234, "grad_norm": 2.3220976662834385, "learning_rate": 8.692345302010288e-06, "loss": 0.8515, "step": 7132 }, { "epoch": 0.2585263310499801, "grad_norm": 2.205749509581261, "learning_rate": 8.691949518207238e-06, "loss": 0.9075, "step": 7133 }, { "epoch": 0.2585625747526367, "grad_norm": 2.1838076779358153, "learning_rate": 8.691553683531358e-06, "loss": 1.1413, "step": 7134 }, { "epoch": 0.25859881845529337, "grad_norm": 2.543832938834878, "learning_rate": 8.6911577979881e-06, "loss": 0.9664, "step": 7135 }, { "epoch": 0.25863506215795007, "grad_norm": 2.404902296540198, "learning_rate": 8.69076186158292e-06, "loss": 0.9487, "step": 7136 }, { "epoch": 0.2586713058606067, "grad_norm": 2.7154711900477673, "learning_rate": 8.690365874321274e-06, "loss": 0.8848, "step": 7137 }, { "epoch": 0.25870754956326336, "grad_norm": 1.9605141604228298, "learning_rate": 8.689969836208618e-06, "loss": 0.6612, "step": 7138 }, { "epoch": 0.25874379326592006, "grad_norm": 2.459039237797141, "learning_rate": 8.68957374725041e-06, "loss": 1.0183, "step": 7139 }, { "epoch": 0.2587800369685767, "grad_norm": 2.489531954026919, "learning_rate": 8.689177607452106e-06, "loss": 0.8751, "step": 7140 }, { "epoch": 0.25881628067123336, "grad_norm": 2.289748743913554, "learning_rate": 8.688781416819166e-06, "loss": 1.0002, "step": 7141 }, { "epoch": 0.25885252437389006, "grad_norm": 2.3995108747249088, "learning_rate": 8.688385175357049e-06, "loss": 0.9921, "step": 7142 }, { "epoch": 0.2588887680765467, "grad_norm": 2.404271891428496, "learning_rate": 8.687988883071215e-06, "loss": 1.0054, "step": 7143 }, { "epoch": 0.25892501177920335, "grad_norm": 2.502703068998152, "learning_rate": 8.687592539967125e-06, "loss": 0.9351, "step": 7144 }, { "epoch": 0.25896125548186005, "grad_norm": 2.3753246914262616, "learning_rate": 8.687196146050238e-06, "loss": 1.0061, "step": 7145 }, { "epoch": 0.2589974991845167, "grad_norm": 2.525616683461458, "learning_rate": 8.686799701326021e-06, "loss": 0.996, "step": 7146 }, { "epoch": 0.25903374288717335, "grad_norm": 2.359688215240023, "learning_rate": 8.686403205799931e-06, "loss": 0.9887, "step": 7147 }, { "epoch": 0.25906998658983, "grad_norm": 2.3011669512706803, "learning_rate": 8.686006659477438e-06, "loss": 1.0438, "step": 7148 }, { "epoch": 0.2591062302924867, "grad_norm": 2.2933447355640855, "learning_rate": 8.685610062363998e-06, "loss": 1.0745, "step": 7149 }, { "epoch": 0.25914247399514334, "grad_norm": 2.3285576367483265, "learning_rate": 8.685213414465083e-06, "loss": 1.0705, "step": 7150 }, { "epoch": 0.2591787176978, "grad_norm": 2.249145261203262, "learning_rate": 8.684816715786155e-06, "loss": 0.7759, "step": 7151 }, { "epoch": 0.2592149614004567, "grad_norm": 2.1013854578473854, "learning_rate": 8.684419966332682e-06, "loss": 0.7786, "step": 7152 }, { "epoch": 0.25925120510311334, "grad_norm": 2.0873639685091927, "learning_rate": 8.68402316611013e-06, "loss": 0.8921, "step": 7153 }, { "epoch": 0.25928744880577, "grad_norm": 2.1147690355555646, "learning_rate": 8.683626315123967e-06, "loss": 0.9536, "step": 7154 }, { "epoch": 0.2593236925084267, "grad_norm": 2.683516206438427, "learning_rate": 8.683229413379659e-06, "loss": 1.0092, "step": 7155 }, { "epoch": 0.25935993621108333, "grad_norm": 2.4424805535769925, "learning_rate": 8.68283246088268e-06, "loss": 0.9454, "step": 7156 }, { "epoch": 0.25939617991374, "grad_norm": 2.3666375699032223, "learning_rate": 8.682435457638495e-06, "loss": 0.9981, "step": 7157 }, { "epoch": 0.2594324236163966, "grad_norm": 2.3285672055447075, "learning_rate": 8.682038403652577e-06, "loss": 1.2033, "step": 7158 }, { "epoch": 0.2594686673190533, "grad_norm": 2.11046759995618, "learning_rate": 8.681641298930398e-06, "loss": 0.9507, "step": 7159 }, { "epoch": 0.25950491102170997, "grad_norm": 2.274073360720369, "learning_rate": 8.681244143477428e-06, "loss": 1.0668, "step": 7160 }, { "epoch": 0.2595411547243666, "grad_norm": 2.423480687219753, "learning_rate": 8.680846937299138e-06, "loss": 1.1151, "step": 7161 }, { "epoch": 0.2595773984270233, "grad_norm": 2.3400834165117885, "learning_rate": 8.680449680401007e-06, "loss": 0.903, "step": 7162 }, { "epoch": 0.25961364212967997, "grad_norm": 2.2638897361453743, "learning_rate": 8.680052372788501e-06, "loss": 0.8111, "step": 7163 }, { "epoch": 0.2596498858323366, "grad_norm": 2.52147877654874, "learning_rate": 8.6796550144671e-06, "loss": 1.1705, "step": 7164 }, { "epoch": 0.2596861295349933, "grad_norm": 2.190419654660354, "learning_rate": 8.67925760544228e-06, "loss": 0.8585, "step": 7165 }, { "epoch": 0.25972237323764996, "grad_norm": 2.3235428704502037, "learning_rate": 8.678860145719516e-06, "loss": 1.0305, "step": 7166 }, { "epoch": 0.2597586169403066, "grad_norm": 2.4591959385730413, "learning_rate": 8.678462635304281e-06, "loss": 0.9978, "step": 7167 }, { "epoch": 0.2597948606429633, "grad_norm": 2.6018984776559426, "learning_rate": 8.678065074202058e-06, "loss": 0.9124, "step": 7168 }, { "epoch": 0.25983110434561996, "grad_norm": 2.3716137728408393, "learning_rate": 8.677667462418321e-06, "loss": 0.8297, "step": 7169 }, { "epoch": 0.2598673480482766, "grad_norm": 2.4393098864232465, "learning_rate": 8.677269799958552e-06, "loss": 0.849, "step": 7170 }, { "epoch": 0.25990359175093325, "grad_norm": 2.302251617633608, "learning_rate": 8.67687208682823e-06, "loss": 0.9731, "step": 7171 }, { "epoch": 0.25993983545358995, "grad_norm": 2.3838867137769166, "learning_rate": 8.676474323032833e-06, "loss": 0.8744, "step": 7172 }, { "epoch": 0.2599760791562466, "grad_norm": 2.417468050920259, "learning_rate": 8.676076508577842e-06, "loss": 0.7542, "step": 7173 }, { "epoch": 0.26001232285890324, "grad_norm": 2.5235489116791796, "learning_rate": 8.675678643468742e-06, "loss": 1.0582, "step": 7174 }, { "epoch": 0.26004856656155995, "grad_norm": 2.3850677348204017, "learning_rate": 8.675280727711011e-06, "loss": 0.9605, "step": 7175 }, { "epoch": 0.2600848102642166, "grad_norm": 2.3306260108334005, "learning_rate": 8.674882761310138e-06, "loss": 0.9515, "step": 7176 }, { "epoch": 0.26012105396687324, "grad_norm": 2.565817540667808, "learning_rate": 8.674484744271599e-06, "loss": 1.0031, "step": 7177 }, { "epoch": 0.26015729766952994, "grad_norm": 2.2523880829557146, "learning_rate": 8.674086676600886e-06, "loss": 0.9796, "step": 7178 }, { "epoch": 0.2601935413721866, "grad_norm": 2.3644843515700567, "learning_rate": 8.673688558303478e-06, "loss": 0.8766, "step": 7179 }, { "epoch": 0.26022978507484323, "grad_norm": 2.432190353993021, "learning_rate": 8.673290389384864e-06, "loss": 0.975, "step": 7180 }, { "epoch": 0.26026602877749994, "grad_norm": 2.481372411426786, "learning_rate": 8.67289216985053e-06, "loss": 0.8889, "step": 7181 }, { "epoch": 0.2603022724801566, "grad_norm": 2.03308698895967, "learning_rate": 8.672493899705965e-06, "loss": 0.992, "step": 7182 }, { "epoch": 0.26033851618281323, "grad_norm": 2.343284286885957, "learning_rate": 8.672095578956651e-06, "loss": 0.9099, "step": 7183 }, { "epoch": 0.2603747598854699, "grad_norm": 2.301254366964257, "learning_rate": 8.671697207608086e-06, "loss": 0.9863, "step": 7184 }, { "epoch": 0.2604110035881266, "grad_norm": 2.2252846573843823, "learning_rate": 8.67129878566575e-06, "loss": 1.0757, "step": 7185 }, { "epoch": 0.2604472472907832, "grad_norm": 2.394715917489973, "learning_rate": 8.670900313135138e-06, "loss": 1.0168, "step": 7186 }, { "epoch": 0.26048349099343987, "grad_norm": 2.2773937663838395, "learning_rate": 8.670501790021742e-06, "loss": 0.928, "step": 7187 }, { "epoch": 0.26051973469609657, "grad_norm": 2.5538109808706393, "learning_rate": 8.670103216331048e-06, "loss": 1.0507, "step": 7188 }, { "epoch": 0.2605559783987532, "grad_norm": 2.1887231841835497, "learning_rate": 8.669704592068553e-06, "loss": 1.1377, "step": 7189 }, { "epoch": 0.26059222210140986, "grad_norm": 2.4255217260953374, "learning_rate": 8.669305917239748e-06, "loss": 0.9572, "step": 7190 }, { "epoch": 0.26062846580406657, "grad_norm": 2.5568986260943514, "learning_rate": 8.668907191850127e-06, "loss": 0.8397, "step": 7191 }, { "epoch": 0.2606647095067232, "grad_norm": 2.417296560196906, "learning_rate": 8.668508415905183e-06, "loss": 0.9208, "step": 7192 }, { "epoch": 0.26070095320937986, "grad_norm": 2.230008914050927, "learning_rate": 8.668109589410411e-06, "loss": 0.7848, "step": 7193 }, { "epoch": 0.2607371969120365, "grad_norm": 2.130077527598518, "learning_rate": 8.66771071237131e-06, "loss": 0.8515, "step": 7194 }, { "epoch": 0.2607734406146932, "grad_norm": 2.389218910317946, "learning_rate": 8.66731178479337e-06, "loss": 0.8966, "step": 7195 }, { "epoch": 0.26080968431734985, "grad_norm": 2.319728244276531, "learning_rate": 8.666912806682093e-06, "loss": 1.0048, "step": 7196 }, { "epoch": 0.2608459280200065, "grad_norm": 2.2593277253397925, "learning_rate": 8.666513778042976e-06, "loss": 0.9491, "step": 7197 }, { "epoch": 0.2608821717226632, "grad_norm": 2.6172562830413644, "learning_rate": 8.666114698881516e-06, "loss": 0.9049, "step": 7198 }, { "epoch": 0.26091841542531985, "grad_norm": 2.2451502968777772, "learning_rate": 8.665715569203212e-06, "loss": 1.1277, "step": 7199 }, { "epoch": 0.2609546591279765, "grad_norm": 2.0135566729300467, "learning_rate": 8.665316389013564e-06, "loss": 0.8265, "step": 7200 }, { "epoch": 0.2609909028306332, "grad_norm": 2.284968580445366, "learning_rate": 8.664917158318075e-06, "loss": 1.0053, "step": 7201 }, { "epoch": 0.26102714653328984, "grad_norm": 2.35560841984032, "learning_rate": 8.664517877122242e-06, "loss": 1.1797, "step": 7202 }, { "epoch": 0.2610633902359465, "grad_norm": 2.3879357839951365, "learning_rate": 8.66411854543157e-06, "loss": 1.0445, "step": 7203 }, { "epoch": 0.2610996339386032, "grad_norm": 2.313605238713859, "learning_rate": 8.66371916325156e-06, "loss": 0.8424, "step": 7204 }, { "epoch": 0.26113587764125984, "grad_norm": 2.550201096968628, "learning_rate": 8.663319730587715e-06, "loss": 0.9621, "step": 7205 }, { "epoch": 0.2611721213439165, "grad_norm": 2.486250796402241, "learning_rate": 8.662920247445542e-06, "loss": 0.9302, "step": 7206 }, { "epoch": 0.26120836504657313, "grad_norm": 2.6267845261668716, "learning_rate": 8.662520713830542e-06, "loss": 0.9501, "step": 7207 }, { "epoch": 0.26124460874922983, "grad_norm": 2.217710840307555, "learning_rate": 8.662121129748221e-06, "loss": 0.7485, "step": 7208 }, { "epoch": 0.2612808524518865, "grad_norm": 2.229080934819412, "learning_rate": 8.661721495204085e-06, "loss": 1.0467, "step": 7209 }, { "epoch": 0.2613170961545431, "grad_norm": 2.4918671207861687, "learning_rate": 8.661321810203643e-06, "loss": 1.0362, "step": 7210 }, { "epoch": 0.26135333985719983, "grad_norm": 2.3773307061044617, "learning_rate": 8.6609220747524e-06, "loss": 0.9563, "step": 7211 }, { "epoch": 0.2613895835598565, "grad_norm": 2.6666400081742623, "learning_rate": 8.660522288855867e-06, "loss": 1.0634, "step": 7212 }, { "epoch": 0.2614258272625131, "grad_norm": 2.4771712470018357, "learning_rate": 8.660122452519546e-06, "loss": 1.0187, "step": 7213 }, { "epoch": 0.2614620709651698, "grad_norm": 2.289321351121366, "learning_rate": 8.659722565748953e-06, "loss": 0.7933, "step": 7214 }, { "epoch": 0.26149831466782647, "grad_norm": 2.4675018447844446, "learning_rate": 8.659322628549598e-06, "loss": 1.0513, "step": 7215 }, { "epoch": 0.2615345583704831, "grad_norm": 2.3682775999654413, "learning_rate": 8.65892264092699e-06, "loss": 0.9315, "step": 7216 }, { "epoch": 0.2615708020731398, "grad_norm": 2.491347342074004, "learning_rate": 8.65852260288664e-06, "loss": 1.0716, "step": 7217 }, { "epoch": 0.26160704577579647, "grad_norm": 2.585318188381726, "learning_rate": 8.658122514434062e-06, "loss": 0.9952, "step": 7218 }, { "epoch": 0.2616432894784531, "grad_norm": 2.3989741243987983, "learning_rate": 8.657722375574767e-06, "loss": 1.0012, "step": 7219 }, { "epoch": 0.26167953318110976, "grad_norm": 2.4320003133504486, "learning_rate": 8.65732218631427e-06, "loss": 0.9012, "step": 7220 }, { "epoch": 0.26171577688376646, "grad_norm": 2.538338794053416, "learning_rate": 8.656921946658085e-06, "loss": 1.0537, "step": 7221 }, { "epoch": 0.2617520205864231, "grad_norm": 2.292670630321844, "learning_rate": 8.656521656611728e-06, "loss": 0.8491, "step": 7222 }, { "epoch": 0.26178826428907975, "grad_norm": 2.529216607938595, "learning_rate": 8.656121316180713e-06, "loss": 0.7141, "step": 7223 }, { "epoch": 0.26182450799173645, "grad_norm": 2.421928655122011, "learning_rate": 8.65572092537056e-06, "loss": 1.0704, "step": 7224 }, { "epoch": 0.2618607516943931, "grad_norm": 2.373428282515153, "learning_rate": 8.65532048418678e-06, "loss": 0.9775, "step": 7225 }, { "epoch": 0.26189699539704975, "grad_norm": 2.0537021449617536, "learning_rate": 8.654919992634897e-06, "loss": 1.0529, "step": 7226 }, { "epoch": 0.26193323909970645, "grad_norm": 2.5313735745431645, "learning_rate": 8.654519450720424e-06, "loss": 1.0128, "step": 7227 }, { "epoch": 0.2619694828023631, "grad_norm": 2.2484257875412763, "learning_rate": 8.654118858448886e-06, "loss": 0.9767, "step": 7228 }, { "epoch": 0.26200572650501974, "grad_norm": 2.622218125272482, "learning_rate": 8.6537182158258e-06, "loss": 0.9726, "step": 7229 }, { "epoch": 0.2620419702076764, "grad_norm": 2.427311290511229, "learning_rate": 8.653317522856686e-06, "loss": 1.0066, "step": 7230 }, { "epoch": 0.2620782139103331, "grad_norm": 2.523495935088416, "learning_rate": 8.652916779547064e-06, "loss": 1.0151, "step": 7231 }, { "epoch": 0.26211445761298974, "grad_norm": 2.441387661951426, "learning_rate": 8.65251598590246e-06, "loss": 0.8385, "step": 7232 }, { "epoch": 0.2621507013156464, "grad_norm": 2.225899680697221, "learning_rate": 8.652115141928394e-06, "loss": 0.9209, "step": 7233 }, { "epoch": 0.2621869450183031, "grad_norm": 2.414898541558715, "learning_rate": 8.651714247630391e-06, "loss": 1.0071, "step": 7234 }, { "epoch": 0.26222318872095973, "grad_norm": 2.3830643273566605, "learning_rate": 8.651313303013975e-06, "loss": 1.0916, "step": 7235 }, { "epoch": 0.2622594324236164, "grad_norm": 2.365086786358209, "learning_rate": 8.650912308084668e-06, "loss": 0.9405, "step": 7236 }, { "epoch": 0.2622956761262731, "grad_norm": 2.2398522489300317, "learning_rate": 8.650511262847998e-06, "loss": 1.0727, "step": 7237 }, { "epoch": 0.2623319198289297, "grad_norm": 2.5103098300888234, "learning_rate": 8.650110167309491e-06, "loss": 0.9831, "step": 7238 }, { "epoch": 0.2623681635315864, "grad_norm": 2.532080786000668, "learning_rate": 8.649709021474673e-06, "loss": 0.9606, "step": 7239 }, { "epoch": 0.2624044072342431, "grad_norm": 2.4180370362801247, "learning_rate": 8.649307825349071e-06, "loss": 0.8845, "step": 7240 }, { "epoch": 0.2624406509368997, "grad_norm": 2.214951887234522, "learning_rate": 8.648906578938215e-06, "loss": 1.0091, "step": 7241 }, { "epoch": 0.26247689463955637, "grad_norm": 2.46024202501774, "learning_rate": 8.648505282247635e-06, "loss": 0.7726, "step": 7242 }, { "epoch": 0.262513138342213, "grad_norm": 2.670847474661927, "learning_rate": 8.648103935282856e-06, "loss": 1.0557, "step": 7243 }, { "epoch": 0.2625493820448697, "grad_norm": 2.5563318516483835, "learning_rate": 8.647702538049412e-06, "loss": 0.9255, "step": 7244 }, { "epoch": 0.26258562574752636, "grad_norm": 2.178684883781275, "learning_rate": 8.647301090552834e-06, "loss": 1.0994, "step": 7245 }, { "epoch": 0.262621869450183, "grad_norm": 2.3171813971049513, "learning_rate": 8.646899592798652e-06, "loss": 0.8775, "step": 7246 }, { "epoch": 0.2626581131528397, "grad_norm": 2.383442499746417, "learning_rate": 8.646498044792397e-06, "loss": 0.9337, "step": 7247 }, { "epoch": 0.26269435685549636, "grad_norm": 2.4237288413393823, "learning_rate": 8.646096446539608e-06, "loss": 0.9543, "step": 7248 }, { "epoch": 0.262730600558153, "grad_norm": 2.4019714850665213, "learning_rate": 8.645694798045814e-06, "loss": 0.9868, "step": 7249 }, { "epoch": 0.2627668442608097, "grad_norm": 2.3528708141804713, "learning_rate": 8.64529309931655e-06, "loss": 0.9309, "step": 7250 }, { "epoch": 0.26280308796346635, "grad_norm": 2.618193246059517, "learning_rate": 8.64489135035735e-06, "loss": 1.0411, "step": 7251 }, { "epoch": 0.262839331666123, "grad_norm": 2.4174465018950926, "learning_rate": 8.644489551173754e-06, "loss": 0.9718, "step": 7252 }, { "epoch": 0.2628755753687797, "grad_norm": 2.2005783460154995, "learning_rate": 8.644087701771295e-06, "loss": 1.0404, "step": 7253 }, { "epoch": 0.26291181907143635, "grad_norm": 2.169434736519741, "learning_rate": 8.643685802155513e-06, "loss": 0.9526, "step": 7254 }, { "epoch": 0.262948062774093, "grad_norm": 2.688790504726137, "learning_rate": 8.643283852331942e-06, "loss": 0.9444, "step": 7255 }, { "epoch": 0.26298430647674964, "grad_norm": 2.3181315110291103, "learning_rate": 8.642881852306125e-06, "loss": 1.0891, "step": 7256 }, { "epoch": 0.26302055017940634, "grad_norm": 2.400240475751649, "learning_rate": 8.642479802083597e-06, "loss": 1.009, "step": 7257 }, { "epoch": 0.263056793882063, "grad_norm": 2.3092402539110712, "learning_rate": 8.642077701669902e-06, "loss": 1.0649, "step": 7258 }, { "epoch": 0.26309303758471964, "grad_norm": 2.342777776262397, "learning_rate": 8.641675551070578e-06, "loss": 1.041, "step": 7259 }, { "epoch": 0.26312928128737634, "grad_norm": 2.480032385980843, "learning_rate": 8.641273350291167e-06, "loss": 0.7859, "step": 7260 }, { "epoch": 0.263165524990033, "grad_norm": 2.2397008368100044, "learning_rate": 8.640871099337213e-06, "loss": 0.816, "step": 7261 }, { "epoch": 0.26320176869268963, "grad_norm": 2.2801665382393406, "learning_rate": 8.640468798214255e-06, "loss": 0.8896, "step": 7262 }, { "epoch": 0.26323801239534633, "grad_norm": 2.228695019987489, "learning_rate": 8.64006644692784e-06, "loss": 0.8782, "step": 7263 }, { "epoch": 0.263274256098003, "grad_norm": 2.4638490595691374, "learning_rate": 8.63966404548351e-06, "loss": 0.9779, "step": 7264 }, { "epoch": 0.2633104998006596, "grad_norm": 2.2789357910567047, "learning_rate": 8.639261593886813e-06, "loss": 1.1132, "step": 7265 }, { "epoch": 0.26334674350331627, "grad_norm": 2.3730903552312532, "learning_rate": 8.63885909214329e-06, "loss": 0.9765, "step": 7266 }, { "epoch": 0.263382987205973, "grad_norm": 2.596655940720989, "learning_rate": 8.638456540258488e-06, "loss": 0.9617, "step": 7267 }, { "epoch": 0.2634192309086296, "grad_norm": 2.336802207735181, "learning_rate": 8.63805393823796e-06, "loss": 1.1293, "step": 7268 }, { "epoch": 0.26345547461128627, "grad_norm": 2.472991080585881, "learning_rate": 8.637651286087244e-06, "loss": 0.9375, "step": 7269 }, { "epoch": 0.26349171831394297, "grad_norm": 2.307579844112126, "learning_rate": 8.637248583811896e-06, "loss": 0.9112, "step": 7270 }, { "epoch": 0.2635279620165996, "grad_norm": 2.4651576073221917, "learning_rate": 8.636845831417463e-06, "loss": 0.9628, "step": 7271 }, { "epoch": 0.26356420571925626, "grad_norm": 1.9009329228988117, "learning_rate": 8.636443028909492e-06, "loss": 0.8417, "step": 7272 }, { "epoch": 0.26360044942191296, "grad_norm": 2.493362086644046, "learning_rate": 8.636040176293536e-06, "loss": 0.9926, "step": 7273 }, { "epoch": 0.2636366931245696, "grad_norm": 2.3136133758566237, "learning_rate": 8.635637273575145e-06, "loss": 0.9005, "step": 7274 }, { "epoch": 0.26367293682722626, "grad_norm": 2.275729624193077, "learning_rate": 8.63523432075987e-06, "loss": 0.9288, "step": 7275 }, { "epoch": 0.26370918052988296, "grad_norm": 2.382591135638314, "learning_rate": 8.634831317853268e-06, "loss": 0.9887, "step": 7276 }, { "epoch": 0.2637454242325396, "grad_norm": 2.4087477152366845, "learning_rate": 8.634428264860885e-06, "loss": 1.015, "step": 7277 }, { "epoch": 0.26378166793519625, "grad_norm": 2.504046841901447, "learning_rate": 8.634025161788283e-06, "loss": 1.0052, "step": 7278 }, { "epoch": 0.2638179116378529, "grad_norm": 2.3447986311798044, "learning_rate": 8.633622008641007e-06, "loss": 0.8823, "step": 7279 }, { "epoch": 0.2638541553405096, "grad_norm": 2.3849060327197487, "learning_rate": 8.63321880542462e-06, "loss": 0.8818, "step": 7280 }, { "epoch": 0.26389039904316625, "grad_norm": 2.6242256367209214, "learning_rate": 8.632815552144675e-06, "loss": 0.9006, "step": 7281 }, { "epoch": 0.2639266427458229, "grad_norm": 2.383191966216421, "learning_rate": 8.632412248806728e-06, "loss": 1.1397, "step": 7282 }, { "epoch": 0.2639628864484796, "grad_norm": 2.5852396795737715, "learning_rate": 8.632008895416338e-06, "loss": 0.9311, "step": 7283 }, { "epoch": 0.26399913015113624, "grad_norm": 2.366790402890991, "learning_rate": 8.631605491979061e-06, "loss": 0.8888, "step": 7284 }, { "epoch": 0.2640353738537929, "grad_norm": 2.340608954301601, "learning_rate": 8.631202038500458e-06, "loss": 0.9465, "step": 7285 }, { "epoch": 0.2640716175564496, "grad_norm": 2.302201775162049, "learning_rate": 8.630798534986086e-06, "loss": 0.8779, "step": 7286 }, { "epoch": 0.26410786125910624, "grad_norm": 2.3897238221086234, "learning_rate": 8.630394981441506e-06, "loss": 1.0253, "step": 7287 }, { "epoch": 0.2641441049617629, "grad_norm": 2.3914994412069914, "learning_rate": 8.62999137787228e-06, "loss": 0.9886, "step": 7288 }, { "epoch": 0.2641803486644196, "grad_norm": 2.240756479109129, "learning_rate": 8.629587724283967e-06, "loss": 1.2822, "step": 7289 }, { "epoch": 0.26421659236707623, "grad_norm": 2.570431212869966, "learning_rate": 8.62918402068213e-06, "loss": 1.0354, "step": 7290 }, { "epoch": 0.2642528360697329, "grad_norm": 2.3589374897054065, "learning_rate": 8.628780267072332e-06, "loss": 1.0761, "step": 7291 }, { "epoch": 0.2642890797723895, "grad_norm": 2.3412007558012884, "learning_rate": 8.628376463460138e-06, "loss": 0.9739, "step": 7292 }, { "epoch": 0.2643253234750462, "grad_norm": 2.413840780703836, "learning_rate": 8.62797260985111e-06, "loss": 0.9095, "step": 7293 }, { "epoch": 0.26436156717770287, "grad_norm": 2.399327034700326, "learning_rate": 8.627568706250812e-06, "loss": 0.8681, "step": 7294 }, { "epoch": 0.2643978108803595, "grad_norm": 2.132670415657037, "learning_rate": 8.627164752664811e-06, "loss": 0.9818, "step": 7295 }, { "epoch": 0.2644340545830162, "grad_norm": 2.2424021317577485, "learning_rate": 8.626760749098677e-06, "loss": 0.6467, "step": 7296 }, { "epoch": 0.26447029828567287, "grad_norm": 1.9379864441161225, "learning_rate": 8.62635669555797e-06, "loss": 0.7849, "step": 7297 }, { "epoch": 0.2645065419883295, "grad_norm": 2.649230946316348, "learning_rate": 8.625952592048261e-06, "loss": 0.9214, "step": 7298 }, { "epoch": 0.2645427856909862, "grad_norm": 2.178250476207768, "learning_rate": 8.62554843857512e-06, "loss": 1.0684, "step": 7299 }, { "epoch": 0.26457902939364286, "grad_norm": 2.3414703602083833, "learning_rate": 8.625144235144112e-06, "loss": 1.0191, "step": 7300 }, { "epoch": 0.2646152730962995, "grad_norm": 2.190840358007625, "learning_rate": 8.62473998176081e-06, "loss": 0.9006, "step": 7301 }, { "epoch": 0.26465151679895615, "grad_norm": 2.2597215750272825, "learning_rate": 8.624335678430784e-06, "loss": 0.8905, "step": 7302 }, { "epoch": 0.26468776050161286, "grad_norm": 2.6497330558403305, "learning_rate": 8.623931325159603e-06, "loss": 1.129, "step": 7303 }, { "epoch": 0.2647240042042695, "grad_norm": 2.1281934453796008, "learning_rate": 8.623526921952841e-06, "loss": 1.0128, "step": 7304 }, { "epoch": 0.26476024790692615, "grad_norm": 2.2908134609529016, "learning_rate": 8.623122468816069e-06, "loss": 0.9912, "step": 7305 }, { "epoch": 0.26479649160958285, "grad_norm": 2.4047639538120658, "learning_rate": 8.622717965754862e-06, "loss": 0.9981, "step": 7306 }, { "epoch": 0.2648327353122395, "grad_norm": 2.736691784710208, "learning_rate": 8.622313412774789e-06, "loss": 1.1173, "step": 7307 }, { "epoch": 0.26486897901489614, "grad_norm": 2.2663845536104894, "learning_rate": 8.621908809881431e-06, "loss": 0.994, "step": 7308 }, { "epoch": 0.26490522271755285, "grad_norm": 2.5402542104557666, "learning_rate": 8.621504157080359e-06, "loss": 0.8481, "step": 7309 }, { "epoch": 0.2649414664202095, "grad_norm": 2.5017363955776237, "learning_rate": 8.621099454377148e-06, "loss": 0.665, "step": 7310 }, { "epoch": 0.26497771012286614, "grad_norm": 2.4793899466125073, "learning_rate": 8.62069470177738e-06, "loss": 0.9228, "step": 7311 }, { "epoch": 0.26501395382552284, "grad_norm": 2.243842943671023, "learning_rate": 8.620289899286628e-06, "loss": 1.0358, "step": 7312 }, { "epoch": 0.2650501975281795, "grad_norm": 2.357814539871019, "learning_rate": 8.619885046910468e-06, "loss": 0.894, "step": 7313 }, { "epoch": 0.26508644123083613, "grad_norm": 2.35251630315589, "learning_rate": 8.619480144654484e-06, "loss": 0.9507, "step": 7314 }, { "epoch": 0.2651226849334928, "grad_norm": 2.9495285213514926, "learning_rate": 8.619075192524252e-06, "loss": 0.9653, "step": 7315 }, { "epoch": 0.2651589286361495, "grad_norm": 2.362183555797208, "learning_rate": 8.61867019052535e-06, "loss": 0.8695, "step": 7316 }, { "epoch": 0.26519517233880613, "grad_norm": 2.432050198080892, "learning_rate": 8.618265138663365e-06, "loss": 0.9797, "step": 7317 }, { "epoch": 0.2652314160414628, "grad_norm": 2.4251677229889608, "learning_rate": 8.617860036943872e-06, "loss": 0.8692, "step": 7318 }, { "epoch": 0.2652676597441195, "grad_norm": 2.415718564678155, "learning_rate": 8.617454885372458e-06, "loss": 1.041, "step": 7319 }, { "epoch": 0.2653039034467761, "grad_norm": 2.4324220221548067, "learning_rate": 8.617049683954702e-06, "loss": 0.9551, "step": 7320 }, { "epoch": 0.26534014714943277, "grad_norm": 2.6366113269579983, "learning_rate": 8.61664443269619e-06, "loss": 0.9062, "step": 7321 }, { "epoch": 0.26537639085208947, "grad_norm": 2.370265930458039, "learning_rate": 8.616239131602503e-06, "loss": 1.0359, "step": 7322 }, { "epoch": 0.2654126345547461, "grad_norm": 2.4136423741581665, "learning_rate": 8.61583378067923e-06, "loss": 0.9818, "step": 7323 }, { "epoch": 0.26544887825740276, "grad_norm": 2.3883645676188046, "learning_rate": 8.615428379931953e-06, "loss": 0.9314, "step": 7324 }, { "epoch": 0.26548512196005947, "grad_norm": 2.237738274670231, "learning_rate": 8.615022929366259e-06, "loss": 0.9608, "step": 7325 }, { "epoch": 0.2655213656627161, "grad_norm": 2.369983260742814, "learning_rate": 8.614617428987738e-06, "loss": 0.898, "step": 7326 }, { "epoch": 0.26555760936537276, "grad_norm": 2.604325162091064, "learning_rate": 8.614211878801973e-06, "loss": 1.0328, "step": 7327 }, { "epoch": 0.2655938530680294, "grad_norm": 2.3311644113109464, "learning_rate": 8.613806278814552e-06, "loss": 0.9409, "step": 7328 }, { "epoch": 0.2656300967706861, "grad_norm": 2.5605326015681675, "learning_rate": 8.61340062903107e-06, "loss": 0.9483, "step": 7329 }, { "epoch": 0.26566634047334275, "grad_norm": 2.3280330978074466, "learning_rate": 8.61299492945711e-06, "loss": 0.9604, "step": 7330 }, { "epoch": 0.2657025841759994, "grad_norm": 2.20756645105364, "learning_rate": 8.612589180098266e-06, "loss": 0.9577, "step": 7331 }, { "epoch": 0.2657388278786561, "grad_norm": 2.7554618216743823, "learning_rate": 8.612183380960126e-06, "loss": 0.9999, "step": 7332 }, { "epoch": 0.26577507158131275, "grad_norm": 2.1986623697714216, "learning_rate": 8.611777532048287e-06, "loss": 1.1431, "step": 7333 }, { "epoch": 0.2658113152839694, "grad_norm": 3.3098611430136944, "learning_rate": 8.611371633368335e-06, "loss": 0.8654, "step": 7334 }, { "epoch": 0.2658475589866261, "grad_norm": 2.191355407465785, "learning_rate": 8.610965684925868e-06, "loss": 0.9018, "step": 7335 }, { "epoch": 0.26588380268928274, "grad_norm": 2.5512572753766602, "learning_rate": 8.610559686726476e-06, "loss": 0.8588, "step": 7336 }, { "epoch": 0.2659200463919394, "grad_norm": 2.3467550427143697, "learning_rate": 8.610153638775756e-06, "loss": 0.9527, "step": 7337 }, { "epoch": 0.26595629009459604, "grad_norm": 2.496198417491091, "learning_rate": 8.609747541079304e-06, "loss": 0.8916, "step": 7338 }, { "epoch": 0.26599253379725274, "grad_norm": 2.5792115321763336, "learning_rate": 8.609341393642709e-06, "loss": 0.9347, "step": 7339 }, { "epoch": 0.2660287774999094, "grad_norm": 2.3765628276372635, "learning_rate": 8.608935196471578e-06, "loss": 0.9333, "step": 7340 }, { "epoch": 0.26606502120256603, "grad_norm": 2.2416364274402527, "learning_rate": 8.6085289495715e-06, "loss": 1.1054, "step": 7341 }, { "epoch": 0.26610126490522273, "grad_norm": 2.4244858050265536, "learning_rate": 8.608122652948075e-06, "loss": 0.9308, "step": 7342 }, { "epoch": 0.2661375086078794, "grad_norm": 2.2493333036343017, "learning_rate": 8.607716306606902e-06, "loss": 0.8647, "step": 7343 }, { "epoch": 0.266173752310536, "grad_norm": 2.2923152492738, "learning_rate": 8.60730991055358e-06, "loss": 0.9784, "step": 7344 }, { "epoch": 0.26620999601319273, "grad_norm": 2.431752740717589, "learning_rate": 8.606903464793711e-06, "loss": 0.9859, "step": 7345 }, { "epoch": 0.2662462397158494, "grad_norm": 2.349133386508286, "learning_rate": 8.606496969332892e-06, "loss": 0.8791, "step": 7346 }, { "epoch": 0.266282483418506, "grad_norm": 2.218097770623361, "learning_rate": 8.606090424176727e-06, "loss": 0.913, "step": 7347 }, { "epoch": 0.2663187271211627, "grad_norm": 2.3874562155112593, "learning_rate": 8.605683829330816e-06, "loss": 1.0679, "step": 7348 }, { "epoch": 0.26635497082381937, "grad_norm": 2.3387616752019946, "learning_rate": 8.605277184800762e-06, "loss": 1.051, "step": 7349 }, { "epoch": 0.266391214526476, "grad_norm": 2.3664656094349317, "learning_rate": 8.60487049059217e-06, "loss": 1.0843, "step": 7350 }, { "epoch": 0.26642745822913266, "grad_norm": 2.4583089723410882, "learning_rate": 8.604463746710644e-06, "loss": 0.7448, "step": 7351 }, { "epoch": 0.26646370193178937, "grad_norm": 2.254714713573917, "learning_rate": 8.604056953161785e-06, "loss": 0.9239, "step": 7352 }, { "epoch": 0.266499945634446, "grad_norm": 2.4472960792515432, "learning_rate": 8.6036501099512e-06, "loss": 0.8994, "step": 7353 }, { "epoch": 0.26653618933710266, "grad_norm": 2.323875633466164, "learning_rate": 8.603243217084499e-06, "loss": 0.9605, "step": 7354 }, { "epoch": 0.26657243303975936, "grad_norm": 2.5477317911055244, "learning_rate": 8.602836274567283e-06, "loss": 1.0055, "step": 7355 }, { "epoch": 0.266608676742416, "grad_norm": 2.4999623035984397, "learning_rate": 8.602429282405164e-06, "loss": 1.0173, "step": 7356 }, { "epoch": 0.26664492044507265, "grad_norm": 2.5161819105973717, "learning_rate": 8.602022240603747e-06, "loss": 0.8307, "step": 7357 }, { "epoch": 0.26668116414772935, "grad_norm": 2.3469094119624265, "learning_rate": 8.601615149168643e-06, "loss": 0.962, "step": 7358 }, { "epoch": 0.266717407850386, "grad_norm": 2.2614223334585892, "learning_rate": 8.60120800810546e-06, "loss": 0.7529, "step": 7359 }, { "epoch": 0.26675365155304265, "grad_norm": 2.60156477659443, "learning_rate": 8.600800817419807e-06, "loss": 0.9301, "step": 7360 }, { "epoch": 0.26678989525569935, "grad_norm": 2.2335082300468283, "learning_rate": 8.600393577117299e-06, "loss": 0.7402, "step": 7361 }, { "epoch": 0.266826138958356, "grad_norm": 2.3473980626609703, "learning_rate": 8.599986287203543e-06, "loss": 1.0645, "step": 7362 }, { "epoch": 0.26686238266101264, "grad_norm": 2.300062807442511, "learning_rate": 8.599578947684154e-06, "loss": 0.9181, "step": 7363 }, { "epoch": 0.2668986263636693, "grad_norm": 2.27370328902574, "learning_rate": 8.599171558564743e-06, "loss": 1.0171, "step": 7364 }, { "epoch": 0.266934870066326, "grad_norm": 2.306093954602955, "learning_rate": 8.598764119850926e-06, "loss": 0.7252, "step": 7365 }, { "epoch": 0.26697111376898264, "grad_norm": 1.9716968995187865, "learning_rate": 8.598356631548316e-06, "loss": 0.8906, "step": 7366 }, { "epoch": 0.2670073574716393, "grad_norm": 2.2534597557002187, "learning_rate": 8.597949093662528e-06, "loss": 1.0177, "step": 7367 }, { "epoch": 0.267043601174296, "grad_norm": 2.397068155114805, "learning_rate": 8.597541506199177e-06, "loss": 0.952, "step": 7368 }, { "epoch": 0.26707984487695263, "grad_norm": 2.1354641655317, "learning_rate": 8.597133869163878e-06, "loss": 0.8859, "step": 7369 }, { "epoch": 0.2671160885796093, "grad_norm": 2.292510842819585, "learning_rate": 8.596726182562253e-06, "loss": 0.9528, "step": 7370 }, { "epoch": 0.267152332282266, "grad_norm": 2.6941937842016515, "learning_rate": 8.596318446399914e-06, "loss": 1.0197, "step": 7371 }, { "epoch": 0.2671885759849226, "grad_norm": 2.8329772396539554, "learning_rate": 8.595910660682483e-06, "loss": 1.2064, "step": 7372 }, { "epoch": 0.2672248196875793, "grad_norm": 2.2008995986044577, "learning_rate": 8.595502825415577e-06, "loss": 1.1675, "step": 7373 }, { "epoch": 0.2672610633902359, "grad_norm": 2.4715332337075857, "learning_rate": 8.595094940604816e-06, "loss": 0.8549, "step": 7374 }, { "epoch": 0.2672973070928926, "grad_norm": 2.5698331515567445, "learning_rate": 8.594687006255823e-06, "loss": 0.8148, "step": 7375 }, { "epoch": 0.26733355079554927, "grad_norm": 2.6343498546048454, "learning_rate": 8.594279022374217e-06, "loss": 0.8263, "step": 7376 }, { "epoch": 0.2673697944982059, "grad_norm": 2.2662523252038453, "learning_rate": 8.593870988965619e-06, "loss": 1.0594, "step": 7377 }, { "epoch": 0.2674060382008626, "grad_norm": 2.4841878771881776, "learning_rate": 8.59346290603565e-06, "loss": 1.1314, "step": 7378 }, { "epoch": 0.26744228190351926, "grad_norm": 2.4736872264760104, "learning_rate": 8.593054773589939e-06, "loss": 0.9294, "step": 7379 }, { "epoch": 0.2674785256061759, "grad_norm": 2.257316510929556, "learning_rate": 8.592646591634105e-06, "loss": 1.0327, "step": 7380 }, { "epoch": 0.2675147693088326, "grad_norm": 2.1369416294691423, "learning_rate": 8.592238360173772e-06, "loss": 1.1615, "step": 7381 }, { "epoch": 0.26755101301148926, "grad_norm": 2.1510983305049702, "learning_rate": 8.591830079214569e-06, "loss": 1.0285, "step": 7382 }, { "epoch": 0.2675872567141459, "grad_norm": 2.2509791915195296, "learning_rate": 8.591421748762119e-06, "loss": 0.9596, "step": 7383 }, { "epoch": 0.2676235004168026, "grad_norm": 2.246161558434688, "learning_rate": 8.59101336882205e-06, "loss": 0.8014, "step": 7384 }, { "epoch": 0.26765974411945925, "grad_norm": 2.4312291503617858, "learning_rate": 8.590604939399987e-06, "loss": 0.9005, "step": 7385 }, { "epoch": 0.2676959878221159, "grad_norm": 2.2705168061135663, "learning_rate": 8.590196460501561e-06, "loss": 0.9859, "step": 7386 }, { "epoch": 0.26773223152477255, "grad_norm": 6.113064499642593, "learning_rate": 8.589787932132399e-06, "loss": 0.8119, "step": 7387 }, { "epoch": 0.26776847522742925, "grad_norm": 2.7671713043358546, "learning_rate": 8.589379354298129e-06, "loss": 1.0479, "step": 7388 }, { "epoch": 0.2678047189300859, "grad_norm": 2.461833213581541, "learning_rate": 8.588970727004384e-06, "loss": 0.9409, "step": 7389 }, { "epoch": 0.26784096263274254, "grad_norm": 2.2190564234002292, "learning_rate": 8.588562050256791e-06, "loss": 1.0618, "step": 7390 }, { "epoch": 0.26787720633539924, "grad_norm": 2.162909896950789, "learning_rate": 8.588153324060984e-06, "loss": 0.7719, "step": 7391 }, { "epoch": 0.2679134500380559, "grad_norm": 2.424057364717674, "learning_rate": 8.587744548422594e-06, "loss": 0.8135, "step": 7392 }, { "epoch": 0.26794969374071254, "grad_norm": 2.4998657418893346, "learning_rate": 8.587335723347256e-06, "loss": 0.8609, "step": 7393 }, { "epoch": 0.26798593744336924, "grad_norm": 2.3629433509712565, "learning_rate": 8.5869268488406e-06, "loss": 1.0104, "step": 7394 }, { "epoch": 0.2680221811460259, "grad_norm": 2.306534934658021, "learning_rate": 8.58651792490826e-06, "loss": 0.9629, "step": 7395 }, { "epoch": 0.26805842484868253, "grad_norm": 2.3880085721320894, "learning_rate": 8.586108951555874e-06, "loss": 1.0518, "step": 7396 }, { "epoch": 0.26809466855133923, "grad_norm": 2.527559581776857, "learning_rate": 8.585699928789073e-06, "loss": 1.0756, "step": 7397 }, { "epoch": 0.2681309122539959, "grad_norm": 2.582767806268639, "learning_rate": 8.585290856613498e-06, "loss": 1.1716, "step": 7398 }, { "epoch": 0.2681671559566525, "grad_norm": 2.2691287861288383, "learning_rate": 8.584881735034783e-06, "loss": 0.8308, "step": 7399 }, { "epoch": 0.26820339965930917, "grad_norm": 2.4330948929997076, "learning_rate": 8.584472564058564e-06, "loss": 0.9237, "step": 7400 }, { "epoch": 0.2682396433619659, "grad_norm": 2.2494929729346422, "learning_rate": 8.584063343690483e-06, "loss": 0.9072, "step": 7401 }, { "epoch": 0.2682758870646225, "grad_norm": 2.8250599262275102, "learning_rate": 8.583654073936176e-06, "loss": 0.9894, "step": 7402 }, { "epoch": 0.26831213076727917, "grad_norm": 2.16144678044188, "learning_rate": 8.583244754801283e-06, "loss": 1.011, "step": 7403 }, { "epoch": 0.26834837446993587, "grad_norm": 2.2928864606835915, "learning_rate": 8.582835386291443e-06, "loss": 0.8581, "step": 7404 }, { "epoch": 0.2683846181725925, "grad_norm": 2.182810019610394, "learning_rate": 8.5824259684123e-06, "loss": 0.9364, "step": 7405 }, { "epoch": 0.26842086187524916, "grad_norm": 2.46118444901269, "learning_rate": 8.582016501169494e-06, "loss": 1.0729, "step": 7406 }, { "epoch": 0.26845710557790586, "grad_norm": 2.1263522045119636, "learning_rate": 8.581606984568666e-06, "loss": 0.9717, "step": 7407 }, { "epoch": 0.2684933492805625, "grad_norm": 2.536978828704861, "learning_rate": 8.581197418615458e-06, "loss": 0.9056, "step": 7408 }, { "epoch": 0.26852959298321916, "grad_norm": 2.451739335445633, "learning_rate": 8.580787803315519e-06, "loss": 0.6951, "step": 7409 }, { "epoch": 0.2685658366858758, "grad_norm": 2.4182140052047805, "learning_rate": 8.580378138674487e-06, "loss": 1.0794, "step": 7410 }, { "epoch": 0.2686020803885325, "grad_norm": 2.4050999025247695, "learning_rate": 8.579968424698012e-06, "loss": 1.1228, "step": 7411 }, { "epoch": 0.26863832409118915, "grad_norm": 2.441926869522679, "learning_rate": 8.579558661391736e-06, "loss": 0.8675, "step": 7412 }, { "epoch": 0.2686745677938458, "grad_norm": 2.1650348307440925, "learning_rate": 8.579148848761306e-06, "loss": 0.8648, "step": 7413 }, { "epoch": 0.2687108114965025, "grad_norm": 2.4753291778387205, "learning_rate": 8.57873898681237e-06, "loss": 1.0402, "step": 7414 }, { "epoch": 0.26874705519915915, "grad_norm": 2.388987727989357, "learning_rate": 8.578329075550575e-06, "loss": 0.8682, "step": 7415 }, { "epoch": 0.2687832989018158, "grad_norm": 2.5246134164091623, "learning_rate": 8.57791911498157e-06, "loss": 0.913, "step": 7416 }, { "epoch": 0.2688195426044725, "grad_norm": 2.6479100613463245, "learning_rate": 8.577509105111002e-06, "loss": 1.0276, "step": 7417 }, { "epoch": 0.26885578630712914, "grad_norm": 2.3458992174143236, "learning_rate": 8.577099045944524e-06, "loss": 1.0409, "step": 7418 }, { "epoch": 0.2688920300097858, "grad_norm": 2.417467694492886, "learning_rate": 8.576688937487783e-06, "loss": 1.014, "step": 7419 }, { "epoch": 0.2689282737124425, "grad_norm": 2.4633968257802006, "learning_rate": 8.576278779746431e-06, "loss": 1.0075, "step": 7420 }, { "epoch": 0.26896451741509914, "grad_norm": 2.319426807295654, "learning_rate": 8.575868572726123e-06, "loss": 0.7882, "step": 7421 }, { "epoch": 0.2690007611177558, "grad_norm": 2.4191042402995357, "learning_rate": 8.575458316432506e-06, "loss": 0.8928, "step": 7422 }, { "epoch": 0.26903700482041243, "grad_norm": 2.5204255731708, "learning_rate": 8.575048010871237e-06, "loss": 0.8784, "step": 7423 }, { "epoch": 0.26907324852306913, "grad_norm": 2.6002857221990445, "learning_rate": 8.574637656047969e-06, "loss": 1.1443, "step": 7424 }, { "epoch": 0.2691094922257258, "grad_norm": 2.2007651741609253, "learning_rate": 8.574227251968355e-06, "loss": 0.8988, "step": 7425 }, { "epoch": 0.2691457359283824, "grad_norm": 2.2782188513745143, "learning_rate": 8.57381679863805e-06, "loss": 1.0422, "step": 7426 }, { "epoch": 0.2691819796310391, "grad_norm": 2.053463627676724, "learning_rate": 8.573406296062714e-06, "loss": 0.9171, "step": 7427 }, { "epoch": 0.26921822333369577, "grad_norm": 2.7356385615227623, "learning_rate": 8.572995744247998e-06, "loss": 0.9657, "step": 7428 }, { "epoch": 0.2692544670363524, "grad_norm": 2.0800325081049515, "learning_rate": 8.572585143199562e-06, "loss": 0.8347, "step": 7429 }, { "epoch": 0.2692907107390091, "grad_norm": 2.1143287279255234, "learning_rate": 8.572174492923064e-06, "loss": 0.8856, "step": 7430 }, { "epoch": 0.26932695444166577, "grad_norm": 2.2754698086663487, "learning_rate": 8.571763793424161e-06, "loss": 0.9956, "step": 7431 }, { "epoch": 0.2693631981443224, "grad_norm": 2.002417112992237, "learning_rate": 8.571353044708514e-06, "loss": 0.8408, "step": 7432 }, { "epoch": 0.26939944184697906, "grad_norm": 2.574043859779339, "learning_rate": 8.570942246781781e-06, "loss": 1.0923, "step": 7433 }, { "epoch": 0.26943568554963576, "grad_norm": 2.496497478412517, "learning_rate": 8.570531399649625e-06, "loss": 0.8398, "step": 7434 }, { "epoch": 0.2694719292522924, "grad_norm": 2.2532877865364, "learning_rate": 8.570120503317703e-06, "loss": 1.0193, "step": 7435 }, { "epoch": 0.26950817295494905, "grad_norm": 2.2030868375141095, "learning_rate": 8.56970955779168e-06, "loss": 0.9714, "step": 7436 }, { "epoch": 0.26954441665760576, "grad_norm": 2.345931296215476, "learning_rate": 8.569298563077223e-06, "loss": 0.7957, "step": 7437 }, { "epoch": 0.2695806603602624, "grad_norm": 2.531139073186345, "learning_rate": 8.568887519179985e-06, "loss": 1.0278, "step": 7438 }, { "epoch": 0.26961690406291905, "grad_norm": 2.3746005541138944, "learning_rate": 8.568476426105637e-06, "loss": 0.8774, "step": 7439 }, { "epoch": 0.26965314776557575, "grad_norm": 2.3577864266921087, "learning_rate": 8.568065283859842e-06, "loss": 0.9549, "step": 7440 }, { "epoch": 0.2696893914682324, "grad_norm": 2.5345130396280013, "learning_rate": 8.567654092448266e-06, "loss": 1.0644, "step": 7441 }, { "epoch": 0.26972563517088904, "grad_norm": 2.660751590229976, "learning_rate": 8.567242851876575e-06, "loss": 0.908, "step": 7442 }, { "epoch": 0.26976187887354575, "grad_norm": 2.2971551903874987, "learning_rate": 8.566831562150433e-06, "loss": 0.9667, "step": 7443 }, { "epoch": 0.2697981225762024, "grad_norm": 2.4687650497667035, "learning_rate": 8.56642022327551e-06, "loss": 0.9545, "step": 7444 }, { "epoch": 0.26983436627885904, "grad_norm": 2.37144454002892, "learning_rate": 8.56600883525747e-06, "loss": 1.0001, "step": 7445 }, { "epoch": 0.2698706099815157, "grad_norm": 2.131496854079557, "learning_rate": 8.56559739810199e-06, "loss": 0.9018, "step": 7446 }, { "epoch": 0.2699068536841724, "grad_norm": 2.4936110860151133, "learning_rate": 8.565185911814733e-06, "loss": 1.0109, "step": 7447 }, { "epoch": 0.26994309738682903, "grad_norm": 2.439221615143182, "learning_rate": 8.564774376401367e-06, "loss": 1.1086, "step": 7448 }, { "epoch": 0.2699793410894857, "grad_norm": 2.2452744148028394, "learning_rate": 8.56436279186757e-06, "loss": 0.9695, "step": 7449 }, { "epoch": 0.2700155847921424, "grad_norm": 2.3945814747073735, "learning_rate": 8.563951158219008e-06, "loss": 0.9789, "step": 7450 }, { "epoch": 0.27005182849479903, "grad_norm": 2.3734793355543777, "learning_rate": 8.563539475461354e-06, "loss": 0.8112, "step": 7451 }, { "epoch": 0.2700880721974557, "grad_norm": 2.4607586195891282, "learning_rate": 8.563127743600282e-06, "loss": 1.0055, "step": 7452 }, { "epoch": 0.2701243159001124, "grad_norm": 2.391570287529142, "learning_rate": 8.562715962641463e-06, "loss": 0.9074, "step": 7453 }, { "epoch": 0.270160559602769, "grad_norm": 2.242617372976152, "learning_rate": 8.562304132590575e-06, "loss": 0.9275, "step": 7454 }, { "epoch": 0.27019680330542567, "grad_norm": 2.2931468321493433, "learning_rate": 8.56189225345329e-06, "loss": 0.8888, "step": 7455 }, { "epoch": 0.27023304700808237, "grad_norm": 2.5462064868032566, "learning_rate": 8.561480325235283e-06, "loss": 0.9975, "step": 7456 }, { "epoch": 0.270269290710739, "grad_norm": 1.9194770074546463, "learning_rate": 8.561068347942232e-06, "loss": 0.81, "step": 7457 }, { "epoch": 0.27030553441339567, "grad_norm": 2.1536515936115004, "learning_rate": 8.56065632157981e-06, "loss": 1.0898, "step": 7458 }, { "epoch": 0.2703417781160523, "grad_norm": 2.462483613837075, "learning_rate": 8.560244246153699e-06, "loss": 0.8549, "step": 7459 }, { "epoch": 0.270378021818709, "grad_norm": 2.5319145489234307, "learning_rate": 8.559832121669576e-06, "loss": 0.9085, "step": 7460 }, { "epoch": 0.27041426552136566, "grad_norm": 2.2507766054432237, "learning_rate": 8.55941994813312e-06, "loss": 0.8474, "step": 7461 }, { "epoch": 0.2704505092240223, "grad_norm": 2.2764121551743415, "learning_rate": 8.559007725550008e-06, "loss": 0.9946, "step": 7462 }, { "epoch": 0.270486752926679, "grad_norm": 2.1724448318628116, "learning_rate": 8.558595453925925e-06, "loss": 0.9525, "step": 7463 }, { "epoch": 0.27052299662933565, "grad_norm": 2.2201530105551908, "learning_rate": 8.558183133266544e-06, "loss": 0.9407, "step": 7464 }, { "epoch": 0.2705592403319923, "grad_norm": 2.673905069145766, "learning_rate": 8.557770763577555e-06, "loss": 1.076, "step": 7465 }, { "epoch": 0.270595484034649, "grad_norm": 2.3958747132774203, "learning_rate": 8.557358344864635e-06, "loss": 0.9881, "step": 7466 }, { "epoch": 0.27063172773730565, "grad_norm": 2.243616525354025, "learning_rate": 8.556945877133468e-06, "loss": 0.9979, "step": 7467 }, { "epoch": 0.2706679714399623, "grad_norm": 2.315642708432336, "learning_rate": 8.556533360389741e-06, "loss": 0.9576, "step": 7468 }, { "epoch": 0.27070421514261894, "grad_norm": 2.129436999889004, "learning_rate": 8.556120794639134e-06, "loss": 1.1549, "step": 7469 }, { "epoch": 0.27074045884527564, "grad_norm": 2.7410385407790447, "learning_rate": 8.555708179887332e-06, "loss": 1.1423, "step": 7470 }, { "epoch": 0.2707767025479323, "grad_norm": 2.448080143524584, "learning_rate": 8.555295516140022e-06, "loss": 0.9245, "step": 7471 }, { "epoch": 0.27081294625058894, "grad_norm": 2.224060117952444, "learning_rate": 8.554882803402889e-06, "loss": 1.0882, "step": 7472 }, { "epoch": 0.27084918995324564, "grad_norm": 2.283675629588558, "learning_rate": 8.554470041681621e-06, "loss": 0.9054, "step": 7473 }, { "epoch": 0.2708854336559023, "grad_norm": 2.394908323429715, "learning_rate": 8.554057230981908e-06, "loss": 0.863, "step": 7474 }, { "epoch": 0.27092167735855893, "grad_norm": 2.520137049923098, "learning_rate": 8.553644371309433e-06, "loss": 0.864, "step": 7475 }, { "epoch": 0.27095792106121563, "grad_norm": 2.432238079850335, "learning_rate": 8.55323146266989e-06, "loss": 0.8867, "step": 7476 }, { "epoch": 0.2709941647638723, "grad_norm": 2.323006102922972, "learning_rate": 8.552818505068965e-06, "loss": 0.9414, "step": 7477 }, { "epoch": 0.2710304084665289, "grad_norm": 2.3382820560379654, "learning_rate": 8.552405498512349e-06, "loss": 0.8256, "step": 7478 }, { "epoch": 0.27106665216918563, "grad_norm": 2.333672989993342, "learning_rate": 8.551992443005735e-06, "loss": 0.9351, "step": 7479 }, { "epoch": 0.2711028958718423, "grad_norm": 3.111976573931425, "learning_rate": 8.551579338554814e-06, "loss": 1.0524, "step": 7480 }, { "epoch": 0.2711391395744989, "grad_norm": 2.367962996165114, "learning_rate": 8.551166185165276e-06, "loss": 0.7949, "step": 7481 }, { "epoch": 0.27117538327715557, "grad_norm": 2.2615644348160187, "learning_rate": 8.550752982842817e-06, "loss": 0.987, "step": 7482 }, { "epoch": 0.27121162697981227, "grad_norm": 2.6135199546753656, "learning_rate": 8.550339731593128e-06, "loss": 1.0198, "step": 7483 }, { "epoch": 0.2712478706824689, "grad_norm": 2.3379410082580097, "learning_rate": 8.549926431421904e-06, "loss": 1.0256, "step": 7484 }, { "epoch": 0.27128411438512556, "grad_norm": 2.376999744336278, "learning_rate": 8.549513082334842e-06, "loss": 0.8383, "step": 7485 }, { "epoch": 0.27132035808778227, "grad_norm": 2.4550581193984145, "learning_rate": 8.549099684337637e-06, "loss": 0.8331, "step": 7486 }, { "epoch": 0.2713566017904389, "grad_norm": 2.3102916283821826, "learning_rate": 8.548686237435983e-06, "loss": 0.9845, "step": 7487 }, { "epoch": 0.27139284549309556, "grad_norm": 2.291173025292705, "learning_rate": 8.548272741635581e-06, "loss": 0.7952, "step": 7488 }, { "epoch": 0.27142908919575226, "grad_norm": 2.125327307672091, "learning_rate": 8.547859196942126e-06, "loss": 0.8664, "step": 7489 }, { "epoch": 0.2714653328984089, "grad_norm": 2.2471833865183513, "learning_rate": 8.547445603361315e-06, "loss": 0.8271, "step": 7490 }, { "epoch": 0.27150157660106555, "grad_norm": 2.6985566522119036, "learning_rate": 8.54703196089885e-06, "loss": 1.021, "step": 7491 }, { "epoch": 0.27153782030372225, "grad_norm": 2.1951045320409537, "learning_rate": 8.546618269560432e-06, "loss": 1.1078, "step": 7492 }, { "epoch": 0.2715740640063789, "grad_norm": 2.378406330719633, "learning_rate": 8.546204529351755e-06, "loss": 1.0925, "step": 7493 }, { "epoch": 0.27161030770903555, "grad_norm": 2.3189718693938284, "learning_rate": 8.545790740278528e-06, "loss": 0.8623, "step": 7494 }, { "epoch": 0.2716465514116922, "grad_norm": 2.384803161234283, "learning_rate": 8.545376902346447e-06, "loss": 0.9008, "step": 7495 }, { "epoch": 0.2716827951143489, "grad_norm": 2.3220357032703047, "learning_rate": 8.54496301556122e-06, "loss": 1.0758, "step": 7496 }, { "epoch": 0.27171903881700554, "grad_norm": 1.9354393163471324, "learning_rate": 8.544549079928541e-06, "loss": 0.7858, "step": 7497 }, { "epoch": 0.2717552825196622, "grad_norm": 2.232469840977735, "learning_rate": 8.544135095454122e-06, "loss": 0.9197, "step": 7498 }, { "epoch": 0.2717915262223189, "grad_norm": 2.415779654436425, "learning_rate": 8.543721062143665e-06, "loss": 1.0491, "step": 7499 }, { "epoch": 0.27182776992497554, "grad_norm": 2.492066192404849, "learning_rate": 8.543306980002874e-06, "loss": 1.0475, "step": 7500 }, { "epoch": 0.2718640136276322, "grad_norm": 2.283541692093501, "learning_rate": 8.542892849037458e-06, "loss": 0.7267, "step": 7501 }, { "epoch": 0.2719002573302889, "grad_norm": 2.36406566070708, "learning_rate": 8.542478669253118e-06, "loss": 0.8974, "step": 7502 }, { "epoch": 0.27193650103294553, "grad_norm": 2.4578029340117813, "learning_rate": 8.542064440655568e-06, "loss": 0.9747, "step": 7503 }, { "epoch": 0.2719727447356022, "grad_norm": 2.2075006501570487, "learning_rate": 8.54165016325051e-06, "loss": 1.0193, "step": 7504 }, { "epoch": 0.2720089884382588, "grad_norm": 2.1920276142729094, "learning_rate": 8.541235837043655e-06, "loss": 1.0272, "step": 7505 }, { "epoch": 0.2720452321409155, "grad_norm": 2.331564275489302, "learning_rate": 8.540821462040712e-06, "loss": 0.8943, "step": 7506 }, { "epoch": 0.2720814758435722, "grad_norm": 2.6236853961945967, "learning_rate": 8.54040703824739e-06, "loss": 1.0617, "step": 7507 }, { "epoch": 0.2721177195462288, "grad_norm": 2.4285149627510183, "learning_rate": 8.5399925656694e-06, "loss": 0.9198, "step": 7508 }, { "epoch": 0.2721539632488855, "grad_norm": 2.1974656432365665, "learning_rate": 8.539578044312454e-06, "loss": 0.9543, "step": 7509 }, { "epoch": 0.27219020695154217, "grad_norm": 2.3301295719654136, "learning_rate": 8.539163474182264e-06, "loss": 0.8246, "step": 7510 }, { "epoch": 0.2722264506541988, "grad_norm": 2.463319888224015, "learning_rate": 8.53874885528454e-06, "loss": 1.0731, "step": 7511 }, { "epoch": 0.2722626943568555, "grad_norm": 2.0841169999667732, "learning_rate": 8.538334187624998e-06, "loss": 0.948, "step": 7512 }, { "epoch": 0.27229893805951216, "grad_norm": 2.584899957248, "learning_rate": 8.53791947120935e-06, "loss": 1.091, "step": 7513 }, { "epoch": 0.2723351817621688, "grad_norm": 2.384154390549064, "learning_rate": 8.537504706043312e-06, "loss": 0.9048, "step": 7514 }, { "epoch": 0.2723714254648255, "grad_norm": 2.3810876173778577, "learning_rate": 8.5370898921326e-06, "loss": 1.1429, "step": 7515 }, { "epoch": 0.27240766916748216, "grad_norm": 2.5817024835414504, "learning_rate": 8.536675029482926e-06, "loss": 1.0645, "step": 7516 }, { "epoch": 0.2724439128701388, "grad_norm": 2.3567624388323547, "learning_rate": 8.53626011810001e-06, "loss": 0.9077, "step": 7517 }, { "epoch": 0.27248015657279545, "grad_norm": 2.1628346434780097, "learning_rate": 8.535845157989568e-06, "loss": 1.0119, "step": 7518 }, { "epoch": 0.27251640027545215, "grad_norm": 2.2561993447309003, "learning_rate": 8.53543014915732e-06, "loss": 1.0367, "step": 7519 }, { "epoch": 0.2725526439781088, "grad_norm": 2.3937819161999325, "learning_rate": 8.53501509160898e-06, "loss": 1.0062, "step": 7520 }, { "epoch": 0.27258888768076545, "grad_norm": 2.2715779070489788, "learning_rate": 8.534599985350272e-06, "loss": 0.9625, "step": 7521 }, { "epoch": 0.27262513138342215, "grad_norm": 2.2766031296417144, "learning_rate": 8.534184830386912e-06, "loss": 1.1141, "step": 7522 }, { "epoch": 0.2726613750860788, "grad_norm": 2.5227048530894445, "learning_rate": 8.533769626724624e-06, "loss": 0.9948, "step": 7523 }, { "epoch": 0.27269761878873544, "grad_norm": 2.3595725797963873, "learning_rate": 8.533354374369128e-06, "loss": 1.0702, "step": 7524 }, { "epoch": 0.27273386249139214, "grad_norm": 2.3054729665346643, "learning_rate": 8.532939073326142e-06, "loss": 1.0309, "step": 7525 }, { "epoch": 0.2727701061940488, "grad_norm": 2.645136809578129, "learning_rate": 8.532523723601397e-06, "loss": 0.9329, "step": 7526 }, { "epoch": 0.27280634989670544, "grad_norm": 2.1591828384009206, "learning_rate": 8.532108325200608e-06, "loss": 0.8343, "step": 7527 }, { "epoch": 0.27284259359936214, "grad_norm": 2.3886153060065634, "learning_rate": 8.531692878129505e-06, "loss": 0.8513, "step": 7528 }, { "epoch": 0.2728788373020188, "grad_norm": 2.1985799721331243, "learning_rate": 8.531277382393808e-06, "loss": 0.9224, "step": 7529 }, { "epoch": 0.27291508100467543, "grad_norm": 2.7845401407240256, "learning_rate": 8.530861837999246e-06, "loss": 1.0039, "step": 7530 }, { "epoch": 0.2729513247073321, "grad_norm": 2.529180248425199, "learning_rate": 8.530446244951542e-06, "loss": 0.9, "step": 7531 }, { "epoch": 0.2729875684099888, "grad_norm": 2.1231580978451636, "learning_rate": 8.530030603256425e-06, "loss": 0.8187, "step": 7532 }, { "epoch": 0.2730238121126454, "grad_norm": 2.3451531642994086, "learning_rate": 8.529614912919619e-06, "loss": 1.0357, "step": 7533 }, { "epoch": 0.27306005581530207, "grad_norm": 2.5019313553454308, "learning_rate": 8.529199173946857e-06, "loss": 0.8906, "step": 7534 }, { "epoch": 0.2730962995179588, "grad_norm": 2.0453143871579886, "learning_rate": 8.528783386343862e-06, "loss": 0.8969, "step": 7535 }, { "epoch": 0.2731325432206154, "grad_norm": 2.2522464839472156, "learning_rate": 8.528367550116369e-06, "loss": 1.0335, "step": 7536 }, { "epoch": 0.27316878692327207, "grad_norm": 2.3585464897772987, "learning_rate": 8.527951665270103e-06, "loss": 1.005, "step": 7537 }, { "epoch": 0.27320503062592877, "grad_norm": 2.3573732164260015, "learning_rate": 8.527535731810798e-06, "loss": 0.8987, "step": 7538 }, { "epoch": 0.2732412743285854, "grad_norm": 2.02568106627039, "learning_rate": 8.527119749744183e-06, "loss": 0.6804, "step": 7539 }, { "epoch": 0.27327751803124206, "grad_norm": 2.616051482667562, "learning_rate": 8.526703719075992e-06, "loss": 1.0265, "step": 7540 }, { "epoch": 0.2733137617338987, "grad_norm": 2.3205418907584687, "learning_rate": 8.526287639811954e-06, "loss": 0.8845, "step": 7541 }, { "epoch": 0.2733500054365554, "grad_norm": 2.0617582038068507, "learning_rate": 8.525871511957808e-06, "loss": 0.9307, "step": 7542 }, { "epoch": 0.27338624913921206, "grad_norm": 2.395636097945902, "learning_rate": 8.525455335519283e-06, "loss": 1.0148, "step": 7543 }, { "epoch": 0.2734224928418687, "grad_norm": 2.3770966313911246, "learning_rate": 8.525039110502118e-06, "loss": 1.0803, "step": 7544 }, { "epoch": 0.2734587365445254, "grad_norm": 2.128186447679314, "learning_rate": 8.524622836912043e-06, "loss": 0.7531, "step": 7545 }, { "epoch": 0.27349498024718205, "grad_norm": 2.639835042045679, "learning_rate": 8.524206514754799e-06, "loss": 0.9636, "step": 7546 }, { "epoch": 0.2735312239498387, "grad_norm": 2.3660733300439065, "learning_rate": 8.523790144036119e-06, "loss": 1.0303, "step": 7547 }, { "epoch": 0.2735674676524954, "grad_norm": 2.324498031680392, "learning_rate": 8.523373724761744e-06, "loss": 0.9294, "step": 7548 }, { "epoch": 0.27360371135515205, "grad_norm": 2.3581327883065843, "learning_rate": 8.522957256937407e-06, "loss": 0.9686, "step": 7549 }, { "epoch": 0.2736399550578087, "grad_norm": 2.378196178935936, "learning_rate": 8.522540740568852e-06, "loss": 0.8057, "step": 7550 }, { "epoch": 0.2736761987604654, "grad_norm": 2.16827246457039, "learning_rate": 8.522124175661816e-06, "loss": 0.9012, "step": 7551 }, { "epoch": 0.27371244246312204, "grad_norm": 2.1676774449750913, "learning_rate": 8.521707562222038e-06, "loss": 1.0151, "step": 7552 }, { "epoch": 0.2737486861657787, "grad_norm": 2.5524710359808584, "learning_rate": 8.52129090025526e-06, "loss": 1.0259, "step": 7553 }, { "epoch": 0.27378492986843533, "grad_norm": 2.412571925654117, "learning_rate": 8.520874189767222e-06, "loss": 0.8464, "step": 7554 }, { "epoch": 0.27382117357109204, "grad_norm": 2.3149468616460207, "learning_rate": 8.520457430763669e-06, "loss": 1.0266, "step": 7555 }, { "epoch": 0.2738574172737487, "grad_norm": 2.2539385684537576, "learning_rate": 8.52004062325034e-06, "loss": 0.8011, "step": 7556 }, { "epoch": 0.27389366097640533, "grad_norm": 2.422168629057946, "learning_rate": 8.51962376723298e-06, "loss": 1.1441, "step": 7557 }, { "epoch": 0.27392990467906203, "grad_norm": 2.251541719487702, "learning_rate": 8.519206862717335e-06, "loss": 0.9708, "step": 7558 }, { "epoch": 0.2739661483817187, "grad_norm": 2.3271640299358136, "learning_rate": 8.518789909709147e-06, "loss": 1.0951, "step": 7559 }, { "epoch": 0.2740023920843753, "grad_norm": 2.1996990036998145, "learning_rate": 8.51837290821416e-06, "loss": 0.8219, "step": 7560 }, { "epoch": 0.274038635787032, "grad_norm": 2.2501101047257506, "learning_rate": 8.517955858238125e-06, "loss": 1.0412, "step": 7561 }, { "epoch": 0.27407487948968867, "grad_norm": 2.3656453511968114, "learning_rate": 8.517538759786785e-06, "loss": 1.0903, "step": 7562 }, { "epoch": 0.2741111231923453, "grad_norm": 1.9170411717206608, "learning_rate": 8.517121612865889e-06, "loss": 0.864, "step": 7563 }, { "epoch": 0.274147366895002, "grad_norm": 2.4137824619661665, "learning_rate": 8.516704417481183e-06, "loss": 0.8587, "step": 7564 }, { "epoch": 0.27418361059765867, "grad_norm": 2.4037228591920257, "learning_rate": 8.516287173638416e-06, "loss": 1.0925, "step": 7565 }, { "epoch": 0.2742198543003153, "grad_norm": 1.9926624894432878, "learning_rate": 8.51586988134334e-06, "loss": 0.9378, "step": 7566 }, { "epoch": 0.27425609800297196, "grad_norm": 2.3031173320788247, "learning_rate": 8.515452540601703e-06, "loss": 0.7525, "step": 7567 }, { "epoch": 0.27429234170562866, "grad_norm": 2.7704588523614753, "learning_rate": 8.515035151419257e-06, "loss": 0.8925, "step": 7568 }, { "epoch": 0.2743285854082853, "grad_norm": 2.277697817244245, "learning_rate": 8.514617713801749e-06, "loss": 0.8969, "step": 7569 }, { "epoch": 0.27436482911094195, "grad_norm": 2.3285804883587784, "learning_rate": 8.514200227754939e-06, "loss": 1.0358, "step": 7570 }, { "epoch": 0.27440107281359866, "grad_norm": 2.407850147820043, "learning_rate": 8.513782693284572e-06, "loss": 0.9197, "step": 7571 }, { "epoch": 0.2744373165162553, "grad_norm": 2.534426183266547, "learning_rate": 8.513365110396406e-06, "loss": 1.0319, "step": 7572 }, { "epoch": 0.27447356021891195, "grad_norm": 2.526650691485423, "learning_rate": 8.512947479096194e-06, "loss": 0.869, "step": 7573 }, { "epoch": 0.27450980392156865, "grad_norm": 2.3607278707019748, "learning_rate": 8.512529799389688e-06, "loss": 1.0807, "step": 7574 }, { "epoch": 0.2745460476242253, "grad_norm": 2.285200995985811, "learning_rate": 8.512112071282648e-06, "loss": 1.101, "step": 7575 }, { "epoch": 0.27458229132688194, "grad_norm": 2.32954536596557, "learning_rate": 8.511694294780827e-06, "loss": 0.9945, "step": 7576 }, { "epoch": 0.2746185350295386, "grad_norm": 2.2465828867880577, "learning_rate": 8.511276469889981e-06, "loss": 0.8169, "step": 7577 }, { "epoch": 0.2746547787321953, "grad_norm": 2.404704082506709, "learning_rate": 8.510858596615871e-06, "loss": 1.0473, "step": 7578 }, { "epoch": 0.27469102243485194, "grad_norm": 2.262734924138215, "learning_rate": 8.510440674964252e-06, "loss": 1.1021, "step": 7579 }, { "epoch": 0.2747272661375086, "grad_norm": 2.4364974099977488, "learning_rate": 8.510022704940883e-06, "loss": 0.8181, "step": 7580 }, { "epoch": 0.2747635098401653, "grad_norm": 2.691616973382068, "learning_rate": 8.509604686551525e-06, "loss": 0.6428, "step": 7581 }, { "epoch": 0.27479975354282193, "grad_norm": 2.47688605280459, "learning_rate": 8.509186619801937e-06, "loss": 0.9147, "step": 7582 }, { "epoch": 0.2748359972454786, "grad_norm": 2.4776732619730226, "learning_rate": 8.508768504697878e-06, "loss": 0.9274, "step": 7583 }, { "epoch": 0.2748722409481353, "grad_norm": 2.3950540092157304, "learning_rate": 8.508350341245113e-06, "loss": 0.9286, "step": 7584 }, { "epoch": 0.27490848465079193, "grad_norm": 2.244134504911429, "learning_rate": 8.5079321294494e-06, "loss": 0.9651, "step": 7585 }, { "epoch": 0.2749447283534486, "grad_norm": 2.1995268398560937, "learning_rate": 8.507513869316505e-06, "loss": 0.8266, "step": 7586 }, { "epoch": 0.2749809720561053, "grad_norm": 2.1880053146457805, "learning_rate": 8.507095560852192e-06, "loss": 0.7951, "step": 7587 }, { "epoch": 0.2750172157587619, "grad_norm": 2.2260412674363037, "learning_rate": 8.506677204062219e-06, "loss": 1.013, "step": 7588 }, { "epoch": 0.27505345946141857, "grad_norm": 2.518669699921677, "learning_rate": 8.506258798952359e-06, "loss": 0.8665, "step": 7589 }, { "epoch": 0.2750897031640752, "grad_norm": 2.2395453135492347, "learning_rate": 8.505840345528372e-06, "loss": 0.997, "step": 7590 }, { "epoch": 0.2751259468667319, "grad_norm": 2.256001928880505, "learning_rate": 8.505421843796023e-06, "loss": 1.0314, "step": 7591 }, { "epoch": 0.27516219056938857, "grad_norm": 2.4457665475335237, "learning_rate": 8.505003293761083e-06, "loss": 0.9876, "step": 7592 }, { "epoch": 0.2751984342720452, "grad_norm": 2.132573692908872, "learning_rate": 8.504584695429318e-06, "loss": 0.8482, "step": 7593 }, { "epoch": 0.2752346779747019, "grad_norm": 2.316120218466355, "learning_rate": 8.504166048806493e-06, "loss": 1.003, "step": 7594 }, { "epoch": 0.27527092167735856, "grad_norm": 2.101481154253857, "learning_rate": 8.503747353898381e-06, "loss": 1.0034, "step": 7595 }, { "epoch": 0.2753071653800152, "grad_norm": 2.4695948740599376, "learning_rate": 8.50332861071075e-06, "loss": 1.0153, "step": 7596 }, { "epoch": 0.2753434090826719, "grad_norm": 2.275818907124825, "learning_rate": 8.502909819249368e-06, "loss": 0.8215, "step": 7597 }, { "epoch": 0.27537965278532855, "grad_norm": 2.374448234972363, "learning_rate": 8.502490979520008e-06, "loss": 0.7651, "step": 7598 }, { "epoch": 0.2754158964879852, "grad_norm": 2.317096111646219, "learning_rate": 8.50207209152844e-06, "loss": 0.9517, "step": 7599 }, { "epoch": 0.2754521401906419, "grad_norm": 2.265879301703808, "learning_rate": 8.501653155280436e-06, "loss": 1.0401, "step": 7600 }, { "epoch": 0.27548838389329855, "grad_norm": 2.2336006937712134, "learning_rate": 8.501234170781772e-06, "loss": 1.012, "step": 7601 }, { "epoch": 0.2755246275959552, "grad_norm": 2.422790532286232, "learning_rate": 8.500815138038216e-06, "loss": 0.9752, "step": 7602 }, { "epoch": 0.27556087129861184, "grad_norm": 2.0886827708815874, "learning_rate": 8.500396057055546e-06, "loss": 0.9064, "step": 7603 }, { "epoch": 0.27559711500126854, "grad_norm": 2.0139162046729653, "learning_rate": 8.499976927839536e-06, "loss": 0.7805, "step": 7604 }, { "epoch": 0.2756333587039252, "grad_norm": 2.0781132011907415, "learning_rate": 8.499557750395958e-06, "loss": 0.9426, "step": 7605 }, { "epoch": 0.27566960240658184, "grad_norm": 2.517229751508548, "learning_rate": 8.499138524730592e-06, "loss": 1.0885, "step": 7606 }, { "epoch": 0.27570584610923854, "grad_norm": 2.331239394234494, "learning_rate": 8.498719250849213e-06, "loss": 0.8356, "step": 7607 }, { "epoch": 0.2757420898118952, "grad_norm": 2.4860862330138445, "learning_rate": 8.498299928757598e-06, "loss": 0.7924, "step": 7608 }, { "epoch": 0.27577833351455183, "grad_norm": 2.374450582677933, "learning_rate": 8.497880558461528e-06, "loss": 0.989, "step": 7609 }, { "epoch": 0.27581457721720853, "grad_norm": 2.553087193158078, "learning_rate": 8.497461139966778e-06, "loss": 0.8164, "step": 7610 }, { "epoch": 0.2758508209198652, "grad_norm": 2.403613167313605, "learning_rate": 8.49704167327913e-06, "loss": 0.866, "step": 7611 }, { "epoch": 0.2758870646225218, "grad_norm": 2.4091615570503664, "learning_rate": 8.49662215840436e-06, "loss": 0.9616, "step": 7612 }, { "epoch": 0.2759233083251785, "grad_norm": 2.5724361027661455, "learning_rate": 8.496202595348253e-06, "loss": 1.0063, "step": 7613 }, { "epoch": 0.2759595520278352, "grad_norm": 2.479932374247297, "learning_rate": 8.495782984116588e-06, "loss": 0.8354, "step": 7614 }, { "epoch": 0.2759957957304918, "grad_norm": 2.3104574826383195, "learning_rate": 8.49536332471515e-06, "loss": 0.892, "step": 7615 }, { "epoch": 0.27603203943314847, "grad_norm": 2.289776076709795, "learning_rate": 8.494943617149716e-06, "loss": 0.9772, "step": 7616 }, { "epoch": 0.27606828313580517, "grad_norm": 2.412564892775753, "learning_rate": 8.494523861426074e-06, "loss": 0.9142, "step": 7617 }, { "epoch": 0.2761045268384618, "grad_norm": 2.4718059080170716, "learning_rate": 8.494104057550007e-06, "loss": 0.8683, "step": 7618 }, { "epoch": 0.27614077054111846, "grad_norm": 2.447156584435403, "learning_rate": 8.493684205527299e-06, "loss": 1.1085, "step": 7619 }, { "epoch": 0.27617701424377517, "grad_norm": 2.1952289846432156, "learning_rate": 8.493264305363735e-06, "loss": 0.8115, "step": 7620 }, { "epoch": 0.2762132579464318, "grad_norm": 2.3780716093566268, "learning_rate": 8.492844357065102e-06, "loss": 1.0351, "step": 7621 }, { "epoch": 0.27624950164908846, "grad_norm": 2.511391344859231, "learning_rate": 8.492424360637185e-06, "loss": 0.8982, "step": 7622 }, { "epoch": 0.27628574535174516, "grad_norm": 2.4848252577653365, "learning_rate": 8.492004316085776e-06, "loss": 1.0096, "step": 7623 }, { "epoch": 0.2763219890544018, "grad_norm": 2.315233649079562, "learning_rate": 8.491584223416656e-06, "loss": 1.0233, "step": 7624 }, { "epoch": 0.27635823275705845, "grad_norm": 2.2921064510436566, "learning_rate": 8.491164082635619e-06, "loss": 0.9837, "step": 7625 }, { "epoch": 0.2763944764597151, "grad_norm": 2.3592805917251223, "learning_rate": 8.49074389374845e-06, "loss": 0.8034, "step": 7626 }, { "epoch": 0.2764307201623718, "grad_norm": 2.287925252058575, "learning_rate": 8.490323656760943e-06, "loss": 0.9414, "step": 7627 }, { "epoch": 0.27646696386502845, "grad_norm": 2.259061003798657, "learning_rate": 8.489903371678887e-06, "loss": 0.956, "step": 7628 }, { "epoch": 0.2765032075676851, "grad_norm": 2.252400302666457, "learning_rate": 8.489483038508074e-06, "loss": 0.721, "step": 7629 }, { "epoch": 0.2765394512703418, "grad_norm": 2.486569476498875, "learning_rate": 8.489062657254293e-06, "loss": 0.9379, "step": 7630 }, { "epoch": 0.27657569497299844, "grad_norm": 2.249495931980385, "learning_rate": 8.488642227923339e-06, "loss": 1.1224, "step": 7631 }, { "epoch": 0.2766119386756551, "grad_norm": 2.3027284756160222, "learning_rate": 8.488221750521006e-06, "loss": 1.082, "step": 7632 }, { "epoch": 0.2766481823783118, "grad_norm": 2.4493144634375463, "learning_rate": 8.487801225053086e-06, "loss": 1.0042, "step": 7633 }, { "epoch": 0.27668442608096844, "grad_norm": 2.388134708052172, "learning_rate": 8.487380651525374e-06, "loss": 0.9403, "step": 7634 }, { "epoch": 0.2767206697836251, "grad_norm": 2.2565208483792203, "learning_rate": 8.486960029943668e-06, "loss": 0.6878, "step": 7635 }, { "epoch": 0.2767569134862818, "grad_norm": 2.6717483464808502, "learning_rate": 8.48653936031376e-06, "loss": 1.0849, "step": 7636 }, { "epoch": 0.27679315718893843, "grad_norm": 2.388518408639659, "learning_rate": 8.486118642641448e-06, "loss": 0.9204, "step": 7637 }, { "epoch": 0.2768294008915951, "grad_norm": 2.3230114995351796, "learning_rate": 8.48569787693253e-06, "loss": 1.0052, "step": 7638 }, { "epoch": 0.2768656445942517, "grad_norm": 2.4003362161751025, "learning_rate": 8.485277063192804e-06, "loss": 0.9247, "step": 7639 }, { "epoch": 0.2769018882969084, "grad_norm": 2.430194949138882, "learning_rate": 8.484856201428067e-06, "loss": 0.8354, "step": 7640 }, { "epoch": 0.2769381319995651, "grad_norm": 2.1197771255093807, "learning_rate": 8.48443529164412e-06, "loss": 0.7537, "step": 7641 }, { "epoch": 0.2769743757022217, "grad_norm": 2.6466792927101968, "learning_rate": 8.484014333846759e-06, "loss": 0.9846, "step": 7642 }, { "epoch": 0.2770106194048784, "grad_norm": 2.3022929037580684, "learning_rate": 8.48359332804179e-06, "loss": 0.8059, "step": 7643 }, { "epoch": 0.27704686310753507, "grad_norm": 2.6181961558756672, "learning_rate": 8.483172274235011e-06, "loss": 1.0856, "step": 7644 }, { "epoch": 0.2770831068101917, "grad_norm": 2.3183977127080686, "learning_rate": 8.482751172432227e-06, "loss": 0.835, "step": 7645 }, { "epoch": 0.2771193505128484, "grad_norm": 2.460015888488052, "learning_rate": 8.482330022639236e-06, "loss": 0.8207, "step": 7646 }, { "epoch": 0.27715559421550506, "grad_norm": 2.3214269984083016, "learning_rate": 8.481908824861844e-06, "loss": 1.1306, "step": 7647 }, { "epoch": 0.2771918379181617, "grad_norm": 2.217799078369354, "learning_rate": 8.481487579105855e-06, "loss": 0.8528, "step": 7648 }, { "epoch": 0.27722808162081836, "grad_norm": 2.8573862911911503, "learning_rate": 8.481066285377072e-06, "loss": 0.9649, "step": 7649 }, { "epoch": 0.27726432532347506, "grad_norm": 2.38836081304566, "learning_rate": 8.480644943681297e-06, "loss": 0.8923, "step": 7650 }, { "epoch": 0.2773005690261317, "grad_norm": 2.6010440488127973, "learning_rate": 8.480223554024346e-06, "loss": 0.9344, "step": 7651 }, { "epoch": 0.27733681272878835, "grad_norm": 2.285246227805694, "learning_rate": 8.479802116412016e-06, "loss": 1.0198, "step": 7652 }, { "epoch": 0.27737305643144505, "grad_norm": 2.117019174457255, "learning_rate": 8.479380630850116e-06, "loss": 0.9739, "step": 7653 }, { "epoch": 0.2774093001341017, "grad_norm": 2.297019869517345, "learning_rate": 8.478959097344458e-06, "loss": 0.7838, "step": 7654 }, { "epoch": 0.27744554383675835, "grad_norm": 2.340601345247731, "learning_rate": 8.478537515900846e-06, "loss": 1.0494, "step": 7655 }, { "epoch": 0.27748178753941505, "grad_norm": 2.3524626389291927, "learning_rate": 8.478115886525091e-06, "loss": 0.9885, "step": 7656 }, { "epoch": 0.2775180312420717, "grad_norm": 2.7516201077072573, "learning_rate": 8.477694209223003e-06, "loss": 1.1238, "step": 7657 }, { "epoch": 0.27755427494472834, "grad_norm": 2.4110031008399258, "learning_rate": 8.477272484000392e-06, "loss": 0.9274, "step": 7658 }, { "epoch": 0.27759051864738504, "grad_norm": 2.5535417263881732, "learning_rate": 8.47685071086307e-06, "loss": 1.0012, "step": 7659 }, { "epoch": 0.2776267623500417, "grad_norm": 2.4280725951767486, "learning_rate": 8.476428889816846e-06, "loss": 0.827, "step": 7660 }, { "epoch": 0.27766300605269834, "grad_norm": 2.401285553522141, "learning_rate": 8.476007020867537e-06, "loss": 1.033, "step": 7661 }, { "epoch": 0.277699249755355, "grad_norm": 2.591293521329533, "learning_rate": 8.47558510402095e-06, "loss": 0.9476, "step": 7662 }, { "epoch": 0.2777354934580117, "grad_norm": 15.308646609598648, "learning_rate": 8.475163139282903e-06, "loss": 1.4985, "step": 7663 }, { "epoch": 0.27777173716066833, "grad_norm": 2.285361491886524, "learning_rate": 8.474741126659211e-06, "loss": 1.0545, "step": 7664 }, { "epoch": 0.277807980863325, "grad_norm": 2.0708992047139443, "learning_rate": 8.474319066155687e-06, "loss": 0.9383, "step": 7665 }, { "epoch": 0.2778442245659817, "grad_norm": 3.097502009269306, "learning_rate": 8.473896957778149e-06, "loss": 1.1575, "step": 7666 }, { "epoch": 0.2778804682686383, "grad_norm": 2.424576145875928, "learning_rate": 8.473474801532409e-06, "loss": 0.9133, "step": 7667 }, { "epoch": 0.27791671197129497, "grad_norm": 2.4789054932676775, "learning_rate": 8.47305259742429e-06, "loss": 0.9455, "step": 7668 }, { "epoch": 0.2779529556739517, "grad_norm": 2.349856767744332, "learning_rate": 8.472630345459605e-06, "loss": 0.9098, "step": 7669 }, { "epoch": 0.2779891993766083, "grad_norm": 2.3397644868407053, "learning_rate": 8.472208045644175e-06, "loss": 0.8574, "step": 7670 }, { "epoch": 0.27802544307926497, "grad_norm": 2.270631255443014, "learning_rate": 8.471785697983817e-06, "loss": 0.7755, "step": 7671 }, { "epoch": 0.27806168678192167, "grad_norm": 2.415304155458177, "learning_rate": 8.471363302484353e-06, "loss": 0.7643, "step": 7672 }, { "epoch": 0.2780979304845783, "grad_norm": 2.3979112000119485, "learning_rate": 8.4709408591516e-06, "loss": 0.9476, "step": 7673 }, { "epoch": 0.27813417418723496, "grad_norm": 2.3516987943084544, "learning_rate": 8.470518367991382e-06, "loss": 1.0059, "step": 7674 }, { "epoch": 0.2781704178898916, "grad_norm": 2.580761517081481, "learning_rate": 8.470095829009521e-06, "loss": 0.8538, "step": 7675 }, { "epoch": 0.2782066615925483, "grad_norm": 1.920775083541858, "learning_rate": 8.469673242211838e-06, "loss": 0.8028, "step": 7676 }, { "epoch": 0.27824290529520496, "grad_norm": 2.139375367482613, "learning_rate": 8.469250607604153e-06, "loss": 0.9371, "step": 7677 }, { "epoch": 0.2782791489978616, "grad_norm": 2.5706843708195493, "learning_rate": 8.468827925192297e-06, "loss": 0.8788, "step": 7678 }, { "epoch": 0.2783153927005183, "grad_norm": 2.24848078087338, "learning_rate": 8.468405194982087e-06, "loss": 1.043, "step": 7679 }, { "epoch": 0.27835163640317495, "grad_norm": 2.7082833614205453, "learning_rate": 8.467982416979353e-06, "loss": 1.0426, "step": 7680 }, { "epoch": 0.2783878801058316, "grad_norm": 2.4440529491871525, "learning_rate": 8.467559591189918e-06, "loss": 1.0427, "step": 7681 }, { "epoch": 0.2784241238084883, "grad_norm": 2.3192258218632844, "learning_rate": 8.467136717619609e-06, "loss": 0.8797, "step": 7682 }, { "epoch": 0.27846036751114495, "grad_norm": 2.5765092132318643, "learning_rate": 8.466713796274253e-06, "loss": 1.0305, "step": 7683 }, { "epoch": 0.2784966112138016, "grad_norm": 2.3719036269810725, "learning_rate": 8.466290827159677e-06, "loss": 0.8475, "step": 7684 }, { "epoch": 0.27853285491645824, "grad_norm": 2.2122918215841163, "learning_rate": 8.46586781028171e-06, "loss": 0.9044, "step": 7685 }, { "epoch": 0.27856909861911494, "grad_norm": 2.2446629595933807, "learning_rate": 8.46544474564618e-06, "loss": 1.0, "step": 7686 }, { "epoch": 0.2786053423217716, "grad_norm": 2.1717813117928935, "learning_rate": 8.465021633258918e-06, "loss": 0.8158, "step": 7687 }, { "epoch": 0.27864158602442823, "grad_norm": 2.0438537479561765, "learning_rate": 8.464598473125753e-06, "loss": 1.1828, "step": 7688 }, { "epoch": 0.27867782972708494, "grad_norm": 2.244099918673965, "learning_rate": 8.464175265252517e-06, "loss": 0.8005, "step": 7689 }, { "epoch": 0.2787140734297416, "grad_norm": 2.396861947631684, "learning_rate": 8.463752009645041e-06, "loss": 0.8675, "step": 7690 }, { "epoch": 0.27875031713239823, "grad_norm": 2.3524638903171002, "learning_rate": 8.463328706309157e-06, "loss": 0.9324, "step": 7691 }, { "epoch": 0.27878656083505493, "grad_norm": 2.386610085377857, "learning_rate": 8.462905355250697e-06, "loss": 0.9542, "step": 7692 }, { "epoch": 0.2788228045377116, "grad_norm": 2.264686276476655, "learning_rate": 8.462481956475496e-06, "loss": 0.8926, "step": 7693 }, { "epoch": 0.2788590482403682, "grad_norm": 2.2332029333991077, "learning_rate": 8.46205850998939e-06, "loss": 0.938, "step": 7694 }, { "epoch": 0.2788952919430249, "grad_norm": 2.363748715476452, "learning_rate": 8.46163501579821e-06, "loss": 0.9702, "step": 7695 }, { "epoch": 0.27893153564568157, "grad_norm": 2.691040623198756, "learning_rate": 8.461211473907791e-06, "loss": 0.8987, "step": 7696 }, { "epoch": 0.2789677793483382, "grad_norm": 2.12229859110877, "learning_rate": 8.460787884323974e-06, "loss": 0.8626, "step": 7697 }, { "epoch": 0.27900402305099486, "grad_norm": 2.199953569657825, "learning_rate": 8.46036424705259e-06, "loss": 0.8571, "step": 7698 }, { "epoch": 0.27904026675365157, "grad_norm": 2.577350925517805, "learning_rate": 8.459940562099481e-06, "loss": 0.7925, "step": 7699 }, { "epoch": 0.2790765104563082, "grad_norm": 2.1488896844076137, "learning_rate": 8.459516829470484e-06, "loss": 0.8615, "step": 7700 }, { "epoch": 0.27911275415896486, "grad_norm": 2.29656390070607, "learning_rate": 8.459093049171436e-06, "loss": 0.9792, "step": 7701 }, { "epoch": 0.27914899786162156, "grad_norm": 2.377968616858445, "learning_rate": 8.45866922120818e-06, "loss": 0.9765, "step": 7702 }, { "epoch": 0.2791852415642782, "grad_norm": 2.2836026020576883, "learning_rate": 8.458245345586552e-06, "loss": 0.9371, "step": 7703 }, { "epoch": 0.27922148526693485, "grad_norm": 2.5073096435758244, "learning_rate": 8.457821422312396e-06, "loss": 0.9689, "step": 7704 }, { "epoch": 0.27925772896959156, "grad_norm": 2.353060146203271, "learning_rate": 8.45739745139155e-06, "loss": 0.8756, "step": 7705 }, { "epoch": 0.2792939726722482, "grad_norm": 2.423808998725028, "learning_rate": 8.45697343282986e-06, "loss": 0.9866, "step": 7706 }, { "epoch": 0.27933021637490485, "grad_norm": 2.6308964968658985, "learning_rate": 8.456549366633166e-06, "loss": 0.9148, "step": 7707 }, { "epoch": 0.27936646007756155, "grad_norm": 2.375983106477961, "learning_rate": 8.456125252807312e-06, "loss": 0.8458, "step": 7708 }, { "epoch": 0.2794027037802182, "grad_norm": 2.4116078776424845, "learning_rate": 8.455701091358143e-06, "loss": 1.0286, "step": 7709 }, { "epoch": 0.27943894748287484, "grad_norm": 2.5132211688005133, "learning_rate": 8.455276882291503e-06, "loss": 0.9308, "step": 7710 }, { "epoch": 0.2794751911855315, "grad_norm": 2.5259549720275665, "learning_rate": 8.454852625613235e-06, "loss": 1.0277, "step": 7711 }, { "epoch": 0.2795114348881882, "grad_norm": 2.2521329376750616, "learning_rate": 8.45442832132919e-06, "loss": 0.9422, "step": 7712 }, { "epoch": 0.27954767859084484, "grad_norm": 2.509274870141768, "learning_rate": 8.454003969445211e-06, "loss": 0.796, "step": 7713 }, { "epoch": 0.2795839222935015, "grad_norm": 2.246509019280807, "learning_rate": 8.453579569967144e-06, "loss": 0.8708, "step": 7714 }, { "epoch": 0.2796201659961582, "grad_norm": 2.15687795208901, "learning_rate": 8.453155122900843e-06, "loss": 0.8362, "step": 7715 }, { "epoch": 0.27965640969881483, "grad_norm": 2.421838377463177, "learning_rate": 8.45273062825215e-06, "loss": 0.8114, "step": 7716 }, { "epoch": 0.2796926534014715, "grad_norm": 2.1206952755923023, "learning_rate": 8.45230608602692e-06, "loss": 1.0783, "step": 7717 }, { "epoch": 0.2797288971041282, "grad_norm": 2.190213648351304, "learning_rate": 8.451881496230999e-06, "loss": 0.9088, "step": 7718 }, { "epoch": 0.27976514080678483, "grad_norm": 2.358285134954171, "learning_rate": 8.451456858870234e-06, "loss": 1.0431, "step": 7719 }, { "epoch": 0.2798013845094415, "grad_norm": 2.5057463388504284, "learning_rate": 8.451032173950486e-06, "loss": 1.1359, "step": 7720 }, { "epoch": 0.2798376282120981, "grad_norm": 2.404058429975274, "learning_rate": 8.450607441477601e-06, "loss": 0.8361, "step": 7721 }, { "epoch": 0.2798738719147548, "grad_norm": 2.2075217241110487, "learning_rate": 8.45018266145743e-06, "loss": 0.8342, "step": 7722 }, { "epoch": 0.27991011561741147, "grad_norm": 2.04978536897893, "learning_rate": 8.449757833895831e-06, "loss": 0.9195, "step": 7723 }, { "epoch": 0.2799463593200681, "grad_norm": 2.302750509812134, "learning_rate": 8.449332958798655e-06, "loss": 0.8454, "step": 7724 }, { "epoch": 0.2799826030227248, "grad_norm": 2.5968306606508498, "learning_rate": 8.448908036171758e-06, "loss": 0.9356, "step": 7725 }, { "epoch": 0.28001884672538147, "grad_norm": 2.580038390557376, "learning_rate": 8.448483066020991e-06, "loss": 1.0721, "step": 7726 }, { "epoch": 0.2800550904280381, "grad_norm": 2.530922559721636, "learning_rate": 8.448058048352215e-06, "loss": 0.9039, "step": 7727 }, { "epoch": 0.2800913341306948, "grad_norm": 2.4757879719882894, "learning_rate": 8.447632983171284e-06, "loss": 0.8244, "step": 7728 }, { "epoch": 0.28012757783335146, "grad_norm": 2.554361198016329, "learning_rate": 8.447207870484056e-06, "loss": 0.874, "step": 7729 }, { "epoch": 0.2801638215360081, "grad_norm": 2.5942773494628306, "learning_rate": 8.446782710296387e-06, "loss": 0.8212, "step": 7730 }, { "epoch": 0.2802000652386648, "grad_norm": 2.4454082765863077, "learning_rate": 8.446357502614138e-06, "loss": 1.1552, "step": 7731 }, { "epoch": 0.28023630894132145, "grad_norm": 2.281914037263944, "learning_rate": 8.445932247443166e-06, "loss": 0.9397, "step": 7732 }, { "epoch": 0.2802725526439781, "grad_norm": 2.3430157358125965, "learning_rate": 8.445506944789332e-06, "loss": 0.9796, "step": 7733 }, { "epoch": 0.28030879634663475, "grad_norm": 2.447628867008022, "learning_rate": 8.445081594658498e-06, "loss": 1.0276, "step": 7734 }, { "epoch": 0.28034504004929145, "grad_norm": 2.373018090312795, "learning_rate": 8.44465619705652e-06, "loss": 0.9582, "step": 7735 }, { "epoch": 0.2803812837519481, "grad_norm": 2.3057183735197504, "learning_rate": 8.444230751989264e-06, "loss": 0.9466, "step": 7736 }, { "epoch": 0.28041752745460474, "grad_norm": 2.316583295532418, "learning_rate": 8.443805259462593e-06, "loss": 0.9697, "step": 7737 }, { "epoch": 0.28045377115726144, "grad_norm": 2.1781173673808936, "learning_rate": 8.443379719482366e-06, "loss": 0.7885, "step": 7738 }, { "epoch": 0.2804900148599181, "grad_norm": 2.6339377849109082, "learning_rate": 8.44295413205445e-06, "loss": 0.9801, "step": 7739 }, { "epoch": 0.28052625856257474, "grad_norm": 2.396024881990824, "learning_rate": 8.442528497184707e-06, "loss": 1.057, "step": 7740 }, { "epoch": 0.28056250226523144, "grad_norm": 2.1320570539045955, "learning_rate": 8.442102814879004e-06, "loss": 0.7427, "step": 7741 }, { "epoch": 0.2805987459678881, "grad_norm": 2.229012404298787, "learning_rate": 8.441677085143208e-06, "loss": 0.9149, "step": 7742 }, { "epoch": 0.28063498967054473, "grad_norm": 2.3686496921374007, "learning_rate": 8.44125130798318e-06, "loss": 1.1676, "step": 7743 }, { "epoch": 0.2806712333732014, "grad_norm": 2.3474081807630305, "learning_rate": 8.440825483404792e-06, "loss": 0.9549, "step": 7744 }, { "epoch": 0.2807074770758581, "grad_norm": 2.2301580935228125, "learning_rate": 8.44039961141391e-06, "loss": 0.9076, "step": 7745 }, { "epoch": 0.2807437207785147, "grad_norm": 2.580153348626851, "learning_rate": 8.4399736920164e-06, "loss": 0.9592, "step": 7746 }, { "epoch": 0.2807799644811714, "grad_norm": 2.2429265521834965, "learning_rate": 8.439547725218135e-06, "loss": 0.9192, "step": 7747 }, { "epoch": 0.2808162081838281, "grad_norm": 2.3518841352392026, "learning_rate": 8.439121711024983e-06, "loss": 0.9438, "step": 7748 }, { "epoch": 0.2808524518864847, "grad_norm": 2.7969122523915426, "learning_rate": 8.438695649442813e-06, "loss": 0.9992, "step": 7749 }, { "epoch": 0.28088869558914137, "grad_norm": 2.389670986255168, "learning_rate": 8.438269540477496e-06, "loss": 0.9601, "step": 7750 }, { "epoch": 0.28092493929179807, "grad_norm": 2.5248811847746793, "learning_rate": 8.437843384134905e-06, "loss": 0.9126, "step": 7751 }, { "epoch": 0.2809611829944547, "grad_norm": 2.265281809891717, "learning_rate": 8.437417180420911e-06, "loss": 1.0243, "step": 7752 }, { "epoch": 0.28099742669711136, "grad_norm": 2.351714972334823, "learning_rate": 8.436990929341387e-06, "loss": 0.8859, "step": 7753 }, { "epoch": 0.28103367039976807, "grad_norm": 2.295169585972529, "learning_rate": 8.436564630902206e-06, "loss": 0.8204, "step": 7754 }, { "epoch": 0.2810699141024247, "grad_norm": 2.2511726022575713, "learning_rate": 8.436138285109243e-06, "loss": 0.8028, "step": 7755 }, { "epoch": 0.28110615780508136, "grad_norm": 2.2362069671073526, "learning_rate": 8.435711891968372e-06, "loss": 0.8554, "step": 7756 }, { "epoch": 0.281142401507738, "grad_norm": 2.4052022904866592, "learning_rate": 8.43528545148547e-06, "loss": 0.943, "step": 7757 }, { "epoch": 0.2811786452103947, "grad_norm": 2.610597838523264, "learning_rate": 8.434858963666412e-06, "loss": 0.9973, "step": 7758 }, { "epoch": 0.28121488891305135, "grad_norm": 2.3731760515266824, "learning_rate": 8.434432428517074e-06, "loss": 0.8482, "step": 7759 }, { "epoch": 0.281251132615708, "grad_norm": 2.3951070618597443, "learning_rate": 8.434005846043336e-06, "loss": 1.1043, "step": 7760 }, { "epoch": 0.2812873763183647, "grad_norm": 2.3687177324846584, "learning_rate": 8.433579216251073e-06, "loss": 0.962, "step": 7761 }, { "epoch": 0.28132362002102135, "grad_norm": 2.1411696235679494, "learning_rate": 8.433152539146166e-06, "loss": 1.0647, "step": 7762 }, { "epoch": 0.281359863723678, "grad_norm": 2.4479825285894066, "learning_rate": 8.432725814734491e-06, "loss": 0.8878, "step": 7763 }, { "epoch": 0.2813961074263347, "grad_norm": 2.3430462405610344, "learning_rate": 8.432299043021933e-06, "loss": 1.0103, "step": 7764 }, { "epoch": 0.28143235112899134, "grad_norm": 2.022406036009772, "learning_rate": 8.431872224014368e-06, "loss": 0.9603, "step": 7765 }, { "epoch": 0.281468594831648, "grad_norm": 2.827409991209498, "learning_rate": 8.43144535771768e-06, "loss": 1.2051, "step": 7766 }, { "epoch": 0.2815048385343047, "grad_norm": 2.4369721470330012, "learning_rate": 8.43101844413775e-06, "loss": 0.7735, "step": 7767 }, { "epoch": 0.28154108223696134, "grad_norm": 2.36812896495169, "learning_rate": 8.430591483280461e-06, "loss": 1.187, "step": 7768 }, { "epoch": 0.281577325939618, "grad_norm": 2.1585729053959444, "learning_rate": 8.430164475151696e-06, "loss": 0.9606, "step": 7769 }, { "epoch": 0.28161356964227463, "grad_norm": 2.4267179076032552, "learning_rate": 8.429737419757341e-06, "loss": 0.9337, "step": 7770 }, { "epoch": 0.28164981334493133, "grad_norm": 2.5953599794183395, "learning_rate": 8.429310317103277e-06, "loss": 1.0525, "step": 7771 }, { "epoch": 0.281686057047588, "grad_norm": 2.2373356322351357, "learning_rate": 8.42888316719539e-06, "loss": 0.9232, "step": 7772 }, { "epoch": 0.2817223007502446, "grad_norm": 2.1567721101631774, "learning_rate": 8.428455970039566e-06, "loss": 0.9007, "step": 7773 }, { "epoch": 0.2817585444529013, "grad_norm": 2.1907805844380963, "learning_rate": 8.428028725641694e-06, "loss": 0.9025, "step": 7774 }, { "epoch": 0.281794788155558, "grad_norm": 2.136944688566144, "learning_rate": 8.42760143400766e-06, "loss": 0.8174, "step": 7775 }, { "epoch": 0.2818310318582146, "grad_norm": 2.4463071430739793, "learning_rate": 8.427174095143349e-06, "loss": 1.0609, "step": 7776 }, { "epoch": 0.2818672755608713, "grad_norm": 2.4663812685117947, "learning_rate": 8.426746709054654e-06, "loss": 0.874, "step": 7777 }, { "epoch": 0.28190351926352797, "grad_norm": 2.0858437902293505, "learning_rate": 8.426319275747459e-06, "loss": 0.9639, "step": 7778 }, { "epoch": 0.2819397629661846, "grad_norm": 2.249232796069389, "learning_rate": 8.425891795227659e-06, "loss": 0.9644, "step": 7779 }, { "epoch": 0.28197600666884126, "grad_norm": 2.212920258906509, "learning_rate": 8.42546426750114e-06, "loss": 1.0176, "step": 7780 }, { "epoch": 0.28201225037149796, "grad_norm": 2.405715954676575, "learning_rate": 8.425036692573797e-06, "loss": 0.9677, "step": 7781 }, { "epoch": 0.2820484940741546, "grad_norm": 2.445475934842596, "learning_rate": 8.424609070451518e-06, "loss": 1.1734, "step": 7782 }, { "epoch": 0.28208473777681126, "grad_norm": 2.117770174735052, "learning_rate": 8.424181401140199e-06, "loss": 0.8444, "step": 7783 }, { "epoch": 0.28212098147946796, "grad_norm": 2.2532773520159832, "learning_rate": 8.42375368464573e-06, "loss": 0.8254, "step": 7784 }, { "epoch": 0.2821572251821246, "grad_norm": 2.071646245267364, "learning_rate": 8.423325920974005e-06, "loss": 0.8063, "step": 7785 }, { "epoch": 0.28219346888478125, "grad_norm": 2.52404505847151, "learning_rate": 8.422898110130922e-06, "loss": 1.0475, "step": 7786 }, { "epoch": 0.28222971258743795, "grad_norm": 2.3110756165203465, "learning_rate": 8.42247025212237e-06, "loss": 0.8296, "step": 7787 }, { "epoch": 0.2822659562900946, "grad_norm": 2.1905528860046357, "learning_rate": 8.42204234695425e-06, "loss": 0.9612, "step": 7788 }, { "epoch": 0.28230219999275125, "grad_norm": 2.433272031493928, "learning_rate": 8.421614394632454e-06, "loss": 1.0478, "step": 7789 }, { "epoch": 0.28233844369540795, "grad_norm": 2.352257875263388, "learning_rate": 8.421186395162883e-06, "loss": 0.8436, "step": 7790 }, { "epoch": 0.2823746873980646, "grad_norm": 2.5463939893931196, "learning_rate": 8.420758348551433e-06, "loss": 0.9313, "step": 7791 }, { "epoch": 0.28241093110072124, "grad_norm": 2.560993781509794, "learning_rate": 8.420330254803999e-06, "loss": 0.9045, "step": 7792 }, { "epoch": 0.2824471748033779, "grad_norm": 2.2256345315038137, "learning_rate": 8.419902113926483e-06, "loss": 0.8962, "step": 7793 }, { "epoch": 0.2824834185060346, "grad_norm": 2.2461396143735852, "learning_rate": 8.419473925924785e-06, "loss": 1.014, "step": 7794 }, { "epoch": 0.28251966220869124, "grad_norm": 2.1111944625009174, "learning_rate": 8.419045690804806e-06, "loss": 0.6984, "step": 7795 }, { "epoch": 0.2825559059113479, "grad_norm": 2.2571285678565385, "learning_rate": 8.418617408572443e-06, "loss": 0.8213, "step": 7796 }, { "epoch": 0.2825921496140046, "grad_norm": 2.518821276269404, "learning_rate": 8.418189079233602e-06, "loss": 0.9136, "step": 7797 }, { "epoch": 0.28262839331666123, "grad_norm": 2.150431600831645, "learning_rate": 8.41776070279418e-06, "loss": 1.1424, "step": 7798 }, { "epoch": 0.2826646370193179, "grad_norm": 2.091365433154149, "learning_rate": 8.417332279260084e-06, "loss": 0.7171, "step": 7799 }, { "epoch": 0.2827008807219746, "grad_norm": 2.3197655207529646, "learning_rate": 8.416903808637215e-06, "loss": 0.859, "step": 7800 }, { "epoch": 0.2827371244246312, "grad_norm": 2.289885187205024, "learning_rate": 8.416475290931478e-06, "loss": 1.088, "step": 7801 }, { "epoch": 0.28277336812728787, "grad_norm": 2.350257587586794, "learning_rate": 8.416046726148779e-06, "loss": 0.9513, "step": 7802 }, { "epoch": 0.2828096118299446, "grad_norm": 2.5411878879662635, "learning_rate": 8.415618114295021e-06, "loss": 0.9235, "step": 7803 }, { "epoch": 0.2828458555326012, "grad_norm": 2.659815726501951, "learning_rate": 8.415189455376112e-06, "loss": 1.0725, "step": 7804 }, { "epoch": 0.28288209923525787, "grad_norm": 2.3291424180174833, "learning_rate": 8.414760749397957e-06, "loss": 0.8506, "step": 7805 }, { "epoch": 0.2829183429379145, "grad_norm": 2.0725360137342177, "learning_rate": 8.414331996366463e-06, "loss": 0.8365, "step": 7806 }, { "epoch": 0.2829545866405712, "grad_norm": 1.9962164670692493, "learning_rate": 8.41390319628754e-06, "loss": 1.0113, "step": 7807 }, { "epoch": 0.28299083034322786, "grad_norm": 2.229950183345715, "learning_rate": 8.413474349167097e-06, "loss": 0.7478, "step": 7808 }, { "epoch": 0.2830270740458845, "grad_norm": 2.3762786533988183, "learning_rate": 8.41304545501104e-06, "loss": 0.8612, "step": 7809 }, { "epoch": 0.2830633177485412, "grad_norm": 2.395902514884097, "learning_rate": 8.412616513825282e-06, "loss": 0.6867, "step": 7810 }, { "epoch": 0.28309956145119786, "grad_norm": 2.1458920520327656, "learning_rate": 8.412187525615731e-06, "loss": 1.1094, "step": 7811 }, { "epoch": 0.2831358051538545, "grad_norm": 2.61278236903975, "learning_rate": 8.411758490388302e-06, "loss": 0.8771, "step": 7812 }, { "epoch": 0.2831720488565112, "grad_norm": 2.50212173720545, "learning_rate": 8.411329408148902e-06, "loss": 1.0035, "step": 7813 }, { "epoch": 0.28320829255916785, "grad_norm": 2.99592182449956, "learning_rate": 8.410900278903446e-06, "loss": 0.8678, "step": 7814 }, { "epoch": 0.2832445362618245, "grad_norm": 2.319457411470696, "learning_rate": 8.410471102657848e-06, "loss": 0.7469, "step": 7815 }, { "epoch": 0.28328077996448114, "grad_norm": 2.344143483482925, "learning_rate": 8.41004187941802e-06, "loss": 0.8233, "step": 7816 }, { "epoch": 0.28331702366713785, "grad_norm": 2.1022313056155486, "learning_rate": 8.409612609189878e-06, "loss": 0.8532, "step": 7817 }, { "epoch": 0.2833532673697945, "grad_norm": 2.527360656306159, "learning_rate": 8.409183291979336e-06, "loss": 0.9473, "step": 7818 }, { "epoch": 0.28338951107245114, "grad_norm": 2.2007746049858574, "learning_rate": 8.408753927792309e-06, "loss": 1.0259, "step": 7819 }, { "epoch": 0.28342575477510784, "grad_norm": 2.021852529953071, "learning_rate": 8.408324516634717e-06, "loss": 0.9261, "step": 7820 }, { "epoch": 0.2834619984777645, "grad_norm": 2.2745701510868055, "learning_rate": 8.407895058512471e-06, "loss": 1.0397, "step": 7821 }, { "epoch": 0.28349824218042113, "grad_norm": 2.572435406630044, "learning_rate": 8.407465553431495e-06, "loss": 1.1223, "step": 7822 }, { "epoch": 0.28353448588307784, "grad_norm": 2.4081115338654553, "learning_rate": 8.407036001397702e-06, "loss": 0.9231, "step": 7823 }, { "epoch": 0.2835707295857345, "grad_norm": 2.596834486637858, "learning_rate": 8.406606402417017e-06, "loss": 0.9305, "step": 7824 }, { "epoch": 0.28360697328839113, "grad_norm": 2.43119848275553, "learning_rate": 8.406176756495353e-06, "loss": 0.9781, "step": 7825 }, { "epoch": 0.28364321699104783, "grad_norm": 2.635705649000894, "learning_rate": 8.405747063638634e-06, "loss": 1.025, "step": 7826 }, { "epoch": 0.2836794606937045, "grad_norm": 2.2856860316823995, "learning_rate": 8.40531732385278e-06, "loss": 1.0912, "step": 7827 }, { "epoch": 0.2837157043963611, "grad_norm": 2.262130294824211, "learning_rate": 8.404887537143712e-06, "loss": 0.9307, "step": 7828 }, { "epoch": 0.28375194809901777, "grad_norm": 2.221159996268316, "learning_rate": 8.404457703517354e-06, "loss": 1.0129, "step": 7829 }, { "epoch": 0.28378819180167447, "grad_norm": 2.153862171890143, "learning_rate": 8.404027822979627e-06, "loss": 0.8916, "step": 7830 }, { "epoch": 0.2838244355043311, "grad_norm": 2.1893147095982357, "learning_rate": 8.403597895536456e-06, "loss": 0.9725, "step": 7831 }, { "epoch": 0.28386067920698776, "grad_norm": 2.7092272034787674, "learning_rate": 8.403167921193766e-06, "loss": 0.8797, "step": 7832 }, { "epoch": 0.28389692290964447, "grad_norm": 2.224637745589684, "learning_rate": 8.402737899957478e-06, "loss": 1.0348, "step": 7833 }, { "epoch": 0.2839331666123011, "grad_norm": 2.3053317547098358, "learning_rate": 8.40230783183352e-06, "loss": 0.8734, "step": 7834 }, { "epoch": 0.28396941031495776, "grad_norm": 1.985921766242236, "learning_rate": 8.401877716827819e-06, "loss": 0.8443, "step": 7835 }, { "epoch": 0.28400565401761446, "grad_norm": 2.672879947586908, "learning_rate": 8.4014475549463e-06, "loss": 0.8841, "step": 7836 }, { "epoch": 0.2840418977202711, "grad_norm": 2.25459103126158, "learning_rate": 8.40101734619489e-06, "loss": 1.0033, "step": 7837 }, { "epoch": 0.28407814142292775, "grad_norm": 2.376409464041315, "learning_rate": 8.400587090579518e-06, "loss": 1.01, "step": 7838 }, { "epoch": 0.28411438512558446, "grad_norm": 2.106042548709626, "learning_rate": 8.400156788106113e-06, "loss": 0.7763, "step": 7839 }, { "epoch": 0.2841506288282411, "grad_norm": 2.2332029895366077, "learning_rate": 8.399726438780603e-06, "loss": 0.8681, "step": 7840 }, { "epoch": 0.28418687253089775, "grad_norm": 2.686147420005232, "learning_rate": 8.399296042608918e-06, "loss": 0.8896, "step": 7841 }, { "epoch": 0.2842231162335544, "grad_norm": 2.1562876744293593, "learning_rate": 8.398865599596993e-06, "loss": 1.0266, "step": 7842 }, { "epoch": 0.2842593599362111, "grad_norm": 2.3126669958647716, "learning_rate": 8.398435109750752e-06, "loss": 0.9378, "step": 7843 }, { "epoch": 0.28429560363886774, "grad_norm": 2.376373361765691, "learning_rate": 8.398004573076133e-06, "loss": 0.9928, "step": 7844 }, { "epoch": 0.2843318473415244, "grad_norm": 2.5443643467894663, "learning_rate": 8.397573989579063e-06, "loss": 0.9271, "step": 7845 }, { "epoch": 0.2843680910441811, "grad_norm": 2.4052881655419136, "learning_rate": 8.397143359265481e-06, "loss": 0.9478, "step": 7846 }, { "epoch": 0.28440433474683774, "grad_norm": 2.3177816806149054, "learning_rate": 8.396712682141318e-06, "loss": 1.0541, "step": 7847 }, { "epoch": 0.2844405784494944, "grad_norm": 2.2134531386947, "learning_rate": 8.396281958212507e-06, "loss": 0.9271, "step": 7848 }, { "epoch": 0.2844768221521511, "grad_norm": 2.222424332121562, "learning_rate": 8.395851187484984e-06, "loss": 0.7297, "step": 7849 }, { "epoch": 0.28451306585480773, "grad_norm": 2.5486276416934577, "learning_rate": 8.395420369964686e-06, "loss": 1.2012, "step": 7850 }, { "epoch": 0.2845493095574644, "grad_norm": 2.8137315479283953, "learning_rate": 8.39498950565755e-06, "loss": 1.001, "step": 7851 }, { "epoch": 0.284585553260121, "grad_norm": 2.3850770415959026, "learning_rate": 8.394558594569511e-06, "loss": 0.935, "step": 7852 }, { "epoch": 0.28462179696277773, "grad_norm": 2.6408514433818797, "learning_rate": 8.394127636706508e-06, "loss": 0.9507, "step": 7853 }, { "epoch": 0.2846580406654344, "grad_norm": 2.491122551423258, "learning_rate": 8.393696632074479e-06, "loss": 1.0288, "step": 7854 }, { "epoch": 0.284694284368091, "grad_norm": 2.2495506334273396, "learning_rate": 8.393265580679362e-06, "loss": 0.9584, "step": 7855 }, { "epoch": 0.2847305280707477, "grad_norm": 2.3144778530753687, "learning_rate": 8.392834482527097e-06, "loss": 0.9324, "step": 7856 }, { "epoch": 0.28476677177340437, "grad_norm": 2.4409116869525325, "learning_rate": 8.392403337623626e-06, "loss": 0.9232, "step": 7857 }, { "epoch": 0.284803015476061, "grad_norm": 2.3921893167702577, "learning_rate": 8.391972145974889e-06, "loss": 1.0594, "step": 7858 }, { "epoch": 0.2848392591787177, "grad_norm": 2.119005374118649, "learning_rate": 8.391540907586826e-06, "loss": 0.947, "step": 7859 }, { "epoch": 0.28487550288137437, "grad_norm": 2.2631380213712573, "learning_rate": 8.391109622465381e-06, "loss": 0.8168, "step": 7860 }, { "epoch": 0.284911746584031, "grad_norm": 2.3571701176378115, "learning_rate": 8.390678290616497e-06, "loss": 0.8843, "step": 7861 }, { "epoch": 0.2849479902866877, "grad_norm": 2.5020881898816345, "learning_rate": 8.390246912046115e-06, "loss": 0.9467, "step": 7862 }, { "epoch": 0.28498423398934436, "grad_norm": 2.4496875808882916, "learning_rate": 8.389815486760182e-06, "loss": 0.9492, "step": 7863 }, { "epoch": 0.285020477692001, "grad_norm": 2.416944728931777, "learning_rate": 8.38938401476464e-06, "loss": 0.9129, "step": 7864 }, { "epoch": 0.28505672139465765, "grad_norm": 2.1745741176930387, "learning_rate": 8.38895249606544e-06, "loss": 0.945, "step": 7865 }, { "epoch": 0.28509296509731435, "grad_norm": 2.420019545909356, "learning_rate": 8.38852093066852e-06, "loss": 0.85, "step": 7866 }, { "epoch": 0.285129208799971, "grad_norm": 2.49260792426434, "learning_rate": 8.388089318579834e-06, "loss": 1.0625, "step": 7867 }, { "epoch": 0.28516545250262765, "grad_norm": 2.436640239149667, "learning_rate": 8.387657659805325e-06, "loss": 0.9769, "step": 7868 }, { "epoch": 0.28520169620528435, "grad_norm": 2.6013321938616283, "learning_rate": 8.38722595435094e-06, "loss": 1.0913, "step": 7869 }, { "epoch": 0.285237939907941, "grad_norm": 2.362629724899264, "learning_rate": 8.386794202222634e-06, "loss": 0.9768, "step": 7870 }, { "epoch": 0.28527418361059764, "grad_norm": 2.350894647234409, "learning_rate": 8.386362403426349e-06, "loss": 0.9174, "step": 7871 }, { "epoch": 0.28531042731325434, "grad_norm": 2.2923836117150533, "learning_rate": 8.38593055796804e-06, "loss": 0.8025, "step": 7872 }, { "epoch": 0.285346671015911, "grad_norm": 1.981962132344794, "learning_rate": 8.385498665853656e-06, "loss": 0.8164, "step": 7873 }, { "epoch": 0.28538291471856764, "grad_norm": 2.354501943901776, "learning_rate": 8.385066727089146e-06, "loss": 0.9674, "step": 7874 }, { "epoch": 0.28541915842122434, "grad_norm": 2.298822490139961, "learning_rate": 8.384634741680464e-06, "loss": 1.1224, "step": 7875 }, { "epoch": 0.285455402123881, "grad_norm": 2.1493608697692443, "learning_rate": 8.384202709633565e-06, "loss": 0.9388, "step": 7876 }, { "epoch": 0.28549164582653763, "grad_norm": 2.4643801358081396, "learning_rate": 8.383770630954396e-06, "loss": 0.8374, "step": 7877 }, { "epoch": 0.2855278895291943, "grad_norm": 2.2873046343595296, "learning_rate": 8.383338505648915e-06, "loss": 0.9012, "step": 7878 }, { "epoch": 0.285564133231851, "grad_norm": 2.384221832090661, "learning_rate": 8.382906333723075e-06, "loss": 0.8436, "step": 7879 }, { "epoch": 0.2856003769345076, "grad_norm": 2.7472246104243774, "learning_rate": 8.382474115182835e-06, "loss": 1.0889, "step": 7880 }, { "epoch": 0.2856366206371643, "grad_norm": 2.537365809800689, "learning_rate": 8.382041850034143e-06, "loss": 1.0883, "step": 7881 }, { "epoch": 0.285672864339821, "grad_norm": 2.39860176715985, "learning_rate": 8.381609538282962e-06, "loss": 0.7992, "step": 7882 }, { "epoch": 0.2857091080424776, "grad_norm": 2.387857379682014, "learning_rate": 8.381177179935247e-06, "loss": 0.8849, "step": 7883 }, { "epoch": 0.28574535174513427, "grad_norm": 2.614087819800827, "learning_rate": 8.380744774996954e-06, "loss": 1.045, "step": 7884 }, { "epoch": 0.28578159544779097, "grad_norm": 2.3653363712347564, "learning_rate": 8.380312323474042e-06, "loss": 0.9381, "step": 7885 }, { "epoch": 0.2858178391504476, "grad_norm": 2.271822138817276, "learning_rate": 8.379879825372473e-06, "loss": 1.0284, "step": 7886 }, { "epoch": 0.28585408285310426, "grad_norm": 2.384609566705436, "learning_rate": 8.379447280698203e-06, "loss": 0.9358, "step": 7887 }, { "epoch": 0.2858903265557609, "grad_norm": 2.3614595092954946, "learning_rate": 8.379014689457193e-06, "loss": 0.7872, "step": 7888 }, { "epoch": 0.2859265702584176, "grad_norm": 2.203568093101592, "learning_rate": 8.378582051655404e-06, "loss": 0.9873, "step": 7889 }, { "epoch": 0.28596281396107426, "grad_norm": 2.4533035261386646, "learning_rate": 8.378149367298798e-06, "loss": 1.0438, "step": 7890 }, { "epoch": 0.2859990576637309, "grad_norm": 2.230859931231623, "learning_rate": 8.377716636393338e-06, "loss": 0.8644, "step": 7891 }, { "epoch": 0.2860353013663876, "grad_norm": 2.4519717511236885, "learning_rate": 8.377283858944984e-06, "loss": 1.0014, "step": 7892 }, { "epoch": 0.28607154506904425, "grad_norm": 2.4231781509915993, "learning_rate": 8.376851034959701e-06, "loss": 0.8371, "step": 7893 }, { "epoch": 0.2861077887717009, "grad_norm": 2.479350610771957, "learning_rate": 8.376418164443453e-06, "loss": 0.9005, "step": 7894 }, { "epoch": 0.2861440324743576, "grad_norm": 2.4016401451184337, "learning_rate": 8.375985247402205e-06, "loss": 1.018, "step": 7895 }, { "epoch": 0.28618027617701425, "grad_norm": 2.3775838112784435, "learning_rate": 8.375552283841922e-06, "loss": 0.9162, "step": 7896 }, { "epoch": 0.2862165198796709, "grad_norm": 2.34062430214545, "learning_rate": 8.375119273768568e-06, "loss": 0.959, "step": 7897 }, { "epoch": 0.2862527635823276, "grad_norm": 2.2222322278498656, "learning_rate": 8.374686217188114e-06, "loss": 0.8374, "step": 7898 }, { "epoch": 0.28628900728498424, "grad_norm": 2.4643207569399004, "learning_rate": 8.374253114106526e-06, "loss": 0.9489, "step": 7899 }, { "epoch": 0.2863252509876409, "grad_norm": 2.255254788944932, "learning_rate": 8.373819964529768e-06, "loss": 1.0773, "step": 7900 }, { "epoch": 0.28636149469029754, "grad_norm": 2.5060044597228885, "learning_rate": 8.373386768463813e-06, "loss": 0.9829, "step": 7901 }, { "epoch": 0.28639773839295424, "grad_norm": 2.121740682723417, "learning_rate": 8.372953525914628e-06, "loss": 0.9295, "step": 7902 }, { "epoch": 0.2864339820956109, "grad_norm": 2.308809685433505, "learning_rate": 8.372520236888183e-06, "loss": 0.9187, "step": 7903 }, { "epoch": 0.28647022579826753, "grad_norm": 2.161833979586702, "learning_rate": 8.372086901390449e-06, "loss": 0.8789, "step": 7904 }, { "epoch": 0.28650646950092423, "grad_norm": 2.277933606835913, "learning_rate": 8.371653519427396e-06, "loss": 0.985, "step": 7905 }, { "epoch": 0.2865427132035809, "grad_norm": 2.375874681240227, "learning_rate": 8.371220091004996e-06, "loss": 0.9632, "step": 7906 }, { "epoch": 0.2865789569062375, "grad_norm": 2.340442898206705, "learning_rate": 8.370786616129224e-06, "loss": 0.9784, "step": 7907 }, { "epoch": 0.2866152006088942, "grad_norm": 2.244996668873852, "learning_rate": 8.370353094806052e-06, "loss": 0.9511, "step": 7908 }, { "epoch": 0.2866514443115509, "grad_norm": 2.4560346200099223, "learning_rate": 8.36991952704145e-06, "loss": 0.9947, "step": 7909 }, { "epoch": 0.2866876880142075, "grad_norm": 2.266770510540413, "learning_rate": 8.369485912841396e-06, "loss": 1.0205, "step": 7910 }, { "epoch": 0.2867239317168642, "grad_norm": 2.377145401260787, "learning_rate": 8.369052252211864e-06, "loss": 1.0722, "step": 7911 }, { "epoch": 0.28676017541952087, "grad_norm": 2.216827006609652, "learning_rate": 8.36861854515883e-06, "loss": 0.8983, "step": 7912 }, { "epoch": 0.2867964191221775, "grad_norm": 2.1031017557992993, "learning_rate": 8.36818479168827e-06, "loss": 1.0115, "step": 7913 }, { "epoch": 0.28683266282483416, "grad_norm": 2.209706822575166, "learning_rate": 8.367750991806158e-06, "loss": 1.0189, "step": 7914 }, { "epoch": 0.28686890652749086, "grad_norm": 2.6191682287540217, "learning_rate": 8.367317145518477e-06, "loss": 1.077, "step": 7915 }, { "epoch": 0.2869051502301475, "grad_norm": 2.41693411612145, "learning_rate": 8.366883252831201e-06, "loss": 0.8972, "step": 7916 }, { "epoch": 0.28694139393280416, "grad_norm": 2.3639137853194074, "learning_rate": 8.366449313750312e-06, "loss": 1.0074, "step": 7917 }, { "epoch": 0.28697763763546086, "grad_norm": 2.498714988798043, "learning_rate": 8.366015328281785e-06, "loss": 0.8329, "step": 7918 }, { "epoch": 0.2870138813381175, "grad_norm": 2.310819823725442, "learning_rate": 8.365581296431602e-06, "loss": 0.8615, "step": 7919 }, { "epoch": 0.28705012504077415, "grad_norm": 2.208295769071825, "learning_rate": 8.365147218205744e-06, "loss": 0.8569, "step": 7920 }, { "epoch": 0.28708636874343085, "grad_norm": 2.137131663663605, "learning_rate": 8.364713093610195e-06, "loss": 0.8088, "step": 7921 }, { "epoch": 0.2871226124460875, "grad_norm": 1.995826535645569, "learning_rate": 8.364278922650933e-06, "loss": 0.9445, "step": 7922 }, { "epoch": 0.28715885614874415, "grad_norm": 2.448244614941572, "learning_rate": 8.363844705333944e-06, "loss": 1.0487, "step": 7923 }, { "epoch": 0.2871950998514008, "grad_norm": 2.461077917329584, "learning_rate": 8.363410441665206e-06, "loss": 0.9457, "step": 7924 }, { "epoch": 0.2872313435540575, "grad_norm": 2.590569551970252, "learning_rate": 8.362976131650708e-06, "loss": 1.0541, "step": 7925 }, { "epoch": 0.28726758725671414, "grad_norm": 2.3286390195139357, "learning_rate": 8.362541775296433e-06, "loss": 0.7977, "step": 7926 }, { "epoch": 0.2873038309593708, "grad_norm": 2.1351380276896696, "learning_rate": 8.362107372608365e-06, "loss": 0.8469, "step": 7927 }, { "epoch": 0.2873400746620275, "grad_norm": 2.199518864749833, "learning_rate": 8.361672923592493e-06, "loss": 0.9513, "step": 7928 }, { "epoch": 0.28737631836468414, "grad_norm": 2.2507431357895684, "learning_rate": 8.361238428254799e-06, "loss": 1.042, "step": 7929 }, { "epoch": 0.2874125620673408, "grad_norm": 2.28141114821897, "learning_rate": 8.360803886601273e-06, "loss": 1.0835, "step": 7930 }, { "epoch": 0.2874488057699975, "grad_norm": 2.518042830539748, "learning_rate": 8.360369298637902e-06, "loss": 0.938, "step": 7931 }, { "epoch": 0.28748504947265413, "grad_norm": 2.4597016362254593, "learning_rate": 8.359934664370672e-06, "loss": 0.8913, "step": 7932 }, { "epoch": 0.2875212931753108, "grad_norm": 2.200972865344652, "learning_rate": 8.359499983805577e-06, "loss": 0.8698, "step": 7933 }, { "epoch": 0.2875575368779675, "grad_norm": 2.226176821578166, "learning_rate": 8.359065256948604e-06, "loss": 1.0033, "step": 7934 }, { "epoch": 0.2875937805806241, "grad_norm": 2.5639744012986583, "learning_rate": 8.358630483805743e-06, "loss": 1.1327, "step": 7935 }, { "epoch": 0.28763002428328077, "grad_norm": 2.1781052029173513, "learning_rate": 8.358195664382984e-06, "loss": 0.8207, "step": 7936 }, { "epoch": 0.2876662679859374, "grad_norm": 2.1719488672286507, "learning_rate": 8.357760798686321e-06, "loss": 0.9704, "step": 7937 }, { "epoch": 0.2877025116885941, "grad_norm": 2.296571236454387, "learning_rate": 8.357325886721745e-06, "loss": 0.866, "step": 7938 }, { "epoch": 0.28773875539125077, "grad_norm": 2.433736262570585, "learning_rate": 8.356890928495249e-06, "loss": 0.7995, "step": 7939 }, { "epoch": 0.2877749990939074, "grad_norm": 2.3346718622987277, "learning_rate": 8.356455924012825e-06, "loss": 0.9066, "step": 7940 }, { "epoch": 0.2878112427965641, "grad_norm": 2.0369882093032223, "learning_rate": 8.356020873280468e-06, "loss": 0.9496, "step": 7941 }, { "epoch": 0.28784748649922076, "grad_norm": 2.215271518712355, "learning_rate": 8.355585776304174e-06, "loss": 0.7319, "step": 7942 }, { "epoch": 0.2878837302018774, "grad_norm": 2.3413607792704436, "learning_rate": 8.355150633089937e-06, "loss": 0.8389, "step": 7943 }, { "epoch": 0.2879199739045341, "grad_norm": 2.55138331393507, "learning_rate": 8.354715443643754e-06, "loss": 0.9536, "step": 7944 }, { "epoch": 0.28795621760719076, "grad_norm": 2.555699351825635, "learning_rate": 8.354280207971621e-06, "loss": 1.0339, "step": 7945 }, { "epoch": 0.2879924613098474, "grad_norm": 2.36858799143265, "learning_rate": 8.353844926079536e-06, "loss": 0.8671, "step": 7946 }, { "epoch": 0.2880287050125041, "grad_norm": 2.3528666385239965, "learning_rate": 8.353409597973494e-06, "loss": 0.9362, "step": 7947 }, { "epoch": 0.28806494871516075, "grad_norm": 2.2820325860864994, "learning_rate": 8.352974223659495e-06, "loss": 0.8693, "step": 7948 }, { "epoch": 0.2881011924178174, "grad_norm": 2.5380131567729083, "learning_rate": 8.352538803143543e-06, "loss": 1.1204, "step": 7949 }, { "epoch": 0.28813743612047404, "grad_norm": 2.5063759543357054, "learning_rate": 8.352103336431631e-06, "loss": 1.0278, "step": 7950 }, { "epoch": 0.28817367982313075, "grad_norm": 2.394030155648297, "learning_rate": 8.351667823529765e-06, "loss": 1.0742, "step": 7951 }, { "epoch": 0.2882099235257874, "grad_norm": 2.246057886414975, "learning_rate": 8.35123226444394e-06, "loss": 0.8565, "step": 7952 }, { "epoch": 0.28824616722844404, "grad_norm": 2.29820198690489, "learning_rate": 8.350796659180163e-06, "loss": 0.9038, "step": 7953 }, { "epoch": 0.28828241093110074, "grad_norm": 2.3448940968741794, "learning_rate": 8.350361007744435e-06, "loss": 0.8698, "step": 7954 }, { "epoch": 0.2883186546337574, "grad_norm": 2.500632604493759, "learning_rate": 8.349925310142756e-06, "loss": 1.0036, "step": 7955 }, { "epoch": 0.28835489833641403, "grad_norm": 2.3772552335513653, "learning_rate": 8.349489566381135e-06, "loss": 1.0447, "step": 7956 }, { "epoch": 0.28839114203907074, "grad_norm": 2.4947553977637518, "learning_rate": 8.349053776465573e-06, "loss": 0.9955, "step": 7957 }, { "epoch": 0.2884273857417274, "grad_norm": 2.133520535927297, "learning_rate": 8.348617940402075e-06, "loss": 0.8608, "step": 7958 }, { "epoch": 0.28846362944438403, "grad_norm": 2.8181880252614655, "learning_rate": 8.348182058196647e-06, "loss": 0.9427, "step": 7959 }, { "epoch": 0.2884998731470407, "grad_norm": 2.6529674256631166, "learning_rate": 8.347746129855295e-06, "loss": 0.926, "step": 7960 }, { "epoch": 0.2885361168496974, "grad_norm": 2.0589642056009154, "learning_rate": 8.347310155384025e-06, "loss": 0.7698, "step": 7961 }, { "epoch": 0.288572360552354, "grad_norm": 2.465496817610451, "learning_rate": 8.346874134788848e-06, "loss": 1.0412, "step": 7962 }, { "epoch": 0.28860860425501067, "grad_norm": 2.5095710584205513, "learning_rate": 8.346438068075768e-06, "loss": 0.9858, "step": 7963 }, { "epoch": 0.28864484795766737, "grad_norm": 2.27567387587167, "learning_rate": 8.346001955250795e-06, "loss": 0.9006, "step": 7964 }, { "epoch": 0.288681091660324, "grad_norm": 2.236554543279107, "learning_rate": 8.345565796319939e-06, "loss": 0.9128, "step": 7965 }, { "epoch": 0.28871733536298066, "grad_norm": 2.5607529515656138, "learning_rate": 8.34512959128921e-06, "loss": 1.019, "step": 7966 }, { "epoch": 0.28875357906563737, "grad_norm": 2.4511537643198125, "learning_rate": 8.344693340164617e-06, "loss": 0.8571, "step": 7967 }, { "epoch": 0.288789822768294, "grad_norm": 2.0722907466675253, "learning_rate": 8.344257042952173e-06, "loss": 1.0543, "step": 7968 }, { "epoch": 0.28882606647095066, "grad_norm": 2.433517285861365, "learning_rate": 8.34382069965789e-06, "loss": 0.9659, "step": 7969 }, { "epoch": 0.28886231017360736, "grad_norm": 2.553760775876138, "learning_rate": 8.34338431028778e-06, "loss": 0.8593, "step": 7970 }, { "epoch": 0.288898553876264, "grad_norm": 2.5399643144711233, "learning_rate": 8.342947874847857e-06, "loss": 1.0086, "step": 7971 }, { "epoch": 0.28893479757892065, "grad_norm": 2.5202040222692283, "learning_rate": 8.342511393344132e-06, "loss": 1.0306, "step": 7972 }, { "epoch": 0.2889710412815773, "grad_norm": 2.3339663181882413, "learning_rate": 8.342074865782623e-06, "loss": 0.9371, "step": 7973 }, { "epoch": 0.289007284984234, "grad_norm": 2.116233174965674, "learning_rate": 8.341638292169343e-06, "loss": 0.8608, "step": 7974 }, { "epoch": 0.28904352868689065, "grad_norm": 2.595201308845141, "learning_rate": 8.341201672510311e-06, "loss": 0.7211, "step": 7975 }, { "epoch": 0.2890797723895473, "grad_norm": 2.2266578213289403, "learning_rate": 8.340765006811538e-06, "loss": 0.8194, "step": 7976 }, { "epoch": 0.289116016092204, "grad_norm": 2.3439725604493438, "learning_rate": 8.340328295079044e-06, "loss": 0.9955, "step": 7977 }, { "epoch": 0.28915225979486064, "grad_norm": 2.325966053731759, "learning_rate": 8.339891537318845e-06, "loss": 0.9806, "step": 7978 }, { "epoch": 0.2891885034975173, "grad_norm": 3.1374312532257864, "learning_rate": 8.339454733536963e-06, "loss": 0.8679, "step": 7979 }, { "epoch": 0.289224747200174, "grad_norm": 2.1452706791620595, "learning_rate": 8.339017883739412e-06, "loss": 1.077, "step": 7980 }, { "epoch": 0.28926099090283064, "grad_norm": 2.5198431762648807, "learning_rate": 8.338580987932215e-06, "loss": 0.8956, "step": 7981 }, { "epoch": 0.2892972346054873, "grad_norm": 2.5162995251707514, "learning_rate": 8.33814404612139e-06, "loss": 1.0424, "step": 7982 }, { "epoch": 0.289333478308144, "grad_norm": 2.343202164622009, "learning_rate": 8.33770705831296e-06, "loss": 0.8293, "step": 7983 }, { "epoch": 0.28936972201080063, "grad_norm": 2.6563293431295874, "learning_rate": 8.337270024512947e-06, "loss": 1.0368, "step": 7984 }, { "epoch": 0.2894059657134573, "grad_norm": 2.3447085709102184, "learning_rate": 8.336832944727368e-06, "loss": 0.9377, "step": 7985 }, { "epoch": 0.2894422094161139, "grad_norm": 2.49865414006091, "learning_rate": 8.336395818962252e-06, "loss": 1.1127, "step": 7986 }, { "epoch": 0.28947845311877063, "grad_norm": 1.8897860346747752, "learning_rate": 8.335958647223617e-06, "loss": 0.7513, "step": 7987 }, { "epoch": 0.2895146968214273, "grad_norm": 2.4796144575934216, "learning_rate": 8.335521429517493e-06, "loss": 1.0217, "step": 7988 }, { "epoch": 0.2895509405240839, "grad_norm": 2.292488952675081, "learning_rate": 8.335084165849895e-06, "loss": 0.8964, "step": 7989 }, { "epoch": 0.2895871842267406, "grad_norm": 2.3320710456917286, "learning_rate": 8.33464685622686e-06, "loss": 0.741, "step": 7990 }, { "epoch": 0.28962342792939727, "grad_norm": 2.4047390064514578, "learning_rate": 8.334209500654404e-06, "loss": 1.0011, "step": 7991 }, { "epoch": 0.2896596716320539, "grad_norm": 2.398147403090664, "learning_rate": 8.33377209913856e-06, "loss": 0.9178, "step": 7992 }, { "epoch": 0.2896959153347106, "grad_norm": 2.053069592907809, "learning_rate": 8.333334651685352e-06, "loss": 0.7817, "step": 7993 }, { "epoch": 0.28973215903736727, "grad_norm": 2.4516189472745666, "learning_rate": 8.33289715830081e-06, "loss": 1.0147, "step": 7994 }, { "epoch": 0.2897684027400239, "grad_norm": 2.291990787755099, "learning_rate": 8.332459618990958e-06, "loss": 1.0505, "step": 7995 }, { "epoch": 0.28980464644268056, "grad_norm": 2.46686129853496, "learning_rate": 8.332022033761828e-06, "loss": 1.0978, "step": 7996 }, { "epoch": 0.28984089014533726, "grad_norm": 2.3687260586348886, "learning_rate": 8.331584402619452e-06, "loss": 0.8142, "step": 7997 }, { "epoch": 0.2898771338479939, "grad_norm": 2.101403360891441, "learning_rate": 8.331146725569856e-06, "loss": 0.8237, "step": 7998 }, { "epoch": 0.28991337755065055, "grad_norm": 2.311648830152721, "learning_rate": 8.330709002619073e-06, "loss": 1.0327, "step": 7999 }, { "epoch": 0.28994962125330725, "grad_norm": 2.262079550111414, "learning_rate": 8.330271233773134e-06, "loss": 1.0549, "step": 8000 }, { "epoch": 0.2899858649559639, "grad_norm": 2.342082665752865, "learning_rate": 8.329833419038073e-06, "loss": 0.75, "step": 8001 }, { "epoch": 0.29002210865862055, "grad_norm": 2.1746493222715615, "learning_rate": 8.32939555841992e-06, "loss": 1.0164, "step": 8002 }, { "epoch": 0.29005835236127725, "grad_norm": 2.187305841342538, "learning_rate": 8.32895765192471e-06, "loss": 0.914, "step": 8003 }, { "epoch": 0.2900945960639339, "grad_norm": 2.159960251939613, "learning_rate": 8.328519699558477e-06, "loss": 1.0551, "step": 8004 }, { "epoch": 0.29013083976659054, "grad_norm": 2.122527550969587, "learning_rate": 8.328081701327255e-06, "loss": 0.9799, "step": 8005 }, { "epoch": 0.29016708346924724, "grad_norm": 2.6335535427061747, "learning_rate": 8.327643657237081e-06, "loss": 0.9882, "step": 8006 }, { "epoch": 0.2902033271719039, "grad_norm": 2.3259833361050317, "learning_rate": 8.327205567293988e-06, "loss": 0.7931, "step": 8007 }, { "epoch": 0.29023957087456054, "grad_norm": 2.367761053113119, "learning_rate": 8.326767431504017e-06, "loss": 0.9226, "step": 8008 }, { "epoch": 0.2902758145772172, "grad_norm": 2.3089241603207937, "learning_rate": 8.326329249873201e-06, "loss": 0.8544, "step": 8009 }, { "epoch": 0.2903120582798739, "grad_norm": 2.4138602092078827, "learning_rate": 8.325891022407579e-06, "loss": 0.9749, "step": 8010 }, { "epoch": 0.29034830198253053, "grad_norm": 2.2431960636515624, "learning_rate": 8.325452749113191e-06, "loss": 0.9047, "step": 8011 }, { "epoch": 0.2903845456851872, "grad_norm": 2.4087515162286928, "learning_rate": 8.325014429996076e-06, "loss": 0.8035, "step": 8012 }, { "epoch": 0.2904207893878439, "grad_norm": 2.2233170202217027, "learning_rate": 8.324576065062274e-06, "loss": 0.9777, "step": 8013 }, { "epoch": 0.2904570330905005, "grad_norm": 2.708858931293542, "learning_rate": 8.324137654317824e-06, "loss": 1.0418, "step": 8014 }, { "epoch": 0.2904932767931572, "grad_norm": 2.560073271752196, "learning_rate": 8.323699197768765e-06, "loss": 0.8678, "step": 8015 }, { "epoch": 0.2905295204958139, "grad_norm": 2.285174948094649, "learning_rate": 8.323260695421142e-06, "loss": 0.818, "step": 8016 }, { "epoch": 0.2905657641984705, "grad_norm": 2.2227951925994294, "learning_rate": 8.322822147280997e-06, "loss": 0.9808, "step": 8017 }, { "epoch": 0.29060200790112717, "grad_norm": 2.64139062452416, "learning_rate": 8.322383553354373e-06, "loss": 0.9624, "step": 8018 }, { "epoch": 0.2906382516037838, "grad_norm": 2.2114926943458832, "learning_rate": 8.321944913647312e-06, "loss": 0.7948, "step": 8019 }, { "epoch": 0.2906744953064405, "grad_norm": 2.1800997635215653, "learning_rate": 8.321506228165861e-06, "loss": 1.045, "step": 8020 }, { "epoch": 0.29071073900909716, "grad_norm": 2.5983961572003227, "learning_rate": 8.321067496916062e-06, "loss": 0.8608, "step": 8021 }, { "epoch": 0.2907469827117538, "grad_norm": 2.5353667454217055, "learning_rate": 8.32062871990396e-06, "loss": 0.9038, "step": 8022 }, { "epoch": 0.2907832264144105, "grad_norm": 2.4323983308548702, "learning_rate": 8.320189897135604e-06, "loss": 0.8654, "step": 8023 }, { "epoch": 0.29081947011706716, "grad_norm": 2.338397194377249, "learning_rate": 8.319751028617039e-06, "loss": 0.809, "step": 8024 }, { "epoch": 0.2908557138197238, "grad_norm": 2.47599085644482, "learning_rate": 8.319312114354313e-06, "loss": 0.9813, "step": 8025 }, { "epoch": 0.2908919575223805, "grad_norm": 2.500912604188784, "learning_rate": 8.318873154353471e-06, "loss": 0.8666, "step": 8026 }, { "epoch": 0.29092820122503715, "grad_norm": 2.8105012723217557, "learning_rate": 8.318434148620566e-06, "loss": 1.0042, "step": 8027 }, { "epoch": 0.2909644449276938, "grad_norm": 2.2034322995008093, "learning_rate": 8.317995097161647e-06, "loss": 1.051, "step": 8028 }, { "epoch": 0.2910006886303505, "grad_norm": 2.600511665589311, "learning_rate": 8.317555999982759e-06, "loss": 1.0053, "step": 8029 }, { "epoch": 0.29103693233300715, "grad_norm": 2.137877386081784, "learning_rate": 8.317116857089957e-06, "loss": 1.1464, "step": 8030 }, { "epoch": 0.2910731760356638, "grad_norm": 2.3457805683374464, "learning_rate": 8.316677668489293e-06, "loss": 1.0347, "step": 8031 }, { "epoch": 0.29110941973832044, "grad_norm": 2.2906793593230588, "learning_rate": 8.316238434186814e-06, "loss": 1.0047, "step": 8032 }, { "epoch": 0.29114566344097714, "grad_norm": 2.1093132097291405, "learning_rate": 8.315799154188575e-06, "loss": 1.0362, "step": 8033 }, { "epoch": 0.2911819071436338, "grad_norm": 2.402357506891593, "learning_rate": 8.31535982850063e-06, "loss": 0.9344, "step": 8034 }, { "epoch": 0.29121815084629044, "grad_norm": 2.523432195907922, "learning_rate": 8.31492045712903e-06, "loss": 0.8667, "step": 8035 }, { "epoch": 0.29125439454894714, "grad_norm": 2.5773618112793604, "learning_rate": 8.314481040079833e-06, "loss": 0.9328, "step": 8036 }, { "epoch": 0.2912906382516038, "grad_norm": 2.372552755300664, "learning_rate": 8.314041577359091e-06, "loss": 0.9183, "step": 8037 }, { "epoch": 0.29132688195426043, "grad_norm": 2.533100455373732, "learning_rate": 8.313602068972861e-06, "loss": 0.9915, "step": 8038 }, { "epoch": 0.29136312565691713, "grad_norm": 2.7195895736076747, "learning_rate": 8.313162514927197e-06, "loss": 0.8837, "step": 8039 }, { "epoch": 0.2913993693595738, "grad_norm": 2.188049317966998, "learning_rate": 8.312722915228157e-06, "loss": 0.8554, "step": 8040 }, { "epoch": 0.2914356130622304, "grad_norm": 2.6611911509965624, "learning_rate": 8.3122832698818e-06, "loss": 0.9102, "step": 8041 }, { "epoch": 0.2914718567648871, "grad_norm": 2.3791427286762477, "learning_rate": 8.311843578894183e-06, "loss": 1.1444, "step": 8042 }, { "epoch": 0.2915081004675438, "grad_norm": 2.3501806851863405, "learning_rate": 8.311403842271365e-06, "loss": 0.9356, "step": 8043 }, { "epoch": 0.2915443441702004, "grad_norm": 2.398561757069848, "learning_rate": 8.310964060019403e-06, "loss": 0.9459, "step": 8044 }, { "epoch": 0.29158058787285707, "grad_norm": 2.1093866460528043, "learning_rate": 8.31052423214436e-06, "loss": 1.034, "step": 8045 }, { "epoch": 0.29161683157551377, "grad_norm": 2.170115654605359, "learning_rate": 8.310084358652295e-06, "loss": 1.0688, "step": 8046 }, { "epoch": 0.2916530752781704, "grad_norm": 2.600769365046441, "learning_rate": 8.309644439549267e-06, "loss": 0.8766, "step": 8047 }, { "epoch": 0.29168931898082706, "grad_norm": 2.278123884429352, "learning_rate": 8.309204474841343e-06, "loss": 0.9464, "step": 8048 }, { "epoch": 0.29172556268348376, "grad_norm": 2.4256051977578523, "learning_rate": 8.308764464534582e-06, "loss": 0.7867, "step": 8049 }, { "epoch": 0.2917618063861404, "grad_norm": 2.4630892138939027, "learning_rate": 8.308324408635047e-06, "loss": 1.0896, "step": 8050 }, { "epoch": 0.29179805008879706, "grad_norm": 2.211499577959382, "learning_rate": 8.307884307148804e-06, "loss": 1.0169, "step": 8051 }, { "epoch": 0.29183429379145376, "grad_norm": 2.5205823593869128, "learning_rate": 8.307444160081915e-06, "loss": 1.0734, "step": 8052 }, { "epoch": 0.2918705374941104, "grad_norm": 2.414216363451683, "learning_rate": 8.307003967440444e-06, "loss": 0.9614, "step": 8053 }, { "epoch": 0.29190678119676705, "grad_norm": 2.379636837175074, "learning_rate": 8.306563729230461e-06, "loss": 0.9032, "step": 8054 }, { "epoch": 0.2919430248994237, "grad_norm": 2.4684712735640306, "learning_rate": 8.306123445458028e-06, "loss": 0.9902, "step": 8055 }, { "epoch": 0.2919792686020804, "grad_norm": 2.2381420792063085, "learning_rate": 8.305683116129214e-06, "loss": 0.8121, "step": 8056 }, { "epoch": 0.29201551230473705, "grad_norm": 2.5917791313261667, "learning_rate": 8.305242741250085e-06, "loss": 1.0815, "step": 8057 }, { "epoch": 0.2920517560073937, "grad_norm": 2.2916389428360198, "learning_rate": 8.304802320826711e-06, "loss": 0.9834, "step": 8058 }, { "epoch": 0.2920879997100504, "grad_norm": 2.2050858369877706, "learning_rate": 8.304361854865159e-06, "loss": 0.9227, "step": 8059 }, { "epoch": 0.29212424341270704, "grad_norm": 2.1356974370003536, "learning_rate": 8.303921343371501e-06, "loss": 0.9694, "step": 8060 }, { "epoch": 0.2921604871153637, "grad_norm": 2.271156106997236, "learning_rate": 8.303480786351803e-06, "loss": 0.8583, "step": 8061 }, { "epoch": 0.2921967308180204, "grad_norm": 2.607329853680643, "learning_rate": 8.303040183812139e-06, "loss": 0.9373, "step": 8062 }, { "epoch": 0.29223297452067704, "grad_norm": 2.3590734137571077, "learning_rate": 8.302599535758578e-06, "loss": 1.0312, "step": 8063 }, { "epoch": 0.2922692182233337, "grad_norm": 2.4205735544145197, "learning_rate": 8.302158842197192e-06, "loss": 0.9367, "step": 8064 }, { "epoch": 0.2923054619259904, "grad_norm": 2.358617539734703, "learning_rate": 8.301718103134055e-06, "loss": 0.8106, "step": 8065 }, { "epoch": 0.29234170562864703, "grad_norm": 2.699929512222721, "learning_rate": 8.301277318575241e-06, "loss": 1.0254, "step": 8066 }, { "epoch": 0.2923779493313037, "grad_norm": 2.5467502813592886, "learning_rate": 8.300836488526819e-06, "loss": 0.8843, "step": 8067 }, { "epoch": 0.2924141930339603, "grad_norm": 2.366697308997074, "learning_rate": 8.300395612994866e-06, "loss": 0.9571, "step": 8068 }, { "epoch": 0.292450436736617, "grad_norm": 2.700673702233026, "learning_rate": 8.299954691985461e-06, "loss": 1.0194, "step": 8069 }, { "epoch": 0.29248668043927367, "grad_norm": 2.4097503831911826, "learning_rate": 8.299513725504675e-06, "loss": 0.9296, "step": 8070 }, { "epoch": 0.2925229241419303, "grad_norm": 2.3296170986511218, "learning_rate": 8.299072713558585e-06, "loss": 0.9283, "step": 8071 }, { "epoch": 0.292559167844587, "grad_norm": 2.4717710864982365, "learning_rate": 8.298631656153266e-06, "loss": 0.9782, "step": 8072 }, { "epoch": 0.29259541154724367, "grad_norm": 2.416784796358173, "learning_rate": 8.298190553294802e-06, "loss": 0.8431, "step": 8073 }, { "epoch": 0.2926316552499003, "grad_norm": 2.2925783340136974, "learning_rate": 8.297749404989264e-06, "loss": 0.8731, "step": 8074 }, { "epoch": 0.292667898952557, "grad_norm": 2.480438025678566, "learning_rate": 8.297308211242734e-06, "loss": 0.9987, "step": 8075 }, { "epoch": 0.29270414265521366, "grad_norm": 2.3197800759947524, "learning_rate": 8.296866972061292e-06, "loss": 0.9014, "step": 8076 }, { "epoch": 0.2927403863578703, "grad_norm": 2.298588635923351, "learning_rate": 8.296425687451016e-06, "loss": 1.1256, "step": 8077 }, { "epoch": 0.292776630060527, "grad_norm": 2.6936541178027906, "learning_rate": 8.295984357417988e-06, "loss": 0.9148, "step": 8078 }, { "epoch": 0.29281287376318366, "grad_norm": 2.2541663922830315, "learning_rate": 8.29554298196829e-06, "loss": 0.7919, "step": 8079 }, { "epoch": 0.2928491174658403, "grad_norm": 2.3186596820315084, "learning_rate": 8.295101561108e-06, "loss": 0.9794, "step": 8080 }, { "epoch": 0.29288536116849695, "grad_norm": 2.180811149994225, "learning_rate": 8.294660094843205e-06, "loss": 0.8269, "step": 8081 }, { "epoch": 0.29292160487115365, "grad_norm": 2.579276484052648, "learning_rate": 8.294218583179986e-06, "loss": 0.9511, "step": 8082 }, { "epoch": 0.2929578485738103, "grad_norm": 2.213629053412718, "learning_rate": 8.293777026124428e-06, "loss": 0.7932, "step": 8083 }, { "epoch": 0.29299409227646694, "grad_norm": 3.1122300822242, "learning_rate": 8.293335423682614e-06, "loss": 1.0899, "step": 8084 }, { "epoch": 0.29303033597912365, "grad_norm": 2.6268064604140693, "learning_rate": 8.29289377586063e-06, "loss": 1.0547, "step": 8085 }, { "epoch": 0.2930665796817803, "grad_norm": 2.576837784331491, "learning_rate": 8.292452082664562e-06, "loss": 0.9472, "step": 8086 }, { "epoch": 0.29310282338443694, "grad_norm": 1.9831806356770085, "learning_rate": 8.292010344100494e-06, "loss": 0.7721, "step": 8087 }, { "epoch": 0.29313906708709364, "grad_norm": 2.42770595559294, "learning_rate": 8.291568560174514e-06, "loss": 0.8121, "step": 8088 }, { "epoch": 0.2931753107897503, "grad_norm": 2.847696211867413, "learning_rate": 8.291126730892711e-06, "loss": 0.8316, "step": 8089 }, { "epoch": 0.29321155449240693, "grad_norm": 2.349506707101251, "learning_rate": 8.290684856261173e-06, "loss": 0.9544, "step": 8090 }, { "epoch": 0.2932477981950636, "grad_norm": 1.9754085673339432, "learning_rate": 8.290242936285985e-06, "loss": 0.6619, "step": 8091 }, { "epoch": 0.2932840418977203, "grad_norm": 2.6494000846764685, "learning_rate": 8.28980097097324e-06, "loss": 0.9972, "step": 8092 }, { "epoch": 0.29332028560037693, "grad_norm": 2.2847982888596956, "learning_rate": 8.28935896032903e-06, "loss": 1.0395, "step": 8093 }, { "epoch": 0.2933565293030336, "grad_norm": 2.7346187453724546, "learning_rate": 8.28891690435944e-06, "loss": 1.0623, "step": 8094 }, { "epoch": 0.2933927730056903, "grad_norm": 2.499832022986665, "learning_rate": 8.288474803070566e-06, "loss": 0.9868, "step": 8095 }, { "epoch": 0.2934290167083469, "grad_norm": 2.4420513612796175, "learning_rate": 8.288032656468496e-06, "loss": 1.0651, "step": 8096 }, { "epoch": 0.29346526041100357, "grad_norm": 2.1829424633174312, "learning_rate": 8.287590464559327e-06, "loss": 0.8132, "step": 8097 }, { "epoch": 0.29350150411366027, "grad_norm": 2.307997016250405, "learning_rate": 8.287148227349149e-06, "loss": 0.9835, "step": 8098 }, { "epoch": 0.2935377478163169, "grad_norm": 2.349529179617232, "learning_rate": 8.286705944844055e-06, "loss": 1.0676, "step": 8099 }, { "epoch": 0.29357399151897356, "grad_norm": 2.405689255230066, "learning_rate": 8.286263617050142e-06, "loss": 0.9489, "step": 8100 }, { "epoch": 0.29361023522163027, "grad_norm": 2.2192494184223763, "learning_rate": 8.285821243973504e-06, "loss": 0.9231, "step": 8101 }, { "epoch": 0.2936464789242869, "grad_norm": 2.3946488435467983, "learning_rate": 8.285378825620236e-06, "loss": 1.0508, "step": 8102 }, { "epoch": 0.29368272262694356, "grad_norm": 2.577200489009145, "learning_rate": 8.284936361996434e-06, "loss": 1.0052, "step": 8103 }, { "epoch": 0.2937189663296002, "grad_norm": 2.265133542220593, "learning_rate": 8.284493853108197e-06, "loss": 0.8976, "step": 8104 }, { "epoch": 0.2937552100322569, "grad_norm": 2.246078826189685, "learning_rate": 8.284051298961619e-06, "loss": 0.9124, "step": 8105 }, { "epoch": 0.29379145373491355, "grad_norm": 2.5213758794310794, "learning_rate": 8.283608699562802e-06, "loss": 1.0598, "step": 8106 }, { "epoch": 0.2938276974375702, "grad_norm": 2.321445569570522, "learning_rate": 8.283166054917843e-06, "loss": 0.8703, "step": 8107 }, { "epoch": 0.2938639411402269, "grad_norm": 2.3927904829385924, "learning_rate": 8.28272336503284e-06, "loss": 1.0282, "step": 8108 }, { "epoch": 0.29390018484288355, "grad_norm": 2.156550870468196, "learning_rate": 8.282280629913897e-06, "loss": 0.7842, "step": 8109 }, { "epoch": 0.2939364285455402, "grad_norm": 2.3614919583821425, "learning_rate": 8.28183784956711e-06, "loss": 0.8699, "step": 8110 }, { "epoch": 0.2939726722481969, "grad_norm": 2.0831909143912686, "learning_rate": 8.281395023998583e-06, "loss": 0.8858, "step": 8111 }, { "epoch": 0.29400891595085354, "grad_norm": 2.782220014920266, "learning_rate": 8.280952153214414e-06, "loss": 0.9842, "step": 8112 }, { "epoch": 0.2940451596535102, "grad_norm": 2.3464652703103055, "learning_rate": 8.280509237220712e-06, "loss": 1.1474, "step": 8113 }, { "epoch": 0.2940814033561669, "grad_norm": 2.246033310165924, "learning_rate": 8.280066276023576e-06, "loss": 0.9464, "step": 8114 }, { "epoch": 0.29411764705882354, "grad_norm": 2.4743377522511003, "learning_rate": 8.27962326962911e-06, "loss": 1.073, "step": 8115 }, { "epoch": 0.2941538907614802, "grad_norm": 2.2613983583864217, "learning_rate": 8.279180218043418e-06, "loss": 0.8574, "step": 8116 }, { "epoch": 0.29419013446413683, "grad_norm": 2.323843142073358, "learning_rate": 8.27873712127261e-06, "loss": 1.0446, "step": 8117 }, { "epoch": 0.29422637816679353, "grad_norm": 2.298946722177028, "learning_rate": 8.278293979322783e-06, "loss": 0.9345, "step": 8118 }, { "epoch": 0.2942626218694502, "grad_norm": 2.377497542869851, "learning_rate": 8.277850792200048e-06, "loss": 0.8015, "step": 8119 }, { "epoch": 0.2942988655721068, "grad_norm": 1.8971254479816992, "learning_rate": 8.277407559910514e-06, "loss": 0.6683, "step": 8120 }, { "epoch": 0.29433510927476353, "grad_norm": 2.4205537689905623, "learning_rate": 8.276964282460282e-06, "loss": 0.9958, "step": 8121 }, { "epoch": 0.2943713529774202, "grad_norm": 2.028238559863504, "learning_rate": 8.276520959855467e-06, "loss": 0.9516, "step": 8122 }, { "epoch": 0.2944075966800768, "grad_norm": 2.1716785822497564, "learning_rate": 8.276077592102174e-06, "loss": 0.8636, "step": 8123 }, { "epoch": 0.2944438403827335, "grad_norm": 2.566450983502928, "learning_rate": 8.275634179206514e-06, "loss": 0.8813, "step": 8124 }, { "epoch": 0.29448008408539017, "grad_norm": 2.3890740917651914, "learning_rate": 8.275190721174596e-06, "loss": 1.0645, "step": 8125 }, { "epoch": 0.2945163277880468, "grad_norm": 1.8974185314357999, "learning_rate": 8.274747218012529e-06, "loss": 0.8339, "step": 8126 }, { "epoch": 0.29455257149070346, "grad_norm": 2.45613391571864, "learning_rate": 8.274303669726427e-06, "loss": 0.9692, "step": 8127 }, { "epoch": 0.29458881519336017, "grad_norm": 2.31606430378173, "learning_rate": 8.2738600763224e-06, "loss": 0.8157, "step": 8128 }, { "epoch": 0.2946250588960168, "grad_norm": 2.4237202668663227, "learning_rate": 8.273416437806561e-06, "loss": 0.9087, "step": 8129 }, { "epoch": 0.29466130259867346, "grad_norm": 2.3469799439125816, "learning_rate": 8.272972754185023e-06, "loss": 1.0253, "step": 8130 }, { "epoch": 0.29469754630133016, "grad_norm": 2.2474786976838543, "learning_rate": 8.272529025463901e-06, "loss": 0.8646, "step": 8131 }, { "epoch": 0.2947337900039868, "grad_norm": 2.746253765294107, "learning_rate": 8.272085251649307e-06, "loss": 1.0295, "step": 8132 }, { "epoch": 0.29477003370664345, "grad_norm": 2.130297958792265, "learning_rate": 8.27164143274736e-06, "loss": 0.798, "step": 8133 }, { "epoch": 0.29480627740930015, "grad_norm": 2.0489016162396143, "learning_rate": 8.271197568764169e-06, "loss": 0.891, "step": 8134 }, { "epoch": 0.2948425211119568, "grad_norm": 2.457741818234198, "learning_rate": 8.270753659705856e-06, "loss": 1.0694, "step": 8135 }, { "epoch": 0.29487876481461345, "grad_norm": 2.638985294129332, "learning_rate": 8.270309705578534e-06, "loss": 1.1121, "step": 8136 }, { "epoch": 0.29491500851727015, "grad_norm": 2.171344879306924, "learning_rate": 8.269865706388323e-06, "loss": 0.911, "step": 8137 }, { "epoch": 0.2949512522199268, "grad_norm": 2.483466056145825, "learning_rate": 8.269421662141342e-06, "loss": 0.9332, "step": 8138 }, { "epoch": 0.29498749592258344, "grad_norm": 2.399679627856029, "learning_rate": 8.268977572843705e-06, "loss": 0.9151, "step": 8139 }, { "epoch": 0.2950237396252401, "grad_norm": 2.2587183601633516, "learning_rate": 8.268533438501534e-06, "loss": 0.989, "step": 8140 }, { "epoch": 0.2950599833278968, "grad_norm": 2.4602596316963106, "learning_rate": 8.268089259120951e-06, "loss": 0.9902, "step": 8141 }, { "epoch": 0.29509622703055344, "grad_norm": 2.4439135682303506, "learning_rate": 8.267645034708073e-06, "loss": 1.0081, "step": 8142 }, { "epoch": 0.2951324707332101, "grad_norm": 2.2257411650664713, "learning_rate": 8.267200765269024e-06, "loss": 0.9119, "step": 8143 }, { "epoch": 0.2951687144358668, "grad_norm": 2.2841016738007083, "learning_rate": 8.266756450809923e-06, "loss": 0.9571, "step": 8144 }, { "epoch": 0.29520495813852343, "grad_norm": 2.3512758620101617, "learning_rate": 8.266312091336895e-06, "loss": 0.9152, "step": 8145 }, { "epoch": 0.2952412018411801, "grad_norm": 2.5370204595870183, "learning_rate": 8.265867686856059e-06, "loss": 0.9254, "step": 8146 }, { "epoch": 0.2952774455438368, "grad_norm": 2.3092523192776646, "learning_rate": 8.265423237373545e-06, "loss": 0.8693, "step": 8147 }, { "epoch": 0.2953136892464934, "grad_norm": 2.1010985393941284, "learning_rate": 8.264978742895472e-06, "loss": 1.0202, "step": 8148 }, { "epoch": 0.2953499329491501, "grad_norm": 2.38123111960923, "learning_rate": 8.264534203427968e-06, "loss": 0.9742, "step": 8149 }, { "epoch": 0.2953861766518068, "grad_norm": 2.255680323263541, "learning_rate": 8.264089618977154e-06, "loss": 0.9117, "step": 8150 }, { "epoch": 0.2954224203544634, "grad_norm": 2.490553284441727, "learning_rate": 8.263644989549162e-06, "loss": 1.0209, "step": 8151 }, { "epoch": 0.29545866405712007, "grad_norm": 2.6533753124407564, "learning_rate": 8.263200315150113e-06, "loss": 1.0336, "step": 8152 }, { "epoch": 0.2954949077597767, "grad_norm": 2.5537150381013283, "learning_rate": 8.26275559578614e-06, "loss": 1.0641, "step": 8153 }, { "epoch": 0.2955311514624334, "grad_norm": 2.4545498539945085, "learning_rate": 8.262310831463366e-06, "loss": 0.9561, "step": 8154 }, { "epoch": 0.29556739516509006, "grad_norm": 2.551855539624207, "learning_rate": 8.261866022187923e-06, "loss": 0.9594, "step": 8155 }, { "epoch": 0.2956036388677467, "grad_norm": 2.355219785443834, "learning_rate": 8.261421167965938e-06, "loss": 0.992, "step": 8156 }, { "epoch": 0.2956398825704034, "grad_norm": 2.8637004875775474, "learning_rate": 8.260976268803543e-06, "loss": 0.8522, "step": 8157 }, { "epoch": 0.29567612627306006, "grad_norm": 2.5600499211657235, "learning_rate": 8.260531324706866e-06, "loss": 1.0729, "step": 8158 }, { "epoch": 0.2957123699757167, "grad_norm": 2.4425890748149683, "learning_rate": 8.260086335682041e-06, "loss": 0.9965, "step": 8159 }, { "epoch": 0.2957486136783734, "grad_norm": 2.418045984661667, "learning_rate": 8.259641301735196e-06, "loss": 0.8923, "step": 8160 }, { "epoch": 0.29578485738103005, "grad_norm": 2.5469452683839204, "learning_rate": 8.259196222872466e-06, "loss": 1.3228, "step": 8161 }, { "epoch": 0.2958211010836867, "grad_norm": 2.576165406944441, "learning_rate": 8.258751099099982e-06, "loss": 1.0857, "step": 8162 }, { "epoch": 0.29585734478634335, "grad_norm": 2.5826859276760668, "learning_rate": 8.25830593042388e-06, "loss": 0.9953, "step": 8163 }, { "epoch": 0.29589358848900005, "grad_norm": 2.058200407459087, "learning_rate": 8.257860716850294e-06, "loss": 0.7323, "step": 8164 }, { "epoch": 0.2959298321916567, "grad_norm": 2.239203416334217, "learning_rate": 8.257415458385355e-06, "loss": 0.8999, "step": 8165 }, { "epoch": 0.29596607589431334, "grad_norm": 2.5927085418452602, "learning_rate": 8.256970155035202e-06, "loss": 1.0796, "step": 8166 }, { "epoch": 0.29600231959697004, "grad_norm": 2.562247129371315, "learning_rate": 8.25652480680597e-06, "loss": 0.855, "step": 8167 }, { "epoch": 0.2960385632996267, "grad_norm": 2.286601217467643, "learning_rate": 8.256079413703796e-06, "loss": 0.8677, "step": 8168 }, { "epoch": 0.29607480700228334, "grad_norm": 2.326269165350728, "learning_rate": 8.255633975734817e-06, "loss": 0.8909, "step": 8169 }, { "epoch": 0.29611105070494004, "grad_norm": 2.2446193575624926, "learning_rate": 8.255188492905171e-06, "loss": 0.9634, "step": 8170 }, { "epoch": 0.2961472944075967, "grad_norm": 2.263460660796747, "learning_rate": 8.254742965220997e-06, "loss": 0.8048, "step": 8171 }, { "epoch": 0.29618353811025333, "grad_norm": 2.5044836968935518, "learning_rate": 8.25429739268843e-06, "loss": 0.9812, "step": 8172 }, { "epoch": 0.29621978181291003, "grad_norm": 2.2768237265674562, "learning_rate": 8.253851775313616e-06, "loss": 0.8544, "step": 8173 }, { "epoch": 0.2962560255155667, "grad_norm": 2.3204153646742283, "learning_rate": 8.253406113102693e-06, "loss": 1.0101, "step": 8174 }, { "epoch": 0.2962922692182233, "grad_norm": 2.4447400156181542, "learning_rate": 8.252960406061799e-06, "loss": 1.0534, "step": 8175 }, { "epoch": 0.29632851292087997, "grad_norm": 2.2020317477706968, "learning_rate": 8.252514654197078e-06, "loss": 0.7971, "step": 8176 }, { "epoch": 0.2963647566235367, "grad_norm": 2.1866706981583985, "learning_rate": 8.252068857514673e-06, "loss": 0.9129, "step": 8177 }, { "epoch": 0.2964010003261933, "grad_norm": 2.1695927617872712, "learning_rate": 8.251623016020727e-06, "loss": 0.9378, "step": 8178 }, { "epoch": 0.29643724402884997, "grad_norm": 2.113528996410352, "learning_rate": 8.25117712972138e-06, "loss": 0.9667, "step": 8179 }, { "epoch": 0.29647348773150667, "grad_norm": 2.4262789601637924, "learning_rate": 8.25073119862278e-06, "loss": 0.8554, "step": 8180 }, { "epoch": 0.2965097314341633, "grad_norm": 2.298497228949691, "learning_rate": 8.250285222731071e-06, "loss": 1.0818, "step": 8181 }, { "epoch": 0.29654597513681996, "grad_norm": 2.402339024212667, "learning_rate": 8.249839202052396e-06, "loss": 0.9151, "step": 8182 }, { "epoch": 0.29658221883947666, "grad_norm": 2.4153591612683467, "learning_rate": 8.249393136592903e-06, "loss": 0.9946, "step": 8183 }, { "epoch": 0.2966184625421333, "grad_norm": 2.3390711699471476, "learning_rate": 8.248947026358735e-06, "loss": 0.8861, "step": 8184 }, { "epoch": 0.29665470624478996, "grad_norm": 2.239137441680381, "learning_rate": 8.248500871356045e-06, "loss": 0.7744, "step": 8185 }, { "epoch": 0.29669094994744666, "grad_norm": 2.041500039163203, "learning_rate": 8.248054671590978e-06, "loss": 0.873, "step": 8186 }, { "epoch": 0.2967271936501033, "grad_norm": 2.257261825107242, "learning_rate": 8.24760842706968e-06, "loss": 1.1555, "step": 8187 }, { "epoch": 0.29676343735275995, "grad_norm": 2.0981007777214384, "learning_rate": 8.247162137798302e-06, "loss": 0.943, "step": 8188 }, { "epoch": 0.2967996810554166, "grad_norm": 2.3787817988689235, "learning_rate": 8.246715803782994e-06, "loss": 1.0754, "step": 8189 }, { "epoch": 0.2968359247580733, "grad_norm": 2.5539998747683472, "learning_rate": 8.246269425029906e-06, "loss": 1.0362, "step": 8190 }, { "epoch": 0.29687216846072995, "grad_norm": 2.529758053184676, "learning_rate": 8.245823001545187e-06, "loss": 1.1093, "step": 8191 }, { "epoch": 0.2969084121633866, "grad_norm": 2.611761058266582, "learning_rate": 8.245376533334992e-06, "loss": 0.9148, "step": 8192 }, { "epoch": 0.2969446558660433, "grad_norm": 2.51372662921825, "learning_rate": 8.244930020405471e-06, "loss": 0.8665, "step": 8193 }, { "epoch": 0.29698089956869994, "grad_norm": 2.2924502279888155, "learning_rate": 8.244483462762777e-06, "loss": 0.9008, "step": 8194 }, { "epoch": 0.2970171432713566, "grad_norm": 2.2230853860813617, "learning_rate": 8.244036860413062e-06, "loss": 0.9281, "step": 8195 }, { "epoch": 0.2970533869740133, "grad_norm": 2.2027676656724604, "learning_rate": 8.243590213362483e-06, "loss": 0.938, "step": 8196 }, { "epoch": 0.29708963067666994, "grad_norm": 2.2500363882982195, "learning_rate": 8.24314352161719e-06, "loss": 0.8965, "step": 8197 }, { "epoch": 0.2971258743793266, "grad_norm": 2.4946676418800653, "learning_rate": 8.242696785183342e-06, "loss": 0.9295, "step": 8198 }, { "epoch": 0.29716211808198323, "grad_norm": 2.1258353030595, "learning_rate": 8.242250004067092e-06, "loss": 0.8049, "step": 8199 }, { "epoch": 0.29719836178463993, "grad_norm": 2.277583782546544, "learning_rate": 8.241803178274599e-06, "loss": 0.9657, "step": 8200 }, { "epoch": 0.2972346054872966, "grad_norm": 2.4316710608898746, "learning_rate": 8.241356307812019e-06, "loss": 1.0015, "step": 8201 }, { "epoch": 0.2972708491899532, "grad_norm": 2.6677426783403724, "learning_rate": 8.24090939268551e-06, "loss": 0.9319, "step": 8202 }, { "epoch": 0.2973070928926099, "grad_norm": 2.1968970865201154, "learning_rate": 8.240462432901229e-06, "loss": 0.8992, "step": 8203 }, { "epoch": 0.29734333659526657, "grad_norm": 2.0088263960236072, "learning_rate": 8.240015428465335e-06, "loss": 0.951, "step": 8204 }, { "epoch": 0.2973795802979232, "grad_norm": 2.112843058724326, "learning_rate": 8.239568379383986e-06, "loss": 0.8412, "step": 8205 }, { "epoch": 0.2974158240005799, "grad_norm": 2.3669268459738873, "learning_rate": 8.239121285663347e-06, "loss": 0.9742, "step": 8206 }, { "epoch": 0.29745206770323657, "grad_norm": 2.322355772741771, "learning_rate": 8.238674147309575e-06, "loss": 0.9492, "step": 8207 }, { "epoch": 0.2974883114058932, "grad_norm": 2.2512021773868005, "learning_rate": 8.238226964328831e-06, "loss": 0.8434, "step": 8208 }, { "epoch": 0.2975245551085499, "grad_norm": 2.1303854750992866, "learning_rate": 8.237779736727279e-06, "loss": 1.1453, "step": 8209 }, { "epoch": 0.29756079881120656, "grad_norm": 2.115160092100299, "learning_rate": 8.23733246451108e-06, "loss": 0.8042, "step": 8210 }, { "epoch": 0.2975970425138632, "grad_norm": 2.4086715628376028, "learning_rate": 8.236885147686396e-06, "loss": 1.0064, "step": 8211 }, { "epoch": 0.29763328621651985, "grad_norm": 2.6601818636765255, "learning_rate": 8.236437786259395e-06, "loss": 0.8955, "step": 8212 }, { "epoch": 0.29766952991917656, "grad_norm": 2.4125982807626642, "learning_rate": 8.235990380236236e-06, "loss": 1.015, "step": 8213 }, { "epoch": 0.2977057736218332, "grad_norm": 2.1775078937495245, "learning_rate": 8.235542929623089e-06, "loss": 1.0448, "step": 8214 }, { "epoch": 0.29774201732448985, "grad_norm": 2.1838297142940415, "learning_rate": 8.235095434426115e-06, "loss": 0.9992, "step": 8215 }, { "epoch": 0.29777826102714655, "grad_norm": 2.4983569770770573, "learning_rate": 8.234647894651484e-06, "loss": 0.9424, "step": 8216 }, { "epoch": 0.2978145047298032, "grad_norm": 2.3017787284413918, "learning_rate": 8.23420031030536e-06, "loss": 1.2212, "step": 8217 }, { "epoch": 0.29785074843245984, "grad_norm": 2.19971815814111, "learning_rate": 8.233752681393914e-06, "loss": 0.9224, "step": 8218 }, { "epoch": 0.29788699213511655, "grad_norm": 2.640706906668239, "learning_rate": 8.233305007923309e-06, "loss": 0.9459, "step": 8219 }, { "epoch": 0.2979232358377732, "grad_norm": 2.278917789998005, "learning_rate": 8.232857289899717e-06, "loss": 1.0293, "step": 8220 }, { "epoch": 0.29795947954042984, "grad_norm": 2.2053225919955715, "learning_rate": 8.232409527329308e-06, "loss": 0.8219, "step": 8221 }, { "epoch": 0.29799572324308654, "grad_norm": 2.2432250587152343, "learning_rate": 8.231961720218251e-06, "loss": 0.915, "step": 8222 }, { "epoch": 0.2980319669457432, "grad_norm": 2.4683836665646925, "learning_rate": 8.231513868572713e-06, "loss": 1.0034, "step": 8223 }, { "epoch": 0.29806821064839983, "grad_norm": 2.5345063844583704, "learning_rate": 8.231065972398873e-06, "loss": 0.8681, "step": 8224 }, { "epoch": 0.2981044543510565, "grad_norm": 2.5895259679597626, "learning_rate": 8.230618031702895e-06, "loss": 0.903, "step": 8225 }, { "epoch": 0.2981406980537132, "grad_norm": 2.3173526566847658, "learning_rate": 8.230170046490954e-06, "loss": 0.9977, "step": 8226 }, { "epoch": 0.29817694175636983, "grad_norm": 2.4122630444730895, "learning_rate": 8.229722016769223e-06, "loss": 1.1802, "step": 8227 }, { "epoch": 0.2982131854590265, "grad_norm": 2.3911716241710157, "learning_rate": 8.229273942543878e-06, "loss": 0.8478, "step": 8228 }, { "epoch": 0.2982494291616832, "grad_norm": 2.1988338066988513, "learning_rate": 8.22882582382109e-06, "loss": 0.9663, "step": 8229 }, { "epoch": 0.2982856728643398, "grad_norm": 2.5793535694559044, "learning_rate": 8.228377660607034e-06, "loss": 1.0058, "step": 8230 }, { "epoch": 0.29832191656699647, "grad_norm": 2.358484185046118, "learning_rate": 8.227929452907887e-06, "loss": 1.0121, "step": 8231 }, { "epoch": 0.29835816026965317, "grad_norm": 1.9747849983342403, "learning_rate": 8.227481200729824e-06, "loss": 0.7377, "step": 8232 }, { "epoch": 0.2983944039723098, "grad_norm": 2.323786341803064, "learning_rate": 8.227032904079022e-06, "loss": 0.7928, "step": 8233 }, { "epoch": 0.29843064767496646, "grad_norm": 2.344257633191057, "learning_rate": 8.22658456296166e-06, "loss": 0.8371, "step": 8234 }, { "epoch": 0.2984668913776231, "grad_norm": 2.254827946017214, "learning_rate": 8.226136177383913e-06, "loss": 0.9868, "step": 8235 }, { "epoch": 0.2985031350802798, "grad_norm": 2.484782937943756, "learning_rate": 8.22568774735196e-06, "loss": 1.1848, "step": 8236 }, { "epoch": 0.29853937878293646, "grad_norm": 2.2726770689228823, "learning_rate": 8.225239272871981e-06, "loss": 0.8629, "step": 8237 }, { "epoch": 0.2985756224855931, "grad_norm": 2.173888295404227, "learning_rate": 8.224790753950156e-06, "loss": 0.9513, "step": 8238 }, { "epoch": 0.2986118661882498, "grad_norm": 2.673394926833566, "learning_rate": 8.224342190592664e-06, "loss": 0.9517, "step": 8239 }, { "epoch": 0.29864810989090645, "grad_norm": 2.219172494245224, "learning_rate": 8.223893582805687e-06, "loss": 0.9489, "step": 8240 }, { "epoch": 0.2986843535935631, "grad_norm": 2.1564702829883564, "learning_rate": 8.223444930595406e-06, "loss": 1.018, "step": 8241 }, { "epoch": 0.2987205972962198, "grad_norm": 2.4936293872945576, "learning_rate": 8.222996233968001e-06, "loss": 0.7861, "step": 8242 }, { "epoch": 0.29875684099887645, "grad_norm": 2.6692176256959708, "learning_rate": 8.22254749292966e-06, "loss": 1.0409, "step": 8243 }, { "epoch": 0.2987930847015331, "grad_norm": 2.2707707617566792, "learning_rate": 8.222098707486563e-06, "loss": 0.982, "step": 8244 }, { "epoch": 0.2988293284041898, "grad_norm": 2.6395081986476288, "learning_rate": 8.221649877644894e-06, "loss": 0.9218, "step": 8245 }, { "epoch": 0.29886557210684644, "grad_norm": 2.1212107100926474, "learning_rate": 8.221201003410839e-06, "loss": 0.9866, "step": 8246 }, { "epoch": 0.2989018158095031, "grad_norm": 2.220456248516022, "learning_rate": 8.220752084790581e-06, "loss": 0.875, "step": 8247 }, { "epoch": 0.29893805951215974, "grad_norm": 2.4381227452749843, "learning_rate": 8.220303121790309e-06, "loss": 1.1526, "step": 8248 }, { "epoch": 0.29897430321481644, "grad_norm": 2.1012547079751913, "learning_rate": 8.219854114416206e-06, "loss": 0.8633, "step": 8249 }, { "epoch": 0.2990105469174731, "grad_norm": 2.2232810727706656, "learning_rate": 8.21940506267446e-06, "loss": 0.7691, "step": 8250 }, { "epoch": 0.29904679062012973, "grad_norm": 2.3790805171603107, "learning_rate": 8.21895596657126e-06, "loss": 0.8847, "step": 8251 }, { "epoch": 0.29908303432278643, "grad_norm": 2.4008297617908716, "learning_rate": 8.218506826112794e-06, "loss": 0.9808, "step": 8252 }, { "epoch": 0.2991192780254431, "grad_norm": 2.18320827922813, "learning_rate": 8.21805764130525e-06, "loss": 0.8709, "step": 8253 }, { "epoch": 0.2991555217280997, "grad_norm": 2.285650863602667, "learning_rate": 8.217608412154818e-06, "loss": 0.8807, "step": 8254 }, { "epoch": 0.29919176543075643, "grad_norm": 2.1950437220352605, "learning_rate": 8.21715913866769e-06, "loss": 1.0133, "step": 8255 }, { "epoch": 0.2992280091334131, "grad_norm": 2.5145705825787004, "learning_rate": 8.216709820850051e-06, "loss": 1.0182, "step": 8256 }, { "epoch": 0.2992642528360697, "grad_norm": 2.484813305209788, "learning_rate": 8.216260458708099e-06, "loss": 1.083, "step": 8257 }, { "epoch": 0.2993004965387264, "grad_norm": 2.397024486886796, "learning_rate": 8.215811052248021e-06, "loss": 0.893, "step": 8258 }, { "epoch": 0.29933674024138307, "grad_norm": 2.353522368541104, "learning_rate": 8.215361601476013e-06, "loss": 0.9113, "step": 8259 }, { "epoch": 0.2993729839440397, "grad_norm": 2.200958980851157, "learning_rate": 8.214912106398266e-06, "loss": 0.9215, "step": 8260 }, { "epoch": 0.29940922764669636, "grad_norm": 2.324522781624448, "learning_rate": 8.214462567020975e-06, "loss": 0.8966, "step": 8261 }, { "epoch": 0.29944547134935307, "grad_norm": 2.3713140963815973, "learning_rate": 8.214012983350333e-06, "loss": 0.9178, "step": 8262 }, { "epoch": 0.2994817150520097, "grad_norm": 2.2733847489050816, "learning_rate": 8.213563355392537e-06, "loss": 0.9101, "step": 8263 }, { "epoch": 0.29951795875466636, "grad_norm": 2.51341246218922, "learning_rate": 8.213113683153781e-06, "loss": 1.0416, "step": 8264 }, { "epoch": 0.29955420245732306, "grad_norm": 2.576181790836704, "learning_rate": 8.212663966640261e-06, "loss": 0.8307, "step": 8265 }, { "epoch": 0.2995904461599797, "grad_norm": 2.16605952505765, "learning_rate": 8.212214205858175e-06, "loss": 1.0814, "step": 8266 }, { "epoch": 0.29962668986263635, "grad_norm": 2.229983782754874, "learning_rate": 8.21176440081372e-06, "loss": 0.9412, "step": 8267 }, { "epoch": 0.29966293356529305, "grad_norm": 2.355604693970339, "learning_rate": 8.211314551513095e-06, "loss": 1.1279, "step": 8268 }, { "epoch": 0.2996991772679497, "grad_norm": 2.309193408506744, "learning_rate": 8.210864657962496e-06, "loss": 0.875, "step": 8269 }, { "epoch": 0.29973542097060635, "grad_norm": 2.513778285348271, "learning_rate": 8.210414720168125e-06, "loss": 0.8967, "step": 8270 }, { "epoch": 0.299771664673263, "grad_norm": 2.128341072315239, "learning_rate": 8.20996473813618e-06, "loss": 0.8613, "step": 8271 }, { "epoch": 0.2998079083759197, "grad_norm": 2.193672166035194, "learning_rate": 8.209514711872862e-06, "loss": 1.1572, "step": 8272 }, { "epoch": 0.29984415207857634, "grad_norm": 2.325768258031441, "learning_rate": 8.209064641384375e-06, "loss": 0.8391, "step": 8273 }, { "epoch": 0.299880395781233, "grad_norm": 2.238192401543335, "learning_rate": 8.208614526676914e-06, "loss": 0.9281, "step": 8274 }, { "epoch": 0.2999166394838897, "grad_norm": 2.2264280146767934, "learning_rate": 8.208164367756688e-06, "loss": 0.8876, "step": 8275 }, { "epoch": 0.29995288318654634, "grad_norm": 15.770934941063846, "learning_rate": 8.207714164629896e-06, "loss": 1.7975, "step": 8276 }, { "epoch": 0.299989126889203, "grad_norm": 2.2765343393093187, "learning_rate": 8.207263917302744e-06, "loss": 0.9938, "step": 8277 }, { "epoch": 0.3000253705918597, "grad_norm": 2.406806291845478, "learning_rate": 8.206813625781434e-06, "loss": 0.919, "step": 8278 }, { "epoch": 0.30006161429451633, "grad_norm": 2.4824239250333946, "learning_rate": 8.206363290072171e-06, "loss": 0.9529, "step": 8279 }, { "epoch": 0.300097857997173, "grad_norm": 2.502757713698826, "learning_rate": 8.205912910181163e-06, "loss": 0.9452, "step": 8280 }, { "epoch": 0.3001341016998297, "grad_norm": 2.20446959898327, "learning_rate": 8.205462486114611e-06, "loss": 0.8728, "step": 8281 }, { "epoch": 0.3001703454024863, "grad_norm": 2.3214527071785684, "learning_rate": 8.205012017878726e-06, "loss": 0.9466, "step": 8282 }, { "epoch": 0.300206589105143, "grad_norm": 2.782934408449133, "learning_rate": 8.204561505479716e-06, "loss": 1.1133, "step": 8283 }, { "epoch": 0.3002428328077996, "grad_norm": 2.396619224038963, "learning_rate": 8.204110948923783e-06, "loss": 0.7758, "step": 8284 }, { "epoch": 0.3002790765104563, "grad_norm": 2.1643034162324004, "learning_rate": 8.203660348217142e-06, "loss": 0.7841, "step": 8285 }, { "epoch": 0.30031532021311297, "grad_norm": 2.1282316071859753, "learning_rate": 8.203209703365996e-06, "loss": 0.8418, "step": 8286 }, { "epoch": 0.3003515639157696, "grad_norm": 2.3298697361282437, "learning_rate": 8.20275901437656e-06, "loss": 0.9547, "step": 8287 }, { "epoch": 0.3003878076184263, "grad_norm": 2.3030063531863787, "learning_rate": 8.20230828125504e-06, "loss": 0.8719, "step": 8288 }, { "epoch": 0.30042405132108296, "grad_norm": 2.2429357554363376, "learning_rate": 8.201857504007649e-06, "loss": 0.9636, "step": 8289 }, { "epoch": 0.3004602950237396, "grad_norm": 2.261147282173664, "learning_rate": 8.201406682640599e-06, "loss": 1.0017, "step": 8290 }, { "epoch": 0.3004965387263963, "grad_norm": 2.4747959334060057, "learning_rate": 8.2009558171601e-06, "loss": 1.3403, "step": 8291 }, { "epoch": 0.30053278242905296, "grad_norm": 2.2494522488783577, "learning_rate": 8.200504907572365e-06, "loss": 0.9352, "step": 8292 }, { "epoch": 0.3005690261317096, "grad_norm": 2.318573305592954, "learning_rate": 8.200053953883608e-06, "loss": 1.1926, "step": 8293 }, { "epoch": 0.3006052698343663, "grad_norm": 2.6246241080863064, "learning_rate": 8.199602956100044e-06, "loss": 0.8166, "step": 8294 }, { "epoch": 0.30064151353702295, "grad_norm": 2.4213623970001037, "learning_rate": 8.199151914227887e-06, "loss": 0.8374, "step": 8295 }, { "epoch": 0.3006777572396796, "grad_norm": 2.528555204073354, "learning_rate": 8.198700828273351e-06, "loss": 0.8761, "step": 8296 }, { "epoch": 0.30071400094233625, "grad_norm": 2.2749844951952825, "learning_rate": 8.198249698242651e-06, "loss": 0.8124, "step": 8297 }, { "epoch": 0.30075024464499295, "grad_norm": 2.7466322614373913, "learning_rate": 8.197798524142005e-06, "loss": 0.8938, "step": 8298 }, { "epoch": 0.3007864883476496, "grad_norm": 2.2843412860220216, "learning_rate": 8.19734730597763e-06, "loss": 0.8138, "step": 8299 }, { "epoch": 0.30082273205030624, "grad_norm": 2.2820308458303824, "learning_rate": 8.196896043755742e-06, "loss": 0.9793, "step": 8300 }, { "epoch": 0.30085897575296294, "grad_norm": 2.375201490508353, "learning_rate": 8.196444737482561e-06, "loss": 0.8231, "step": 8301 }, { "epoch": 0.3008952194556196, "grad_norm": 2.391416228112119, "learning_rate": 8.195993387164303e-06, "loss": 0.944, "step": 8302 }, { "epoch": 0.30093146315827624, "grad_norm": 2.4018189438223585, "learning_rate": 8.19554199280719e-06, "loss": 1.0436, "step": 8303 }, { "epoch": 0.30096770686093294, "grad_norm": 2.117237076637423, "learning_rate": 8.19509055441744e-06, "loss": 0.9836, "step": 8304 }, { "epoch": 0.3010039505635896, "grad_norm": 2.2028564040720933, "learning_rate": 8.194639072001276e-06, "loss": 1.0024, "step": 8305 }, { "epoch": 0.30104019426624623, "grad_norm": 2.477095683050294, "learning_rate": 8.194187545564915e-06, "loss": 0.9451, "step": 8306 }, { "epoch": 0.3010764379689029, "grad_norm": 2.289215078390029, "learning_rate": 8.193735975114585e-06, "loss": 0.9713, "step": 8307 }, { "epoch": 0.3011126816715596, "grad_norm": 2.454277260031468, "learning_rate": 8.193284360656501e-06, "loss": 0.9848, "step": 8308 }, { "epoch": 0.3011489253742162, "grad_norm": 2.2880975409769793, "learning_rate": 8.192832702196893e-06, "loss": 1.0253, "step": 8309 }, { "epoch": 0.30118516907687287, "grad_norm": 2.230943931424856, "learning_rate": 8.19238099974198e-06, "loss": 0.8676, "step": 8310 }, { "epoch": 0.3012214127795296, "grad_norm": 2.5516596121678243, "learning_rate": 8.191929253297987e-06, "loss": 0.9334, "step": 8311 }, { "epoch": 0.3012576564821862, "grad_norm": 2.471872471367337, "learning_rate": 8.19147746287114e-06, "loss": 0.7212, "step": 8312 }, { "epoch": 0.30129390018484287, "grad_norm": 2.5766167275542484, "learning_rate": 8.191025628467664e-06, "loss": 0.8922, "step": 8313 }, { "epoch": 0.30133014388749957, "grad_norm": 2.1485216606504163, "learning_rate": 8.190573750093784e-06, "loss": 0.8565, "step": 8314 }, { "epoch": 0.3013663875901562, "grad_norm": 2.4439770000855368, "learning_rate": 8.190121827755728e-06, "loss": 1.0688, "step": 8315 }, { "epoch": 0.30140263129281286, "grad_norm": 2.312079932556165, "learning_rate": 8.189669861459722e-06, "loss": 1.0797, "step": 8316 }, { "epoch": 0.30143887499546956, "grad_norm": 2.3229509335382192, "learning_rate": 8.189217851211995e-06, "loss": 0.8781, "step": 8317 }, { "epoch": 0.3014751186981262, "grad_norm": 2.3533803019690347, "learning_rate": 8.188765797018773e-06, "loss": 0.9144, "step": 8318 }, { "epoch": 0.30151136240078286, "grad_norm": 2.060766370613091, "learning_rate": 8.188313698886288e-06, "loss": 0.9371, "step": 8319 }, { "epoch": 0.3015476061034395, "grad_norm": 2.110102128666285, "learning_rate": 8.18786155682077e-06, "loss": 0.8942, "step": 8320 }, { "epoch": 0.3015838498060962, "grad_norm": 1.9062770584950368, "learning_rate": 8.187409370828444e-06, "loss": 0.8851, "step": 8321 }, { "epoch": 0.30162009350875285, "grad_norm": 2.606348232041116, "learning_rate": 8.186957140915549e-06, "loss": 1.0092, "step": 8322 }, { "epoch": 0.3016563372114095, "grad_norm": 2.2010022491026984, "learning_rate": 8.18650486708831e-06, "loss": 1.001, "step": 8323 }, { "epoch": 0.3016925809140662, "grad_norm": 2.6257374479161077, "learning_rate": 8.18605254935296e-06, "loss": 1.0915, "step": 8324 }, { "epoch": 0.30172882461672285, "grad_norm": 2.3983203314078416, "learning_rate": 8.185600187715733e-06, "loss": 0.8899, "step": 8325 }, { "epoch": 0.3017650683193795, "grad_norm": 2.3264054797706804, "learning_rate": 8.185147782182864e-06, "loss": 1.0854, "step": 8326 }, { "epoch": 0.3018013120220362, "grad_norm": 2.449641782842989, "learning_rate": 8.184695332760584e-06, "loss": 0.9303, "step": 8327 }, { "epoch": 0.30183755572469284, "grad_norm": 2.319672084200795, "learning_rate": 8.184242839455128e-06, "loss": 0.9423, "step": 8328 }, { "epoch": 0.3018737994273495, "grad_norm": 2.1282874857256853, "learning_rate": 8.183790302272729e-06, "loss": 0.7349, "step": 8329 }, { "epoch": 0.30191004313000613, "grad_norm": 2.022617462283666, "learning_rate": 8.183337721219628e-06, "loss": 0.9651, "step": 8330 }, { "epoch": 0.30194628683266284, "grad_norm": 2.4516899674792554, "learning_rate": 8.182885096302059e-06, "loss": 0.9773, "step": 8331 }, { "epoch": 0.3019825305353195, "grad_norm": 2.3929388515778394, "learning_rate": 8.182432427526257e-06, "loss": 0.9748, "step": 8332 }, { "epoch": 0.30201877423797613, "grad_norm": 2.1693268149003546, "learning_rate": 8.18197971489846e-06, "loss": 1.0012, "step": 8333 }, { "epoch": 0.30205501794063283, "grad_norm": 2.270469521222319, "learning_rate": 8.181526958424909e-06, "loss": 0.9912, "step": 8334 }, { "epoch": 0.3020912616432895, "grad_norm": 2.125769841084805, "learning_rate": 8.18107415811184e-06, "loss": 0.9546, "step": 8335 }, { "epoch": 0.3021275053459461, "grad_norm": 2.39140891578535, "learning_rate": 8.180621313965493e-06, "loss": 1.052, "step": 8336 }, { "epoch": 0.3021637490486028, "grad_norm": 2.309738197868878, "learning_rate": 8.180168425992106e-06, "loss": 1.1029, "step": 8337 }, { "epoch": 0.30219999275125947, "grad_norm": 2.163053800061305, "learning_rate": 8.179715494197924e-06, "loss": 0.957, "step": 8338 }, { "epoch": 0.3022362364539161, "grad_norm": 2.4762064201992344, "learning_rate": 8.179262518589184e-06, "loss": 0.9436, "step": 8339 }, { "epoch": 0.3022724801565728, "grad_norm": 2.4249486523233053, "learning_rate": 8.178809499172128e-06, "loss": 0.9802, "step": 8340 }, { "epoch": 0.30230872385922947, "grad_norm": 2.2618307839456877, "learning_rate": 8.178356435953004e-06, "loss": 0.976, "step": 8341 }, { "epoch": 0.3023449675618861, "grad_norm": 2.385014519961565, "learning_rate": 8.177903328938045e-06, "loss": 0.9209, "step": 8342 }, { "epoch": 0.30238121126454276, "grad_norm": 2.4762989381008564, "learning_rate": 8.177450178133503e-06, "loss": 1.0345, "step": 8343 }, { "epoch": 0.30241745496719946, "grad_norm": 2.268105956117992, "learning_rate": 8.17699698354562e-06, "loss": 0.6643, "step": 8344 }, { "epoch": 0.3024536986698561, "grad_norm": 2.2982120407079303, "learning_rate": 8.176543745180638e-06, "loss": 0.7605, "step": 8345 }, { "epoch": 0.30248994237251275, "grad_norm": 2.272571726491345, "learning_rate": 8.176090463044807e-06, "loss": 1.1872, "step": 8346 }, { "epoch": 0.30252618607516946, "grad_norm": 2.149893191179873, "learning_rate": 8.175637137144368e-06, "loss": 1.0593, "step": 8347 }, { "epoch": 0.3025624297778261, "grad_norm": 2.380284605070289, "learning_rate": 8.17518376748557e-06, "loss": 0.9262, "step": 8348 }, { "epoch": 0.30259867348048275, "grad_norm": 2.3140996408885712, "learning_rate": 8.17473035407466e-06, "loss": 0.8907, "step": 8349 }, { "epoch": 0.30263491718313945, "grad_norm": 2.397680492715394, "learning_rate": 8.174276896917885e-06, "loss": 1.0431, "step": 8350 }, { "epoch": 0.3026711608857961, "grad_norm": 2.390245906135197, "learning_rate": 8.173823396021497e-06, "loss": 0.7659, "step": 8351 }, { "epoch": 0.30270740458845274, "grad_norm": 2.6691590791627764, "learning_rate": 8.173369851391739e-06, "loss": 1.0822, "step": 8352 }, { "epoch": 0.30274364829110945, "grad_norm": 2.4581475330650853, "learning_rate": 8.172916263034865e-06, "loss": 1.1177, "step": 8353 }, { "epoch": 0.3027798919937661, "grad_norm": 2.1436425823199623, "learning_rate": 8.172462630957124e-06, "loss": 0.9128, "step": 8354 }, { "epoch": 0.30281613569642274, "grad_norm": 2.1923273535116587, "learning_rate": 8.172008955164766e-06, "loss": 0.8754, "step": 8355 }, { "epoch": 0.3028523793990794, "grad_norm": 2.3283391422090935, "learning_rate": 8.171555235664044e-06, "loss": 0.8765, "step": 8356 }, { "epoch": 0.3028886231017361, "grad_norm": 2.3741861134144666, "learning_rate": 8.171101472461207e-06, "loss": 1.0716, "step": 8357 }, { "epoch": 0.30292486680439273, "grad_norm": 2.276157207513864, "learning_rate": 8.170647665562511e-06, "loss": 0.9008, "step": 8358 }, { "epoch": 0.3029611105070494, "grad_norm": 2.106082635531556, "learning_rate": 8.170193814974208e-06, "loss": 0.928, "step": 8359 }, { "epoch": 0.3029973542097061, "grad_norm": 2.5961131342120867, "learning_rate": 8.169739920702551e-06, "loss": 0.964, "step": 8360 }, { "epoch": 0.30303359791236273, "grad_norm": 2.225400466226446, "learning_rate": 8.169285982753794e-06, "loss": 0.7837, "step": 8361 }, { "epoch": 0.3030698416150194, "grad_norm": 2.5865829300233747, "learning_rate": 8.168832001134194e-06, "loss": 0.8924, "step": 8362 }, { "epoch": 0.3031060853176761, "grad_norm": 2.289787332724522, "learning_rate": 8.168377975850004e-06, "loss": 0.8605, "step": 8363 }, { "epoch": 0.3031423290203327, "grad_norm": 2.361086972692811, "learning_rate": 8.167923906907483e-06, "loss": 0.9261, "step": 8364 }, { "epoch": 0.30317857272298937, "grad_norm": 2.457784469461053, "learning_rate": 8.167469794312886e-06, "loss": 0.9539, "step": 8365 }, { "epoch": 0.303214816425646, "grad_norm": 2.197286111911228, "learning_rate": 8.167015638072473e-06, "loss": 0.8882, "step": 8366 }, { "epoch": 0.3032510601283027, "grad_norm": 2.207286842605299, "learning_rate": 8.166561438192498e-06, "loss": 0.8082, "step": 8367 }, { "epoch": 0.30328730383095936, "grad_norm": 2.558557054959032, "learning_rate": 8.16610719467922e-06, "loss": 0.9387, "step": 8368 }, { "epoch": 0.303323547533616, "grad_norm": 2.519698985781287, "learning_rate": 8.165652907538902e-06, "loss": 0.8877, "step": 8369 }, { "epoch": 0.3033597912362727, "grad_norm": 2.1478852107490427, "learning_rate": 8.165198576777799e-06, "loss": 0.8485, "step": 8370 }, { "epoch": 0.30339603493892936, "grad_norm": 2.3529382588278254, "learning_rate": 8.164744202402175e-06, "loss": 1.0171, "step": 8371 }, { "epoch": 0.303432278641586, "grad_norm": 2.547441585759515, "learning_rate": 8.16428978441829e-06, "loss": 1.0547, "step": 8372 }, { "epoch": 0.3034685223442427, "grad_norm": 2.0161731506589406, "learning_rate": 8.163835322832407e-06, "loss": 0.9234, "step": 8373 }, { "epoch": 0.30350476604689935, "grad_norm": 2.2920412764293205, "learning_rate": 8.163380817650786e-06, "loss": 0.9707, "step": 8374 }, { "epoch": 0.303541009749556, "grad_norm": 2.493688001162923, "learning_rate": 8.16292626887969e-06, "loss": 1.0275, "step": 8375 }, { "epoch": 0.3035772534522127, "grad_norm": 2.1535785158610885, "learning_rate": 8.162471676525384e-06, "loss": 0.9248, "step": 8376 }, { "epoch": 0.30361349715486935, "grad_norm": 2.630662010632512, "learning_rate": 8.162017040594127e-06, "loss": 1.0008, "step": 8377 }, { "epoch": 0.303649740857526, "grad_norm": 2.3403745503940123, "learning_rate": 8.161562361092191e-06, "loss": 1.1115, "step": 8378 }, { "epoch": 0.30368598456018264, "grad_norm": 2.3498570229301103, "learning_rate": 8.161107638025838e-06, "loss": 0.8829, "step": 8379 }, { "epoch": 0.30372222826283934, "grad_norm": 2.318119924840911, "learning_rate": 8.160652871401333e-06, "loss": 0.7957, "step": 8380 }, { "epoch": 0.303758471965496, "grad_norm": 2.0895951882164807, "learning_rate": 8.16019806122494e-06, "loss": 0.8646, "step": 8381 }, { "epoch": 0.30379471566815264, "grad_norm": 2.618634413047624, "learning_rate": 8.159743207502933e-06, "loss": 1.0441, "step": 8382 }, { "epoch": 0.30383095937080934, "grad_norm": 2.4008468355471524, "learning_rate": 8.159288310241574e-06, "loss": 0.9273, "step": 8383 }, { "epoch": 0.303867203073466, "grad_norm": 2.52900574740406, "learning_rate": 8.158833369447133e-06, "loss": 0.9044, "step": 8384 }, { "epoch": 0.30390344677612263, "grad_norm": 2.181977957042047, "learning_rate": 8.158378385125877e-06, "loss": 0.9203, "step": 8385 }, { "epoch": 0.30393969047877933, "grad_norm": 2.4335478103899453, "learning_rate": 8.157923357284079e-06, "loss": 1.0348, "step": 8386 }, { "epoch": 0.303975934181436, "grad_norm": 2.3512571599199132, "learning_rate": 8.157468285928005e-06, "loss": 0.9387, "step": 8387 }, { "epoch": 0.3040121778840926, "grad_norm": 2.3246780187155225, "learning_rate": 8.15701317106393e-06, "loss": 0.8908, "step": 8388 }, { "epoch": 0.30404842158674933, "grad_norm": 2.1552076035159033, "learning_rate": 8.156558012698121e-06, "loss": 0.851, "step": 8389 }, { "epoch": 0.304084665289406, "grad_norm": 2.225396718188854, "learning_rate": 8.156102810836853e-06, "loss": 0.8761, "step": 8390 }, { "epoch": 0.3041209089920626, "grad_norm": 2.3148761232998725, "learning_rate": 8.155647565486396e-06, "loss": 0.8809, "step": 8391 }, { "epoch": 0.30415715269471927, "grad_norm": 2.44071226023718, "learning_rate": 8.155192276653023e-06, "loss": 0.9186, "step": 8392 }, { "epoch": 0.30419339639737597, "grad_norm": 2.5339421109220828, "learning_rate": 8.15473694434301e-06, "loss": 1.078, "step": 8393 }, { "epoch": 0.3042296401000326, "grad_norm": 2.2857088796969176, "learning_rate": 8.15428156856263e-06, "loss": 0.8315, "step": 8394 }, { "epoch": 0.30426588380268926, "grad_norm": 2.1384179125139258, "learning_rate": 8.153826149318157e-06, "loss": 0.6942, "step": 8395 }, { "epoch": 0.30430212750534597, "grad_norm": 2.6417960981359774, "learning_rate": 8.153370686615866e-06, "loss": 0.9646, "step": 8396 }, { "epoch": 0.3043383712080026, "grad_norm": 2.3917833672872373, "learning_rate": 8.152915180462037e-06, "loss": 0.9055, "step": 8397 }, { "epoch": 0.30437461491065926, "grad_norm": 2.519649392249363, "learning_rate": 8.15245963086294e-06, "loss": 1.0107, "step": 8398 }, { "epoch": 0.30441085861331596, "grad_norm": 2.4102338155310408, "learning_rate": 8.15200403782486e-06, "loss": 1.0371, "step": 8399 }, { "epoch": 0.3044471023159726, "grad_norm": 2.4886797149961, "learning_rate": 8.151548401354068e-06, "loss": 0.9178, "step": 8400 }, { "epoch": 0.30448334601862925, "grad_norm": 2.5684588693957555, "learning_rate": 8.151092721456845e-06, "loss": 0.9295, "step": 8401 }, { "epoch": 0.3045195897212859, "grad_norm": 2.541412958592943, "learning_rate": 8.150636998139471e-06, "loss": 0.9659, "step": 8402 }, { "epoch": 0.3045558334239426, "grad_norm": 2.533843469482997, "learning_rate": 8.150181231408224e-06, "loss": 0.9841, "step": 8403 }, { "epoch": 0.30459207712659925, "grad_norm": 2.275510822266284, "learning_rate": 8.149725421269386e-06, "loss": 1.0113, "step": 8404 }, { "epoch": 0.3046283208292559, "grad_norm": 2.436072360188956, "learning_rate": 8.149269567729234e-06, "loss": 0.7499, "step": 8405 }, { "epoch": 0.3046645645319126, "grad_norm": 2.3633086846491707, "learning_rate": 8.148813670794054e-06, "loss": 0.9981, "step": 8406 }, { "epoch": 0.30470080823456924, "grad_norm": 2.4937940602700626, "learning_rate": 8.148357730470125e-06, "loss": 0.9804, "step": 8407 }, { "epoch": 0.3047370519372259, "grad_norm": 2.3162718077492728, "learning_rate": 8.147901746763731e-06, "loss": 0.9499, "step": 8408 }, { "epoch": 0.3047732956398826, "grad_norm": 2.3873445222231586, "learning_rate": 8.147445719681155e-06, "loss": 1.0936, "step": 8409 }, { "epoch": 0.30480953934253924, "grad_norm": 2.4887575736137184, "learning_rate": 8.14698964922868e-06, "loss": 0.9164, "step": 8410 }, { "epoch": 0.3048457830451959, "grad_norm": 2.228558516319067, "learning_rate": 8.146533535412591e-06, "loss": 0.7996, "step": 8411 }, { "epoch": 0.3048820267478526, "grad_norm": 2.133198142181427, "learning_rate": 8.146077378239173e-06, "loss": 0.8421, "step": 8412 }, { "epoch": 0.30491827045050923, "grad_norm": 2.190816893323851, "learning_rate": 8.145621177714712e-06, "loss": 0.958, "step": 8413 }, { "epoch": 0.3049545141531659, "grad_norm": 2.259761997689993, "learning_rate": 8.145164933845492e-06, "loss": 0.9162, "step": 8414 }, { "epoch": 0.3049907578558225, "grad_norm": 2.4392396870314808, "learning_rate": 8.144708646637802e-06, "loss": 0.9886, "step": 8415 }, { "epoch": 0.3050270015584792, "grad_norm": 2.605211388757408, "learning_rate": 8.144252316097928e-06, "loss": 0.9647, "step": 8416 }, { "epoch": 0.3050632452611359, "grad_norm": 2.2304471046182304, "learning_rate": 8.143795942232162e-06, "loss": 1.0552, "step": 8417 }, { "epoch": 0.3050994889637925, "grad_norm": 2.2027895442112695, "learning_rate": 8.143339525046786e-06, "loss": 1.0421, "step": 8418 }, { "epoch": 0.3051357326664492, "grad_norm": 2.3750489583977763, "learning_rate": 8.142883064548095e-06, "loss": 1.0423, "step": 8419 }, { "epoch": 0.30517197636910587, "grad_norm": 2.266381057753873, "learning_rate": 8.142426560742372e-06, "loss": 0.9362, "step": 8420 }, { "epoch": 0.3052082200717625, "grad_norm": 2.484573009246519, "learning_rate": 8.141970013635915e-06, "loss": 1.0032, "step": 8421 }, { "epoch": 0.3052444637744192, "grad_norm": 2.34891641228737, "learning_rate": 8.14151342323501e-06, "loss": 0.7899, "step": 8422 }, { "epoch": 0.30528070747707586, "grad_norm": 2.220127381488303, "learning_rate": 8.14105678954595e-06, "loss": 0.8293, "step": 8423 }, { "epoch": 0.3053169511797325, "grad_norm": 2.4726780743781798, "learning_rate": 8.140600112575027e-06, "loss": 0.9416, "step": 8424 }, { "epoch": 0.3053531948823892, "grad_norm": 2.2308137001007684, "learning_rate": 8.140143392328534e-06, "loss": 1.0714, "step": 8425 }, { "epoch": 0.30538943858504586, "grad_norm": 1.8871363293117722, "learning_rate": 8.139686628812763e-06, "loss": 0.7006, "step": 8426 }, { "epoch": 0.3054256822877025, "grad_norm": 2.408956889872973, "learning_rate": 8.13922982203401e-06, "loss": 0.7814, "step": 8427 }, { "epoch": 0.30546192599035915, "grad_norm": 2.7523332037062116, "learning_rate": 8.138772971998572e-06, "loss": 0.9936, "step": 8428 }, { "epoch": 0.30549816969301585, "grad_norm": 2.146695591469554, "learning_rate": 8.138316078712737e-06, "loss": 0.8587, "step": 8429 }, { "epoch": 0.3055344133956725, "grad_norm": 1.9674180989566747, "learning_rate": 8.137859142182805e-06, "loss": 0.839, "step": 8430 }, { "epoch": 0.30557065709832915, "grad_norm": 2.3898878403125576, "learning_rate": 8.137402162415071e-06, "loss": 1.1079, "step": 8431 }, { "epoch": 0.30560690080098585, "grad_norm": 2.331718686821595, "learning_rate": 8.136945139415834e-06, "loss": 1.0727, "step": 8432 }, { "epoch": 0.3056431445036425, "grad_norm": 2.495235347162091, "learning_rate": 8.13648807319139e-06, "loss": 0.9634, "step": 8433 }, { "epoch": 0.30567938820629914, "grad_norm": 2.597203886586884, "learning_rate": 8.136030963748036e-06, "loss": 0.9212, "step": 8434 }, { "epoch": 0.30571563190895584, "grad_norm": 2.471289617431652, "learning_rate": 8.135573811092074e-06, "loss": 0.9904, "step": 8435 }, { "epoch": 0.3057518756116125, "grad_norm": 2.3422379878892507, "learning_rate": 8.1351166152298e-06, "loss": 0.9295, "step": 8436 }, { "epoch": 0.30578811931426914, "grad_norm": 2.422950694386047, "learning_rate": 8.134659376167515e-06, "loss": 1.0629, "step": 8437 }, { "epoch": 0.3058243630169258, "grad_norm": 2.24613580762784, "learning_rate": 8.134202093911518e-06, "loss": 1.0828, "step": 8438 }, { "epoch": 0.3058606067195825, "grad_norm": 2.0765163875552046, "learning_rate": 8.133744768468114e-06, "loss": 1.0081, "step": 8439 }, { "epoch": 0.30589685042223913, "grad_norm": 2.598492571421412, "learning_rate": 8.133287399843602e-06, "loss": 1.0704, "step": 8440 }, { "epoch": 0.3059330941248958, "grad_norm": 2.375906882915939, "learning_rate": 8.132829988044284e-06, "loss": 0.9353, "step": 8441 }, { "epoch": 0.3059693378275525, "grad_norm": 3.1135185476997522, "learning_rate": 8.132372533076464e-06, "loss": 1.0784, "step": 8442 }, { "epoch": 0.3060055815302091, "grad_norm": 2.5776585746370984, "learning_rate": 8.131915034946445e-06, "loss": 1.0959, "step": 8443 }, { "epoch": 0.30604182523286577, "grad_norm": 2.215270189830206, "learning_rate": 8.13145749366053e-06, "loss": 1.0564, "step": 8444 }, { "epoch": 0.3060780689355225, "grad_norm": 2.5190014980946103, "learning_rate": 8.130999909225026e-06, "loss": 1.11, "step": 8445 }, { "epoch": 0.3061143126381791, "grad_norm": 2.289506767410318, "learning_rate": 8.130542281646233e-06, "loss": 0.8694, "step": 8446 }, { "epoch": 0.30615055634083577, "grad_norm": 2.7061202185811197, "learning_rate": 8.130084610930465e-06, "loss": 0.9616, "step": 8447 }, { "epoch": 0.30618680004349247, "grad_norm": 2.3586813043177366, "learning_rate": 8.129626897084023e-06, "loss": 1.0072, "step": 8448 }, { "epoch": 0.3062230437461491, "grad_norm": 2.251516902574854, "learning_rate": 8.129169140113216e-06, "loss": 0.8503, "step": 8449 }, { "epoch": 0.30625928744880576, "grad_norm": 1.958248835159443, "learning_rate": 8.128711340024349e-06, "loss": 0.9463, "step": 8450 }, { "epoch": 0.3062955311514624, "grad_norm": 2.5866611141897664, "learning_rate": 8.12825349682373e-06, "loss": 1.0585, "step": 8451 }, { "epoch": 0.3063317748541191, "grad_norm": 2.298402877208322, "learning_rate": 8.127795610517674e-06, "loss": 0.9733, "step": 8452 }, { "epoch": 0.30636801855677576, "grad_norm": 2.1653834331120967, "learning_rate": 8.127337681112483e-06, "loss": 0.8111, "step": 8453 }, { "epoch": 0.3064042622594324, "grad_norm": 2.4288799942356385, "learning_rate": 8.126879708614473e-06, "loss": 0.9124, "step": 8454 }, { "epoch": 0.3064405059620891, "grad_norm": 2.3861707670871426, "learning_rate": 8.126421693029948e-06, "loss": 0.7808, "step": 8455 }, { "epoch": 0.30647674966474575, "grad_norm": 2.2955387812288324, "learning_rate": 8.125963634365226e-06, "loss": 0.8161, "step": 8456 }, { "epoch": 0.3065129933674024, "grad_norm": 2.0139998895865907, "learning_rate": 8.125505532626613e-06, "loss": 0.7912, "step": 8457 }, { "epoch": 0.3065492370700591, "grad_norm": 2.131720470803668, "learning_rate": 8.125047387820426e-06, "loss": 0.8514, "step": 8458 }, { "epoch": 0.30658548077271575, "grad_norm": 2.6415295571786452, "learning_rate": 8.124589199952978e-06, "loss": 0.9481, "step": 8459 }, { "epoch": 0.3066217244753724, "grad_norm": 2.3044014259814958, "learning_rate": 8.124130969030575e-06, "loss": 1.0706, "step": 8460 }, { "epoch": 0.3066579681780291, "grad_norm": 2.53028233976085, "learning_rate": 8.123672695059541e-06, "loss": 0.8389, "step": 8461 }, { "epoch": 0.30669421188068574, "grad_norm": 2.339933646057574, "learning_rate": 8.123214378046185e-06, "loss": 1.1167, "step": 8462 }, { "epoch": 0.3067304555833424, "grad_norm": 2.3598726730596753, "learning_rate": 8.122756017996825e-06, "loss": 1.0681, "step": 8463 }, { "epoch": 0.30676669928599903, "grad_norm": 2.5835950342519727, "learning_rate": 8.122297614917773e-06, "loss": 1.0797, "step": 8464 }, { "epoch": 0.30680294298865574, "grad_norm": 2.715437979732439, "learning_rate": 8.12183916881535e-06, "loss": 1.0734, "step": 8465 }, { "epoch": 0.3068391866913124, "grad_norm": 2.53698782253216, "learning_rate": 8.121380679695871e-06, "loss": 0.6964, "step": 8466 }, { "epoch": 0.30687543039396903, "grad_norm": 2.768638281059626, "learning_rate": 8.120922147565654e-06, "loss": 0.9431, "step": 8467 }, { "epoch": 0.30691167409662573, "grad_norm": 2.1680062892505747, "learning_rate": 8.120463572431017e-06, "loss": 0.8608, "step": 8468 }, { "epoch": 0.3069479177992824, "grad_norm": 2.2550571518618168, "learning_rate": 8.12000495429828e-06, "loss": 0.9763, "step": 8469 }, { "epoch": 0.306984161501939, "grad_norm": 2.2837633451904766, "learning_rate": 8.11954629317376e-06, "loss": 0.9906, "step": 8470 }, { "epoch": 0.3070204052045957, "grad_norm": 2.3159518922999607, "learning_rate": 8.119087589063779e-06, "loss": 1.0051, "step": 8471 }, { "epoch": 0.30705664890725237, "grad_norm": 2.3178929845917455, "learning_rate": 8.118628841974658e-06, "loss": 0.8998, "step": 8472 }, { "epoch": 0.307092892609909, "grad_norm": 2.3795120264630087, "learning_rate": 8.118170051912719e-06, "loss": 0.7548, "step": 8473 }, { "epoch": 0.30712913631256566, "grad_norm": 2.364616961975437, "learning_rate": 8.117711218884281e-06, "loss": 1.031, "step": 8474 }, { "epoch": 0.30716538001522237, "grad_norm": 2.359911075234288, "learning_rate": 8.117252342895667e-06, "loss": 0.7866, "step": 8475 }, { "epoch": 0.307201623717879, "grad_norm": 2.7879244413306927, "learning_rate": 8.116793423953201e-06, "loss": 1.0688, "step": 8476 }, { "epoch": 0.30723786742053566, "grad_norm": 2.470502926629267, "learning_rate": 8.116334462063209e-06, "loss": 0.9144, "step": 8477 }, { "epoch": 0.30727411112319236, "grad_norm": 2.357842983648429, "learning_rate": 8.115875457232012e-06, "loss": 0.827, "step": 8478 }, { "epoch": 0.307310354825849, "grad_norm": 2.5748109651312516, "learning_rate": 8.115416409465934e-06, "loss": 0.9442, "step": 8479 }, { "epoch": 0.30734659852850565, "grad_norm": 2.22337421772114, "learning_rate": 8.114957318771304e-06, "loss": 0.8635, "step": 8480 }, { "epoch": 0.30738284223116236, "grad_norm": 2.3369192086202233, "learning_rate": 8.114498185154444e-06, "loss": 1.0211, "step": 8481 }, { "epoch": 0.307419085933819, "grad_norm": 2.309080019115849, "learning_rate": 8.114039008621686e-06, "loss": 0.8251, "step": 8482 }, { "epoch": 0.30745532963647565, "grad_norm": 2.6508817075020805, "learning_rate": 8.11357978917935e-06, "loss": 1.1261, "step": 8483 }, { "epoch": 0.30749157333913235, "grad_norm": 2.2419907361946145, "learning_rate": 8.113120526833768e-06, "loss": 1.0195, "step": 8484 }, { "epoch": 0.307527817041789, "grad_norm": 2.3644833709146136, "learning_rate": 8.11266122159127e-06, "loss": 1.1203, "step": 8485 }, { "epoch": 0.30756406074444564, "grad_norm": 2.267631632552404, "learning_rate": 8.112201873458183e-06, "loss": 0.8833, "step": 8486 }, { "epoch": 0.3076003044471023, "grad_norm": 2.2696040121634913, "learning_rate": 8.111742482440834e-06, "loss": 0.9097, "step": 8487 }, { "epoch": 0.307636548149759, "grad_norm": 2.3511302232012112, "learning_rate": 8.111283048545557e-06, "loss": 0.8879, "step": 8488 }, { "epoch": 0.30767279185241564, "grad_norm": 2.4875356071292067, "learning_rate": 8.110823571778681e-06, "loss": 0.9796, "step": 8489 }, { "epoch": 0.3077090355550723, "grad_norm": 2.3175452296557193, "learning_rate": 8.110364052146537e-06, "loss": 1.0112, "step": 8490 }, { "epoch": 0.307745279257729, "grad_norm": 2.566791232687689, "learning_rate": 8.109904489655458e-06, "loss": 1.1036, "step": 8491 }, { "epoch": 0.30778152296038563, "grad_norm": 2.602874036908196, "learning_rate": 8.109444884311776e-06, "loss": 0.8719, "step": 8492 }, { "epoch": 0.3078177666630423, "grad_norm": 2.5368209455905184, "learning_rate": 8.108985236121825e-06, "loss": 1.014, "step": 8493 }, { "epoch": 0.307854010365699, "grad_norm": 2.3773178100981163, "learning_rate": 8.108525545091936e-06, "loss": 1.1009, "step": 8494 }, { "epoch": 0.30789025406835563, "grad_norm": 2.5186478451657686, "learning_rate": 8.108065811228445e-06, "loss": 1.0219, "step": 8495 }, { "epoch": 0.3079264977710123, "grad_norm": 2.392467241642974, "learning_rate": 8.107606034537687e-06, "loss": 1.0243, "step": 8496 }, { "epoch": 0.307962741473669, "grad_norm": 2.0245242537195, "learning_rate": 8.107146215025998e-06, "loss": 0.8289, "step": 8497 }, { "epoch": 0.3079989851763256, "grad_norm": 2.615118586152336, "learning_rate": 8.106686352699713e-06, "loss": 0.9613, "step": 8498 }, { "epoch": 0.30803522887898227, "grad_norm": 2.548750900187743, "learning_rate": 8.106226447565169e-06, "loss": 0.9973, "step": 8499 }, { "epoch": 0.3080714725816389, "grad_norm": 2.441168278215118, "learning_rate": 8.105766499628703e-06, "loss": 0.8947, "step": 8500 }, { "epoch": 0.3081077162842956, "grad_norm": 2.16595538328082, "learning_rate": 8.105306508896653e-06, "loss": 1.0201, "step": 8501 }, { "epoch": 0.30814395998695226, "grad_norm": 2.2535912187998552, "learning_rate": 8.104846475375358e-06, "loss": 0.7245, "step": 8502 }, { "epoch": 0.3081802036896089, "grad_norm": 2.2629309853660566, "learning_rate": 8.104386399071155e-06, "loss": 0.7617, "step": 8503 }, { "epoch": 0.3082164473922656, "grad_norm": 2.2002820990160217, "learning_rate": 8.103926279990386e-06, "loss": 1.1873, "step": 8504 }, { "epoch": 0.30825269109492226, "grad_norm": 2.3017055379071985, "learning_rate": 8.10346611813939e-06, "loss": 1.0492, "step": 8505 }, { "epoch": 0.3082889347975789, "grad_norm": 2.84063363173168, "learning_rate": 8.103005913524507e-06, "loss": 0.8687, "step": 8506 }, { "epoch": 0.3083251785002356, "grad_norm": 2.3551941961388096, "learning_rate": 8.102545666152079e-06, "loss": 0.9288, "step": 8507 }, { "epoch": 0.30836142220289225, "grad_norm": 2.1514137653015015, "learning_rate": 8.10208537602845e-06, "loss": 0.6636, "step": 8508 }, { "epoch": 0.3083976659055489, "grad_norm": 2.417194737763493, "learning_rate": 8.101625043159958e-06, "loss": 0.977, "step": 8509 }, { "epoch": 0.30843390960820555, "grad_norm": 2.6049774613259027, "learning_rate": 8.10116466755295e-06, "loss": 0.9592, "step": 8510 }, { "epoch": 0.30847015331086225, "grad_norm": 2.506422969731188, "learning_rate": 8.100704249213767e-06, "loss": 0.9492, "step": 8511 }, { "epoch": 0.3085063970135189, "grad_norm": 2.458756479722116, "learning_rate": 8.100243788148757e-06, "loss": 0.8996, "step": 8512 }, { "epoch": 0.30854264071617554, "grad_norm": 2.509733000177467, "learning_rate": 8.099783284364261e-06, "loss": 1.1278, "step": 8513 }, { "epoch": 0.30857888441883224, "grad_norm": 2.475169854553985, "learning_rate": 8.099322737866627e-06, "loss": 0.9963, "step": 8514 }, { "epoch": 0.3086151281214889, "grad_norm": 2.195735824727767, "learning_rate": 8.0988621486622e-06, "loss": 0.8342, "step": 8515 }, { "epoch": 0.30865137182414554, "grad_norm": 2.568129308495978, "learning_rate": 8.098401516757326e-06, "loss": 0.924, "step": 8516 }, { "epoch": 0.30868761552680224, "grad_norm": 2.4340728562595277, "learning_rate": 8.097940842158354e-06, "loss": 1.0788, "step": 8517 }, { "epoch": 0.3087238592294589, "grad_norm": 2.2242306987605196, "learning_rate": 8.09748012487163e-06, "loss": 0.9624, "step": 8518 }, { "epoch": 0.30876010293211553, "grad_norm": 2.784124409175437, "learning_rate": 8.097019364903504e-06, "loss": 1.0885, "step": 8519 }, { "epoch": 0.30879634663477223, "grad_norm": 2.63517052806729, "learning_rate": 8.096558562260325e-06, "loss": 0.9322, "step": 8520 }, { "epoch": 0.3088325903374289, "grad_norm": 2.329051711869963, "learning_rate": 8.096097716948438e-06, "loss": 1.0216, "step": 8521 }, { "epoch": 0.3088688340400855, "grad_norm": 2.2552549940629194, "learning_rate": 8.095636828974199e-06, "loss": 1.0657, "step": 8522 }, { "epoch": 0.3089050777427422, "grad_norm": 2.4738576136659676, "learning_rate": 8.095175898343957e-06, "loss": 0.8627, "step": 8523 }, { "epoch": 0.3089413214453989, "grad_norm": 2.5686405163332546, "learning_rate": 8.094714925064062e-06, "loss": 0.8281, "step": 8524 }, { "epoch": 0.3089775651480555, "grad_norm": 2.1463326408388257, "learning_rate": 8.094253909140869e-06, "loss": 0.8761, "step": 8525 }, { "epoch": 0.30901380885071217, "grad_norm": 2.4150419934726255, "learning_rate": 8.093792850580726e-06, "loss": 1.03, "step": 8526 }, { "epoch": 0.30905005255336887, "grad_norm": 2.0434886392837552, "learning_rate": 8.09333174938999e-06, "loss": 0.8809, "step": 8527 }, { "epoch": 0.3090862962560255, "grad_norm": 2.32734558166028, "learning_rate": 8.092870605575014e-06, "loss": 0.933, "step": 8528 }, { "epoch": 0.30912253995868216, "grad_norm": 2.428357980253927, "learning_rate": 8.09240941914215e-06, "loss": 0.7403, "step": 8529 }, { "epoch": 0.30915878366133887, "grad_norm": 2.241984405468165, "learning_rate": 8.091948190097755e-06, "loss": 1.0638, "step": 8530 }, { "epoch": 0.3091950273639955, "grad_norm": 2.3706035073957814, "learning_rate": 8.091486918448184e-06, "loss": 0.9482, "step": 8531 }, { "epoch": 0.30923127106665216, "grad_norm": 2.4374180037769198, "learning_rate": 8.091025604199793e-06, "loss": 1.056, "step": 8532 }, { "epoch": 0.30926751476930886, "grad_norm": 2.0831447030927897, "learning_rate": 8.090564247358938e-06, "loss": 0.8156, "step": 8533 }, { "epoch": 0.3093037584719655, "grad_norm": 2.2822501291496775, "learning_rate": 8.090102847931977e-06, "loss": 1.1663, "step": 8534 }, { "epoch": 0.30934000217462215, "grad_norm": 2.2211028939868154, "learning_rate": 8.08964140592527e-06, "loss": 0.7085, "step": 8535 }, { "epoch": 0.3093762458772788, "grad_norm": 2.7861559243739626, "learning_rate": 8.08917992134517e-06, "loss": 1.0831, "step": 8536 }, { "epoch": 0.3094124895799355, "grad_norm": 2.257194813360739, "learning_rate": 8.08871839419804e-06, "loss": 0.8425, "step": 8537 }, { "epoch": 0.30944873328259215, "grad_norm": 2.4098636453517757, "learning_rate": 8.08825682449024e-06, "loss": 1.0004, "step": 8538 }, { "epoch": 0.3094849769852488, "grad_norm": 2.356041769136149, "learning_rate": 8.087795212228125e-06, "loss": 0.9003, "step": 8539 }, { "epoch": 0.3095212206879055, "grad_norm": 2.283469182164507, "learning_rate": 8.087333557418063e-06, "loss": 0.981, "step": 8540 }, { "epoch": 0.30955746439056214, "grad_norm": 1.96981749048442, "learning_rate": 8.086871860066409e-06, "loss": 0.8214, "step": 8541 }, { "epoch": 0.3095937080932188, "grad_norm": 2.4796543184337727, "learning_rate": 8.086410120179528e-06, "loss": 1.0919, "step": 8542 }, { "epoch": 0.3096299517958755, "grad_norm": 2.303736763961728, "learning_rate": 8.085948337763784e-06, "loss": 0.7739, "step": 8543 }, { "epoch": 0.30966619549853214, "grad_norm": 2.3572184822418714, "learning_rate": 8.085486512825538e-06, "loss": 0.8871, "step": 8544 }, { "epoch": 0.3097024392011888, "grad_norm": 2.309981821763966, "learning_rate": 8.085024645371152e-06, "loss": 0.7719, "step": 8545 }, { "epoch": 0.30973868290384543, "grad_norm": 2.3842275576200973, "learning_rate": 8.084562735406993e-06, "loss": 1.0064, "step": 8546 }, { "epoch": 0.30977492660650213, "grad_norm": 2.372630609983979, "learning_rate": 8.084100782939426e-06, "loss": 0.8487, "step": 8547 }, { "epoch": 0.3098111703091588, "grad_norm": 2.1077330944914587, "learning_rate": 8.083638787974814e-06, "loss": 0.808, "step": 8548 }, { "epoch": 0.3098474140118154, "grad_norm": 2.462515977481025, "learning_rate": 8.083176750519526e-06, "loss": 0.9823, "step": 8549 }, { "epoch": 0.3098836577144721, "grad_norm": 2.3786308469413804, "learning_rate": 8.082714670579926e-06, "loss": 0.976, "step": 8550 }, { "epoch": 0.3099199014171288, "grad_norm": 2.2909120678217634, "learning_rate": 8.082252548162382e-06, "loss": 1.0355, "step": 8551 }, { "epoch": 0.3099561451197854, "grad_norm": 2.4922105887303836, "learning_rate": 8.081790383273263e-06, "loss": 0.9968, "step": 8552 }, { "epoch": 0.3099923888224421, "grad_norm": 2.5785801836939126, "learning_rate": 8.081328175918934e-06, "loss": 1.031, "step": 8553 }, { "epoch": 0.31002863252509877, "grad_norm": 2.630567927200487, "learning_rate": 8.08086592610577e-06, "loss": 0.9853, "step": 8554 }, { "epoch": 0.3100648762277554, "grad_norm": 2.5430402064505913, "learning_rate": 8.080403633840134e-06, "loss": 0.8379, "step": 8555 }, { "epoch": 0.3101011199304121, "grad_norm": 2.1854172424639207, "learning_rate": 8.079941299128399e-06, "loss": 0.7672, "step": 8556 }, { "epoch": 0.31013736363306876, "grad_norm": 2.134183794047872, "learning_rate": 8.079478921976936e-06, "loss": 0.9543, "step": 8557 }, { "epoch": 0.3101736073357254, "grad_norm": 2.4348404269174595, "learning_rate": 8.079016502392116e-06, "loss": 0.9935, "step": 8558 }, { "epoch": 0.31020985103838206, "grad_norm": 2.3830940341539386, "learning_rate": 8.07855404038031e-06, "loss": 0.9031, "step": 8559 }, { "epoch": 0.31024609474103876, "grad_norm": 2.13451478016187, "learning_rate": 8.078091535947892e-06, "loss": 0.968, "step": 8560 }, { "epoch": 0.3102823384436954, "grad_norm": 2.360558791799791, "learning_rate": 8.077628989101235e-06, "loss": 0.9819, "step": 8561 }, { "epoch": 0.31031858214635205, "grad_norm": 2.162773309276978, "learning_rate": 8.07716639984671e-06, "loss": 0.9388, "step": 8562 }, { "epoch": 0.31035482584900875, "grad_norm": 2.2723752272813433, "learning_rate": 8.076703768190695e-06, "loss": 0.9294, "step": 8563 }, { "epoch": 0.3103910695516654, "grad_norm": 2.30087362093844, "learning_rate": 8.076241094139561e-06, "loss": 0.9308, "step": 8564 }, { "epoch": 0.31042731325432205, "grad_norm": 2.204109496642183, "learning_rate": 8.075778377699687e-06, "loss": 0.9356, "step": 8565 }, { "epoch": 0.31046355695697875, "grad_norm": 2.3798987186560314, "learning_rate": 8.075315618877445e-06, "loss": 0.9089, "step": 8566 }, { "epoch": 0.3104998006596354, "grad_norm": 2.321107487116375, "learning_rate": 8.074852817679215e-06, "loss": 1.1204, "step": 8567 }, { "epoch": 0.31053604436229204, "grad_norm": 1.983887227858703, "learning_rate": 8.074389974111373e-06, "loss": 0.7466, "step": 8568 }, { "epoch": 0.31057228806494874, "grad_norm": 2.065988285250434, "learning_rate": 8.073927088180295e-06, "loss": 0.7515, "step": 8569 }, { "epoch": 0.3106085317676054, "grad_norm": 2.3928826615048844, "learning_rate": 8.073464159892363e-06, "loss": 0.9792, "step": 8570 }, { "epoch": 0.31064477547026204, "grad_norm": 2.2561200720185512, "learning_rate": 8.073001189253954e-06, "loss": 0.9171, "step": 8571 }, { "epoch": 0.3106810191729187, "grad_norm": 2.517270582715201, "learning_rate": 8.072538176271443e-06, "loss": 0.9534, "step": 8572 }, { "epoch": 0.3107172628755754, "grad_norm": 2.4170459929311785, "learning_rate": 8.072075120951218e-06, "loss": 1.0068, "step": 8573 }, { "epoch": 0.31075350657823203, "grad_norm": 2.7338453197913273, "learning_rate": 8.071612023299652e-06, "loss": 1.0555, "step": 8574 }, { "epoch": 0.3107897502808887, "grad_norm": 2.186312907902367, "learning_rate": 8.071148883323135e-06, "loss": 0.9433, "step": 8575 }, { "epoch": 0.3108259939835454, "grad_norm": 2.4245555958175213, "learning_rate": 8.070685701028041e-06, "loss": 1.0752, "step": 8576 }, { "epoch": 0.310862237686202, "grad_norm": 2.195674057690909, "learning_rate": 8.070222476420755e-06, "loss": 0.9929, "step": 8577 }, { "epoch": 0.31089848138885867, "grad_norm": 2.587153621815344, "learning_rate": 8.06975920950766e-06, "loss": 1.0191, "step": 8578 }, { "epoch": 0.3109347250915154, "grad_norm": 2.7490941218878815, "learning_rate": 8.06929590029514e-06, "loss": 0.9067, "step": 8579 }, { "epoch": 0.310970968794172, "grad_norm": 2.46466751935317, "learning_rate": 8.068832548789577e-06, "loss": 0.8928, "step": 8580 }, { "epoch": 0.31100721249682867, "grad_norm": 2.6438951272265387, "learning_rate": 8.06836915499736e-06, "loss": 0.9722, "step": 8581 }, { "epoch": 0.3110434561994853, "grad_norm": 2.2789279881378106, "learning_rate": 8.06790571892487e-06, "loss": 0.8332, "step": 8582 }, { "epoch": 0.311079699902142, "grad_norm": 2.4677748898789065, "learning_rate": 8.067442240578497e-06, "loss": 0.9617, "step": 8583 }, { "epoch": 0.31111594360479866, "grad_norm": 2.5108822126503605, "learning_rate": 8.066978719964624e-06, "loss": 1.0467, "step": 8584 }, { "epoch": 0.3111521873074553, "grad_norm": 2.9583771504837717, "learning_rate": 8.066515157089639e-06, "loss": 1.1594, "step": 8585 }, { "epoch": 0.311188431010112, "grad_norm": 1.9710529239126613, "learning_rate": 8.066051551959928e-06, "loss": 1.0237, "step": 8586 }, { "epoch": 0.31122467471276866, "grad_norm": 2.299107314783304, "learning_rate": 8.065587904581884e-06, "loss": 0.8617, "step": 8587 }, { "epoch": 0.3112609184154253, "grad_norm": 2.617999575581726, "learning_rate": 8.06512421496189e-06, "loss": 1.0938, "step": 8588 }, { "epoch": 0.311297162118082, "grad_norm": 2.5303602597753136, "learning_rate": 8.064660483106339e-06, "loss": 0.9704, "step": 8589 }, { "epoch": 0.31133340582073865, "grad_norm": 2.182416862542657, "learning_rate": 8.06419670902162e-06, "loss": 0.8494, "step": 8590 }, { "epoch": 0.3113696495233953, "grad_norm": 2.182915562554282, "learning_rate": 8.063732892714125e-06, "loss": 1.0139, "step": 8591 }, { "epoch": 0.311405893226052, "grad_norm": 2.353734923631014, "learning_rate": 8.06326903419024e-06, "loss": 0.9233, "step": 8592 }, { "epoch": 0.31144213692870865, "grad_norm": 2.0287666057514233, "learning_rate": 8.062805133456364e-06, "loss": 0.7853, "step": 8593 }, { "epoch": 0.3114783806313653, "grad_norm": 2.0232962119476507, "learning_rate": 8.062341190518883e-06, "loss": 0.833, "step": 8594 }, { "epoch": 0.31151462433402194, "grad_norm": 2.241999515952618, "learning_rate": 8.061877205384194e-06, "loss": 0.9245, "step": 8595 }, { "epoch": 0.31155086803667864, "grad_norm": 2.000170219244739, "learning_rate": 8.061413178058688e-06, "loss": 0.9232, "step": 8596 }, { "epoch": 0.3115871117393353, "grad_norm": 2.405546359715088, "learning_rate": 8.060949108548761e-06, "loss": 0.88, "step": 8597 }, { "epoch": 0.31162335544199193, "grad_norm": 2.4540149753197644, "learning_rate": 8.060484996860805e-06, "loss": 0.9537, "step": 8598 }, { "epoch": 0.31165959914464864, "grad_norm": 2.457060853134526, "learning_rate": 8.060020843001218e-06, "loss": 0.8766, "step": 8599 }, { "epoch": 0.3116958428473053, "grad_norm": 2.2178064188729016, "learning_rate": 8.059556646976394e-06, "loss": 0.9364, "step": 8600 }, { "epoch": 0.31173208654996193, "grad_norm": 2.8783481738652137, "learning_rate": 8.05909240879273e-06, "loss": 1.1631, "step": 8601 }, { "epoch": 0.31176833025261863, "grad_norm": 2.685755203766626, "learning_rate": 8.058628128456622e-06, "loss": 0.9901, "step": 8602 }, { "epoch": 0.3118045739552753, "grad_norm": 2.1914695880323745, "learning_rate": 8.058163805974467e-06, "loss": 0.8206, "step": 8603 }, { "epoch": 0.3118408176579319, "grad_norm": 2.4305414568680144, "learning_rate": 8.057699441352666e-06, "loss": 0.8166, "step": 8604 }, { "epoch": 0.31187706136058857, "grad_norm": 2.077033893537665, "learning_rate": 8.057235034597617e-06, "loss": 0.9152, "step": 8605 }, { "epoch": 0.31191330506324527, "grad_norm": 2.232060010675535, "learning_rate": 8.056770585715716e-06, "loss": 0.7744, "step": 8606 }, { "epoch": 0.3119495487659019, "grad_norm": 2.4424029663997637, "learning_rate": 8.056306094713367e-06, "loss": 1.0064, "step": 8607 }, { "epoch": 0.31198579246855856, "grad_norm": 2.1589991306021807, "learning_rate": 8.055841561596966e-06, "loss": 0.9257, "step": 8608 }, { "epoch": 0.31202203617121527, "grad_norm": 2.4853510515018535, "learning_rate": 8.055376986372918e-06, "loss": 1.055, "step": 8609 }, { "epoch": 0.3120582798738719, "grad_norm": 2.570015861112992, "learning_rate": 8.054912369047623e-06, "loss": 1.0082, "step": 8610 }, { "epoch": 0.31209452357652856, "grad_norm": 2.4990932787743034, "learning_rate": 8.054447709627482e-06, "loss": 1.0913, "step": 8611 }, { "epoch": 0.31213076727918526, "grad_norm": 2.3215647044278196, "learning_rate": 8.0539830081189e-06, "loss": 0.8033, "step": 8612 }, { "epoch": 0.3121670109818419, "grad_norm": 2.523124543871363, "learning_rate": 8.05351826452828e-06, "loss": 1.0217, "step": 8613 }, { "epoch": 0.31220325468449855, "grad_norm": 2.3595011392822176, "learning_rate": 8.053053478862024e-06, "loss": 0.8913, "step": 8614 }, { "epoch": 0.31223949838715526, "grad_norm": 2.2719181750320945, "learning_rate": 8.052588651126536e-06, "loss": 0.9179, "step": 8615 }, { "epoch": 0.3122757420898119, "grad_norm": 2.1041089636392343, "learning_rate": 8.052123781328224e-06, "loss": 0.7239, "step": 8616 }, { "epoch": 0.31231198579246855, "grad_norm": 2.178162611304701, "learning_rate": 8.051658869473492e-06, "loss": 0.9366, "step": 8617 }, { "epoch": 0.3123482294951252, "grad_norm": 2.3863044804042324, "learning_rate": 8.051193915568746e-06, "loss": 0.9895, "step": 8618 }, { "epoch": 0.3123844731977819, "grad_norm": 2.202246016865326, "learning_rate": 8.050728919620393e-06, "loss": 0.8868, "step": 8619 }, { "epoch": 0.31242071690043854, "grad_norm": 2.3385922080125705, "learning_rate": 8.050263881634841e-06, "loss": 1.0216, "step": 8620 }, { "epoch": 0.3124569606030952, "grad_norm": 2.492910285489858, "learning_rate": 8.049798801618498e-06, "loss": 0.8278, "step": 8621 }, { "epoch": 0.3124932043057519, "grad_norm": 2.5784125191803406, "learning_rate": 8.04933367957777e-06, "loss": 0.8176, "step": 8622 }, { "epoch": 0.31252944800840854, "grad_norm": 2.380047006766079, "learning_rate": 8.04886851551907e-06, "loss": 0.9716, "step": 8623 }, { "epoch": 0.3125656917110652, "grad_norm": 2.467706821116812, "learning_rate": 8.048403309448804e-06, "loss": 0.8328, "step": 8624 }, { "epoch": 0.3126019354137219, "grad_norm": 2.1399675940550376, "learning_rate": 8.047938061373387e-06, "loss": 0.8486, "step": 8625 }, { "epoch": 0.31263817911637853, "grad_norm": 2.0595630429653697, "learning_rate": 8.047472771299225e-06, "loss": 0.9949, "step": 8626 }, { "epoch": 0.3126744228190352, "grad_norm": 2.3376674406862548, "learning_rate": 8.047007439232731e-06, "loss": 0.955, "step": 8627 }, { "epoch": 0.3127106665216919, "grad_norm": 2.454756389956956, "learning_rate": 8.046542065180318e-06, "loss": 0.9387, "step": 8628 }, { "epoch": 0.31274691022434853, "grad_norm": 2.3569360270248105, "learning_rate": 8.046076649148397e-06, "loss": 0.9555, "step": 8629 }, { "epoch": 0.3127831539270052, "grad_norm": 2.064859784357926, "learning_rate": 8.045611191143382e-06, "loss": 0.8323, "step": 8630 }, { "epoch": 0.3128193976296618, "grad_norm": 2.5675600423073477, "learning_rate": 8.045145691171687e-06, "loss": 1.0311, "step": 8631 }, { "epoch": 0.3128556413323185, "grad_norm": 2.374557756592444, "learning_rate": 8.044680149239726e-06, "loss": 0.8251, "step": 8632 }, { "epoch": 0.31289188503497517, "grad_norm": 2.225480308144682, "learning_rate": 8.044214565353914e-06, "loss": 1.0926, "step": 8633 }, { "epoch": 0.3129281287376318, "grad_norm": 2.117573474106415, "learning_rate": 8.043748939520666e-06, "loss": 0.8162, "step": 8634 }, { "epoch": 0.3129643724402885, "grad_norm": 2.271134496796366, "learning_rate": 8.043283271746401e-06, "loss": 1.0275, "step": 8635 }, { "epoch": 0.31300061614294516, "grad_norm": 2.4387116521436667, "learning_rate": 8.04281756203753e-06, "loss": 0.9748, "step": 8636 }, { "epoch": 0.3130368598456018, "grad_norm": 2.325334400033589, "learning_rate": 8.042351810400475e-06, "loss": 0.7629, "step": 8637 }, { "epoch": 0.3130731035482585, "grad_norm": 2.163726723021845, "learning_rate": 8.04188601684165e-06, "loss": 0.9627, "step": 8638 }, { "epoch": 0.31310934725091516, "grad_norm": 2.4614734899490847, "learning_rate": 8.041420181367476e-06, "loss": 1.0147, "step": 8639 }, { "epoch": 0.3131455909535718, "grad_norm": 2.7029259175505556, "learning_rate": 8.040954303984372e-06, "loss": 0.8749, "step": 8640 }, { "epoch": 0.31318183465622845, "grad_norm": 2.377842891149305, "learning_rate": 8.040488384698757e-06, "loss": 1.1028, "step": 8641 }, { "epoch": 0.31321807835888515, "grad_norm": 2.29463520767979, "learning_rate": 8.040022423517049e-06, "loss": 0.8654, "step": 8642 }, { "epoch": 0.3132543220615418, "grad_norm": 2.4515384333420855, "learning_rate": 8.039556420445672e-06, "loss": 0.8894, "step": 8643 }, { "epoch": 0.31329056576419845, "grad_norm": 2.4586272045624367, "learning_rate": 8.039090375491047e-06, "loss": 1.0287, "step": 8644 }, { "epoch": 0.31332680946685515, "grad_norm": 2.400907197364327, "learning_rate": 8.038624288659593e-06, "loss": 1.0133, "step": 8645 }, { "epoch": 0.3133630531695118, "grad_norm": 2.444760860657762, "learning_rate": 8.038158159957734e-06, "loss": 0.8921, "step": 8646 }, { "epoch": 0.31339929687216844, "grad_norm": 2.3749520223752127, "learning_rate": 8.037691989391894e-06, "loss": 0.9053, "step": 8647 }, { "epoch": 0.31343554057482514, "grad_norm": 2.1706394760118037, "learning_rate": 8.037225776968494e-06, "loss": 0.9431, "step": 8648 }, { "epoch": 0.3134717842774818, "grad_norm": 2.3884837161703083, "learning_rate": 8.03675952269396e-06, "loss": 1.098, "step": 8649 }, { "epoch": 0.31350802798013844, "grad_norm": 2.509243505669457, "learning_rate": 8.036293226574719e-06, "loss": 0.9477, "step": 8650 }, { "epoch": 0.31354427168279514, "grad_norm": 2.373384095367293, "learning_rate": 8.035826888617191e-06, "loss": 0.8483, "step": 8651 }, { "epoch": 0.3135805153854518, "grad_norm": 2.387249252264869, "learning_rate": 8.035360508827806e-06, "loss": 0.8741, "step": 8652 }, { "epoch": 0.31361675908810843, "grad_norm": 2.4011425632983086, "learning_rate": 8.034894087212986e-06, "loss": 1.04, "step": 8653 }, { "epoch": 0.3136530027907651, "grad_norm": 2.6588073070332734, "learning_rate": 8.034427623779163e-06, "loss": 1.0419, "step": 8654 }, { "epoch": 0.3136892464934218, "grad_norm": 2.4999305987689677, "learning_rate": 8.033961118532762e-06, "loss": 1.0242, "step": 8655 }, { "epoch": 0.3137254901960784, "grad_norm": 2.348130930334356, "learning_rate": 8.033494571480213e-06, "loss": 0.8705, "step": 8656 }, { "epoch": 0.3137617338987351, "grad_norm": 2.3387369946515393, "learning_rate": 8.033027982627941e-06, "loss": 0.8902, "step": 8657 }, { "epoch": 0.3137979776013918, "grad_norm": 2.260668706178107, "learning_rate": 8.03256135198238e-06, "loss": 0.9187, "step": 8658 }, { "epoch": 0.3138342213040484, "grad_norm": 2.2919332616919044, "learning_rate": 8.032094679549957e-06, "loss": 0.9454, "step": 8659 }, { "epoch": 0.31387046500670507, "grad_norm": 2.1510983858478006, "learning_rate": 8.031627965337104e-06, "loss": 0.8817, "step": 8660 }, { "epoch": 0.31390670870936177, "grad_norm": 2.107457452858906, "learning_rate": 8.03116120935025e-06, "loss": 0.7767, "step": 8661 }, { "epoch": 0.3139429524120184, "grad_norm": 2.3064402114361924, "learning_rate": 8.030694411595826e-06, "loss": 0.9016, "step": 8662 }, { "epoch": 0.31397919611467506, "grad_norm": 2.2875435062379186, "learning_rate": 8.030227572080268e-06, "loss": 0.9024, "step": 8663 }, { "epoch": 0.31401543981733177, "grad_norm": 2.4002048044188213, "learning_rate": 8.029760690810005e-06, "loss": 0.9006, "step": 8664 }, { "epoch": 0.3140516835199884, "grad_norm": 2.1366299005455067, "learning_rate": 8.029293767791474e-06, "loss": 1.1029, "step": 8665 }, { "epoch": 0.31408792722264506, "grad_norm": 2.377629651656318, "learning_rate": 8.028826803031106e-06, "loss": 1.0162, "step": 8666 }, { "epoch": 0.3141241709253017, "grad_norm": 2.247047145507717, "learning_rate": 8.028359796535337e-06, "loss": 0.8712, "step": 8667 }, { "epoch": 0.3141604146279584, "grad_norm": 2.4350880353620634, "learning_rate": 8.027892748310601e-06, "loss": 1.0306, "step": 8668 }, { "epoch": 0.31419665833061505, "grad_norm": 2.440049202076603, "learning_rate": 8.027425658363335e-06, "loss": 0.9641, "step": 8669 }, { "epoch": 0.3142329020332717, "grad_norm": 2.4571565596218163, "learning_rate": 8.026958526699972e-06, "loss": 0.88, "step": 8670 }, { "epoch": 0.3142691457359284, "grad_norm": 2.185541188562701, "learning_rate": 8.026491353326953e-06, "loss": 0.6138, "step": 8671 }, { "epoch": 0.31430538943858505, "grad_norm": 2.2116081641430076, "learning_rate": 8.026024138250715e-06, "loss": 0.8769, "step": 8672 }, { "epoch": 0.3143416331412417, "grad_norm": 2.2524933647827905, "learning_rate": 8.025556881477692e-06, "loss": 0.9542, "step": 8673 }, { "epoch": 0.3143778768438984, "grad_norm": 2.441119186283057, "learning_rate": 8.025089583014325e-06, "loss": 0.9571, "step": 8674 }, { "epoch": 0.31441412054655504, "grad_norm": 2.5038158086282043, "learning_rate": 8.024622242867054e-06, "loss": 0.9637, "step": 8675 }, { "epoch": 0.3144503642492117, "grad_norm": 2.7457654105804044, "learning_rate": 8.024154861042318e-06, "loss": 1.0865, "step": 8676 }, { "epoch": 0.31448660795186834, "grad_norm": 2.335552513014508, "learning_rate": 8.023687437546556e-06, "loss": 0.9467, "step": 8677 }, { "epoch": 0.31452285165452504, "grad_norm": 2.3635646386263933, "learning_rate": 8.02321997238621e-06, "loss": 1.0081, "step": 8678 }, { "epoch": 0.3145590953571817, "grad_norm": 2.746080793586017, "learning_rate": 8.022752465567722e-06, "loss": 0.9278, "step": 8679 }, { "epoch": 0.31459533905983833, "grad_norm": 2.25127488297886, "learning_rate": 8.022284917097531e-06, "loss": 0.9737, "step": 8680 }, { "epoch": 0.31463158276249503, "grad_norm": 2.2002664021626805, "learning_rate": 8.021817326982083e-06, "loss": 1.0444, "step": 8681 }, { "epoch": 0.3146678264651517, "grad_norm": 2.604666392834839, "learning_rate": 8.02134969522782e-06, "loss": 1.0205, "step": 8682 }, { "epoch": 0.3147040701678083, "grad_norm": 2.447491478363027, "learning_rate": 8.020882021841187e-06, "loss": 0.9103, "step": 8683 }, { "epoch": 0.314740313870465, "grad_norm": 2.3367094880483044, "learning_rate": 8.020414306828624e-06, "loss": 0.9977, "step": 8684 }, { "epoch": 0.3147765575731217, "grad_norm": 2.2588294483406397, "learning_rate": 8.01994655019658e-06, "loss": 1.0814, "step": 8685 }, { "epoch": 0.3148128012757783, "grad_norm": 2.319030248924159, "learning_rate": 8.0194787519515e-06, "loss": 1.0036, "step": 8686 }, { "epoch": 0.314849044978435, "grad_norm": 2.292860038096218, "learning_rate": 8.019010912099826e-06, "loss": 0.969, "step": 8687 }, { "epoch": 0.31488528868109167, "grad_norm": 2.1530400938943504, "learning_rate": 8.018543030648009e-06, "loss": 0.8673, "step": 8688 }, { "epoch": 0.3149215323837483, "grad_norm": 2.530422947423382, "learning_rate": 8.018075107602494e-06, "loss": 1.056, "step": 8689 }, { "epoch": 0.31495777608640496, "grad_norm": 2.567620755864592, "learning_rate": 8.01760714296973e-06, "loss": 0.9822, "step": 8690 }, { "epoch": 0.31499401978906166, "grad_norm": 2.224537717427354, "learning_rate": 8.017139136756164e-06, "loss": 0.8385, "step": 8691 }, { "epoch": 0.3150302634917183, "grad_norm": 2.508531695748444, "learning_rate": 8.016671088968246e-06, "loss": 0.9517, "step": 8692 }, { "epoch": 0.31506650719437496, "grad_norm": 2.2700610539755637, "learning_rate": 8.016202999612426e-06, "loss": 0.8776, "step": 8693 }, { "epoch": 0.31510275089703166, "grad_norm": 2.0363919779075386, "learning_rate": 8.01573486869515e-06, "loss": 0.8156, "step": 8694 }, { "epoch": 0.3151389945996883, "grad_norm": 2.1996642502959967, "learning_rate": 8.015266696222874e-06, "loss": 1.1155, "step": 8695 }, { "epoch": 0.31517523830234495, "grad_norm": 2.4859240761820582, "learning_rate": 8.014798482202045e-06, "loss": 1.1256, "step": 8696 }, { "epoch": 0.31521148200500165, "grad_norm": 2.197397881320509, "learning_rate": 8.014330226639116e-06, "loss": 0.8433, "step": 8697 }, { "epoch": 0.3152477257076583, "grad_norm": 2.17713498745715, "learning_rate": 8.01386192954054e-06, "loss": 0.9121, "step": 8698 }, { "epoch": 0.31528396941031495, "grad_norm": 2.179627029528775, "learning_rate": 8.01339359091277e-06, "loss": 0.949, "step": 8699 }, { "epoch": 0.31532021311297165, "grad_norm": 2.2444473719410487, "learning_rate": 8.012925210762259e-06, "loss": 0.7756, "step": 8700 }, { "epoch": 0.3153564568156283, "grad_norm": 2.782772691804398, "learning_rate": 8.01245678909546e-06, "loss": 0.9545, "step": 8701 }, { "epoch": 0.31539270051828494, "grad_norm": 2.3602490492166464, "learning_rate": 8.011988325918826e-06, "loss": 0.9977, "step": 8702 }, { "epoch": 0.3154289442209416, "grad_norm": 2.532898165569985, "learning_rate": 8.011519821238818e-06, "loss": 0.8118, "step": 8703 }, { "epoch": 0.3154651879235983, "grad_norm": 2.1090126482187936, "learning_rate": 8.011051275061887e-06, "loss": 0.9859, "step": 8704 }, { "epoch": 0.31550143162625494, "grad_norm": 2.331267043194, "learning_rate": 8.01058268739449e-06, "loss": 1.0369, "step": 8705 }, { "epoch": 0.3155376753289116, "grad_norm": 2.142322291292717, "learning_rate": 8.010114058243085e-06, "loss": 0.9128, "step": 8706 }, { "epoch": 0.3155739190315683, "grad_norm": 2.1335046560970343, "learning_rate": 8.009645387614128e-06, "loss": 1.1256, "step": 8707 }, { "epoch": 0.31561016273422493, "grad_norm": 2.422402911269474, "learning_rate": 8.009176675514078e-06, "loss": 0.9439, "step": 8708 }, { "epoch": 0.3156464064368816, "grad_norm": 2.2614505654313577, "learning_rate": 8.008707921949394e-06, "loss": 0.9486, "step": 8709 }, { "epoch": 0.3156826501395383, "grad_norm": 2.2242455201456397, "learning_rate": 8.008239126926534e-06, "loss": 0.874, "step": 8710 }, { "epoch": 0.3157188938421949, "grad_norm": 2.1564636595628284, "learning_rate": 8.007770290451958e-06, "loss": 0.8944, "step": 8711 }, { "epoch": 0.31575513754485157, "grad_norm": 2.501065047956044, "learning_rate": 8.007301412532126e-06, "loss": 0.8149, "step": 8712 }, { "epoch": 0.3157913812475082, "grad_norm": 2.2845166428473433, "learning_rate": 8.0068324931735e-06, "loss": 0.8921, "step": 8713 }, { "epoch": 0.3158276249501649, "grad_norm": 2.677692746723845, "learning_rate": 8.006363532382539e-06, "loss": 0.88, "step": 8714 }, { "epoch": 0.31586386865282157, "grad_norm": 2.164780109893817, "learning_rate": 8.005894530165709e-06, "loss": 0.9131, "step": 8715 }, { "epoch": 0.3159001123554782, "grad_norm": 2.364896267666729, "learning_rate": 8.00542548652947e-06, "loss": 1.0097, "step": 8716 }, { "epoch": 0.3159363560581349, "grad_norm": 2.289430055238462, "learning_rate": 8.004956401480283e-06, "loss": 0.9537, "step": 8717 }, { "epoch": 0.31597259976079156, "grad_norm": 2.313930857809746, "learning_rate": 8.004487275024617e-06, "loss": 0.7864, "step": 8718 }, { "epoch": 0.3160088434634482, "grad_norm": 2.2984582978043284, "learning_rate": 8.004018107168932e-06, "loss": 0.9497, "step": 8719 }, { "epoch": 0.3160450871661049, "grad_norm": 2.776528525722645, "learning_rate": 8.003548897919693e-06, "loss": 0.9888, "step": 8720 }, { "epoch": 0.31608133086876156, "grad_norm": 2.421113950829157, "learning_rate": 8.00307964728337e-06, "loss": 0.9412, "step": 8721 }, { "epoch": 0.3161175745714182, "grad_norm": 2.4390157488021718, "learning_rate": 8.002610355266424e-06, "loss": 0.9791, "step": 8722 }, { "epoch": 0.3161538182740749, "grad_norm": 2.2048016675462687, "learning_rate": 8.002141021875322e-06, "loss": 0.8794, "step": 8723 }, { "epoch": 0.31619006197673155, "grad_norm": 2.3426880222580277, "learning_rate": 8.001671647116533e-06, "loss": 0.7698, "step": 8724 }, { "epoch": 0.3162263056793882, "grad_norm": 2.2383667186630114, "learning_rate": 8.001202230996526e-06, "loss": 0.8241, "step": 8725 }, { "epoch": 0.31626254938204484, "grad_norm": 2.454715686787644, "learning_rate": 8.000732773521767e-06, "loss": 0.9547, "step": 8726 }, { "epoch": 0.31629879308470155, "grad_norm": 2.3549135389725038, "learning_rate": 8.000263274698723e-06, "loss": 0.9552, "step": 8727 }, { "epoch": 0.3163350367873582, "grad_norm": 2.4002259043112084, "learning_rate": 7.999793734533867e-06, "loss": 1.0645, "step": 8728 }, { "epoch": 0.31637128049001484, "grad_norm": 2.2480773619925634, "learning_rate": 7.999324153033667e-06, "loss": 1.065, "step": 8729 }, { "epoch": 0.31640752419267154, "grad_norm": 2.3749205736998364, "learning_rate": 7.998854530204595e-06, "loss": 1.054, "step": 8730 }, { "epoch": 0.3164437678953282, "grad_norm": 2.3087044662755933, "learning_rate": 7.998384866053119e-06, "loss": 0.8429, "step": 8731 }, { "epoch": 0.31648001159798483, "grad_norm": 2.52676970236271, "learning_rate": 7.997915160585715e-06, "loss": 1.0009, "step": 8732 }, { "epoch": 0.31651625530064154, "grad_norm": 2.400186300793724, "learning_rate": 7.997445413808853e-06, "loss": 1.097, "step": 8733 }, { "epoch": 0.3165524990032982, "grad_norm": 2.571250311973213, "learning_rate": 7.996975625729005e-06, "loss": 1.0242, "step": 8734 }, { "epoch": 0.31658874270595483, "grad_norm": 2.2833266803788272, "learning_rate": 7.996505796352648e-06, "loss": 0.7897, "step": 8735 }, { "epoch": 0.31662498640861153, "grad_norm": 2.3272751929731608, "learning_rate": 7.99603592568625e-06, "loss": 0.7338, "step": 8736 }, { "epoch": 0.3166612301112682, "grad_norm": 2.066320760670741, "learning_rate": 7.995566013736291e-06, "loss": 0.9123, "step": 8737 }, { "epoch": 0.3166974738139248, "grad_norm": 2.5359938617763444, "learning_rate": 7.995096060509242e-06, "loss": 0.9465, "step": 8738 }, { "epoch": 0.31673371751658147, "grad_norm": 2.45017871975544, "learning_rate": 7.994626066011582e-06, "loss": 1.1473, "step": 8739 }, { "epoch": 0.31676996121923817, "grad_norm": 2.3380380002398864, "learning_rate": 7.994156030249786e-06, "loss": 1.0101, "step": 8740 }, { "epoch": 0.3168062049218948, "grad_norm": 2.242346109571506, "learning_rate": 7.99368595323033e-06, "loss": 0.8071, "step": 8741 }, { "epoch": 0.31684244862455146, "grad_norm": 2.256630211253461, "learning_rate": 7.993215834959693e-06, "loss": 0.9563, "step": 8742 }, { "epoch": 0.31687869232720817, "grad_norm": 2.3338340260859582, "learning_rate": 7.992745675444352e-06, "loss": 0.8496, "step": 8743 }, { "epoch": 0.3169149360298648, "grad_norm": 2.581580730490734, "learning_rate": 7.992275474690785e-06, "loss": 0.8446, "step": 8744 }, { "epoch": 0.31695117973252146, "grad_norm": 2.4296434789636923, "learning_rate": 7.991805232705472e-06, "loss": 1.0841, "step": 8745 }, { "epoch": 0.31698742343517816, "grad_norm": 2.2419193585220154, "learning_rate": 7.991334949494891e-06, "loss": 0.972, "step": 8746 }, { "epoch": 0.3170236671378348, "grad_norm": 2.3383779893779684, "learning_rate": 7.990864625065527e-06, "loss": 0.877, "step": 8747 }, { "epoch": 0.31705991084049145, "grad_norm": 2.3941582181200043, "learning_rate": 7.990394259423853e-06, "loss": 0.9939, "step": 8748 }, { "epoch": 0.3170961545431481, "grad_norm": 2.3111948932377744, "learning_rate": 7.989923852576357e-06, "loss": 1.0843, "step": 8749 }, { "epoch": 0.3171323982458048, "grad_norm": 2.3278664813387837, "learning_rate": 7.989453404529517e-06, "loss": 0.7166, "step": 8750 }, { "epoch": 0.31716864194846145, "grad_norm": 2.0070587362842445, "learning_rate": 7.988982915289818e-06, "loss": 0.9519, "step": 8751 }, { "epoch": 0.3172048856511181, "grad_norm": 2.176752443757455, "learning_rate": 7.988512384863742e-06, "loss": 0.9106, "step": 8752 }, { "epoch": 0.3172411293537748, "grad_norm": 2.1342242034914922, "learning_rate": 7.988041813257774e-06, "loss": 0.8305, "step": 8753 }, { "epoch": 0.31727737305643144, "grad_norm": 2.617884283610296, "learning_rate": 7.987571200478396e-06, "loss": 0.9787, "step": 8754 }, { "epoch": 0.3173136167590881, "grad_norm": 2.173600779931889, "learning_rate": 7.987100546532094e-06, "loss": 0.9187, "step": 8755 }, { "epoch": 0.3173498604617448, "grad_norm": 2.269089097145918, "learning_rate": 7.986629851425352e-06, "loss": 0.946, "step": 8756 }, { "epoch": 0.31738610416440144, "grad_norm": 2.312620064379225, "learning_rate": 7.986159115164657e-06, "loss": 0.9887, "step": 8757 }, { "epoch": 0.3174223478670581, "grad_norm": 2.3982931608822504, "learning_rate": 7.985688337756496e-06, "loss": 0.8987, "step": 8758 }, { "epoch": 0.3174585915697148, "grad_norm": 2.4208200654318013, "learning_rate": 7.985217519207355e-06, "loss": 1.0087, "step": 8759 }, { "epoch": 0.31749483527237143, "grad_norm": 2.288437107764799, "learning_rate": 7.984746659523723e-06, "loss": 0.9083, "step": 8760 }, { "epoch": 0.3175310789750281, "grad_norm": 2.100255317345497, "learning_rate": 7.984275758712086e-06, "loss": 0.8737, "step": 8761 }, { "epoch": 0.3175673226776847, "grad_norm": 2.570141944326943, "learning_rate": 7.983804816778935e-06, "loss": 1.0217, "step": 8762 }, { "epoch": 0.31760356638034143, "grad_norm": 2.306851573025226, "learning_rate": 7.983333833730758e-06, "loss": 0.8862, "step": 8763 }, { "epoch": 0.3176398100829981, "grad_norm": 2.3421472124915588, "learning_rate": 7.982862809574044e-06, "loss": 0.9243, "step": 8764 }, { "epoch": 0.3176760537856547, "grad_norm": 2.3170332722033895, "learning_rate": 7.982391744315285e-06, "loss": 0.9852, "step": 8765 }, { "epoch": 0.3177122974883114, "grad_norm": 1.9811264048947814, "learning_rate": 7.98192063796097e-06, "loss": 0.8007, "step": 8766 }, { "epoch": 0.31774854119096807, "grad_norm": 2.156651569425908, "learning_rate": 7.981449490517595e-06, "loss": 0.7839, "step": 8767 }, { "epoch": 0.3177847848936247, "grad_norm": 2.406223875773221, "learning_rate": 7.980978301991647e-06, "loss": 0.9326, "step": 8768 }, { "epoch": 0.3178210285962814, "grad_norm": 2.164022151803251, "learning_rate": 7.980507072389621e-06, "loss": 1.0999, "step": 8769 }, { "epoch": 0.31785727229893807, "grad_norm": 2.1340668817484123, "learning_rate": 7.980035801718008e-06, "loss": 0.8209, "step": 8770 }, { "epoch": 0.3178935160015947, "grad_norm": 2.4457389162248253, "learning_rate": 7.979564489983309e-06, "loss": 1.1123, "step": 8771 }, { "epoch": 0.3179297597042514, "grad_norm": 2.627158569181317, "learning_rate": 7.97909313719201e-06, "loss": 0.8639, "step": 8772 }, { "epoch": 0.31796600340690806, "grad_norm": 2.2517105969283127, "learning_rate": 7.97862174335061e-06, "loss": 0.9999, "step": 8773 }, { "epoch": 0.3180022471095647, "grad_norm": 2.513173899578701, "learning_rate": 7.978150308465604e-06, "loss": 0.9743, "step": 8774 }, { "epoch": 0.31803849081222135, "grad_norm": 2.213504326140319, "learning_rate": 7.977678832543488e-06, "loss": 0.8936, "step": 8775 }, { "epoch": 0.31807473451487805, "grad_norm": 2.6035331837379094, "learning_rate": 7.977207315590759e-06, "loss": 0.9113, "step": 8776 }, { "epoch": 0.3181109782175347, "grad_norm": 2.556742188311483, "learning_rate": 7.976735757613912e-06, "loss": 1.0144, "step": 8777 }, { "epoch": 0.31814722192019135, "grad_norm": 2.2181602263372353, "learning_rate": 7.976264158619447e-06, "loss": 0.9417, "step": 8778 }, { "epoch": 0.31818346562284805, "grad_norm": 2.2341522227178925, "learning_rate": 7.975792518613863e-06, "loss": 0.9912, "step": 8779 }, { "epoch": 0.3182197093255047, "grad_norm": 2.686671317967984, "learning_rate": 7.975320837603657e-06, "loss": 0.8521, "step": 8780 }, { "epoch": 0.31825595302816134, "grad_norm": 2.2931589975412208, "learning_rate": 7.97484911559533e-06, "loss": 1.1787, "step": 8781 }, { "epoch": 0.31829219673081804, "grad_norm": 2.513268033134292, "learning_rate": 7.974377352595379e-06, "loss": 1.1019, "step": 8782 }, { "epoch": 0.3183284404334747, "grad_norm": 2.2120575150090946, "learning_rate": 7.97390554861031e-06, "loss": 0.8581, "step": 8783 }, { "epoch": 0.31836468413613134, "grad_norm": 2.195869838804383, "learning_rate": 7.973433703646618e-06, "loss": 0.9905, "step": 8784 }, { "epoch": 0.318400927838788, "grad_norm": 2.9359084130419926, "learning_rate": 7.972961817710811e-06, "loss": 0.8983, "step": 8785 }, { "epoch": 0.3184371715414447, "grad_norm": 2.181339729397003, "learning_rate": 7.972489890809386e-06, "loss": 0.911, "step": 8786 }, { "epoch": 0.31847341524410133, "grad_norm": 2.481253706717191, "learning_rate": 7.972017922948849e-06, "loss": 1.0712, "step": 8787 }, { "epoch": 0.318509658946758, "grad_norm": 2.534374578060171, "learning_rate": 7.971545914135702e-06, "loss": 1.0891, "step": 8788 }, { "epoch": 0.3185459026494147, "grad_norm": 1.9992657521557413, "learning_rate": 7.97107386437645e-06, "loss": 0.9301, "step": 8789 }, { "epoch": 0.3185821463520713, "grad_norm": 2.2628842537029707, "learning_rate": 7.970601773677596e-06, "loss": 0.9667, "step": 8790 }, { "epoch": 0.318618390054728, "grad_norm": 2.522136113319736, "learning_rate": 7.970129642045647e-06, "loss": 0.9406, "step": 8791 }, { "epoch": 0.3186546337573847, "grad_norm": 2.4206059230923143, "learning_rate": 7.969657469487107e-06, "loss": 1.1698, "step": 8792 }, { "epoch": 0.3186908774600413, "grad_norm": 2.2044130824610253, "learning_rate": 7.969185256008485e-06, "loss": 0.9077, "step": 8793 }, { "epoch": 0.31872712116269797, "grad_norm": 2.124123568522395, "learning_rate": 7.968713001616284e-06, "loss": 0.8349, "step": 8794 }, { "epoch": 0.31876336486535467, "grad_norm": 2.6023635111133476, "learning_rate": 7.968240706317013e-06, "loss": 1.0481, "step": 8795 }, { "epoch": 0.3187996085680113, "grad_norm": 2.5391106334645195, "learning_rate": 7.967768370117182e-06, "loss": 0.917, "step": 8796 }, { "epoch": 0.31883585227066796, "grad_norm": 2.275475251233869, "learning_rate": 7.967295993023296e-06, "loss": 0.9938, "step": 8797 }, { "epoch": 0.3188720959733246, "grad_norm": 2.4812533315775336, "learning_rate": 7.966823575041866e-06, "loss": 0.9272, "step": 8798 }, { "epoch": 0.3189083396759813, "grad_norm": 2.4135102763976213, "learning_rate": 7.966351116179403e-06, "loss": 1.1179, "step": 8799 }, { "epoch": 0.31894458337863796, "grad_norm": 2.478770391749914, "learning_rate": 7.965878616442413e-06, "loss": 1.1018, "step": 8800 }, { "epoch": 0.3189808270812946, "grad_norm": 2.712417617648934, "learning_rate": 7.96540607583741e-06, "loss": 0.9422, "step": 8801 }, { "epoch": 0.3190170707839513, "grad_norm": 2.544640011529107, "learning_rate": 7.964933494370906e-06, "loss": 0.9643, "step": 8802 }, { "epoch": 0.31905331448660795, "grad_norm": 2.5341056440271137, "learning_rate": 7.964460872049412e-06, "loss": 0.9309, "step": 8803 }, { "epoch": 0.3190895581892646, "grad_norm": 2.541137239853453, "learning_rate": 7.963988208879439e-06, "loss": 1.0003, "step": 8804 }, { "epoch": 0.3191258018919213, "grad_norm": 2.4492642558790614, "learning_rate": 7.9635155048675e-06, "loss": 0.8178, "step": 8805 }, { "epoch": 0.31916204559457795, "grad_norm": 2.4464469417509336, "learning_rate": 7.963042760020112e-06, "loss": 0.9176, "step": 8806 }, { "epoch": 0.3191982892972346, "grad_norm": 2.362750289726953, "learning_rate": 7.962569974343784e-06, "loss": 1.0222, "step": 8807 }, { "epoch": 0.3192345329998913, "grad_norm": 2.1319756520601327, "learning_rate": 7.962097147845037e-06, "loss": 0.7932, "step": 8808 }, { "epoch": 0.31927077670254794, "grad_norm": 2.2778968986718353, "learning_rate": 7.96162428053038e-06, "loss": 0.9682, "step": 8809 }, { "epoch": 0.3193070204052046, "grad_norm": 2.1457194453040636, "learning_rate": 7.961151372406333e-06, "loss": 0.6889, "step": 8810 }, { "epoch": 0.31934326410786124, "grad_norm": 2.4456331321764004, "learning_rate": 7.960678423479409e-06, "loss": 1.1113, "step": 8811 }, { "epoch": 0.31937950781051794, "grad_norm": 2.365388693998781, "learning_rate": 7.960205433756129e-06, "loss": 1.0781, "step": 8812 }, { "epoch": 0.3194157515131746, "grad_norm": 2.286976215159204, "learning_rate": 7.959732403243006e-06, "loss": 0.8429, "step": 8813 }, { "epoch": 0.31945199521583123, "grad_norm": 2.661136187996904, "learning_rate": 7.959259331946562e-06, "loss": 0.8802, "step": 8814 }, { "epoch": 0.31948823891848793, "grad_norm": 2.3613246382322703, "learning_rate": 7.958786219873315e-06, "loss": 0.9745, "step": 8815 }, { "epoch": 0.3195244826211446, "grad_norm": 2.4981163920591554, "learning_rate": 7.958313067029781e-06, "loss": 0.9835, "step": 8816 }, { "epoch": 0.3195607263238012, "grad_norm": 2.4314718757418285, "learning_rate": 7.957839873422484e-06, "loss": 0.8143, "step": 8817 }, { "epoch": 0.3195969700264579, "grad_norm": 2.3645264302239037, "learning_rate": 7.957366639057942e-06, "loss": 0.9957, "step": 8818 }, { "epoch": 0.3196332137291146, "grad_norm": 2.3947498870437696, "learning_rate": 7.956893363942677e-06, "loss": 1.0626, "step": 8819 }, { "epoch": 0.3196694574317712, "grad_norm": 2.0131605392630356, "learning_rate": 7.95642004808321e-06, "loss": 0.8454, "step": 8820 }, { "epoch": 0.31970570113442787, "grad_norm": 2.3430017018821356, "learning_rate": 7.95594669148606e-06, "loss": 1.0391, "step": 8821 }, { "epoch": 0.31974194483708457, "grad_norm": 2.3977987346037906, "learning_rate": 7.955473294157754e-06, "loss": 0.922, "step": 8822 }, { "epoch": 0.3197781885397412, "grad_norm": 2.35776558473058, "learning_rate": 7.954999856104812e-06, "loss": 0.9738, "step": 8823 }, { "epoch": 0.31981443224239786, "grad_norm": 2.1752307889578955, "learning_rate": 7.954526377333762e-06, "loss": 0.8236, "step": 8824 }, { "epoch": 0.31985067594505456, "grad_norm": 2.361360976459065, "learning_rate": 7.954052857851123e-06, "loss": 0.8665, "step": 8825 }, { "epoch": 0.3198869196477112, "grad_norm": 2.2690486892147366, "learning_rate": 7.953579297663423e-06, "loss": 0.9473, "step": 8826 }, { "epoch": 0.31992316335036786, "grad_norm": 2.1584958647452126, "learning_rate": 7.953105696777185e-06, "loss": 0.9941, "step": 8827 }, { "epoch": 0.31995940705302456, "grad_norm": 2.2467123299654963, "learning_rate": 7.952632055198938e-06, "loss": 1.0276, "step": 8828 }, { "epoch": 0.3199956507556812, "grad_norm": 2.473637782538788, "learning_rate": 7.952158372935208e-06, "loss": 1.1852, "step": 8829 }, { "epoch": 0.32003189445833785, "grad_norm": 2.1926583494660905, "learning_rate": 7.95168464999252e-06, "loss": 0.9753, "step": 8830 }, { "epoch": 0.32006813816099455, "grad_norm": 2.5663028196271553, "learning_rate": 7.951210886377402e-06, "loss": 0.8813, "step": 8831 }, { "epoch": 0.3201043818636512, "grad_norm": 2.119030438498669, "learning_rate": 7.950737082096384e-06, "loss": 0.871, "step": 8832 }, { "epoch": 0.32014062556630785, "grad_norm": 2.3148451430429704, "learning_rate": 7.950263237155992e-06, "loss": 0.8392, "step": 8833 }, { "epoch": 0.3201768692689645, "grad_norm": 2.4036469931702644, "learning_rate": 7.949789351562759e-06, "loss": 0.9335, "step": 8834 }, { "epoch": 0.3202131129716212, "grad_norm": 2.45293973503113, "learning_rate": 7.949315425323211e-06, "loss": 0.8854, "step": 8835 }, { "epoch": 0.32024935667427784, "grad_norm": 2.1437370289960964, "learning_rate": 7.94884145844388e-06, "loss": 0.8807, "step": 8836 }, { "epoch": 0.3202856003769345, "grad_norm": 2.1459795218281714, "learning_rate": 7.948367450931298e-06, "loss": 0.87, "step": 8837 }, { "epoch": 0.3203218440795912, "grad_norm": 2.407500594788792, "learning_rate": 7.947893402791995e-06, "loss": 1.0768, "step": 8838 }, { "epoch": 0.32035808778224784, "grad_norm": 2.1581353714653653, "learning_rate": 7.947419314032505e-06, "loss": 1.0966, "step": 8839 }, { "epoch": 0.3203943314849045, "grad_norm": 2.2626700664206876, "learning_rate": 7.946945184659359e-06, "loss": 0.8572, "step": 8840 }, { "epoch": 0.3204305751875612, "grad_norm": 2.5891888686611093, "learning_rate": 7.94647101467909e-06, "loss": 0.938, "step": 8841 }, { "epoch": 0.32046681889021783, "grad_norm": 2.547993545810855, "learning_rate": 7.945996804098233e-06, "loss": 0.9272, "step": 8842 }, { "epoch": 0.3205030625928745, "grad_norm": 2.263081486916295, "learning_rate": 7.94552255292332e-06, "loss": 0.9139, "step": 8843 }, { "epoch": 0.3205393062955312, "grad_norm": 2.407570578306059, "learning_rate": 7.945048261160889e-06, "loss": 1.0059, "step": 8844 }, { "epoch": 0.3205755499981878, "grad_norm": 2.209226710948751, "learning_rate": 7.944573928817474e-06, "loss": 0.9159, "step": 8845 }, { "epoch": 0.32061179370084447, "grad_norm": 2.2497032231448473, "learning_rate": 7.944099555899612e-06, "loss": 0.8664, "step": 8846 }, { "epoch": 0.3206480374035011, "grad_norm": 2.3118390276855267, "learning_rate": 7.943625142413836e-06, "loss": 0.9138, "step": 8847 }, { "epoch": 0.3206842811061578, "grad_norm": 2.3842195781402546, "learning_rate": 7.943150688366687e-06, "loss": 0.9432, "step": 8848 }, { "epoch": 0.32072052480881447, "grad_norm": 2.2678775149474384, "learning_rate": 7.942676193764703e-06, "loss": 0.8613, "step": 8849 }, { "epoch": 0.3207567685114711, "grad_norm": 2.303648716726899, "learning_rate": 7.94220165861442e-06, "loss": 1.1469, "step": 8850 }, { "epoch": 0.3207930122141278, "grad_norm": 2.1592813833845876, "learning_rate": 7.941727082922377e-06, "loss": 0.8857, "step": 8851 }, { "epoch": 0.32082925591678446, "grad_norm": 2.5423576517715794, "learning_rate": 7.941252466695115e-06, "loss": 1.0445, "step": 8852 }, { "epoch": 0.3208654996194411, "grad_norm": 2.353795190231734, "learning_rate": 7.940777809939171e-06, "loss": 0.9548, "step": 8853 }, { "epoch": 0.3209017433220978, "grad_norm": 2.2189260117598684, "learning_rate": 7.940303112661087e-06, "loss": 1.0683, "step": 8854 }, { "epoch": 0.32093798702475446, "grad_norm": 2.499647775486145, "learning_rate": 7.939828374867407e-06, "loss": 1.1031, "step": 8855 }, { "epoch": 0.3209742307274111, "grad_norm": 2.2230022208841462, "learning_rate": 7.939353596564668e-06, "loss": 0.775, "step": 8856 }, { "epoch": 0.32101047443006775, "grad_norm": 2.503802135404935, "learning_rate": 7.938878777759414e-06, "loss": 0.9702, "step": 8857 }, { "epoch": 0.32104671813272445, "grad_norm": 2.0752070657474486, "learning_rate": 7.938403918458189e-06, "loss": 0.7769, "step": 8858 }, { "epoch": 0.3210829618353811, "grad_norm": 2.6447671311408394, "learning_rate": 7.937929018667534e-06, "loss": 0.9458, "step": 8859 }, { "epoch": 0.32111920553803774, "grad_norm": 2.173645650727518, "learning_rate": 7.937454078393996e-06, "loss": 1.0281, "step": 8860 }, { "epoch": 0.32115544924069445, "grad_norm": 2.1154445085235127, "learning_rate": 7.936979097644115e-06, "loss": 0.9414, "step": 8861 }, { "epoch": 0.3211916929433511, "grad_norm": 2.381663754150276, "learning_rate": 7.936504076424439e-06, "loss": 1.0336, "step": 8862 }, { "epoch": 0.32122793664600774, "grad_norm": 2.2813136274676986, "learning_rate": 7.936029014741512e-06, "loss": 0.9904, "step": 8863 }, { "epoch": 0.32126418034866444, "grad_norm": 2.4301865845918362, "learning_rate": 7.935553912601882e-06, "loss": 0.7921, "step": 8864 }, { "epoch": 0.3213004240513211, "grad_norm": 2.0410869091823325, "learning_rate": 7.935078770012093e-06, "loss": 0.8781, "step": 8865 }, { "epoch": 0.32133666775397773, "grad_norm": 2.3709029778074306, "learning_rate": 7.934603586978696e-06, "loss": 0.869, "step": 8866 }, { "epoch": 0.32137291145663444, "grad_norm": 2.3518734773147894, "learning_rate": 7.934128363508234e-06, "loss": 1.1256, "step": 8867 }, { "epoch": 0.3214091551592911, "grad_norm": 2.170696135447886, "learning_rate": 7.93365309960726e-06, "loss": 0.8609, "step": 8868 }, { "epoch": 0.32144539886194773, "grad_norm": 2.603933312863148, "learning_rate": 7.933177795282318e-06, "loss": 0.9376, "step": 8869 }, { "epoch": 0.3214816425646044, "grad_norm": 2.115558421007826, "learning_rate": 7.932702450539962e-06, "loss": 0.7384, "step": 8870 }, { "epoch": 0.3215178862672611, "grad_norm": 2.1742751910282125, "learning_rate": 7.932227065386738e-06, "loss": 0.9071, "step": 8871 }, { "epoch": 0.3215541299699177, "grad_norm": 2.2280093095886087, "learning_rate": 7.9317516398292e-06, "loss": 0.8374, "step": 8872 }, { "epoch": 0.32159037367257437, "grad_norm": 2.2700330102195836, "learning_rate": 7.931276173873896e-06, "loss": 0.8831, "step": 8873 }, { "epoch": 0.32162661737523107, "grad_norm": 2.2583184044422415, "learning_rate": 7.930800667527381e-06, "loss": 0.8381, "step": 8874 }, { "epoch": 0.3216628610778877, "grad_norm": 2.142630547536038, "learning_rate": 7.930325120796205e-06, "loss": 0.7537, "step": 8875 }, { "epoch": 0.32169910478054436, "grad_norm": 2.432034865182461, "learning_rate": 7.929849533686919e-06, "loss": 0.8208, "step": 8876 }, { "epoch": 0.32173534848320107, "grad_norm": 2.3806468803271175, "learning_rate": 7.92937390620608e-06, "loss": 1.1048, "step": 8877 }, { "epoch": 0.3217715921858577, "grad_norm": 2.3139394455343725, "learning_rate": 7.92889823836024e-06, "loss": 0.9438, "step": 8878 }, { "epoch": 0.32180783588851436, "grad_norm": 2.183655571373543, "learning_rate": 7.928422530155952e-06, "loss": 1.0182, "step": 8879 }, { "epoch": 0.32184407959117106, "grad_norm": 2.589797181609219, "learning_rate": 7.927946781599776e-06, "loss": 0.9903, "step": 8880 }, { "epoch": 0.3218803232938277, "grad_norm": 2.307286801050193, "learning_rate": 7.927470992698261e-06, "loss": 0.8459, "step": 8881 }, { "epoch": 0.32191656699648435, "grad_norm": 2.5584649792497682, "learning_rate": 7.926995163457967e-06, "loss": 0.8926, "step": 8882 }, { "epoch": 0.321952810699141, "grad_norm": 2.2989198569763634, "learning_rate": 7.92651929388545e-06, "loss": 1.0008, "step": 8883 }, { "epoch": 0.3219890544017977, "grad_norm": 2.449508092524118, "learning_rate": 7.926043383987268e-06, "loss": 0.8702, "step": 8884 }, { "epoch": 0.32202529810445435, "grad_norm": 2.2453871404788104, "learning_rate": 7.925567433769976e-06, "loss": 0.8977, "step": 8885 }, { "epoch": 0.322061541807111, "grad_norm": 2.529744719832343, "learning_rate": 7.925091443240136e-06, "loss": 0.9119, "step": 8886 }, { "epoch": 0.3220977855097677, "grad_norm": 2.225516483232636, "learning_rate": 7.924615412404304e-06, "loss": 0.8712, "step": 8887 }, { "epoch": 0.32213402921242434, "grad_norm": 2.3109987379726844, "learning_rate": 7.92413934126904e-06, "loss": 0.9923, "step": 8888 }, { "epoch": 0.322170272915081, "grad_norm": 2.7492528828967746, "learning_rate": 7.923663229840905e-06, "loss": 1.0091, "step": 8889 }, { "epoch": 0.3222065166177377, "grad_norm": 2.3042981608636457, "learning_rate": 7.923187078126457e-06, "loss": 1.0453, "step": 8890 }, { "epoch": 0.32224276032039434, "grad_norm": 2.3496833239725183, "learning_rate": 7.922710886132263e-06, "loss": 1.0407, "step": 8891 }, { "epoch": 0.322279004023051, "grad_norm": 2.3478204039212636, "learning_rate": 7.922234653864878e-06, "loss": 1.038, "step": 8892 }, { "epoch": 0.32231524772570763, "grad_norm": 2.222205992793298, "learning_rate": 7.921758381330866e-06, "loss": 0.984, "step": 8893 }, { "epoch": 0.32235149142836433, "grad_norm": 2.3170431966689624, "learning_rate": 7.92128206853679e-06, "loss": 0.8716, "step": 8894 }, { "epoch": 0.322387735131021, "grad_norm": 2.5133242854560445, "learning_rate": 7.920805715489218e-06, "loss": 1.1415, "step": 8895 }, { "epoch": 0.3224239788336776, "grad_norm": 2.154487801877355, "learning_rate": 7.920329322194706e-06, "loss": 0.9719, "step": 8896 }, { "epoch": 0.32246022253633433, "grad_norm": 2.4318114022295667, "learning_rate": 7.919852888659823e-06, "loss": 1.0155, "step": 8897 }, { "epoch": 0.322496466238991, "grad_norm": 2.5611293557080606, "learning_rate": 7.919376414891134e-06, "loss": 1.0808, "step": 8898 }, { "epoch": 0.3225327099416476, "grad_norm": 2.353495941579928, "learning_rate": 7.918899900895202e-06, "loss": 1.0076, "step": 8899 }, { "epoch": 0.3225689536443043, "grad_norm": 2.4708090880666784, "learning_rate": 7.918423346678595e-06, "loss": 0.8946, "step": 8900 }, { "epoch": 0.32260519734696097, "grad_norm": 2.2275124227527834, "learning_rate": 7.91794675224788e-06, "loss": 1.1113, "step": 8901 }, { "epoch": 0.3226414410496176, "grad_norm": 2.1985977383141795, "learning_rate": 7.917470117609623e-06, "loss": 0.8849, "step": 8902 }, { "epoch": 0.3226776847522743, "grad_norm": 2.125985130771081, "learning_rate": 7.916993442770393e-06, "loss": 0.9869, "step": 8903 }, { "epoch": 0.32271392845493097, "grad_norm": 2.2137092321604603, "learning_rate": 7.916516727736756e-06, "loss": 0.8372, "step": 8904 }, { "epoch": 0.3227501721575876, "grad_norm": 2.226512081959514, "learning_rate": 7.916039972515283e-06, "loss": 0.8315, "step": 8905 }, { "epoch": 0.32278641586024426, "grad_norm": 2.525410331205068, "learning_rate": 7.915563177112543e-06, "loss": 0.9455, "step": 8906 }, { "epoch": 0.32282265956290096, "grad_norm": 2.318552654813398, "learning_rate": 7.915086341535106e-06, "loss": 0.8521, "step": 8907 }, { "epoch": 0.3228589032655576, "grad_norm": 2.8456925706079392, "learning_rate": 7.914609465789542e-06, "loss": 0.9499, "step": 8908 }, { "epoch": 0.32289514696821425, "grad_norm": 2.7028129200568247, "learning_rate": 7.914132549882422e-06, "loss": 0.9237, "step": 8909 }, { "epoch": 0.32293139067087095, "grad_norm": 2.1398575834196167, "learning_rate": 7.913655593820317e-06, "loss": 0.9487, "step": 8910 }, { "epoch": 0.3229676343735276, "grad_norm": 2.457582958717304, "learning_rate": 7.913178597609802e-06, "loss": 0.9206, "step": 8911 }, { "epoch": 0.32300387807618425, "grad_norm": 2.284452678529502, "learning_rate": 7.912701561257447e-06, "loss": 0.8617, "step": 8912 }, { "epoch": 0.32304012177884095, "grad_norm": 2.3149619702362565, "learning_rate": 7.912224484769826e-06, "loss": 1.0433, "step": 8913 }, { "epoch": 0.3230763654814976, "grad_norm": 2.3439107021489347, "learning_rate": 7.911747368153512e-06, "loss": 0.8774, "step": 8914 }, { "epoch": 0.32311260918415424, "grad_norm": 2.4329982650261184, "learning_rate": 7.911270211415081e-06, "loss": 0.8542, "step": 8915 }, { "epoch": 0.3231488528868109, "grad_norm": 2.1440655889523037, "learning_rate": 7.910793014561106e-06, "loss": 0.7628, "step": 8916 }, { "epoch": 0.3231850965894676, "grad_norm": 2.63536683462628, "learning_rate": 7.910315777598166e-06, "loss": 0.8838, "step": 8917 }, { "epoch": 0.32322134029212424, "grad_norm": 2.0517542473270307, "learning_rate": 7.909838500532833e-06, "loss": 0.941, "step": 8918 }, { "epoch": 0.3232575839947809, "grad_norm": 2.3328331706343337, "learning_rate": 7.909361183371685e-06, "loss": 0.8614, "step": 8919 }, { "epoch": 0.3232938276974376, "grad_norm": 2.2838162397360127, "learning_rate": 7.908883826121301e-06, "loss": 0.8238, "step": 8920 }, { "epoch": 0.32333007140009423, "grad_norm": 2.2159826058435153, "learning_rate": 7.908406428788256e-06, "loss": 1.1304, "step": 8921 }, { "epoch": 0.3233663151027509, "grad_norm": 1.9301755353731895, "learning_rate": 7.90792899137913e-06, "loss": 0.6765, "step": 8922 }, { "epoch": 0.3234025588054076, "grad_norm": 2.2046726091032873, "learning_rate": 7.907451513900501e-06, "loss": 0.9671, "step": 8923 }, { "epoch": 0.3234388025080642, "grad_norm": 2.3102692432812897, "learning_rate": 7.906973996358947e-06, "loss": 0.8758, "step": 8924 }, { "epoch": 0.3234750462107209, "grad_norm": 2.4139785169363286, "learning_rate": 7.906496438761051e-06, "loss": 1.0261, "step": 8925 }, { "epoch": 0.3235112899133776, "grad_norm": 2.015781918263802, "learning_rate": 7.906018841113393e-06, "loss": 0.7514, "step": 8926 }, { "epoch": 0.3235475336160342, "grad_norm": 2.4980471550389076, "learning_rate": 7.905541203422551e-06, "loss": 1.0722, "step": 8927 }, { "epoch": 0.32358377731869087, "grad_norm": 2.3440332539127016, "learning_rate": 7.905063525695107e-06, "loss": 1.0083, "step": 8928 }, { "epoch": 0.3236200210213475, "grad_norm": 2.195212849921758, "learning_rate": 7.904585807937646e-06, "loss": 0.6932, "step": 8929 }, { "epoch": 0.3236562647240042, "grad_norm": 2.277543211592482, "learning_rate": 7.90410805015675e-06, "loss": 0.7843, "step": 8930 }, { "epoch": 0.32369250842666086, "grad_norm": 2.9378571855291407, "learning_rate": 7.903630252359003e-06, "loss": 1.0093, "step": 8931 }, { "epoch": 0.3237287521293175, "grad_norm": 2.3023846729943576, "learning_rate": 7.903152414550983e-06, "loss": 0.9191, "step": 8932 }, { "epoch": 0.3237649958319742, "grad_norm": 2.253864076452528, "learning_rate": 7.902674536739282e-06, "loss": 0.8517, "step": 8933 }, { "epoch": 0.32380123953463086, "grad_norm": 2.7749959317525614, "learning_rate": 7.902196618930479e-06, "loss": 0.9231, "step": 8934 }, { "epoch": 0.3238374832372875, "grad_norm": 2.2325089176896613, "learning_rate": 7.901718661131164e-06, "loss": 0.8664, "step": 8935 }, { "epoch": 0.3238737269399442, "grad_norm": 2.205883186372024, "learning_rate": 7.90124066334792e-06, "loss": 0.9843, "step": 8936 }, { "epoch": 0.32390997064260085, "grad_norm": 2.7098258427598334, "learning_rate": 7.900762625587332e-06, "loss": 1.1739, "step": 8937 }, { "epoch": 0.3239462143452575, "grad_norm": 2.3924178977924915, "learning_rate": 7.900284547855992e-06, "loss": 0.8888, "step": 8938 }, { "epoch": 0.3239824580479142, "grad_norm": 2.2522279226668, "learning_rate": 7.899806430160483e-06, "loss": 0.8185, "step": 8939 }, { "epoch": 0.32401870175057085, "grad_norm": 2.435720426573865, "learning_rate": 7.899328272507396e-06, "loss": 1.0737, "step": 8940 }, { "epoch": 0.3240549454532275, "grad_norm": 2.321932617676979, "learning_rate": 7.898850074903321e-06, "loss": 0.995, "step": 8941 }, { "epoch": 0.32409118915588414, "grad_norm": 2.005098242529161, "learning_rate": 7.898371837354843e-06, "loss": 0.7901, "step": 8942 }, { "epoch": 0.32412743285854084, "grad_norm": 2.3522299305435017, "learning_rate": 7.897893559868555e-06, "loss": 0.9164, "step": 8943 }, { "epoch": 0.3241636765611975, "grad_norm": 2.7026860000007753, "learning_rate": 7.897415242451044e-06, "loss": 1.0052, "step": 8944 }, { "epoch": 0.32419992026385414, "grad_norm": 2.458421310812618, "learning_rate": 7.896936885108905e-06, "loss": 0.9511, "step": 8945 }, { "epoch": 0.32423616396651084, "grad_norm": 2.148052899078794, "learning_rate": 7.896458487848727e-06, "loss": 0.8369, "step": 8946 }, { "epoch": 0.3242724076691675, "grad_norm": 2.1077367953794868, "learning_rate": 7.895980050677104e-06, "loss": 0.9219, "step": 8947 }, { "epoch": 0.32430865137182413, "grad_norm": 2.1755227205337673, "learning_rate": 7.895501573600626e-06, "loss": 0.9891, "step": 8948 }, { "epoch": 0.32434489507448083, "grad_norm": 2.4486910006295046, "learning_rate": 7.895023056625888e-06, "loss": 1.1094, "step": 8949 }, { "epoch": 0.3243811387771375, "grad_norm": 2.67041051228421, "learning_rate": 7.894544499759484e-06, "loss": 0.9446, "step": 8950 }, { "epoch": 0.3244173824797941, "grad_norm": 2.3938436326881343, "learning_rate": 7.894065903008007e-06, "loss": 1.0317, "step": 8951 }, { "epoch": 0.32445362618245077, "grad_norm": 2.3349779167693097, "learning_rate": 7.893587266378051e-06, "loss": 1.0669, "step": 8952 }, { "epoch": 0.3244898698851075, "grad_norm": 2.3026505558175083, "learning_rate": 7.893108589876214e-06, "loss": 0.7983, "step": 8953 }, { "epoch": 0.3245261135877641, "grad_norm": 2.38453334012986, "learning_rate": 7.89262987350909e-06, "loss": 1.1016, "step": 8954 }, { "epoch": 0.32456235729042077, "grad_norm": 2.502905168687159, "learning_rate": 7.892151117283275e-06, "loss": 0.8566, "step": 8955 }, { "epoch": 0.32459860099307747, "grad_norm": 2.3785377537986445, "learning_rate": 7.891672321205366e-06, "loss": 0.9167, "step": 8956 }, { "epoch": 0.3246348446957341, "grad_norm": 1.898716468775263, "learning_rate": 7.891193485281963e-06, "loss": 0.7925, "step": 8957 }, { "epoch": 0.32467108839839076, "grad_norm": 2.349814927749296, "learning_rate": 7.890714609519661e-06, "loss": 0.9489, "step": 8958 }, { "epoch": 0.32470733210104746, "grad_norm": 2.126042570960817, "learning_rate": 7.89023569392506e-06, "loss": 0.7247, "step": 8959 }, { "epoch": 0.3247435758037041, "grad_norm": 2.302349800333667, "learning_rate": 7.88975673850476e-06, "loss": 0.7809, "step": 8960 }, { "epoch": 0.32477981950636076, "grad_norm": 2.4224993863838686, "learning_rate": 7.88927774326536e-06, "loss": 0.7507, "step": 8961 }, { "epoch": 0.32481606320901746, "grad_norm": 2.5324948108051504, "learning_rate": 7.888798708213459e-06, "loss": 0.7879, "step": 8962 }, { "epoch": 0.3248523069116741, "grad_norm": 2.4614245938708192, "learning_rate": 7.88831963335566e-06, "loss": 1.0804, "step": 8963 }, { "epoch": 0.32488855061433075, "grad_norm": 2.425937399799067, "learning_rate": 7.887840518698562e-06, "loss": 0.9101, "step": 8964 }, { "epoch": 0.3249247943169874, "grad_norm": 2.284981949200998, "learning_rate": 7.887361364248769e-06, "loss": 0.831, "step": 8965 }, { "epoch": 0.3249610380196441, "grad_norm": 2.5348609165685874, "learning_rate": 7.886882170012882e-06, "loss": 1.0491, "step": 8966 }, { "epoch": 0.32499728172230075, "grad_norm": 2.351046492216074, "learning_rate": 7.886402935997506e-06, "loss": 0.8159, "step": 8967 }, { "epoch": 0.3250335254249574, "grad_norm": 2.369877468072976, "learning_rate": 7.88592366220924e-06, "loss": 0.9522, "step": 8968 }, { "epoch": 0.3250697691276141, "grad_norm": 2.2453621738029548, "learning_rate": 7.885444348654693e-06, "loss": 0.7939, "step": 8969 }, { "epoch": 0.32510601283027074, "grad_norm": 2.523136969937852, "learning_rate": 7.88496499534047e-06, "loss": 0.9586, "step": 8970 }, { "epoch": 0.3251422565329274, "grad_norm": 2.3441084169020465, "learning_rate": 7.884485602273173e-06, "loss": 0.9014, "step": 8971 }, { "epoch": 0.3251785002355841, "grad_norm": 2.188006624422449, "learning_rate": 7.884006169459408e-06, "loss": 0.8648, "step": 8972 }, { "epoch": 0.32521474393824074, "grad_norm": 2.238441864831364, "learning_rate": 7.883526696905783e-06, "loss": 0.9949, "step": 8973 }, { "epoch": 0.3252509876408974, "grad_norm": 2.471645850773749, "learning_rate": 7.883047184618904e-06, "loss": 1.0529, "step": 8974 }, { "epoch": 0.3252872313435541, "grad_norm": 2.4633701618542108, "learning_rate": 7.882567632605377e-06, "loss": 0.9475, "step": 8975 }, { "epoch": 0.32532347504621073, "grad_norm": 2.3313482168602015, "learning_rate": 7.882088040871812e-06, "loss": 0.8461, "step": 8976 }, { "epoch": 0.3253597187488674, "grad_norm": 2.540139927825488, "learning_rate": 7.881608409424816e-06, "loss": 0.9783, "step": 8977 }, { "epoch": 0.325395962451524, "grad_norm": 2.344836349947478, "learning_rate": 7.881128738271003e-06, "loss": 0.7587, "step": 8978 }, { "epoch": 0.3254322061541807, "grad_norm": 2.5745102840920633, "learning_rate": 7.880649027416974e-06, "loss": 0.8712, "step": 8979 }, { "epoch": 0.32546844985683737, "grad_norm": 2.441689807044326, "learning_rate": 7.880169276869346e-06, "loss": 1.1389, "step": 8980 }, { "epoch": 0.325504693559494, "grad_norm": 1.9434980284981667, "learning_rate": 7.879689486634724e-06, "loss": 0.8753, "step": 8981 }, { "epoch": 0.3255409372621507, "grad_norm": 2.1454975774171814, "learning_rate": 7.879209656719727e-06, "loss": 0.8717, "step": 8982 }, { "epoch": 0.32557718096480737, "grad_norm": 2.431393048878817, "learning_rate": 7.878729787130959e-06, "loss": 0.9166, "step": 8983 }, { "epoch": 0.325613424667464, "grad_norm": 2.499528396380932, "learning_rate": 7.878249877875037e-06, "loss": 1.0261, "step": 8984 }, { "epoch": 0.3256496683701207, "grad_norm": 2.3147928149568258, "learning_rate": 7.877769928958571e-06, "loss": 0.945, "step": 8985 }, { "epoch": 0.32568591207277736, "grad_norm": 2.303526160808654, "learning_rate": 7.877289940388177e-06, "loss": 0.9391, "step": 8986 }, { "epoch": 0.325722155775434, "grad_norm": 2.376021280401269, "learning_rate": 7.876809912170467e-06, "loss": 0.8402, "step": 8987 }, { "epoch": 0.32575839947809065, "grad_norm": 2.301742810776103, "learning_rate": 7.876329844312058e-06, "loss": 1.0678, "step": 8988 }, { "epoch": 0.32579464318074736, "grad_norm": 2.588427934629214, "learning_rate": 7.87584973681956e-06, "loss": 0.8448, "step": 8989 }, { "epoch": 0.325830886883404, "grad_norm": 2.36611149792026, "learning_rate": 7.875369589699596e-06, "loss": 0.9364, "step": 8990 }, { "epoch": 0.32586713058606065, "grad_norm": 2.2258320024580196, "learning_rate": 7.874889402958775e-06, "loss": 0.9966, "step": 8991 }, { "epoch": 0.32590337428871735, "grad_norm": 2.4300298729358323, "learning_rate": 7.874409176603718e-06, "loss": 0.9693, "step": 8992 }, { "epoch": 0.325939617991374, "grad_norm": 2.43979359603282, "learning_rate": 7.87392891064104e-06, "loss": 0.7722, "step": 8993 }, { "epoch": 0.32597586169403064, "grad_norm": 2.5749900089958846, "learning_rate": 7.87344860507736e-06, "loss": 0.8277, "step": 8994 }, { "epoch": 0.32601210539668735, "grad_norm": 2.015365041237481, "learning_rate": 7.872968259919297e-06, "loss": 0.9288, "step": 8995 }, { "epoch": 0.326048349099344, "grad_norm": 2.5412338839363233, "learning_rate": 7.87248787517347e-06, "loss": 0.8871, "step": 8996 }, { "epoch": 0.32608459280200064, "grad_norm": 2.4497389376491716, "learning_rate": 7.872007450846494e-06, "loss": 0.8758, "step": 8997 }, { "epoch": 0.32612083650465734, "grad_norm": 2.128693451394387, "learning_rate": 7.871526986944994e-06, "loss": 0.9034, "step": 8998 }, { "epoch": 0.326157080207314, "grad_norm": 2.3579078145776866, "learning_rate": 7.871046483475589e-06, "loss": 0.7564, "step": 8999 }, { "epoch": 0.32619332390997063, "grad_norm": 2.279193591710583, "learning_rate": 7.8705659404449e-06, "loss": 0.8219, "step": 9000 }, { "epoch": 0.3262295676126273, "grad_norm": 2.4124758091947425, "learning_rate": 7.870085357859545e-06, "loss": 0.8738, "step": 9001 }, { "epoch": 0.326265811315284, "grad_norm": 2.6876760857124884, "learning_rate": 7.869604735726153e-06, "loss": 0.9728, "step": 9002 }, { "epoch": 0.32630205501794063, "grad_norm": 2.1903604155262344, "learning_rate": 7.869124074051342e-06, "loss": 0.8837, "step": 9003 }, { "epoch": 0.3263382987205973, "grad_norm": 2.5108381204393933, "learning_rate": 7.868643372841737e-06, "loss": 0.9233, "step": 9004 }, { "epoch": 0.326374542423254, "grad_norm": 2.469837744251022, "learning_rate": 7.868162632103961e-06, "loss": 0.8131, "step": 9005 }, { "epoch": 0.3264107861259106, "grad_norm": 2.282180567957586, "learning_rate": 7.867681851844639e-06, "loss": 0.9795, "step": 9006 }, { "epoch": 0.32644702982856727, "grad_norm": 2.067704705621541, "learning_rate": 7.867201032070395e-06, "loss": 0.924, "step": 9007 }, { "epoch": 0.32648327353122397, "grad_norm": 2.4600862874389087, "learning_rate": 7.866720172787855e-06, "loss": 0.9838, "step": 9008 }, { "epoch": 0.3265195172338806, "grad_norm": 2.351842926160582, "learning_rate": 7.866239274003645e-06, "loss": 0.9831, "step": 9009 }, { "epoch": 0.32655576093653726, "grad_norm": 2.439178301700868, "learning_rate": 7.865758335724392e-06, "loss": 0.8731, "step": 9010 }, { "epoch": 0.32659200463919397, "grad_norm": 2.2348069482830324, "learning_rate": 7.86527735795672e-06, "loss": 0.9016, "step": 9011 }, { "epoch": 0.3266282483418506, "grad_norm": 2.519928160861362, "learning_rate": 7.86479634070726e-06, "loss": 1.1316, "step": 9012 }, { "epoch": 0.32666449204450726, "grad_norm": 2.322755282095259, "learning_rate": 7.86431528398264e-06, "loss": 0.8906, "step": 9013 }, { "epoch": 0.3267007357471639, "grad_norm": 2.557771293877378, "learning_rate": 7.863834187789487e-06, "loss": 0.8784, "step": 9014 }, { "epoch": 0.3267369794498206, "grad_norm": 2.468625319084209, "learning_rate": 7.863353052134429e-06, "loss": 1.052, "step": 9015 }, { "epoch": 0.32677322315247725, "grad_norm": 2.3405042993883973, "learning_rate": 7.862871877024102e-06, "loss": 1.0408, "step": 9016 }, { "epoch": 0.3268094668551339, "grad_norm": 2.75008919600861, "learning_rate": 7.862390662465127e-06, "loss": 1.0408, "step": 9017 }, { "epoch": 0.3268457105577906, "grad_norm": 2.4359479719733357, "learning_rate": 7.861909408464142e-06, "loss": 0.8967, "step": 9018 }, { "epoch": 0.32688195426044725, "grad_norm": 2.4453595997819506, "learning_rate": 7.861428115027776e-06, "loss": 0.8438, "step": 9019 }, { "epoch": 0.3269181979631039, "grad_norm": 2.1627581279232064, "learning_rate": 7.860946782162662e-06, "loss": 0.8244, "step": 9020 }, { "epoch": 0.3269544416657606, "grad_norm": 2.3643476631135587, "learning_rate": 7.860465409875431e-06, "loss": 0.8198, "step": 9021 }, { "epoch": 0.32699068536841724, "grad_norm": 2.4502096663637736, "learning_rate": 7.859983998172715e-06, "loss": 0.9014, "step": 9022 }, { "epoch": 0.3270269290710739, "grad_norm": 2.3217724482257913, "learning_rate": 7.85950254706115e-06, "loss": 1.0498, "step": 9023 }, { "epoch": 0.32706317277373054, "grad_norm": 2.4068555984587765, "learning_rate": 7.859021056547372e-06, "loss": 0.9378, "step": 9024 }, { "epoch": 0.32709941647638724, "grad_norm": 2.382310791290083, "learning_rate": 7.85853952663801e-06, "loss": 0.9296, "step": 9025 }, { "epoch": 0.3271356601790439, "grad_norm": 2.1791833506686675, "learning_rate": 7.858057957339703e-06, "loss": 0.788, "step": 9026 }, { "epoch": 0.32717190388170053, "grad_norm": 2.175430228024051, "learning_rate": 7.857576348659087e-06, "loss": 0.8177, "step": 9027 }, { "epoch": 0.32720814758435723, "grad_norm": 2.3443339338103275, "learning_rate": 7.857094700602796e-06, "loss": 0.8499, "step": 9028 }, { "epoch": 0.3272443912870139, "grad_norm": 2.60467002904822, "learning_rate": 7.856613013177467e-06, "loss": 0.9979, "step": 9029 }, { "epoch": 0.3272806349896705, "grad_norm": 2.3486592845611276, "learning_rate": 7.85613128638974e-06, "loss": 0.7005, "step": 9030 }, { "epoch": 0.32731687869232723, "grad_norm": 2.4756293965961467, "learning_rate": 7.85564952024625e-06, "loss": 1.1369, "step": 9031 }, { "epoch": 0.3273531223949839, "grad_norm": 2.45191968685744, "learning_rate": 7.855167714753637e-06, "loss": 0.8694, "step": 9032 }, { "epoch": 0.3273893660976405, "grad_norm": 2.4757852778922564, "learning_rate": 7.85468586991854e-06, "loss": 0.8912, "step": 9033 }, { "epoch": 0.3274256098002972, "grad_norm": 2.217977862246509, "learning_rate": 7.854203985747598e-06, "loss": 1.0173, "step": 9034 }, { "epoch": 0.32746185350295387, "grad_norm": 2.1306023211865197, "learning_rate": 7.85372206224745e-06, "loss": 0.8694, "step": 9035 }, { "epoch": 0.3274980972056105, "grad_norm": 2.471473359988134, "learning_rate": 7.85324009942474e-06, "loss": 1.0258, "step": 9036 }, { "epoch": 0.32753434090826716, "grad_norm": 2.1831568825917764, "learning_rate": 7.852758097286104e-06, "loss": 0.8823, "step": 9037 }, { "epoch": 0.32757058461092387, "grad_norm": 2.0324716943367194, "learning_rate": 7.852276055838191e-06, "loss": 0.7236, "step": 9038 }, { "epoch": 0.3276068283135805, "grad_norm": 2.5521530852271446, "learning_rate": 7.851793975087635e-06, "loss": 0.9816, "step": 9039 }, { "epoch": 0.32764307201623716, "grad_norm": 2.2632415987637096, "learning_rate": 7.851311855041084e-06, "loss": 0.9863, "step": 9040 }, { "epoch": 0.32767931571889386, "grad_norm": 2.4671822418826785, "learning_rate": 7.85082969570518e-06, "loss": 0.986, "step": 9041 }, { "epoch": 0.3277155594215505, "grad_norm": 2.247492632351293, "learning_rate": 7.850347497086567e-06, "loss": 0.8625, "step": 9042 }, { "epoch": 0.32775180312420715, "grad_norm": 2.7143267355438154, "learning_rate": 7.84986525919189e-06, "loss": 1.0027, "step": 9043 }, { "epoch": 0.32778804682686385, "grad_norm": 2.36559327540211, "learning_rate": 7.849382982027794e-06, "loss": 0.9217, "step": 9044 }, { "epoch": 0.3278242905295205, "grad_norm": 2.4008216045270645, "learning_rate": 7.848900665600923e-06, "loss": 0.9959, "step": 9045 }, { "epoch": 0.32786053423217715, "grad_norm": 2.1013464379693714, "learning_rate": 7.848418309917922e-06, "loss": 0.9975, "step": 9046 }, { "epoch": 0.32789677793483385, "grad_norm": 2.39113266996998, "learning_rate": 7.847935914985441e-06, "loss": 1.0401, "step": 9047 }, { "epoch": 0.3279330216374905, "grad_norm": 2.345367888573156, "learning_rate": 7.847453480810125e-06, "loss": 1.031, "step": 9048 }, { "epoch": 0.32796926534014714, "grad_norm": 2.387919317157383, "learning_rate": 7.846971007398623e-06, "loss": 0.901, "step": 9049 }, { "epoch": 0.3280055090428038, "grad_norm": 2.4026337331000978, "learning_rate": 7.846488494757581e-06, "loss": 0.8576, "step": 9050 }, { "epoch": 0.3280417527454605, "grad_norm": 2.499193330295966, "learning_rate": 7.84600594289365e-06, "loss": 0.9389, "step": 9051 }, { "epoch": 0.32807799644811714, "grad_norm": 2.0222773707327737, "learning_rate": 7.845523351813477e-06, "loss": 0.7576, "step": 9052 }, { "epoch": 0.3281142401507738, "grad_norm": 2.6053345672427404, "learning_rate": 7.845040721523716e-06, "loss": 1.03, "step": 9053 }, { "epoch": 0.3281504838534305, "grad_norm": 2.7145912092378612, "learning_rate": 7.844558052031013e-06, "loss": 0.99, "step": 9054 }, { "epoch": 0.32818672755608713, "grad_norm": 2.2809163269656505, "learning_rate": 7.844075343342018e-06, "loss": 1.1654, "step": 9055 }, { "epoch": 0.3282229712587438, "grad_norm": 2.153425996867934, "learning_rate": 7.843592595463388e-06, "loss": 0.811, "step": 9056 }, { "epoch": 0.3282592149614005, "grad_norm": 2.2665722951508296, "learning_rate": 7.843109808401771e-06, "loss": 0.7458, "step": 9057 }, { "epoch": 0.3282954586640571, "grad_norm": 2.599969667366078, "learning_rate": 7.84262698216382e-06, "loss": 1.0759, "step": 9058 }, { "epoch": 0.3283317023667138, "grad_norm": 2.2782561055491106, "learning_rate": 7.842144116756188e-06, "loss": 0.7466, "step": 9059 }, { "epoch": 0.3283679460693704, "grad_norm": 2.0790167929663244, "learning_rate": 7.84166121218553e-06, "loss": 0.9067, "step": 9060 }, { "epoch": 0.3284041897720271, "grad_norm": 2.339337899745475, "learning_rate": 7.841178268458499e-06, "loss": 0.844, "step": 9061 }, { "epoch": 0.32844043347468377, "grad_norm": 2.3119799827379253, "learning_rate": 7.840695285581748e-06, "loss": 0.8456, "step": 9062 }, { "epoch": 0.3284766771773404, "grad_norm": 2.269246392658187, "learning_rate": 7.840212263561934e-06, "loss": 0.9076, "step": 9063 }, { "epoch": 0.3285129208799971, "grad_norm": 2.545508503602925, "learning_rate": 7.839729202405716e-06, "loss": 1.0813, "step": 9064 }, { "epoch": 0.32854916458265376, "grad_norm": 2.638726468540579, "learning_rate": 7.839246102119744e-06, "loss": 0.7954, "step": 9065 }, { "epoch": 0.3285854082853104, "grad_norm": 2.362885118125192, "learning_rate": 7.838762962710678e-06, "loss": 0.9052, "step": 9066 }, { "epoch": 0.3286216519879671, "grad_norm": 2.346949089349428, "learning_rate": 7.838279784185176e-06, "loss": 0.9878, "step": 9067 }, { "epoch": 0.32865789569062376, "grad_norm": 2.52884878238137, "learning_rate": 7.837796566549895e-06, "loss": 0.8949, "step": 9068 }, { "epoch": 0.3286941393932804, "grad_norm": 2.2068813309694635, "learning_rate": 7.837313309811493e-06, "loss": 0.8646, "step": 9069 }, { "epoch": 0.3287303830959371, "grad_norm": 2.38326613056049, "learning_rate": 7.836830013976631e-06, "loss": 1.0461, "step": 9070 }, { "epoch": 0.32876662679859375, "grad_norm": 2.3243377759007626, "learning_rate": 7.836346679051964e-06, "loss": 1.0634, "step": 9071 }, { "epoch": 0.3288028705012504, "grad_norm": 2.521676827790656, "learning_rate": 7.835863305044158e-06, "loss": 0.8428, "step": 9072 }, { "epoch": 0.32883911420390705, "grad_norm": 2.552951812109098, "learning_rate": 7.835379891959872e-06, "loss": 1.0163, "step": 9073 }, { "epoch": 0.32887535790656375, "grad_norm": 2.1890261348847098, "learning_rate": 7.834896439805762e-06, "loss": 0.9204, "step": 9074 }, { "epoch": 0.3289116016092204, "grad_norm": 2.207635086540792, "learning_rate": 7.834412948588496e-06, "loss": 0.928, "step": 9075 }, { "epoch": 0.32894784531187704, "grad_norm": 2.278470973165141, "learning_rate": 7.833929418314731e-06, "loss": 0.9268, "step": 9076 }, { "epoch": 0.32898408901453374, "grad_norm": 2.2455882772382676, "learning_rate": 7.833445848991137e-06, "loss": 1.0953, "step": 9077 }, { "epoch": 0.3290203327171904, "grad_norm": 2.274629631670001, "learning_rate": 7.83296224062437e-06, "loss": 1.0272, "step": 9078 }, { "epoch": 0.32905657641984704, "grad_norm": 2.2706363774967593, "learning_rate": 7.832478593221096e-06, "loss": 0.8228, "step": 9079 }, { "epoch": 0.32909282012250374, "grad_norm": 2.183375850895885, "learning_rate": 7.831994906787982e-06, "loss": 0.8771, "step": 9080 }, { "epoch": 0.3291290638251604, "grad_norm": 2.33387952007712, "learning_rate": 7.83151118133169e-06, "loss": 1.0689, "step": 9081 }, { "epoch": 0.32916530752781703, "grad_norm": 2.268271022790952, "learning_rate": 7.831027416858887e-06, "loss": 0.9053, "step": 9082 }, { "epoch": 0.32920155123047373, "grad_norm": 2.3266181127630614, "learning_rate": 7.830543613376239e-06, "loss": 1.0516, "step": 9083 }, { "epoch": 0.3292377949331304, "grad_norm": 2.5865876869234494, "learning_rate": 7.830059770890409e-06, "loss": 0.9952, "step": 9084 }, { "epoch": 0.329274038635787, "grad_norm": 2.1966256793586356, "learning_rate": 7.829575889408068e-06, "loss": 0.9942, "step": 9085 }, { "epoch": 0.32931028233844367, "grad_norm": 2.5296433128361335, "learning_rate": 7.829091968935884e-06, "loss": 1.0327, "step": 9086 }, { "epoch": 0.3293465260411004, "grad_norm": 2.4847268379247422, "learning_rate": 7.828608009480523e-06, "loss": 0.8549, "step": 9087 }, { "epoch": 0.329382769743757, "grad_norm": 2.1513338652457517, "learning_rate": 7.828124011048651e-06, "loss": 1.0745, "step": 9088 }, { "epoch": 0.32941901344641367, "grad_norm": 2.7878261927740597, "learning_rate": 7.827639973646945e-06, "loss": 0.8875, "step": 9089 }, { "epoch": 0.32945525714907037, "grad_norm": 2.8293957305491713, "learning_rate": 7.82715589728207e-06, "loss": 0.956, "step": 9090 }, { "epoch": 0.329491500851727, "grad_norm": 2.3719116275507157, "learning_rate": 7.826671781960695e-06, "loss": 0.7988, "step": 9091 }, { "epoch": 0.32952774455438366, "grad_norm": 2.0599976744195216, "learning_rate": 7.826187627689492e-06, "loss": 0.9361, "step": 9092 }, { "epoch": 0.32956398825704036, "grad_norm": 2.4883876101379254, "learning_rate": 7.825703434475134e-06, "loss": 1.0533, "step": 9093 }, { "epoch": 0.329600231959697, "grad_norm": 2.118838276816582, "learning_rate": 7.825219202324292e-06, "loss": 0.8367, "step": 9094 }, { "epoch": 0.32963647566235366, "grad_norm": 2.3431172794155986, "learning_rate": 7.824734931243636e-06, "loss": 0.9038, "step": 9095 }, { "epoch": 0.3296727193650103, "grad_norm": 2.3971177343114323, "learning_rate": 7.824250621239841e-06, "loss": 0.8736, "step": 9096 }, { "epoch": 0.329708963067667, "grad_norm": 2.7326005322760003, "learning_rate": 7.823766272319583e-06, "loss": 1.0716, "step": 9097 }, { "epoch": 0.32974520677032365, "grad_norm": 2.595245021711324, "learning_rate": 7.82328188448953e-06, "loss": 0.9301, "step": 9098 }, { "epoch": 0.3297814504729803, "grad_norm": 2.2335935136449288, "learning_rate": 7.822797457756363e-06, "loss": 0.9445, "step": 9099 }, { "epoch": 0.329817694175637, "grad_norm": 2.152875980714581, "learning_rate": 7.822312992126754e-06, "loss": 1.0655, "step": 9100 }, { "epoch": 0.32985393787829365, "grad_norm": 2.4291659675882062, "learning_rate": 7.82182848760738e-06, "loss": 0.849, "step": 9101 }, { "epoch": 0.3298901815809503, "grad_norm": 2.497782083465, "learning_rate": 7.821343944204913e-06, "loss": 0.9623, "step": 9102 }, { "epoch": 0.329926425283607, "grad_norm": 2.1494482372679515, "learning_rate": 7.820859361926034e-06, "loss": 0.9922, "step": 9103 }, { "epoch": 0.32996266898626364, "grad_norm": 2.5851627610284447, "learning_rate": 7.820374740777419e-06, "loss": 0.9518, "step": 9104 }, { "epoch": 0.3299989126889203, "grad_norm": 2.4846407055114756, "learning_rate": 7.819890080765746e-06, "loss": 0.8503, "step": 9105 }, { "epoch": 0.330035156391577, "grad_norm": 2.281334921833816, "learning_rate": 7.819405381897691e-06, "loss": 0.7621, "step": 9106 }, { "epoch": 0.33007140009423364, "grad_norm": 2.440091386707116, "learning_rate": 7.818920644179937e-06, "loss": 0.8466, "step": 9107 }, { "epoch": 0.3301076437968903, "grad_norm": 2.4036074845661926, "learning_rate": 7.818435867619162e-06, "loss": 0.7846, "step": 9108 }, { "epoch": 0.33014388749954693, "grad_norm": 2.2327770738860253, "learning_rate": 7.817951052222044e-06, "loss": 1.0864, "step": 9109 }, { "epoch": 0.33018013120220363, "grad_norm": 2.1116155531223555, "learning_rate": 7.817466197995266e-06, "loss": 1.0931, "step": 9110 }, { "epoch": 0.3302163749048603, "grad_norm": 1.9947744897715929, "learning_rate": 7.816981304945508e-06, "loss": 0.8699, "step": 9111 }, { "epoch": 0.3302526186075169, "grad_norm": 2.426488941343434, "learning_rate": 7.816496373079449e-06, "loss": 0.8945, "step": 9112 }, { "epoch": 0.3302888623101736, "grad_norm": 2.3693070362919744, "learning_rate": 7.816011402403775e-06, "loss": 0.9063, "step": 9113 }, { "epoch": 0.33032510601283027, "grad_norm": 2.379264484937228, "learning_rate": 7.815526392925165e-06, "loss": 0.8145, "step": 9114 }, { "epoch": 0.3303613497154869, "grad_norm": 2.2485651349011673, "learning_rate": 7.815041344650308e-06, "loss": 1.1048, "step": 9115 }, { "epoch": 0.3303975934181436, "grad_norm": 2.3691893403903155, "learning_rate": 7.81455625758588e-06, "loss": 0.9665, "step": 9116 }, { "epoch": 0.33043383712080027, "grad_norm": 2.2776405223217515, "learning_rate": 7.81407113173857e-06, "loss": 0.9053, "step": 9117 }, { "epoch": 0.3304700808234569, "grad_norm": 2.134734130901066, "learning_rate": 7.813585967115063e-06, "loss": 0.8864, "step": 9118 }, { "epoch": 0.3305063245261136, "grad_norm": 2.6839604501233274, "learning_rate": 7.81310076372204e-06, "loss": 1.0266, "step": 9119 }, { "epoch": 0.33054256822877026, "grad_norm": 2.243077403247853, "learning_rate": 7.812615521566193e-06, "loss": 0.988, "step": 9120 }, { "epoch": 0.3305788119314269, "grad_norm": 2.414456975070341, "learning_rate": 7.812130240654205e-06, "loss": 1.0808, "step": 9121 }, { "epoch": 0.33061505563408355, "grad_norm": 2.531559372668523, "learning_rate": 7.811644920992761e-06, "loss": 1.1131, "step": 9122 }, { "epoch": 0.33065129933674026, "grad_norm": 2.282624082143589, "learning_rate": 7.81115956258855e-06, "loss": 0.9046, "step": 9123 }, { "epoch": 0.3306875430393969, "grad_norm": 2.5067179073788983, "learning_rate": 7.810674165448262e-06, "loss": 1.1389, "step": 9124 }, { "epoch": 0.33072378674205355, "grad_norm": 2.2405382146987343, "learning_rate": 7.810188729578586e-06, "loss": 0.872, "step": 9125 }, { "epoch": 0.33076003044471025, "grad_norm": 1.943044457010175, "learning_rate": 7.809703254986205e-06, "loss": 0.8512, "step": 9126 }, { "epoch": 0.3307962741473669, "grad_norm": 2.4498931200916565, "learning_rate": 7.809217741677813e-06, "loss": 1.0361, "step": 9127 }, { "epoch": 0.33083251785002354, "grad_norm": 2.097350060454316, "learning_rate": 7.8087321896601e-06, "loss": 0.9432, "step": 9128 }, { "epoch": 0.33086876155268025, "grad_norm": 2.4194400909794167, "learning_rate": 7.808246598939757e-06, "loss": 1.079, "step": 9129 }, { "epoch": 0.3309050052553369, "grad_norm": 2.00139631897006, "learning_rate": 7.807760969523473e-06, "loss": 0.8101, "step": 9130 }, { "epoch": 0.33094124895799354, "grad_norm": 2.449178892914834, "learning_rate": 7.807275301417942e-06, "loss": 0.9924, "step": 9131 }, { "epoch": 0.3309774926606502, "grad_norm": 2.2203586993782243, "learning_rate": 7.806789594629854e-06, "loss": 0.8564, "step": 9132 }, { "epoch": 0.3310137363633069, "grad_norm": 2.423161302429588, "learning_rate": 7.806303849165903e-06, "loss": 1.075, "step": 9133 }, { "epoch": 0.33104998006596353, "grad_norm": 2.2861851545647967, "learning_rate": 7.805818065032784e-06, "loss": 0.7789, "step": 9134 }, { "epoch": 0.3310862237686202, "grad_norm": 2.231353961600443, "learning_rate": 7.805332242237186e-06, "loss": 0.8734, "step": 9135 }, { "epoch": 0.3311224674712769, "grad_norm": 2.1060668067963855, "learning_rate": 7.804846380785809e-06, "loss": 0.8254, "step": 9136 }, { "epoch": 0.33115871117393353, "grad_norm": 2.392901164649145, "learning_rate": 7.804360480685345e-06, "loss": 0.8387, "step": 9137 }, { "epoch": 0.3311949548765902, "grad_norm": 2.0063840750983837, "learning_rate": 7.803874541942488e-06, "loss": 0.7672, "step": 9138 }, { "epoch": 0.3312311985792469, "grad_norm": 2.447937213970065, "learning_rate": 7.803388564563937e-06, "loss": 0.9363, "step": 9139 }, { "epoch": 0.3312674422819035, "grad_norm": 2.3614459728909876, "learning_rate": 7.802902548556385e-06, "loss": 1.2855, "step": 9140 }, { "epoch": 0.33130368598456017, "grad_norm": 2.173730303878019, "learning_rate": 7.802416493926532e-06, "loss": 0.9282, "step": 9141 }, { "epoch": 0.33133992968721687, "grad_norm": 2.527873204361225, "learning_rate": 7.801930400681076e-06, "loss": 1.0752, "step": 9142 }, { "epoch": 0.3313761733898735, "grad_norm": 2.89007242117859, "learning_rate": 7.801444268826712e-06, "loss": 1.2126, "step": 9143 }, { "epoch": 0.33141241709253016, "grad_norm": 2.337311622781234, "learning_rate": 7.800958098370141e-06, "loss": 0.9097, "step": 9144 }, { "epoch": 0.3314486607951868, "grad_norm": 2.335182392632499, "learning_rate": 7.80047188931806e-06, "loss": 0.851, "step": 9145 }, { "epoch": 0.3314849044978435, "grad_norm": 2.2585506355534477, "learning_rate": 7.799985641677172e-06, "loss": 1.0231, "step": 9146 }, { "epoch": 0.33152114820050016, "grad_norm": 2.5227896719868466, "learning_rate": 7.799499355454174e-06, "loss": 1.0912, "step": 9147 }, { "epoch": 0.3315573919031568, "grad_norm": 2.396833833274676, "learning_rate": 7.799013030655769e-06, "loss": 0.8252, "step": 9148 }, { "epoch": 0.3315936356058135, "grad_norm": 2.5598584849989865, "learning_rate": 7.798526667288657e-06, "loss": 0.8778, "step": 9149 }, { "epoch": 0.33162987930847015, "grad_norm": 2.121286098889268, "learning_rate": 7.798040265359539e-06, "loss": 0.9424, "step": 9150 }, { "epoch": 0.3316661230111268, "grad_norm": 2.2279128023788317, "learning_rate": 7.797553824875119e-06, "loss": 0.8413, "step": 9151 }, { "epoch": 0.3317023667137835, "grad_norm": 2.309519399335362, "learning_rate": 7.7970673458421e-06, "loss": 1.0573, "step": 9152 }, { "epoch": 0.33173861041644015, "grad_norm": 2.602024974312944, "learning_rate": 7.796580828267183e-06, "loss": 0.8825, "step": 9153 }, { "epoch": 0.3317748541190968, "grad_norm": 2.4796934971943716, "learning_rate": 7.796094272157075e-06, "loss": 0.9371, "step": 9154 }, { "epoch": 0.3318110978217535, "grad_norm": 1.9197832120023162, "learning_rate": 7.795607677518478e-06, "loss": 0.8595, "step": 9155 }, { "epoch": 0.33184734152441014, "grad_norm": 2.275164790689183, "learning_rate": 7.795121044358099e-06, "loss": 1.0201, "step": 9156 }, { "epoch": 0.3318835852270668, "grad_norm": 2.1985779213567715, "learning_rate": 7.79463437268264e-06, "loss": 0.7803, "step": 9157 }, { "epoch": 0.33191982892972344, "grad_norm": 2.646134847181037, "learning_rate": 7.794147662498813e-06, "loss": 0.9823, "step": 9158 }, { "epoch": 0.33195607263238014, "grad_norm": 2.0719224671195082, "learning_rate": 7.793660913813316e-06, "loss": 0.7168, "step": 9159 }, { "epoch": 0.3319923163350368, "grad_norm": 2.2014500659413976, "learning_rate": 7.793174126632865e-06, "loss": 0.9572, "step": 9160 }, { "epoch": 0.33202856003769343, "grad_norm": 2.2717185194667397, "learning_rate": 7.792687300964164e-06, "loss": 0.8136, "step": 9161 }, { "epoch": 0.33206480374035013, "grad_norm": 2.1716520007841296, "learning_rate": 7.79220043681392e-06, "loss": 0.7821, "step": 9162 }, { "epoch": 0.3321010474430068, "grad_norm": 2.53388664392428, "learning_rate": 7.791713534188842e-06, "loss": 1.0549, "step": 9163 }, { "epoch": 0.3321372911456634, "grad_norm": 2.440143616733673, "learning_rate": 7.791226593095641e-06, "loss": 0.9163, "step": 9164 }, { "epoch": 0.33217353484832013, "grad_norm": 2.589903651595552, "learning_rate": 7.790739613541026e-06, "loss": 1.0725, "step": 9165 }, { "epoch": 0.3322097785509768, "grad_norm": 2.5450458221256604, "learning_rate": 7.790252595531704e-06, "loss": 1.0631, "step": 9166 }, { "epoch": 0.3322460222536334, "grad_norm": 2.409833260124258, "learning_rate": 7.789765539074392e-06, "loss": 0.9202, "step": 9167 }, { "epoch": 0.33228226595629007, "grad_norm": 2.7279803717627447, "learning_rate": 7.789278444175797e-06, "loss": 0.9901, "step": 9168 }, { "epoch": 0.33231850965894677, "grad_norm": 2.5762721033712066, "learning_rate": 7.78879131084263e-06, "loss": 0.8302, "step": 9169 }, { "epoch": 0.3323547533616034, "grad_norm": 2.4358526158774434, "learning_rate": 7.788304139081608e-06, "loss": 0.7891, "step": 9170 }, { "epoch": 0.33239099706426006, "grad_norm": 2.119654856659519, "learning_rate": 7.787816928899441e-06, "loss": 0.7631, "step": 9171 }, { "epoch": 0.33242724076691677, "grad_norm": 2.378215800563644, "learning_rate": 7.787329680302841e-06, "loss": 0.9687, "step": 9172 }, { "epoch": 0.3324634844695734, "grad_norm": 2.5611327544358065, "learning_rate": 7.786842393298525e-06, "loss": 1.0351, "step": 9173 }, { "epoch": 0.33249972817223006, "grad_norm": 2.326251186488421, "learning_rate": 7.786355067893208e-06, "loss": 1.0758, "step": 9174 }, { "epoch": 0.33253597187488676, "grad_norm": 2.251805781209016, "learning_rate": 7.7858677040936e-06, "loss": 0.9252, "step": 9175 }, { "epoch": 0.3325722155775434, "grad_norm": 2.3871731639137552, "learning_rate": 7.785380301906422e-06, "loss": 0.9034, "step": 9176 }, { "epoch": 0.33260845928020005, "grad_norm": 2.3619443241692544, "learning_rate": 7.784892861338387e-06, "loss": 0.9274, "step": 9177 }, { "epoch": 0.33264470298285675, "grad_norm": 2.3568822617236314, "learning_rate": 7.784405382396214e-06, "loss": 0.8918, "step": 9178 }, { "epoch": 0.3326809466855134, "grad_norm": 2.3900975871440178, "learning_rate": 7.783917865086616e-06, "loss": 0.9942, "step": 9179 }, { "epoch": 0.33271719038817005, "grad_norm": 2.4022867781325736, "learning_rate": 7.783430309416315e-06, "loss": 0.9368, "step": 9180 }, { "epoch": 0.3327534340908267, "grad_norm": 2.359688905281283, "learning_rate": 7.78294271539203e-06, "loss": 0.9627, "step": 9181 }, { "epoch": 0.3327896777934834, "grad_norm": 2.3728163362589494, "learning_rate": 7.782455083020475e-06, "loss": 0.8506, "step": 9182 }, { "epoch": 0.33282592149614004, "grad_norm": 2.3198008101374574, "learning_rate": 7.781967412308372e-06, "loss": 1.019, "step": 9183 }, { "epoch": 0.3328621651987967, "grad_norm": 2.514728712695446, "learning_rate": 7.78147970326244e-06, "loss": 0.9535, "step": 9184 }, { "epoch": 0.3328984089014534, "grad_norm": 2.3124493585089536, "learning_rate": 7.7809919558894e-06, "loss": 1.0001, "step": 9185 }, { "epoch": 0.33293465260411004, "grad_norm": 2.2978250184193647, "learning_rate": 7.780504170195972e-06, "loss": 0.9956, "step": 9186 }, { "epoch": 0.3329708963067667, "grad_norm": 2.475849793354532, "learning_rate": 7.780016346188881e-06, "loss": 1.0542, "step": 9187 }, { "epoch": 0.3330071400094234, "grad_norm": 2.362175515590413, "learning_rate": 7.779528483874846e-06, "loss": 1.0025, "step": 9188 }, { "epoch": 0.33304338371208003, "grad_norm": 2.2821883345744833, "learning_rate": 7.779040583260586e-06, "loss": 0.8828, "step": 9189 }, { "epoch": 0.3330796274147367, "grad_norm": 2.2088856850189114, "learning_rate": 7.77855264435283e-06, "loss": 0.8901, "step": 9190 }, { "epoch": 0.3331158711173934, "grad_norm": 2.347907022404563, "learning_rate": 7.778064667158299e-06, "loss": 0.9657, "step": 9191 }, { "epoch": 0.33315211482005, "grad_norm": 2.349070842342069, "learning_rate": 7.777576651683717e-06, "loss": 0.8997, "step": 9192 }, { "epoch": 0.3331883585227067, "grad_norm": 2.2949185508517096, "learning_rate": 7.777088597935805e-06, "loss": 0.8609, "step": 9193 }, { "epoch": 0.3332246022253633, "grad_norm": 2.2892136047527067, "learning_rate": 7.776600505921296e-06, "loss": 1.009, "step": 9194 }, { "epoch": 0.33326084592802, "grad_norm": 2.0625477506389003, "learning_rate": 7.77611237564691e-06, "loss": 0.7625, "step": 9195 }, { "epoch": 0.33329708963067667, "grad_norm": 2.339218851174886, "learning_rate": 7.775624207119374e-06, "loss": 0.946, "step": 9196 }, { "epoch": 0.3333333333333333, "grad_norm": 2.2019143069514078, "learning_rate": 7.775136000345415e-06, "loss": 0.9187, "step": 9197 }, { "epoch": 0.33336957703599, "grad_norm": 2.397340095506725, "learning_rate": 7.774647755331761e-06, "loss": 0.9044, "step": 9198 }, { "epoch": 0.33340582073864666, "grad_norm": 2.4341918772694973, "learning_rate": 7.774159472085138e-06, "loss": 0.9187, "step": 9199 }, { "epoch": 0.3334420644413033, "grad_norm": 2.302209993936341, "learning_rate": 7.773671150612276e-06, "loss": 0.9494, "step": 9200 }, { "epoch": 0.33347830814396, "grad_norm": 2.5019082168070015, "learning_rate": 7.773182790919902e-06, "loss": 1.0332, "step": 9201 }, { "epoch": 0.33351455184661666, "grad_norm": 2.3940034418733025, "learning_rate": 7.772694393014747e-06, "loss": 1.1658, "step": 9202 }, { "epoch": 0.3335507955492733, "grad_norm": 2.2123790348770758, "learning_rate": 7.772205956903541e-06, "loss": 0.859, "step": 9203 }, { "epoch": 0.33358703925192995, "grad_norm": 2.3479355815569427, "learning_rate": 7.771717482593014e-06, "loss": 1.0035, "step": 9204 }, { "epoch": 0.33362328295458665, "grad_norm": 2.1937672877934014, "learning_rate": 7.771228970089893e-06, "loss": 0.9134, "step": 9205 }, { "epoch": 0.3336595266572433, "grad_norm": 2.4794762013092218, "learning_rate": 7.770740419400914e-06, "loss": 1.0049, "step": 9206 }, { "epoch": 0.33369577035989995, "grad_norm": 2.2395031316231306, "learning_rate": 7.77025183053281e-06, "loss": 0.9757, "step": 9207 }, { "epoch": 0.33373201406255665, "grad_norm": 2.1995821433285103, "learning_rate": 7.769763203492308e-06, "loss": 0.9398, "step": 9208 }, { "epoch": 0.3337682577652133, "grad_norm": 2.4026107046903635, "learning_rate": 7.769274538286147e-06, "loss": 0.9983, "step": 9209 }, { "epoch": 0.33380450146786994, "grad_norm": 2.4239452984636825, "learning_rate": 7.768785834921057e-06, "loss": 0.9542, "step": 9210 }, { "epoch": 0.33384074517052664, "grad_norm": 2.552931244649168, "learning_rate": 7.768297093403773e-06, "loss": 0.95, "step": 9211 }, { "epoch": 0.3338769888731833, "grad_norm": 2.494096737097968, "learning_rate": 7.767808313741028e-06, "loss": 1.0982, "step": 9212 }, { "epoch": 0.33391323257583994, "grad_norm": 2.4348431446097196, "learning_rate": 7.767319495939559e-06, "loss": 0.9095, "step": 9213 }, { "epoch": 0.33394947627849664, "grad_norm": 2.7749514158845225, "learning_rate": 7.7668306400061e-06, "loss": 1.0397, "step": 9214 }, { "epoch": 0.3339857199811533, "grad_norm": 2.4308246571581127, "learning_rate": 7.76634174594739e-06, "loss": 0.9249, "step": 9215 }, { "epoch": 0.33402196368380993, "grad_norm": 2.498353153896617, "learning_rate": 7.765852813770163e-06, "loss": 1.1059, "step": 9216 }, { "epoch": 0.3340582073864666, "grad_norm": 2.674654668206078, "learning_rate": 7.765363843481156e-06, "loss": 0.9245, "step": 9217 }, { "epoch": 0.3340944510891233, "grad_norm": 2.315270909375045, "learning_rate": 7.764874835087109e-06, "loss": 1.1378, "step": 9218 }, { "epoch": 0.3341306947917799, "grad_norm": 2.2559573056782214, "learning_rate": 7.764385788594757e-06, "loss": 0.9982, "step": 9219 }, { "epoch": 0.33416693849443657, "grad_norm": 2.313212347279266, "learning_rate": 7.763896704010842e-06, "loss": 0.8935, "step": 9220 }, { "epoch": 0.3342031821970933, "grad_norm": 2.346838982421358, "learning_rate": 7.763407581342103e-06, "loss": 0.815, "step": 9221 }, { "epoch": 0.3342394258997499, "grad_norm": 2.4392072217328686, "learning_rate": 7.762918420595276e-06, "loss": 0.9488, "step": 9222 }, { "epoch": 0.33427566960240657, "grad_norm": 2.2627606147774766, "learning_rate": 7.762429221777107e-06, "loss": 0.8769, "step": 9223 }, { "epoch": 0.33431191330506327, "grad_norm": 2.254397156224082, "learning_rate": 7.761939984894333e-06, "loss": 0.9389, "step": 9224 }, { "epoch": 0.3343481570077199, "grad_norm": 2.2004111377531608, "learning_rate": 7.761450709953696e-06, "loss": 0.861, "step": 9225 }, { "epoch": 0.33438440071037656, "grad_norm": 2.3746803931876594, "learning_rate": 7.760961396961937e-06, "loss": 1.0811, "step": 9226 }, { "epoch": 0.3344206444130332, "grad_norm": 2.256661848828775, "learning_rate": 7.760472045925803e-06, "loss": 0.7845, "step": 9227 }, { "epoch": 0.3344568881156899, "grad_norm": 2.598628937179289, "learning_rate": 7.75998265685203e-06, "loss": 0.9209, "step": 9228 }, { "epoch": 0.33449313181834656, "grad_norm": 2.423080741038122, "learning_rate": 7.759493229747368e-06, "loss": 1.1156, "step": 9229 }, { "epoch": 0.3345293755210032, "grad_norm": 2.520892950922488, "learning_rate": 7.759003764618557e-06, "loss": 0.9088, "step": 9230 }, { "epoch": 0.3345656192236599, "grad_norm": 2.2366202450033064, "learning_rate": 7.758514261472344e-06, "loss": 0.9815, "step": 9231 }, { "epoch": 0.33460186292631655, "grad_norm": 2.5406391983701053, "learning_rate": 7.758024720315471e-06, "loss": 0.8526, "step": 9232 }, { "epoch": 0.3346381066289732, "grad_norm": 2.5189827974873187, "learning_rate": 7.757535141154687e-06, "loss": 0.9768, "step": 9233 }, { "epoch": 0.3346743503316299, "grad_norm": 2.2658721473637624, "learning_rate": 7.757045523996736e-06, "loss": 0.9003, "step": 9234 }, { "epoch": 0.33471059403428655, "grad_norm": 2.41936362647628, "learning_rate": 7.756555868848364e-06, "loss": 0.9636, "step": 9235 }, { "epoch": 0.3347468377369432, "grad_norm": 2.2963065175750503, "learning_rate": 7.75606617571632e-06, "loss": 0.9126, "step": 9236 }, { "epoch": 0.3347830814395999, "grad_norm": 2.390968705066286, "learning_rate": 7.755576444607352e-06, "loss": 0.9192, "step": 9237 }, { "epoch": 0.33481932514225654, "grad_norm": 2.3800637388253634, "learning_rate": 7.755086675528204e-06, "loss": 1.0093, "step": 9238 }, { "epoch": 0.3348555688449132, "grad_norm": 2.4594387015100088, "learning_rate": 7.754596868485631e-06, "loss": 0.9238, "step": 9239 }, { "epoch": 0.33489181254756983, "grad_norm": 2.156756327176006, "learning_rate": 7.754107023486377e-06, "loss": 0.8168, "step": 9240 }, { "epoch": 0.33492805625022654, "grad_norm": 2.149396716719918, "learning_rate": 7.753617140537196e-06, "loss": 0.9068, "step": 9241 }, { "epoch": 0.3349642999528832, "grad_norm": 2.3495409802245923, "learning_rate": 7.753127219644834e-06, "loss": 0.8429, "step": 9242 }, { "epoch": 0.33500054365553983, "grad_norm": 2.1721900754377783, "learning_rate": 7.752637260816044e-06, "loss": 0.8779, "step": 9243 }, { "epoch": 0.33503678735819653, "grad_norm": 2.3785263885537606, "learning_rate": 7.75214726405758e-06, "loss": 0.9471, "step": 9244 }, { "epoch": 0.3350730310608532, "grad_norm": 2.5092358864864095, "learning_rate": 7.751657229376188e-06, "loss": 0.9445, "step": 9245 }, { "epoch": 0.3351092747635098, "grad_norm": 2.15839082887702, "learning_rate": 7.751167156778623e-06, "loss": 0.8533, "step": 9246 }, { "epoch": 0.3351455184661665, "grad_norm": 2.5666455801904275, "learning_rate": 7.750677046271642e-06, "loss": 0.8694, "step": 9247 }, { "epoch": 0.33518176216882317, "grad_norm": 2.6010553165348913, "learning_rate": 7.750186897861993e-06, "loss": 0.9192, "step": 9248 }, { "epoch": 0.3352180058714798, "grad_norm": 2.779570067397861, "learning_rate": 7.749696711556431e-06, "loss": 0.8875, "step": 9249 }, { "epoch": 0.3352542495741365, "grad_norm": 2.3293698531393967, "learning_rate": 7.749206487361712e-06, "loss": 0.9865, "step": 9250 }, { "epoch": 0.33529049327679317, "grad_norm": 2.3747743904249967, "learning_rate": 7.748716225284592e-06, "loss": 0.8446, "step": 9251 }, { "epoch": 0.3353267369794498, "grad_norm": 2.2071617560500254, "learning_rate": 7.748225925331822e-06, "loss": 0.8353, "step": 9252 }, { "epoch": 0.33536298068210646, "grad_norm": 2.4700103497848116, "learning_rate": 7.747735587510161e-06, "loss": 0.8629, "step": 9253 }, { "epoch": 0.33539922438476316, "grad_norm": 2.366517630081767, "learning_rate": 7.747245211826366e-06, "loss": 0.9086, "step": 9254 }, { "epoch": 0.3354354680874198, "grad_norm": 2.4298696122890004, "learning_rate": 7.746754798287195e-06, "loss": 1.1156, "step": 9255 }, { "epoch": 0.33547171179007645, "grad_norm": 2.5741012661266756, "learning_rate": 7.746264346899403e-06, "loss": 1.0672, "step": 9256 }, { "epoch": 0.33550795549273316, "grad_norm": 2.5943934671979427, "learning_rate": 7.745773857669749e-06, "loss": 0.8615, "step": 9257 }, { "epoch": 0.3355441991953898, "grad_norm": 2.6196270543485123, "learning_rate": 7.745283330604991e-06, "loss": 0.8933, "step": 9258 }, { "epoch": 0.33558044289804645, "grad_norm": 2.0994195247633827, "learning_rate": 7.744792765711891e-06, "loss": 1.1591, "step": 9259 }, { "epoch": 0.33561668660070315, "grad_norm": 2.26286865498982, "learning_rate": 7.744302162997207e-06, "loss": 0.913, "step": 9260 }, { "epoch": 0.3356529303033598, "grad_norm": 2.2470863693075476, "learning_rate": 7.743811522467696e-06, "loss": 0.9462, "step": 9261 }, { "epoch": 0.33568917400601644, "grad_norm": 2.545677923616155, "learning_rate": 7.743320844130122e-06, "loss": 1.0074, "step": 9262 }, { "epoch": 0.3357254177086731, "grad_norm": 2.2331774463147336, "learning_rate": 7.742830127991247e-06, "loss": 0.9317, "step": 9263 }, { "epoch": 0.3357616614113298, "grad_norm": 2.365889391307787, "learning_rate": 7.742339374057832e-06, "loss": 0.8642, "step": 9264 }, { "epoch": 0.33579790511398644, "grad_norm": 2.2201185870617617, "learning_rate": 7.74184858233664e-06, "loss": 0.9237, "step": 9265 }, { "epoch": 0.3358341488166431, "grad_norm": 14.658656304050027, "learning_rate": 7.741357752834431e-06, "loss": 1.3951, "step": 9266 }, { "epoch": 0.3358703925192998, "grad_norm": 2.347978259210397, "learning_rate": 7.74086688555797e-06, "loss": 0.9453, "step": 9267 }, { "epoch": 0.33590663622195643, "grad_norm": 2.579643480481311, "learning_rate": 7.740375980514021e-06, "loss": 1.0155, "step": 9268 }, { "epoch": 0.3359428799246131, "grad_norm": 2.3058156442235207, "learning_rate": 7.739885037709349e-06, "loss": 0.8898, "step": 9269 }, { "epoch": 0.3359791236272698, "grad_norm": 1.9512564385383422, "learning_rate": 7.739394057150718e-06, "loss": 1.0346, "step": 9270 }, { "epoch": 0.33601536732992643, "grad_norm": 2.1967450906742365, "learning_rate": 7.738903038844893e-06, "loss": 0.8625, "step": 9271 }, { "epoch": 0.3360516110325831, "grad_norm": 2.4768728712100154, "learning_rate": 7.738411982798641e-06, "loss": 1.0266, "step": 9272 }, { "epoch": 0.3360878547352398, "grad_norm": 2.334118076019236, "learning_rate": 7.737920889018728e-06, "loss": 0.8405, "step": 9273 }, { "epoch": 0.3361240984378964, "grad_norm": 2.4108470621018716, "learning_rate": 7.737429757511921e-06, "loss": 1.027, "step": 9274 }, { "epoch": 0.33616034214055307, "grad_norm": 2.4014400628599697, "learning_rate": 7.736938588284988e-06, "loss": 1.2189, "step": 9275 }, { "epoch": 0.3361965858432097, "grad_norm": 2.0769213229275016, "learning_rate": 7.736447381344697e-06, "loss": 0.9601, "step": 9276 }, { "epoch": 0.3362328295458664, "grad_norm": 2.1064016055907473, "learning_rate": 7.735956136697814e-06, "loss": 0.8168, "step": 9277 }, { "epoch": 0.33626907324852306, "grad_norm": 2.4483512144263755, "learning_rate": 7.735464854351111e-06, "loss": 1.08, "step": 9278 }, { "epoch": 0.3363053169511797, "grad_norm": 2.2503615181528303, "learning_rate": 7.734973534311357e-06, "loss": 0.8391, "step": 9279 }, { "epoch": 0.3363415606538364, "grad_norm": 2.2575219338292545, "learning_rate": 7.734482176585322e-06, "loss": 0.9509, "step": 9280 }, { "epoch": 0.33637780435649306, "grad_norm": 2.3932029735808, "learning_rate": 7.733990781179776e-06, "loss": 0.8189, "step": 9281 }, { "epoch": 0.3364140480591497, "grad_norm": 2.3336776246166204, "learning_rate": 7.73349934810149e-06, "loss": 0.8547, "step": 9282 }, { "epoch": 0.3364502917618064, "grad_norm": 2.6990613946706548, "learning_rate": 7.733007877357238e-06, "loss": 0.9773, "step": 9283 }, { "epoch": 0.33648653546446305, "grad_norm": 2.390058743627314, "learning_rate": 7.732516368953787e-06, "loss": 0.9627, "step": 9284 }, { "epoch": 0.3365227791671197, "grad_norm": 2.1499140265977186, "learning_rate": 7.732024822897916e-06, "loss": 0.9251, "step": 9285 }, { "epoch": 0.3365590228697764, "grad_norm": 2.1775619212044486, "learning_rate": 7.731533239196394e-06, "loss": 0.8328, "step": 9286 }, { "epoch": 0.33659526657243305, "grad_norm": 2.474590800622714, "learning_rate": 7.731041617855996e-06, "loss": 1.0177, "step": 9287 }, { "epoch": 0.3366315102750897, "grad_norm": 2.1394038497962065, "learning_rate": 7.730549958883496e-06, "loss": 0.806, "step": 9288 }, { "epoch": 0.33666775397774634, "grad_norm": 2.6671569444183105, "learning_rate": 7.730058262285669e-06, "loss": 1.0198, "step": 9289 }, { "epoch": 0.33670399768040304, "grad_norm": 2.3418930745441813, "learning_rate": 7.72956652806929e-06, "loss": 0.9518, "step": 9290 }, { "epoch": 0.3367402413830597, "grad_norm": 2.4485845390008296, "learning_rate": 7.729074756241135e-06, "loss": 0.9425, "step": 9291 }, { "epoch": 0.33677648508571634, "grad_norm": 2.418542483240781, "learning_rate": 7.728582946807982e-06, "loss": 1.0657, "step": 9292 }, { "epoch": 0.33681272878837304, "grad_norm": 2.2298492550589404, "learning_rate": 7.728091099776605e-06, "loss": 0.9921, "step": 9293 }, { "epoch": 0.3368489724910297, "grad_norm": 2.3296496486933242, "learning_rate": 7.727599215153783e-06, "loss": 1.0596, "step": 9294 }, { "epoch": 0.33688521619368633, "grad_norm": 2.714583878074144, "learning_rate": 7.727107292946292e-06, "loss": 1.0337, "step": 9295 }, { "epoch": 0.33692145989634303, "grad_norm": 2.1307868477468563, "learning_rate": 7.726615333160914e-06, "loss": 0.8134, "step": 9296 }, { "epoch": 0.3369577035989997, "grad_norm": 2.5575419568340827, "learning_rate": 7.726123335804423e-06, "loss": 0.8088, "step": 9297 }, { "epoch": 0.3369939473016563, "grad_norm": 2.688609341113922, "learning_rate": 7.725631300883603e-06, "loss": 1.1276, "step": 9298 }, { "epoch": 0.337030191004313, "grad_norm": 2.2290754875247565, "learning_rate": 7.725139228405231e-06, "loss": 0.8043, "step": 9299 }, { "epoch": 0.3370664347069697, "grad_norm": 2.5347781216074803, "learning_rate": 7.72464711837609e-06, "loss": 0.9941, "step": 9300 }, { "epoch": 0.3371026784096263, "grad_norm": 2.127908966471681, "learning_rate": 7.724154970802959e-06, "loss": 0.8584, "step": 9301 }, { "epoch": 0.33713892211228297, "grad_norm": 2.1107043664042537, "learning_rate": 7.72366278569262e-06, "loss": 0.8206, "step": 9302 }, { "epoch": 0.33717516581493967, "grad_norm": 2.1402814505858996, "learning_rate": 7.723170563051855e-06, "loss": 0.888, "step": 9303 }, { "epoch": 0.3372114095175963, "grad_norm": 2.2518401835701503, "learning_rate": 7.722678302887447e-06, "loss": 0.9796, "step": 9304 }, { "epoch": 0.33724765322025296, "grad_norm": 2.236482189201853, "learning_rate": 7.722186005206177e-06, "loss": 1.0681, "step": 9305 }, { "epoch": 0.33728389692290967, "grad_norm": 2.32802752722165, "learning_rate": 7.721693670014832e-06, "loss": 0.8446, "step": 9306 }, { "epoch": 0.3373201406255663, "grad_norm": 2.3618848700013477, "learning_rate": 7.721201297320194e-06, "loss": 1.0625, "step": 9307 }, { "epoch": 0.33735638432822296, "grad_norm": 2.3157721550500887, "learning_rate": 7.720708887129047e-06, "loss": 1.0302, "step": 9308 }, { "epoch": 0.33739262803087966, "grad_norm": 2.2826882733331946, "learning_rate": 7.720216439448176e-06, "loss": 0.9745, "step": 9309 }, { "epoch": 0.3374288717335363, "grad_norm": 2.3104104367183567, "learning_rate": 7.719723954284369e-06, "loss": 0.9835, "step": 9310 }, { "epoch": 0.33746511543619295, "grad_norm": 2.305311749853872, "learning_rate": 7.719231431644412e-06, "loss": 0.788, "step": 9311 }, { "epoch": 0.3375013591388496, "grad_norm": 2.57865398277811, "learning_rate": 7.718738871535088e-06, "loss": 1.0624, "step": 9312 }, { "epoch": 0.3375376028415063, "grad_norm": 2.3860631387411884, "learning_rate": 7.718246273963189e-06, "loss": 0.9391, "step": 9313 }, { "epoch": 0.33757384654416295, "grad_norm": 2.2264019558939516, "learning_rate": 7.717753638935498e-06, "loss": 0.8319, "step": 9314 }, { "epoch": 0.3376100902468196, "grad_norm": 2.275434367159098, "learning_rate": 7.717260966458806e-06, "loss": 1.0518, "step": 9315 }, { "epoch": 0.3376463339494763, "grad_norm": 2.4692733876194914, "learning_rate": 7.716768256539901e-06, "loss": 0.9584, "step": 9316 }, { "epoch": 0.33768257765213294, "grad_norm": 2.213393494915488, "learning_rate": 7.716275509185573e-06, "loss": 0.9097, "step": 9317 }, { "epoch": 0.3377188213547896, "grad_norm": 2.70354900763204, "learning_rate": 7.715782724402611e-06, "loss": 0.8507, "step": 9318 }, { "epoch": 0.3377550650574463, "grad_norm": 2.3808734468076813, "learning_rate": 7.715289902197806e-06, "loss": 1.0768, "step": 9319 }, { "epoch": 0.33779130876010294, "grad_norm": 2.4164884854043716, "learning_rate": 7.714797042577949e-06, "loss": 1.0921, "step": 9320 }, { "epoch": 0.3378275524627596, "grad_norm": 2.6773000152863338, "learning_rate": 7.71430414554983e-06, "loss": 0.7981, "step": 9321 }, { "epoch": 0.3378637961654163, "grad_norm": 2.4848996683664, "learning_rate": 7.71381121112024e-06, "loss": 1.0389, "step": 9322 }, { "epoch": 0.33790003986807293, "grad_norm": 2.377847151423289, "learning_rate": 7.713318239295975e-06, "loss": 0.8409, "step": 9323 }, { "epoch": 0.3379362835707296, "grad_norm": 2.1886769166630473, "learning_rate": 7.712825230083825e-06, "loss": 0.8119, "step": 9324 }, { "epoch": 0.3379725272733862, "grad_norm": 2.482551477377346, "learning_rate": 7.712332183490582e-06, "loss": 1.02, "step": 9325 }, { "epoch": 0.3380087709760429, "grad_norm": 2.2748144443605343, "learning_rate": 7.711839099523044e-06, "loss": 1.0043, "step": 9326 }, { "epoch": 0.3380450146786996, "grad_norm": 2.804308353573569, "learning_rate": 7.711345978188003e-06, "loss": 0.9818, "step": 9327 }, { "epoch": 0.3380812583813562, "grad_norm": 2.353159237105097, "learning_rate": 7.710852819492255e-06, "loss": 0.9163, "step": 9328 }, { "epoch": 0.3381175020840129, "grad_norm": 2.1847971217041806, "learning_rate": 7.710359623442593e-06, "loss": 1.0132, "step": 9329 }, { "epoch": 0.33815374578666957, "grad_norm": 2.781045204036907, "learning_rate": 7.709866390045815e-06, "loss": 1.2234, "step": 9330 }, { "epoch": 0.3381899894893262, "grad_norm": 2.4105634832037124, "learning_rate": 7.709373119308718e-06, "loss": 0.8905, "step": 9331 }, { "epoch": 0.3382262331919829, "grad_norm": 2.351213457482333, "learning_rate": 7.708879811238096e-06, "loss": 0.8573, "step": 9332 }, { "epoch": 0.33826247689463956, "grad_norm": 2.4014399067023273, "learning_rate": 7.708386465840749e-06, "loss": 1.0516, "step": 9333 }, { "epoch": 0.3382987205972962, "grad_norm": 2.4666511392716752, "learning_rate": 7.707893083123477e-06, "loss": 0.9424, "step": 9334 }, { "epoch": 0.33833496429995286, "grad_norm": 2.2756415391937526, "learning_rate": 7.707399663093072e-06, "loss": 0.9248, "step": 9335 }, { "epoch": 0.33837120800260956, "grad_norm": 2.4521596800334318, "learning_rate": 7.70690620575634e-06, "loss": 0.9563, "step": 9336 }, { "epoch": 0.3384074517052662, "grad_norm": 2.139414725289832, "learning_rate": 7.706412711120078e-06, "loss": 0.794, "step": 9337 }, { "epoch": 0.33844369540792285, "grad_norm": 2.2856321261023207, "learning_rate": 7.705919179191084e-06, "loss": 0.8083, "step": 9338 }, { "epoch": 0.33847993911057955, "grad_norm": 2.4297717686511895, "learning_rate": 7.70542560997616e-06, "loss": 1.0354, "step": 9339 }, { "epoch": 0.3385161828132362, "grad_norm": 2.353818528731164, "learning_rate": 7.704932003482107e-06, "loss": 0.9418, "step": 9340 }, { "epoch": 0.33855242651589285, "grad_norm": 2.2331633505881894, "learning_rate": 7.704438359715729e-06, "loss": 0.9092, "step": 9341 }, { "epoch": 0.33858867021854955, "grad_norm": 2.4034170638265344, "learning_rate": 7.703944678683824e-06, "loss": 1.0275, "step": 9342 }, { "epoch": 0.3386249139212062, "grad_norm": 2.39164706631005, "learning_rate": 7.703450960393198e-06, "loss": 0.9882, "step": 9343 }, { "epoch": 0.33866115762386284, "grad_norm": 2.1910689970592236, "learning_rate": 7.702957204850652e-06, "loss": 0.8294, "step": 9344 }, { "epoch": 0.33869740132651954, "grad_norm": 2.494591328962534, "learning_rate": 7.702463412062989e-06, "loss": 0.8764, "step": 9345 }, { "epoch": 0.3387336450291762, "grad_norm": 2.548616389866428, "learning_rate": 7.701969582037016e-06, "loss": 1.0685, "step": 9346 }, { "epoch": 0.33876988873183284, "grad_norm": 2.584537881151442, "learning_rate": 7.701475714779536e-06, "loss": 0.9527, "step": 9347 }, { "epoch": 0.3388061324344895, "grad_norm": 2.3685587132369736, "learning_rate": 7.700981810297355e-06, "loss": 0.9461, "step": 9348 }, { "epoch": 0.3388423761371462, "grad_norm": 2.585757043487651, "learning_rate": 7.70048786859728e-06, "loss": 0.9929, "step": 9349 }, { "epoch": 0.33887861983980283, "grad_norm": 2.401750850778578, "learning_rate": 7.699993889686113e-06, "loss": 1.0693, "step": 9350 }, { "epoch": 0.3389148635424595, "grad_norm": 2.2338356412314893, "learning_rate": 7.699499873570661e-06, "loss": 0.8515, "step": 9351 }, { "epoch": 0.3389511072451162, "grad_norm": 2.3192304639106336, "learning_rate": 7.699005820257737e-06, "loss": 0.9255, "step": 9352 }, { "epoch": 0.3389873509477728, "grad_norm": 2.012414245941072, "learning_rate": 7.698511729754144e-06, "loss": 0.8057, "step": 9353 }, { "epoch": 0.33902359465042947, "grad_norm": 2.204880920635013, "learning_rate": 7.698017602066692e-06, "loss": 0.9294, "step": 9354 }, { "epoch": 0.3390598383530862, "grad_norm": 2.7222347418051194, "learning_rate": 7.697523437202187e-06, "loss": 0.951, "step": 9355 }, { "epoch": 0.3390960820557428, "grad_norm": 2.3201467153189146, "learning_rate": 7.697029235167441e-06, "loss": 1.0577, "step": 9356 }, { "epoch": 0.33913232575839947, "grad_norm": 2.501906252534753, "learning_rate": 7.696534995969264e-06, "loss": 0.9663, "step": 9357 }, { "epoch": 0.33916856946105617, "grad_norm": 2.1361876880885355, "learning_rate": 7.696040719614465e-06, "loss": 0.9622, "step": 9358 }, { "epoch": 0.3392048131637128, "grad_norm": 2.199396845935416, "learning_rate": 7.695546406109857e-06, "loss": 0.8902, "step": 9359 }, { "epoch": 0.33924105686636946, "grad_norm": 2.264525250481117, "learning_rate": 7.69505205546225e-06, "loss": 0.7869, "step": 9360 }, { "epoch": 0.3392773005690261, "grad_norm": 2.2532792731021716, "learning_rate": 7.694557667678454e-06, "loss": 0.8836, "step": 9361 }, { "epoch": 0.3393135442716828, "grad_norm": 2.2741627278409524, "learning_rate": 7.694063242765284e-06, "loss": 0.9498, "step": 9362 }, { "epoch": 0.33934978797433946, "grad_norm": 2.07312665088407, "learning_rate": 7.69356878072955e-06, "loss": 0.8448, "step": 9363 }, { "epoch": 0.3393860316769961, "grad_norm": 2.6184427412477267, "learning_rate": 7.69307428157807e-06, "loss": 1.1166, "step": 9364 }, { "epoch": 0.3394222753796528, "grad_norm": 2.381619296065097, "learning_rate": 7.692579745317652e-06, "loss": 0.9296, "step": 9365 }, { "epoch": 0.33945851908230945, "grad_norm": 2.4715776627130546, "learning_rate": 7.692085171955114e-06, "loss": 0.9069, "step": 9366 }, { "epoch": 0.3394947627849661, "grad_norm": 2.073342595660732, "learning_rate": 7.691590561497272e-06, "loss": 0.7141, "step": 9367 }, { "epoch": 0.3395310064876228, "grad_norm": 2.3354059309702744, "learning_rate": 7.691095913950938e-06, "loss": 1.0462, "step": 9368 }, { "epoch": 0.33956725019027945, "grad_norm": 2.478095807839299, "learning_rate": 7.690601229322934e-06, "loss": 0.9525, "step": 9369 }, { "epoch": 0.3396034938929361, "grad_norm": 2.4235431651027963, "learning_rate": 7.690106507620068e-06, "loss": 0.8503, "step": 9370 }, { "epoch": 0.33963973759559274, "grad_norm": 2.5078978330917168, "learning_rate": 7.689611748849162e-06, "loss": 0.9506, "step": 9371 }, { "epoch": 0.33967598129824944, "grad_norm": 2.132887045480969, "learning_rate": 7.689116953017034e-06, "loss": 0.8384, "step": 9372 }, { "epoch": 0.3397122250009061, "grad_norm": 2.392803180718308, "learning_rate": 7.6886221201305e-06, "loss": 0.8591, "step": 9373 }, { "epoch": 0.33974846870356273, "grad_norm": 2.31560667186144, "learning_rate": 7.68812725019638e-06, "loss": 0.9027, "step": 9374 }, { "epoch": 0.33978471240621944, "grad_norm": 2.4507325751720948, "learning_rate": 7.687632343221492e-06, "loss": 0.8524, "step": 9375 }, { "epoch": 0.3398209561088761, "grad_norm": 2.153672908659273, "learning_rate": 7.687137399212655e-06, "loss": 0.9, "step": 9376 }, { "epoch": 0.33985719981153273, "grad_norm": 2.6058296876955476, "learning_rate": 7.68664241817669e-06, "loss": 1.0603, "step": 9377 }, { "epoch": 0.33989344351418943, "grad_norm": 2.161941143998218, "learning_rate": 7.686147400120417e-06, "loss": 0.723, "step": 9378 }, { "epoch": 0.3399296872168461, "grad_norm": 2.257122625338274, "learning_rate": 7.685652345050659e-06, "loss": 0.9347, "step": 9379 }, { "epoch": 0.3399659309195027, "grad_norm": 2.03710362058456, "learning_rate": 7.685157252974233e-06, "loss": 0.738, "step": 9380 }, { "epoch": 0.3400021746221594, "grad_norm": 2.536975985723671, "learning_rate": 7.684662123897966e-06, "loss": 0.8811, "step": 9381 }, { "epoch": 0.34003841832481607, "grad_norm": 2.437710518617755, "learning_rate": 7.684166957828679e-06, "loss": 0.9426, "step": 9382 }, { "epoch": 0.3400746620274727, "grad_norm": 2.0961639529717817, "learning_rate": 7.683671754773193e-06, "loss": 0.8546, "step": 9383 }, { "epoch": 0.34011090573012936, "grad_norm": 2.193989915084908, "learning_rate": 7.683176514738333e-06, "loss": 0.9706, "step": 9384 }, { "epoch": 0.34014714943278607, "grad_norm": 2.402126512512768, "learning_rate": 7.682681237730926e-06, "loss": 1.0857, "step": 9385 }, { "epoch": 0.3401833931354427, "grad_norm": 2.3766313464495012, "learning_rate": 7.682185923757793e-06, "loss": 0.8253, "step": 9386 }, { "epoch": 0.34021963683809936, "grad_norm": 2.6609318089378355, "learning_rate": 7.68169057282576e-06, "loss": 0.906, "step": 9387 }, { "epoch": 0.34025588054075606, "grad_norm": 2.2141427956674273, "learning_rate": 7.681195184941651e-06, "loss": 0.8356, "step": 9388 }, { "epoch": 0.3402921242434127, "grad_norm": 2.6962929673122398, "learning_rate": 7.680699760112296e-06, "loss": 1.1451, "step": 9389 }, { "epoch": 0.34032836794606935, "grad_norm": 2.5855513103017507, "learning_rate": 7.680204298344519e-06, "loss": 0.9401, "step": 9390 }, { "epoch": 0.34036461164872606, "grad_norm": 2.4006780038033226, "learning_rate": 7.679708799645147e-06, "loss": 0.9241, "step": 9391 }, { "epoch": 0.3404008553513827, "grad_norm": 2.388179105295044, "learning_rate": 7.679213264021008e-06, "loss": 0.9335, "step": 9392 }, { "epoch": 0.34043709905403935, "grad_norm": 2.1980827386701285, "learning_rate": 7.678717691478933e-06, "loss": 1.0735, "step": 9393 }, { "epoch": 0.34047334275669605, "grad_norm": 2.4919610512455783, "learning_rate": 7.678222082025745e-06, "loss": 1.0467, "step": 9394 }, { "epoch": 0.3405095864593527, "grad_norm": 2.3029500767076545, "learning_rate": 7.677726435668278e-06, "loss": 1.017, "step": 9395 }, { "epoch": 0.34054583016200934, "grad_norm": 2.6190214967074468, "learning_rate": 7.67723075241336e-06, "loss": 0.8701, "step": 9396 }, { "epoch": 0.340582073864666, "grad_norm": 2.34786184812305, "learning_rate": 7.676735032267823e-06, "loss": 1.1035, "step": 9397 }, { "epoch": 0.3406183175673227, "grad_norm": 2.1304448812336445, "learning_rate": 7.676239275238494e-06, "loss": 0.732, "step": 9398 }, { "epoch": 0.34065456126997934, "grad_norm": 2.4269120575194174, "learning_rate": 7.675743481332208e-06, "loss": 0.8102, "step": 9399 }, { "epoch": 0.340690804972636, "grad_norm": 2.2044601143824925, "learning_rate": 7.675247650555792e-06, "loss": 0.9579, "step": 9400 }, { "epoch": 0.3407270486752927, "grad_norm": 2.284541955767292, "learning_rate": 7.674751782916083e-06, "loss": 0.81, "step": 9401 }, { "epoch": 0.34076329237794933, "grad_norm": 2.181958332432551, "learning_rate": 7.674255878419913e-06, "loss": 1.0644, "step": 9402 }, { "epoch": 0.340799536080606, "grad_norm": 2.446192731816019, "learning_rate": 7.673759937074114e-06, "loss": 0.9848, "step": 9403 }, { "epoch": 0.3408357797832627, "grad_norm": 2.49152605797328, "learning_rate": 7.67326395888552e-06, "loss": 1.3049, "step": 9404 }, { "epoch": 0.34087202348591933, "grad_norm": 2.4248681512745516, "learning_rate": 7.672767943860965e-06, "loss": 1.0356, "step": 9405 }, { "epoch": 0.340908267188576, "grad_norm": 2.2782015274327834, "learning_rate": 7.672271892007285e-06, "loss": 0.8982, "step": 9406 }, { "epoch": 0.3409445108912326, "grad_norm": 2.1711857122792426, "learning_rate": 7.671775803331314e-06, "loss": 0.7719, "step": 9407 }, { "epoch": 0.3409807545938893, "grad_norm": 2.5703196115032214, "learning_rate": 7.671279677839888e-06, "loss": 1.0076, "step": 9408 }, { "epoch": 0.34101699829654597, "grad_norm": 2.4420717041949347, "learning_rate": 7.670783515539844e-06, "loss": 0.9848, "step": 9409 }, { "epoch": 0.3410532419992026, "grad_norm": 2.1800786757089323, "learning_rate": 7.670287316438017e-06, "loss": 0.9715, "step": 9410 }, { "epoch": 0.3410894857018593, "grad_norm": 2.3984767080322613, "learning_rate": 7.669791080541249e-06, "loss": 1.0921, "step": 9411 }, { "epoch": 0.34112572940451596, "grad_norm": 2.2919671497518914, "learning_rate": 7.66929480785637e-06, "loss": 1.0479, "step": 9412 }, { "epoch": 0.3411619731071726, "grad_norm": 2.1872703476757014, "learning_rate": 7.668798498390227e-06, "loss": 1.0293, "step": 9413 }, { "epoch": 0.3411982168098293, "grad_norm": 2.3817548670564155, "learning_rate": 7.668302152149651e-06, "loss": 1.0063, "step": 9414 }, { "epoch": 0.34123446051248596, "grad_norm": 2.3756332755725724, "learning_rate": 7.667805769141488e-06, "loss": 0.8961, "step": 9415 }, { "epoch": 0.3412707042151426, "grad_norm": 2.2901153119489206, "learning_rate": 7.667309349372574e-06, "loss": 0.969, "step": 9416 }, { "epoch": 0.3413069479177993, "grad_norm": 2.3562807304737903, "learning_rate": 7.666812892849751e-06, "loss": 0.8877, "step": 9417 }, { "epoch": 0.34134319162045595, "grad_norm": 2.183622345655964, "learning_rate": 7.666316399579858e-06, "loss": 0.9431, "step": 9418 }, { "epoch": 0.3413794353231126, "grad_norm": 2.235892142487615, "learning_rate": 7.665819869569738e-06, "loss": 0.9209, "step": 9419 }, { "epoch": 0.34141567902576925, "grad_norm": 2.1034009376493885, "learning_rate": 7.665323302826233e-06, "loss": 0.8471, "step": 9420 }, { "epoch": 0.34145192272842595, "grad_norm": 2.400836058145148, "learning_rate": 7.664826699356184e-06, "loss": 1.0404, "step": 9421 }, { "epoch": 0.3414881664310826, "grad_norm": 2.0335548894672772, "learning_rate": 7.664330059166435e-06, "loss": 0.8879, "step": 9422 }, { "epoch": 0.34152441013373924, "grad_norm": 2.4291390672556403, "learning_rate": 7.66383338226383e-06, "loss": 1.0046, "step": 9423 }, { "epoch": 0.34156065383639594, "grad_norm": 2.2870308843353593, "learning_rate": 7.66333666865521e-06, "loss": 0.9541, "step": 9424 }, { "epoch": 0.3415968975390526, "grad_norm": 2.089226726175085, "learning_rate": 7.662839918347421e-06, "loss": 0.9039, "step": 9425 }, { "epoch": 0.34163314124170924, "grad_norm": 2.315058022415906, "learning_rate": 7.662343131347312e-06, "loss": 0.8474, "step": 9426 }, { "epoch": 0.34166938494436594, "grad_norm": 2.5906908586684336, "learning_rate": 7.661846307661724e-06, "loss": 0.7718, "step": 9427 }, { "epoch": 0.3417056286470226, "grad_norm": 2.216575146611937, "learning_rate": 7.661349447297501e-06, "loss": 0.9523, "step": 9428 }, { "epoch": 0.34174187234967923, "grad_norm": 2.1937364063204665, "learning_rate": 7.660852550261495e-06, "loss": 0.9256, "step": 9429 }, { "epoch": 0.34177811605233593, "grad_norm": 2.3396966525399154, "learning_rate": 7.660355616560548e-06, "loss": 0.9588, "step": 9430 }, { "epoch": 0.3418143597549926, "grad_norm": 2.3188848765593795, "learning_rate": 7.65985864620151e-06, "loss": 1.0036, "step": 9431 }, { "epoch": 0.3418506034576492, "grad_norm": 2.4308051681144516, "learning_rate": 7.659361639191228e-06, "loss": 1.1211, "step": 9432 }, { "epoch": 0.3418868471603059, "grad_norm": 2.1776129204693735, "learning_rate": 7.658864595536553e-06, "loss": 0.9858, "step": 9433 }, { "epoch": 0.3419230908629626, "grad_norm": 2.00723084858103, "learning_rate": 7.658367515244329e-06, "loss": 0.7958, "step": 9434 }, { "epoch": 0.3419593345656192, "grad_norm": 2.1108224828262765, "learning_rate": 7.65787039832141e-06, "loss": 0.8936, "step": 9435 }, { "epoch": 0.34199557826827587, "grad_norm": 2.3386605771196805, "learning_rate": 7.657373244774644e-06, "loss": 0.9814, "step": 9436 }, { "epoch": 0.34203182197093257, "grad_norm": 2.625362184671307, "learning_rate": 7.656876054610884e-06, "loss": 0.963, "step": 9437 }, { "epoch": 0.3420680656735892, "grad_norm": 2.598785208618807, "learning_rate": 7.656378827836977e-06, "loss": 0.957, "step": 9438 }, { "epoch": 0.34210430937624586, "grad_norm": 2.5312117262655955, "learning_rate": 7.655881564459775e-06, "loss": 0.9356, "step": 9439 }, { "epoch": 0.34214055307890257, "grad_norm": 2.4877947446304653, "learning_rate": 7.655384264486132e-06, "loss": 0.9381, "step": 9440 }, { "epoch": 0.3421767967815592, "grad_norm": 2.0844116134804263, "learning_rate": 7.654886927922902e-06, "loss": 0.9242, "step": 9441 }, { "epoch": 0.34221304048421586, "grad_norm": 2.288601053023352, "learning_rate": 7.654389554776933e-06, "loss": 0.9724, "step": 9442 }, { "epoch": 0.3422492841868725, "grad_norm": 2.561080876557623, "learning_rate": 7.653892145055081e-06, "loss": 0.9671, "step": 9443 }, { "epoch": 0.3422855278895292, "grad_norm": 2.1338158204595845, "learning_rate": 7.653394698764203e-06, "loss": 0.9707, "step": 9444 }, { "epoch": 0.34232177159218585, "grad_norm": 2.31577874788036, "learning_rate": 7.652897215911149e-06, "loss": 0.8138, "step": 9445 }, { "epoch": 0.3423580152948425, "grad_norm": 2.4495897310240515, "learning_rate": 7.652399696502777e-06, "loss": 0.8432, "step": 9446 }, { "epoch": 0.3423942589974992, "grad_norm": 2.3504401643591573, "learning_rate": 7.65190214054594e-06, "loss": 1.232, "step": 9447 }, { "epoch": 0.34243050270015585, "grad_norm": 2.4033184545217203, "learning_rate": 7.651404548047495e-06, "loss": 0.993, "step": 9448 }, { "epoch": 0.3424667464028125, "grad_norm": 2.4113761048355613, "learning_rate": 7.6509069190143e-06, "loss": 1.078, "step": 9449 }, { "epoch": 0.3425029901054692, "grad_norm": 2.097725250522109, "learning_rate": 7.65040925345321e-06, "loss": 1.0577, "step": 9450 }, { "epoch": 0.34253923380812584, "grad_norm": 2.2348722837950565, "learning_rate": 7.649911551371084e-06, "loss": 0.8955, "step": 9451 }, { "epoch": 0.3425754775107825, "grad_norm": 2.420613450110036, "learning_rate": 7.649413812774778e-06, "loss": 0.9818, "step": 9452 }, { "epoch": 0.3426117212134392, "grad_norm": 2.494970574297271, "learning_rate": 7.648916037671153e-06, "loss": 0.9682, "step": 9453 }, { "epoch": 0.34264796491609584, "grad_norm": 2.171866702281491, "learning_rate": 7.648418226067068e-06, "loss": 0.9089, "step": 9454 }, { "epoch": 0.3426842086187525, "grad_norm": 2.015080930160724, "learning_rate": 7.64792037796938e-06, "loss": 0.8038, "step": 9455 }, { "epoch": 0.34272045232140913, "grad_norm": 2.6333779808251196, "learning_rate": 7.647422493384953e-06, "loss": 0.9904, "step": 9456 }, { "epoch": 0.34275669602406583, "grad_norm": 2.6644277866603, "learning_rate": 7.646924572320642e-06, "loss": 0.8305, "step": 9457 }, { "epoch": 0.3427929397267225, "grad_norm": 2.193933090360556, "learning_rate": 7.646426614783313e-06, "loss": 0.7493, "step": 9458 }, { "epoch": 0.3428291834293791, "grad_norm": 2.0635434379007807, "learning_rate": 7.645928620779826e-06, "loss": 0.8521, "step": 9459 }, { "epoch": 0.3428654271320358, "grad_norm": 2.4471953195237734, "learning_rate": 7.645430590317043e-06, "loss": 0.7078, "step": 9460 }, { "epoch": 0.3429016708346925, "grad_norm": 2.3640464026238215, "learning_rate": 7.644932523401824e-06, "loss": 0.9993, "step": 9461 }, { "epoch": 0.3429379145373491, "grad_norm": 2.4761204301226023, "learning_rate": 7.644434420041037e-06, "loss": 0.9693, "step": 9462 }, { "epoch": 0.3429741582400058, "grad_norm": 2.2544817542729865, "learning_rate": 7.643936280241542e-06, "loss": 0.8944, "step": 9463 }, { "epoch": 0.34301040194266247, "grad_norm": 2.6548831704885547, "learning_rate": 7.643438104010204e-06, "loss": 0.9016, "step": 9464 }, { "epoch": 0.3430466456453191, "grad_norm": 2.276766609194547, "learning_rate": 7.64293989135389e-06, "loss": 0.922, "step": 9465 }, { "epoch": 0.3430828893479758, "grad_norm": 2.2135601377566814, "learning_rate": 7.642441642279458e-06, "loss": 0.97, "step": 9466 }, { "epoch": 0.34311913305063246, "grad_norm": 2.251782897735305, "learning_rate": 7.641943356793782e-06, "loss": 0.8795, "step": 9467 }, { "epoch": 0.3431553767532891, "grad_norm": 2.0385629739592246, "learning_rate": 7.641445034903723e-06, "loss": 0.8258, "step": 9468 }, { "epoch": 0.34319162045594576, "grad_norm": 2.2395061405646395, "learning_rate": 7.640946676616148e-06, "loss": 0.8717, "step": 9469 }, { "epoch": 0.34322786415860246, "grad_norm": 2.291288639409027, "learning_rate": 7.640448281937927e-06, "loss": 0.9683, "step": 9470 }, { "epoch": 0.3432641078612591, "grad_norm": 2.013356131328641, "learning_rate": 7.639949850875922e-06, "loss": 0.8668, "step": 9471 }, { "epoch": 0.34330035156391575, "grad_norm": 2.479172645423643, "learning_rate": 7.639451383437009e-06, "loss": 0.9642, "step": 9472 }, { "epoch": 0.34333659526657245, "grad_norm": 2.2742133989621287, "learning_rate": 7.63895287962805e-06, "loss": 0.9194, "step": 9473 }, { "epoch": 0.3433728389692291, "grad_norm": 2.4587824170971735, "learning_rate": 7.638454339455918e-06, "loss": 0.8759, "step": 9474 }, { "epoch": 0.34340908267188575, "grad_norm": 2.78113021260839, "learning_rate": 7.637955762927479e-06, "loss": 0.9293, "step": 9475 }, { "epoch": 0.34344532637454245, "grad_norm": 2.370094653816835, "learning_rate": 7.637457150049605e-06, "loss": 1.1983, "step": 9476 }, { "epoch": 0.3434815700771991, "grad_norm": 2.2863584200067977, "learning_rate": 7.636958500829169e-06, "loss": 0.9003, "step": 9477 }, { "epoch": 0.34351781377985574, "grad_norm": 2.288793237069359, "learning_rate": 7.636459815273037e-06, "loss": 0.8649, "step": 9478 }, { "epoch": 0.3435540574825124, "grad_norm": 2.115809651134035, "learning_rate": 7.635961093388084e-06, "loss": 0.8335, "step": 9479 }, { "epoch": 0.3435903011851691, "grad_norm": 2.2823536390287353, "learning_rate": 7.63546233518118e-06, "loss": 1.0124, "step": 9480 }, { "epoch": 0.34362654488782574, "grad_norm": 1.9812333065642211, "learning_rate": 7.6349635406592e-06, "loss": 0.8813, "step": 9481 }, { "epoch": 0.3436627885904824, "grad_norm": 2.3903354842630264, "learning_rate": 7.634464709829017e-06, "loss": 0.9586, "step": 9482 }, { "epoch": 0.3436990322931391, "grad_norm": 2.122018401982168, "learning_rate": 7.633965842697503e-06, "loss": 0.8605, "step": 9483 }, { "epoch": 0.34373527599579573, "grad_norm": 2.580678568836025, "learning_rate": 7.633466939271533e-06, "loss": 1.0748, "step": 9484 }, { "epoch": 0.3437715196984524, "grad_norm": 2.4940486462197486, "learning_rate": 7.632967999557981e-06, "loss": 0.9185, "step": 9485 }, { "epoch": 0.3438077634011091, "grad_norm": 2.575539822376856, "learning_rate": 7.632469023563722e-06, "loss": 0.9437, "step": 9486 }, { "epoch": 0.3438440071037657, "grad_norm": 2.6021286800216563, "learning_rate": 7.63197001129563e-06, "loss": 0.9044, "step": 9487 }, { "epoch": 0.34388025080642237, "grad_norm": 2.287150451706229, "learning_rate": 7.631470962760586e-06, "loss": 0.8957, "step": 9488 }, { "epoch": 0.3439164945090791, "grad_norm": 2.560180053081234, "learning_rate": 7.630971877965463e-06, "loss": 0.9702, "step": 9489 }, { "epoch": 0.3439527382117357, "grad_norm": 2.1557209449874155, "learning_rate": 7.630472756917138e-06, "loss": 0.774, "step": 9490 }, { "epoch": 0.34398898191439237, "grad_norm": 2.272245979034985, "learning_rate": 7.629973599622488e-06, "loss": 0.6855, "step": 9491 }, { "epoch": 0.344025225617049, "grad_norm": 2.254734050992257, "learning_rate": 7.629474406088396e-06, "loss": 0.8938, "step": 9492 }, { "epoch": 0.3440614693197057, "grad_norm": 2.1698238861037193, "learning_rate": 7.628975176321734e-06, "loss": 1.0676, "step": 9493 }, { "epoch": 0.34409771302236236, "grad_norm": 2.4207802517648163, "learning_rate": 7.628475910329385e-06, "loss": 0.8949, "step": 9494 }, { "epoch": 0.344133956725019, "grad_norm": 2.1289120730124695, "learning_rate": 7.627976608118228e-06, "loss": 0.8607, "step": 9495 }, { "epoch": 0.3441702004276757, "grad_norm": 2.5913440186741012, "learning_rate": 7.627477269695143e-06, "loss": 0.8511, "step": 9496 }, { "epoch": 0.34420644413033236, "grad_norm": 2.368900069730962, "learning_rate": 7.626977895067007e-06, "loss": 0.8158, "step": 9497 }, { "epoch": 0.344242687832989, "grad_norm": 2.152333039593228, "learning_rate": 7.626478484240708e-06, "loss": 0.8974, "step": 9498 }, { "epoch": 0.3442789315356457, "grad_norm": 2.1713218841808573, "learning_rate": 7.625979037223122e-06, "loss": 0.8634, "step": 9499 }, { "epoch": 0.34431517523830235, "grad_norm": 2.3738724615487534, "learning_rate": 7.625479554021134e-06, "loss": 0.9169, "step": 9500 }, { "epoch": 0.344351418940959, "grad_norm": 2.3790567690979474, "learning_rate": 7.624980034641625e-06, "loss": 0.9359, "step": 9501 }, { "epoch": 0.34438766264361564, "grad_norm": 2.321930125289063, "learning_rate": 7.624480479091478e-06, "loss": 1.1521, "step": 9502 }, { "epoch": 0.34442390634627235, "grad_norm": 2.428610140078153, "learning_rate": 7.62398088737758e-06, "loss": 0.8287, "step": 9503 }, { "epoch": 0.344460150048929, "grad_norm": 2.322452632351237, "learning_rate": 7.623481259506811e-06, "loss": 0.8875, "step": 9504 }, { "epoch": 0.34449639375158564, "grad_norm": 2.176184925648411, "learning_rate": 7.622981595486055e-06, "loss": 0.887, "step": 9505 }, { "epoch": 0.34453263745424234, "grad_norm": 2.5365001831213587, "learning_rate": 7.6224818953222e-06, "loss": 0.8133, "step": 9506 }, { "epoch": 0.344568881156899, "grad_norm": 2.0978677560214423, "learning_rate": 7.621982159022131e-06, "loss": 1.0585, "step": 9507 }, { "epoch": 0.34460512485955563, "grad_norm": 2.3087327823021826, "learning_rate": 7.621482386592733e-06, "loss": 1.1609, "step": 9508 }, { "epoch": 0.34464136856221234, "grad_norm": 2.4243670974040445, "learning_rate": 7.620982578040894e-06, "loss": 0.8418, "step": 9509 }, { "epoch": 0.344677612264869, "grad_norm": 2.37646722436024, "learning_rate": 7.6204827333735e-06, "loss": 1.0167, "step": 9510 }, { "epoch": 0.34471385596752563, "grad_norm": 2.2783433749689945, "learning_rate": 7.619982852597438e-06, "loss": 0.9933, "step": 9511 }, { "epoch": 0.34475009967018233, "grad_norm": 2.500482822807742, "learning_rate": 7.619482935719598e-06, "loss": 1.0473, "step": 9512 }, { "epoch": 0.344786343372839, "grad_norm": 2.5628572284662496, "learning_rate": 7.618982982746867e-06, "loss": 0.7609, "step": 9513 }, { "epoch": 0.3448225870754956, "grad_norm": 2.2866525236419584, "learning_rate": 7.6184829936861336e-06, "loss": 1.0444, "step": 9514 }, { "epoch": 0.34485883077815227, "grad_norm": 2.387341328015796, "learning_rate": 7.617982968544289e-06, "loss": 0.8485, "step": 9515 }, { "epoch": 0.34489507448080897, "grad_norm": 2.358058130965246, "learning_rate": 7.617482907328222e-06, "loss": 0.9905, "step": 9516 }, { "epoch": 0.3449313181834656, "grad_norm": 2.6449120743542927, "learning_rate": 7.616982810044822e-06, "loss": 0.9597, "step": 9517 }, { "epoch": 0.34496756188612226, "grad_norm": 2.408751849721692, "learning_rate": 7.616482676700983e-06, "loss": 0.9097, "step": 9518 }, { "epoch": 0.34500380558877897, "grad_norm": 2.2672037272976886, "learning_rate": 7.615982507303595e-06, "loss": 0.9365, "step": 9519 }, { "epoch": 0.3450400492914356, "grad_norm": 2.7788351656019143, "learning_rate": 7.6154823018595515e-06, "loss": 0.9959, "step": 9520 }, { "epoch": 0.34507629299409226, "grad_norm": 2.609522597772967, "learning_rate": 7.614982060375742e-06, "loss": 0.9552, "step": 9521 }, { "epoch": 0.34511253669674896, "grad_norm": 2.584531464502419, "learning_rate": 7.614481782859062e-06, "loss": 0.8911, "step": 9522 }, { "epoch": 0.3451487803994056, "grad_norm": 2.3575399476221173, "learning_rate": 7.613981469316404e-06, "loss": 0.9062, "step": 9523 }, { "epoch": 0.34518502410206225, "grad_norm": 2.038653539899071, "learning_rate": 7.613481119754662e-06, "loss": 0.7702, "step": 9524 }, { "epoch": 0.34522126780471896, "grad_norm": 2.6171844639859922, "learning_rate": 7.61298073418073e-06, "loss": 0.8337, "step": 9525 }, { "epoch": 0.3452575115073756, "grad_norm": 2.7841207777672925, "learning_rate": 7.612480312601505e-06, "loss": 0.9184, "step": 9526 }, { "epoch": 0.34529375521003225, "grad_norm": 2.2749511806578853, "learning_rate": 7.611979855023881e-06, "loss": 1.122, "step": 9527 }, { "epoch": 0.3453299989126889, "grad_norm": 2.4116629198542707, "learning_rate": 7.611479361454754e-06, "loss": 0.8403, "step": 9528 }, { "epoch": 0.3453662426153456, "grad_norm": 2.092630867473528, "learning_rate": 7.610978831901022e-06, "loss": 0.9942, "step": 9529 }, { "epoch": 0.34540248631800224, "grad_norm": 2.454088656667566, "learning_rate": 7.610478266369579e-06, "loss": 0.9749, "step": 9530 }, { "epoch": 0.3454387300206589, "grad_norm": 2.4749563054898984, "learning_rate": 7.609977664867325e-06, "loss": 0.9436, "step": 9531 }, { "epoch": 0.3454749737233156, "grad_norm": 2.1953354069114837, "learning_rate": 7.609477027401158e-06, "loss": 0.789, "step": 9532 }, { "epoch": 0.34551121742597224, "grad_norm": 2.330625676218705, "learning_rate": 7.6089763539779745e-06, "loss": 1.0429, "step": 9533 }, { "epoch": 0.3455474611286289, "grad_norm": 2.3086738082961475, "learning_rate": 7.608475644604675e-06, "loss": 0.8943, "step": 9534 }, { "epoch": 0.3455837048312856, "grad_norm": 2.5861425310894433, "learning_rate": 7.607974899288159e-06, "loss": 0.9455, "step": 9535 }, { "epoch": 0.34561994853394223, "grad_norm": 2.3508005648586137, "learning_rate": 7.6074741180353254e-06, "loss": 0.8424, "step": 9536 }, { "epoch": 0.3456561922365989, "grad_norm": 2.7345448456448542, "learning_rate": 7.606973300853077e-06, "loss": 1.0572, "step": 9537 }, { "epoch": 0.3456924359392555, "grad_norm": 2.3655223839991137, "learning_rate": 7.606472447748311e-06, "loss": 0.9726, "step": 9538 }, { "epoch": 0.34572867964191223, "grad_norm": 2.3315296263891097, "learning_rate": 7.6059715587279326e-06, "loss": 1.015, "step": 9539 }, { "epoch": 0.3457649233445689, "grad_norm": 2.6111371212016365, "learning_rate": 7.605470633798842e-06, "loss": 1.0047, "step": 9540 }, { "epoch": 0.3458011670472255, "grad_norm": 2.461875445306932, "learning_rate": 7.604969672967941e-06, "loss": 0.9492, "step": 9541 }, { "epoch": 0.3458374107498822, "grad_norm": 2.5893176086695187, "learning_rate": 7.604468676242134e-06, "loss": 1.1754, "step": 9542 }, { "epoch": 0.34587365445253887, "grad_norm": 2.4752237911869632, "learning_rate": 7.603967643628325e-06, "loss": 0.9933, "step": 9543 }, { "epoch": 0.3459098981551955, "grad_norm": 2.2305050674502, "learning_rate": 7.603466575133413e-06, "loss": 0.8521, "step": 9544 }, { "epoch": 0.3459461418578522, "grad_norm": 2.6081827719494015, "learning_rate": 7.602965470764309e-06, "loss": 0.8586, "step": 9545 }, { "epoch": 0.34598238556050886, "grad_norm": 2.163496726721948, "learning_rate": 7.602464330527915e-06, "loss": 0.8964, "step": 9546 }, { "epoch": 0.3460186292631655, "grad_norm": 2.617536515164471, "learning_rate": 7.601963154431136e-06, "loss": 0.7774, "step": 9547 }, { "epoch": 0.3460548729658222, "grad_norm": 2.261124261479821, "learning_rate": 7.601461942480879e-06, "loss": 0.709, "step": 9548 }, { "epoch": 0.34609111666847886, "grad_norm": 2.449343798649531, "learning_rate": 7.600960694684048e-06, "loss": 1.0162, "step": 9549 }, { "epoch": 0.3461273603711355, "grad_norm": 2.359196856075817, "learning_rate": 7.600459411047553e-06, "loss": 0.9803, "step": 9550 }, { "epoch": 0.34616360407379215, "grad_norm": 2.2557806885721727, "learning_rate": 7.5999580915783e-06, "loss": 0.9708, "step": 9551 }, { "epoch": 0.34619984777644885, "grad_norm": 2.243631876048694, "learning_rate": 7.5994567362831974e-06, "loss": 0.9204, "step": 9552 }, { "epoch": 0.3462360914791055, "grad_norm": 2.3477896507928824, "learning_rate": 7.598955345169152e-06, "loss": 1.0847, "step": 9553 }, { "epoch": 0.34627233518176215, "grad_norm": 2.3954209965373403, "learning_rate": 7.598453918243074e-06, "loss": 0.8572, "step": 9554 }, { "epoch": 0.34630857888441885, "grad_norm": 2.322841417227647, "learning_rate": 7.597952455511872e-06, "loss": 0.997, "step": 9555 }, { "epoch": 0.3463448225870755, "grad_norm": 2.507502962662467, "learning_rate": 7.5974509569824575e-06, "loss": 0.9607, "step": 9556 }, { "epoch": 0.34638106628973214, "grad_norm": 2.3668908483922615, "learning_rate": 7.596949422661739e-06, "loss": 0.7778, "step": 9557 }, { "epoch": 0.34641730999238884, "grad_norm": 2.407964334398394, "learning_rate": 7.596447852556628e-06, "loss": 0.9349, "step": 9558 }, { "epoch": 0.3464535536950455, "grad_norm": 2.4245391855651977, "learning_rate": 7.595946246674036e-06, "loss": 1.0001, "step": 9559 }, { "epoch": 0.34648979739770214, "grad_norm": 2.542246922382877, "learning_rate": 7.595444605020874e-06, "loss": 0.8515, "step": 9560 }, { "epoch": 0.34652604110035884, "grad_norm": 2.2397789689207745, "learning_rate": 7.594942927604057e-06, "loss": 0.7279, "step": 9561 }, { "epoch": 0.3465622848030155, "grad_norm": 2.063511358234101, "learning_rate": 7.594441214430495e-06, "loss": 0.6746, "step": 9562 }, { "epoch": 0.34659852850567213, "grad_norm": 2.2879880781265727, "learning_rate": 7.5939394655071006e-06, "loss": 0.9324, "step": 9563 }, { "epoch": 0.3466347722083288, "grad_norm": 2.3893019485446976, "learning_rate": 7.59343768084079e-06, "loss": 0.9968, "step": 9564 }, { "epoch": 0.3466710159109855, "grad_norm": 2.462982210307635, "learning_rate": 7.5929358604384774e-06, "loss": 0.9573, "step": 9565 }, { "epoch": 0.3467072596136421, "grad_norm": 2.419097518952276, "learning_rate": 7.592434004307075e-06, "loss": 0.9117, "step": 9566 }, { "epoch": 0.3467435033162988, "grad_norm": 2.2731623250211475, "learning_rate": 7.591932112453502e-06, "loss": 0.9528, "step": 9567 }, { "epoch": 0.3467797470189555, "grad_norm": 2.3920133139396804, "learning_rate": 7.591430184884671e-06, "loss": 1.0269, "step": 9568 }, { "epoch": 0.3468159907216121, "grad_norm": 2.345253632439147, "learning_rate": 7.590928221607499e-06, "loss": 0.7965, "step": 9569 }, { "epoch": 0.34685223442426877, "grad_norm": 2.3179880655801126, "learning_rate": 7.590426222628904e-06, "loss": 0.8624, "step": 9570 }, { "epoch": 0.34688847812692547, "grad_norm": 2.2990985520746983, "learning_rate": 7.589924187955802e-06, "loss": 1.0393, "step": 9571 }, { "epoch": 0.3469247218295821, "grad_norm": 2.413674462079808, "learning_rate": 7.589422117595111e-06, "loss": 0.9228, "step": 9572 }, { "epoch": 0.34696096553223876, "grad_norm": 2.3917290442844568, "learning_rate": 7.588920011553749e-06, "loss": 0.7497, "step": 9573 }, { "epoch": 0.3469972092348954, "grad_norm": 2.4890902906792984, "learning_rate": 7.588417869838635e-06, "loss": 0.8899, "step": 9574 }, { "epoch": 0.3470334529375521, "grad_norm": 2.5064725798326135, "learning_rate": 7.587915692456688e-06, "loss": 0.9459, "step": 9575 }, { "epoch": 0.34706969664020876, "grad_norm": 2.170477114496364, "learning_rate": 7.587413479414827e-06, "loss": 0.8638, "step": 9576 }, { "epoch": 0.3471059403428654, "grad_norm": 2.2996204636466833, "learning_rate": 7.586911230719975e-06, "loss": 0.9756, "step": 9577 }, { "epoch": 0.3471421840455221, "grad_norm": 2.196015055208635, "learning_rate": 7.586408946379049e-06, "loss": 1.0265, "step": 9578 }, { "epoch": 0.34717842774817875, "grad_norm": 2.119596205030173, "learning_rate": 7.5859066263989735e-06, "loss": 0.92, "step": 9579 }, { "epoch": 0.3472146714508354, "grad_norm": 2.2458453964289298, "learning_rate": 7.585404270786669e-06, "loss": 0.8996, "step": 9580 }, { "epoch": 0.3472509151534921, "grad_norm": 2.3998118045371117, "learning_rate": 7.584901879549056e-06, "loss": 0.8543, "step": 9581 }, { "epoch": 0.34728715885614875, "grad_norm": 2.253914089060982, "learning_rate": 7.584399452693058e-06, "loss": 0.8773, "step": 9582 }, { "epoch": 0.3473234025588054, "grad_norm": 2.2937770145151735, "learning_rate": 7.5838969902256e-06, "loss": 1.02, "step": 9583 }, { "epoch": 0.3473596462614621, "grad_norm": 2.4155728642112693, "learning_rate": 7.583394492153602e-06, "loss": 0.9325, "step": 9584 }, { "epoch": 0.34739588996411874, "grad_norm": 2.2516273851193773, "learning_rate": 7.582891958483992e-06, "loss": 0.8341, "step": 9585 }, { "epoch": 0.3474321336667754, "grad_norm": 1.9240509363719918, "learning_rate": 7.582389389223692e-06, "loss": 0.832, "step": 9586 }, { "epoch": 0.34746837736943204, "grad_norm": 2.076474163730995, "learning_rate": 7.581886784379629e-06, "loss": 0.9313, "step": 9587 }, { "epoch": 0.34750462107208874, "grad_norm": 2.3265405728932667, "learning_rate": 7.581384143958727e-06, "loss": 0.8494, "step": 9588 }, { "epoch": 0.3475408647747454, "grad_norm": 2.048156480532499, "learning_rate": 7.580881467967913e-06, "loss": 1.0576, "step": 9589 }, { "epoch": 0.34757710847740203, "grad_norm": 2.4130834367205383, "learning_rate": 7.580378756414113e-06, "loss": 0.8363, "step": 9590 }, { "epoch": 0.34761335218005873, "grad_norm": 2.222176077937795, "learning_rate": 7.579876009304255e-06, "loss": 0.8917, "step": 9591 }, { "epoch": 0.3476495958827154, "grad_norm": 2.4346983164622387, "learning_rate": 7.579373226645264e-06, "loss": 1.0419, "step": 9592 }, { "epoch": 0.347685839585372, "grad_norm": 2.6822512930473805, "learning_rate": 7.578870408444072e-06, "loss": 1.112, "step": 9593 }, { "epoch": 0.3477220832880287, "grad_norm": 2.1677810373010136, "learning_rate": 7.578367554707604e-06, "loss": 0.9752, "step": 9594 }, { "epoch": 0.3477583269906854, "grad_norm": 2.4984230518858395, "learning_rate": 7.5778646654427915e-06, "loss": 0.9238, "step": 9595 }, { "epoch": 0.347794570693342, "grad_norm": 2.378633083811191, "learning_rate": 7.577361740656562e-06, "loss": 1.0274, "step": 9596 }, { "epoch": 0.3478308143959987, "grad_norm": 2.1136347538747957, "learning_rate": 7.576858780355847e-06, "loss": 0.9442, "step": 9597 }, { "epoch": 0.34786705809865537, "grad_norm": 2.437183349784584, "learning_rate": 7.576355784547575e-06, "loss": 1.1439, "step": 9598 }, { "epoch": 0.347903301801312, "grad_norm": 2.3249644168344217, "learning_rate": 7.575852753238679e-06, "loss": 0.9896, "step": 9599 }, { "epoch": 0.34793954550396866, "grad_norm": 2.35450959510023, "learning_rate": 7.575349686436091e-06, "loss": 0.9417, "step": 9600 }, { "epoch": 0.34797578920662536, "grad_norm": 2.7809719660897034, "learning_rate": 7.574846584146742e-06, "loss": 0.8435, "step": 9601 }, { "epoch": 0.348012032909282, "grad_norm": 2.133239603982523, "learning_rate": 7.574343446377563e-06, "loss": 0.8374, "step": 9602 }, { "epoch": 0.34804827661193866, "grad_norm": 2.257445382463979, "learning_rate": 7.57384027313549e-06, "loss": 1.0252, "step": 9603 }, { "epoch": 0.34808452031459536, "grad_norm": 2.5814505846694673, "learning_rate": 7.573337064427453e-06, "loss": 0.8859, "step": 9604 }, { "epoch": 0.348120764017252, "grad_norm": 2.6346280145882344, "learning_rate": 7.572833820260388e-06, "loss": 0.8003, "step": 9605 }, { "epoch": 0.34815700771990865, "grad_norm": 2.439676250819745, "learning_rate": 7.572330540641228e-06, "loss": 0.9489, "step": 9606 }, { "epoch": 0.34819325142256535, "grad_norm": 2.4485741558585614, "learning_rate": 7.571827225576911e-06, "loss": 0.8525, "step": 9607 }, { "epoch": 0.348229495125222, "grad_norm": 2.2228700172760347, "learning_rate": 7.571323875074368e-06, "loss": 0.9314, "step": 9608 }, { "epoch": 0.34826573882787865, "grad_norm": 2.02218915331763, "learning_rate": 7.570820489140539e-06, "loss": 0.8498, "step": 9609 }, { "epoch": 0.3483019825305353, "grad_norm": 2.07833307102507, "learning_rate": 7.570317067782358e-06, "loss": 0.8971, "step": 9610 }, { "epoch": 0.348338226233192, "grad_norm": 2.001845996039267, "learning_rate": 7.569813611006762e-06, "loss": 0.875, "step": 9611 }, { "epoch": 0.34837446993584864, "grad_norm": 2.3274605017935204, "learning_rate": 7.569310118820688e-06, "loss": 0.9361, "step": 9612 }, { "epoch": 0.3484107136385053, "grad_norm": 2.5739607600421897, "learning_rate": 7.568806591231075e-06, "loss": 0.9678, "step": 9613 }, { "epoch": 0.348446957341162, "grad_norm": 2.239670178697844, "learning_rate": 7.568303028244861e-06, "loss": 1.1805, "step": 9614 }, { "epoch": 0.34848320104381864, "grad_norm": 2.380967880198562, "learning_rate": 7.567799429868983e-06, "loss": 0.96, "step": 9615 }, { "epoch": 0.3485194447464753, "grad_norm": 2.4012080252499985, "learning_rate": 7.567295796110384e-06, "loss": 1.113, "step": 9616 }, { "epoch": 0.348555688449132, "grad_norm": 2.5427070343110842, "learning_rate": 7.566792126975999e-06, "loss": 1.0756, "step": 9617 }, { "epoch": 0.34859193215178863, "grad_norm": 2.421705698019623, "learning_rate": 7.5662884224727736e-06, "loss": 0.9463, "step": 9618 }, { "epoch": 0.3486281758544453, "grad_norm": 2.442916827193874, "learning_rate": 7.565784682607644e-06, "loss": 0.928, "step": 9619 }, { "epoch": 0.348664419557102, "grad_norm": 2.6531962996590055, "learning_rate": 7.565280907387555e-06, "loss": 0.9882, "step": 9620 }, { "epoch": 0.3487006632597586, "grad_norm": 2.346698902859806, "learning_rate": 7.564777096819444e-06, "loss": 0.8546, "step": 9621 }, { "epoch": 0.34873690696241527, "grad_norm": 2.500835456610336, "learning_rate": 7.564273250910259e-06, "loss": 0.9599, "step": 9622 }, { "epoch": 0.3487731506650719, "grad_norm": 2.516318896025409, "learning_rate": 7.5637693696669364e-06, "loss": 0.9695, "step": 9623 }, { "epoch": 0.3488093943677286, "grad_norm": 2.556336183255421, "learning_rate": 7.5632654530964245e-06, "loss": 0.9054, "step": 9624 }, { "epoch": 0.34884563807038527, "grad_norm": 2.6794966085843592, "learning_rate": 7.562761501205662e-06, "loss": 1.1979, "step": 9625 }, { "epoch": 0.3488818817730419, "grad_norm": 2.097743425225861, "learning_rate": 7.562257514001599e-06, "loss": 0.6341, "step": 9626 }, { "epoch": 0.3489181254756986, "grad_norm": 2.1380637380388037, "learning_rate": 7.561753491491175e-06, "loss": 0.9115, "step": 9627 }, { "epoch": 0.34895436917835526, "grad_norm": 2.2165449080196185, "learning_rate": 7.5612494336813395e-06, "loss": 0.9788, "step": 9628 }, { "epoch": 0.3489906128810119, "grad_norm": 2.6999980205717105, "learning_rate": 7.560745340579034e-06, "loss": 0.8402, "step": 9629 }, { "epoch": 0.3490268565836686, "grad_norm": 2.6065332341280762, "learning_rate": 7.5602412121912076e-06, "loss": 1.0113, "step": 9630 }, { "epoch": 0.34906310028632526, "grad_norm": 2.5509401276021046, "learning_rate": 7.559737048524804e-06, "loss": 0.9743, "step": 9631 }, { "epoch": 0.3490993439889819, "grad_norm": 2.2641214247551096, "learning_rate": 7.559232849586773e-06, "loss": 0.9568, "step": 9632 }, { "epoch": 0.3491355876916386, "grad_norm": 2.4229322190759497, "learning_rate": 7.558728615384061e-06, "loss": 0.9308, "step": 9633 }, { "epoch": 0.34917183139429525, "grad_norm": 2.7862348657740648, "learning_rate": 7.558224345923616e-06, "loss": 0.956, "step": 9634 }, { "epoch": 0.3492080750969519, "grad_norm": 2.2378234650262705, "learning_rate": 7.5577200412123866e-06, "loss": 0.6897, "step": 9635 }, { "epoch": 0.34924431879960854, "grad_norm": 2.376312735932401, "learning_rate": 7.557215701257321e-06, "loss": 1.0031, "step": 9636 }, { "epoch": 0.34928056250226525, "grad_norm": 2.1281902573730678, "learning_rate": 7.556711326065371e-06, "loss": 0.9399, "step": 9637 }, { "epoch": 0.3493168062049219, "grad_norm": 2.721272860880673, "learning_rate": 7.5562069156434845e-06, "loss": 0.9631, "step": 9638 }, { "epoch": 0.34935304990757854, "grad_norm": 2.347215767072287, "learning_rate": 7.555702469998614e-06, "loss": 0.9614, "step": 9639 }, { "epoch": 0.34938929361023524, "grad_norm": 2.474110820106598, "learning_rate": 7.555197989137708e-06, "loss": 1.0083, "step": 9640 }, { "epoch": 0.3494255373128919, "grad_norm": 2.4316857466346558, "learning_rate": 7.554693473067719e-06, "loss": 0.9559, "step": 9641 }, { "epoch": 0.34946178101554853, "grad_norm": 2.382033890829777, "learning_rate": 7.5541889217956e-06, "loss": 1.0203, "step": 9642 }, { "epoch": 0.34949802471820524, "grad_norm": 2.3140876207217134, "learning_rate": 7.5536843353283e-06, "loss": 0.9418, "step": 9643 }, { "epoch": 0.3495342684208619, "grad_norm": 2.3752635127919133, "learning_rate": 7.553179713672776e-06, "loss": 0.9586, "step": 9644 }, { "epoch": 0.34957051212351853, "grad_norm": 2.2277592002455666, "learning_rate": 7.552675056835978e-06, "loss": 0.8856, "step": 9645 }, { "epoch": 0.3496067558261752, "grad_norm": 2.3229784468003034, "learning_rate": 7.552170364824864e-06, "loss": 0.9885, "step": 9646 }, { "epoch": 0.3496429995288319, "grad_norm": 2.3137803198989935, "learning_rate": 7.551665637646386e-06, "loss": 0.8388, "step": 9647 }, { "epoch": 0.3496792432314885, "grad_norm": 2.392766354627096, "learning_rate": 7.551160875307497e-06, "loss": 0.9948, "step": 9648 }, { "epoch": 0.34971548693414517, "grad_norm": 2.4054318309491443, "learning_rate": 7.550656077815155e-06, "loss": 0.7378, "step": 9649 }, { "epoch": 0.34975173063680187, "grad_norm": 2.286072865710654, "learning_rate": 7.5501512451763135e-06, "loss": 1.0143, "step": 9650 }, { "epoch": 0.3497879743394585, "grad_norm": 2.3031515821144746, "learning_rate": 7.5496463773979315e-06, "loss": 0.8035, "step": 9651 }, { "epoch": 0.34982421804211516, "grad_norm": 2.336747954395025, "learning_rate": 7.549141474486963e-06, "loss": 0.8224, "step": 9652 }, { "epoch": 0.34986046174477187, "grad_norm": 2.0037520244122886, "learning_rate": 7.548636536450368e-06, "loss": 0.7883, "step": 9653 }, { "epoch": 0.3498967054474285, "grad_norm": 2.162021721968893, "learning_rate": 7.548131563295102e-06, "loss": 0.987, "step": 9654 }, { "epoch": 0.34993294915008516, "grad_norm": 2.4551955844851086, "learning_rate": 7.547626555028124e-06, "loss": 0.9587, "step": 9655 }, { "epoch": 0.34996919285274186, "grad_norm": 2.3276769816940797, "learning_rate": 7.5471215116563945e-06, "loss": 0.9178, "step": 9656 }, { "epoch": 0.3500054365553985, "grad_norm": 2.031410038350952, "learning_rate": 7.5466164331868695e-06, "loss": 1.0131, "step": 9657 }, { "epoch": 0.35004168025805515, "grad_norm": 2.5045478571063, "learning_rate": 7.546111319626511e-06, "loss": 0.9407, "step": 9658 }, { "epoch": 0.3500779239607118, "grad_norm": 2.3647475860263323, "learning_rate": 7.545606170982278e-06, "loss": 0.9944, "step": 9659 }, { "epoch": 0.3501141676633685, "grad_norm": 2.4778533846645794, "learning_rate": 7.545100987261131e-06, "loss": 0.8545, "step": 9660 }, { "epoch": 0.35015041136602515, "grad_norm": 2.582827550532122, "learning_rate": 7.544595768470032e-06, "loss": 0.8398, "step": 9661 }, { "epoch": 0.3501866550686818, "grad_norm": 2.2930870937621046, "learning_rate": 7.5440905146159425e-06, "loss": 0.9713, "step": 9662 }, { "epoch": 0.3502228987713385, "grad_norm": 2.141601015537548, "learning_rate": 7.543585225705824e-06, "loss": 0.8695, "step": 9663 }, { "epoch": 0.35025914247399514, "grad_norm": 2.611215395176083, "learning_rate": 7.54307990174664e-06, "loss": 1.1022, "step": 9664 }, { "epoch": 0.3502953861766518, "grad_norm": 2.2990356559983436, "learning_rate": 7.5425745427453515e-06, "loss": 1.0095, "step": 9665 }, { "epoch": 0.3503316298793085, "grad_norm": 2.2318105080182655, "learning_rate": 7.542069148708925e-06, "loss": 0.9391, "step": 9666 }, { "epoch": 0.35036787358196514, "grad_norm": 2.425530158119811, "learning_rate": 7.541563719644323e-06, "loss": 1.0362, "step": 9667 }, { "epoch": 0.3504041172846218, "grad_norm": 2.3140482529807063, "learning_rate": 7.54105825555851e-06, "loss": 0.9957, "step": 9668 }, { "epoch": 0.3504403609872785, "grad_norm": 2.697750527389979, "learning_rate": 7.54055275645845e-06, "loss": 1.0704, "step": 9669 }, { "epoch": 0.35047660468993513, "grad_norm": 2.4051580294713384, "learning_rate": 7.54004722235111e-06, "loss": 1.003, "step": 9670 }, { "epoch": 0.3505128483925918, "grad_norm": 2.677978332346239, "learning_rate": 7.539541653243455e-06, "loss": 0.9868, "step": 9671 }, { "epoch": 0.3505490920952484, "grad_norm": 2.361243938501574, "learning_rate": 7.539036049142453e-06, "loss": 0.8786, "step": 9672 }, { "epoch": 0.35058533579790513, "grad_norm": 2.0415254341124713, "learning_rate": 7.538530410055068e-06, "loss": 0.7583, "step": 9673 }, { "epoch": 0.3506215795005618, "grad_norm": 2.395072741544083, "learning_rate": 7.538024735988272e-06, "loss": 0.8799, "step": 9674 }, { "epoch": 0.3506578232032184, "grad_norm": 2.311661198929476, "learning_rate": 7.537519026949027e-06, "loss": 1.0337, "step": 9675 }, { "epoch": 0.3506940669058751, "grad_norm": 2.2836706154549153, "learning_rate": 7.537013282944307e-06, "loss": 0.7724, "step": 9676 }, { "epoch": 0.35073031060853177, "grad_norm": 2.4163945570026124, "learning_rate": 7.536507503981076e-06, "loss": 1.1279, "step": 9677 }, { "epoch": 0.3507665543111884, "grad_norm": 2.30269762328476, "learning_rate": 7.536001690066308e-06, "loss": 0.8182, "step": 9678 }, { "epoch": 0.3508027980138451, "grad_norm": 2.5021422326287035, "learning_rate": 7.535495841206969e-06, "loss": 0.9409, "step": 9679 }, { "epoch": 0.35083904171650176, "grad_norm": 2.6768204589127844, "learning_rate": 7.534989957410031e-06, "loss": 1.0068, "step": 9680 }, { "epoch": 0.3508752854191584, "grad_norm": 2.241828602003663, "learning_rate": 7.534484038682463e-06, "loss": 0.8535, "step": 9681 }, { "epoch": 0.35091152912181506, "grad_norm": 2.3492402426659695, "learning_rate": 7.533978085031241e-06, "loss": 0.9968, "step": 9682 }, { "epoch": 0.35094777282447176, "grad_norm": 2.4154680884092476, "learning_rate": 7.533472096463332e-06, "loss": 0.9046, "step": 9683 }, { "epoch": 0.3509840165271284, "grad_norm": 2.5104387902986227, "learning_rate": 7.532966072985709e-06, "loss": 1.0527, "step": 9684 }, { "epoch": 0.35102026022978505, "grad_norm": 2.2874704570501603, "learning_rate": 7.532460014605345e-06, "loss": 0.9218, "step": 9685 }, { "epoch": 0.35105650393244175, "grad_norm": 2.2600019911501934, "learning_rate": 7.531953921329215e-06, "loss": 0.9603, "step": 9686 }, { "epoch": 0.3510927476350984, "grad_norm": 2.355089710386742, "learning_rate": 7.53144779316429e-06, "loss": 1.0019, "step": 9687 }, { "epoch": 0.35112899133775505, "grad_norm": 2.322417214360824, "learning_rate": 7.530941630117548e-06, "loss": 0.8652, "step": 9688 }, { "epoch": 0.35116523504041175, "grad_norm": 2.4159230344694236, "learning_rate": 7.530435432195957e-06, "loss": 0.905, "step": 9689 }, { "epoch": 0.3512014787430684, "grad_norm": 2.295025881146538, "learning_rate": 7.529929199406499e-06, "loss": 0.9343, "step": 9690 }, { "epoch": 0.35123772244572504, "grad_norm": 2.226598872041891, "learning_rate": 7.529422931756145e-06, "loss": 0.9402, "step": 9691 }, { "epoch": 0.35127396614838174, "grad_norm": 2.4167360593655425, "learning_rate": 7.528916629251875e-06, "loss": 0.9459, "step": 9692 }, { "epoch": 0.3513102098510384, "grad_norm": 2.390248068113, "learning_rate": 7.528410291900661e-06, "loss": 0.8982, "step": 9693 }, { "epoch": 0.35134645355369504, "grad_norm": 2.295225328278055, "learning_rate": 7.527903919709483e-06, "loss": 0.8579, "step": 9694 }, { "epoch": 0.3513826972563517, "grad_norm": 2.2874824699371294, "learning_rate": 7.527397512685319e-06, "loss": 1.0174, "step": 9695 }, { "epoch": 0.3514189409590084, "grad_norm": 2.3349826291790237, "learning_rate": 7.526891070835144e-06, "loss": 0.9717, "step": 9696 }, { "epoch": 0.35145518466166503, "grad_norm": 2.33633191058294, "learning_rate": 7.52638459416594e-06, "loss": 0.9671, "step": 9697 }, { "epoch": 0.3514914283643217, "grad_norm": 2.3732241153929823, "learning_rate": 7.525878082684682e-06, "loss": 0.9232, "step": 9698 }, { "epoch": 0.3515276720669784, "grad_norm": 2.319325542018401, "learning_rate": 7.525371536398354e-06, "loss": 0.7364, "step": 9699 }, { "epoch": 0.351563915769635, "grad_norm": 2.3813467518464475, "learning_rate": 7.5248649553139305e-06, "loss": 0.9591, "step": 9700 }, { "epoch": 0.3516001594722917, "grad_norm": 2.1631774647464064, "learning_rate": 7.524358339438398e-06, "loss": 1.0288, "step": 9701 }, { "epoch": 0.3516364031749484, "grad_norm": 2.23375459819695, "learning_rate": 7.523851688778733e-06, "loss": 0.7485, "step": 9702 }, { "epoch": 0.351672646877605, "grad_norm": 2.7082670036308585, "learning_rate": 7.523345003341918e-06, "loss": 1.0033, "step": 9703 }, { "epoch": 0.35170889058026167, "grad_norm": 2.1944209699929083, "learning_rate": 7.522838283134934e-06, "loss": 0.9405, "step": 9704 }, { "epoch": 0.35174513428291837, "grad_norm": 2.6037577554977416, "learning_rate": 7.522331528164766e-06, "loss": 0.9485, "step": 9705 }, { "epoch": 0.351781377985575, "grad_norm": 2.346828122968111, "learning_rate": 7.521824738438395e-06, "loss": 1.022, "step": 9706 }, { "epoch": 0.35181762168823166, "grad_norm": 2.135825196379987, "learning_rate": 7.521317913962805e-06, "loss": 0.7959, "step": 9707 }, { "epoch": 0.3518538653908883, "grad_norm": 2.527451881867176, "learning_rate": 7.520811054744976e-06, "loss": 0.9619, "step": 9708 }, { "epoch": 0.351890109093545, "grad_norm": 2.7336821960067805, "learning_rate": 7.520304160791898e-06, "loss": 1.1557, "step": 9709 }, { "epoch": 0.35192635279620166, "grad_norm": 2.1801464899960323, "learning_rate": 7.51979723211055e-06, "loss": 0.8377, "step": 9710 }, { "epoch": 0.3519625964988583, "grad_norm": 2.30499508289066, "learning_rate": 7.519290268707923e-06, "loss": 0.89, "step": 9711 }, { "epoch": 0.351998840201515, "grad_norm": 2.37534009127387, "learning_rate": 7.5187832705909965e-06, "loss": 0.902, "step": 9712 }, { "epoch": 0.35203508390417165, "grad_norm": 2.653727259264579, "learning_rate": 7.518276237766763e-06, "loss": 0.8452, "step": 9713 }, { "epoch": 0.3520713276068283, "grad_norm": 2.3313250283558675, "learning_rate": 7.517769170242204e-06, "loss": 0.8202, "step": 9714 }, { "epoch": 0.352107571309485, "grad_norm": 2.201604117352103, "learning_rate": 7.517262068024309e-06, "loss": 0.9088, "step": 9715 }, { "epoch": 0.35214381501214165, "grad_norm": 2.5455922707794096, "learning_rate": 7.516754931120067e-06, "loss": 0.8938, "step": 9716 }, { "epoch": 0.3521800587147983, "grad_norm": 2.4160222112192207, "learning_rate": 7.516247759536463e-06, "loss": 1.0187, "step": 9717 }, { "epoch": 0.35221630241745494, "grad_norm": 2.06202429017554, "learning_rate": 7.515740553280485e-06, "loss": 0.9417, "step": 9718 }, { "epoch": 0.35225254612011164, "grad_norm": 2.1852163094608876, "learning_rate": 7.515233312359124e-06, "loss": 0.8866, "step": 9719 }, { "epoch": 0.3522887898227683, "grad_norm": 2.358749628087617, "learning_rate": 7.5147260367793705e-06, "loss": 1.0762, "step": 9720 }, { "epoch": 0.35232503352542494, "grad_norm": 2.2676315816058565, "learning_rate": 7.514218726548213e-06, "loss": 1.0391, "step": 9721 }, { "epoch": 0.35236127722808164, "grad_norm": 2.421871828183596, "learning_rate": 7.513711381672642e-06, "loss": 0.8689, "step": 9722 }, { "epoch": 0.3523975209307383, "grad_norm": 2.389660400342241, "learning_rate": 7.513204002159647e-06, "loss": 0.8579, "step": 9723 }, { "epoch": 0.35243376463339493, "grad_norm": 2.2894344986844284, "learning_rate": 7.5126965880162215e-06, "loss": 0.9393, "step": 9724 }, { "epoch": 0.35247000833605163, "grad_norm": 2.328147113650656, "learning_rate": 7.5121891392493575e-06, "loss": 0.9133, "step": 9725 }, { "epoch": 0.3525062520387083, "grad_norm": 2.323739271109573, "learning_rate": 7.511681655866045e-06, "loss": 0.8658, "step": 9726 }, { "epoch": 0.3525424957413649, "grad_norm": 2.241648091346015, "learning_rate": 7.511174137873281e-06, "loss": 0.8457, "step": 9727 }, { "epoch": 0.3525787394440216, "grad_norm": 2.3444827838988007, "learning_rate": 7.510666585278052e-06, "loss": 0.8933, "step": 9728 }, { "epoch": 0.3526149831466783, "grad_norm": 2.538361701779541, "learning_rate": 7.51015899808736e-06, "loss": 1.0115, "step": 9729 }, { "epoch": 0.3526512268493349, "grad_norm": 2.374171634872952, "learning_rate": 7.509651376308191e-06, "loss": 0.8822, "step": 9730 }, { "epoch": 0.35268747055199157, "grad_norm": 2.2575013647124975, "learning_rate": 7.509143719947547e-06, "loss": 0.8986, "step": 9731 }, { "epoch": 0.35272371425464827, "grad_norm": 2.257920922249069, "learning_rate": 7.50863602901242e-06, "loss": 0.8534, "step": 9732 }, { "epoch": 0.3527599579573049, "grad_norm": 2.475288385163929, "learning_rate": 7.508128303509804e-06, "loss": 1.0416, "step": 9733 }, { "epoch": 0.35279620165996156, "grad_norm": 2.311627932736519, "learning_rate": 7.507620543446698e-06, "loss": 0.9028, "step": 9734 }, { "epoch": 0.35283244536261826, "grad_norm": 2.4730060418287505, "learning_rate": 7.507112748830096e-06, "loss": 0.9513, "step": 9735 }, { "epoch": 0.3528686890652749, "grad_norm": 2.455586535017002, "learning_rate": 7.506604919666998e-06, "loss": 1.0489, "step": 9736 }, { "epoch": 0.35290493276793156, "grad_norm": 2.3602415964299923, "learning_rate": 7.506097055964401e-06, "loss": 0.8554, "step": 9737 }, { "epoch": 0.35294117647058826, "grad_norm": 2.242695261429976, "learning_rate": 7.505589157729301e-06, "loss": 1.065, "step": 9738 }, { "epoch": 0.3529774201732449, "grad_norm": 2.2712203775143056, "learning_rate": 7.5050812249686965e-06, "loss": 1.0207, "step": 9739 }, { "epoch": 0.35301366387590155, "grad_norm": 2.1849581529206406, "learning_rate": 7.504573257689589e-06, "loss": 0.7289, "step": 9740 }, { "epoch": 0.35304990757855825, "grad_norm": 2.334499354485434, "learning_rate": 7.504065255898977e-06, "loss": 0.9466, "step": 9741 }, { "epoch": 0.3530861512812149, "grad_norm": 2.4202480062908913, "learning_rate": 7.503557219603859e-06, "loss": 0.8548, "step": 9742 }, { "epoch": 0.35312239498387155, "grad_norm": 2.405904997156911, "learning_rate": 7.503049148811238e-06, "loss": 0.9863, "step": 9743 }, { "epoch": 0.3531586386865282, "grad_norm": 2.276872893665595, "learning_rate": 7.502541043528113e-06, "loss": 1.027, "step": 9744 }, { "epoch": 0.3531948823891849, "grad_norm": 2.1887801835766645, "learning_rate": 7.502032903761487e-06, "loss": 0.8795, "step": 9745 }, { "epoch": 0.35323112609184154, "grad_norm": 2.1975065207265434, "learning_rate": 7.501524729518359e-06, "loss": 0.8634, "step": 9746 }, { "epoch": 0.3532673697944982, "grad_norm": 2.352977378466496, "learning_rate": 7.501016520805734e-06, "loss": 0.8344, "step": 9747 }, { "epoch": 0.3533036134971549, "grad_norm": 2.6560531186879732, "learning_rate": 7.5005082776306125e-06, "loss": 0.9251, "step": 9748 }, { "epoch": 0.35333985719981154, "grad_norm": 2.1246573507279005, "learning_rate": 7.500000000000001e-06, "loss": 0.7729, "step": 9749 }, { "epoch": 0.3533761009024682, "grad_norm": 2.492926895994606, "learning_rate": 7.4994916879209e-06, "loss": 0.869, "step": 9750 }, { "epoch": 0.3534123446051249, "grad_norm": 2.306470753644523, "learning_rate": 7.498983341400316e-06, "loss": 0.9904, "step": 9751 }, { "epoch": 0.35344858830778153, "grad_norm": 2.3242397769490086, "learning_rate": 7.498474960445253e-06, "loss": 0.7672, "step": 9752 }, { "epoch": 0.3534848320104382, "grad_norm": 2.3467056748055612, "learning_rate": 7.497966545062716e-06, "loss": 0.9415, "step": 9753 }, { "epoch": 0.3535210757130948, "grad_norm": 2.4201025854582854, "learning_rate": 7.497458095259711e-06, "loss": 1.0506, "step": 9754 }, { "epoch": 0.3535573194157515, "grad_norm": 2.463926177991227, "learning_rate": 7.496949611043245e-06, "loss": 1.0255, "step": 9755 }, { "epoch": 0.35359356311840817, "grad_norm": 2.252521422917296, "learning_rate": 7.496441092420321e-06, "loss": 0.9415, "step": 9756 }, { "epoch": 0.3536298068210648, "grad_norm": 2.275188768727575, "learning_rate": 7.49593253939795e-06, "loss": 1.0615, "step": 9757 }, { "epoch": 0.3536660505237215, "grad_norm": 2.4411512733380567, "learning_rate": 7.495423951983137e-06, "loss": 0.9794, "step": 9758 }, { "epoch": 0.35370229422637817, "grad_norm": 2.2639341540770745, "learning_rate": 7.4949153301828926e-06, "loss": 0.8409, "step": 9759 }, { "epoch": 0.3537385379290348, "grad_norm": 2.398925579753667, "learning_rate": 7.494406674004222e-06, "loss": 1.0403, "step": 9760 }, { "epoch": 0.3537747816316915, "grad_norm": 2.165703643559917, "learning_rate": 7.493897983454137e-06, "loss": 0.8268, "step": 9761 }, { "epoch": 0.35381102533434816, "grad_norm": 2.3090609421906567, "learning_rate": 7.493389258539646e-06, "loss": 0.8728, "step": 9762 }, { "epoch": 0.3538472690370048, "grad_norm": 2.4521656381906554, "learning_rate": 7.492880499267759e-06, "loss": 0.9924, "step": 9763 }, { "epoch": 0.3538835127396615, "grad_norm": 2.1987251686628424, "learning_rate": 7.492371705645487e-06, "loss": 0.97, "step": 9764 }, { "epoch": 0.35391975644231816, "grad_norm": 2.60378330407973, "learning_rate": 7.491862877679841e-06, "loss": 0.8046, "step": 9765 }, { "epoch": 0.3539560001449748, "grad_norm": 2.306823417600443, "learning_rate": 7.491354015377829e-06, "loss": 0.947, "step": 9766 }, { "epoch": 0.35399224384763145, "grad_norm": 2.3908032515772266, "learning_rate": 7.490845118746467e-06, "loss": 1.0574, "step": 9767 }, { "epoch": 0.35402848755028815, "grad_norm": 2.2758370480569594, "learning_rate": 7.4903361877927646e-06, "loss": 0.9373, "step": 9768 }, { "epoch": 0.3540647312529448, "grad_norm": 2.4111402779979962, "learning_rate": 7.489827222523737e-06, "loss": 0.9216, "step": 9769 }, { "epoch": 0.35410097495560144, "grad_norm": 2.4340587740153983, "learning_rate": 7.489318222946395e-06, "loss": 1.0038, "step": 9770 }, { "epoch": 0.35413721865825815, "grad_norm": 2.1364141403645642, "learning_rate": 7.488809189067754e-06, "loss": 0.7846, "step": 9771 }, { "epoch": 0.3541734623609148, "grad_norm": 2.1483797901129567, "learning_rate": 7.488300120894827e-06, "loss": 0.9117, "step": 9772 }, { "epoch": 0.35420970606357144, "grad_norm": 2.30502244379784, "learning_rate": 7.4877910184346295e-06, "loss": 0.8898, "step": 9773 }, { "epoch": 0.35424594976622814, "grad_norm": 2.479811824688868, "learning_rate": 7.4872818816941775e-06, "loss": 0.9616, "step": 9774 }, { "epoch": 0.3542821934688848, "grad_norm": 2.374471387080462, "learning_rate": 7.486772710680483e-06, "loss": 0.9789, "step": 9775 }, { "epoch": 0.35431843717154143, "grad_norm": 2.0971646864953706, "learning_rate": 7.486263505400566e-06, "loss": 1.1049, "step": 9776 }, { "epoch": 0.35435468087419814, "grad_norm": 2.311462098793227, "learning_rate": 7.485754265861441e-06, "loss": 1.038, "step": 9777 }, { "epoch": 0.3543909245768548, "grad_norm": 2.359286129332216, "learning_rate": 7.485244992070126e-06, "loss": 1.0073, "step": 9778 }, { "epoch": 0.35442716827951143, "grad_norm": 2.502665233921024, "learning_rate": 7.484735684033637e-06, "loss": 0.808, "step": 9779 }, { "epoch": 0.3544634119821681, "grad_norm": 2.7051332019854457, "learning_rate": 7.484226341758996e-06, "loss": 1.0515, "step": 9780 }, { "epoch": 0.3544996556848248, "grad_norm": 2.2758602109951522, "learning_rate": 7.483716965253215e-06, "loss": 0.9066, "step": 9781 }, { "epoch": 0.3545358993874814, "grad_norm": 2.4856279617127344, "learning_rate": 7.483207554523317e-06, "loss": 0.8327, "step": 9782 }, { "epoch": 0.35457214309013807, "grad_norm": 2.4746444485810755, "learning_rate": 7.482698109576323e-06, "loss": 0.8328, "step": 9783 }, { "epoch": 0.35460838679279477, "grad_norm": 2.3095983403184066, "learning_rate": 7.482188630419249e-06, "loss": 1.1345, "step": 9784 }, { "epoch": 0.3546446304954514, "grad_norm": 2.6008026822883346, "learning_rate": 7.481679117059116e-06, "loss": 0.9736, "step": 9785 }, { "epoch": 0.35468087419810806, "grad_norm": 2.3232853073974327, "learning_rate": 7.481169569502947e-06, "loss": 1.0239, "step": 9786 }, { "epoch": 0.35471711790076477, "grad_norm": 2.3018812493152345, "learning_rate": 7.48065998775776e-06, "loss": 0.8768, "step": 9787 }, { "epoch": 0.3547533616034214, "grad_norm": 2.3750329852781236, "learning_rate": 7.4801503718305815e-06, "loss": 0.9212, "step": 9788 }, { "epoch": 0.35478960530607806, "grad_norm": 2.292969557403138, "learning_rate": 7.479640721728428e-06, "loss": 0.9754, "step": 9789 }, { "epoch": 0.3548258490087347, "grad_norm": 2.30326481930251, "learning_rate": 7.479131037458327e-06, "loss": 1.0237, "step": 9790 }, { "epoch": 0.3548620927113914, "grad_norm": 2.52031528670535, "learning_rate": 7.478621319027298e-06, "loss": 0.8429, "step": 9791 }, { "epoch": 0.35489833641404805, "grad_norm": 2.4738330613813577, "learning_rate": 7.4781115664423675e-06, "loss": 0.8635, "step": 9792 }, { "epoch": 0.3549345801167047, "grad_norm": 2.365482871724994, "learning_rate": 7.477601779710558e-06, "loss": 0.7858, "step": 9793 }, { "epoch": 0.3549708238193614, "grad_norm": 2.0526186101649375, "learning_rate": 7.477091958838895e-06, "loss": 0.888, "step": 9794 }, { "epoch": 0.35500706752201805, "grad_norm": 2.4243006937241325, "learning_rate": 7.4765821038344025e-06, "loss": 0.9088, "step": 9795 }, { "epoch": 0.3550433112246747, "grad_norm": 2.036698116639458, "learning_rate": 7.476072214704105e-06, "loss": 0.909, "step": 9796 }, { "epoch": 0.3550795549273314, "grad_norm": 2.541949470265218, "learning_rate": 7.475562291455031e-06, "loss": 0.9699, "step": 9797 }, { "epoch": 0.35511579862998804, "grad_norm": 2.2217565803142705, "learning_rate": 7.475052334094206e-06, "loss": 0.8409, "step": 9798 }, { "epoch": 0.3551520423326447, "grad_norm": 2.2674171235141682, "learning_rate": 7.474542342628657e-06, "loss": 0.8972, "step": 9799 }, { "epoch": 0.3551882860353014, "grad_norm": 2.4061082259753577, "learning_rate": 7.474032317065411e-06, "loss": 0.8955, "step": 9800 }, { "epoch": 0.35522452973795804, "grad_norm": 2.4245515235152935, "learning_rate": 7.473522257411494e-06, "loss": 1.0567, "step": 9801 }, { "epoch": 0.3552607734406147, "grad_norm": 2.3191454183028606, "learning_rate": 7.473012163673938e-06, "loss": 0.7659, "step": 9802 }, { "epoch": 0.35529701714327133, "grad_norm": 2.2517970299267764, "learning_rate": 7.47250203585977e-06, "loss": 0.8794, "step": 9803 }, { "epoch": 0.35533326084592803, "grad_norm": 2.115921722131623, "learning_rate": 7.47199187397602e-06, "loss": 0.8493, "step": 9804 }, { "epoch": 0.3553695045485847, "grad_norm": 2.4644827722142684, "learning_rate": 7.471481678029715e-06, "loss": 0.9348, "step": 9805 }, { "epoch": 0.3554057482512413, "grad_norm": 2.355204495278095, "learning_rate": 7.4709714480278885e-06, "loss": 0.8976, "step": 9806 }, { "epoch": 0.35544199195389803, "grad_norm": 2.333277609885076, "learning_rate": 7.4704611839775686e-06, "loss": 0.9197, "step": 9807 }, { "epoch": 0.3554782356565547, "grad_norm": 2.406328991418174, "learning_rate": 7.469950885885789e-06, "loss": 0.8657, "step": 9808 }, { "epoch": 0.3555144793592113, "grad_norm": 2.7145245863966405, "learning_rate": 7.469440553759579e-06, "loss": 0.9105, "step": 9809 }, { "epoch": 0.355550723061868, "grad_norm": 2.4933971450261523, "learning_rate": 7.4689301876059726e-06, "loss": 0.9119, "step": 9810 }, { "epoch": 0.35558696676452467, "grad_norm": 2.3337122850388106, "learning_rate": 7.468419787432001e-06, "loss": 0.9424, "step": 9811 }, { "epoch": 0.3556232104671813, "grad_norm": 2.4745541599978345, "learning_rate": 7.467909353244698e-06, "loss": 0.9685, "step": 9812 }, { "epoch": 0.35565945416983796, "grad_norm": 2.5359398425501984, "learning_rate": 7.4673988850510955e-06, "loss": 1.015, "step": 9813 }, { "epoch": 0.35569569787249466, "grad_norm": 2.359371452221282, "learning_rate": 7.466888382858231e-06, "loss": 1.0933, "step": 9814 }, { "epoch": 0.3557319415751513, "grad_norm": 2.1643255638081045, "learning_rate": 7.466377846673134e-06, "loss": 1.0446, "step": 9815 }, { "epoch": 0.35576818527780796, "grad_norm": 2.3582716694072774, "learning_rate": 7.465867276502843e-06, "loss": 0.856, "step": 9816 }, { "epoch": 0.35580442898046466, "grad_norm": 2.320273613643504, "learning_rate": 7.465356672354391e-06, "loss": 0.9607, "step": 9817 }, { "epoch": 0.3558406726831213, "grad_norm": 2.3203522105111536, "learning_rate": 7.464846034234816e-06, "loss": 1.014, "step": 9818 }, { "epoch": 0.35587691638577795, "grad_norm": 2.6388178394203585, "learning_rate": 7.464335362151154e-06, "loss": 0.9547, "step": 9819 }, { "epoch": 0.35591316008843465, "grad_norm": 2.2880089420898453, "learning_rate": 7.46382465611044e-06, "loss": 0.8578, "step": 9820 }, { "epoch": 0.3559494037910913, "grad_norm": 2.263477288784436, "learning_rate": 7.463313916119712e-06, "loss": 0.9462, "step": 9821 }, { "epoch": 0.35598564749374795, "grad_norm": 2.258960601554496, "learning_rate": 7.462803142186009e-06, "loss": 1.075, "step": 9822 }, { "epoch": 0.35602189119640465, "grad_norm": 2.4077069908950963, "learning_rate": 7.4622923343163685e-06, "loss": 0.9051, "step": 9823 }, { "epoch": 0.3560581348990613, "grad_norm": 2.3938627686723866, "learning_rate": 7.461781492517829e-06, "loss": 1.0249, "step": 9824 }, { "epoch": 0.35609437860171794, "grad_norm": 2.5458359383217335, "learning_rate": 7.461270616797428e-06, "loss": 0.985, "step": 9825 }, { "epoch": 0.3561306223043746, "grad_norm": 2.1074170021404464, "learning_rate": 7.460759707162207e-06, "loss": 0.8158, "step": 9826 }, { "epoch": 0.3561668660070313, "grad_norm": 2.6300055702278997, "learning_rate": 7.460248763619204e-06, "loss": 0.9827, "step": 9827 }, { "epoch": 0.35620310970968794, "grad_norm": 2.3934402050416694, "learning_rate": 7.459737786175464e-06, "loss": 0.9144, "step": 9828 }, { "epoch": 0.3562393534123446, "grad_norm": 2.2417047020384513, "learning_rate": 7.459226774838022e-06, "loss": 0.8616, "step": 9829 }, { "epoch": 0.3562755971150013, "grad_norm": 2.4982117737769585, "learning_rate": 7.4587157296139244e-06, "loss": 0.918, "step": 9830 }, { "epoch": 0.35631184081765793, "grad_norm": 2.3744064779967675, "learning_rate": 7.45820465051021e-06, "loss": 1.0917, "step": 9831 }, { "epoch": 0.3563480845203146, "grad_norm": 2.603204462662531, "learning_rate": 7.457693537533923e-06, "loss": 1.007, "step": 9832 }, { "epoch": 0.3563843282229713, "grad_norm": 2.5971445211168627, "learning_rate": 7.457182390692105e-06, "loss": 1.0031, "step": 9833 }, { "epoch": 0.3564205719256279, "grad_norm": 2.4001089231916333, "learning_rate": 7.4566712099917995e-06, "loss": 0.9293, "step": 9834 }, { "epoch": 0.3564568156282846, "grad_norm": 2.3248719458309672, "learning_rate": 7.456159995440051e-06, "loss": 1.0885, "step": 9835 }, { "epoch": 0.3564930593309413, "grad_norm": 2.593064903717408, "learning_rate": 7.455648747043902e-06, "loss": 1.0076, "step": 9836 }, { "epoch": 0.3565293030335979, "grad_norm": 2.160763000113112, "learning_rate": 7.455137464810399e-06, "loss": 0.9225, "step": 9837 }, { "epoch": 0.35656554673625457, "grad_norm": 2.1576444156338757, "learning_rate": 7.454626148746587e-06, "loss": 0.7601, "step": 9838 }, { "epoch": 0.3566017904389112, "grad_norm": 2.289093338764887, "learning_rate": 7.454114798859511e-06, "loss": 0.9557, "step": 9839 }, { "epoch": 0.3566380341415679, "grad_norm": 2.1629981159630645, "learning_rate": 7.453603415156216e-06, "loss": 1.0432, "step": 9840 }, { "epoch": 0.35667427784422456, "grad_norm": 2.252765579688716, "learning_rate": 7.453091997643752e-06, "loss": 0.914, "step": 9841 }, { "epoch": 0.3567105215468812, "grad_norm": 2.466316637286242, "learning_rate": 7.452580546329164e-06, "loss": 0.8269, "step": 9842 }, { "epoch": 0.3567467652495379, "grad_norm": 2.5542707817037207, "learning_rate": 7.452069061219496e-06, "loss": 0.8179, "step": 9843 }, { "epoch": 0.35678300895219456, "grad_norm": 2.2350052179301696, "learning_rate": 7.451557542321803e-06, "loss": 0.9142, "step": 9844 }, { "epoch": 0.3568192526548512, "grad_norm": 2.030571345063338, "learning_rate": 7.451045989643128e-06, "loss": 1.0152, "step": 9845 }, { "epoch": 0.3568554963575079, "grad_norm": 2.1927120171867824, "learning_rate": 7.450534403190521e-06, "loss": 0.8126, "step": 9846 }, { "epoch": 0.35689174006016455, "grad_norm": 2.18685418191042, "learning_rate": 7.450022782971034e-06, "loss": 0.8347, "step": 9847 }, { "epoch": 0.3569279837628212, "grad_norm": 2.272867037792037, "learning_rate": 7.449511128991713e-06, "loss": 1.0142, "step": 9848 }, { "epoch": 0.35696422746547785, "grad_norm": 2.85805214878174, "learning_rate": 7.44899944125961e-06, "loss": 1.009, "step": 9849 }, { "epoch": 0.35700047116813455, "grad_norm": 2.0622856248574553, "learning_rate": 7.448487719781776e-06, "loss": 0.6421, "step": 9850 }, { "epoch": 0.3570367148707912, "grad_norm": 2.4122205128166248, "learning_rate": 7.447975964565264e-06, "loss": 0.9319, "step": 9851 }, { "epoch": 0.35707295857344784, "grad_norm": 2.4649167126688796, "learning_rate": 7.4474641756171215e-06, "loss": 1.059, "step": 9852 }, { "epoch": 0.35710920227610454, "grad_norm": 2.1921779053846526, "learning_rate": 7.446952352944404e-06, "loss": 0.9441, "step": 9853 }, { "epoch": 0.3571454459787612, "grad_norm": 2.2780993541530625, "learning_rate": 7.446440496554163e-06, "loss": 0.9021, "step": 9854 }, { "epoch": 0.35718168968141784, "grad_norm": 2.2893680026683905, "learning_rate": 7.44592860645345e-06, "loss": 1.0923, "step": 9855 }, { "epoch": 0.35721793338407454, "grad_norm": 2.2261591407793593, "learning_rate": 7.445416682649321e-06, "loss": 0.8919, "step": 9856 }, { "epoch": 0.3572541770867312, "grad_norm": 2.5054140757469607, "learning_rate": 7.4449047251488285e-06, "loss": 0.9677, "step": 9857 }, { "epoch": 0.35729042078938783, "grad_norm": 2.1354199529255418, "learning_rate": 7.444392733959027e-06, "loss": 0.909, "step": 9858 }, { "epoch": 0.35732666449204453, "grad_norm": 2.3200946785314733, "learning_rate": 7.4438807090869736e-06, "loss": 1.0383, "step": 9859 }, { "epoch": 0.3573629081947012, "grad_norm": 2.2759345693178212, "learning_rate": 7.443368650539722e-06, "loss": 0.9967, "step": 9860 }, { "epoch": 0.3573991518973578, "grad_norm": 2.460546987887434, "learning_rate": 7.442856558324329e-06, "loss": 0.9616, "step": 9861 }, { "epoch": 0.35743539560001447, "grad_norm": 2.172465553597475, "learning_rate": 7.442344432447848e-06, "loss": 0.8595, "step": 9862 }, { "epoch": 0.3574716393026712, "grad_norm": 2.2658795861668435, "learning_rate": 7.441832272917337e-06, "loss": 0.6744, "step": 9863 }, { "epoch": 0.3575078830053278, "grad_norm": 2.6575942419209944, "learning_rate": 7.441320079739855e-06, "loss": 1.1833, "step": 9864 }, { "epoch": 0.35754412670798447, "grad_norm": 2.113500607143641, "learning_rate": 7.4408078529224605e-06, "loss": 0.9613, "step": 9865 }, { "epoch": 0.35758037041064117, "grad_norm": 2.382257928643031, "learning_rate": 7.440295592472207e-06, "loss": 0.9312, "step": 9866 }, { "epoch": 0.3576166141132978, "grad_norm": 2.4477246160030206, "learning_rate": 7.4397832983961595e-06, "loss": 0.9795, "step": 9867 }, { "epoch": 0.35765285781595446, "grad_norm": 2.392745617557646, "learning_rate": 7.439270970701371e-06, "loss": 1.0541, "step": 9868 }, { "epoch": 0.35768910151861116, "grad_norm": 2.183260589945009, "learning_rate": 7.438758609394905e-06, "loss": 0.8532, "step": 9869 }, { "epoch": 0.3577253452212678, "grad_norm": 2.3300899585043933, "learning_rate": 7.43824621448382e-06, "loss": 0.7341, "step": 9870 }, { "epoch": 0.35776158892392446, "grad_norm": 2.195643543830895, "learning_rate": 7.437733785975177e-06, "loss": 0.8103, "step": 9871 }, { "epoch": 0.35779783262658116, "grad_norm": 2.502411078712058, "learning_rate": 7.437221323876036e-06, "loss": 1.0668, "step": 9872 }, { "epoch": 0.3578340763292378, "grad_norm": 2.401276743642893, "learning_rate": 7.4367088281934595e-06, "loss": 1.07, "step": 9873 }, { "epoch": 0.35787032003189445, "grad_norm": 2.577053959274623, "learning_rate": 7.4361962989345085e-06, "loss": 1.0139, "step": 9874 }, { "epoch": 0.3579065637345511, "grad_norm": 2.562260699837028, "learning_rate": 7.435683736106248e-06, "loss": 1.1313, "step": 9875 }, { "epoch": 0.3579428074372078, "grad_norm": 2.3212261107422, "learning_rate": 7.435171139715735e-06, "loss": 1.0512, "step": 9876 }, { "epoch": 0.35797905113986445, "grad_norm": 2.547032794658378, "learning_rate": 7.43465850977004e-06, "loss": 1.012, "step": 9877 }, { "epoch": 0.3580152948425211, "grad_norm": 2.1068277174891157, "learning_rate": 7.434145846276221e-06, "loss": 0.9898, "step": 9878 }, { "epoch": 0.3580515385451778, "grad_norm": 2.5145952993880862, "learning_rate": 7.433633149241345e-06, "loss": 0.9073, "step": 9879 }, { "epoch": 0.35808778224783444, "grad_norm": 2.207575041085713, "learning_rate": 7.4331204186724774e-06, "loss": 0.7422, "step": 9880 }, { "epoch": 0.3581240259504911, "grad_norm": 2.2881376378947453, "learning_rate": 7.43260765457668e-06, "loss": 0.9842, "step": 9881 }, { "epoch": 0.3581602696531478, "grad_norm": 2.193814155139834, "learning_rate": 7.432094856961021e-06, "loss": 0.9506, "step": 9882 }, { "epoch": 0.35819651335580444, "grad_norm": 2.2164684045242864, "learning_rate": 7.431582025832565e-06, "loss": 0.9108, "step": 9883 }, { "epoch": 0.3582327570584611, "grad_norm": 2.4341256619694898, "learning_rate": 7.4310691611983795e-06, "loss": 1.0401, "step": 9884 }, { "epoch": 0.35826900076111773, "grad_norm": 2.2123437397848735, "learning_rate": 7.430556263065532e-06, "loss": 0.9111, "step": 9885 }, { "epoch": 0.35830524446377443, "grad_norm": 2.4391172563709964, "learning_rate": 7.430043331441089e-06, "loss": 0.9589, "step": 9886 }, { "epoch": 0.3583414881664311, "grad_norm": 2.3140317982775422, "learning_rate": 7.429530366332118e-06, "loss": 0.907, "step": 9887 }, { "epoch": 0.3583777318690877, "grad_norm": 2.585001053866474, "learning_rate": 7.4290173677456875e-06, "loss": 0.9198, "step": 9888 }, { "epoch": 0.3584139755717444, "grad_norm": 2.3798915358464736, "learning_rate": 7.4285043356888685e-06, "loss": 1.0233, "step": 9889 }, { "epoch": 0.35845021927440107, "grad_norm": 2.259130332299004, "learning_rate": 7.427991270168729e-06, "loss": 0.8468, "step": 9890 }, { "epoch": 0.3584864629770577, "grad_norm": 2.4467789896203564, "learning_rate": 7.427478171192336e-06, "loss": 0.9472, "step": 9891 }, { "epoch": 0.3585227066797144, "grad_norm": 2.0213126494451976, "learning_rate": 7.426965038766763e-06, "loss": 0.9458, "step": 9892 }, { "epoch": 0.35855895038237107, "grad_norm": 2.3309395439842664, "learning_rate": 7.426451872899079e-06, "loss": 0.7928, "step": 9893 }, { "epoch": 0.3585951940850277, "grad_norm": 2.4851996278010517, "learning_rate": 7.425938673596355e-06, "loss": 0.7946, "step": 9894 }, { "epoch": 0.3586314377876844, "grad_norm": 2.397788326394517, "learning_rate": 7.4254254408656635e-06, "loss": 0.8443, "step": 9895 }, { "epoch": 0.35866768149034106, "grad_norm": 2.1649170187876527, "learning_rate": 7.424912174714076e-06, "loss": 0.9065, "step": 9896 }, { "epoch": 0.3587039251929977, "grad_norm": 2.355125606448241, "learning_rate": 7.424398875148666e-06, "loss": 0.8839, "step": 9897 }, { "epoch": 0.35874016889565435, "grad_norm": 2.654858449516322, "learning_rate": 7.423885542176505e-06, "loss": 1.0328, "step": 9898 }, { "epoch": 0.35877641259831106, "grad_norm": 2.545995460549321, "learning_rate": 7.423372175804668e-06, "loss": 0.9988, "step": 9899 }, { "epoch": 0.3588126563009677, "grad_norm": 2.4539519086353523, "learning_rate": 7.422858776040229e-06, "loss": 0.9989, "step": 9900 }, { "epoch": 0.35884890000362435, "grad_norm": 2.2976619032079255, "learning_rate": 7.42234534289026e-06, "loss": 1.1471, "step": 9901 }, { "epoch": 0.35888514370628105, "grad_norm": 2.3653731195045324, "learning_rate": 7.421831876361838e-06, "loss": 0.9247, "step": 9902 }, { "epoch": 0.3589213874089377, "grad_norm": 2.2413576570587543, "learning_rate": 7.421318376462036e-06, "loss": 0.9169, "step": 9903 }, { "epoch": 0.35895763111159434, "grad_norm": 2.705696478802226, "learning_rate": 7.420804843197933e-06, "loss": 0.9383, "step": 9904 }, { "epoch": 0.35899387481425105, "grad_norm": 2.469826297860851, "learning_rate": 7.4202912765766025e-06, "loss": 0.8468, "step": 9905 }, { "epoch": 0.3590301185169077, "grad_norm": 2.593605888575023, "learning_rate": 7.419777676605121e-06, "loss": 0.8987, "step": 9906 }, { "epoch": 0.35906636221956434, "grad_norm": 2.421353186638179, "learning_rate": 7.419264043290567e-06, "loss": 0.9579, "step": 9907 }, { "epoch": 0.35910260592222104, "grad_norm": 2.8205911209067325, "learning_rate": 7.4187503766400185e-06, "loss": 0.8483, "step": 9908 }, { "epoch": 0.3591388496248777, "grad_norm": 2.3708173165018533, "learning_rate": 7.418236676660552e-06, "loss": 0.7814, "step": 9909 }, { "epoch": 0.35917509332753433, "grad_norm": 2.7396762500966463, "learning_rate": 7.417722943359247e-06, "loss": 1.0458, "step": 9910 }, { "epoch": 0.359211337030191, "grad_norm": 2.399150873474193, "learning_rate": 7.417209176743182e-06, "loss": 0.86, "step": 9911 }, { "epoch": 0.3592475807328477, "grad_norm": 2.2781188185287156, "learning_rate": 7.416695376819435e-06, "loss": 1.0374, "step": 9912 }, { "epoch": 0.35928382443550433, "grad_norm": 2.253551746100611, "learning_rate": 7.416181543595089e-06, "loss": 0.9188, "step": 9913 }, { "epoch": 0.359320068138161, "grad_norm": 2.2532049699602115, "learning_rate": 7.415667677077222e-06, "loss": 1.0222, "step": 9914 }, { "epoch": 0.3593563118408177, "grad_norm": 2.239310763413581, "learning_rate": 7.415153777272916e-06, "loss": 1.077, "step": 9915 }, { "epoch": 0.3593925555434743, "grad_norm": 2.037567448089029, "learning_rate": 7.4146398441892494e-06, "loss": 0.9977, "step": 9916 }, { "epoch": 0.35942879924613097, "grad_norm": 2.493178544713335, "learning_rate": 7.414125877833308e-06, "loss": 0.9612, "step": 9917 }, { "epoch": 0.35946504294878767, "grad_norm": 2.4071739285717086, "learning_rate": 7.413611878212172e-06, "loss": 0.9143, "step": 9918 }, { "epoch": 0.3595012866514443, "grad_norm": 2.293024530161505, "learning_rate": 7.413097845332924e-06, "loss": 1.0647, "step": 9919 }, { "epoch": 0.35953753035410096, "grad_norm": 2.5182673676408998, "learning_rate": 7.412583779202646e-06, "loss": 0.7916, "step": 9920 }, { "epoch": 0.3595737740567576, "grad_norm": 2.000113029213829, "learning_rate": 7.412069679828424e-06, "loss": 0.8683, "step": 9921 }, { "epoch": 0.3596100177594143, "grad_norm": 2.245969630201573, "learning_rate": 7.4115555472173395e-06, "loss": 0.9777, "step": 9922 }, { "epoch": 0.35964626146207096, "grad_norm": 2.151523762652557, "learning_rate": 7.411041381376478e-06, "loss": 0.8196, "step": 9923 }, { "epoch": 0.3596825051647276, "grad_norm": 2.290947874591797, "learning_rate": 7.4105271823129255e-06, "loss": 0.9608, "step": 9924 }, { "epoch": 0.3597187488673843, "grad_norm": 2.490776324687158, "learning_rate": 7.410012950033764e-06, "loss": 0.9293, "step": 9925 }, { "epoch": 0.35975499257004095, "grad_norm": 2.3310062036965324, "learning_rate": 7.409498684546084e-06, "loss": 0.8637, "step": 9926 }, { "epoch": 0.3597912362726976, "grad_norm": 1.9953210895988562, "learning_rate": 7.408984385856969e-06, "loss": 0.8319, "step": 9927 }, { "epoch": 0.3598274799753543, "grad_norm": 2.3288176837700516, "learning_rate": 7.408470053973505e-06, "loss": 1.0991, "step": 9928 }, { "epoch": 0.35986372367801095, "grad_norm": 2.297972490075331, "learning_rate": 7.4079556889027815e-06, "loss": 1.018, "step": 9929 }, { "epoch": 0.3598999673806676, "grad_norm": 2.5299952347284633, "learning_rate": 7.407441290651884e-06, "loss": 0.9739, "step": 9930 }, { "epoch": 0.3599362110833243, "grad_norm": 2.300856466552683, "learning_rate": 7.406926859227902e-06, "loss": 0.9797, "step": 9931 }, { "epoch": 0.35997245478598094, "grad_norm": 2.3781440746210967, "learning_rate": 7.406412394637923e-06, "loss": 0.9444, "step": 9932 }, { "epoch": 0.3600086984886376, "grad_norm": 2.5008350243308946, "learning_rate": 7.405897896889037e-06, "loss": 1.0159, "step": 9933 }, { "epoch": 0.36004494219129424, "grad_norm": 2.2918520314669646, "learning_rate": 7.405383365988332e-06, "loss": 0.767, "step": 9934 }, { "epoch": 0.36008118589395094, "grad_norm": 2.277113870979427, "learning_rate": 7.404868801942901e-06, "loss": 1.0697, "step": 9935 }, { "epoch": 0.3601174295966076, "grad_norm": 2.199515070723957, "learning_rate": 7.40435420475983e-06, "loss": 0.9207, "step": 9936 }, { "epoch": 0.36015367329926423, "grad_norm": 2.2855418663798663, "learning_rate": 7.403839574446214e-06, "loss": 1.0243, "step": 9937 }, { "epoch": 0.36018991700192093, "grad_norm": 2.011060288115991, "learning_rate": 7.403324911009142e-06, "loss": 0.7695, "step": 9938 }, { "epoch": 0.3602261607045776, "grad_norm": 2.255629619170103, "learning_rate": 7.402810214455706e-06, "loss": 0.8896, "step": 9939 }, { "epoch": 0.3602624044072342, "grad_norm": 2.4026576016191155, "learning_rate": 7.4022954847929986e-06, "loss": 0.9471, "step": 9940 }, { "epoch": 0.36029864810989093, "grad_norm": 2.016274306519577, "learning_rate": 7.4017807220281115e-06, "loss": 0.7388, "step": 9941 }, { "epoch": 0.3603348918125476, "grad_norm": 2.53427749148209, "learning_rate": 7.4012659261681395e-06, "loss": 0.9002, "step": 9942 }, { "epoch": 0.3603711355152042, "grad_norm": 2.3398278774934766, "learning_rate": 7.400751097220174e-06, "loss": 1.07, "step": 9943 }, { "epoch": 0.3604073792178609, "grad_norm": 2.5351932550801246, "learning_rate": 7.400236235191312e-06, "loss": 0.9632, "step": 9944 }, { "epoch": 0.36044362292051757, "grad_norm": 2.267985504308254, "learning_rate": 7.399721340088645e-06, "loss": 0.9988, "step": 9945 }, { "epoch": 0.3604798666231742, "grad_norm": 2.1425277763723036, "learning_rate": 7.3992064119192695e-06, "loss": 0.9389, "step": 9946 }, { "epoch": 0.36051611032583086, "grad_norm": 2.218487112444953, "learning_rate": 7.398691450690282e-06, "loss": 0.8818, "step": 9947 }, { "epoch": 0.36055235402848756, "grad_norm": 2.4099502399964945, "learning_rate": 7.3981764564087754e-06, "loss": 0.9285, "step": 9948 }, { "epoch": 0.3605885977311442, "grad_norm": 2.5566250916930158, "learning_rate": 7.397661429081848e-06, "loss": 0.78, "step": 9949 }, { "epoch": 0.36062484143380086, "grad_norm": 2.455425780509401, "learning_rate": 7.397146368716597e-06, "loss": 1.0288, "step": 9950 }, { "epoch": 0.36066108513645756, "grad_norm": 2.155137208442383, "learning_rate": 7.396631275320116e-06, "loss": 1.0309, "step": 9951 }, { "epoch": 0.3606973288391142, "grad_norm": 2.2708161775630145, "learning_rate": 7.396116148899509e-06, "loss": 0.9852, "step": 9952 }, { "epoch": 0.36073357254177085, "grad_norm": 2.2871301701187794, "learning_rate": 7.3956009894618685e-06, "loss": 0.9186, "step": 9953 }, { "epoch": 0.36076981624442755, "grad_norm": 2.3106451154381524, "learning_rate": 7.395085797014297e-06, "loss": 0.9034, "step": 9954 }, { "epoch": 0.3608060599470842, "grad_norm": 2.241106755306107, "learning_rate": 7.39457057156389e-06, "loss": 0.9752, "step": 9955 }, { "epoch": 0.36084230364974085, "grad_norm": 2.4530979042656638, "learning_rate": 7.394055313117749e-06, "loss": 0.911, "step": 9956 }, { "epoch": 0.3608785473523975, "grad_norm": 2.2884366671028595, "learning_rate": 7.393540021682974e-06, "loss": 0.9071, "step": 9957 }, { "epoch": 0.3609147910550542, "grad_norm": 2.3494293916572864, "learning_rate": 7.393024697266665e-06, "loss": 0.7799, "step": 9958 }, { "epoch": 0.36095103475771084, "grad_norm": 2.3744638796747806, "learning_rate": 7.392509339875923e-06, "loss": 0.9551, "step": 9959 }, { "epoch": 0.3609872784603675, "grad_norm": 2.2329966473241973, "learning_rate": 7.391993949517849e-06, "loss": 1.0675, "step": 9960 }, { "epoch": 0.3610235221630242, "grad_norm": 2.2770622982204163, "learning_rate": 7.3914785261995446e-06, "loss": 0.9422, "step": 9961 }, { "epoch": 0.36105976586568084, "grad_norm": 2.484769287801088, "learning_rate": 7.390963069928113e-06, "loss": 0.8528, "step": 9962 }, { "epoch": 0.3610960095683375, "grad_norm": 2.3001552156336187, "learning_rate": 7.390447580710655e-06, "loss": 1.0758, "step": 9963 }, { "epoch": 0.3611322532709942, "grad_norm": 2.20963551240995, "learning_rate": 7.389932058554277e-06, "loss": 1.052, "step": 9964 }, { "epoch": 0.36116849697365083, "grad_norm": 2.0988885423578125, "learning_rate": 7.38941650346608e-06, "loss": 0.895, "step": 9965 }, { "epoch": 0.3612047406763075, "grad_norm": 2.595388762892153, "learning_rate": 7.388900915453169e-06, "loss": 0.9773, "step": 9966 }, { "epoch": 0.3612409843789642, "grad_norm": 2.3012041092467532, "learning_rate": 7.388385294522649e-06, "loss": 0.9675, "step": 9967 }, { "epoch": 0.3612772280816208, "grad_norm": 2.1758151157873584, "learning_rate": 7.387869640681622e-06, "loss": 0.9723, "step": 9968 }, { "epoch": 0.3613134717842775, "grad_norm": 2.052095380707706, "learning_rate": 7.387353953937196e-06, "loss": 0.8913, "step": 9969 }, { "epoch": 0.3613497154869341, "grad_norm": 2.239648167719825, "learning_rate": 7.386838234296477e-06, "loss": 0.9217, "step": 9970 }, { "epoch": 0.3613859591895908, "grad_norm": 2.3169550231331306, "learning_rate": 7.38632248176657e-06, "loss": 1.1289, "step": 9971 }, { "epoch": 0.36142220289224747, "grad_norm": 2.3251143204451994, "learning_rate": 7.385806696354583e-06, "loss": 0.9937, "step": 9972 }, { "epoch": 0.3614584465949041, "grad_norm": 2.256284805436236, "learning_rate": 7.385290878067622e-06, "loss": 0.9513, "step": 9973 }, { "epoch": 0.3614946902975608, "grad_norm": 2.2519195678080255, "learning_rate": 7.384775026912796e-06, "loss": 0.9166, "step": 9974 }, { "epoch": 0.36153093400021746, "grad_norm": 2.161255988083805, "learning_rate": 7.384259142897211e-06, "loss": 0.7848, "step": 9975 }, { "epoch": 0.3615671777028741, "grad_norm": 2.4469960017729466, "learning_rate": 7.383743226027978e-06, "loss": 0.9465, "step": 9976 }, { "epoch": 0.3616034214055308, "grad_norm": 2.442227956727835, "learning_rate": 7.383227276312206e-06, "loss": 1.0572, "step": 9977 }, { "epoch": 0.36163966510818746, "grad_norm": 2.1120306107445734, "learning_rate": 7.3827112937570025e-06, "loss": 0.8859, "step": 9978 }, { "epoch": 0.3616759088108441, "grad_norm": 2.2022465486367624, "learning_rate": 7.382195278369477e-06, "loss": 0.7896, "step": 9979 }, { "epoch": 0.3617121525135008, "grad_norm": 2.3692635545864036, "learning_rate": 7.3816792301567445e-06, "loss": 0.935, "step": 9980 }, { "epoch": 0.36174839621615745, "grad_norm": 2.243307355194207, "learning_rate": 7.3811631491259095e-06, "loss": 0.6757, "step": 9981 }, { "epoch": 0.3617846399188141, "grad_norm": 2.3585599688495598, "learning_rate": 7.380647035284088e-06, "loss": 0.9507, "step": 9982 }, { "epoch": 0.36182088362147075, "grad_norm": 2.320452732370664, "learning_rate": 7.3801308886383885e-06, "loss": 0.772, "step": 9983 }, { "epoch": 0.36185712732412745, "grad_norm": 2.2528501032450707, "learning_rate": 7.379614709195926e-06, "loss": 1.0156, "step": 9984 }, { "epoch": 0.3618933710267841, "grad_norm": 2.209218328453714, "learning_rate": 7.3790984969638115e-06, "loss": 0.8318, "step": 9985 }, { "epoch": 0.36192961472944074, "grad_norm": 2.202912143952446, "learning_rate": 7.37858225194916e-06, "loss": 0.8163, "step": 9986 }, { "epoch": 0.36196585843209744, "grad_norm": 2.316398853579721, "learning_rate": 7.378065974159082e-06, "loss": 0.8196, "step": 9987 }, { "epoch": 0.3620021021347541, "grad_norm": 2.4246432600462504, "learning_rate": 7.377549663600695e-06, "loss": 1.062, "step": 9988 }, { "epoch": 0.36203834583741074, "grad_norm": 2.364072588394877, "learning_rate": 7.377033320281109e-06, "loss": 1.0399, "step": 9989 }, { "epoch": 0.36207458954006744, "grad_norm": 2.574389867687888, "learning_rate": 7.3765169442074435e-06, "loss": 1.0128, "step": 9990 }, { "epoch": 0.3621108332427241, "grad_norm": 2.1963044448880447, "learning_rate": 7.37600053538681e-06, "loss": 1.0476, "step": 9991 }, { "epoch": 0.36214707694538073, "grad_norm": 2.411201525359618, "learning_rate": 7.375484093826327e-06, "loss": 0.9071, "step": 9992 }, { "epoch": 0.3621833206480374, "grad_norm": 2.151000083720606, "learning_rate": 7.374967619533109e-06, "loss": 0.756, "step": 9993 }, { "epoch": 0.3622195643506941, "grad_norm": 2.377258753891521, "learning_rate": 7.374451112514275e-06, "loss": 0.9127, "step": 9994 }, { "epoch": 0.3622558080533507, "grad_norm": 2.301744297443805, "learning_rate": 7.3739345727769396e-06, "loss": 1.1057, "step": 9995 }, { "epoch": 0.36229205175600737, "grad_norm": 2.0994546918966353, "learning_rate": 7.373418000328223e-06, "loss": 0.9275, "step": 9996 }, { "epoch": 0.3623282954586641, "grad_norm": 2.255587770697563, "learning_rate": 7.37290139517524e-06, "loss": 0.8503, "step": 9997 }, { "epoch": 0.3623645391613207, "grad_norm": 2.526795604716562, "learning_rate": 7.372384757325113e-06, "loss": 0.8714, "step": 9998 }, { "epoch": 0.36240078286397737, "grad_norm": 2.5232564897280887, "learning_rate": 7.371868086784956e-06, "loss": 1.0175, "step": 9999 }, { "epoch": 0.36243702656663407, "grad_norm": 2.78604753573846, "learning_rate": 7.3713513835618935e-06, "loss": 0.9216, "step": 10000 }, { "epoch": 0.3624732702692907, "grad_norm": 2.2511656473948007, "learning_rate": 7.370834647663041e-06, "loss": 0.9013, "step": 10001 }, { "epoch": 0.36250951397194736, "grad_norm": 2.562031174057189, "learning_rate": 7.370317879095522e-06, "loss": 0.899, "step": 10002 }, { "epoch": 0.36254575767460406, "grad_norm": 2.287433166208485, "learning_rate": 7.369801077866456e-06, "loss": 0.9641, "step": 10003 }, { "epoch": 0.3625820013772607, "grad_norm": 2.293322405349757, "learning_rate": 7.369284243982965e-06, "loss": 0.8903, "step": 10004 }, { "epoch": 0.36261824507991736, "grad_norm": 2.097328746923398, "learning_rate": 7.368767377452171e-06, "loss": 0.9565, "step": 10005 }, { "epoch": 0.362654488782574, "grad_norm": 2.629250433221133, "learning_rate": 7.368250478281193e-06, "loss": 0.948, "step": 10006 }, { "epoch": 0.3626907324852307, "grad_norm": 2.3377746312181857, "learning_rate": 7.367733546477156e-06, "loss": 0.924, "step": 10007 }, { "epoch": 0.36272697618788735, "grad_norm": 2.412289382003187, "learning_rate": 7.367216582047183e-06, "loss": 0.9243, "step": 10008 }, { "epoch": 0.362763219890544, "grad_norm": 2.4679717283692164, "learning_rate": 7.366699584998397e-06, "loss": 0.992, "step": 10009 }, { "epoch": 0.3627994635932007, "grad_norm": 1.9400439017592215, "learning_rate": 7.366182555337923e-06, "loss": 0.7818, "step": 10010 }, { "epoch": 0.36283570729585735, "grad_norm": 2.326660236900671, "learning_rate": 7.365665493072882e-06, "loss": 0.922, "step": 10011 }, { "epoch": 0.362871950998514, "grad_norm": 2.5954754103950237, "learning_rate": 7.365148398210404e-06, "loss": 1.0956, "step": 10012 }, { "epoch": 0.3629081947011707, "grad_norm": 2.606352402836422, "learning_rate": 7.36463127075761e-06, "loss": 0.9529, "step": 10013 }, { "epoch": 0.36294443840382734, "grad_norm": 2.080368014503596, "learning_rate": 7.364114110721627e-06, "loss": 0.6745, "step": 10014 }, { "epoch": 0.362980682106484, "grad_norm": 2.352561694065798, "learning_rate": 7.363596918109583e-06, "loss": 0.8921, "step": 10015 }, { "epoch": 0.3630169258091407, "grad_norm": 2.28568560353213, "learning_rate": 7.363079692928602e-06, "loss": 1.0195, "step": 10016 }, { "epoch": 0.36305316951179734, "grad_norm": 2.3520390567597333, "learning_rate": 7.362562435185812e-06, "loss": 0.8006, "step": 10017 }, { "epoch": 0.363089413214454, "grad_norm": 2.028850483229926, "learning_rate": 7.3620451448883405e-06, "loss": 0.8917, "step": 10018 }, { "epoch": 0.36312565691711063, "grad_norm": 2.4061405668075855, "learning_rate": 7.361527822043314e-06, "loss": 0.9992, "step": 10019 }, { "epoch": 0.36316190061976733, "grad_norm": 2.3182681424010645, "learning_rate": 7.361010466657863e-06, "loss": 0.987, "step": 10020 }, { "epoch": 0.363198144322424, "grad_norm": 2.5462298514807284, "learning_rate": 7.360493078739116e-06, "loss": 0.9497, "step": 10021 }, { "epoch": 0.3632343880250806, "grad_norm": 2.493433405555753, "learning_rate": 7.359975658294202e-06, "loss": 1.0539, "step": 10022 }, { "epoch": 0.3632706317277373, "grad_norm": 2.0301633639427696, "learning_rate": 7.359458205330251e-06, "loss": 0.9679, "step": 10023 }, { "epoch": 0.36330687543039397, "grad_norm": 2.3574302594423595, "learning_rate": 7.358940719854393e-06, "loss": 1.0005, "step": 10024 }, { "epoch": 0.3633431191330506, "grad_norm": 2.3789376328782392, "learning_rate": 7.358423201873759e-06, "loss": 0.981, "step": 10025 }, { "epoch": 0.3633793628357073, "grad_norm": 2.016376628496102, "learning_rate": 7.357905651395479e-06, "loss": 0.9337, "step": 10026 }, { "epoch": 0.36341560653836397, "grad_norm": 2.403635453436403, "learning_rate": 7.357388068426684e-06, "loss": 0.9519, "step": 10027 }, { "epoch": 0.3634518502410206, "grad_norm": 2.1004543018546413, "learning_rate": 7.356870452974507e-06, "loss": 0.9411, "step": 10028 }, { "epoch": 0.36348809394367726, "grad_norm": 2.5120130031125174, "learning_rate": 7.356352805046083e-06, "loss": 1.0842, "step": 10029 }, { "epoch": 0.36352433764633396, "grad_norm": 1.9114596090463836, "learning_rate": 7.355835124648541e-06, "loss": 0.8855, "step": 10030 }, { "epoch": 0.3635605813489906, "grad_norm": 2.048802835838916, "learning_rate": 7.355317411789015e-06, "loss": 0.9467, "step": 10031 }, { "epoch": 0.36359682505164725, "grad_norm": 2.457906728829255, "learning_rate": 7.3547996664746406e-06, "loss": 0.9057, "step": 10032 }, { "epoch": 0.36363306875430396, "grad_norm": 2.253342173443859, "learning_rate": 7.35428188871255e-06, "loss": 0.9452, "step": 10033 }, { "epoch": 0.3636693124569606, "grad_norm": 2.22098334642478, "learning_rate": 7.353764078509879e-06, "loss": 1.0005, "step": 10034 }, { "epoch": 0.36370555615961725, "grad_norm": 2.393193804481356, "learning_rate": 7.353246235873765e-06, "loss": 0.8189, "step": 10035 }, { "epoch": 0.36374179986227395, "grad_norm": 2.204943655313522, "learning_rate": 7.352728360811338e-06, "loss": 0.9225, "step": 10036 }, { "epoch": 0.3637780435649306, "grad_norm": 2.4450862890365532, "learning_rate": 7.3522104533297374e-06, "loss": 0.8972, "step": 10037 }, { "epoch": 0.36381428726758724, "grad_norm": 2.272937538894512, "learning_rate": 7.3516925134361e-06, "loss": 0.869, "step": 10038 }, { "epoch": 0.36385053097024395, "grad_norm": 2.3303645750094075, "learning_rate": 7.351174541137562e-06, "loss": 0.9513, "step": 10039 }, { "epoch": 0.3638867746729006, "grad_norm": 2.511549547041268, "learning_rate": 7.350656536441261e-06, "loss": 1.0557, "step": 10040 }, { "epoch": 0.36392301837555724, "grad_norm": 2.285581849217424, "learning_rate": 7.350138499354334e-06, "loss": 1.2121, "step": 10041 }, { "epoch": 0.3639592620782139, "grad_norm": 2.1138718831857273, "learning_rate": 7.349620429883921e-06, "loss": 1.1062, "step": 10042 }, { "epoch": 0.3639955057808706, "grad_norm": 2.701153242091318, "learning_rate": 7.349102328037157e-06, "loss": 1.0894, "step": 10043 }, { "epoch": 0.36403174948352723, "grad_norm": 2.8008017598474644, "learning_rate": 7.3485841938211866e-06, "loss": 1.0372, "step": 10044 }, { "epoch": 0.3640679931861839, "grad_norm": 2.129569683916211, "learning_rate": 7.348066027243146e-06, "loss": 0.888, "step": 10045 }, { "epoch": 0.3641042368888406, "grad_norm": 2.4834320646040897, "learning_rate": 7.347547828310173e-06, "loss": 0.9447, "step": 10046 }, { "epoch": 0.36414048059149723, "grad_norm": 2.41656999234694, "learning_rate": 7.347029597029414e-06, "loss": 0.7924, "step": 10047 }, { "epoch": 0.3641767242941539, "grad_norm": 2.314774970906115, "learning_rate": 7.346511333408004e-06, "loss": 1.0394, "step": 10048 }, { "epoch": 0.3642129679968106, "grad_norm": 1.7663164010911743, "learning_rate": 7.345993037453088e-06, "loss": 0.9244, "step": 10049 }, { "epoch": 0.3642492116994672, "grad_norm": 2.264325570369531, "learning_rate": 7.345474709171806e-06, "loss": 0.8999, "step": 10050 }, { "epoch": 0.36428545540212387, "grad_norm": 2.4342797009494195, "learning_rate": 7.344956348571301e-06, "loss": 0.9489, "step": 10051 }, { "epoch": 0.36432169910478057, "grad_norm": 2.3000663940238577, "learning_rate": 7.3444379556587164e-06, "loss": 0.7912, "step": 10052 }, { "epoch": 0.3643579428074372, "grad_norm": 2.3987622777764166, "learning_rate": 7.3439195304411945e-06, "loss": 0.8698, "step": 10053 }, { "epoch": 0.36439418651009386, "grad_norm": 2.359628487267147, "learning_rate": 7.343401072925879e-06, "loss": 0.9218, "step": 10054 }, { "epoch": 0.3644304302127505, "grad_norm": 2.1955183309525674, "learning_rate": 7.342882583119916e-06, "loss": 0.9144, "step": 10055 }, { "epoch": 0.3644666739154072, "grad_norm": 2.287510310123708, "learning_rate": 7.342364061030445e-06, "loss": 0.9357, "step": 10056 }, { "epoch": 0.36450291761806386, "grad_norm": 2.4703033892435537, "learning_rate": 7.341845506664615e-06, "loss": 0.8623, "step": 10057 }, { "epoch": 0.3645391613207205, "grad_norm": 2.2751550398487015, "learning_rate": 7.34132692002957e-06, "loss": 0.9001, "step": 10058 }, { "epoch": 0.3645754050233772, "grad_norm": 2.515710192811453, "learning_rate": 7.340808301132456e-06, "loss": 1.1147, "step": 10059 }, { "epoch": 0.36461164872603385, "grad_norm": 2.8607232408399588, "learning_rate": 7.3402896499804185e-06, "loss": 0.914, "step": 10060 }, { "epoch": 0.3646478924286905, "grad_norm": 2.599593675757064, "learning_rate": 7.339770966580606e-06, "loss": 0.9844, "step": 10061 }, { "epoch": 0.3646841361313472, "grad_norm": 2.889135553719581, "learning_rate": 7.339252250940165e-06, "loss": 0.7967, "step": 10062 }, { "epoch": 0.36472037983400385, "grad_norm": 2.2068528979271234, "learning_rate": 7.338733503066242e-06, "loss": 0.8561, "step": 10063 }, { "epoch": 0.3647566235366605, "grad_norm": 2.4825877700522105, "learning_rate": 7.338214722965986e-06, "loss": 0.9622, "step": 10064 }, { "epoch": 0.36479286723931714, "grad_norm": 2.306758411446639, "learning_rate": 7.337695910646545e-06, "loss": 0.9044, "step": 10065 }, { "epoch": 0.36482911094197384, "grad_norm": 2.412190845875401, "learning_rate": 7.337177066115069e-06, "loss": 1.0246, "step": 10066 }, { "epoch": 0.3648653546446305, "grad_norm": 2.7041776883757804, "learning_rate": 7.336658189378706e-06, "loss": 0.955, "step": 10067 }, { "epoch": 0.36490159834728714, "grad_norm": 2.1902601021547596, "learning_rate": 7.336139280444605e-06, "loss": 0.9266, "step": 10068 }, { "epoch": 0.36493784204994384, "grad_norm": 2.159933347047135, "learning_rate": 7.33562033931992e-06, "loss": 1.1537, "step": 10069 }, { "epoch": 0.3649740857526005, "grad_norm": 2.245150900781664, "learning_rate": 7.335101366011796e-06, "loss": 0.8366, "step": 10070 }, { "epoch": 0.36501032945525713, "grad_norm": 2.329144051922015, "learning_rate": 7.33458236052739e-06, "loss": 0.9265, "step": 10071 }, { "epoch": 0.36504657315791383, "grad_norm": 2.3832775717173478, "learning_rate": 7.33406332287385e-06, "loss": 0.9637, "step": 10072 }, { "epoch": 0.3650828168605705, "grad_norm": 2.4252359469655578, "learning_rate": 7.33354425305833e-06, "loss": 1.0544, "step": 10073 }, { "epoch": 0.3651190605632271, "grad_norm": 2.1736500088901134, "learning_rate": 7.3330251510879815e-06, "loss": 0.9027, "step": 10074 }, { "epoch": 0.36515530426588383, "grad_norm": 2.246858377146226, "learning_rate": 7.332506016969957e-06, "loss": 0.9496, "step": 10075 }, { "epoch": 0.3651915479685405, "grad_norm": 2.2535378306620166, "learning_rate": 7.331986850711409e-06, "loss": 0.9654, "step": 10076 }, { "epoch": 0.3652277916711971, "grad_norm": 2.4229620248756993, "learning_rate": 7.331467652319494e-06, "loss": 1.2756, "step": 10077 }, { "epoch": 0.36526403537385377, "grad_norm": 2.3063311640394484, "learning_rate": 7.330948421801364e-06, "loss": 0.6969, "step": 10078 }, { "epoch": 0.36530027907651047, "grad_norm": 2.5667611387036446, "learning_rate": 7.330429159164174e-06, "loss": 1.0748, "step": 10079 }, { "epoch": 0.3653365227791671, "grad_norm": 2.055317311003014, "learning_rate": 7.32990986441508e-06, "loss": 0.7931, "step": 10080 }, { "epoch": 0.36537276648182376, "grad_norm": 2.2000499868736907, "learning_rate": 7.3293905375612385e-06, "loss": 0.8979, "step": 10081 }, { "epoch": 0.36540901018448046, "grad_norm": 2.3800554202194175, "learning_rate": 7.328871178609802e-06, "loss": 0.968, "step": 10082 }, { "epoch": 0.3654452538871371, "grad_norm": 2.3122893720816458, "learning_rate": 7.328351787567931e-06, "loss": 0.8729, "step": 10083 }, { "epoch": 0.36548149758979376, "grad_norm": 2.290199466870987, "learning_rate": 7.327832364442779e-06, "loss": 0.8292, "step": 10084 }, { "epoch": 0.36551774129245046, "grad_norm": 2.404372670949319, "learning_rate": 7.327312909241507e-06, "loss": 0.9089, "step": 10085 }, { "epoch": 0.3655539849951071, "grad_norm": 2.512208450993582, "learning_rate": 7.326793421971269e-06, "loss": 0.8706, "step": 10086 }, { "epoch": 0.36559022869776375, "grad_norm": 2.292190805900452, "learning_rate": 7.3262739026392245e-06, "loss": 0.8194, "step": 10087 }, { "epoch": 0.3656264724004204, "grad_norm": 2.2309274673865875, "learning_rate": 7.325754351252533e-06, "loss": 0.8371, "step": 10088 }, { "epoch": 0.3656627161030771, "grad_norm": 2.358335959645732, "learning_rate": 7.325234767818354e-06, "loss": 1.041, "step": 10089 }, { "epoch": 0.36569895980573375, "grad_norm": 2.619705098034363, "learning_rate": 7.324715152343844e-06, "loss": 0.973, "step": 10090 }, { "epoch": 0.3657352035083904, "grad_norm": 2.476154284106603, "learning_rate": 7.324195504836167e-06, "loss": 0.8868, "step": 10091 }, { "epoch": 0.3657714472110471, "grad_norm": 2.235918073022539, "learning_rate": 7.323675825302482e-06, "loss": 1.0362, "step": 10092 }, { "epoch": 0.36580769091370374, "grad_norm": 2.383047186019977, "learning_rate": 7.323156113749948e-06, "loss": 1.0864, "step": 10093 }, { "epoch": 0.3658439346163604, "grad_norm": 2.216640502515113, "learning_rate": 7.3226363701857285e-06, "loss": 1.047, "step": 10094 }, { "epoch": 0.3658801783190171, "grad_norm": 2.320556749362241, "learning_rate": 7.322116594616985e-06, "loss": 0.972, "step": 10095 }, { "epoch": 0.36591642202167374, "grad_norm": 2.1279341873559985, "learning_rate": 7.321596787050878e-06, "loss": 0.9649, "step": 10096 }, { "epoch": 0.3659526657243304, "grad_norm": 2.041600729358016, "learning_rate": 7.321076947494572e-06, "loss": 0.9018, "step": 10097 }, { "epoch": 0.3659889094269871, "grad_norm": 2.0908333020010823, "learning_rate": 7.320557075955228e-06, "loss": 0.9778, "step": 10098 }, { "epoch": 0.36602515312964373, "grad_norm": 2.014674696886334, "learning_rate": 7.320037172440013e-06, "loss": 0.8986, "step": 10099 }, { "epoch": 0.3660613968323004, "grad_norm": 2.190764848848097, "learning_rate": 7.319517236956087e-06, "loss": 0.8861, "step": 10100 }, { "epoch": 0.366097640534957, "grad_norm": 2.258417274422268, "learning_rate": 7.3189972695106184e-06, "loss": 1.0139, "step": 10101 }, { "epoch": 0.3661338842376137, "grad_norm": 2.2343604327090625, "learning_rate": 7.3184772701107684e-06, "loss": 0.9158, "step": 10102 }, { "epoch": 0.3661701279402704, "grad_norm": 2.2381381630647126, "learning_rate": 7.3179572387637046e-06, "loss": 0.9088, "step": 10103 }, { "epoch": 0.366206371642927, "grad_norm": 2.1425004533938155, "learning_rate": 7.317437175476592e-06, "loss": 0.9985, "step": 10104 }, { "epoch": 0.3662426153455837, "grad_norm": 2.28464038985509, "learning_rate": 7.3169170802565946e-06, "loss": 0.7991, "step": 10105 }, { "epoch": 0.36627885904824037, "grad_norm": 2.4375681119021464, "learning_rate": 7.316396953110883e-06, "loss": 0.7954, "step": 10106 }, { "epoch": 0.366315102750897, "grad_norm": 2.539082406552156, "learning_rate": 7.3158767940466216e-06, "loss": 0.8999, "step": 10107 }, { "epoch": 0.3663513464535537, "grad_norm": 2.1007987282088565, "learning_rate": 7.3153566030709785e-06, "loss": 0.8347, "step": 10108 }, { "epoch": 0.36638759015621036, "grad_norm": 2.284758015204618, "learning_rate": 7.314836380191121e-06, "loss": 1.1244, "step": 10109 }, { "epoch": 0.366423833858867, "grad_norm": 2.4941762270074403, "learning_rate": 7.314316125414221e-06, "loss": 0.9723, "step": 10110 }, { "epoch": 0.3664600775615237, "grad_norm": 2.001204154386941, "learning_rate": 7.313795838747443e-06, "loss": 0.7617, "step": 10111 }, { "epoch": 0.36649632126418036, "grad_norm": 2.395444151594282, "learning_rate": 7.313275520197958e-06, "loss": 0.9712, "step": 10112 }, { "epoch": 0.366532564966837, "grad_norm": 2.1972149562058765, "learning_rate": 7.312755169772934e-06, "loss": 0.9457, "step": 10113 }, { "epoch": 0.36656880866949365, "grad_norm": 2.344748196051962, "learning_rate": 7.312234787479543e-06, "loss": 0.9501, "step": 10114 }, { "epoch": 0.36660505237215035, "grad_norm": 2.6388489484606317, "learning_rate": 7.311714373324954e-06, "loss": 0.9587, "step": 10115 }, { "epoch": 0.366641296074807, "grad_norm": 2.0372494523893443, "learning_rate": 7.3111939273163415e-06, "loss": 0.8097, "step": 10116 }, { "epoch": 0.36667753977746365, "grad_norm": 2.6083846302700646, "learning_rate": 7.310673449460872e-06, "loss": 0.9815, "step": 10117 }, { "epoch": 0.36671378348012035, "grad_norm": 2.4534509499974995, "learning_rate": 7.3101529397657216e-06, "loss": 0.9546, "step": 10118 }, { "epoch": 0.366750027182777, "grad_norm": 2.709412652463585, "learning_rate": 7.309632398238059e-06, "loss": 1.0501, "step": 10119 }, { "epoch": 0.36678627088543364, "grad_norm": 2.3653968734610413, "learning_rate": 7.30911182488506e-06, "loss": 0.8667, "step": 10120 }, { "epoch": 0.36682251458809034, "grad_norm": 2.3475214553644315, "learning_rate": 7.308591219713897e-06, "loss": 0.8919, "step": 10121 }, { "epoch": 0.366858758290747, "grad_norm": 2.2498231043097756, "learning_rate": 7.308070582731743e-06, "loss": 0.9369, "step": 10122 }, { "epoch": 0.36689500199340364, "grad_norm": 2.520123518586603, "learning_rate": 7.307549913945772e-06, "loss": 0.9406, "step": 10123 }, { "epoch": 0.3669312456960603, "grad_norm": 2.568406092436258, "learning_rate": 7.307029213363159e-06, "loss": 0.969, "step": 10124 }, { "epoch": 0.366967489398717, "grad_norm": 2.529762316797473, "learning_rate": 7.306508480991078e-06, "loss": 0.9062, "step": 10125 }, { "epoch": 0.36700373310137363, "grad_norm": 2.4802371527846905, "learning_rate": 7.305987716836706e-06, "loss": 1.0531, "step": 10126 }, { "epoch": 0.3670399768040303, "grad_norm": 2.5016613111536783, "learning_rate": 7.305466920907216e-06, "loss": 1.1188, "step": 10127 }, { "epoch": 0.367076220506687, "grad_norm": 2.5842294883352848, "learning_rate": 7.304946093209788e-06, "loss": 0.9016, "step": 10128 }, { "epoch": 0.3671124642093436, "grad_norm": 2.36852645104909, "learning_rate": 7.304425233751596e-06, "loss": 1.2268, "step": 10129 }, { "epoch": 0.36714870791200027, "grad_norm": 2.2710819844778123, "learning_rate": 7.303904342539818e-06, "loss": 1.0675, "step": 10130 }, { "epoch": 0.367184951614657, "grad_norm": 2.2835189438310537, "learning_rate": 7.303383419581633e-06, "loss": 1.0213, "step": 10131 }, { "epoch": 0.3672211953173136, "grad_norm": 2.4693854582315047, "learning_rate": 7.302862464884216e-06, "loss": 1.0128, "step": 10132 }, { "epoch": 0.36725743901997027, "grad_norm": 2.2036603625116813, "learning_rate": 7.302341478454748e-06, "loss": 1.0297, "step": 10133 }, { "epoch": 0.36729368272262697, "grad_norm": 2.515580781929027, "learning_rate": 7.301820460300406e-06, "loss": 0.9554, "step": 10134 }, { "epoch": 0.3673299264252836, "grad_norm": 2.7193534469064593, "learning_rate": 7.301299410428368e-06, "loss": 0.97, "step": 10135 }, { "epoch": 0.36736617012794026, "grad_norm": 2.1213139477122085, "learning_rate": 7.30077832884582e-06, "loss": 1.011, "step": 10136 }, { "epoch": 0.3674024138305969, "grad_norm": 2.340451171301593, "learning_rate": 7.300257215559936e-06, "loss": 0.9518, "step": 10137 }, { "epoch": 0.3674386575332536, "grad_norm": 2.214516090117081, "learning_rate": 7.299736070577899e-06, "loss": 1.0433, "step": 10138 }, { "epoch": 0.36747490123591026, "grad_norm": 2.205396840417373, "learning_rate": 7.299214893906889e-06, "loss": 0.9959, "step": 10139 }, { "epoch": 0.3675111449385669, "grad_norm": 2.5575182216671655, "learning_rate": 7.298693685554089e-06, "loss": 0.9824, "step": 10140 }, { "epoch": 0.3675473886412236, "grad_norm": 2.3953746457206635, "learning_rate": 7.2981724455266814e-06, "loss": 0.9204, "step": 10141 }, { "epoch": 0.36758363234388025, "grad_norm": 2.2410707614948184, "learning_rate": 7.297651173831846e-06, "loss": 0.9039, "step": 10142 }, { "epoch": 0.3676198760465369, "grad_norm": 2.4463696300085833, "learning_rate": 7.297129870476767e-06, "loss": 0.9208, "step": 10143 }, { "epoch": 0.3676561197491936, "grad_norm": 2.3569087301902325, "learning_rate": 7.296608535468628e-06, "loss": 0.9052, "step": 10144 }, { "epoch": 0.36769236345185025, "grad_norm": 2.4631343019772314, "learning_rate": 7.2960871688146114e-06, "loss": 0.9732, "step": 10145 }, { "epoch": 0.3677286071545069, "grad_norm": 2.4182580031869687, "learning_rate": 7.295565770521904e-06, "loss": 1.1513, "step": 10146 }, { "epoch": 0.3677648508571636, "grad_norm": 2.6440070774435327, "learning_rate": 7.295044340597687e-06, "loss": 0.9551, "step": 10147 }, { "epoch": 0.36780109455982024, "grad_norm": 2.304708136422348, "learning_rate": 7.294522879049147e-06, "loss": 0.9498, "step": 10148 }, { "epoch": 0.3678373382624769, "grad_norm": 2.1370765001324674, "learning_rate": 7.29400138588347e-06, "loss": 0.7859, "step": 10149 }, { "epoch": 0.36787358196513353, "grad_norm": 2.3550431407397303, "learning_rate": 7.293479861107842e-06, "loss": 1.0005, "step": 10150 }, { "epoch": 0.36790982566779024, "grad_norm": 2.055722977795753, "learning_rate": 7.2929583047294485e-06, "loss": 0.8776, "step": 10151 }, { "epoch": 0.3679460693704469, "grad_norm": 2.339175301764662, "learning_rate": 7.292436716755475e-06, "loss": 0.7961, "step": 10152 }, { "epoch": 0.36798231307310353, "grad_norm": 2.1905631286325247, "learning_rate": 7.29191509719311e-06, "loss": 0.8631, "step": 10153 }, { "epoch": 0.36801855677576023, "grad_norm": 2.2341042473593773, "learning_rate": 7.2913934460495425e-06, "loss": 0.9617, "step": 10154 }, { "epoch": 0.3680548004784169, "grad_norm": 2.465270211776477, "learning_rate": 7.290871763331958e-06, "loss": 0.7863, "step": 10155 }, { "epoch": 0.3680910441810735, "grad_norm": 2.3244022298885882, "learning_rate": 7.2903500490475466e-06, "loss": 0.9757, "step": 10156 }, { "epoch": 0.3681272878837302, "grad_norm": 2.6154020599611902, "learning_rate": 7.289828303203495e-06, "loss": 0.9554, "step": 10157 }, { "epoch": 0.36816353158638687, "grad_norm": 2.358922235838639, "learning_rate": 7.289306525806996e-06, "loss": 0.9466, "step": 10158 }, { "epoch": 0.3681997752890435, "grad_norm": 2.3047810723090993, "learning_rate": 7.288784716865238e-06, "loss": 0.6955, "step": 10159 }, { "epoch": 0.36823601899170016, "grad_norm": 2.139605465415746, "learning_rate": 7.28826287638541e-06, "loss": 0.9479, "step": 10160 }, { "epoch": 0.36827226269435687, "grad_norm": 2.2903208089925093, "learning_rate": 7.287741004374704e-06, "loss": 0.9035, "step": 10161 }, { "epoch": 0.3683085063970135, "grad_norm": 2.3910270751140232, "learning_rate": 7.287219100840311e-06, "loss": 0.8978, "step": 10162 }, { "epoch": 0.36834475009967016, "grad_norm": 2.3106032724347534, "learning_rate": 7.28669716578942e-06, "loss": 1.0013, "step": 10163 }, { "epoch": 0.36838099380232686, "grad_norm": 2.01829228205963, "learning_rate": 7.286175199229226e-06, "loss": 0.9178, "step": 10164 }, { "epoch": 0.3684172375049835, "grad_norm": 2.0337344343250376, "learning_rate": 7.28565320116692e-06, "loss": 0.8952, "step": 10165 }, { "epoch": 0.36845348120764015, "grad_norm": 2.2324389819188553, "learning_rate": 7.2851311716096965e-06, "loss": 1.0858, "step": 10166 }, { "epoch": 0.36848972491029686, "grad_norm": 2.3451087681969054, "learning_rate": 7.284609110564746e-06, "loss": 0.7233, "step": 10167 }, { "epoch": 0.3685259686129535, "grad_norm": 2.176173562997846, "learning_rate": 7.2840870180392655e-06, "loss": 0.8992, "step": 10168 }, { "epoch": 0.36856221231561015, "grad_norm": 2.27285034967488, "learning_rate": 7.283564894040445e-06, "loss": 0.8729, "step": 10169 }, { "epoch": 0.36859845601826685, "grad_norm": 2.403469994259268, "learning_rate": 7.283042738575483e-06, "loss": 0.9843, "step": 10170 }, { "epoch": 0.3686346997209235, "grad_norm": 2.525719933629429, "learning_rate": 7.282520551651573e-06, "loss": 1.0603, "step": 10171 }, { "epoch": 0.36867094342358014, "grad_norm": 2.3010661090829414, "learning_rate": 7.281998333275909e-06, "loss": 0.8354, "step": 10172 }, { "epoch": 0.3687071871262368, "grad_norm": 2.4427303594664447, "learning_rate": 7.281476083455689e-06, "loss": 0.9607, "step": 10173 }, { "epoch": 0.3687434308288935, "grad_norm": 2.2013664118346727, "learning_rate": 7.280953802198108e-06, "loss": 0.9045, "step": 10174 }, { "epoch": 0.36877967453155014, "grad_norm": 2.3535735312060515, "learning_rate": 7.280431489510362e-06, "loss": 0.9169, "step": 10175 }, { "epoch": 0.3688159182342068, "grad_norm": 2.2759609002626466, "learning_rate": 7.2799091453996505e-06, "loss": 1.0389, "step": 10176 }, { "epoch": 0.3688521619368635, "grad_norm": 2.3389407002053275, "learning_rate": 7.279386769873169e-06, "loss": 1.0083, "step": 10177 }, { "epoch": 0.36888840563952013, "grad_norm": 2.0782316246806913, "learning_rate": 7.278864362938117e-06, "loss": 1.0587, "step": 10178 }, { "epoch": 0.3689246493421768, "grad_norm": 2.256560457130903, "learning_rate": 7.278341924601692e-06, "loss": 0.9389, "step": 10179 }, { "epoch": 0.3689608930448335, "grad_norm": 2.3623609176398626, "learning_rate": 7.277819454871093e-06, "loss": 0.9873, "step": 10180 }, { "epoch": 0.36899713674749013, "grad_norm": 2.329646415549368, "learning_rate": 7.2772969537535206e-06, "loss": 0.8942, "step": 10181 }, { "epoch": 0.3690333804501468, "grad_norm": 2.228460663462098, "learning_rate": 7.276774421256173e-06, "loss": 1.0622, "step": 10182 }, { "epoch": 0.3690696241528035, "grad_norm": 1.9777722557892414, "learning_rate": 7.27625185738625e-06, "loss": 0.8874, "step": 10183 }, { "epoch": 0.3691058678554601, "grad_norm": 2.343346852380792, "learning_rate": 7.275729262150952e-06, "loss": 0.8362, "step": 10184 }, { "epoch": 0.36914211155811677, "grad_norm": 2.404320904446022, "learning_rate": 7.275206635557484e-06, "loss": 1.0329, "step": 10185 }, { "epoch": 0.3691783552607734, "grad_norm": 2.5446676115277658, "learning_rate": 7.274683977613043e-06, "loss": 1.0199, "step": 10186 }, { "epoch": 0.3692145989634301, "grad_norm": 2.1208763312514387, "learning_rate": 7.274161288324833e-06, "loss": 0.8106, "step": 10187 }, { "epoch": 0.36925084266608676, "grad_norm": 2.44307507395136, "learning_rate": 7.273638567700056e-06, "loss": 1.0887, "step": 10188 }, { "epoch": 0.3692870863687434, "grad_norm": 2.3174305483484186, "learning_rate": 7.2731158157459135e-06, "loss": 0.8958, "step": 10189 }, { "epoch": 0.3693233300714001, "grad_norm": 2.306683541771656, "learning_rate": 7.27259303246961e-06, "loss": 1.0029, "step": 10190 }, { "epoch": 0.36935957377405676, "grad_norm": 2.236372483764743, "learning_rate": 7.2720702178783495e-06, "loss": 0.9754, "step": 10191 }, { "epoch": 0.3693958174767134, "grad_norm": 2.4509138888911166, "learning_rate": 7.271547371979335e-06, "loss": 1.0107, "step": 10192 }, { "epoch": 0.3694320611793701, "grad_norm": 2.2022073688636725, "learning_rate": 7.271024494779772e-06, "loss": 0.6015, "step": 10193 }, { "epoch": 0.36946830488202675, "grad_norm": 2.464238462595419, "learning_rate": 7.270501586286864e-06, "loss": 1.0779, "step": 10194 }, { "epoch": 0.3695045485846834, "grad_norm": 2.391474931915884, "learning_rate": 7.26997864650782e-06, "loss": 0.9732, "step": 10195 }, { "epoch": 0.36954079228734005, "grad_norm": 2.243260363004123, "learning_rate": 7.269455675449839e-06, "loss": 0.9393, "step": 10196 }, { "epoch": 0.36957703598999675, "grad_norm": 2.1316058537764353, "learning_rate": 7.268932673120135e-06, "loss": 0.8737, "step": 10197 }, { "epoch": 0.3696132796926534, "grad_norm": 2.596044015371826, "learning_rate": 7.268409639525909e-06, "loss": 0.86, "step": 10198 }, { "epoch": 0.36964952339531004, "grad_norm": 2.3935927171141653, "learning_rate": 7.267886574674371e-06, "loss": 0.9549, "step": 10199 }, { "epoch": 0.36968576709796674, "grad_norm": 2.2258387257295125, "learning_rate": 7.267363478572726e-06, "loss": 0.9842, "step": 10200 }, { "epoch": 0.3697220108006234, "grad_norm": 2.2482705111060897, "learning_rate": 7.266840351228185e-06, "loss": 0.8097, "step": 10201 }, { "epoch": 0.36975825450328004, "grad_norm": 2.4105215821366177, "learning_rate": 7.266317192647954e-06, "loss": 1.0549, "step": 10202 }, { "epoch": 0.36979449820593674, "grad_norm": 2.2588248799394894, "learning_rate": 7.265794002839244e-06, "loss": 0.9744, "step": 10203 }, { "epoch": 0.3698307419085934, "grad_norm": 2.309019231568695, "learning_rate": 7.2652707818092615e-06, "loss": 1.0411, "step": 10204 }, { "epoch": 0.36986698561125003, "grad_norm": 2.529857529298048, "learning_rate": 7.264747529565218e-06, "loss": 1.0256, "step": 10205 }, { "epoch": 0.36990322931390673, "grad_norm": 2.3145574966814846, "learning_rate": 7.264224246114323e-06, "loss": 1.0339, "step": 10206 }, { "epoch": 0.3699394730165634, "grad_norm": 2.1615559963362445, "learning_rate": 7.263700931463788e-06, "loss": 0.9185, "step": 10207 }, { "epoch": 0.36997571671922, "grad_norm": 2.151395144862559, "learning_rate": 7.263177585620824e-06, "loss": 0.8414, "step": 10208 }, { "epoch": 0.3700119604218767, "grad_norm": 2.1685606942326796, "learning_rate": 7.262654208592641e-06, "loss": 0.6891, "step": 10209 }, { "epoch": 0.3700482041245334, "grad_norm": 2.257685748907346, "learning_rate": 7.26213080038645e-06, "loss": 0.8616, "step": 10210 }, { "epoch": 0.37008444782719, "grad_norm": 2.439935935746243, "learning_rate": 7.261607361009466e-06, "loss": 1.0433, "step": 10211 }, { "epoch": 0.37012069152984667, "grad_norm": 2.6653054813908117, "learning_rate": 7.261083890468898e-06, "loss": 0.8773, "step": 10212 }, { "epoch": 0.37015693523250337, "grad_norm": 2.3082805124533263, "learning_rate": 7.260560388771964e-06, "loss": 0.8436, "step": 10213 }, { "epoch": 0.37019317893516, "grad_norm": 2.149820777660527, "learning_rate": 7.260036855925875e-06, "loss": 0.9168, "step": 10214 }, { "epoch": 0.37022942263781666, "grad_norm": 2.100458266489914, "learning_rate": 7.259513291937844e-06, "loss": 0.9396, "step": 10215 }, { "epoch": 0.37026566634047337, "grad_norm": 2.3169838601815416, "learning_rate": 7.2589896968150864e-06, "loss": 0.9052, "step": 10216 }, { "epoch": 0.37030191004313, "grad_norm": 2.1308862368557193, "learning_rate": 7.258466070564818e-06, "loss": 0.8071, "step": 10217 }, { "epoch": 0.37033815374578666, "grad_norm": 2.3133903346076217, "learning_rate": 7.257942413194252e-06, "loss": 1.1552, "step": 10218 }, { "epoch": 0.37037439744844336, "grad_norm": 2.3185004261808793, "learning_rate": 7.2574187247106055e-06, "loss": 1.0093, "step": 10219 }, { "epoch": 0.3704106411511, "grad_norm": 2.2411997765180747, "learning_rate": 7.256895005121094e-06, "loss": 1.0475, "step": 10220 }, { "epoch": 0.37044688485375665, "grad_norm": 2.4122138602309713, "learning_rate": 7.2563712544329344e-06, "loss": 1.0976, "step": 10221 }, { "epoch": 0.3704831285564133, "grad_norm": 2.2913138763960097, "learning_rate": 7.255847472653343e-06, "loss": 0.8385, "step": 10222 }, { "epoch": 0.37051937225907, "grad_norm": 2.496409032238102, "learning_rate": 7.255323659789539e-06, "loss": 0.8998, "step": 10223 }, { "epoch": 0.37055561596172665, "grad_norm": 2.167969814864334, "learning_rate": 7.2547998158487374e-06, "loss": 0.7945, "step": 10224 }, { "epoch": 0.3705918596643833, "grad_norm": 2.597282726644515, "learning_rate": 7.254275940838159e-06, "loss": 0.9256, "step": 10225 }, { "epoch": 0.37062810336704, "grad_norm": 2.5016295607447927, "learning_rate": 7.25375203476502e-06, "loss": 1.041, "step": 10226 }, { "epoch": 0.37066434706969664, "grad_norm": 2.4160121254554374, "learning_rate": 7.253228097636544e-06, "loss": 0.9647, "step": 10227 }, { "epoch": 0.3707005907723533, "grad_norm": 2.353321318517285, "learning_rate": 7.252704129459946e-06, "loss": 0.955, "step": 10228 }, { "epoch": 0.37073683447501, "grad_norm": 2.154385678037429, "learning_rate": 7.252180130242447e-06, "loss": 0.7794, "step": 10229 }, { "epoch": 0.37077307817766664, "grad_norm": 2.4877603512017017, "learning_rate": 7.251656099991267e-06, "loss": 0.9194, "step": 10230 }, { "epoch": 0.3708093218803233, "grad_norm": 2.1130519561517187, "learning_rate": 7.25113203871363e-06, "loss": 0.9179, "step": 10231 }, { "epoch": 0.37084556558297993, "grad_norm": 2.1879368365907643, "learning_rate": 7.250607946416752e-06, "loss": 0.9415, "step": 10232 }, { "epoch": 0.37088180928563663, "grad_norm": 2.5514895916516536, "learning_rate": 7.250083823107861e-06, "loss": 1.0424, "step": 10233 }, { "epoch": 0.3709180529882933, "grad_norm": 2.0535058684116305, "learning_rate": 7.249559668794173e-06, "loss": 0.9117, "step": 10234 }, { "epoch": 0.3709542966909499, "grad_norm": 2.1967523676515444, "learning_rate": 7.249035483482914e-06, "loss": 0.9357, "step": 10235 }, { "epoch": 0.3709905403936066, "grad_norm": 2.3238156136154964, "learning_rate": 7.248511267181306e-06, "loss": 0.8298, "step": 10236 }, { "epoch": 0.3710267840962633, "grad_norm": 2.6980849828176043, "learning_rate": 7.247987019896574e-06, "loss": 0.9072, "step": 10237 }, { "epoch": 0.3710630277989199, "grad_norm": 2.341186028951274, "learning_rate": 7.24746274163594e-06, "loss": 0.9274, "step": 10238 }, { "epoch": 0.3710992715015766, "grad_norm": 2.491100476811309, "learning_rate": 7.246938432406628e-06, "loss": 1.0108, "step": 10239 }, { "epoch": 0.37113551520423327, "grad_norm": 2.426066488944647, "learning_rate": 7.2464140922158635e-06, "loss": 0.8609, "step": 10240 }, { "epoch": 0.3711717589068899, "grad_norm": 2.083181345837336, "learning_rate": 7.245889721070871e-06, "loss": 0.8472, "step": 10241 }, { "epoch": 0.3712080026095466, "grad_norm": 2.4623710438710127, "learning_rate": 7.245365318978876e-06, "loss": 1.0953, "step": 10242 }, { "epoch": 0.37124424631220326, "grad_norm": 2.3506695503200077, "learning_rate": 7.244840885947106e-06, "loss": 0.9389, "step": 10243 }, { "epoch": 0.3712804900148599, "grad_norm": 2.2404808265836955, "learning_rate": 7.2443164219827865e-06, "loss": 0.962, "step": 10244 }, { "epoch": 0.37131673371751656, "grad_norm": 2.4008214133318564, "learning_rate": 7.243791927093143e-06, "loss": 0.8505, "step": 10245 }, { "epoch": 0.37135297742017326, "grad_norm": 2.1874262784576706, "learning_rate": 7.243267401285404e-06, "loss": 0.9262, "step": 10246 }, { "epoch": 0.3713892211228299, "grad_norm": 2.435944752449696, "learning_rate": 7.242742844566797e-06, "loss": 1.0808, "step": 10247 }, { "epoch": 0.37142546482548655, "grad_norm": 2.2635603539236078, "learning_rate": 7.242218256944551e-06, "loss": 0.8187, "step": 10248 }, { "epoch": 0.37146170852814325, "grad_norm": 2.2941733927605465, "learning_rate": 7.241693638425893e-06, "loss": 0.8493, "step": 10249 }, { "epoch": 0.3714979522307999, "grad_norm": 2.3591987011487303, "learning_rate": 7.2411689890180506e-06, "loss": 0.8667, "step": 10250 }, { "epoch": 0.37153419593345655, "grad_norm": 2.132758961407452, "learning_rate": 7.240644308728258e-06, "loss": 0.7991, "step": 10251 }, { "epoch": 0.37157043963611325, "grad_norm": 2.306943306606814, "learning_rate": 7.240119597563739e-06, "loss": 1.0393, "step": 10252 }, { "epoch": 0.3716066833387699, "grad_norm": 2.309759030728645, "learning_rate": 7.239594855531727e-06, "loss": 0.7884, "step": 10253 }, { "epoch": 0.37164292704142654, "grad_norm": 2.490043808903079, "learning_rate": 7.2390700826394535e-06, "loss": 1.0551, "step": 10254 }, { "epoch": 0.37167917074408324, "grad_norm": 2.2196092429493803, "learning_rate": 7.238545278894149e-06, "loss": 0.9471, "step": 10255 }, { "epoch": 0.3717154144467399, "grad_norm": 2.1499446224608163, "learning_rate": 7.238020444303042e-06, "loss": 0.7487, "step": 10256 }, { "epoch": 0.37175165814939654, "grad_norm": 2.1883944275924634, "learning_rate": 7.237495578873369e-06, "loss": 0.958, "step": 10257 }, { "epoch": 0.3717879018520532, "grad_norm": 2.187636513703982, "learning_rate": 7.23697068261236e-06, "loss": 1.0018, "step": 10258 }, { "epoch": 0.3718241455547099, "grad_norm": 2.151591986772544, "learning_rate": 7.2364457555272474e-06, "loss": 0.8614, "step": 10259 }, { "epoch": 0.37186038925736653, "grad_norm": 2.504324866597716, "learning_rate": 7.235920797625264e-06, "loss": 0.8615, "step": 10260 }, { "epoch": 0.3718966329600232, "grad_norm": 2.077900065160539, "learning_rate": 7.2353958089136456e-06, "loss": 0.8057, "step": 10261 }, { "epoch": 0.3719328766626799, "grad_norm": 2.1065383323103726, "learning_rate": 7.234870789399624e-06, "loss": 1.0042, "step": 10262 }, { "epoch": 0.3719691203653365, "grad_norm": 2.116292988869697, "learning_rate": 7.234345739090435e-06, "loss": 0.891, "step": 10263 }, { "epoch": 0.37200536406799317, "grad_norm": 2.6474169503293377, "learning_rate": 7.233820657993312e-06, "loss": 0.9435, "step": 10264 }, { "epoch": 0.3720416077706499, "grad_norm": 2.3337441704930297, "learning_rate": 7.233295546115492e-06, "loss": 1.0288, "step": 10265 }, { "epoch": 0.3720778514733065, "grad_norm": 2.426308874749282, "learning_rate": 7.2327704034642095e-06, "loss": 0.9674, "step": 10266 }, { "epoch": 0.37211409517596317, "grad_norm": 2.264412875141565, "learning_rate": 7.232245230046703e-06, "loss": 1.0167, "step": 10267 }, { "epoch": 0.3721503388786198, "grad_norm": 2.4950057684382116, "learning_rate": 7.231720025870206e-06, "loss": 0.9844, "step": 10268 }, { "epoch": 0.3721865825812765, "grad_norm": 2.3183084479814986, "learning_rate": 7.231194790941957e-06, "loss": 0.943, "step": 10269 }, { "epoch": 0.37222282628393316, "grad_norm": 2.2528450894926815, "learning_rate": 7.2306695252691925e-06, "loss": 0.9519, "step": 10270 }, { "epoch": 0.3722590699865898, "grad_norm": 2.475369383083251, "learning_rate": 7.230144228859152e-06, "loss": 0.8647, "step": 10271 }, { "epoch": 0.3722953136892465, "grad_norm": 2.412545065585475, "learning_rate": 7.229618901719072e-06, "loss": 0.6756, "step": 10272 }, { "epoch": 0.37233155739190316, "grad_norm": 2.387177866546432, "learning_rate": 7.229093543856192e-06, "loss": 0.9471, "step": 10273 }, { "epoch": 0.3723678010945598, "grad_norm": 1.968069987665392, "learning_rate": 7.228568155277751e-06, "loss": 0.9008, "step": 10274 }, { "epoch": 0.3724040447972165, "grad_norm": 2.301526597155225, "learning_rate": 7.22804273599099e-06, "loss": 0.8648, "step": 10275 }, { "epoch": 0.37244028849987315, "grad_norm": 2.475972852759495, "learning_rate": 7.227517286003146e-06, "loss": 0.8764, "step": 10276 }, { "epoch": 0.3724765322025298, "grad_norm": 2.3587438655583357, "learning_rate": 7.226991805321462e-06, "loss": 1.0784, "step": 10277 }, { "epoch": 0.3725127759051865, "grad_norm": 2.3626996561292826, "learning_rate": 7.226466293953177e-06, "loss": 0.8852, "step": 10278 }, { "epoch": 0.37254901960784315, "grad_norm": 2.423734042217636, "learning_rate": 7.225940751905532e-06, "loss": 1.0304, "step": 10279 }, { "epoch": 0.3725852633104998, "grad_norm": 2.242236590654317, "learning_rate": 7.225415179185772e-06, "loss": 0.9809, "step": 10280 }, { "epoch": 0.37262150701315644, "grad_norm": 2.273131325173423, "learning_rate": 7.224889575801135e-06, "loss": 0.8339, "step": 10281 }, { "epoch": 0.37265775071581314, "grad_norm": 2.5266849670893983, "learning_rate": 7.224363941758866e-06, "loss": 0.9751, "step": 10282 }, { "epoch": 0.3726939944184698, "grad_norm": 2.4169010936532547, "learning_rate": 7.223838277066207e-06, "loss": 0.9255, "step": 10283 }, { "epoch": 0.37273023812112643, "grad_norm": 2.1754418852334543, "learning_rate": 7.2233125817304005e-06, "loss": 0.9066, "step": 10284 }, { "epoch": 0.37276648182378314, "grad_norm": 2.2767195909169424, "learning_rate": 7.222786855758693e-06, "loss": 0.7576, "step": 10285 }, { "epoch": 0.3728027255264398, "grad_norm": 2.3878465217477296, "learning_rate": 7.2222610991583264e-06, "loss": 0.981, "step": 10286 }, { "epoch": 0.37283896922909643, "grad_norm": 2.1399287724161935, "learning_rate": 7.221735311936545e-06, "loss": 0.891, "step": 10287 }, { "epoch": 0.37287521293175313, "grad_norm": 2.3966465800399908, "learning_rate": 7.2212094941005965e-06, "loss": 1.2044, "step": 10288 }, { "epoch": 0.3729114566344098, "grad_norm": 2.458627836258416, "learning_rate": 7.220683645657721e-06, "loss": 0.94, "step": 10289 }, { "epoch": 0.3729477003370664, "grad_norm": 2.1053237450035978, "learning_rate": 7.2201577666151715e-06, "loss": 0.8484, "step": 10290 }, { "epoch": 0.3729839440397231, "grad_norm": 2.3118971237606427, "learning_rate": 7.219631856980188e-06, "loss": 0.846, "step": 10291 }, { "epoch": 0.37302018774237977, "grad_norm": 2.5231694514122225, "learning_rate": 7.219105916760022e-06, "loss": 1.0001, "step": 10292 }, { "epoch": 0.3730564314450364, "grad_norm": 2.2248335571003928, "learning_rate": 7.218579945961916e-06, "loss": 0.9567, "step": 10293 }, { "epoch": 0.37309267514769306, "grad_norm": 2.252115699206352, "learning_rate": 7.218053944593123e-06, "loss": 0.9074, "step": 10294 }, { "epoch": 0.37312891885034977, "grad_norm": 2.4358030898000824, "learning_rate": 7.217527912660887e-06, "loss": 0.9457, "step": 10295 }, { "epoch": 0.3731651625530064, "grad_norm": 2.343199751873504, "learning_rate": 7.217001850172458e-06, "loss": 0.8753, "step": 10296 }, { "epoch": 0.37320140625566306, "grad_norm": 2.179061504683239, "learning_rate": 7.216475757135083e-06, "loss": 0.9245, "step": 10297 }, { "epoch": 0.37323764995831976, "grad_norm": 2.7802023504443443, "learning_rate": 7.215949633556014e-06, "loss": 0.8603, "step": 10298 }, { "epoch": 0.3732738936609764, "grad_norm": 1.912877851601762, "learning_rate": 7.215423479442497e-06, "loss": 0.967, "step": 10299 }, { "epoch": 0.37331013736363305, "grad_norm": 2.576906607852083, "learning_rate": 7.214897294801786e-06, "loss": 0.8121, "step": 10300 }, { "epoch": 0.37334638106628976, "grad_norm": 2.4996048333427954, "learning_rate": 7.214371079641129e-06, "loss": 0.8423, "step": 10301 }, { "epoch": 0.3733826247689464, "grad_norm": 2.5571949143767183, "learning_rate": 7.213844833967779e-06, "loss": 1.0302, "step": 10302 }, { "epoch": 0.37341886847160305, "grad_norm": 2.2839861515309328, "learning_rate": 7.213318557788984e-06, "loss": 1.0054, "step": 10303 }, { "epoch": 0.3734551121742597, "grad_norm": 2.139629771726922, "learning_rate": 7.212792251112e-06, "loss": 1.0235, "step": 10304 }, { "epoch": 0.3734913558769164, "grad_norm": 2.3912994105074956, "learning_rate": 7.2122659139440765e-06, "loss": 0.9685, "step": 10305 }, { "epoch": 0.37352759957957304, "grad_norm": 2.2093928819005755, "learning_rate": 7.211739546292467e-06, "loss": 0.8827, "step": 10306 }, { "epoch": 0.3735638432822297, "grad_norm": 2.133261218172255, "learning_rate": 7.211213148164423e-06, "loss": 0.772, "step": 10307 }, { "epoch": 0.3736000869848864, "grad_norm": 2.4623059545819506, "learning_rate": 7.2106867195672e-06, "loss": 1.0421, "step": 10308 }, { "epoch": 0.37363633068754304, "grad_norm": 2.329129630753955, "learning_rate": 7.21016026050805e-06, "loss": 1.0775, "step": 10309 }, { "epoch": 0.3736725743901997, "grad_norm": 2.3646562446272803, "learning_rate": 7.20963377099423e-06, "loss": 0.9055, "step": 10310 }, { "epoch": 0.3737088180928564, "grad_norm": 2.338171458557959, "learning_rate": 7.2091072510329905e-06, "loss": 0.9336, "step": 10311 }, { "epoch": 0.37374506179551303, "grad_norm": 2.2635919461845124, "learning_rate": 7.208580700631591e-06, "loss": 0.8009, "step": 10312 }, { "epoch": 0.3737813054981697, "grad_norm": 2.191501325301551, "learning_rate": 7.208054119797284e-06, "loss": 0.8335, "step": 10313 }, { "epoch": 0.3738175492008264, "grad_norm": 2.1223730337505913, "learning_rate": 7.2075275085373275e-06, "loss": 0.8167, "step": 10314 }, { "epoch": 0.37385379290348303, "grad_norm": 2.6294759735299555, "learning_rate": 7.207000866858977e-06, "loss": 1.0577, "step": 10315 }, { "epoch": 0.3738900366061397, "grad_norm": 2.3923079913930225, "learning_rate": 7.20647419476949e-06, "loss": 0.9574, "step": 10316 }, { "epoch": 0.3739262803087963, "grad_norm": 2.9568931064385726, "learning_rate": 7.2059474922761215e-06, "loss": 1.1262, "step": 10317 }, { "epoch": 0.373962524011453, "grad_norm": 2.3257177332492076, "learning_rate": 7.205420759386131e-06, "loss": 0.8765, "step": 10318 }, { "epoch": 0.37399876771410967, "grad_norm": 15.15546464450957, "learning_rate": 7.204893996106776e-06, "loss": 1.2698, "step": 10319 }, { "epoch": 0.3740350114167663, "grad_norm": 2.2128416242526407, "learning_rate": 7.204367202445317e-06, "loss": 1.0879, "step": 10320 }, { "epoch": 0.374071255119423, "grad_norm": 2.56120925148965, "learning_rate": 7.203840378409009e-06, "loss": 1.0392, "step": 10321 }, { "epoch": 0.37410749882207966, "grad_norm": 2.6206115238930314, "learning_rate": 7.203313524005114e-06, "loss": 0.9315, "step": 10322 }, { "epoch": 0.3741437425247363, "grad_norm": 2.3123922236253245, "learning_rate": 7.2027866392408905e-06, "loss": 0.8867, "step": 10323 }, { "epoch": 0.374179986227393, "grad_norm": 2.2834063229038266, "learning_rate": 7.202259724123601e-06, "loss": 0.9527, "step": 10324 }, { "epoch": 0.37421622993004966, "grad_norm": 2.4105397991006066, "learning_rate": 7.201732778660504e-06, "loss": 0.8502, "step": 10325 }, { "epoch": 0.3742524736327063, "grad_norm": 2.2715913721977614, "learning_rate": 7.201205802858862e-06, "loss": 1.1456, "step": 10326 }, { "epoch": 0.374288717335363, "grad_norm": 2.6548551653703623, "learning_rate": 7.200678796725933e-06, "loss": 0.847, "step": 10327 }, { "epoch": 0.37432496103801965, "grad_norm": 2.5368399733903977, "learning_rate": 7.200151760268981e-06, "loss": 0.9514, "step": 10328 }, { "epoch": 0.3743612047406763, "grad_norm": 2.0649835391947486, "learning_rate": 7.199624693495269e-06, "loss": 0.9366, "step": 10329 }, { "epoch": 0.37439744844333295, "grad_norm": 2.4980748734247036, "learning_rate": 7.199097596412059e-06, "loss": 0.9955, "step": 10330 }, { "epoch": 0.37443369214598965, "grad_norm": 2.2223891704544694, "learning_rate": 7.198570469026614e-06, "loss": 0.8446, "step": 10331 }, { "epoch": 0.3744699358486463, "grad_norm": 2.142412611746968, "learning_rate": 7.198043311346199e-06, "loss": 0.6954, "step": 10332 }, { "epoch": 0.37450617955130294, "grad_norm": 2.1478112502342057, "learning_rate": 7.197516123378074e-06, "loss": 0.8937, "step": 10333 }, { "epoch": 0.37454242325395964, "grad_norm": 2.3332784988903055, "learning_rate": 7.1969889051295085e-06, "loss": 0.7477, "step": 10334 }, { "epoch": 0.3745786669566163, "grad_norm": 2.2997643786390407, "learning_rate": 7.1964616566077625e-06, "loss": 0.7427, "step": 10335 }, { "epoch": 0.37461491065927294, "grad_norm": 2.2729113330276958, "learning_rate": 7.1959343778201054e-06, "loss": 0.9558, "step": 10336 }, { "epoch": 0.37465115436192964, "grad_norm": 2.286259103371169, "learning_rate": 7.195407068773799e-06, "loss": 0.8175, "step": 10337 }, { "epoch": 0.3746873980645863, "grad_norm": 2.353306529742334, "learning_rate": 7.194879729476112e-06, "loss": 0.993, "step": 10338 }, { "epoch": 0.37472364176724293, "grad_norm": 2.448076199820355, "learning_rate": 7.194352359934308e-06, "loss": 0.9166, "step": 10339 }, { "epoch": 0.3747598854698996, "grad_norm": 2.5960114279858733, "learning_rate": 7.193824960155656e-06, "loss": 1.0694, "step": 10340 }, { "epoch": 0.3747961291725563, "grad_norm": 1.899418027786954, "learning_rate": 7.193297530147423e-06, "loss": 0.7979, "step": 10341 }, { "epoch": 0.3748323728752129, "grad_norm": 2.5976053372140253, "learning_rate": 7.192770069916878e-06, "loss": 0.861, "step": 10342 }, { "epoch": 0.3748686165778696, "grad_norm": 2.1390352176357417, "learning_rate": 7.192242579471286e-06, "loss": 0.8948, "step": 10343 }, { "epoch": 0.3749048602805263, "grad_norm": 2.4216057318592754, "learning_rate": 7.191715058817917e-06, "loss": 0.9178, "step": 10344 }, { "epoch": 0.3749411039831829, "grad_norm": 2.4872807002981054, "learning_rate": 7.191187507964042e-06, "loss": 1.0684, "step": 10345 }, { "epoch": 0.37497734768583957, "grad_norm": 2.1178303437867725, "learning_rate": 7.190659926916925e-06, "loss": 1.0497, "step": 10346 }, { "epoch": 0.37501359138849627, "grad_norm": 2.4797599028549575, "learning_rate": 7.190132315683842e-06, "loss": 1.0007, "step": 10347 }, { "epoch": 0.3750498350911529, "grad_norm": 2.2661406922484004, "learning_rate": 7.18960467427206e-06, "loss": 1.0096, "step": 10348 }, { "epoch": 0.37508607879380956, "grad_norm": 2.3569552560665974, "learning_rate": 7.1890770026888476e-06, "loss": 1.0208, "step": 10349 }, { "epoch": 0.37512232249646627, "grad_norm": 2.1260494470312707, "learning_rate": 7.18854930094148e-06, "loss": 0.835, "step": 10350 }, { "epoch": 0.3751585661991229, "grad_norm": 2.1568590631395277, "learning_rate": 7.188021569037226e-06, "loss": 0.9296, "step": 10351 }, { "epoch": 0.37519480990177956, "grad_norm": 2.145281877855909, "learning_rate": 7.187493806983358e-06, "loss": 0.9033, "step": 10352 }, { "epoch": 0.3752310536044362, "grad_norm": 2.374977257454687, "learning_rate": 7.1869660147871474e-06, "loss": 0.8681, "step": 10353 }, { "epoch": 0.3752672973070929, "grad_norm": 2.290425759580631, "learning_rate": 7.1864381924558706e-06, "loss": 1.0724, "step": 10354 }, { "epoch": 0.37530354100974955, "grad_norm": 2.3040164622230472, "learning_rate": 7.185910339996796e-06, "loss": 0.9339, "step": 10355 }, { "epoch": 0.3753397847124062, "grad_norm": 2.4403699699161847, "learning_rate": 7.1853824574171985e-06, "loss": 1.0453, "step": 10356 }, { "epoch": 0.3753760284150629, "grad_norm": 2.227355530371976, "learning_rate": 7.184854544724354e-06, "loss": 0.9408, "step": 10357 }, { "epoch": 0.37541227211771955, "grad_norm": 2.1909610886551754, "learning_rate": 7.184326601925532e-06, "loss": 0.9235, "step": 10358 }, { "epoch": 0.3754485158203762, "grad_norm": 2.5793623530019727, "learning_rate": 7.183798629028014e-06, "loss": 0.8034, "step": 10359 }, { "epoch": 0.3754847595230329, "grad_norm": 2.3633402783378514, "learning_rate": 7.183270626039068e-06, "loss": 0.7572, "step": 10360 }, { "epoch": 0.37552100322568954, "grad_norm": 2.7098516665096035, "learning_rate": 7.182742592965977e-06, "loss": 1.0717, "step": 10361 }, { "epoch": 0.3755572469283462, "grad_norm": 2.329061391622171, "learning_rate": 7.182214529816012e-06, "loss": 0.8492, "step": 10362 }, { "epoch": 0.3755934906310029, "grad_norm": 2.1346093340256305, "learning_rate": 7.181686436596449e-06, "loss": 0.7918, "step": 10363 }, { "epoch": 0.37562973433365954, "grad_norm": 2.1810505542910024, "learning_rate": 7.181158313314567e-06, "loss": 0.9193, "step": 10364 }, { "epoch": 0.3756659780363162, "grad_norm": 2.3126915959134036, "learning_rate": 7.180630159977644e-06, "loss": 1.0454, "step": 10365 }, { "epoch": 0.37570222173897283, "grad_norm": 2.417432025099379, "learning_rate": 7.180101976592953e-06, "loss": 0.9702, "step": 10366 }, { "epoch": 0.37573846544162953, "grad_norm": 2.3809569864543585, "learning_rate": 7.179573763167779e-06, "loss": 0.7619, "step": 10367 }, { "epoch": 0.3757747091442862, "grad_norm": 2.362244869863584, "learning_rate": 7.179045519709394e-06, "loss": 1.0619, "step": 10368 }, { "epoch": 0.3758109528469428, "grad_norm": 2.5497348750560223, "learning_rate": 7.1785172462250804e-06, "loss": 0.8534, "step": 10369 }, { "epoch": 0.3758471965495995, "grad_norm": 2.306247880226465, "learning_rate": 7.177988942722117e-06, "loss": 0.9119, "step": 10370 }, { "epoch": 0.3758834402522562, "grad_norm": 2.0966311539528615, "learning_rate": 7.177460609207784e-06, "loss": 0.7067, "step": 10371 }, { "epoch": 0.3759196839549128, "grad_norm": 2.5449953491148083, "learning_rate": 7.17693224568936e-06, "loss": 0.947, "step": 10372 }, { "epoch": 0.3759559276575695, "grad_norm": 2.3342165165552475, "learning_rate": 7.176403852174127e-06, "loss": 0.9444, "step": 10373 }, { "epoch": 0.37599217136022617, "grad_norm": 2.3287115294066756, "learning_rate": 7.175875428669362e-06, "loss": 1.0247, "step": 10374 }, { "epoch": 0.3760284150628828, "grad_norm": 2.1501201014591382, "learning_rate": 7.175346975182352e-06, "loss": 0.9465, "step": 10375 }, { "epoch": 0.37606465876553946, "grad_norm": 2.3268299250466358, "learning_rate": 7.174818491720376e-06, "loss": 0.9613, "step": 10376 }, { "epoch": 0.37610090246819616, "grad_norm": 2.45926903958167, "learning_rate": 7.174289978290717e-06, "loss": 1.0078, "step": 10377 }, { "epoch": 0.3761371461708528, "grad_norm": 2.374532125841726, "learning_rate": 7.1737614349006555e-06, "loss": 0.9787, "step": 10378 }, { "epoch": 0.37617338987350946, "grad_norm": 2.8235603782588568, "learning_rate": 7.173232861557477e-06, "loss": 0.8776, "step": 10379 }, { "epoch": 0.37620963357616616, "grad_norm": 2.591256600678962, "learning_rate": 7.172704258268464e-06, "loss": 1.1262, "step": 10380 }, { "epoch": 0.3762458772788228, "grad_norm": 2.392893571845471, "learning_rate": 7.1721756250409e-06, "loss": 0.9072, "step": 10381 }, { "epoch": 0.37628212098147945, "grad_norm": 2.4105669125535094, "learning_rate": 7.171646961882072e-06, "loss": 0.8127, "step": 10382 }, { "epoch": 0.37631836468413615, "grad_norm": 2.1849441500422047, "learning_rate": 7.17111826879926e-06, "loss": 0.8556, "step": 10383 }, { "epoch": 0.3763546083867928, "grad_norm": 2.3591985864083282, "learning_rate": 7.170589545799751e-06, "loss": 0.9013, "step": 10384 }, { "epoch": 0.37639085208944945, "grad_norm": 2.3118964517252545, "learning_rate": 7.170060792890832e-06, "loss": 0.9786, "step": 10385 }, { "epoch": 0.37642709579210615, "grad_norm": 2.1123158254407226, "learning_rate": 7.169532010079786e-06, "loss": 0.7685, "step": 10386 }, { "epoch": 0.3764633394947628, "grad_norm": 2.303581379070405, "learning_rate": 7.169003197373902e-06, "loss": 0.926, "step": 10387 }, { "epoch": 0.37649958319741944, "grad_norm": 2.5499292433874934, "learning_rate": 7.1684743547804655e-06, "loss": 0.8942, "step": 10388 }, { "epoch": 0.3765358269000761, "grad_norm": 2.2066975144391265, "learning_rate": 7.167945482306764e-06, "loss": 0.8873, "step": 10389 }, { "epoch": 0.3765720706027328, "grad_norm": 2.2694172441990297, "learning_rate": 7.167416579960085e-06, "loss": 0.8513, "step": 10390 }, { "epoch": 0.37660831430538944, "grad_norm": 2.322110040428759, "learning_rate": 7.1668876477477155e-06, "loss": 0.9451, "step": 10391 }, { "epoch": 0.3766445580080461, "grad_norm": 2.5081132445782153, "learning_rate": 7.166358685676947e-06, "loss": 0.9899, "step": 10392 }, { "epoch": 0.3766808017107028, "grad_norm": 2.2859672297999083, "learning_rate": 7.165829693755064e-06, "loss": 1.0858, "step": 10393 }, { "epoch": 0.37671704541335943, "grad_norm": 2.2742387446780667, "learning_rate": 7.1653006719893584e-06, "loss": 0.8519, "step": 10394 }, { "epoch": 0.3767532891160161, "grad_norm": 2.1476023089865515, "learning_rate": 7.164771620387119e-06, "loss": 0.9264, "step": 10395 }, { "epoch": 0.3767895328186728, "grad_norm": 2.264924884719064, "learning_rate": 7.164242538955634e-06, "loss": 0.8511, "step": 10396 }, { "epoch": 0.3768257765213294, "grad_norm": 2.2125438696108084, "learning_rate": 7.163713427702199e-06, "loss": 1.071, "step": 10397 }, { "epoch": 0.37686202022398607, "grad_norm": 2.304203723488367, "learning_rate": 7.163184286634099e-06, "loss": 0.9015, "step": 10398 }, { "epoch": 0.3768982639266427, "grad_norm": 2.4182239050670638, "learning_rate": 7.162655115758629e-06, "loss": 1.0742, "step": 10399 }, { "epoch": 0.3769345076292994, "grad_norm": 2.3013592266944607, "learning_rate": 7.162125915083078e-06, "loss": 0.9014, "step": 10400 }, { "epoch": 0.37697075133195607, "grad_norm": 2.4971250773759937, "learning_rate": 7.161596684614742e-06, "loss": 0.917, "step": 10401 }, { "epoch": 0.3770069950346127, "grad_norm": 2.1802436887890435, "learning_rate": 7.16106742436091e-06, "loss": 0.9021, "step": 10402 }, { "epoch": 0.3770432387372694, "grad_norm": 2.815620834629249, "learning_rate": 7.160538134328876e-06, "loss": 0.9467, "step": 10403 }, { "epoch": 0.37707948243992606, "grad_norm": 2.4838557406123685, "learning_rate": 7.160008814525931e-06, "loss": 0.9138, "step": 10404 }, { "epoch": 0.3771157261425827, "grad_norm": 2.45890528289876, "learning_rate": 7.159479464959374e-06, "loss": 1.0136, "step": 10405 }, { "epoch": 0.3771519698452394, "grad_norm": 2.4189545693323526, "learning_rate": 7.158950085636493e-06, "loss": 0.834, "step": 10406 }, { "epoch": 0.37718821354789606, "grad_norm": 2.802154163474874, "learning_rate": 7.158420676564588e-06, "loss": 1.0374, "step": 10407 }, { "epoch": 0.3772244572505527, "grad_norm": 2.4637664368875627, "learning_rate": 7.157891237750949e-06, "loss": 1.0589, "step": 10408 }, { "epoch": 0.3772607009532094, "grad_norm": 2.3529547246527382, "learning_rate": 7.157361769202876e-06, "loss": 1.1011, "step": 10409 }, { "epoch": 0.37729694465586605, "grad_norm": 2.21325545408748, "learning_rate": 7.156832270927661e-06, "loss": 0.8849, "step": 10410 }, { "epoch": 0.3773331883585227, "grad_norm": 2.5118958257672115, "learning_rate": 7.156302742932604e-06, "loss": 1.0484, "step": 10411 }, { "epoch": 0.37736943206117934, "grad_norm": 2.4080732203010258, "learning_rate": 7.155773185224998e-06, "loss": 0.8916, "step": 10412 }, { "epoch": 0.37740567576383605, "grad_norm": 2.3787360267865, "learning_rate": 7.1552435978121405e-06, "loss": 0.9424, "step": 10413 }, { "epoch": 0.3774419194664927, "grad_norm": 2.299809592753365, "learning_rate": 7.15471398070133e-06, "loss": 0.952, "step": 10414 }, { "epoch": 0.37747816316914934, "grad_norm": 2.5051936636674315, "learning_rate": 7.154184333899865e-06, "loss": 0.9085, "step": 10415 }, { "epoch": 0.37751440687180604, "grad_norm": 2.3264553824248115, "learning_rate": 7.153654657415041e-06, "loss": 0.9248, "step": 10416 }, { "epoch": 0.3775506505744627, "grad_norm": 2.2992058820624193, "learning_rate": 7.1531249512541604e-06, "loss": 0.8702, "step": 10417 }, { "epoch": 0.37758689427711933, "grad_norm": 2.4707090814361634, "learning_rate": 7.1525952154245186e-06, "loss": 0.8074, "step": 10418 }, { "epoch": 0.37762313797977604, "grad_norm": 2.2080741506197077, "learning_rate": 7.152065449933416e-06, "loss": 0.9012, "step": 10419 }, { "epoch": 0.3776593816824327, "grad_norm": 2.56141016504982, "learning_rate": 7.151535654788153e-06, "loss": 0.964, "step": 10420 }, { "epoch": 0.37769562538508933, "grad_norm": 2.4756737140804264, "learning_rate": 7.151005829996032e-06, "loss": 1.0501, "step": 10421 }, { "epoch": 0.37773186908774603, "grad_norm": 2.1371361122399133, "learning_rate": 7.15047597556435e-06, "loss": 0.7538, "step": 10422 }, { "epoch": 0.3777681127904027, "grad_norm": 2.2751408467657566, "learning_rate": 7.149946091500411e-06, "loss": 1.0415, "step": 10423 }, { "epoch": 0.3778043564930593, "grad_norm": 2.333120653691019, "learning_rate": 7.149416177811514e-06, "loss": 0.8661, "step": 10424 }, { "epoch": 0.37784060019571597, "grad_norm": 2.3345426983714668, "learning_rate": 7.148886234504962e-06, "loss": 0.8751, "step": 10425 }, { "epoch": 0.37787684389837267, "grad_norm": 2.312828556703141, "learning_rate": 7.148356261588057e-06, "loss": 0.9908, "step": 10426 }, { "epoch": 0.3779130876010293, "grad_norm": 2.3995307588747385, "learning_rate": 7.147826259068102e-06, "loss": 0.8689, "step": 10427 }, { "epoch": 0.37794933130368596, "grad_norm": 2.6776617085303984, "learning_rate": 7.1472962269524e-06, "loss": 0.9043, "step": 10428 }, { "epoch": 0.37798557500634267, "grad_norm": 2.490279753837194, "learning_rate": 7.146766165248256e-06, "loss": 0.8684, "step": 10429 }, { "epoch": 0.3780218187089993, "grad_norm": 2.384692507236168, "learning_rate": 7.146236073962971e-06, "loss": 0.8298, "step": 10430 }, { "epoch": 0.37805806241165596, "grad_norm": 2.54859599716407, "learning_rate": 7.145705953103853e-06, "loss": 0.9712, "step": 10431 }, { "epoch": 0.37809430611431266, "grad_norm": 2.3890586335301527, "learning_rate": 7.145175802678204e-06, "loss": 0.8178, "step": 10432 }, { "epoch": 0.3781305498169693, "grad_norm": 2.1090959779828773, "learning_rate": 7.144645622693328e-06, "loss": 0.8681, "step": 10433 }, { "epoch": 0.37816679351962595, "grad_norm": 2.0636255232057703, "learning_rate": 7.144115413156534e-06, "loss": 0.8369, "step": 10434 }, { "epoch": 0.3782030372222826, "grad_norm": 2.361181670079266, "learning_rate": 7.143585174075125e-06, "loss": 0.9572, "step": 10435 }, { "epoch": 0.3782392809249393, "grad_norm": 2.432642159727773, "learning_rate": 7.143054905456411e-06, "loss": 0.7728, "step": 10436 }, { "epoch": 0.37827552462759595, "grad_norm": 2.163616714455767, "learning_rate": 7.142524607307693e-06, "loss": 0.8982, "step": 10437 }, { "epoch": 0.3783117683302526, "grad_norm": 2.466311729866953, "learning_rate": 7.141994279636284e-06, "loss": 1.0149, "step": 10438 }, { "epoch": 0.3783480120329093, "grad_norm": 2.301024545625388, "learning_rate": 7.1414639224494886e-06, "loss": 0.8827, "step": 10439 }, { "epoch": 0.37838425573556594, "grad_norm": 2.505283369210243, "learning_rate": 7.1409335357546165e-06, "loss": 1.0418, "step": 10440 }, { "epoch": 0.3784204994382226, "grad_norm": 2.699452649809564, "learning_rate": 7.140403119558974e-06, "loss": 0.888, "step": 10441 }, { "epoch": 0.3784567431408793, "grad_norm": 2.3911424953007976, "learning_rate": 7.139872673869872e-06, "loss": 0.8538, "step": 10442 }, { "epoch": 0.37849298684353594, "grad_norm": 2.5101282086538452, "learning_rate": 7.139342198694617e-06, "loss": 0.9984, "step": 10443 }, { "epoch": 0.3785292305461926, "grad_norm": 2.231754029598937, "learning_rate": 7.138811694040522e-06, "loss": 0.6845, "step": 10444 }, { "epoch": 0.3785654742488493, "grad_norm": 2.1776963282477704, "learning_rate": 7.138281159914894e-06, "loss": 0.8072, "step": 10445 }, { "epoch": 0.37860171795150593, "grad_norm": 2.3957657621819317, "learning_rate": 7.137750596325046e-06, "loss": 0.977, "step": 10446 }, { "epoch": 0.3786379616541626, "grad_norm": 2.423957407861431, "learning_rate": 7.137220003278287e-06, "loss": 0.8498, "step": 10447 }, { "epoch": 0.3786742053568192, "grad_norm": 2.2110725840375665, "learning_rate": 7.136689380781929e-06, "loss": 1.021, "step": 10448 }, { "epoch": 0.37871044905947593, "grad_norm": 2.413374670740066, "learning_rate": 7.136158728843285e-06, "loss": 1.0081, "step": 10449 }, { "epoch": 0.3787466927621326, "grad_norm": 2.4089437351567895, "learning_rate": 7.1356280474696625e-06, "loss": 1.1137, "step": 10450 }, { "epoch": 0.3787829364647892, "grad_norm": 2.2021240720502337, "learning_rate": 7.1350973366683795e-06, "loss": 1.0215, "step": 10451 }, { "epoch": 0.3788191801674459, "grad_norm": 2.3732781778644116, "learning_rate": 7.134566596446746e-06, "loss": 0.8637, "step": 10452 }, { "epoch": 0.37885542387010257, "grad_norm": 2.307029057670992, "learning_rate": 7.134035826812076e-06, "loss": 1.0988, "step": 10453 }, { "epoch": 0.3788916675727592, "grad_norm": 2.4756561339704226, "learning_rate": 7.133505027771682e-06, "loss": 0.9013, "step": 10454 }, { "epoch": 0.3789279112754159, "grad_norm": 2.319588831018174, "learning_rate": 7.132974199332879e-06, "loss": 0.952, "step": 10455 }, { "epoch": 0.37896415497807256, "grad_norm": 2.3686777151292264, "learning_rate": 7.132443341502983e-06, "loss": 1.0609, "step": 10456 }, { "epoch": 0.3790003986807292, "grad_norm": 2.4923374149114874, "learning_rate": 7.131912454289305e-06, "loss": 0.9567, "step": 10457 }, { "epoch": 0.3790366423833859, "grad_norm": 2.361262592238758, "learning_rate": 7.131381537699165e-06, "loss": 0.9113, "step": 10458 }, { "epoch": 0.37907288608604256, "grad_norm": 2.269178193499196, "learning_rate": 7.130850591739876e-06, "loss": 1.0733, "step": 10459 }, { "epoch": 0.3791091297886992, "grad_norm": 2.3174018636698746, "learning_rate": 7.130319616418755e-06, "loss": 1.0566, "step": 10460 }, { "epoch": 0.37914537349135585, "grad_norm": 2.3923578604002786, "learning_rate": 7.129788611743116e-06, "loss": 1.0974, "step": 10461 }, { "epoch": 0.37918161719401255, "grad_norm": 2.665031024682449, "learning_rate": 7.12925757772028e-06, "loss": 0.9566, "step": 10462 }, { "epoch": 0.3792178608966692, "grad_norm": 2.223623194532401, "learning_rate": 7.12872651435756e-06, "loss": 1.0813, "step": 10463 }, { "epoch": 0.37925410459932585, "grad_norm": 2.0805095480211877, "learning_rate": 7.128195421662278e-06, "loss": 0.8744, "step": 10464 }, { "epoch": 0.37929034830198255, "grad_norm": 2.601568049836231, "learning_rate": 7.12766429964175e-06, "loss": 0.9297, "step": 10465 }, { "epoch": 0.3793265920046392, "grad_norm": 2.577247351144946, "learning_rate": 7.127133148303295e-06, "loss": 0.8651, "step": 10466 }, { "epoch": 0.37936283570729584, "grad_norm": 1.9260722893623519, "learning_rate": 7.126601967654229e-06, "loss": 0.7184, "step": 10467 }, { "epoch": 0.37939907940995254, "grad_norm": 2.325126172380894, "learning_rate": 7.126070757701877e-06, "loss": 0.8277, "step": 10468 }, { "epoch": 0.3794353231126092, "grad_norm": 2.5249088660363657, "learning_rate": 7.125539518453556e-06, "loss": 1.071, "step": 10469 }, { "epoch": 0.37947156681526584, "grad_norm": 2.239767320391092, "learning_rate": 7.1250082499165855e-06, "loss": 0.9782, "step": 10470 }, { "epoch": 0.3795078105179225, "grad_norm": 2.194342772535507, "learning_rate": 7.124476952098285e-06, "loss": 0.7925, "step": 10471 }, { "epoch": 0.3795440542205792, "grad_norm": 2.1263932448637544, "learning_rate": 7.123945625005978e-06, "loss": 1.0233, "step": 10472 }, { "epoch": 0.37958029792323583, "grad_norm": 2.2050484572305096, "learning_rate": 7.123414268646984e-06, "loss": 0.9791, "step": 10473 }, { "epoch": 0.3796165416258925, "grad_norm": 2.6021921538359702, "learning_rate": 7.122882883028627e-06, "loss": 0.9284, "step": 10474 }, { "epoch": 0.3796527853285492, "grad_norm": 2.336186462451403, "learning_rate": 7.122351468158226e-06, "loss": 0.9416, "step": 10475 }, { "epoch": 0.3796890290312058, "grad_norm": 2.114093351863977, "learning_rate": 7.121820024043107e-06, "loss": 0.9235, "step": 10476 }, { "epoch": 0.3797252727338625, "grad_norm": 2.4185418096711704, "learning_rate": 7.1212885506905894e-06, "loss": 0.9396, "step": 10477 }, { "epoch": 0.3797615164365192, "grad_norm": 2.2749561796291866, "learning_rate": 7.120757048108e-06, "loss": 0.9389, "step": 10478 }, { "epoch": 0.3797977601391758, "grad_norm": 2.3400705457044326, "learning_rate": 7.120225516302662e-06, "loss": 0.8546, "step": 10479 }, { "epoch": 0.37983400384183247, "grad_norm": 2.4420700771651807, "learning_rate": 7.119693955281897e-06, "loss": 0.9501, "step": 10480 }, { "epoch": 0.37987024754448917, "grad_norm": 2.3046098363698753, "learning_rate": 7.11916236505303e-06, "loss": 1.1624, "step": 10481 }, { "epoch": 0.3799064912471458, "grad_norm": 2.548178618125094, "learning_rate": 7.118630745623389e-06, "loss": 1.0114, "step": 10482 }, { "epoch": 0.37994273494980246, "grad_norm": 2.2340596992821564, "learning_rate": 7.118099097000297e-06, "loss": 0.8534, "step": 10483 }, { "epoch": 0.3799789786524591, "grad_norm": 2.516170091279399, "learning_rate": 7.117567419191081e-06, "loss": 1.0171, "step": 10484 }, { "epoch": 0.3800152223551158, "grad_norm": 2.240507357161942, "learning_rate": 7.1170357122030666e-06, "loss": 0.8442, "step": 10485 }, { "epoch": 0.38005146605777246, "grad_norm": 2.408050879015874, "learning_rate": 7.11650397604358e-06, "loss": 1.0033, "step": 10486 }, { "epoch": 0.3800877097604291, "grad_norm": 2.2569473154944593, "learning_rate": 7.115972210719949e-06, "loss": 0.9792, "step": 10487 }, { "epoch": 0.3801239534630858, "grad_norm": 2.337330851648562, "learning_rate": 7.115440416239501e-06, "loss": 0.997, "step": 10488 }, { "epoch": 0.38016019716574245, "grad_norm": 2.46223665469335, "learning_rate": 7.114908592609564e-06, "loss": 0.9675, "step": 10489 }, { "epoch": 0.3801964408683991, "grad_norm": 2.615575280997755, "learning_rate": 7.114376739837465e-06, "loss": 1.024, "step": 10490 }, { "epoch": 0.3802326845710558, "grad_norm": 2.4537862806124697, "learning_rate": 7.113844857930532e-06, "loss": 0.8732, "step": 10491 }, { "epoch": 0.38026892827371245, "grad_norm": 2.483161090650065, "learning_rate": 7.113312946896096e-06, "loss": 0.9384, "step": 10492 }, { "epoch": 0.3803051719763691, "grad_norm": 2.1749798282202373, "learning_rate": 7.112781006741486e-06, "loss": 0.9516, "step": 10493 }, { "epoch": 0.3803414156790258, "grad_norm": 2.4273519547038283, "learning_rate": 7.112249037474032e-06, "loss": 0.8243, "step": 10494 }, { "epoch": 0.38037765938168244, "grad_norm": 2.4106402584478768, "learning_rate": 7.1117170391010626e-06, "loss": 1.0257, "step": 10495 }, { "epoch": 0.3804139030843391, "grad_norm": 2.3708644121592313, "learning_rate": 7.111185011629909e-06, "loss": 0.913, "step": 10496 }, { "epoch": 0.38045014678699574, "grad_norm": 2.343730923841895, "learning_rate": 7.110652955067904e-06, "loss": 0.909, "step": 10497 }, { "epoch": 0.38048639048965244, "grad_norm": 2.0941523564517848, "learning_rate": 7.110120869422378e-06, "loss": 0.7848, "step": 10498 }, { "epoch": 0.3805226341923091, "grad_norm": 2.3629127146878903, "learning_rate": 7.109588754700662e-06, "loss": 0.81, "step": 10499 }, { "epoch": 0.38055887789496573, "grad_norm": 2.3803592150476156, "learning_rate": 7.10905661091009e-06, "loss": 0.98, "step": 10500 }, { "epoch": 0.38059512159762243, "grad_norm": 2.2796168017534373, "learning_rate": 7.108524438057991e-06, "loss": 0.7672, "step": 10501 }, { "epoch": 0.3806313653002791, "grad_norm": 2.6067308910996148, "learning_rate": 7.1079922361517025e-06, "loss": 0.8962, "step": 10502 }, { "epoch": 0.3806676090029357, "grad_norm": 2.3994314681321773, "learning_rate": 7.1074600051985545e-06, "loss": 1.0386, "step": 10503 }, { "epoch": 0.3807038527055924, "grad_norm": 2.281943546516522, "learning_rate": 7.106927745205884e-06, "loss": 0.9157, "step": 10504 }, { "epoch": 0.3807400964082491, "grad_norm": 2.37174385681675, "learning_rate": 7.106395456181021e-06, "loss": 0.898, "step": 10505 }, { "epoch": 0.3807763401109057, "grad_norm": 2.508132419463549, "learning_rate": 7.105863138131304e-06, "loss": 1.04, "step": 10506 }, { "epoch": 0.38081258381356237, "grad_norm": 2.4541773184816735, "learning_rate": 7.105330791064065e-06, "loss": 0.9421, "step": 10507 }, { "epoch": 0.38084882751621907, "grad_norm": 2.5410784165901967, "learning_rate": 7.104798414986643e-06, "loss": 0.8507, "step": 10508 }, { "epoch": 0.3808850712188757, "grad_norm": 2.450359455351609, "learning_rate": 7.104266009906372e-06, "loss": 0.993, "step": 10509 }, { "epoch": 0.38092131492153236, "grad_norm": 2.461263849397101, "learning_rate": 7.103733575830586e-06, "loss": 1.0136, "step": 10510 }, { "epoch": 0.38095755862418906, "grad_norm": 2.138785762616558, "learning_rate": 7.103201112766626e-06, "loss": 0.7606, "step": 10511 }, { "epoch": 0.3809938023268457, "grad_norm": 2.32059925221148, "learning_rate": 7.102668620721824e-06, "loss": 1.1598, "step": 10512 }, { "epoch": 0.38103004602950236, "grad_norm": 2.274861262883809, "learning_rate": 7.102136099703522e-06, "loss": 0.6757, "step": 10513 }, { "epoch": 0.38106628973215906, "grad_norm": 2.187373431353707, "learning_rate": 7.101603549719054e-06, "loss": 0.9123, "step": 10514 }, { "epoch": 0.3811025334348157, "grad_norm": 2.498795292440166, "learning_rate": 7.101070970775762e-06, "loss": 0.7779, "step": 10515 }, { "epoch": 0.38113877713747235, "grad_norm": 2.1122095039422364, "learning_rate": 7.100538362880982e-06, "loss": 1.0088, "step": 10516 }, { "epoch": 0.38117502084012905, "grad_norm": 2.7702092158685963, "learning_rate": 7.100005726042054e-06, "loss": 1.1551, "step": 10517 }, { "epoch": 0.3812112645427857, "grad_norm": 2.313989145401142, "learning_rate": 7.099473060266316e-06, "loss": 0.8636, "step": 10518 }, { "epoch": 0.38124750824544235, "grad_norm": 2.304633942568326, "learning_rate": 7.098940365561111e-06, "loss": 0.9944, "step": 10519 }, { "epoch": 0.381283751948099, "grad_norm": 2.5786352943965123, "learning_rate": 7.098407641933776e-06, "loss": 0.8373, "step": 10520 }, { "epoch": 0.3813199956507557, "grad_norm": 2.3643113005059115, "learning_rate": 7.097874889391653e-06, "loss": 1.1238, "step": 10521 }, { "epoch": 0.38135623935341234, "grad_norm": 2.3520524019759397, "learning_rate": 7.097342107942081e-06, "loss": 0.885, "step": 10522 }, { "epoch": 0.381392483056069, "grad_norm": 2.4124206623811526, "learning_rate": 7.096809297592405e-06, "loss": 0.9684, "step": 10523 }, { "epoch": 0.3814287267587257, "grad_norm": 2.2229882151483507, "learning_rate": 7.0962764583499644e-06, "loss": 0.9867, "step": 10524 }, { "epoch": 0.38146497046138234, "grad_norm": 2.351518683802246, "learning_rate": 7.095743590222103e-06, "loss": 1.0448, "step": 10525 }, { "epoch": 0.381501214164039, "grad_norm": 2.490679781816871, "learning_rate": 7.095210693216161e-06, "loss": 0.9459, "step": 10526 }, { "epoch": 0.3815374578666957, "grad_norm": 2.225330440138711, "learning_rate": 7.094677767339484e-06, "loss": 0.9132, "step": 10527 }, { "epoch": 0.38157370156935233, "grad_norm": 2.460606171038696, "learning_rate": 7.094144812599414e-06, "loss": 0.8142, "step": 10528 }, { "epoch": 0.381609945272009, "grad_norm": 2.2969922628068016, "learning_rate": 7.0936118290032956e-06, "loss": 0.8597, "step": 10529 }, { "epoch": 0.3816461889746657, "grad_norm": 2.1620629284884867, "learning_rate": 7.093078816558471e-06, "loss": 0.748, "step": 10530 }, { "epoch": 0.3816824326773223, "grad_norm": 2.3261470130144626, "learning_rate": 7.092545775272286e-06, "loss": 0.9589, "step": 10531 }, { "epoch": 0.38171867637997897, "grad_norm": 2.6104428821333725, "learning_rate": 7.092012705152086e-06, "loss": 0.9845, "step": 10532 }, { "epoch": 0.3817549200826356, "grad_norm": 2.5116564173726887, "learning_rate": 7.091479606205217e-06, "loss": 0.9499, "step": 10533 }, { "epoch": 0.3817911637852923, "grad_norm": 2.733837773856316, "learning_rate": 7.090946478439023e-06, "loss": 0.9796, "step": 10534 }, { "epoch": 0.38182740748794897, "grad_norm": 2.027880550008381, "learning_rate": 7.090413321860852e-06, "loss": 0.7334, "step": 10535 }, { "epoch": 0.3818636511906056, "grad_norm": 2.4432614559896484, "learning_rate": 7.089880136478048e-06, "loss": 1.028, "step": 10536 }, { "epoch": 0.3818998948932623, "grad_norm": 2.3424808788205236, "learning_rate": 7.089346922297961e-06, "loss": 0.8326, "step": 10537 }, { "epoch": 0.38193613859591896, "grad_norm": 1.9941290664072757, "learning_rate": 7.088813679327937e-06, "loss": 0.6686, "step": 10538 }, { "epoch": 0.3819723822985756, "grad_norm": 2.4034509505120063, "learning_rate": 7.088280407575324e-06, "loss": 0.9161, "step": 10539 }, { "epoch": 0.3820086260012323, "grad_norm": 2.536325143605556, "learning_rate": 7.087747107047469e-06, "loss": 1.0421, "step": 10540 }, { "epoch": 0.38204486970388896, "grad_norm": 2.347022264527175, "learning_rate": 7.087213777751722e-06, "loss": 0.8108, "step": 10541 }, { "epoch": 0.3820811134065456, "grad_norm": 2.3412919336698215, "learning_rate": 7.086680419695431e-06, "loss": 0.9377, "step": 10542 }, { "epoch": 0.38211735710920225, "grad_norm": 2.180460224639285, "learning_rate": 7.0861470328859475e-06, "loss": 0.7819, "step": 10543 }, { "epoch": 0.38215360081185895, "grad_norm": 2.420850751336607, "learning_rate": 7.085613617330617e-06, "loss": 0.8616, "step": 10544 }, { "epoch": 0.3821898445145156, "grad_norm": 2.2673189547272607, "learning_rate": 7.0850801730367935e-06, "loss": 0.8256, "step": 10545 }, { "epoch": 0.38222608821717224, "grad_norm": 2.323999022136142, "learning_rate": 7.084546700011827e-06, "loss": 1.059, "step": 10546 }, { "epoch": 0.38226233191982895, "grad_norm": 2.2684153354497543, "learning_rate": 7.084013198263068e-06, "loss": 0.8613, "step": 10547 }, { "epoch": 0.3822985756224856, "grad_norm": 2.2345330350355046, "learning_rate": 7.083479667797865e-06, "loss": 0.9668, "step": 10548 }, { "epoch": 0.38233481932514224, "grad_norm": 2.241276463548021, "learning_rate": 7.082946108623574e-06, "loss": 0.7551, "step": 10549 }, { "epoch": 0.38237106302779894, "grad_norm": 2.2674314792345323, "learning_rate": 7.0824125207475445e-06, "loss": 0.8263, "step": 10550 }, { "epoch": 0.3824073067304556, "grad_norm": 2.5056424093173963, "learning_rate": 7.081878904177131e-06, "loss": 0.9314, "step": 10551 }, { "epoch": 0.38244355043311223, "grad_norm": 2.0643755577236806, "learning_rate": 7.081345258919684e-06, "loss": 0.8927, "step": 10552 }, { "epoch": 0.38247979413576894, "grad_norm": 2.3443554094437693, "learning_rate": 7.080811584982559e-06, "loss": 1.0598, "step": 10553 }, { "epoch": 0.3825160378384256, "grad_norm": 2.2492175262952463, "learning_rate": 7.080277882373108e-06, "loss": 0.8566, "step": 10554 }, { "epoch": 0.38255228154108223, "grad_norm": 2.349412482340757, "learning_rate": 7.079744151098686e-06, "loss": 0.9229, "step": 10555 }, { "epoch": 0.3825885252437389, "grad_norm": 2.397253529234264, "learning_rate": 7.079210391166649e-06, "loss": 0.8191, "step": 10556 }, { "epoch": 0.3826247689463956, "grad_norm": 2.4033574404356286, "learning_rate": 7.078676602584349e-06, "loss": 0.8946, "step": 10557 }, { "epoch": 0.3826610126490522, "grad_norm": 2.4528188719957216, "learning_rate": 7.0781427853591425e-06, "loss": 0.6906, "step": 10558 }, { "epoch": 0.38269725635170887, "grad_norm": 2.3163086613641433, "learning_rate": 7.077608939498385e-06, "loss": 0.8706, "step": 10559 }, { "epoch": 0.38273350005436557, "grad_norm": 2.5421524513877856, "learning_rate": 7.0770750650094335e-06, "loss": 0.9294, "step": 10560 }, { "epoch": 0.3827697437570222, "grad_norm": 2.5312391070862894, "learning_rate": 7.0765411618996435e-06, "loss": 0.795, "step": 10561 }, { "epoch": 0.38280598745967886, "grad_norm": 2.5358982656029423, "learning_rate": 7.076007230176372e-06, "loss": 0.8012, "step": 10562 }, { "epoch": 0.38284223116233557, "grad_norm": 2.249258747653137, "learning_rate": 7.075473269846975e-06, "loss": 0.8622, "step": 10563 }, { "epoch": 0.3828784748649922, "grad_norm": 2.260831099534719, "learning_rate": 7.074939280918813e-06, "loss": 0.8839, "step": 10564 }, { "epoch": 0.38291471856764886, "grad_norm": 2.2293071595911447, "learning_rate": 7.074405263399244e-06, "loss": 0.9044, "step": 10565 }, { "epoch": 0.38295096227030556, "grad_norm": 2.289671244313857, "learning_rate": 7.073871217295624e-06, "loss": 0.9472, "step": 10566 }, { "epoch": 0.3829872059729622, "grad_norm": 2.4814716497682214, "learning_rate": 7.073337142615312e-06, "loss": 0.8966, "step": 10567 }, { "epoch": 0.38302344967561885, "grad_norm": 2.289047719889257, "learning_rate": 7.0728030393656675e-06, "loss": 0.8717, "step": 10568 }, { "epoch": 0.3830596933782755, "grad_norm": 2.3273809697201036, "learning_rate": 7.072268907554053e-06, "loss": 0.9183, "step": 10569 }, { "epoch": 0.3830959370809322, "grad_norm": 2.1782497384545603, "learning_rate": 7.071734747187824e-06, "loss": 0.8788, "step": 10570 }, { "epoch": 0.38313218078358885, "grad_norm": 2.263602429714561, "learning_rate": 7.0712005582743445e-06, "loss": 1.0761, "step": 10571 }, { "epoch": 0.3831684244862455, "grad_norm": 2.304991622487533, "learning_rate": 7.070666340820973e-06, "loss": 1.0531, "step": 10572 }, { "epoch": 0.3832046681889022, "grad_norm": 2.4531515596995885, "learning_rate": 7.070132094835073e-06, "loss": 0.9276, "step": 10573 }, { "epoch": 0.38324091189155884, "grad_norm": 2.237828070237172, "learning_rate": 7.069597820324004e-06, "loss": 0.9447, "step": 10574 }, { "epoch": 0.3832771555942155, "grad_norm": 2.3741012604010057, "learning_rate": 7.069063517295129e-06, "loss": 0.8987, "step": 10575 }, { "epoch": 0.3833133992968722, "grad_norm": 3.3164101810491995, "learning_rate": 7.068529185755809e-06, "loss": 0.8714, "step": 10576 }, { "epoch": 0.38334964299952884, "grad_norm": 2.1004444672061413, "learning_rate": 7.067994825713409e-06, "loss": 0.8742, "step": 10577 }, { "epoch": 0.3833858867021855, "grad_norm": 2.455755430653115, "learning_rate": 7.06746043717529e-06, "loss": 0.8338, "step": 10578 }, { "epoch": 0.38342213040484213, "grad_norm": 2.5355311692879, "learning_rate": 7.066926020148818e-06, "loss": 0.9159, "step": 10579 }, { "epoch": 0.38345837410749883, "grad_norm": 2.578971878646556, "learning_rate": 7.066391574641354e-06, "loss": 0.9767, "step": 10580 }, { "epoch": 0.3834946178101555, "grad_norm": 2.3961284119722923, "learning_rate": 7.065857100660264e-06, "loss": 0.8819, "step": 10581 }, { "epoch": 0.3835308615128121, "grad_norm": 2.8388416751138856, "learning_rate": 7.065322598212912e-06, "loss": 0.9881, "step": 10582 }, { "epoch": 0.38356710521546883, "grad_norm": 2.2759931851784394, "learning_rate": 7.064788067306665e-06, "loss": 0.9964, "step": 10583 }, { "epoch": 0.3836033489181255, "grad_norm": 2.463916069837902, "learning_rate": 7.064253507948886e-06, "loss": 0.9352, "step": 10584 }, { "epoch": 0.3836395926207821, "grad_norm": 2.3406736710812317, "learning_rate": 7.063718920146944e-06, "loss": 0.9423, "step": 10585 }, { "epoch": 0.3836758363234388, "grad_norm": 2.308905749038683, "learning_rate": 7.063184303908201e-06, "loss": 0.9324, "step": 10586 }, { "epoch": 0.38371208002609547, "grad_norm": 2.647454216587089, "learning_rate": 7.0626496592400286e-06, "loss": 0.8425, "step": 10587 }, { "epoch": 0.3837483237287521, "grad_norm": 2.3896731926107546, "learning_rate": 7.062114986149788e-06, "loss": 1.0779, "step": 10588 }, { "epoch": 0.3837845674314088, "grad_norm": 2.432835265850566, "learning_rate": 7.061580284644853e-06, "loss": 1.0775, "step": 10589 }, { "epoch": 0.38382081113406546, "grad_norm": 2.5755808337084662, "learning_rate": 7.0610455547325875e-06, "loss": 0.9008, "step": 10590 }, { "epoch": 0.3838570548367221, "grad_norm": 2.2352090227014703, "learning_rate": 7.06051079642036e-06, "loss": 0.9605, "step": 10591 }, { "epoch": 0.38389329853937876, "grad_norm": 2.3752170146057834, "learning_rate": 7.059976009715541e-06, "loss": 0.8554, "step": 10592 }, { "epoch": 0.38392954224203546, "grad_norm": 2.5538758669148307, "learning_rate": 7.059441194625497e-06, "loss": 0.984, "step": 10593 }, { "epoch": 0.3839657859446921, "grad_norm": 2.609621551481652, "learning_rate": 7.058906351157601e-06, "loss": 1.0326, "step": 10594 }, { "epoch": 0.38400202964734875, "grad_norm": 2.253367977396338, "learning_rate": 7.058371479319219e-06, "loss": 0.9284, "step": 10595 }, { "epoch": 0.38403827335000545, "grad_norm": 2.2062798507992394, "learning_rate": 7.057836579117724e-06, "loss": 0.8573, "step": 10596 }, { "epoch": 0.3840745170526621, "grad_norm": 2.453860447722254, "learning_rate": 7.057301650560484e-06, "loss": 0.8664, "step": 10597 }, { "epoch": 0.38411076075531875, "grad_norm": 2.492968057318243, "learning_rate": 7.056766693654873e-06, "loss": 0.8115, "step": 10598 }, { "epoch": 0.38414700445797545, "grad_norm": 2.23763719288683, "learning_rate": 7.05623170840826e-06, "loss": 0.9343, "step": 10599 }, { "epoch": 0.3841832481606321, "grad_norm": 2.4754737688014785, "learning_rate": 7.055696694828018e-06, "loss": 1.1275, "step": 10600 }, { "epoch": 0.38421949186328874, "grad_norm": 2.5449033054031425, "learning_rate": 7.0551616529215185e-06, "loss": 0.8339, "step": 10601 }, { "epoch": 0.38425573556594544, "grad_norm": 2.134285442476898, "learning_rate": 7.0546265826961334e-06, "loss": 1.0355, "step": 10602 }, { "epoch": 0.3842919792686021, "grad_norm": 2.122578433595394, "learning_rate": 7.054091484159238e-06, "loss": 0.9762, "step": 10603 }, { "epoch": 0.38432822297125874, "grad_norm": 2.5932593318793153, "learning_rate": 7.0535563573182045e-06, "loss": 0.9032, "step": 10604 }, { "epoch": 0.3843644666739154, "grad_norm": 2.3869931503878288, "learning_rate": 7.053021202180406e-06, "loss": 0.9945, "step": 10605 }, { "epoch": 0.3844007103765721, "grad_norm": 2.356830187864227, "learning_rate": 7.052486018753216e-06, "loss": 0.9168, "step": 10606 }, { "epoch": 0.38443695407922873, "grad_norm": 2.687143713228592, "learning_rate": 7.0519508070440105e-06, "loss": 0.8938, "step": 10607 }, { "epoch": 0.3844731977818854, "grad_norm": 2.4847128164771086, "learning_rate": 7.051415567060165e-06, "loss": 0.983, "step": 10608 }, { "epoch": 0.3845094414845421, "grad_norm": 2.2380784186723894, "learning_rate": 7.050880298809052e-06, "loss": 0.8009, "step": 10609 }, { "epoch": 0.3845456851871987, "grad_norm": 2.2475786053879188, "learning_rate": 7.050345002298049e-06, "loss": 0.8843, "step": 10610 }, { "epoch": 0.3845819288898554, "grad_norm": 2.5374938816558763, "learning_rate": 7.049809677534531e-06, "loss": 0.9208, "step": 10611 }, { "epoch": 0.3846181725925121, "grad_norm": 2.1176432558535927, "learning_rate": 7.049274324525879e-06, "loss": 1.0432, "step": 10612 }, { "epoch": 0.3846544162951687, "grad_norm": 2.295781200305015, "learning_rate": 7.048738943279463e-06, "loss": 1.0718, "step": 10613 }, { "epoch": 0.38469065999782537, "grad_norm": 2.741157065244098, "learning_rate": 7.048203533802664e-06, "loss": 0.9173, "step": 10614 }, { "epoch": 0.384726903700482, "grad_norm": 2.1303310260305572, "learning_rate": 7.04766809610286e-06, "loss": 0.9524, "step": 10615 }, { "epoch": 0.3847631474031387, "grad_norm": 2.1300843170327153, "learning_rate": 7.047132630187428e-06, "loss": 0.8407, "step": 10616 }, { "epoch": 0.38479939110579536, "grad_norm": 2.500079262349507, "learning_rate": 7.0465971360637445e-06, "loss": 0.984, "step": 10617 }, { "epoch": 0.384835634808452, "grad_norm": 1.8367245237859346, "learning_rate": 7.046061613739191e-06, "loss": 0.9123, "step": 10618 }, { "epoch": 0.3848718785111087, "grad_norm": 2.3458320092444205, "learning_rate": 7.045526063221147e-06, "loss": 0.9727, "step": 10619 }, { "epoch": 0.38490812221376536, "grad_norm": 2.0955342641588586, "learning_rate": 7.04499048451699e-06, "loss": 1.0093, "step": 10620 }, { "epoch": 0.384944365916422, "grad_norm": 2.2297280151534937, "learning_rate": 7.0444548776341005e-06, "loss": 0.8994, "step": 10621 }, { "epoch": 0.3849806096190787, "grad_norm": 2.2972855248004462, "learning_rate": 7.04391924257986e-06, "loss": 0.9877, "step": 10622 }, { "epoch": 0.38501685332173535, "grad_norm": 2.4677620288513364, "learning_rate": 7.0433835793616475e-06, "loss": 0.9319, "step": 10623 }, { "epoch": 0.385053097024392, "grad_norm": 2.0934567338378125, "learning_rate": 7.042847887986847e-06, "loss": 0.9122, "step": 10624 }, { "epoch": 0.3850893407270487, "grad_norm": 2.271825620873432, "learning_rate": 7.042312168462837e-06, "loss": 1.035, "step": 10625 }, { "epoch": 0.38512558442970535, "grad_norm": 1.9859207262451166, "learning_rate": 7.041776420797001e-06, "loss": 0.839, "step": 10626 }, { "epoch": 0.385161828132362, "grad_norm": 2.238085154107966, "learning_rate": 7.04124064499672e-06, "loss": 0.8796, "step": 10627 }, { "epoch": 0.38519807183501864, "grad_norm": 2.3773179910791544, "learning_rate": 7.0407048410693765e-06, "loss": 0.8099, "step": 10628 }, { "epoch": 0.38523431553767534, "grad_norm": 2.2911729311133677, "learning_rate": 7.040169009022356e-06, "loss": 1.0611, "step": 10629 }, { "epoch": 0.385270559240332, "grad_norm": 2.3365168753236216, "learning_rate": 7.03963314886304e-06, "loss": 0.9078, "step": 10630 }, { "epoch": 0.38530680294298864, "grad_norm": 2.2447803652726037, "learning_rate": 7.039097260598813e-06, "loss": 1.0107, "step": 10631 }, { "epoch": 0.38534304664564534, "grad_norm": 2.34496856982118, "learning_rate": 7.0385613442370585e-06, "loss": 0.7402, "step": 10632 }, { "epoch": 0.385379290348302, "grad_norm": 2.3501922339905974, "learning_rate": 7.038025399785162e-06, "loss": 0.8616, "step": 10633 }, { "epoch": 0.38541553405095863, "grad_norm": 2.2983480030621353, "learning_rate": 7.037489427250508e-06, "loss": 0.731, "step": 10634 }, { "epoch": 0.38545177775361533, "grad_norm": 2.0991738412469374, "learning_rate": 7.036953426640481e-06, "loss": 0.9796, "step": 10635 }, { "epoch": 0.385488021456272, "grad_norm": 2.0284039422546676, "learning_rate": 7.0364173979624695e-06, "loss": 0.86, "step": 10636 }, { "epoch": 0.3855242651589286, "grad_norm": 2.495960605016683, "learning_rate": 7.035881341223856e-06, "loss": 0.7654, "step": 10637 }, { "epoch": 0.3855605088615853, "grad_norm": 2.42197696976945, "learning_rate": 7.035345256432029e-06, "loss": 1.0008, "step": 10638 }, { "epoch": 0.385596752564242, "grad_norm": 2.073266106791856, "learning_rate": 7.034809143594376e-06, "loss": 1.0233, "step": 10639 }, { "epoch": 0.3856329962668986, "grad_norm": 1.9291801781997644, "learning_rate": 7.034273002718284e-06, "loss": 0.6939, "step": 10640 }, { "epoch": 0.38566923996955527, "grad_norm": 2.2578940435389105, "learning_rate": 7.033736833811139e-06, "loss": 0.6911, "step": 10641 }, { "epoch": 0.38570548367221197, "grad_norm": 2.416858499781813, "learning_rate": 7.0332006368803315e-06, "loss": 0.8145, "step": 10642 }, { "epoch": 0.3857417273748686, "grad_norm": 2.3770654736856054, "learning_rate": 7.03266441193325e-06, "loss": 1.1142, "step": 10643 }, { "epoch": 0.38577797107752526, "grad_norm": 2.4144698303260537, "learning_rate": 7.03212815897728e-06, "loss": 1.0286, "step": 10644 }, { "epoch": 0.38581421478018196, "grad_norm": 2.164584630169198, "learning_rate": 7.031591878019813e-06, "loss": 0.8349, "step": 10645 }, { "epoch": 0.3858504584828386, "grad_norm": 2.6064925118732023, "learning_rate": 7.03105556906824e-06, "loss": 0.9272, "step": 10646 }, { "epoch": 0.38588670218549526, "grad_norm": 2.0872163645691555, "learning_rate": 7.0305192321299486e-06, "loss": 0.8682, "step": 10647 }, { "epoch": 0.38592294588815196, "grad_norm": 2.4187355685090863, "learning_rate": 7.029982867212332e-06, "loss": 0.7261, "step": 10648 }, { "epoch": 0.3859591895908086, "grad_norm": 2.0435207199720207, "learning_rate": 7.029446474322776e-06, "loss": 0.8478, "step": 10649 }, { "epoch": 0.38599543329346525, "grad_norm": 2.3975052144583304, "learning_rate": 7.028910053468677e-06, "loss": 0.8297, "step": 10650 }, { "epoch": 0.3860316769961219, "grad_norm": 2.4899229084379417, "learning_rate": 7.028373604657424e-06, "loss": 1.0198, "step": 10651 }, { "epoch": 0.3860679206987786, "grad_norm": 2.2841503823230966, "learning_rate": 7.0278371278964094e-06, "loss": 0.9125, "step": 10652 }, { "epoch": 0.38610416440143525, "grad_norm": 2.418647862191811, "learning_rate": 7.027300623193028e-06, "loss": 0.9356, "step": 10653 }, { "epoch": 0.3861404081040919, "grad_norm": 2.304017348804712, "learning_rate": 7.026764090554668e-06, "loss": 0.9054, "step": 10654 }, { "epoch": 0.3861766518067486, "grad_norm": 2.447184870719418, "learning_rate": 7.026227529988723e-06, "loss": 0.9989, "step": 10655 }, { "epoch": 0.38621289550940524, "grad_norm": 2.3273852739511245, "learning_rate": 7.02569094150259e-06, "loss": 0.9212, "step": 10656 }, { "epoch": 0.3862491392120619, "grad_norm": 2.0557128043443917, "learning_rate": 7.02515432510366e-06, "loss": 0.7545, "step": 10657 }, { "epoch": 0.3862853829147186, "grad_norm": 2.312090384326317, "learning_rate": 7.024617680799329e-06, "loss": 0.9136, "step": 10658 }, { "epoch": 0.38632162661737524, "grad_norm": 2.301175808714633, "learning_rate": 7.0240810085969905e-06, "loss": 0.8804, "step": 10659 }, { "epoch": 0.3863578703200319, "grad_norm": 2.378954780193102, "learning_rate": 7.02354430850404e-06, "loss": 0.9496, "step": 10660 }, { "epoch": 0.3863941140226886, "grad_norm": 2.505592380509297, "learning_rate": 7.023007580527872e-06, "loss": 0.9474, "step": 10661 }, { "epoch": 0.38643035772534523, "grad_norm": 2.399630559733044, "learning_rate": 7.022470824675883e-06, "loss": 0.8905, "step": 10662 }, { "epoch": 0.3864666014280019, "grad_norm": 2.05966274207811, "learning_rate": 7.02193404095547e-06, "loss": 0.8582, "step": 10663 }, { "epoch": 0.3865028451306585, "grad_norm": 2.367489880990797, "learning_rate": 7.02139722937403e-06, "loss": 0.93, "step": 10664 }, { "epoch": 0.3865390888333152, "grad_norm": 2.207560286244149, "learning_rate": 7.020860389938956e-06, "loss": 0.9344, "step": 10665 }, { "epoch": 0.38657533253597187, "grad_norm": 2.4996559106116796, "learning_rate": 7.020323522657649e-06, "loss": 0.7289, "step": 10666 }, { "epoch": 0.3866115762386285, "grad_norm": 2.290269772299912, "learning_rate": 7.019786627537506e-06, "loss": 1.0249, "step": 10667 }, { "epoch": 0.3866478199412852, "grad_norm": 2.3822200370438433, "learning_rate": 7.019249704585924e-06, "loss": 0.851, "step": 10668 }, { "epoch": 0.38668406364394187, "grad_norm": 2.2604179766824983, "learning_rate": 7.0187127538103024e-06, "loss": 0.7025, "step": 10669 }, { "epoch": 0.3867203073465985, "grad_norm": 2.197988092617419, "learning_rate": 7.018175775218041e-06, "loss": 0.8785, "step": 10670 }, { "epoch": 0.3867565510492552, "grad_norm": 2.3274647411484075, "learning_rate": 7.017638768816537e-06, "loss": 0.9583, "step": 10671 }, { "epoch": 0.38679279475191186, "grad_norm": 2.0525972373872667, "learning_rate": 7.017101734613192e-06, "loss": 0.768, "step": 10672 }, { "epoch": 0.3868290384545685, "grad_norm": 2.142505526729221, "learning_rate": 7.016564672615405e-06, "loss": 0.9635, "step": 10673 }, { "epoch": 0.3868652821572252, "grad_norm": 2.5130945297475034, "learning_rate": 7.016027582830576e-06, "loss": 1.0081, "step": 10674 }, { "epoch": 0.38690152585988186, "grad_norm": 2.4161671512039598, "learning_rate": 7.015490465266106e-06, "loss": 1.0887, "step": 10675 }, { "epoch": 0.3869377695625385, "grad_norm": 2.326513250670015, "learning_rate": 7.014953319929395e-06, "loss": 0.8578, "step": 10676 }, { "epoch": 0.38697401326519515, "grad_norm": 2.301162728102374, "learning_rate": 7.0144161468278485e-06, "loss": 0.973, "step": 10677 }, { "epoch": 0.38701025696785185, "grad_norm": 2.2965970779123044, "learning_rate": 7.013878945968864e-06, "loss": 0.8864, "step": 10678 }, { "epoch": 0.3870465006705085, "grad_norm": 2.2275283120910787, "learning_rate": 7.0133417173598466e-06, "loss": 0.898, "step": 10679 }, { "epoch": 0.38708274437316514, "grad_norm": 2.1727539522781285, "learning_rate": 7.012804461008196e-06, "loss": 0.9906, "step": 10680 }, { "epoch": 0.38711898807582185, "grad_norm": 2.287048208309652, "learning_rate": 7.012267176921319e-06, "loss": 0.9867, "step": 10681 }, { "epoch": 0.3871552317784785, "grad_norm": 2.41511532761972, "learning_rate": 7.011729865106618e-06, "loss": 0.8593, "step": 10682 }, { "epoch": 0.38719147548113514, "grad_norm": 2.364378212459194, "learning_rate": 7.0111925255714954e-06, "loss": 0.8409, "step": 10683 }, { "epoch": 0.38722771918379184, "grad_norm": 2.3840554381408277, "learning_rate": 7.010655158323354e-06, "loss": 0.9302, "step": 10684 }, { "epoch": 0.3872639628864485, "grad_norm": 2.2498369690957283, "learning_rate": 7.010117763369604e-06, "loss": 0.77, "step": 10685 }, { "epoch": 0.38730020658910513, "grad_norm": 2.601998828387148, "learning_rate": 7.009580340717644e-06, "loss": 0.9344, "step": 10686 }, { "epoch": 0.3873364502917618, "grad_norm": 2.3937801800124485, "learning_rate": 7.009042890374885e-06, "loss": 1.0471, "step": 10687 }, { "epoch": 0.3873726939944185, "grad_norm": 2.2379899635134826, "learning_rate": 7.0085054123487265e-06, "loss": 0.7232, "step": 10688 }, { "epoch": 0.38740893769707513, "grad_norm": 2.4173189972511415, "learning_rate": 7.00796790664658e-06, "loss": 0.9445, "step": 10689 }, { "epoch": 0.3874451813997318, "grad_norm": 2.123366523933073, "learning_rate": 7.00743037327585e-06, "loss": 0.9884, "step": 10690 }, { "epoch": 0.3874814251023885, "grad_norm": 2.431345716373866, "learning_rate": 7.006892812243944e-06, "loss": 0.9593, "step": 10691 }, { "epoch": 0.3875176688050451, "grad_norm": 2.817986646130229, "learning_rate": 7.006355223558269e-06, "loss": 0.9617, "step": 10692 }, { "epoch": 0.38755391250770177, "grad_norm": 2.366740421212263, "learning_rate": 7.005817607226231e-06, "loss": 0.675, "step": 10693 }, { "epoch": 0.38759015621035847, "grad_norm": 2.2727365229283305, "learning_rate": 7.005279963255241e-06, "loss": 0.9062, "step": 10694 }, { "epoch": 0.3876263999130151, "grad_norm": 2.495162325732663, "learning_rate": 7.004742291652705e-06, "loss": 1.019, "step": 10695 }, { "epoch": 0.38766264361567176, "grad_norm": 2.5196168238947556, "learning_rate": 7.0042045924260325e-06, "loss": 0.8487, "step": 10696 }, { "epoch": 0.38769888731832847, "grad_norm": 1.9175199150469382, "learning_rate": 7.003666865582633e-06, "loss": 0.8362, "step": 10697 }, { "epoch": 0.3877351310209851, "grad_norm": 2.230064184829092, "learning_rate": 7.003129111129916e-06, "loss": 0.876, "step": 10698 }, { "epoch": 0.38777137472364176, "grad_norm": 2.2917930866661025, "learning_rate": 7.002591329075293e-06, "loss": 0.9323, "step": 10699 }, { "epoch": 0.3878076184262984, "grad_norm": 2.6459424743954933, "learning_rate": 7.002053519426169e-06, "loss": 1.1297, "step": 10700 }, { "epoch": 0.3878438621289551, "grad_norm": 2.2324751354040586, "learning_rate": 7.001515682189961e-06, "loss": 0.8706, "step": 10701 }, { "epoch": 0.38788010583161175, "grad_norm": 2.140818615895922, "learning_rate": 7.0009778173740776e-06, "loss": 0.8921, "step": 10702 }, { "epoch": 0.3879163495342684, "grad_norm": 2.398190654145301, "learning_rate": 7.000439924985929e-06, "loss": 0.7964, "step": 10703 }, { "epoch": 0.3879525932369251, "grad_norm": 2.5525326491587466, "learning_rate": 6.9999020050329285e-06, "loss": 1.1338, "step": 10704 }, { "epoch": 0.38798883693958175, "grad_norm": 2.0991677913723574, "learning_rate": 6.999364057522486e-06, "loss": 0.8901, "step": 10705 }, { "epoch": 0.3880250806422384, "grad_norm": 2.4193071942399857, "learning_rate": 6.998826082462018e-06, "loss": 1.0514, "step": 10706 }, { "epoch": 0.3880613243448951, "grad_norm": 2.24647052544318, "learning_rate": 6.998288079858935e-06, "loss": 1.0405, "step": 10707 }, { "epoch": 0.38809756804755174, "grad_norm": 2.3279613262180514, "learning_rate": 6.997750049720651e-06, "loss": 0.9158, "step": 10708 }, { "epoch": 0.3881338117502084, "grad_norm": 2.388796982491456, "learning_rate": 6.9972119920545786e-06, "loss": 0.9864, "step": 10709 }, { "epoch": 0.38817005545286504, "grad_norm": 2.4122132933334015, "learning_rate": 6.996673906868134e-06, "loss": 0.986, "step": 10710 }, { "epoch": 0.38820629915552174, "grad_norm": 2.269421461232838, "learning_rate": 6.996135794168729e-06, "loss": 1.0116, "step": 10711 }, { "epoch": 0.3882425428581784, "grad_norm": 2.455312032289655, "learning_rate": 6.9955976539637815e-06, "loss": 0.8589, "step": 10712 }, { "epoch": 0.38827878656083503, "grad_norm": 2.222952383991944, "learning_rate": 6.995059486260705e-06, "loss": 0.8885, "step": 10713 }, { "epoch": 0.38831503026349173, "grad_norm": 2.710897897819735, "learning_rate": 6.994521291066915e-06, "loss": 1.1811, "step": 10714 }, { "epoch": 0.3883512739661484, "grad_norm": 2.4185215054349194, "learning_rate": 6.993983068389828e-06, "loss": 0.9318, "step": 10715 }, { "epoch": 0.388387517668805, "grad_norm": 2.135243293875859, "learning_rate": 6.993444818236859e-06, "loss": 0.9154, "step": 10716 }, { "epoch": 0.38842376137146173, "grad_norm": 2.2588272327047543, "learning_rate": 6.992906540615427e-06, "loss": 0.8831, "step": 10717 }, { "epoch": 0.3884600050741184, "grad_norm": 2.662365596762967, "learning_rate": 6.992368235532948e-06, "loss": 0.9618, "step": 10718 }, { "epoch": 0.388496248776775, "grad_norm": 2.246912109196438, "learning_rate": 6.991829902996841e-06, "loss": 0.9453, "step": 10719 }, { "epoch": 0.3885324924794317, "grad_norm": 2.5900880353794706, "learning_rate": 6.991291543014522e-06, "loss": 0.9324, "step": 10720 }, { "epoch": 0.38856873618208837, "grad_norm": 2.5633885967103005, "learning_rate": 6.99075315559341e-06, "loss": 1.0097, "step": 10721 }, { "epoch": 0.388604979884745, "grad_norm": 2.3884213443257942, "learning_rate": 6.990214740740922e-06, "loss": 0.8031, "step": 10722 }, { "epoch": 0.38864122358740166, "grad_norm": 2.2703400024816687, "learning_rate": 6.989676298464479e-06, "loss": 1.0911, "step": 10723 }, { "epoch": 0.38867746729005836, "grad_norm": 2.372305249990386, "learning_rate": 6.9891378287715e-06, "loss": 0.8799, "step": 10724 }, { "epoch": 0.388713710992715, "grad_norm": 2.118327310350264, "learning_rate": 6.988599331669406e-06, "loss": 1.0931, "step": 10725 }, { "epoch": 0.38874995469537166, "grad_norm": 2.437621455226739, "learning_rate": 6.9880608071656134e-06, "loss": 1.0856, "step": 10726 }, { "epoch": 0.38878619839802836, "grad_norm": 2.5625557443614526, "learning_rate": 6.987522255267547e-06, "loss": 0.9082, "step": 10727 }, { "epoch": 0.388822442100685, "grad_norm": 2.92515207733556, "learning_rate": 6.986983675982625e-06, "loss": 0.8919, "step": 10728 }, { "epoch": 0.38885868580334165, "grad_norm": 2.301954329901932, "learning_rate": 6.9864450693182706e-06, "loss": 1.0049, "step": 10729 }, { "epoch": 0.38889492950599835, "grad_norm": 2.6791616944357677, "learning_rate": 6.985906435281904e-06, "loss": 0.6924, "step": 10730 }, { "epoch": 0.388931173208655, "grad_norm": 1.9189667217392763, "learning_rate": 6.985367773880949e-06, "loss": 0.851, "step": 10731 }, { "epoch": 0.38896741691131165, "grad_norm": 2.2189698373454805, "learning_rate": 6.984829085122825e-06, "loss": 0.8154, "step": 10732 }, { "epoch": 0.38900366061396835, "grad_norm": 2.2232270432670216, "learning_rate": 6.984290369014958e-06, "loss": 0.9785, "step": 10733 }, { "epoch": 0.389039904316625, "grad_norm": 2.423346123642858, "learning_rate": 6.983751625564768e-06, "loss": 0.7787, "step": 10734 }, { "epoch": 0.38907614801928164, "grad_norm": 2.318026765102732, "learning_rate": 6.983212854779682e-06, "loss": 0.9618, "step": 10735 }, { "epoch": 0.3891123917219383, "grad_norm": 2.1762040672916956, "learning_rate": 6.982674056667121e-06, "loss": 1.0242, "step": 10736 }, { "epoch": 0.389148635424595, "grad_norm": 2.294603048792129, "learning_rate": 6.982135231234511e-06, "loss": 0.9073, "step": 10737 }, { "epoch": 0.38918487912725164, "grad_norm": 2.635026931868605, "learning_rate": 6.981596378489276e-06, "loss": 0.8355, "step": 10738 }, { "epoch": 0.3892211228299083, "grad_norm": 2.6531749393870396, "learning_rate": 6.981057498438842e-06, "loss": 0.8168, "step": 10739 }, { "epoch": 0.389257366532565, "grad_norm": 2.414540113301135, "learning_rate": 6.980518591090635e-06, "loss": 1.0862, "step": 10740 }, { "epoch": 0.38929361023522163, "grad_norm": 2.375810361092411, "learning_rate": 6.979979656452078e-06, "loss": 0.8644, "step": 10741 }, { "epoch": 0.3893298539378783, "grad_norm": 2.284635557031984, "learning_rate": 6.9794406945305984e-06, "loss": 1.0133, "step": 10742 }, { "epoch": 0.389366097640535, "grad_norm": 2.1728859096589908, "learning_rate": 6.978901705333624e-06, "loss": 0.9147, "step": 10743 }, { "epoch": 0.3894023413431916, "grad_norm": 2.357282164449831, "learning_rate": 6.97836268886858e-06, "loss": 0.9677, "step": 10744 }, { "epoch": 0.3894385850458483, "grad_norm": 2.4413791345711298, "learning_rate": 6.977823645142896e-06, "loss": 1.0068, "step": 10745 }, { "epoch": 0.3894748287485049, "grad_norm": 2.2380358001189946, "learning_rate": 6.977284574163996e-06, "loss": 0.847, "step": 10746 }, { "epoch": 0.3895110724511616, "grad_norm": 2.5713354750389827, "learning_rate": 6.9767454759393125e-06, "loss": 1.0624, "step": 10747 }, { "epoch": 0.38954731615381827, "grad_norm": 2.0427538655660338, "learning_rate": 6.976206350476271e-06, "loss": 0.7363, "step": 10748 }, { "epoch": 0.3895835598564749, "grad_norm": 2.461778352970974, "learning_rate": 6.975667197782302e-06, "loss": 1.0095, "step": 10749 }, { "epoch": 0.3896198035591316, "grad_norm": 2.2909364003548225, "learning_rate": 6.975128017864834e-06, "loss": 0.8324, "step": 10750 }, { "epoch": 0.38965604726178826, "grad_norm": 2.6209595215619146, "learning_rate": 6.974588810731295e-06, "loss": 0.8251, "step": 10751 }, { "epoch": 0.3896922909644449, "grad_norm": 2.4017772377590916, "learning_rate": 6.974049576389117e-06, "loss": 1.0241, "step": 10752 }, { "epoch": 0.3897285346671016, "grad_norm": 2.2484787337694874, "learning_rate": 6.9735103148457295e-06, "loss": 0.973, "step": 10753 }, { "epoch": 0.38976477836975826, "grad_norm": 2.3740206817651157, "learning_rate": 6.972971026108564e-06, "loss": 0.7701, "step": 10754 }, { "epoch": 0.3898010220724149, "grad_norm": 2.4005695847509747, "learning_rate": 6.97243171018505e-06, "loss": 0.8772, "step": 10755 }, { "epoch": 0.3898372657750716, "grad_norm": 2.4250925947752946, "learning_rate": 6.971892367082621e-06, "loss": 0.9819, "step": 10756 }, { "epoch": 0.38987350947772825, "grad_norm": 2.329864137543652, "learning_rate": 6.9713529968087064e-06, "loss": 1.0175, "step": 10757 }, { "epoch": 0.3899097531803849, "grad_norm": 2.4233438024560314, "learning_rate": 6.970813599370741e-06, "loss": 0.8654, "step": 10758 }, { "epoch": 0.38994599688304155, "grad_norm": 2.4129723190337002, "learning_rate": 6.970274174776157e-06, "loss": 0.9829, "step": 10759 }, { "epoch": 0.38998224058569825, "grad_norm": 2.1536929708434145, "learning_rate": 6.9697347230323865e-06, "loss": 0.8896, "step": 10760 }, { "epoch": 0.3900184842883549, "grad_norm": 2.259698227354996, "learning_rate": 6.9691952441468605e-06, "loss": 1.2447, "step": 10761 }, { "epoch": 0.39005472799101154, "grad_norm": 2.655225100373277, "learning_rate": 6.968655738127016e-06, "loss": 1.0274, "step": 10762 }, { "epoch": 0.39009097169366824, "grad_norm": 2.18915334261948, "learning_rate": 6.968116204980285e-06, "loss": 0.7972, "step": 10763 }, { "epoch": 0.3901272153963249, "grad_norm": 2.4626120876701503, "learning_rate": 6.967576644714105e-06, "loss": 0.916, "step": 10764 }, { "epoch": 0.39016345909898154, "grad_norm": 2.498049449486976, "learning_rate": 6.967037057335907e-06, "loss": 0.9933, "step": 10765 }, { "epoch": 0.39019970280163824, "grad_norm": 2.8633125105421984, "learning_rate": 6.96649744285313e-06, "loss": 1.06, "step": 10766 }, { "epoch": 0.3902359465042949, "grad_norm": 2.30934884913498, "learning_rate": 6.965957801273206e-06, "loss": 0.8466, "step": 10767 }, { "epoch": 0.39027219020695153, "grad_norm": 2.6391254732651905, "learning_rate": 6.9654181326035715e-06, "loss": 0.8509, "step": 10768 }, { "epoch": 0.39030843390960823, "grad_norm": 2.287072904774301, "learning_rate": 6.964878436851666e-06, "loss": 0.7824, "step": 10769 }, { "epoch": 0.3903446776122649, "grad_norm": 2.346701891521865, "learning_rate": 6.964338714024924e-06, "loss": 0.9732, "step": 10770 }, { "epoch": 0.3903809213149215, "grad_norm": 2.2164169154279474, "learning_rate": 6.963798964130781e-06, "loss": 0.8761, "step": 10771 }, { "epoch": 0.39041716501757817, "grad_norm": 2.3044678730752843, "learning_rate": 6.963259187176676e-06, "loss": 0.9106, "step": 10772 }, { "epoch": 0.3904534087202349, "grad_norm": 2.370843084344676, "learning_rate": 6.962719383170047e-06, "loss": 1.0037, "step": 10773 }, { "epoch": 0.3904896524228915, "grad_norm": 2.2993040300000835, "learning_rate": 6.9621795521183334e-06, "loss": 0.9493, "step": 10774 }, { "epoch": 0.39052589612554817, "grad_norm": 2.5081904115852964, "learning_rate": 6.96163969402897e-06, "loss": 0.9258, "step": 10775 }, { "epoch": 0.39056213982820487, "grad_norm": 2.3186229104813236, "learning_rate": 6.9610998089093994e-06, "loss": 0.9939, "step": 10776 }, { "epoch": 0.3905983835308615, "grad_norm": 2.4670635360356505, "learning_rate": 6.960559896767059e-06, "loss": 0.8948, "step": 10777 }, { "epoch": 0.39063462723351816, "grad_norm": 2.345371854463766, "learning_rate": 6.960019957609388e-06, "loss": 1.0037, "step": 10778 }, { "epoch": 0.39067087093617486, "grad_norm": 2.2508145245907856, "learning_rate": 6.959479991443829e-06, "loss": 0.8921, "step": 10779 }, { "epoch": 0.3907071146388315, "grad_norm": 2.1628443882654453, "learning_rate": 6.958939998277821e-06, "loss": 0.9609, "step": 10780 }, { "epoch": 0.39074335834148816, "grad_norm": 2.2419173901124108, "learning_rate": 6.958399978118804e-06, "loss": 0.7741, "step": 10781 }, { "epoch": 0.3907796020441448, "grad_norm": 2.3727047129446643, "learning_rate": 6.957859930974219e-06, "loss": 1.148, "step": 10782 }, { "epoch": 0.3908158457468015, "grad_norm": 2.636799637710237, "learning_rate": 6.957319856851507e-06, "loss": 1.0292, "step": 10783 }, { "epoch": 0.39085208944945815, "grad_norm": 2.305670387825901, "learning_rate": 6.956779755758114e-06, "loss": 0.9564, "step": 10784 }, { "epoch": 0.3908883331521148, "grad_norm": 2.2595727584510374, "learning_rate": 6.956239627701477e-06, "loss": 0.9421, "step": 10785 }, { "epoch": 0.3909245768547715, "grad_norm": 2.137815605213562, "learning_rate": 6.9556994726890435e-06, "loss": 0.9993, "step": 10786 }, { "epoch": 0.39096082055742815, "grad_norm": 2.523618698630475, "learning_rate": 6.955159290728252e-06, "loss": 0.9781, "step": 10787 }, { "epoch": 0.3909970642600848, "grad_norm": 2.3127045726828874, "learning_rate": 6.9546190818265495e-06, "loss": 0.9399, "step": 10788 }, { "epoch": 0.3910333079627415, "grad_norm": 2.187680617338701, "learning_rate": 6.954078845991377e-06, "loss": 0.9758, "step": 10789 }, { "epoch": 0.39106955166539814, "grad_norm": 2.227365446175016, "learning_rate": 6.953538583230181e-06, "loss": 0.8755, "step": 10790 }, { "epoch": 0.3911057953680548, "grad_norm": 2.64809610773545, "learning_rate": 6.952998293550403e-06, "loss": 1.046, "step": 10791 }, { "epoch": 0.3911420390707115, "grad_norm": 2.3090931525806586, "learning_rate": 6.95245797695949e-06, "loss": 0.8907, "step": 10792 }, { "epoch": 0.39117828277336814, "grad_norm": 2.214897752856261, "learning_rate": 6.9519176334648875e-06, "loss": 0.9163, "step": 10793 }, { "epoch": 0.3912145264760248, "grad_norm": 2.2415263616936296, "learning_rate": 6.95137726307404e-06, "loss": 0.96, "step": 10794 }, { "epoch": 0.39125077017868143, "grad_norm": 2.3979877334626676, "learning_rate": 6.950836865794395e-06, "loss": 0.9234, "step": 10795 }, { "epoch": 0.39128701388133813, "grad_norm": 2.2572309980987137, "learning_rate": 6.950296441633397e-06, "loss": 0.9491, "step": 10796 }, { "epoch": 0.3913232575839948, "grad_norm": 2.4998406475601285, "learning_rate": 6.949755990598494e-06, "loss": 0.9661, "step": 10797 }, { "epoch": 0.3913595012866514, "grad_norm": 2.322260345437746, "learning_rate": 6.949215512697132e-06, "loss": 0.9282, "step": 10798 }, { "epoch": 0.3913957449893081, "grad_norm": 2.307313577225274, "learning_rate": 6.9486750079367606e-06, "loss": 1.0109, "step": 10799 }, { "epoch": 0.39143198869196477, "grad_norm": 2.499476813385658, "learning_rate": 6.948134476324825e-06, "loss": 0.9937, "step": 10800 }, { "epoch": 0.3914682323946214, "grad_norm": 2.2746600042607783, "learning_rate": 6.947593917868774e-06, "loss": 0.9937, "step": 10801 }, { "epoch": 0.3915044760972781, "grad_norm": 2.481382155462504, "learning_rate": 6.947053332576057e-06, "loss": 1.006, "step": 10802 }, { "epoch": 0.39154071979993477, "grad_norm": 2.340664818651785, "learning_rate": 6.946512720454121e-06, "loss": 0.8852, "step": 10803 }, { "epoch": 0.3915769635025914, "grad_norm": 2.5514946291541185, "learning_rate": 6.94597208151042e-06, "loss": 0.9355, "step": 10804 }, { "epoch": 0.3916132072052481, "grad_norm": 2.2358277261310047, "learning_rate": 6.945431415752398e-06, "loss": 1.1383, "step": 10805 }, { "epoch": 0.39164945090790476, "grad_norm": 2.0176243369242535, "learning_rate": 6.94489072318751e-06, "loss": 1.0057, "step": 10806 }, { "epoch": 0.3916856946105614, "grad_norm": 2.3998538588180107, "learning_rate": 6.9443500038232025e-06, "loss": 0.9726, "step": 10807 }, { "epoch": 0.39172193831321805, "grad_norm": 2.2160643397351283, "learning_rate": 6.9438092576669295e-06, "loss": 0.9256, "step": 10808 }, { "epoch": 0.39175818201587476, "grad_norm": 2.354645150991685, "learning_rate": 6.943268484726139e-06, "loss": 0.9968, "step": 10809 }, { "epoch": 0.3917944257185314, "grad_norm": 2.5338014056541796, "learning_rate": 6.942727685008285e-06, "loss": 0.913, "step": 10810 }, { "epoch": 0.39183066942118805, "grad_norm": 2.373502654487555, "learning_rate": 6.942186858520817e-06, "loss": 0.8955, "step": 10811 }, { "epoch": 0.39186691312384475, "grad_norm": 2.677678626040638, "learning_rate": 6.94164600527119e-06, "loss": 1.0436, "step": 10812 }, { "epoch": 0.3919031568265014, "grad_norm": 1.9945293631778795, "learning_rate": 6.941105125266854e-06, "loss": 0.9065, "step": 10813 }, { "epoch": 0.39193940052915804, "grad_norm": 2.240484931300408, "learning_rate": 6.940564218515264e-06, "loss": 1.0076, "step": 10814 }, { "epoch": 0.39197564423181475, "grad_norm": 2.508626884424313, "learning_rate": 6.940023285023872e-06, "loss": 0.8244, "step": 10815 }, { "epoch": 0.3920118879344714, "grad_norm": 2.3200596689127218, "learning_rate": 6.9394823248001345e-06, "loss": 1.0073, "step": 10816 }, { "epoch": 0.39204813163712804, "grad_norm": 2.221186737938487, "learning_rate": 6.938941337851503e-06, "loss": 0.9457, "step": 10817 }, { "epoch": 0.3920843753397847, "grad_norm": 2.519372117189392, "learning_rate": 6.938400324185431e-06, "loss": 0.8541, "step": 10818 }, { "epoch": 0.3921206190424414, "grad_norm": 2.5233782381089114, "learning_rate": 6.937859283809376e-06, "loss": 0.9941, "step": 10819 }, { "epoch": 0.39215686274509803, "grad_norm": 2.2888760311795586, "learning_rate": 6.937318216730792e-06, "loss": 0.9437, "step": 10820 }, { "epoch": 0.3921931064477547, "grad_norm": 2.5922588958210584, "learning_rate": 6.936777122957134e-06, "loss": 0.9086, "step": 10821 }, { "epoch": 0.3922293501504114, "grad_norm": 2.510026802409049, "learning_rate": 6.936236002495859e-06, "loss": 0.8249, "step": 10822 }, { "epoch": 0.39226559385306803, "grad_norm": 2.3094299245643612, "learning_rate": 6.9356948553544215e-06, "loss": 1.0161, "step": 10823 }, { "epoch": 0.3923018375557247, "grad_norm": 2.2686321782206345, "learning_rate": 6.935153681540281e-06, "loss": 0.7736, "step": 10824 }, { "epoch": 0.3923380812583814, "grad_norm": 1.9768286469773808, "learning_rate": 6.934612481060893e-06, "loss": 0.8911, "step": 10825 }, { "epoch": 0.392374324961038, "grad_norm": 2.143423314973858, "learning_rate": 6.934071253923716e-06, "loss": 0.9818, "step": 10826 }, { "epoch": 0.39241056866369467, "grad_norm": 2.614922282732292, "learning_rate": 6.933530000136205e-06, "loss": 0.9921, "step": 10827 }, { "epoch": 0.39244681236635137, "grad_norm": 2.1550545629826066, "learning_rate": 6.932988719705821e-06, "loss": 0.9428, "step": 10828 }, { "epoch": 0.392483056069008, "grad_norm": 2.196070015839747, "learning_rate": 6.932447412640021e-06, "loss": 0.8086, "step": 10829 }, { "epoch": 0.39251929977166466, "grad_norm": 2.4446731515165543, "learning_rate": 6.9319060789462644e-06, "loss": 0.8436, "step": 10830 }, { "epoch": 0.3925555434743213, "grad_norm": 2.152988560595529, "learning_rate": 6.93136471863201e-06, "loss": 1.0051, "step": 10831 }, { "epoch": 0.392591787176978, "grad_norm": 2.5720420978987875, "learning_rate": 6.930823331704719e-06, "loss": 0.9834, "step": 10832 }, { "epoch": 0.39262803087963466, "grad_norm": 2.3401331141149133, "learning_rate": 6.930281918171848e-06, "loss": 0.8285, "step": 10833 }, { "epoch": 0.3926642745822913, "grad_norm": 2.3644564326294084, "learning_rate": 6.929740478040861e-06, "loss": 0.9127, "step": 10834 }, { "epoch": 0.392700518284948, "grad_norm": 2.442607083818614, "learning_rate": 6.929199011319218e-06, "loss": 0.8417, "step": 10835 }, { "epoch": 0.39273676198760465, "grad_norm": 2.149990413331934, "learning_rate": 6.928657518014379e-06, "loss": 0.8521, "step": 10836 }, { "epoch": 0.3927730056902613, "grad_norm": 2.3576079750186327, "learning_rate": 6.928115998133806e-06, "loss": 0.7223, "step": 10837 }, { "epoch": 0.392809249392918, "grad_norm": 2.3552970706239575, "learning_rate": 6.927574451684959e-06, "loss": 0.8581, "step": 10838 }, { "epoch": 0.39284549309557465, "grad_norm": 2.5594208015562576, "learning_rate": 6.927032878675304e-06, "loss": 0.8181, "step": 10839 }, { "epoch": 0.3928817367982313, "grad_norm": 2.437726464981617, "learning_rate": 6.9264912791123e-06, "loss": 0.9917, "step": 10840 }, { "epoch": 0.392917980500888, "grad_norm": 2.5392629086404797, "learning_rate": 6.925949653003412e-06, "loss": 0.8986, "step": 10841 }, { "epoch": 0.39295422420354464, "grad_norm": 2.445829168080807, "learning_rate": 6.925408000356102e-06, "loss": 1.1697, "step": 10842 }, { "epoch": 0.3929904679062013, "grad_norm": 2.5365806545104257, "learning_rate": 6.924866321177834e-06, "loss": 0.9165, "step": 10843 }, { "epoch": 0.39302671160885794, "grad_norm": 2.2685616804665796, "learning_rate": 6.924324615476072e-06, "loss": 0.8364, "step": 10844 }, { "epoch": 0.39306295531151464, "grad_norm": 2.394158316388174, "learning_rate": 6.923782883258284e-06, "loss": 1.1712, "step": 10845 }, { "epoch": 0.3930991990141713, "grad_norm": 2.5370010451557237, "learning_rate": 6.923241124531928e-06, "loss": 0.9885, "step": 10846 }, { "epoch": 0.39313544271682793, "grad_norm": 2.223448387984958, "learning_rate": 6.922699339304473e-06, "loss": 0.8778, "step": 10847 }, { "epoch": 0.39317168641948463, "grad_norm": 2.3707427043350773, "learning_rate": 6.922157527583384e-06, "loss": 0.8666, "step": 10848 }, { "epoch": 0.3932079301221413, "grad_norm": 2.335163975167826, "learning_rate": 6.921615689376126e-06, "loss": 1.1204, "step": 10849 }, { "epoch": 0.3932441738247979, "grad_norm": 2.230464673070656, "learning_rate": 6.921073824690166e-06, "loss": 0.9612, "step": 10850 }, { "epoch": 0.39328041752745463, "grad_norm": 2.108888177943225, "learning_rate": 6.920531933532972e-06, "loss": 0.9121, "step": 10851 }, { "epoch": 0.3933166612301113, "grad_norm": 2.1805553795629096, "learning_rate": 6.9199900159120085e-06, "loss": 0.8776, "step": 10852 }, { "epoch": 0.3933529049327679, "grad_norm": 2.2463812610842515, "learning_rate": 6.919448071834745e-06, "loss": 0.9759, "step": 10853 }, { "epoch": 0.39338914863542457, "grad_norm": 2.4533916673442224, "learning_rate": 6.918906101308645e-06, "loss": 0.9376, "step": 10854 }, { "epoch": 0.39342539233808127, "grad_norm": 2.28289504387754, "learning_rate": 6.918364104341182e-06, "loss": 0.8037, "step": 10855 }, { "epoch": 0.3934616360407379, "grad_norm": 2.4260186580369516, "learning_rate": 6.917822080939821e-06, "loss": 0.9137, "step": 10856 }, { "epoch": 0.39349787974339456, "grad_norm": 2.2949085697441354, "learning_rate": 6.917280031112033e-06, "loss": 0.889, "step": 10857 }, { "epoch": 0.39353412344605126, "grad_norm": 2.0885479702981513, "learning_rate": 6.9167379548652836e-06, "loss": 0.8226, "step": 10858 }, { "epoch": 0.3935703671487079, "grad_norm": 2.4864414153991587, "learning_rate": 6.916195852207046e-06, "loss": 0.9633, "step": 10859 }, { "epoch": 0.39360661085136456, "grad_norm": 2.332497463639835, "learning_rate": 6.915653723144786e-06, "loss": 0.9839, "step": 10860 }, { "epoch": 0.39364285455402126, "grad_norm": 2.4524835041336566, "learning_rate": 6.915111567685979e-06, "loss": 0.919, "step": 10861 }, { "epoch": 0.3936790982566779, "grad_norm": 2.578820714956084, "learning_rate": 6.91456938583809e-06, "loss": 0.8792, "step": 10862 }, { "epoch": 0.39371534195933455, "grad_norm": 2.3228119441828636, "learning_rate": 6.914027177608596e-06, "loss": 0.8246, "step": 10863 }, { "epoch": 0.39375158566199125, "grad_norm": 2.335211875263958, "learning_rate": 6.913484943004962e-06, "loss": 0.8314, "step": 10864 }, { "epoch": 0.3937878293646479, "grad_norm": 2.8852495326388516, "learning_rate": 6.912942682034665e-06, "loss": 0.9397, "step": 10865 }, { "epoch": 0.39382407306730455, "grad_norm": 2.2513888401227136, "learning_rate": 6.912400394705174e-06, "loss": 0.7872, "step": 10866 }, { "epoch": 0.3938603167699612, "grad_norm": 2.208838622783403, "learning_rate": 6.911858081023963e-06, "loss": 0.8361, "step": 10867 }, { "epoch": 0.3938965604726179, "grad_norm": 2.1719318612047918, "learning_rate": 6.911315740998502e-06, "loss": 0.7815, "step": 10868 }, { "epoch": 0.39393280417527454, "grad_norm": 2.2558657716319948, "learning_rate": 6.9107733746362684e-06, "loss": 1.1066, "step": 10869 }, { "epoch": 0.3939690478779312, "grad_norm": 2.294952159237534, "learning_rate": 6.91023098194473e-06, "loss": 0.8164, "step": 10870 }, { "epoch": 0.3940052915805879, "grad_norm": 2.350097088047709, "learning_rate": 6.909688562931367e-06, "loss": 1.0088, "step": 10871 }, { "epoch": 0.39404153528324454, "grad_norm": 2.2175622926122025, "learning_rate": 6.909146117603649e-06, "loss": 0.9647, "step": 10872 }, { "epoch": 0.3940777789859012, "grad_norm": 2.387601343664302, "learning_rate": 6.908603645969052e-06, "loss": 0.8852, "step": 10873 }, { "epoch": 0.3941140226885579, "grad_norm": 2.07540018477944, "learning_rate": 6.908061148035051e-06, "loss": 0.9459, "step": 10874 }, { "epoch": 0.39415026639121453, "grad_norm": 2.781227662191225, "learning_rate": 6.907518623809123e-06, "loss": 1.1683, "step": 10875 }, { "epoch": 0.3941865100938712, "grad_norm": 2.0501075864632448, "learning_rate": 6.906976073298741e-06, "loss": 0.867, "step": 10876 }, { "epoch": 0.3942227537965279, "grad_norm": 2.3870415655779715, "learning_rate": 6.906433496511382e-06, "loss": 0.9626, "step": 10877 }, { "epoch": 0.3942589974991845, "grad_norm": 2.081139758897391, "learning_rate": 6.905890893454522e-06, "loss": 0.8404, "step": 10878 }, { "epoch": 0.3942952412018412, "grad_norm": 2.480403617459647, "learning_rate": 6.905348264135638e-06, "loss": 1.1253, "step": 10879 }, { "epoch": 0.3943314849044978, "grad_norm": 2.2519881201065766, "learning_rate": 6.904805608562208e-06, "loss": 0.9055, "step": 10880 }, { "epoch": 0.3943677286071545, "grad_norm": 2.457363157829686, "learning_rate": 6.904262926741709e-06, "loss": 0.7587, "step": 10881 }, { "epoch": 0.39440397230981117, "grad_norm": 2.332627261090628, "learning_rate": 6.9037202186816175e-06, "loss": 1.0274, "step": 10882 }, { "epoch": 0.3944402160124678, "grad_norm": 2.155471724448855, "learning_rate": 6.9031774843894135e-06, "loss": 0.8841, "step": 10883 }, { "epoch": 0.3944764597151245, "grad_norm": 2.1400533728712854, "learning_rate": 6.902634723872573e-06, "loss": 0.8574, "step": 10884 }, { "epoch": 0.39451270341778116, "grad_norm": 2.4597656350101174, "learning_rate": 6.902091937138578e-06, "loss": 1.0975, "step": 10885 }, { "epoch": 0.3945489471204378, "grad_norm": 2.409141427763306, "learning_rate": 6.901549124194908e-06, "loss": 1.028, "step": 10886 }, { "epoch": 0.3945851908230945, "grad_norm": 1.9700578480707016, "learning_rate": 6.90100628504904e-06, "loss": 0.8635, "step": 10887 }, { "epoch": 0.39462143452575116, "grad_norm": 2.2967696221291556, "learning_rate": 6.900463419708454e-06, "loss": 0.7677, "step": 10888 }, { "epoch": 0.3946576782284078, "grad_norm": 2.0368016794259436, "learning_rate": 6.899920528180633e-06, "loss": 0.8139, "step": 10889 }, { "epoch": 0.39469392193106445, "grad_norm": 2.4242719211660084, "learning_rate": 6.899377610473055e-06, "loss": 1.0183, "step": 10890 }, { "epoch": 0.39473016563372115, "grad_norm": 2.625047897834085, "learning_rate": 6.898834666593203e-06, "loss": 1.0431, "step": 10891 }, { "epoch": 0.3947664093363778, "grad_norm": 2.451149538144146, "learning_rate": 6.8982916965485555e-06, "loss": 0.7549, "step": 10892 }, { "epoch": 0.39480265303903445, "grad_norm": 2.3239244728771378, "learning_rate": 6.897748700346599e-06, "loss": 0.7563, "step": 10893 }, { "epoch": 0.39483889674169115, "grad_norm": 2.4054999113484823, "learning_rate": 6.897205677994811e-06, "loss": 1.071, "step": 10894 }, { "epoch": 0.3948751404443478, "grad_norm": 2.5981689177905105, "learning_rate": 6.896662629500679e-06, "loss": 0.9139, "step": 10895 }, { "epoch": 0.39491138414700444, "grad_norm": 2.2857753612810914, "learning_rate": 6.896119554871681e-06, "loss": 0.7926, "step": 10896 }, { "epoch": 0.39494762784966114, "grad_norm": 2.4673762814987557, "learning_rate": 6.895576454115303e-06, "loss": 1.1122, "step": 10897 }, { "epoch": 0.3949838715523178, "grad_norm": 2.4670162081492597, "learning_rate": 6.895033327239028e-06, "loss": 1.0228, "step": 10898 }, { "epoch": 0.39502011525497444, "grad_norm": 2.482883678416642, "learning_rate": 6.894490174250339e-06, "loss": 1.1006, "step": 10899 }, { "epoch": 0.39505635895763114, "grad_norm": 2.38489582228644, "learning_rate": 6.893946995156722e-06, "loss": 0.9967, "step": 10900 }, { "epoch": 0.3950926026602878, "grad_norm": 2.4955817083944667, "learning_rate": 6.89340378996566e-06, "loss": 0.9755, "step": 10901 }, { "epoch": 0.39512884636294443, "grad_norm": 2.269275370831288, "learning_rate": 6.892860558684639e-06, "loss": 1.0026, "step": 10902 }, { "epoch": 0.3951650900656011, "grad_norm": 2.340942147104467, "learning_rate": 6.892317301321145e-06, "loss": 0.9427, "step": 10903 }, { "epoch": 0.3952013337682578, "grad_norm": 2.5215703372043676, "learning_rate": 6.891774017882663e-06, "loss": 0.9153, "step": 10904 }, { "epoch": 0.3952375774709144, "grad_norm": 2.174430402613595, "learning_rate": 6.891230708376679e-06, "loss": 1.0297, "step": 10905 }, { "epoch": 0.39527382117357107, "grad_norm": 2.3040585040025934, "learning_rate": 6.8906873728106794e-06, "loss": 0.9895, "step": 10906 }, { "epoch": 0.3953100648762278, "grad_norm": 2.078051324602941, "learning_rate": 6.890144011192151e-06, "loss": 0.7896, "step": 10907 }, { "epoch": 0.3953463085788844, "grad_norm": 2.3538454137302383, "learning_rate": 6.88960062352858e-06, "loss": 1.0524, "step": 10908 }, { "epoch": 0.39538255228154107, "grad_norm": 2.1528844793420685, "learning_rate": 6.889057209827458e-06, "loss": 0.7844, "step": 10909 }, { "epoch": 0.39541879598419777, "grad_norm": 2.3151201082174184, "learning_rate": 6.888513770096268e-06, "loss": 0.8629, "step": 10910 }, { "epoch": 0.3954550396868544, "grad_norm": 2.2345195185288333, "learning_rate": 6.887970304342502e-06, "loss": 0.9211, "step": 10911 }, { "epoch": 0.39549128338951106, "grad_norm": 2.400213380442586, "learning_rate": 6.887426812573645e-06, "loss": 1.147, "step": 10912 }, { "epoch": 0.39552752709216776, "grad_norm": 2.0321992162113465, "learning_rate": 6.88688329479719e-06, "loss": 0.8526, "step": 10913 }, { "epoch": 0.3955637707948244, "grad_norm": 2.299386077307694, "learning_rate": 6.886339751020624e-06, "loss": 0.8417, "step": 10914 }, { "epoch": 0.39560001449748106, "grad_norm": 2.122321720073351, "learning_rate": 6.8857961812514365e-06, "loss": 0.9059, "step": 10915 }, { "epoch": 0.3956362582001377, "grad_norm": 2.0422831504265084, "learning_rate": 6.885252585497117e-06, "loss": 0.9764, "step": 10916 }, { "epoch": 0.3956725019027944, "grad_norm": 1.9885232709184455, "learning_rate": 6.884708963765159e-06, "loss": 1.0111, "step": 10917 }, { "epoch": 0.39570874560545105, "grad_norm": 1.8448638511635362, "learning_rate": 6.884165316063051e-06, "loss": 0.8507, "step": 10918 }, { "epoch": 0.3957449893081077, "grad_norm": 2.3284505646998683, "learning_rate": 6.8836216423982825e-06, "loss": 0.7698, "step": 10919 }, { "epoch": 0.3957812330107644, "grad_norm": 2.208530079524033, "learning_rate": 6.883077942778349e-06, "loss": 0.7761, "step": 10920 }, { "epoch": 0.39581747671342105, "grad_norm": 2.4216164091536, "learning_rate": 6.882534217210739e-06, "loss": 0.8702, "step": 10921 }, { "epoch": 0.3958537204160777, "grad_norm": 2.1820729485289747, "learning_rate": 6.8819904657029475e-06, "loss": 1.0118, "step": 10922 }, { "epoch": 0.3958899641187344, "grad_norm": 2.3674216483196173, "learning_rate": 6.881446688262466e-06, "loss": 0.9679, "step": 10923 }, { "epoch": 0.39592620782139104, "grad_norm": 2.216972638481608, "learning_rate": 6.8809028848967855e-06, "loss": 0.8708, "step": 10924 }, { "epoch": 0.3959624515240477, "grad_norm": 2.3068269133256645, "learning_rate": 6.8803590556134005e-06, "loss": 0.8864, "step": 10925 }, { "epoch": 0.39599869522670433, "grad_norm": 2.2718324342351273, "learning_rate": 6.879815200419806e-06, "loss": 0.9953, "step": 10926 }, { "epoch": 0.39603493892936104, "grad_norm": 2.315262019037616, "learning_rate": 6.879271319323494e-06, "loss": 0.7224, "step": 10927 }, { "epoch": 0.3960711826320177, "grad_norm": 2.268279363945347, "learning_rate": 6.8787274123319605e-06, "loss": 0.9157, "step": 10928 }, { "epoch": 0.39610742633467433, "grad_norm": 2.2678311290814066, "learning_rate": 6.878183479452698e-06, "loss": 1.0697, "step": 10929 }, { "epoch": 0.39614367003733103, "grad_norm": 2.3230435257588504, "learning_rate": 6.877639520693204e-06, "loss": 1.0377, "step": 10930 }, { "epoch": 0.3961799137399877, "grad_norm": 2.48727537799559, "learning_rate": 6.877095536060973e-06, "loss": 0.8624, "step": 10931 }, { "epoch": 0.3962161574426443, "grad_norm": 2.2132208412898047, "learning_rate": 6.876551525563502e-06, "loss": 0.9924, "step": 10932 }, { "epoch": 0.396252401145301, "grad_norm": 2.3409930982934446, "learning_rate": 6.876007489208285e-06, "loss": 1.0552, "step": 10933 }, { "epoch": 0.39628864484795767, "grad_norm": 2.403769735204751, "learning_rate": 6.87546342700282e-06, "loss": 0.8366, "step": 10934 }, { "epoch": 0.3963248885506143, "grad_norm": 2.499028529950513, "learning_rate": 6.874919338954602e-06, "loss": 0.9614, "step": 10935 }, { "epoch": 0.396361132253271, "grad_norm": 2.1758964845650066, "learning_rate": 6.8743752250711304e-06, "loss": 0.8425, "step": 10936 }, { "epoch": 0.39639737595592767, "grad_norm": 2.2534563801480827, "learning_rate": 6.8738310853599e-06, "loss": 0.9131, "step": 10937 }, { "epoch": 0.3964336196585843, "grad_norm": 2.2664733865726494, "learning_rate": 6.873286919828412e-06, "loss": 0.762, "step": 10938 }, { "epoch": 0.39646986336124096, "grad_norm": 2.365413575684831, "learning_rate": 6.872742728484163e-06, "loss": 1.1177, "step": 10939 }, { "epoch": 0.39650610706389766, "grad_norm": 2.6712726456711247, "learning_rate": 6.872198511334651e-06, "loss": 0.923, "step": 10940 }, { "epoch": 0.3965423507665543, "grad_norm": 2.5582601398177363, "learning_rate": 6.8716542683873765e-06, "loss": 0.8772, "step": 10941 }, { "epoch": 0.39657859446921095, "grad_norm": 2.2898157689971184, "learning_rate": 6.871109999649839e-06, "loss": 0.9741, "step": 10942 }, { "epoch": 0.39661483817186766, "grad_norm": 2.019869196232518, "learning_rate": 6.870565705129536e-06, "loss": 0.8178, "step": 10943 }, { "epoch": 0.3966510818745243, "grad_norm": 2.238305398327065, "learning_rate": 6.87002138483397e-06, "loss": 0.9392, "step": 10944 }, { "epoch": 0.39668732557718095, "grad_norm": 2.27165395697009, "learning_rate": 6.869477038770639e-06, "loss": 0.8977, "step": 10945 }, { "epoch": 0.39672356927983765, "grad_norm": 2.416833373618867, "learning_rate": 6.868932666947046e-06, "loss": 1.0218, "step": 10946 }, { "epoch": 0.3967598129824943, "grad_norm": 2.527192657791892, "learning_rate": 6.8683882693706894e-06, "loss": 0.9277, "step": 10947 }, { "epoch": 0.39679605668515094, "grad_norm": 2.3668165200234306, "learning_rate": 6.867843846049075e-06, "loss": 1.0594, "step": 10948 }, { "epoch": 0.39683230038780765, "grad_norm": 2.5982389828522923, "learning_rate": 6.8672993969897e-06, "loss": 0.8356, "step": 10949 }, { "epoch": 0.3968685440904643, "grad_norm": 2.1838174927280156, "learning_rate": 6.86675492220007e-06, "loss": 0.8774, "step": 10950 }, { "epoch": 0.39690478779312094, "grad_norm": 2.480880067072675, "learning_rate": 6.866210421687685e-06, "loss": 0.8089, "step": 10951 }, { "epoch": 0.3969410314957776, "grad_norm": 2.596210153566536, "learning_rate": 6.86566589546005e-06, "loss": 1.015, "step": 10952 }, { "epoch": 0.3969772751984343, "grad_norm": 2.3847351243984725, "learning_rate": 6.865121343524666e-06, "loss": 1.0155, "step": 10953 }, { "epoch": 0.39701351890109093, "grad_norm": 2.367817102551522, "learning_rate": 6.86457676588904e-06, "loss": 0.8806, "step": 10954 }, { "epoch": 0.3970497626037476, "grad_norm": 2.316975566236424, "learning_rate": 6.864032162560672e-06, "loss": 0.9446, "step": 10955 }, { "epoch": 0.3970860063064043, "grad_norm": 2.585103837415337, "learning_rate": 6.863487533547069e-06, "loss": 0.856, "step": 10956 }, { "epoch": 0.39712225000906093, "grad_norm": 2.4801454041520543, "learning_rate": 6.8629428788557335e-06, "loss": 1.025, "step": 10957 }, { "epoch": 0.3971584937117176, "grad_norm": 2.2712153325676274, "learning_rate": 6.862398198494174e-06, "loss": 0.9983, "step": 10958 }, { "epoch": 0.3971947374143743, "grad_norm": 2.420017387027269, "learning_rate": 6.861853492469891e-06, "loss": 0.8673, "step": 10959 }, { "epoch": 0.3972309811170309, "grad_norm": 2.3165741180996635, "learning_rate": 6.861308760790395e-06, "loss": 0.9673, "step": 10960 }, { "epoch": 0.39726722481968757, "grad_norm": 2.518260634984202, "learning_rate": 6.8607640034631885e-06, "loss": 1.0718, "step": 10961 }, { "epoch": 0.3973034685223442, "grad_norm": 2.424604736768565, "learning_rate": 6.860219220495781e-06, "loss": 0.9361, "step": 10962 }, { "epoch": 0.3973397122250009, "grad_norm": 2.1675953575641835, "learning_rate": 6.859674411895677e-06, "loss": 0.864, "step": 10963 }, { "epoch": 0.39737595592765756, "grad_norm": 2.337079065217515, "learning_rate": 6.859129577670384e-06, "loss": 0.9271, "step": 10964 }, { "epoch": 0.3974121996303142, "grad_norm": 2.53184636901227, "learning_rate": 6.85858471782741e-06, "loss": 1.0212, "step": 10965 }, { "epoch": 0.3974484433329709, "grad_norm": 2.3236784119837557, "learning_rate": 6.858039832374262e-06, "loss": 0.9682, "step": 10966 }, { "epoch": 0.39748468703562756, "grad_norm": 2.5499425526673853, "learning_rate": 6.857494921318449e-06, "loss": 0.9954, "step": 10967 }, { "epoch": 0.3975209307382842, "grad_norm": 2.7089119547102474, "learning_rate": 6.85694998466748e-06, "loss": 0.9904, "step": 10968 }, { "epoch": 0.3975571744409409, "grad_norm": 2.8449358849708064, "learning_rate": 6.856405022428863e-06, "loss": 1.0201, "step": 10969 }, { "epoch": 0.39759341814359755, "grad_norm": 2.4376250439925484, "learning_rate": 6.855860034610108e-06, "loss": 0.9946, "step": 10970 }, { "epoch": 0.3976296618462542, "grad_norm": 2.3869938941774387, "learning_rate": 6.855315021218722e-06, "loss": 0.9336, "step": 10971 }, { "epoch": 0.3976659055489109, "grad_norm": 2.2012263613050034, "learning_rate": 6.854769982262219e-06, "loss": 0.9186, "step": 10972 }, { "epoch": 0.39770214925156755, "grad_norm": 2.4482831823242637, "learning_rate": 6.854224917748108e-06, "loss": 0.9113, "step": 10973 }, { "epoch": 0.3977383929542242, "grad_norm": 2.021918839920301, "learning_rate": 6.8536798276838985e-06, "loss": 0.7642, "step": 10974 }, { "epoch": 0.39777463665688084, "grad_norm": 2.0910336880266147, "learning_rate": 6.8531347120771005e-06, "loss": 0.8487, "step": 10975 }, { "epoch": 0.39781088035953754, "grad_norm": 2.1261886479881373, "learning_rate": 6.852589570935229e-06, "loss": 0.8196, "step": 10976 }, { "epoch": 0.3978471240621942, "grad_norm": 2.207653548184659, "learning_rate": 6.852044404265792e-06, "loss": 1.0066, "step": 10977 }, { "epoch": 0.39788336776485084, "grad_norm": 2.1272032675784445, "learning_rate": 6.851499212076305e-06, "loss": 0.915, "step": 10978 }, { "epoch": 0.39791961146750754, "grad_norm": 2.6314693253226786, "learning_rate": 6.850953994374277e-06, "loss": 1.0741, "step": 10979 }, { "epoch": 0.3979558551701642, "grad_norm": 2.2392144448631206, "learning_rate": 6.850408751167223e-06, "loss": 0.9426, "step": 10980 }, { "epoch": 0.39799209887282083, "grad_norm": 2.5762883622494983, "learning_rate": 6.849863482462654e-06, "loss": 1.0481, "step": 10981 }, { "epoch": 0.39802834257547753, "grad_norm": 2.367306586567015, "learning_rate": 6.849318188268087e-06, "loss": 0.9636, "step": 10982 }, { "epoch": 0.3980645862781342, "grad_norm": 2.4107392382249753, "learning_rate": 6.848772868591034e-06, "loss": 1.0869, "step": 10983 }, { "epoch": 0.3981008299807908, "grad_norm": 2.3342424323618163, "learning_rate": 6.848227523439009e-06, "loss": 1.002, "step": 10984 }, { "epoch": 0.3981370736834475, "grad_norm": 2.307640844078189, "learning_rate": 6.847682152819524e-06, "loss": 0.857, "step": 10985 }, { "epoch": 0.3981733173861042, "grad_norm": 2.436630150695072, "learning_rate": 6.847136756740098e-06, "loss": 0.954, "step": 10986 }, { "epoch": 0.3982095610887608, "grad_norm": 2.1340808707073085, "learning_rate": 6.846591335208243e-06, "loss": 0.8995, "step": 10987 }, { "epoch": 0.39824580479141747, "grad_norm": 2.001397130374925, "learning_rate": 6.8460458882314795e-06, "loss": 0.6883, "step": 10988 }, { "epoch": 0.39828204849407417, "grad_norm": 2.3097055184345474, "learning_rate": 6.845500415817318e-06, "loss": 0.767, "step": 10989 }, { "epoch": 0.3983182921967308, "grad_norm": 2.4082302839267355, "learning_rate": 6.844954917973277e-06, "loss": 0.9169, "step": 10990 }, { "epoch": 0.39835453589938746, "grad_norm": 2.520727371297318, "learning_rate": 6.844409394706874e-06, "loss": 1.0077, "step": 10991 }, { "epoch": 0.39839077960204416, "grad_norm": 2.3310876804551244, "learning_rate": 6.843863846025625e-06, "loss": 0.7443, "step": 10992 }, { "epoch": 0.3984270233047008, "grad_norm": 2.0461324713808167, "learning_rate": 6.843318271937045e-06, "loss": 0.6799, "step": 10993 }, { "epoch": 0.39846326700735746, "grad_norm": 2.4090677959318754, "learning_rate": 6.842772672448656e-06, "loss": 0.9477, "step": 10994 }, { "epoch": 0.39849951071001416, "grad_norm": 2.440623981778712, "learning_rate": 6.842227047567973e-06, "loss": 0.8039, "step": 10995 }, { "epoch": 0.3985357544126708, "grad_norm": 2.2536535382008753, "learning_rate": 6.841681397302517e-06, "loss": 0.8812, "step": 10996 }, { "epoch": 0.39857199811532745, "grad_norm": 2.4434568622934187, "learning_rate": 6.841135721659804e-06, "loss": 0.9535, "step": 10997 }, { "epoch": 0.3986082418179841, "grad_norm": 2.5453206409195457, "learning_rate": 6.840590020647353e-06, "loss": 1.0909, "step": 10998 }, { "epoch": 0.3986444855206408, "grad_norm": 2.3567526515914965, "learning_rate": 6.8400442942726875e-06, "loss": 0.7185, "step": 10999 }, { "epoch": 0.39868072922329745, "grad_norm": 2.3891228248545113, "learning_rate": 6.839498542543324e-06, "loss": 0.8367, "step": 11000 }, { "epoch": 0.3987169729259541, "grad_norm": 2.5558056118350505, "learning_rate": 6.838952765466782e-06, "loss": 1.0018, "step": 11001 }, { "epoch": 0.3987532166286108, "grad_norm": 2.2690867988019106, "learning_rate": 6.838406963050583e-06, "loss": 0.8297, "step": 11002 }, { "epoch": 0.39878946033126744, "grad_norm": 2.4376252983648437, "learning_rate": 6.837861135302248e-06, "loss": 0.998, "step": 11003 }, { "epoch": 0.3988257040339241, "grad_norm": 2.181095973818615, "learning_rate": 6.837315282229298e-06, "loss": 0.8138, "step": 11004 }, { "epoch": 0.3988619477365808, "grad_norm": 2.6056537802901434, "learning_rate": 6.836769403839254e-06, "loss": 0.8827, "step": 11005 }, { "epoch": 0.39889819143923744, "grad_norm": 2.3497232291742574, "learning_rate": 6.836223500139639e-06, "loss": 0.9154, "step": 11006 }, { "epoch": 0.3989344351418941, "grad_norm": 2.1513059039714517, "learning_rate": 6.835677571137975e-06, "loss": 1.0454, "step": 11007 }, { "epoch": 0.3989706788445508, "grad_norm": 2.2247417438151804, "learning_rate": 6.835131616841783e-06, "loss": 0.8114, "step": 11008 }, { "epoch": 0.39900692254720743, "grad_norm": 2.9454137169666614, "learning_rate": 6.83458563725859e-06, "loss": 1.0732, "step": 11009 }, { "epoch": 0.3990431662498641, "grad_norm": 2.367214192204642, "learning_rate": 6.834039632395915e-06, "loss": 0.941, "step": 11010 }, { "epoch": 0.3990794099525207, "grad_norm": 2.3045651265565485, "learning_rate": 6.833493602261283e-06, "loss": 0.9029, "step": 11011 }, { "epoch": 0.3991156536551774, "grad_norm": 2.4490411823481204, "learning_rate": 6.8329475468622174e-06, "loss": 0.9013, "step": 11012 }, { "epoch": 0.3991518973578341, "grad_norm": 2.0745158124200507, "learning_rate": 6.832401466206244e-06, "loss": 0.9096, "step": 11013 }, { "epoch": 0.3991881410604907, "grad_norm": 2.106059956776265, "learning_rate": 6.831855360300885e-06, "loss": 0.9304, "step": 11014 }, { "epoch": 0.3992243847631474, "grad_norm": 2.6010348536302845, "learning_rate": 6.831309229153668e-06, "loss": 0.8599, "step": 11015 }, { "epoch": 0.39926062846580407, "grad_norm": 2.2108476272558235, "learning_rate": 6.830763072772118e-06, "loss": 0.9845, "step": 11016 }, { "epoch": 0.3992968721684607, "grad_norm": 2.254133704094943, "learning_rate": 6.83021689116376e-06, "loss": 0.869, "step": 11017 }, { "epoch": 0.3993331158711174, "grad_norm": 2.2251723671889243, "learning_rate": 6.82967068433612e-06, "loss": 1.0038, "step": 11018 }, { "epoch": 0.39936935957377406, "grad_norm": 2.256978115557974, "learning_rate": 6.829124452296724e-06, "loss": 0.9299, "step": 11019 }, { "epoch": 0.3994056032764307, "grad_norm": 2.423616119556558, "learning_rate": 6.8285781950531e-06, "loss": 0.9316, "step": 11020 }, { "epoch": 0.39944184697908736, "grad_norm": 2.478386890555477, "learning_rate": 6.8280319126127735e-06, "loss": 0.9914, "step": 11021 }, { "epoch": 0.39947809068174406, "grad_norm": 2.366824055746758, "learning_rate": 6.827485604983272e-06, "loss": 1.0124, "step": 11022 }, { "epoch": 0.3995143343844007, "grad_norm": 2.1333317783111587, "learning_rate": 6.826939272172126e-06, "loss": 0.9582, "step": 11023 }, { "epoch": 0.39955057808705735, "grad_norm": 2.335966025894045, "learning_rate": 6.826392914186861e-06, "loss": 0.8874, "step": 11024 }, { "epoch": 0.39958682178971405, "grad_norm": 2.082007672855245, "learning_rate": 6.8258465310350065e-06, "loss": 1.0428, "step": 11025 }, { "epoch": 0.3996230654923707, "grad_norm": 2.044108767447435, "learning_rate": 6.82530012272409e-06, "loss": 0.7173, "step": 11026 }, { "epoch": 0.39965930919502735, "grad_norm": 2.3624373466741653, "learning_rate": 6.824753689261643e-06, "loss": 0.9545, "step": 11027 }, { "epoch": 0.39969555289768405, "grad_norm": 2.485143514764006, "learning_rate": 6.8242072306551935e-06, "loss": 0.8247, "step": 11028 }, { "epoch": 0.3997317966003407, "grad_norm": 2.575639553181671, "learning_rate": 6.823660746912271e-06, "loss": 0.8499, "step": 11029 }, { "epoch": 0.39976804030299734, "grad_norm": 2.2062129954211396, "learning_rate": 6.823114238040407e-06, "loss": 1.0472, "step": 11030 }, { "epoch": 0.39980428400565404, "grad_norm": 2.299846540485188, "learning_rate": 6.822567704047132e-06, "loss": 0.8935, "step": 11031 }, { "epoch": 0.3998405277083107, "grad_norm": 2.005163660615684, "learning_rate": 6.822021144939974e-06, "loss": 0.7544, "step": 11032 }, { "epoch": 0.39987677141096734, "grad_norm": 2.3996220564482402, "learning_rate": 6.821474560726469e-06, "loss": 1.0385, "step": 11033 }, { "epoch": 0.399913015113624, "grad_norm": 2.5150318441981634, "learning_rate": 6.820927951414144e-06, "loss": 0.9358, "step": 11034 }, { "epoch": 0.3999492588162807, "grad_norm": 2.270872765570616, "learning_rate": 6.8203813170105335e-06, "loss": 0.8913, "step": 11035 }, { "epoch": 0.39998550251893733, "grad_norm": 2.0740072623072896, "learning_rate": 6.81983465752317e-06, "loss": 0.7859, "step": 11036 }, { "epoch": 0.400021746221594, "grad_norm": 2.462901990377833, "learning_rate": 6.819287972959585e-06, "loss": 0.9023, "step": 11037 }, { "epoch": 0.4000579899242507, "grad_norm": 2.7547044113333845, "learning_rate": 6.818741263327313e-06, "loss": 1.009, "step": 11038 }, { "epoch": 0.4000942336269073, "grad_norm": 2.3048252611466555, "learning_rate": 6.818194528633885e-06, "loss": 0.7383, "step": 11039 }, { "epoch": 0.40013047732956397, "grad_norm": 2.735706980800754, "learning_rate": 6.817647768886838e-06, "loss": 1.1108, "step": 11040 }, { "epoch": 0.4001667210322207, "grad_norm": 2.394405543235659, "learning_rate": 6.817100984093703e-06, "loss": 1.0017, "step": 11041 }, { "epoch": 0.4002029647348773, "grad_norm": 2.627277785496987, "learning_rate": 6.816554174262015e-06, "loss": 1.0901, "step": 11042 }, { "epoch": 0.40023920843753397, "grad_norm": 2.519295832952525, "learning_rate": 6.81600733939931e-06, "loss": 0.9558, "step": 11043 }, { "epoch": 0.40027545214019067, "grad_norm": 2.504881040009707, "learning_rate": 6.81546047951312e-06, "loss": 1.006, "step": 11044 }, { "epoch": 0.4003116958428473, "grad_norm": 2.645781870174243, "learning_rate": 6.814913594610985e-06, "loss": 1.001, "step": 11045 }, { "epoch": 0.40034793954550396, "grad_norm": 2.550056478101702, "learning_rate": 6.814366684700436e-06, "loss": 0.8685, "step": 11046 }, { "epoch": 0.4003841832481606, "grad_norm": 2.343109779585032, "learning_rate": 6.813819749789013e-06, "loss": 0.9472, "step": 11047 }, { "epoch": 0.4004204269508173, "grad_norm": 2.367233637249802, "learning_rate": 6.813272789884251e-06, "loss": 0.6298, "step": 11048 }, { "epoch": 0.40045667065347396, "grad_norm": 2.3479772453754806, "learning_rate": 6.8127258049936866e-06, "loss": 0.7839, "step": 11049 }, { "epoch": 0.4004929143561306, "grad_norm": 2.175600429368769, "learning_rate": 6.812178795124856e-06, "loss": 0.8382, "step": 11050 }, { "epoch": 0.4005291580587873, "grad_norm": 2.279065128744221, "learning_rate": 6.811631760285299e-06, "loss": 0.9546, "step": 11051 }, { "epoch": 0.40056540176144395, "grad_norm": 1.9446838014469052, "learning_rate": 6.8110847004825495e-06, "loss": 0.7249, "step": 11052 }, { "epoch": 0.4006016454641006, "grad_norm": 2.2603359829399765, "learning_rate": 6.810537615724151e-06, "loss": 0.9857, "step": 11053 }, { "epoch": 0.4006378891667573, "grad_norm": 2.0990336437181987, "learning_rate": 6.809990506017636e-06, "loss": 0.8624, "step": 11054 }, { "epoch": 0.40067413286941395, "grad_norm": 2.4999644753452417, "learning_rate": 6.80944337137055e-06, "loss": 1.1504, "step": 11055 }, { "epoch": 0.4007103765720706, "grad_norm": 2.1957155515238864, "learning_rate": 6.808896211790426e-06, "loss": 0.8922, "step": 11056 }, { "epoch": 0.40074662027472724, "grad_norm": 2.280055264500563, "learning_rate": 6.808349027284808e-06, "loss": 0.9797, "step": 11057 }, { "epoch": 0.40078286397738394, "grad_norm": 2.6753716327484747, "learning_rate": 6.807801817861231e-06, "loss": 0.9161, "step": 11058 }, { "epoch": 0.4008191076800406, "grad_norm": 2.637463383184655, "learning_rate": 6.807254583527241e-06, "loss": 1.0371, "step": 11059 }, { "epoch": 0.40085535138269723, "grad_norm": 2.2683044852870404, "learning_rate": 6.806707324290376e-06, "loss": 0.8398, "step": 11060 }, { "epoch": 0.40089159508535394, "grad_norm": 2.442864885806883, "learning_rate": 6.806160040158176e-06, "loss": 1.0801, "step": 11061 }, { "epoch": 0.4009278387880106, "grad_norm": 2.291578515674562, "learning_rate": 6.805612731138183e-06, "loss": 1.0461, "step": 11062 }, { "epoch": 0.40096408249066723, "grad_norm": 2.3618712783274676, "learning_rate": 6.805065397237939e-06, "loss": 0.939, "step": 11063 }, { "epoch": 0.40100032619332393, "grad_norm": 2.2744463429674298, "learning_rate": 6.804518038464983e-06, "loss": 0.8168, "step": 11064 }, { "epoch": 0.4010365698959806, "grad_norm": 2.5862423904075733, "learning_rate": 6.8039706548268635e-06, "loss": 0.9402, "step": 11065 }, { "epoch": 0.4010728135986372, "grad_norm": 2.3884965111370673, "learning_rate": 6.803423246331117e-06, "loss": 0.9767, "step": 11066 }, { "epoch": 0.4011090573012939, "grad_norm": 2.2572626213822176, "learning_rate": 6.802875812985289e-06, "loss": 0.9036, "step": 11067 }, { "epoch": 0.40114530100395057, "grad_norm": 2.4201566462845276, "learning_rate": 6.8023283547969236e-06, "loss": 1.0149, "step": 11068 }, { "epoch": 0.4011815447066072, "grad_norm": 2.2639507674116195, "learning_rate": 6.801780871773563e-06, "loss": 1.0099, "step": 11069 }, { "epoch": 0.40121778840926386, "grad_norm": 2.1557276238740153, "learning_rate": 6.801233363922752e-06, "loss": 0.8845, "step": 11070 }, { "epoch": 0.40125403211192057, "grad_norm": 2.503429544659608, "learning_rate": 6.800685831252034e-06, "loss": 0.9925, "step": 11071 }, { "epoch": 0.4012902758145772, "grad_norm": 2.17741454380584, "learning_rate": 6.800138273768955e-06, "loss": 1.0144, "step": 11072 }, { "epoch": 0.40132651951723386, "grad_norm": 2.317287194932693, "learning_rate": 6.799590691481058e-06, "loss": 0.9773, "step": 11073 }, { "epoch": 0.40136276321989056, "grad_norm": 2.429515227036484, "learning_rate": 6.799043084395891e-06, "loss": 1.0106, "step": 11074 }, { "epoch": 0.4013990069225472, "grad_norm": 2.205746820034339, "learning_rate": 6.7984954525209975e-06, "loss": 1.146, "step": 11075 }, { "epoch": 0.40143525062520385, "grad_norm": 2.327391483128241, "learning_rate": 6.797947795863924e-06, "loss": 0.7655, "step": 11076 }, { "epoch": 0.40147149432786056, "grad_norm": 2.2360718140596894, "learning_rate": 6.797400114432218e-06, "loss": 0.9302, "step": 11077 }, { "epoch": 0.4015077380305172, "grad_norm": 2.104576983131263, "learning_rate": 6.796852408233426e-06, "loss": 0.7216, "step": 11078 }, { "epoch": 0.40154398173317385, "grad_norm": 2.5283170373789368, "learning_rate": 6.796304677275093e-06, "loss": 0.9291, "step": 11079 }, { "epoch": 0.40158022543583055, "grad_norm": 2.305089776905212, "learning_rate": 6.795756921564769e-06, "loss": 0.9077, "step": 11080 }, { "epoch": 0.4016164691384872, "grad_norm": 2.346853319557705, "learning_rate": 6.79520914111e-06, "loss": 0.9862, "step": 11081 }, { "epoch": 0.40165271284114384, "grad_norm": 2.332354869589027, "learning_rate": 6.794661335918336e-06, "loss": 0.7742, "step": 11082 }, { "epoch": 0.4016889565438005, "grad_norm": 2.466260022816151, "learning_rate": 6.794113505997322e-06, "loss": 0.8156, "step": 11083 }, { "epoch": 0.4017252002464572, "grad_norm": 2.014917882557823, "learning_rate": 6.793565651354511e-06, "loss": 0.7865, "step": 11084 }, { "epoch": 0.40176144394911384, "grad_norm": 2.301337570121103, "learning_rate": 6.793017771997448e-06, "loss": 0.8911, "step": 11085 }, { "epoch": 0.4017976876517705, "grad_norm": 2.2493904604616644, "learning_rate": 6.792469867933687e-06, "loss": 0.8974, "step": 11086 }, { "epoch": 0.4018339313544272, "grad_norm": 2.2291327892803885, "learning_rate": 6.791921939170775e-06, "loss": 1.0917, "step": 11087 }, { "epoch": 0.40187017505708383, "grad_norm": 2.1778424888405006, "learning_rate": 6.791373985716261e-06, "loss": 1.069, "step": 11088 }, { "epoch": 0.4019064187597405, "grad_norm": 2.2638261467997443, "learning_rate": 6.790826007577696e-06, "loss": 1.0652, "step": 11089 }, { "epoch": 0.4019426624623972, "grad_norm": 2.2416714104557305, "learning_rate": 6.790278004762634e-06, "loss": 0.7957, "step": 11090 }, { "epoch": 0.40197890616505383, "grad_norm": 2.42251398175287, "learning_rate": 6.789729977278623e-06, "loss": 0.8947, "step": 11091 }, { "epoch": 0.4020151498677105, "grad_norm": 2.4955789185618507, "learning_rate": 6.789181925133215e-06, "loss": 0.8903, "step": 11092 }, { "epoch": 0.4020513935703671, "grad_norm": 2.385855275079201, "learning_rate": 6.788633848333961e-06, "loss": 1.023, "step": 11093 }, { "epoch": 0.4020876372730238, "grad_norm": 2.479287239788899, "learning_rate": 6.7880857468884165e-06, "loss": 1.0382, "step": 11094 }, { "epoch": 0.40212388097568047, "grad_norm": 2.2358143957347325, "learning_rate": 6.78753762080413e-06, "loss": 0.8049, "step": 11095 }, { "epoch": 0.4021601246783371, "grad_norm": 2.2770677130990804, "learning_rate": 6.786989470088658e-06, "loss": 0.8286, "step": 11096 }, { "epoch": 0.4021963683809938, "grad_norm": 2.292966593574228, "learning_rate": 6.7864412947495505e-06, "loss": 0.7962, "step": 11097 }, { "epoch": 0.40223261208365046, "grad_norm": 2.172663315338694, "learning_rate": 6.785893094794364e-06, "loss": 0.9058, "step": 11098 }, { "epoch": 0.4022688557863071, "grad_norm": 2.344920947489749, "learning_rate": 6.785344870230648e-06, "loss": 0.924, "step": 11099 }, { "epoch": 0.4023050994889638, "grad_norm": 2.2887553777617002, "learning_rate": 6.784796621065961e-06, "loss": 0.8791, "step": 11100 }, { "epoch": 0.40234134319162046, "grad_norm": 2.7713885665382034, "learning_rate": 6.784248347307856e-06, "loss": 0.7819, "step": 11101 }, { "epoch": 0.4023775868942771, "grad_norm": 2.6051157364894606, "learning_rate": 6.783700048963888e-06, "loss": 0.9102, "step": 11102 }, { "epoch": 0.4024138305969338, "grad_norm": 1.9711782330292427, "learning_rate": 6.7831517260416115e-06, "loss": 0.8661, "step": 11103 }, { "epoch": 0.40245007429959045, "grad_norm": 2.6242425663379656, "learning_rate": 6.782603378548582e-06, "loss": 0.9905, "step": 11104 }, { "epoch": 0.4024863180022471, "grad_norm": 2.4765153937144055, "learning_rate": 6.782055006492358e-06, "loss": 1.1715, "step": 11105 }, { "epoch": 0.40252256170490375, "grad_norm": 2.288473777833228, "learning_rate": 6.781506609880492e-06, "loss": 0.9121, "step": 11106 }, { "epoch": 0.40255880540756045, "grad_norm": 2.3283600976233205, "learning_rate": 6.780958188720543e-06, "loss": 0.877, "step": 11107 }, { "epoch": 0.4025950491102171, "grad_norm": 2.5310406660356533, "learning_rate": 6.7804097430200685e-06, "loss": 0.8137, "step": 11108 }, { "epoch": 0.40263129281287374, "grad_norm": 2.04570094324717, "learning_rate": 6.779861272786622e-06, "loss": 0.8583, "step": 11109 }, { "epoch": 0.40266753651553044, "grad_norm": 2.2571685345743506, "learning_rate": 6.779312778027766e-06, "loss": 0.8226, "step": 11110 }, { "epoch": 0.4027037802181871, "grad_norm": 2.415097011507808, "learning_rate": 6.778764258751054e-06, "loss": 0.9655, "step": 11111 }, { "epoch": 0.40274002392084374, "grad_norm": 2.2629716584790573, "learning_rate": 6.778215714964047e-06, "loss": 0.8675, "step": 11112 }, { "epoch": 0.40277626762350044, "grad_norm": 2.362043894236166, "learning_rate": 6.777667146674303e-06, "loss": 0.9761, "step": 11113 }, { "epoch": 0.4028125113261571, "grad_norm": 2.3885006710249934, "learning_rate": 6.777118553889381e-06, "loss": 0.9808, "step": 11114 }, { "epoch": 0.40284875502881373, "grad_norm": 2.195218376847254, "learning_rate": 6.776569936616839e-06, "loss": 0.9007, "step": 11115 }, { "epoch": 0.40288499873147043, "grad_norm": 2.3899525918631515, "learning_rate": 6.776021294864239e-06, "loss": 0.9535, "step": 11116 }, { "epoch": 0.4029212424341271, "grad_norm": 2.220367554986487, "learning_rate": 6.7754726286391406e-06, "loss": 0.8357, "step": 11117 }, { "epoch": 0.4029574861367837, "grad_norm": 2.2179816978219025, "learning_rate": 6.774923937949101e-06, "loss": 0.8894, "step": 11118 }, { "epoch": 0.4029937298394404, "grad_norm": 2.9065222619231275, "learning_rate": 6.774375222801684e-06, "loss": 0.9231, "step": 11119 }, { "epoch": 0.4030299735420971, "grad_norm": 2.896573988228361, "learning_rate": 6.77382648320445e-06, "loss": 0.9422, "step": 11120 }, { "epoch": 0.4030662172447537, "grad_norm": 2.381420228589286, "learning_rate": 6.773277719164958e-06, "loss": 0.8877, "step": 11121 }, { "epoch": 0.40310246094741037, "grad_norm": 2.455508072645727, "learning_rate": 6.772728930690773e-06, "loss": 1.1119, "step": 11122 }, { "epoch": 0.40313870465006707, "grad_norm": 2.2782818159094265, "learning_rate": 6.772180117789455e-06, "loss": 1.013, "step": 11123 }, { "epoch": 0.4031749483527237, "grad_norm": 2.381270714171748, "learning_rate": 6.771631280468567e-06, "loss": 0.9012, "step": 11124 }, { "epoch": 0.40321119205538036, "grad_norm": 2.517427329713549, "learning_rate": 6.771082418735671e-06, "loss": 0.9948, "step": 11125 }, { "epoch": 0.40324743575803706, "grad_norm": 2.0855144974858097, "learning_rate": 6.770533532598331e-06, "loss": 0.8466, "step": 11126 }, { "epoch": 0.4032836794606937, "grad_norm": 2.1983751140685106, "learning_rate": 6.769984622064109e-06, "loss": 0.794, "step": 11127 }, { "epoch": 0.40331992316335036, "grad_norm": 2.4552725070878965, "learning_rate": 6.7694356871405705e-06, "loss": 1.0581, "step": 11128 }, { "epoch": 0.403356166866007, "grad_norm": 2.2043447383792794, "learning_rate": 6.768886727835277e-06, "loss": 0.8301, "step": 11129 }, { "epoch": 0.4033924105686637, "grad_norm": 2.751750111276965, "learning_rate": 6.768337744155795e-06, "loss": 0.9353, "step": 11130 }, { "epoch": 0.40342865427132035, "grad_norm": 2.6465852842062327, "learning_rate": 6.767788736109689e-06, "loss": 0.9704, "step": 11131 }, { "epoch": 0.403464897973977, "grad_norm": 2.423555763438501, "learning_rate": 6.767239703704522e-06, "loss": 0.9711, "step": 11132 }, { "epoch": 0.4035011416766337, "grad_norm": 2.519275581127112, "learning_rate": 6.76669064694786e-06, "loss": 0.9124, "step": 11133 }, { "epoch": 0.40353738537929035, "grad_norm": 2.0584671842639803, "learning_rate": 6.76614156584727e-06, "loss": 0.8262, "step": 11134 }, { "epoch": 0.403573629081947, "grad_norm": 2.172169195333666, "learning_rate": 6.765592460410318e-06, "loss": 0.8359, "step": 11135 }, { "epoch": 0.4036098727846037, "grad_norm": 2.4874489056229723, "learning_rate": 6.76504333064457e-06, "loss": 0.8873, "step": 11136 }, { "epoch": 0.40364611648726034, "grad_norm": 2.225619052602902, "learning_rate": 6.764494176557592e-06, "loss": 0.9792, "step": 11137 }, { "epoch": 0.403682360189917, "grad_norm": 2.3621578671955663, "learning_rate": 6.763944998156952e-06, "loss": 0.8024, "step": 11138 }, { "epoch": 0.4037186038925737, "grad_norm": 2.124668668700625, "learning_rate": 6.763395795450215e-06, "loss": 0.8302, "step": 11139 }, { "epoch": 0.40375484759523034, "grad_norm": 2.208323861132626, "learning_rate": 6.762846568444952e-06, "loss": 0.7533, "step": 11140 }, { "epoch": 0.403791091297887, "grad_norm": 2.2924060475866335, "learning_rate": 6.762297317148728e-06, "loss": 0.9107, "step": 11141 }, { "epoch": 0.40382733500054363, "grad_norm": 2.644514506296417, "learning_rate": 6.761748041569114e-06, "loss": 1.0164, "step": 11142 }, { "epoch": 0.40386357870320033, "grad_norm": 2.1125124761657976, "learning_rate": 6.761198741713677e-06, "loss": 0.9044, "step": 11143 }, { "epoch": 0.403899822405857, "grad_norm": 2.1397480489553895, "learning_rate": 6.7606494175899865e-06, "loss": 1.064, "step": 11144 }, { "epoch": 0.4039360661085136, "grad_norm": 2.326338095848792, "learning_rate": 6.760100069205612e-06, "loss": 1.0405, "step": 11145 }, { "epoch": 0.4039723098111703, "grad_norm": 2.5011308601682463, "learning_rate": 6.759550696568122e-06, "loss": 0.9297, "step": 11146 }, { "epoch": 0.404008553513827, "grad_norm": 2.3914337509511903, "learning_rate": 6.759001299685089e-06, "loss": 0.9149, "step": 11147 }, { "epoch": 0.4040447972164836, "grad_norm": 2.4030083099656525, "learning_rate": 6.7584518785640815e-06, "loss": 0.8359, "step": 11148 }, { "epoch": 0.4040810409191403, "grad_norm": 2.5336027780388513, "learning_rate": 6.7579024332126704e-06, "loss": 0.979, "step": 11149 }, { "epoch": 0.40411728462179697, "grad_norm": 2.221568595728033, "learning_rate": 6.757352963638427e-06, "loss": 0.9185, "step": 11150 }, { "epoch": 0.4041535283244536, "grad_norm": 2.5212082326171656, "learning_rate": 6.756803469848923e-06, "loss": 1.0896, "step": 11151 }, { "epoch": 0.4041897720271103, "grad_norm": 2.188855656177408, "learning_rate": 6.756253951851729e-06, "loss": 0.871, "step": 11152 }, { "epoch": 0.40422601572976696, "grad_norm": 2.6456918712675543, "learning_rate": 6.7557044096544166e-06, "loss": 0.9651, "step": 11153 }, { "epoch": 0.4042622594324236, "grad_norm": 2.314973059779064, "learning_rate": 6.755154843264562e-06, "loss": 0.9441, "step": 11154 }, { "epoch": 0.40429850313508026, "grad_norm": 2.3047731085235603, "learning_rate": 6.754605252689732e-06, "loss": 0.9165, "step": 11155 }, { "epoch": 0.40433474683773696, "grad_norm": 2.230758140793964, "learning_rate": 6.7540556379375056e-06, "loss": 0.9711, "step": 11156 }, { "epoch": 0.4043709905403936, "grad_norm": 2.396687200686994, "learning_rate": 6.7535059990154525e-06, "loss": 0.9477, "step": 11157 }, { "epoch": 0.40440723424305025, "grad_norm": 2.1344988996682437, "learning_rate": 6.752956335931147e-06, "loss": 0.794, "step": 11158 }, { "epoch": 0.40444347794570695, "grad_norm": 2.248810539371154, "learning_rate": 6.752406648692163e-06, "loss": 0.7954, "step": 11159 }, { "epoch": 0.4044797216483636, "grad_norm": 2.3401602535495964, "learning_rate": 6.751856937306074e-06, "loss": 0.836, "step": 11160 }, { "epoch": 0.40451596535102025, "grad_norm": 2.449205078510816, "learning_rate": 6.751307201780458e-06, "loss": 1.059, "step": 11161 }, { "epoch": 0.40455220905367695, "grad_norm": 2.1373531718612013, "learning_rate": 6.750757442122886e-06, "loss": 1.0185, "step": 11162 }, { "epoch": 0.4045884527563336, "grad_norm": 2.5907543111365543, "learning_rate": 6.750207658340937e-06, "loss": 0.9872, "step": 11163 }, { "epoch": 0.40462469645899024, "grad_norm": 2.647007888241821, "learning_rate": 6.749657850442185e-06, "loss": 0.9615, "step": 11164 }, { "epoch": 0.4046609401616469, "grad_norm": 2.34984608453627, "learning_rate": 6.749108018434205e-06, "loss": 0.8598, "step": 11165 }, { "epoch": 0.4046971838643036, "grad_norm": 2.499309954780277, "learning_rate": 6.748558162324575e-06, "loss": 0.9043, "step": 11166 }, { "epoch": 0.40473342756696024, "grad_norm": 2.404457114191157, "learning_rate": 6.748008282120871e-06, "loss": 0.8811, "step": 11167 }, { "epoch": 0.4047696712696169, "grad_norm": 2.2076087863558387, "learning_rate": 6.747458377830669e-06, "loss": 0.8437, "step": 11168 }, { "epoch": 0.4048059149722736, "grad_norm": 1.9334525894043126, "learning_rate": 6.746908449461549e-06, "loss": 0.7845, "step": 11169 }, { "epoch": 0.40484215867493023, "grad_norm": 2.5103495535543114, "learning_rate": 6.746358497021086e-06, "loss": 1.0284, "step": 11170 }, { "epoch": 0.4048784023775869, "grad_norm": 2.4823939004531126, "learning_rate": 6.74580852051686e-06, "loss": 0.9099, "step": 11171 }, { "epoch": 0.4049146460802436, "grad_norm": 3.8137130791850558, "learning_rate": 6.745258519956447e-06, "loss": 0.8859, "step": 11172 }, { "epoch": 0.4049508897829002, "grad_norm": 2.414507581431384, "learning_rate": 6.744708495347428e-06, "loss": 1.0014, "step": 11173 }, { "epoch": 0.40498713348555687, "grad_norm": 2.2356579241330623, "learning_rate": 6.744158446697382e-06, "loss": 0.9285, "step": 11174 }, { "epoch": 0.4050233771882136, "grad_norm": 2.5633522428496174, "learning_rate": 6.743608374013886e-06, "loss": 0.7367, "step": 11175 }, { "epoch": 0.4050596208908702, "grad_norm": 2.2852514788868623, "learning_rate": 6.743058277304522e-06, "loss": 1.0404, "step": 11176 }, { "epoch": 0.40509586459352687, "grad_norm": 2.1112153654205486, "learning_rate": 6.742508156576868e-06, "loss": 0.9601, "step": 11177 }, { "epoch": 0.4051321082961835, "grad_norm": 2.2497181075458084, "learning_rate": 6.741958011838505e-06, "loss": 1.1473, "step": 11178 }, { "epoch": 0.4051683519988402, "grad_norm": 2.5317984384118484, "learning_rate": 6.741407843097017e-06, "loss": 0.9127, "step": 11179 }, { "epoch": 0.40520459570149686, "grad_norm": 2.0606431890008485, "learning_rate": 6.7408576503599784e-06, "loss": 0.968, "step": 11180 }, { "epoch": 0.4052408394041535, "grad_norm": 2.518588882625707, "learning_rate": 6.740307433634976e-06, "loss": 0.9043, "step": 11181 }, { "epoch": 0.4052770831068102, "grad_norm": 2.3591099498428725, "learning_rate": 6.73975719292959e-06, "loss": 0.922, "step": 11182 }, { "epoch": 0.40531332680946686, "grad_norm": 2.2143448310468874, "learning_rate": 6.739206928251401e-06, "loss": 0.8632, "step": 11183 }, { "epoch": 0.4053495705121235, "grad_norm": 2.4704012107254996, "learning_rate": 6.738656639607995e-06, "loss": 0.8887, "step": 11184 }, { "epoch": 0.4053858142147802, "grad_norm": 2.2141784564074967, "learning_rate": 6.7381063270069504e-06, "loss": 0.9069, "step": 11185 }, { "epoch": 0.40542205791743685, "grad_norm": 2.3293997641704185, "learning_rate": 6.737555990455851e-06, "loss": 1.0399, "step": 11186 }, { "epoch": 0.4054583016200935, "grad_norm": 2.618463084687005, "learning_rate": 6.737005629962281e-06, "loss": 0.8762, "step": 11187 }, { "epoch": 0.4054945453227502, "grad_norm": 2.169167606574102, "learning_rate": 6.736455245533824e-06, "loss": 0.8651, "step": 11188 }, { "epoch": 0.40553078902540685, "grad_norm": 2.1702059395251894, "learning_rate": 6.735904837178065e-06, "loss": 0.8427, "step": 11189 }, { "epoch": 0.4055670327280635, "grad_norm": 2.6650946428744176, "learning_rate": 6.735354404902586e-06, "loss": 0.8803, "step": 11190 }, { "epoch": 0.40560327643072014, "grad_norm": 2.2471536724606183, "learning_rate": 6.734803948714973e-06, "loss": 1.076, "step": 11191 }, { "epoch": 0.40563952013337684, "grad_norm": 2.1260975368795094, "learning_rate": 6.734253468622811e-06, "loss": 0.8041, "step": 11192 }, { "epoch": 0.4056757638360335, "grad_norm": 2.3720231919145736, "learning_rate": 6.7337029646336846e-06, "loss": 0.9883, "step": 11193 }, { "epoch": 0.40571200753869013, "grad_norm": 2.177734386274397, "learning_rate": 6.7331524367551816e-06, "loss": 0.9126, "step": 11194 }, { "epoch": 0.40574825124134684, "grad_norm": 2.056305565710278, "learning_rate": 6.732601884994885e-06, "loss": 1.0397, "step": 11195 }, { "epoch": 0.4057844949440035, "grad_norm": 2.3207620258229102, "learning_rate": 6.732051309360381e-06, "loss": 0.9759, "step": 11196 }, { "epoch": 0.40582073864666013, "grad_norm": 2.4754849426408176, "learning_rate": 6.73150070985926e-06, "loss": 0.8862, "step": 11197 }, { "epoch": 0.40585698234931683, "grad_norm": 2.432484112383314, "learning_rate": 6.730950086499104e-06, "loss": 1.0272, "step": 11198 }, { "epoch": 0.4058932260519735, "grad_norm": 2.190644912804782, "learning_rate": 6.7303994392875035e-06, "loss": 0.8932, "step": 11199 }, { "epoch": 0.4059294697546301, "grad_norm": 2.3155548769601264, "learning_rate": 6.729848768232046e-06, "loss": 0.8311, "step": 11200 }, { "epoch": 0.40596571345728677, "grad_norm": 2.5094202325931403, "learning_rate": 6.729298073340318e-06, "loss": 0.8255, "step": 11201 }, { "epoch": 0.40600195715994347, "grad_norm": 2.4211930780319615, "learning_rate": 6.728747354619908e-06, "loss": 0.7982, "step": 11202 }, { "epoch": 0.4060382008626001, "grad_norm": 2.3629832655769296, "learning_rate": 6.728196612078406e-06, "loss": 0.9484, "step": 11203 }, { "epoch": 0.40607444456525676, "grad_norm": 2.55230977092168, "learning_rate": 6.727645845723398e-06, "loss": 0.9431, "step": 11204 }, { "epoch": 0.40611068826791347, "grad_norm": 2.5335806928920865, "learning_rate": 6.727095055562478e-06, "loss": 0.8599, "step": 11205 }, { "epoch": 0.4061469319705701, "grad_norm": 2.151242477278974, "learning_rate": 6.72654424160323e-06, "loss": 0.8873, "step": 11206 }, { "epoch": 0.40618317567322676, "grad_norm": 2.2963293455322575, "learning_rate": 6.725993403853248e-06, "loss": 0.9828, "step": 11207 }, { "epoch": 0.40621941937588346, "grad_norm": 2.2405720573320433, "learning_rate": 6.7254425423201196e-06, "loss": 0.9569, "step": 11208 }, { "epoch": 0.4062556630785401, "grad_norm": 2.560229613945399, "learning_rate": 6.724891657011438e-06, "loss": 0.8578, "step": 11209 }, { "epoch": 0.40629190678119675, "grad_norm": 2.4787200955715982, "learning_rate": 6.724340747934791e-06, "loss": 0.7157, "step": 11210 }, { "epoch": 0.40632815048385346, "grad_norm": 2.234906612345545, "learning_rate": 6.723789815097772e-06, "loss": 0.8062, "step": 11211 }, { "epoch": 0.4063643941865101, "grad_norm": 2.1963626147292934, "learning_rate": 6.723238858507972e-06, "loss": 0.9089, "step": 11212 }, { "epoch": 0.40640063788916675, "grad_norm": 2.3045795247311527, "learning_rate": 6.722687878172983e-06, "loss": 0.976, "step": 11213 }, { "epoch": 0.4064368815918234, "grad_norm": 2.235066759636684, "learning_rate": 6.722136874100396e-06, "loss": 0.9005, "step": 11214 }, { "epoch": 0.4064731252944801, "grad_norm": 2.160554013317252, "learning_rate": 6.7215858462978065e-06, "loss": 1.0481, "step": 11215 }, { "epoch": 0.40650936899713674, "grad_norm": 2.060485015690194, "learning_rate": 6.721034794772803e-06, "loss": 0.8374, "step": 11216 }, { "epoch": 0.4065456126997934, "grad_norm": 2.382381248112479, "learning_rate": 6.720483719532982e-06, "loss": 0.9248, "step": 11217 }, { "epoch": 0.4065818564024501, "grad_norm": 2.4629096779828354, "learning_rate": 6.719932620585934e-06, "loss": 0.996, "step": 11218 }, { "epoch": 0.40661810010510674, "grad_norm": 2.215837028072125, "learning_rate": 6.719381497939257e-06, "loss": 0.9706, "step": 11219 }, { "epoch": 0.4066543438077634, "grad_norm": 2.211475704958823, "learning_rate": 6.71883035160054e-06, "loss": 0.9494, "step": 11220 }, { "epoch": 0.4066905875104201, "grad_norm": 2.3377630626196746, "learning_rate": 6.718279181577384e-06, "loss": 1.0754, "step": 11221 }, { "epoch": 0.40672683121307673, "grad_norm": 2.411850010466828, "learning_rate": 6.717727987877377e-06, "loss": 0.998, "step": 11222 }, { "epoch": 0.4067630749157334, "grad_norm": 2.2572495006447926, "learning_rate": 6.717176770508119e-06, "loss": 1.0251, "step": 11223 }, { "epoch": 0.4067993186183901, "grad_norm": 2.460620875358873, "learning_rate": 6.716625529477203e-06, "loss": 1.0108, "step": 11224 }, { "epoch": 0.40683556232104673, "grad_norm": 2.154572413576315, "learning_rate": 6.716074264792226e-06, "loss": 0.8503, "step": 11225 }, { "epoch": 0.4068718060237034, "grad_norm": 2.407677823561632, "learning_rate": 6.715522976460782e-06, "loss": 0.8942, "step": 11226 }, { "epoch": 0.40690804972636, "grad_norm": 2.5839953483579006, "learning_rate": 6.714971664490471e-06, "loss": 0.9743, "step": 11227 }, { "epoch": 0.4069442934290167, "grad_norm": 2.1006695091759617, "learning_rate": 6.714420328888885e-06, "loss": 0.8225, "step": 11228 }, { "epoch": 0.40698053713167337, "grad_norm": 2.0715655803952293, "learning_rate": 6.713868969663626e-06, "loss": 0.9685, "step": 11229 }, { "epoch": 0.40701678083433, "grad_norm": 2.3544167950773978, "learning_rate": 6.7133175868222875e-06, "loss": 1.0211, "step": 11230 }, { "epoch": 0.4070530245369867, "grad_norm": 2.341328792154194, "learning_rate": 6.71276618037247e-06, "loss": 0.9188, "step": 11231 }, { "epoch": 0.40708926823964336, "grad_norm": 2.4475172981309443, "learning_rate": 6.712214750321769e-06, "loss": 1.0528, "step": 11232 }, { "epoch": 0.4071255119423, "grad_norm": 2.2177044471632184, "learning_rate": 6.711663296677786e-06, "loss": 0.9615, "step": 11233 }, { "epoch": 0.4071617556449567, "grad_norm": 2.3073693645334346, "learning_rate": 6.7111118194481175e-06, "loss": 1.0496, "step": 11234 }, { "epoch": 0.40719799934761336, "grad_norm": 2.3358895691002033, "learning_rate": 6.710560318640364e-06, "loss": 1.0686, "step": 11235 }, { "epoch": 0.40723424305027, "grad_norm": 2.276670231655412, "learning_rate": 6.710008794262121e-06, "loss": 0.8088, "step": 11236 }, { "epoch": 0.40727048675292665, "grad_norm": 2.4305630353346643, "learning_rate": 6.7094572463209925e-06, "loss": 0.8823, "step": 11237 }, { "epoch": 0.40730673045558335, "grad_norm": 2.3568960796952854, "learning_rate": 6.708905674824576e-06, "loss": 0.9794, "step": 11238 }, { "epoch": 0.40734297415824, "grad_norm": 2.3849767814918987, "learning_rate": 6.708354079780475e-06, "loss": 0.92, "step": 11239 }, { "epoch": 0.40737921786089665, "grad_norm": 2.1141107372414374, "learning_rate": 6.707802461196285e-06, "loss": 0.8642, "step": 11240 }, { "epoch": 0.40741546156355335, "grad_norm": 2.5186761469599666, "learning_rate": 6.7072508190796115e-06, "loss": 0.9948, "step": 11241 }, { "epoch": 0.40745170526621, "grad_norm": 2.4462894143179623, "learning_rate": 6.706699153438054e-06, "loss": 0.8061, "step": 11242 }, { "epoch": 0.40748794896886664, "grad_norm": 2.1545781900096164, "learning_rate": 6.706147464279215e-06, "loss": 0.8849, "step": 11243 }, { "epoch": 0.40752419267152334, "grad_norm": 2.1240575378285387, "learning_rate": 6.705595751610697e-06, "loss": 1.0326, "step": 11244 }, { "epoch": 0.40756043637418, "grad_norm": 2.2826911137214667, "learning_rate": 6.705044015440098e-06, "loss": 0.9018, "step": 11245 }, { "epoch": 0.40759668007683664, "grad_norm": 2.54998695718062, "learning_rate": 6.704492255775027e-06, "loss": 0.9997, "step": 11246 }, { "epoch": 0.40763292377949334, "grad_norm": 2.4648426639384455, "learning_rate": 6.7039404726230806e-06, "loss": 0.9182, "step": 11247 }, { "epoch": 0.40766916748215, "grad_norm": 2.7077450309131375, "learning_rate": 6.703388665991867e-06, "loss": 1.0016, "step": 11248 }, { "epoch": 0.40770541118480663, "grad_norm": 2.6013486284497906, "learning_rate": 6.702836835888986e-06, "loss": 1.1549, "step": 11249 }, { "epoch": 0.4077416548874633, "grad_norm": 2.2755504624015024, "learning_rate": 6.702284982322045e-06, "loss": 1.0777, "step": 11250 }, { "epoch": 0.40777789859012, "grad_norm": 2.5222950975107246, "learning_rate": 6.701733105298644e-06, "loss": 0.962, "step": 11251 }, { "epoch": 0.4078141422927766, "grad_norm": 2.160076800268198, "learning_rate": 6.701181204826393e-06, "loss": 0.85, "step": 11252 }, { "epoch": 0.4078503859954333, "grad_norm": 2.3446187703139416, "learning_rate": 6.7006292809128925e-06, "loss": 0.9976, "step": 11253 }, { "epoch": 0.40788662969809, "grad_norm": 2.280483746972255, "learning_rate": 6.700077333565749e-06, "loss": 0.8872, "step": 11254 }, { "epoch": 0.4079228734007466, "grad_norm": 2.1210847415929126, "learning_rate": 6.699525362792567e-06, "loss": 0.795, "step": 11255 }, { "epoch": 0.40795911710340327, "grad_norm": 2.3284127269303028, "learning_rate": 6.698973368600955e-06, "loss": 0.8742, "step": 11256 }, { "epoch": 0.40799536080605997, "grad_norm": 2.0822191086622475, "learning_rate": 6.698421350998516e-06, "loss": 1.0057, "step": 11257 }, { "epoch": 0.4080316045087166, "grad_norm": 2.0202748660114853, "learning_rate": 6.697869309992859e-06, "loss": 0.914, "step": 11258 }, { "epoch": 0.40806784821137326, "grad_norm": 2.625815290060853, "learning_rate": 6.697317245591589e-06, "loss": 1.1048, "step": 11259 }, { "epoch": 0.40810409191402996, "grad_norm": 2.1730065155072724, "learning_rate": 6.696765157802315e-06, "loss": 0.8787, "step": 11260 }, { "epoch": 0.4081403356166866, "grad_norm": 2.4484050053129325, "learning_rate": 6.6962130466326434e-06, "loss": 0.9328, "step": 11261 }, { "epoch": 0.40817657931934326, "grad_norm": 2.5339015034880914, "learning_rate": 6.695660912090182e-06, "loss": 0.8343, "step": 11262 }, { "epoch": 0.4082128230219999, "grad_norm": 2.2011989892678687, "learning_rate": 6.695108754182536e-06, "loss": 0.8905, "step": 11263 }, { "epoch": 0.4082490667246566, "grad_norm": 2.154953992946993, "learning_rate": 6.694556572917319e-06, "loss": 0.8384, "step": 11264 }, { "epoch": 0.40828531042731325, "grad_norm": 2.2617687273175573, "learning_rate": 6.694004368302136e-06, "loss": 1.1325, "step": 11265 }, { "epoch": 0.4083215541299699, "grad_norm": 2.3582346931140568, "learning_rate": 6.693452140344598e-06, "loss": 0.9827, "step": 11266 }, { "epoch": 0.4083577978326266, "grad_norm": 2.3930013801000705, "learning_rate": 6.692899889052313e-06, "loss": 1.0368, "step": 11267 }, { "epoch": 0.40839404153528325, "grad_norm": 2.6149369627156753, "learning_rate": 6.6923476144328935e-06, "loss": 1.0449, "step": 11268 }, { "epoch": 0.4084302852379399, "grad_norm": 2.218791142916152, "learning_rate": 6.691795316493945e-06, "loss": 0.7828, "step": 11269 }, { "epoch": 0.4084665289405966, "grad_norm": 1.8799794941976324, "learning_rate": 6.691242995243081e-06, "loss": 0.7679, "step": 11270 }, { "epoch": 0.40850277264325324, "grad_norm": 2.177531967095885, "learning_rate": 6.690690650687912e-06, "loss": 0.947, "step": 11271 }, { "epoch": 0.4085390163459099, "grad_norm": 2.124524062401873, "learning_rate": 6.690138282836048e-06, "loss": 0.7111, "step": 11272 }, { "epoch": 0.40857526004856654, "grad_norm": 2.225816756324527, "learning_rate": 6.689585891695101e-06, "loss": 0.93, "step": 11273 }, { "epoch": 0.40861150375122324, "grad_norm": 2.2949888983281075, "learning_rate": 6.689033477272682e-06, "loss": 1.123, "step": 11274 }, { "epoch": 0.4086477474538799, "grad_norm": 2.314348464659822, "learning_rate": 6.688481039576401e-06, "loss": 0.924, "step": 11275 }, { "epoch": 0.40868399115653653, "grad_norm": 2.114646779808977, "learning_rate": 6.6879285786138745e-06, "loss": 0.9087, "step": 11276 }, { "epoch": 0.40872023485919323, "grad_norm": 2.6475539198976477, "learning_rate": 6.687376094392712e-06, "loss": 1.0534, "step": 11277 }, { "epoch": 0.4087564785618499, "grad_norm": 2.186264004900428, "learning_rate": 6.686823586920529e-06, "loss": 1.0752, "step": 11278 }, { "epoch": 0.4087927222645065, "grad_norm": 2.1310251898299337, "learning_rate": 6.686271056204936e-06, "loss": 1.0278, "step": 11279 }, { "epoch": 0.4088289659671632, "grad_norm": 2.6238192700383, "learning_rate": 6.685718502253549e-06, "loss": 0.9793, "step": 11280 }, { "epoch": 0.4088652096698199, "grad_norm": 2.260486497004906, "learning_rate": 6.685165925073979e-06, "loss": 0.9145, "step": 11281 }, { "epoch": 0.4089014533724765, "grad_norm": 1.9670657543209904, "learning_rate": 6.6846133246738435e-06, "loss": 0.8214, "step": 11282 }, { "epoch": 0.4089376970751332, "grad_norm": 2.253635739853258, "learning_rate": 6.684060701060753e-06, "loss": 0.8385, "step": 11283 }, { "epoch": 0.40897394077778987, "grad_norm": 2.410289529931237, "learning_rate": 6.6835080542423245e-06, "loss": 0.8976, "step": 11284 }, { "epoch": 0.4090101844804465, "grad_norm": 2.4679359475322333, "learning_rate": 6.682955384226174e-06, "loss": 1.0368, "step": 11285 }, { "epoch": 0.40904642818310316, "grad_norm": 2.2429883670732242, "learning_rate": 6.682402691019917e-06, "loss": 0.9813, "step": 11286 }, { "epoch": 0.40908267188575986, "grad_norm": 2.2784039877569886, "learning_rate": 6.681849974631167e-06, "loss": 0.9911, "step": 11287 }, { "epoch": 0.4091189155884165, "grad_norm": 2.2199782429530286, "learning_rate": 6.681297235067542e-06, "loss": 0.8438, "step": 11288 }, { "epoch": 0.40915515929107316, "grad_norm": 2.5193727685092155, "learning_rate": 6.680744472336657e-06, "loss": 0.8694, "step": 11289 }, { "epoch": 0.40919140299372986, "grad_norm": 2.2108757021452194, "learning_rate": 6.680191686446131e-06, "loss": 0.8221, "step": 11290 }, { "epoch": 0.4092276466963865, "grad_norm": 2.531262776549786, "learning_rate": 6.6796388774035806e-06, "loss": 0.9615, "step": 11291 }, { "epoch": 0.40926389039904315, "grad_norm": 2.3986193848223785, "learning_rate": 6.679086045216622e-06, "loss": 0.8706, "step": 11292 }, { "epoch": 0.40930013410169985, "grad_norm": 2.067205476951689, "learning_rate": 6.678533189892871e-06, "loss": 1.0352, "step": 11293 }, { "epoch": 0.4093363778043565, "grad_norm": 2.4746064851733913, "learning_rate": 6.67798031143995e-06, "loss": 0.9949, "step": 11294 }, { "epoch": 0.40937262150701315, "grad_norm": 2.522360927746726, "learning_rate": 6.677427409865473e-06, "loss": 1.0004, "step": 11295 }, { "epoch": 0.4094088652096698, "grad_norm": 2.2861733731830225, "learning_rate": 6.676874485177063e-06, "loss": 0.8791, "step": 11296 }, { "epoch": 0.4094451089123265, "grad_norm": 2.138335286005471, "learning_rate": 6.676321537382334e-06, "loss": 0.8251, "step": 11297 }, { "epoch": 0.40948135261498314, "grad_norm": 2.443300754006146, "learning_rate": 6.67576856648891e-06, "loss": 0.9298, "step": 11298 }, { "epoch": 0.4095175963176398, "grad_norm": 2.620127485735708, "learning_rate": 6.6752155725044075e-06, "loss": 0.9758, "step": 11299 }, { "epoch": 0.4095538400202965, "grad_norm": 2.2332803789306737, "learning_rate": 6.6746625554364494e-06, "loss": 0.9539, "step": 11300 }, { "epoch": 0.40959008372295314, "grad_norm": 2.4614666110848367, "learning_rate": 6.674109515292654e-06, "loss": 0.9765, "step": 11301 }, { "epoch": 0.4096263274256098, "grad_norm": 2.260549924195042, "learning_rate": 6.673556452080641e-06, "loss": 0.8267, "step": 11302 }, { "epoch": 0.4096625711282665, "grad_norm": 2.227740142033726, "learning_rate": 6.673003365808032e-06, "loss": 1.065, "step": 11303 }, { "epoch": 0.40969881483092313, "grad_norm": 2.3850982799547036, "learning_rate": 6.672450256482449e-06, "loss": 0.9901, "step": 11304 }, { "epoch": 0.4097350585335798, "grad_norm": 2.236015462377307, "learning_rate": 6.671897124111512e-06, "loss": 0.9371, "step": 11305 }, { "epoch": 0.4097713022362365, "grad_norm": 2.414973455062474, "learning_rate": 6.671343968702844e-06, "loss": 0.9195, "step": 11306 }, { "epoch": 0.4098075459388931, "grad_norm": 2.3823010894123797, "learning_rate": 6.670790790264066e-06, "loss": 0.8865, "step": 11307 }, { "epoch": 0.40984378964154977, "grad_norm": 2.3177341389685417, "learning_rate": 6.670237588802804e-06, "loss": 0.8576, "step": 11308 }, { "epoch": 0.4098800333442064, "grad_norm": 2.5207705059167633, "learning_rate": 6.669684364326676e-06, "loss": 0.9078, "step": 11309 }, { "epoch": 0.4099162770468631, "grad_norm": 2.363898421398475, "learning_rate": 6.6691311168433086e-06, "loss": 0.9132, "step": 11310 }, { "epoch": 0.40995252074951977, "grad_norm": 2.2607033814431903, "learning_rate": 6.668577846360324e-06, "loss": 0.9578, "step": 11311 }, { "epoch": 0.4099887644521764, "grad_norm": 2.5336034038270334, "learning_rate": 6.668024552885345e-06, "loss": 0.932, "step": 11312 }, { "epoch": 0.4100250081548331, "grad_norm": 2.3959183471063668, "learning_rate": 6.667471236425995e-06, "loss": 0.9674, "step": 11313 }, { "epoch": 0.41006125185748976, "grad_norm": 2.3989940199655844, "learning_rate": 6.666917896989903e-06, "loss": 0.8406, "step": 11314 }, { "epoch": 0.4100974955601464, "grad_norm": 2.415926732191334, "learning_rate": 6.666364534584689e-06, "loss": 1.1737, "step": 11315 }, { "epoch": 0.4101337392628031, "grad_norm": 2.231009426719593, "learning_rate": 6.665811149217979e-06, "loss": 0.8979, "step": 11316 }, { "epoch": 0.41016998296545976, "grad_norm": 2.139846391650159, "learning_rate": 6.665257740897398e-06, "loss": 1.0819, "step": 11317 }, { "epoch": 0.4102062266681164, "grad_norm": 2.3431858785017083, "learning_rate": 6.664704309630574e-06, "loss": 0.8138, "step": 11318 }, { "epoch": 0.4102424703707731, "grad_norm": 2.556025196630605, "learning_rate": 6.664150855425131e-06, "loss": 1.0779, "step": 11319 }, { "epoch": 0.41027871407342975, "grad_norm": 2.1962541508744025, "learning_rate": 6.6635973782886956e-06, "loss": 0.8836, "step": 11320 }, { "epoch": 0.4103149577760864, "grad_norm": 2.344944305175692, "learning_rate": 6.663043878228895e-06, "loss": 1.0082, "step": 11321 }, { "epoch": 0.41035120147874304, "grad_norm": 2.229053708920565, "learning_rate": 6.662490355253355e-06, "loss": 0.9496, "step": 11322 }, { "epoch": 0.41038744518139975, "grad_norm": 2.6485275674674984, "learning_rate": 6.661936809369703e-06, "loss": 0.94, "step": 11323 }, { "epoch": 0.4104236888840564, "grad_norm": 2.1393558712100096, "learning_rate": 6.661383240585567e-06, "loss": 0.8574, "step": 11324 }, { "epoch": 0.41045993258671304, "grad_norm": 2.2834322850786295, "learning_rate": 6.660829648908576e-06, "loss": 0.9386, "step": 11325 }, { "epoch": 0.41049617628936974, "grad_norm": 2.194057716616381, "learning_rate": 6.660276034346355e-06, "loss": 1.2857, "step": 11326 }, { "epoch": 0.4105324199920264, "grad_norm": 2.360790650797957, "learning_rate": 6.659722396906535e-06, "loss": 0.8751, "step": 11327 }, { "epoch": 0.41056866369468303, "grad_norm": 2.404654533199857, "learning_rate": 6.659168736596744e-06, "loss": 0.8095, "step": 11328 }, { "epoch": 0.41060490739733974, "grad_norm": 2.1379411340978973, "learning_rate": 6.658615053424612e-06, "loss": 0.9539, "step": 11329 }, { "epoch": 0.4106411510999964, "grad_norm": 2.30161092551998, "learning_rate": 6.658061347397767e-06, "loss": 1.0858, "step": 11330 }, { "epoch": 0.41067739480265303, "grad_norm": 2.5236412113092235, "learning_rate": 6.6575076185238404e-06, "loss": 0.7988, "step": 11331 }, { "epoch": 0.4107136385053097, "grad_norm": 2.140323735116978, "learning_rate": 6.656953866810461e-06, "loss": 0.8728, "step": 11332 }, { "epoch": 0.4107498822079664, "grad_norm": 2.219424626054782, "learning_rate": 6.656400092265258e-06, "loss": 0.9896, "step": 11333 }, { "epoch": 0.410786125910623, "grad_norm": 2.5420066407333843, "learning_rate": 6.655846294895864e-06, "loss": 0.9406, "step": 11334 }, { "epoch": 0.41082236961327967, "grad_norm": 2.291785814619066, "learning_rate": 6.65529247470991e-06, "loss": 0.9091, "step": 11335 }, { "epoch": 0.41085861331593637, "grad_norm": 2.3994524980074576, "learning_rate": 6.654738631715026e-06, "loss": 0.9553, "step": 11336 }, { "epoch": 0.410894857018593, "grad_norm": 2.3211427551936135, "learning_rate": 6.6541847659188455e-06, "loss": 0.9273, "step": 11337 }, { "epoch": 0.41093110072124966, "grad_norm": 2.3335971228416783, "learning_rate": 6.653630877328998e-06, "loss": 0.8435, "step": 11338 }, { "epoch": 0.41096734442390637, "grad_norm": 2.3752945182546714, "learning_rate": 6.653076965953119e-06, "loss": 1.0925, "step": 11339 }, { "epoch": 0.411003588126563, "grad_norm": 1.9488438963075865, "learning_rate": 6.652523031798839e-06, "loss": 0.8865, "step": 11340 }, { "epoch": 0.41103983182921966, "grad_norm": 2.602986784307722, "learning_rate": 6.65196907487379e-06, "loss": 1.1021, "step": 11341 }, { "epoch": 0.41107607553187636, "grad_norm": 2.470320939331046, "learning_rate": 6.651415095185606e-06, "loss": 0.9301, "step": 11342 }, { "epoch": 0.411112319234533, "grad_norm": 2.200281451847022, "learning_rate": 6.650861092741921e-06, "loss": 1.0857, "step": 11343 }, { "epoch": 0.41114856293718965, "grad_norm": 2.6606149299101975, "learning_rate": 6.650307067550368e-06, "loss": 1.1089, "step": 11344 }, { "epoch": 0.4111848066398463, "grad_norm": 2.3042007731483403, "learning_rate": 6.649753019618583e-06, "loss": 1.0168, "step": 11345 }, { "epoch": 0.411221050342503, "grad_norm": 2.217280712605544, "learning_rate": 6.6491989489541985e-06, "loss": 0.9418, "step": 11346 }, { "epoch": 0.41125729404515965, "grad_norm": 2.231351528663015, "learning_rate": 6.64864485556485e-06, "loss": 0.8218, "step": 11347 }, { "epoch": 0.4112935377478163, "grad_norm": 2.259453760117645, "learning_rate": 6.648090739458173e-06, "loss": 0.8859, "step": 11348 }, { "epoch": 0.411329781450473, "grad_norm": 2.269340264927435, "learning_rate": 6.647536600641802e-06, "loss": 0.9155, "step": 11349 }, { "epoch": 0.41136602515312964, "grad_norm": 2.4875948662650194, "learning_rate": 6.646982439123372e-06, "loss": 0.9511, "step": 11350 }, { "epoch": 0.4114022688557863, "grad_norm": 2.466846257872674, "learning_rate": 6.64642825491052e-06, "loss": 0.9766, "step": 11351 }, { "epoch": 0.411438512558443, "grad_norm": 2.255151967964945, "learning_rate": 6.645874048010883e-06, "loss": 0.9332, "step": 11352 }, { "epoch": 0.41147475626109964, "grad_norm": 2.5579429078073863, "learning_rate": 6.645319818432098e-06, "loss": 1.0327, "step": 11353 }, { "epoch": 0.4115109999637563, "grad_norm": 2.228126642401995, "learning_rate": 6.6447655661818e-06, "loss": 0.9057, "step": 11354 }, { "epoch": 0.411547243666413, "grad_norm": 2.308337065838676, "learning_rate": 6.644211291267627e-06, "loss": 0.878, "step": 11355 }, { "epoch": 0.41158348736906963, "grad_norm": 2.3374211882169655, "learning_rate": 6.643656993697217e-06, "loss": 0.8453, "step": 11356 }, { "epoch": 0.4116197310717263, "grad_norm": 2.2784472211520574, "learning_rate": 6.6431026734782075e-06, "loss": 0.8924, "step": 11357 }, { "epoch": 0.4116559747743829, "grad_norm": 2.5267641351606, "learning_rate": 6.6425483306182385e-06, "loss": 0.9337, "step": 11358 }, { "epoch": 0.41169221847703963, "grad_norm": 2.3228297750074494, "learning_rate": 6.641993965124945e-06, "loss": 0.9032, "step": 11359 }, { "epoch": 0.4117284621796963, "grad_norm": 2.370163038501487, "learning_rate": 6.641439577005968e-06, "loss": 0.8956, "step": 11360 }, { "epoch": 0.4117647058823529, "grad_norm": 2.6351443705373025, "learning_rate": 6.640885166268947e-06, "loss": 0.8007, "step": 11361 }, { "epoch": 0.4118009495850096, "grad_norm": 2.3034685810130306, "learning_rate": 6.640330732921521e-06, "loss": 1.0026, "step": 11362 }, { "epoch": 0.41183719328766627, "grad_norm": 2.2574807468961007, "learning_rate": 6.639776276971328e-06, "loss": 0.8415, "step": 11363 }, { "epoch": 0.4118734369903229, "grad_norm": 2.146379804598693, "learning_rate": 6.639221798426011e-06, "loss": 0.7576, "step": 11364 }, { "epoch": 0.4119096806929796, "grad_norm": 2.4746512347928613, "learning_rate": 6.638667297293209e-06, "loss": 0.8583, "step": 11365 }, { "epoch": 0.41194592439563626, "grad_norm": 2.6169056394641754, "learning_rate": 6.63811277358056e-06, "loss": 0.9632, "step": 11366 }, { "epoch": 0.4119821680982929, "grad_norm": 2.3154969695946614, "learning_rate": 6.637558227295712e-06, "loss": 0.8869, "step": 11367 }, { "epoch": 0.41201841180094956, "grad_norm": 2.3314414203895875, "learning_rate": 6.6370036584463e-06, "loss": 0.8803, "step": 11368 }, { "epoch": 0.41205465550360626, "grad_norm": 2.5691704754548135, "learning_rate": 6.636449067039967e-06, "loss": 0.8626, "step": 11369 }, { "epoch": 0.4120908992062629, "grad_norm": 1.975465504527137, "learning_rate": 6.635894453084356e-06, "loss": 0.7519, "step": 11370 }, { "epoch": 0.41212714290891955, "grad_norm": 2.4514087436902017, "learning_rate": 6.635339816587109e-06, "loss": 0.8875, "step": 11371 }, { "epoch": 0.41216338661157625, "grad_norm": 2.4364461372884687, "learning_rate": 6.634785157555868e-06, "loss": 0.9958, "step": 11372 }, { "epoch": 0.4121996303142329, "grad_norm": 2.3843473081232216, "learning_rate": 6.634230475998277e-06, "loss": 1.0335, "step": 11373 }, { "epoch": 0.41223587401688955, "grad_norm": 2.3822005362710494, "learning_rate": 6.633675771921977e-06, "loss": 0.8373, "step": 11374 }, { "epoch": 0.41227211771954625, "grad_norm": 2.4392341073461403, "learning_rate": 6.633121045334614e-06, "loss": 0.8939, "step": 11375 }, { "epoch": 0.4123083614222029, "grad_norm": 2.2002937905169744, "learning_rate": 6.63256629624383e-06, "loss": 1.113, "step": 11376 }, { "epoch": 0.41234460512485954, "grad_norm": 2.2526507178973163, "learning_rate": 6.6320115246572705e-06, "loss": 0.7935, "step": 11377 }, { "epoch": 0.41238084882751624, "grad_norm": 2.282081546638246, "learning_rate": 6.631456730582579e-06, "loss": 1.0123, "step": 11378 }, { "epoch": 0.4124170925301729, "grad_norm": 2.19183395612756, "learning_rate": 6.630901914027401e-06, "loss": 0.9896, "step": 11379 }, { "epoch": 0.41245333623282954, "grad_norm": 2.374346360870057, "learning_rate": 6.63034707499938e-06, "loss": 0.8479, "step": 11380 }, { "epoch": 0.4124895799354862, "grad_norm": 2.679916740811079, "learning_rate": 6.629792213506163e-06, "loss": 0.9401, "step": 11381 }, { "epoch": 0.4125258236381429, "grad_norm": 2.59530698362104, "learning_rate": 6.629237329555394e-06, "loss": 0.9196, "step": 11382 }, { "epoch": 0.41256206734079953, "grad_norm": 2.563628037584121, "learning_rate": 6.62868242315472e-06, "loss": 0.9261, "step": 11383 }, { "epoch": 0.4125983110434562, "grad_norm": 2.33660023705214, "learning_rate": 6.628127494311787e-06, "loss": 0.9819, "step": 11384 }, { "epoch": 0.4126345547461129, "grad_norm": 2.2445124273230217, "learning_rate": 6.627572543034243e-06, "loss": 0.9096, "step": 11385 }, { "epoch": 0.4126707984487695, "grad_norm": 2.2751814743568026, "learning_rate": 6.627017569329732e-06, "loss": 0.9395, "step": 11386 }, { "epoch": 0.4127070421514262, "grad_norm": 2.172346587067615, "learning_rate": 6.626462573205905e-06, "loss": 0.7963, "step": 11387 }, { "epoch": 0.4127432858540829, "grad_norm": 2.411521060964932, "learning_rate": 6.625907554670407e-06, "loss": 1.0802, "step": 11388 }, { "epoch": 0.4127795295567395, "grad_norm": 2.34757423023803, "learning_rate": 6.625352513730886e-06, "loss": 1.029, "step": 11389 }, { "epoch": 0.41281577325939617, "grad_norm": 2.345702767873828, "learning_rate": 6.624797450394989e-06, "loss": 0.8989, "step": 11390 }, { "epoch": 0.41285201696205287, "grad_norm": 2.422348725349075, "learning_rate": 6.624242364670368e-06, "loss": 0.9422, "step": 11391 }, { "epoch": 0.4128882606647095, "grad_norm": 2.239583355611091, "learning_rate": 6.623687256564667e-06, "loss": 0.9842, "step": 11392 }, { "epoch": 0.41292450436736616, "grad_norm": 2.3975463601994815, "learning_rate": 6.6231321260855385e-06, "loss": 1.0322, "step": 11393 }, { "epoch": 0.4129607480700228, "grad_norm": 2.472387784134804, "learning_rate": 6.622576973240631e-06, "loss": 0.958, "step": 11394 }, { "epoch": 0.4129969917726795, "grad_norm": 2.582956271022066, "learning_rate": 6.622021798037595e-06, "loss": 0.9072, "step": 11395 }, { "epoch": 0.41303323547533616, "grad_norm": 2.2726837456505087, "learning_rate": 6.6214666004840785e-06, "loss": 0.9186, "step": 11396 }, { "epoch": 0.4130694791779928, "grad_norm": 2.6321234210096063, "learning_rate": 6.620911380587733e-06, "loss": 0.8588, "step": 11397 }, { "epoch": 0.4131057228806495, "grad_norm": 2.184610614989365, "learning_rate": 6.62035613835621e-06, "loss": 0.7313, "step": 11398 }, { "epoch": 0.41314196658330615, "grad_norm": 2.401046708247184, "learning_rate": 6.6198008737971595e-06, "loss": 0.8772, "step": 11399 }, { "epoch": 0.4131782102859628, "grad_norm": 2.189847479593995, "learning_rate": 6.619245586918231e-06, "loss": 0.9768, "step": 11400 }, { "epoch": 0.4132144539886195, "grad_norm": 2.255286764099025, "learning_rate": 6.618690277727079e-06, "loss": 1.0167, "step": 11401 }, { "epoch": 0.41325069769127615, "grad_norm": 2.1943700906946986, "learning_rate": 6.618134946231352e-06, "loss": 1.0127, "step": 11402 }, { "epoch": 0.4132869413939328, "grad_norm": 1.976721831293728, "learning_rate": 6.617579592438707e-06, "loss": 0.6585, "step": 11403 }, { "epoch": 0.41332318509658944, "grad_norm": 2.245451366811997, "learning_rate": 6.617024216356791e-06, "loss": 1.0394, "step": 11404 }, { "epoch": 0.41335942879924614, "grad_norm": 2.273697019778811, "learning_rate": 6.616468817993259e-06, "loss": 0.8765, "step": 11405 }, { "epoch": 0.4133956725019028, "grad_norm": 2.3166831336059373, "learning_rate": 6.615913397355766e-06, "loss": 0.6808, "step": 11406 }, { "epoch": 0.41343191620455944, "grad_norm": 2.630066444697983, "learning_rate": 6.615357954451964e-06, "loss": 0.8223, "step": 11407 }, { "epoch": 0.41346815990721614, "grad_norm": 2.424682478505185, "learning_rate": 6.6148024892895055e-06, "loss": 1.1308, "step": 11408 }, { "epoch": 0.4135044036098728, "grad_norm": 2.5197973404603493, "learning_rate": 6.614247001876044e-06, "loss": 1.0098, "step": 11409 }, { "epoch": 0.41354064731252943, "grad_norm": 2.2404560242944243, "learning_rate": 6.6136914922192375e-06, "loss": 0.9169, "step": 11410 }, { "epoch": 0.41357689101518613, "grad_norm": 2.281171321764441, "learning_rate": 6.613135960326736e-06, "loss": 0.8393, "step": 11411 }, { "epoch": 0.4136131347178428, "grad_norm": 2.419340678002897, "learning_rate": 6.612580406206197e-06, "loss": 0.8936, "step": 11412 }, { "epoch": 0.4136493784204994, "grad_norm": 2.186855452855517, "learning_rate": 6.612024829865275e-06, "loss": 0.9012, "step": 11413 }, { "epoch": 0.4136856221231561, "grad_norm": 2.464590607805286, "learning_rate": 6.611469231311628e-06, "loss": 0.8194, "step": 11414 }, { "epoch": 0.4137218658258128, "grad_norm": 2.0646105990710617, "learning_rate": 6.610913610552907e-06, "loss": 0.8612, "step": 11415 }, { "epoch": 0.4137581095284694, "grad_norm": 2.143123877948334, "learning_rate": 6.610357967596772e-06, "loss": 1.0216, "step": 11416 }, { "epoch": 0.41379435323112607, "grad_norm": 2.3292579333833476, "learning_rate": 6.609802302450878e-06, "loss": 0.9233, "step": 11417 }, { "epoch": 0.41383059693378277, "grad_norm": 2.1607515055745745, "learning_rate": 6.609246615122883e-06, "loss": 0.8831, "step": 11418 }, { "epoch": 0.4138668406364394, "grad_norm": 2.1493657858028614, "learning_rate": 6.608690905620441e-06, "loss": 0.9685, "step": 11419 }, { "epoch": 0.41390308433909606, "grad_norm": 2.4007207743454493, "learning_rate": 6.608135173951212e-06, "loss": 0.9136, "step": 11420 }, { "epoch": 0.41393932804175276, "grad_norm": 2.317085755428488, "learning_rate": 6.607579420122852e-06, "loss": 0.901, "step": 11421 }, { "epoch": 0.4139755717444094, "grad_norm": 2.191968478027193, "learning_rate": 6.607023644143022e-06, "loss": 0.9123, "step": 11422 }, { "epoch": 0.41401181544706606, "grad_norm": 2.334292723376072, "learning_rate": 6.606467846019376e-06, "loss": 0.8637, "step": 11423 }, { "epoch": 0.41404805914972276, "grad_norm": 2.2285087412025706, "learning_rate": 6.6059120257595756e-06, "loss": 0.8827, "step": 11424 }, { "epoch": 0.4140843028523794, "grad_norm": 2.291303765206688, "learning_rate": 6.605356183371278e-06, "loss": 0.9719, "step": 11425 }, { "epoch": 0.41412054655503605, "grad_norm": 2.5305393294523237, "learning_rate": 6.6048003188621435e-06, "loss": 0.8835, "step": 11426 }, { "epoch": 0.41415679025769275, "grad_norm": 2.1213529618669686, "learning_rate": 6.604244432239831e-06, "loss": 0.8665, "step": 11427 }, { "epoch": 0.4141930339603494, "grad_norm": 2.5389588252292903, "learning_rate": 6.603688523512002e-06, "loss": 0.901, "step": 11428 }, { "epoch": 0.41422927766300605, "grad_norm": 2.402019310529599, "learning_rate": 6.603132592686312e-06, "loss": 0.9565, "step": 11429 }, { "epoch": 0.4142655213656627, "grad_norm": 2.333237329380437, "learning_rate": 6.602576639770426e-06, "loss": 0.8627, "step": 11430 }, { "epoch": 0.4143017650683194, "grad_norm": 2.3531559525197223, "learning_rate": 6.602020664772003e-06, "loss": 0.9677, "step": 11431 }, { "epoch": 0.41433800877097604, "grad_norm": 2.145887566953201, "learning_rate": 6.601464667698705e-06, "loss": 0.8244, "step": 11432 }, { "epoch": 0.4143742524736327, "grad_norm": 2.029217636862842, "learning_rate": 6.600908648558191e-06, "loss": 0.718, "step": 11433 }, { "epoch": 0.4144104961762894, "grad_norm": 2.142870367945147, "learning_rate": 6.600352607358125e-06, "loss": 0.9471, "step": 11434 }, { "epoch": 0.41444673987894604, "grad_norm": 2.3458252861350366, "learning_rate": 6.599796544106167e-06, "loss": 0.8819, "step": 11435 }, { "epoch": 0.4144829835816027, "grad_norm": 2.301999063541916, "learning_rate": 6.5992404588099815e-06, "loss": 0.7617, "step": 11436 }, { "epoch": 0.4145192272842594, "grad_norm": 2.4594942798418553, "learning_rate": 6.598684351477229e-06, "loss": 1.0067, "step": 11437 }, { "epoch": 0.41455547098691603, "grad_norm": 2.478985785260159, "learning_rate": 6.598128222115573e-06, "loss": 0.8674, "step": 11438 }, { "epoch": 0.4145917146895727, "grad_norm": 2.268029326734544, "learning_rate": 6.5975720707326766e-06, "loss": 1.0039, "step": 11439 }, { "epoch": 0.4146279583922293, "grad_norm": 2.3184835017008547, "learning_rate": 6.597015897336203e-06, "loss": 1.108, "step": 11440 }, { "epoch": 0.414664202094886, "grad_norm": 2.2850830964952644, "learning_rate": 6.596459701933816e-06, "loss": 0.9814, "step": 11441 }, { "epoch": 0.41470044579754267, "grad_norm": 2.314008892434133, "learning_rate": 6.59590348453318e-06, "loss": 0.9567, "step": 11442 }, { "epoch": 0.4147366895001993, "grad_norm": 2.5438900409255893, "learning_rate": 6.595347245141959e-06, "loss": 0.9781, "step": 11443 }, { "epoch": 0.414772933202856, "grad_norm": 2.2829936081311923, "learning_rate": 6.594790983767818e-06, "loss": 0.8616, "step": 11444 }, { "epoch": 0.41480917690551267, "grad_norm": 2.40882093015156, "learning_rate": 6.594234700418423e-06, "loss": 0.9817, "step": 11445 }, { "epoch": 0.4148454206081693, "grad_norm": 2.333763138050376, "learning_rate": 6.593678395101437e-06, "loss": 0.8918, "step": 11446 }, { "epoch": 0.414881664310826, "grad_norm": 2.7888153666712316, "learning_rate": 6.5931220678245266e-06, "loss": 0.9139, "step": 11447 }, { "epoch": 0.41491790801348266, "grad_norm": 2.2882079133597886, "learning_rate": 6.592565718595357e-06, "loss": 1.046, "step": 11448 }, { "epoch": 0.4149541517161393, "grad_norm": 2.4282537636470143, "learning_rate": 6.592009347421595e-06, "loss": 0.7437, "step": 11449 }, { "epoch": 0.414990395418796, "grad_norm": 3.718113705041666, "learning_rate": 6.591452954310907e-06, "loss": 0.8648, "step": 11450 }, { "epoch": 0.41502663912145266, "grad_norm": 2.5629580557068956, "learning_rate": 6.59089653927096e-06, "loss": 1.0166, "step": 11451 }, { "epoch": 0.4150628828241093, "grad_norm": 2.242071342152265, "learning_rate": 6.590340102309421e-06, "loss": 1.1037, "step": 11452 }, { "epoch": 0.41509912652676595, "grad_norm": 2.5301025436075713, "learning_rate": 6.589783643433957e-06, "loss": 1.0652, "step": 11453 }, { "epoch": 0.41513537022942265, "grad_norm": 2.0948405052239454, "learning_rate": 6.589227162652236e-06, "loss": 0.8354, "step": 11454 }, { "epoch": 0.4151716139320793, "grad_norm": 2.462414538659057, "learning_rate": 6.588670659971928e-06, "loss": 0.7915, "step": 11455 }, { "epoch": 0.41520785763473594, "grad_norm": 2.355587444442982, "learning_rate": 6.588114135400696e-06, "loss": 0.8648, "step": 11456 }, { "epoch": 0.41524410133739265, "grad_norm": 2.784766386873485, "learning_rate": 6.587557588946212e-06, "loss": 0.9452, "step": 11457 }, { "epoch": 0.4152803450400493, "grad_norm": 2.4926462655747352, "learning_rate": 6.587001020616146e-06, "loss": 0.9375, "step": 11458 }, { "epoch": 0.41531658874270594, "grad_norm": 2.277882948678667, "learning_rate": 6.586444430418164e-06, "loss": 0.7374, "step": 11459 }, { "epoch": 0.41535283244536264, "grad_norm": 2.4801746158422144, "learning_rate": 6.585887818359939e-06, "loss": 0.9152, "step": 11460 }, { "epoch": 0.4153890761480193, "grad_norm": 2.5137705582803984, "learning_rate": 6.5853311844491374e-06, "loss": 1.1835, "step": 11461 }, { "epoch": 0.41542531985067593, "grad_norm": 2.232904296816538, "learning_rate": 6.584774528693432e-06, "loss": 0.9562, "step": 11462 }, { "epoch": 0.41546156355333264, "grad_norm": 2.1634651270459666, "learning_rate": 6.58421785110049e-06, "loss": 0.9658, "step": 11463 }, { "epoch": 0.4154978072559893, "grad_norm": 2.4092878931987376, "learning_rate": 6.583661151677987e-06, "loss": 1.0225, "step": 11464 }, { "epoch": 0.41553405095864593, "grad_norm": 2.4655681730591237, "learning_rate": 6.58310443043359e-06, "loss": 0.9784, "step": 11465 }, { "epoch": 0.4155702946613026, "grad_norm": 2.6273670729323673, "learning_rate": 6.582547687374971e-06, "loss": 0.8982, "step": 11466 }, { "epoch": 0.4156065383639593, "grad_norm": 2.2771413467869452, "learning_rate": 6.581990922509803e-06, "loss": 1.0266, "step": 11467 }, { "epoch": 0.4156427820666159, "grad_norm": 2.3199454860979727, "learning_rate": 6.581434135845756e-06, "loss": 1.0229, "step": 11468 }, { "epoch": 0.41567902576927257, "grad_norm": 2.522838225083613, "learning_rate": 6.580877327390501e-06, "loss": 1.0068, "step": 11469 }, { "epoch": 0.41571526947192927, "grad_norm": 2.4268766528551122, "learning_rate": 6.580320497151715e-06, "loss": 0.9047, "step": 11470 }, { "epoch": 0.4157515131745859, "grad_norm": 2.242684187062071, "learning_rate": 6.579763645137067e-06, "loss": 0.9331, "step": 11471 }, { "epoch": 0.41578775687724256, "grad_norm": 2.349198654027824, "learning_rate": 6.579206771354233e-06, "loss": 0.754, "step": 11472 }, { "epoch": 0.41582400057989927, "grad_norm": 2.942962670421429, "learning_rate": 6.578649875810883e-06, "loss": 0.9385, "step": 11473 }, { "epoch": 0.4158602442825559, "grad_norm": 2.206757919357481, "learning_rate": 6.578092958514694e-06, "loss": 0.8884, "step": 11474 }, { "epoch": 0.41589648798521256, "grad_norm": 2.3215269988425753, "learning_rate": 6.5775360194733386e-06, "loss": 1.0118, "step": 11475 }, { "epoch": 0.4159327316878692, "grad_norm": 2.02142433528553, "learning_rate": 6.576979058694491e-06, "loss": 0.6997, "step": 11476 }, { "epoch": 0.4159689753905259, "grad_norm": 2.125046488753442, "learning_rate": 6.576422076185824e-06, "loss": 0.7541, "step": 11477 }, { "epoch": 0.41600521909318255, "grad_norm": 2.401012840997349, "learning_rate": 6.575865071955015e-06, "loss": 1.0315, "step": 11478 }, { "epoch": 0.4160414627958392, "grad_norm": 2.262725028216359, "learning_rate": 6.5753080460097385e-06, "loss": 0.9569, "step": 11479 }, { "epoch": 0.4160777064984959, "grad_norm": 2.225727318437523, "learning_rate": 6.5747509983576695e-06, "loss": 0.8718, "step": 11480 }, { "epoch": 0.41611395020115255, "grad_norm": 2.3814723758733565, "learning_rate": 6.574193929006484e-06, "loss": 0.9373, "step": 11481 }, { "epoch": 0.4161501939038092, "grad_norm": 2.5203185370329773, "learning_rate": 6.57363683796386e-06, "loss": 0.9478, "step": 11482 }, { "epoch": 0.4161864376064659, "grad_norm": 2.589483930864779, "learning_rate": 6.573079725237469e-06, "loss": 0.8713, "step": 11483 }, { "epoch": 0.41622268130912254, "grad_norm": 2.4000825402818395, "learning_rate": 6.572522590834993e-06, "loss": 0.8222, "step": 11484 }, { "epoch": 0.4162589250117792, "grad_norm": 2.0378602576449687, "learning_rate": 6.5719654347641056e-06, "loss": 0.9515, "step": 11485 }, { "epoch": 0.4162951687144359, "grad_norm": 2.2429479529032266, "learning_rate": 6.5714082570324855e-06, "loss": 0.91, "step": 11486 }, { "epoch": 0.41633141241709254, "grad_norm": 2.353850717451904, "learning_rate": 6.5708510576478115e-06, "loss": 0.7837, "step": 11487 }, { "epoch": 0.4163676561197492, "grad_norm": 2.7247678429268167, "learning_rate": 6.570293836617757e-06, "loss": 0.9963, "step": 11488 }, { "epoch": 0.41640389982240583, "grad_norm": 2.2472375459221676, "learning_rate": 6.569736593950006e-06, "loss": 0.8529, "step": 11489 }, { "epoch": 0.41644014352506253, "grad_norm": 2.2789597505966195, "learning_rate": 6.569179329652231e-06, "loss": 0.9654, "step": 11490 }, { "epoch": 0.4164763872277192, "grad_norm": 2.2862222417491727, "learning_rate": 6.5686220437321165e-06, "loss": 1.0091, "step": 11491 }, { "epoch": 0.4165126309303758, "grad_norm": 2.478553767504054, "learning_rate": 6.568064736197337e-06, "loss": 0.9519, "step": 11492 }, { "epoch": 0.41654887463303253, "grad_norm": 2.4664152025611172, "learning_rate": 6.567507407055575e-06, "loss": 1.0693, "step": 11493 }, { "epoch": 0.4165851183356892, "grad_norm": 2.1852261891009657, "learning_rate": 6.566950056314509e-06, "loss": 0.9106, "step": 11494 }, { "epoch": 0.4166213620383458, "grad_norm": 2.2983981630454062, "learning_rate": 6.566392683981821e-06, "loss": 0.9236, "step": 11495 }, { "epoch": 0.4166576057410025, "grad_norm": 2.409479839333265, "learning_rate": 6.565835290065186e-06, "loss": 1.0219, "step": 11496 }, { "epoch": 0.41669384944365917, "grad_norm": 2.4049849812034765, "learning_rate": 6.565277874572289e-06, "loss": 0.9888, "step": 11497 }, { "epoch": 0.4167300931463158, "grad_norm": 2.584625661447746, "learning_rate": 6.564720437510808e-06, "loss": 0.9295, "step": 11498 }, { "epoch": 0.4167663368489725, "grad_norm": 2.5147605670802, "learning_rate": 6.5641629788884265e-06, "loss": 0.9622, "step": 11499 }, { "epoch": 0.41680258055162916, "grad_norm": 2.417202621335267, "learning_rate": 6.563605498712825e-06, "loss": 0.8386, "step": 11500 }, { "epoch": 0.4168388242542858, "grad_norm": 2.3111267263195416, "learning_rate": 6.563047996991686e-06, "loss": 0.944, "step": 11501 }, { "epoch": 0.41687506795694246, "grad_norm": 2.0337801569285463, "learning_rate": 6.56249047373269e-06, "loss": 0.8681, "step": 11502 }, { "epoch": 0.41691131165959916, "grad_norm": 2.0060538131479917, "learning_rate": 6.561932928943522e-06, "loss": 0.9262, "step": 11503 }, { "epoch": 0.4169475553622558, "grad_norm": 2.273044120510134, "learning_rate": 6.561375362631861e-06, "loss": 0.7893, "step": 11504 }, { "epoch": 0.41698379906491245, "grad_norm": 2.2054327091032246, "learning_rate": 6.560817774805394e-06, "loss": 0.9131, "step": 11505 }, { "epoch": 0.41702004276756915, "grad_norm": 2.477779838465867, "learning_rate": 6.560260165471799e-06, "loss": 0.8854, "step": 11506 }, { "epoch": 0.4170562864702258, "grad_norm": 2.0112671267930895, "learning_rate": 6.5597025346387655e-06, "loss": 0.7138, "step": 11507 }, { "epoch": 0.41709253017288245, "grad_norm": 2.1515811928825204, "learning_rate": 6.559144882313972e-06, "loss": 1.0199, "step": 11508 }, { "epoch": 0.41712877387553915, "grad_norm": 2.144320500926822, "learning_rate": 6.5585872085051065e-06, "loss": 0.9367, "step": 11509 }, { "epoch": 0.4171650175781958, "grad_norm": 2.226457735610502, "learning_rate": 6.55802951321985e-06, "loss": 0.897, "step": 11510 }, { "epoch": 0.41720126128085244, "grad_norm": 2.229949124390901, "learning_rate": 6.557471796465891e-06, "loss": 1.0243, "step": 11511 }, { "epoch": 0.4172375049835091, "grad_norm": 2.3224793749232493, "learning_rate": 6.556914058250911e-06, "loss": 0.9619, "step": 11512 }, { "epoch": 0.4172737486861658, "grad_norm": 2.2427889328635415, "learning_rate": 6.556356298582598e-06, "loss": 0.85, "step": 11513 }, { "epoch": 0.41730999238882244, "grad_norm": 2.1150153237024116, "learning_rate": 6.555798517468637e-06, "loss": 0.7634, "step": 11514 }, { "epoch": 0.4173462360914791, "grad_norm": 2.131308393566346, "learning_rate": 6.555240714916713e-06, "loss": 0.9079, "step": 11515 }, { "epoch": 0.4173824797941358, "grad_norm": 2.3208951391022645, "learning_rate": 6.554682890934512e-06, "loss": 1.0349, "step": 11516 }, { "epoch": 0.41741872349679243, "grad_norm": 2.357926673842796, "learning_rate": 6.5541250455297215e-06, "loss": 0.8974, "step": 11517 }, { "epoch": 0.4174549671994491, "grad_norm": 2.208230145572765, "learning_rate": 6.553567178710027e-06, "loss": 0.8866, "step": 11518 }, { "epoch": 0.4174912109021058, "grad_norm": 2.8230711614314847, "learning_rate": 6.5530092904831165e-06, "loss": 1.0368, "step": 11519 }, { "epoch": 0.4175274546047624, "grad_norm": 2.276186372725829, "learning_rate": 6.552451380856677e-06, "loss": 0.7687, "step": 11520 }, { "epoch": 0.4175636983074191, "grad_norm": 2.2662041806580597, "learning_rate": 6.551893449838397e-06, "loss": 0.903, "step": 11521 }, { "epoch": 0.4175999420100758, "grad_norm": 2.410070403653558, "learning_rate": 6.5513354974359625e-06, "loss": 1.09, "step": 11522 }, { "epoch": 0.4176361857127324, "grad_norm": 2.315299736650835, "learning_rate": 6.5507775236570645e-06, "loss": 0.8611, "step": 11523 }, { "epoch": 0.41767242941538907, "grad_norm": 2.0742757813096864, "learning_rate": 6.5502195285093905e-06, "loss": 1.1526, "step": 11524 }, { "epoch": 0.4177086731180457, "grad_norm": 2.2994777235078265, "learning_rate": 6.549661512000629e-06, "loss": 0.9232, "step": 11525 }, { "epoch": 0.4177449168207024, "grad_norm": 2.261222349263261, "learning_rate": 6.549103474138467e-06, "loss": 0.988, "step": 11526 }, { "epoch": 0.41778116052335906, "grad_norm": 2.378491455014138, "learning_rate": 6.5485454149305985e-06, "loss": 1.1025, "step": 11527 }, { "epoch": 0.4178174042260157, "grad_norm": 2.4794697451930374, "learning_rate": 6.547987334384709e-06, "loss": 0.8741, "step": 11528 }, { "epoch": 0.4178536479286724, "grad_norm": 2.5523101977502862, "learning_rate": 6.547429232508491e-06, "loss": 0.9009, "step": 11529 }, { "epoch": 0.41788989163132906, "grad_norm": 2.1863684505224836, "learning_rate": 6.546871109309633e-06, "loss": 0.8571, "step": 11530 }, { "epoch": 0.4179261353339857, "grad_norm": 2.350760649514048, "learning_rate": 6.546312964795827e-06, "loss": 1.0212, "step": 11531 }, { "epoch": 0.4179623790366424, "grad_norm": 2.0527107260418265, "learning_rate": 6.545754798974765e-06, "loss": 0.8035, "step": 11532 }, { "epoch": 0.41799862273929905, "grad_norm": 2.327503211308413, "learning_rate": 6.5451966118541354e-06, "loss": 1.0281, "step": 11533 }, { "epoch": 0.4180348664419557, "grad_norm": 2.5032659785164455, "learning_rate": 6.544638403441631e-06, "loss": 1.0333, "step": 11534 }, { "epoch": 0.4180711101446124, "grad_norm": 2.3811211726464494, "learning_rate": 6.544080173744944e-06, "loss": 0.8767, "step": 11535 }, { "epoch": 0.41810735384726905, "grad_norm": 2.316982381645907, "learning_rate": 6.543521922771765e-06, "loss": 1.0637, "step": 11536 }, { "epoch": 0.4181435975499257, "grad_norm": 2.0839970996981547, "learning_rate": 6.5429636505297885e-06, "loss": 0.7096, "step": 11537 }, { "epoch": 0.41817984125258234, "grad_norm": 2.4823612359671188, "learning_rate": 6.542405357026705e-06, "loss": 0.871, "step": 11538 }, { "epoch": 0.41821608495523904, "grad_norm": 2.1410311185169597, "learning_rate": 6.541847042270211e-06, "loss": 1.1059, "step": 11539 }, { "epoch": 0.4182523286578957, "grad_norm": 2.4229299806954323, "learning_rate": 6.541288706267994e-06, "loss": 0.8842, "step": 11540 }, { "epoch": 0.41828857236055234, "grad_norm": 2.182923245782773, "learning_rate": 6.540730349027753e-06, "loss": 0.8976, "step": 11541 }, { "epoch": 0.41832481606320904, "grad_norm": 2.241167447310417, "learning_rate": 6.5401719705571785e-06, "loss": 0.8743, "step": 11542 }, { "epoch": 0.4183610597658657, "grad_norm": 2.5156227355760734, "learning_rate": 6.539613570863968e-06, "loss": 0.8972, "step": 11543 }, { "epoch": 0.41839730346852233, "grad_norm": 2.240975655300763, "learning_rate": 6.539055149955809e-06, "loss": 1.0025, "step": 11544 }, { "epoch": 0.41843354717117903, "grad_norm": 2.1024267842463953, "learning_rate": 6.538496707840405e-06, "loss": 0.8682, "step": 11545 }, { "epoch": 0.4184697908738357, "grad_norm": 2.410661422642016, "learning_rate": 6.537938244525445e-06, "loss": 1.061, "step": 11546 }, { "epoch": 0.4185060345764923, "grad_norm": 2.2633810861078447, "learning_rate": 6.537379760018626e-06, "loss": 0.8762, "step": 11547 }, { "epoch": 0.41854227827914897, "grad_norm": 2.3104200164686923, "learning_rate": 6.536821254327643e-06, "loss": 0.9046, "step": 11548 }, { "epoch": 0.4185785219818057, "grad_norm": 1.907935599385622, "learning_rate": 6.536262727460195e-06, "loss": 0.8387, "step": 11549 }, { "epoch": 0.4186147656844623, "grad_norm": 2.3356354555579486, "learning_rate": 6.535704179423974e-06, "loss": 0.9071, "step": 11550 }, { "epoch": 0.41865100938711897, "grad_norm": 2.5683772480564224, "learning_rate": 6.535145610226678e-06, "loss": 1.0163, "step": 11551 }, { "epoch": 0.41868725308977567, "grad_norm": 2.303840167455717, "learning_rate": 6.534587019876005e-06, "loss": 0.8893, "step": 11552 }, { "epoch": 0.4187234967924323, "grad_norm": 2.3613034012942915, "learning_rate": 6.534028408379651e-06, "loss": 0.9289, "step": 11553 }, { "epoch": 0.41875974049508896, "grad_norm": 2.268577803263152, "learning_rate": 6.533469775745312e-06, "loss": 0.9248, "step": 11554 }, { "epoch": 0.41879598419774566, "grad_norm": 2.9545044861404683, "learning_rate": 6.5329111219806886e-06, "loss": 0.8747, "step": 11555 }, { "epoch": 0.4188322279004023, "grad_norm": 2.6117596046455294, "learning_rate": 6.5323524470934755e-06, "loss": 0.9693, "step": 11556 }, { "epoch": 0.41886847160305896, "grad_norm": 2.2263828270455717, "learning_rate": 6.5317937510913734e-06, "loss": 0.8169, "step": 11557 }, { "epoch": 0.41890471530571566, "grad_norm": 2.561326098650144, "learning_rate": 6.5312350339820785e-06, "loss": 0.9793, "step": 11558 }, { "epoch": 0.4189409590083723, "grad_norm": 2.194622783327591, "learning_rate": 6.530676295773293e-06, "loss": 0.9738, "step": 11559 }, { "epoch": 0.41897720271102895, "grad_norm": 2.4154772884787747, "learning_rate": 6.530117536472711e-06, "loss": 0.8686, "step": 11560 }, { "epoch": 0.4190134464136856, "grad_norm": 2.2556953368654242, "learning_rate": 6.529558756088037e-06, "loss": 0.9985, "step": 11561 }, { "epoch": 0.4190496901163423, "grad_norm": 2.2965493820006535, "learning_rate": 6.528999954626969e-06, "loss": 1.0548, "step": 11562 }, { "epoch": 0.41908593381899895, "grad_norm": 2.2443796430920946, "learning_rate": 6.528441132097205e-06, "loss": 0.9454, "step": 11563 }, { "epoch": 0.4191221775216556, "grad_norm": 2.4927191319592015, "learning_rate": 6.527882288506446e-06, "loss": 0.9147, "step": 11564 }, { "epoch": 0.4191584212243123, "grad_norm": 2.2389074136899274, "learning_rate": 6.527323423862394e-06, "loss": 0.8957, "step": 11565 }, { "epoch": 0.41919466492696894, "grad_norm": 2.52459805090069, "learning_rate": 6.526764538172747e-06, "loss": 0.9228, "step": 11566 }, { "epoch": 0.4192309086296256, "grad_norm": 2.2459084087424124, "learning_rate": 6.526205631445211e-06, "loss": 0.9793, "step": 11567 }, { "epoch": 0.4192671523322823, "grad_norm": 2.266805293595471, "learning_rate": 6.525646703687483e-06, "loss": 0.9567, "step": 11568 }, { "epoch": 0.41930339603493894, "grad_norm": 2.096729989468371, "learning_rate": 6.525087754907265e-06, "loss": 0.9471, "step": 11569 }, { "epoch": 0.4193396397375956, "grad_norm": 2.446230715019675, "learning_rate": 6.524528785112262e-06, "loss": 0.8481, "step": 11570 }, { "epoch": 0.41937588344025223, "grad_norm": 2.2600419256538795, "learning_rate": 6.523969794310174e-06, "loss": 0.986, "step": 11571 }, { "epoch": 0.41941212714290893, "grad_norm": 2.59774848483492, "learning_rate": 6.523410782508703e-06, "loss": 0.7978, "step": 11572 }, { "epoch": 0.4194483708455656, "grad_norm": 2.2511742078694854, "learning_rate": 6.522851749715553e-06, "loss": 0.8466, "step": 11573 }, { "epoch": 0.4194846145482222, "grad_norm": 2.2602858013918885, "learning_rate": 6.522292695938428e-06, "loss": 0.8962, "step": 11574 }, { "epoch": 0.4195208582508789, "grad_norm": 2.148950480650291, "learning_rate": 6.521733621185029e-06, "loss": 1.0173, "step": 11575 }, { "epoch": 0.41955710195353557, "grad_norm": 2.307935278102999, "learning_rate": 6.521174525463062e-06, "loss": 1.0365, "step": 11576 }, { "epoch": 0.4195933456561922, "grad_norm": 2.6133496662156532, "learning_rate": 6.520615408780229e-06, "loss": 0.8526, "step": 11577 }, { "epoch": 0.4196295893588489, "grad_norm": 2.482519684358065, "learning_rate": 6.520056271144236e-06, "loss": 0.99, "step": 11578 }, { "epoch": 0.41966583306150557, "grad_norm": 2.352067824969044, "learning_rate": 6.519497112562786e-06, "loss": 0.8214, "step": 11579 }, { "epoch": 0.4197020767641622, "grad_norm": 2.3365510934435574, "learning_rate": 6.518937933043586e-06, "loss": 0.9989, "step": 11580 }, { "epoch": 0.4197383204668189, "grad_norm": 2.474027611661485, "learning_rate": 6.518378732594339e-06, "loss": 0.8244, "step": 11581 }, { "epoch": 0.41977456416947556, "grad_norm": 2.4088756150945314, "learning_rate": 6.5178195112227525e-06, "loss": 0.8554, "step": 11582 }, { "epoch": 0.4198108078721322, "grad_norm": 2.279789294656221, "learning_rate": 6.517260268936529e-06, "loss": 0.9907, "step": 11583 }, { "epoch": 0.41984705157478885, "grad_norm": 2.322795461094096, "learning_rate": 6.51670100574338e-06, "loss": 0.861, "step": 11584 }, { "epoch": 0.41988329527744556, "grad_norm": 2.339579557015272, "learning_rate": 6.516141721651005e-06, "loss": 0.9504, "step": 11585 }, { "epoch": 0.4199195389801022, "grad_norm": 2.5549284426653958, "learning_rate": 6.515582416667115e-06, "loss": 0.8678, "step": 11586 }, { "epoch": 0.41995578268275885, "grad_norm": 2.5094714486449035, "learning_rate": 6.515023090799416e-06, "loss": 0.9326, "step": 11587 }, { "epoch": 0.41999202638541555, "grad_norm": 2.0797950315270706, "learning_rate": 6.514463744055616e-06, "loss": 0.7762, "step": 11588 }, { "epoch": 0.4200282700880722, "grad_norm": 2.3659634115296146, "learning_rate": 6.51390437644342e-06, "loss": 1.026, "step": 11589 }, { "epoch": 0.42006451379072884, "grad_norm": 2.4253536181061612, "learning_rate": 6.513344987970538e-06, "loss": 0.9963, "step": 11590 }, { "epoch": 0.42010075749338555, "grad_norm": 2.4159685956376458, "learning_rate": 6.512785578644678e-06, "loss": 0.9861, "step": 11591 }, { "epoch": 0.4201370011960422, "grad_norm": 2.3684523789573815, "learning_rate": 6.512226148473549e-06, "loss": 0.9133, "step": 11592 }, { "epoch": 0.42017324489869884, "grad_norm": 2.5237242425059714, "learning_rate": 6.511666697464855e-06, "loss": 0.9131, "step": 11593 }, { "epoch": 0.42020948860135554, "grad_norm": 2.2936559036948005, "learning_rate": 6.511107225626309e-06, "loss": 0.7789, "step": 11594 }, { "epoch": 0.4202457323040122, "grad_norm": 2.632195694335836, "learning_rate": 6.51054773296562e-06, "loss": 0.8965, "step": 11595 }, { "epoch": 0.42028197600666883, "grad_norm": 2.2264224527728107, "learning_rate": 6.5099882194904965e-06, "loss": 0.7527, "step": 11596 }, { "epoch": 0.4203182197093255, "grad_norm": 2.3468552704946606, "learning_rate": 6.509428685208648e-06, "loss": 0.8217, "step": 11597 }, { "epoch": 0.4203544634119822, "grad_norm": 2.254632227041288, "learning_rate": 6.508869130127786e-06, "loss": 0.9581, "step": 11598 }, { "epoch": 0.42039070711463883, "grad_norm": 3.022722526518185, "learning_rate": 6.508309554255618e-06, "loss": 0.8751, "step": 11599 }, { "epoch": 0.4204269508172955, "grad_norm": 2.535600287826692, "learning_rate": 6.50774995759986e-06, "loss": 0.8393, "step": 11600 }, { "epoch": 0.4204631945199522, "grad_norm": 2.118070730666108, "learning_rate": 6.507190340168217e-06, "loss": 0.8896, "step": 11601 }, { "epoch": 0.4204994382226088, "grad_norm": 2.430812865477134, "learning_rate": 6.506630701968404e-06, "loss": 0.9946, "step": 11602 }, { "epoch": 0.42053568192526547, "grad_norm": 2.2807932800495867, "learning_rate": 6.5060710430081285e-06, "loss": 1.027, "step": 11603 }, { "epoch": 0.42057192562792217, "grad_norm": 2.1968087003991603, "learning_rate": 6.505511363295108e-06, "loss": 0.9143, "step": 11604 }, { "epoch": 0.4206081693305788, "grad_norm": 2.4829156579560014, "learning_rate": 6.504951662837049e-06, "loss": 1.013, "step": 11605 }, { "epoch": 0.42064441303323546, "grad_norm": 2.2057502810006766, "learning_rate": 6.5043919416416675e-06, "loss": 0.7403, "step": 11606 }, { "epoch": 0.4206806567358921, "grad_norm": 2.291911812790186, "learning_rate": 6.5038321997166745e-06, "loss": 1.0058, "step": 11607 }, { "epoch": 0.4207169004385488, "grad_norm": 2.66669994663154, "learning_rate": 6.503272437069783e-06, "loss": 0.9047, "step": 11608 }, { "epoch": 0.42075314414120546, "grad_norm": 2.4362026338696516, "learning_rate": 6.502712653708707e-06, "loss": 1.0061, "step": 11609 }, { "epoch": 0.4207893878438621, "grad_norm": 2.1648131116538933, "learning_rate": 6.5021528496411605e-06, "loss": 0.9042, "step": 11610 }, { "epoch": 0.4208256315465188, "grad_norm": 2.664366624734622, "learning_rate": 6.501593024874855e-06, "loss": 0.8781, "step": 11611 }, { "epoch": 0.42086187524917545, "grad_norm": 2.2729274891976545, "learning_rate": 6.501033179417506e-06, "loss": 0.933, "step": 11612 }, { "epoch": 0.4208981189518321, "grad_norm": 2.6250429032410856, "learning_rate": 6.500473313276827e-06, "loss": 0.9739, "step": 11613 }, { "epoch": 0.4209343626544888, "grad_norm": 2.2863777743300697, "learning_rate": 6.499913426460533e-06, "loss": 1.1186, "step": 11614 }, { "epoch": 0.42097060635714545, "grad_norm": 2.0479642514041276, "learning_rate": 6.49935351897634e-06, "loss": 1.0877, "step": 11615 }, { "epoch": 0.4210068500598021, "grad_norm": 2.3996084569920253, "learning_rate": 6.498793590831962e-06, "loss": 1.0587, "step": 11616 }, { "epoch": 0.4210430937624588, "grad_norm": 2.1547278340438507, "learning_rate": 6.498233642035114e-06, "loss": 0.6764, "step": 11617 }, { "epoch": 0.42107933746511544, "grad_norm": 2.1842740705720107, "learning_rate": 6.497673672593514e-06, "loss": 0.9597, "step": 11618 }, { "epoch": 0.4211155811677721, "grad_norm": 1.9869089514533556, "learning_rate": 6.497113682514876e-06, "loss": 0.9144, "step": 11619 }, { "epoch": 0.42115182487042874, "grad_norm": 2.682655737748652, "learning_rate": 6.496553671806917e-06, "loss": 0.9863, "step": 11620 }, { "epoch": 0.42118806857308544, "grad_norm": 2.4088724906999843, "learning_rate": 6.495993640477353e-06, "loss": 1.0795, "step": 11621 }, { "epoch": 0.4212243122757421, "grad_norm": 1.9014834860292715, "learning_rate": 6.495433588533902e-06, "loss": 0.6341, "step": 11622 }, { "epoch": 0.42126055597839873, "grad_norm": 2.4037234345204848, "learning_rate": 6.494873515984279e-06, "loss": 1.1036, "step": 11623 }, { "epoch": 0.42129679968105543, "grad_norm": 2.6167518147635973, "learning_rate": 6.494313422836205e-06, "loss": 0.9421, "step": 11624 }, { "epoch": 0.4213330433837121, "grad_norm": 2.6941249525745805, "learning_rate": 6.493753309097395e-06, "loss": 0.9939, "step": 11625 }, { "epoch": 0.4213692870863687, "grad_norm": 2.1561693083705618, "learning_rate": 6.493193174775567e-06, "loss": 0.9746, "step": 11626 }, { "epoch": 0.42140553078902543, "grad_norm": 2.353224440111661, "learning_rate": 6.49263301987844e-06, "loss": 0.9328, "step": 11627 }, { "epoch": 0.4214417744916821, "grad_norm": 2.131672085043422, "learning_rate": 6.492072844413735e-06, "loss": 0.8629, "step": 11628 }, { "epoch": 0.4214780181943387, "grad_norm": 2.3118483808975667, "learning_rate": 6.4915126483891665e-06, "loss": 1.1467, "step": 11629 }, { "epoch": 0.4215142618969954, "grad_norm": 2.3077837122755365, "learning_rate": 6.490952431812457e-06, "loss": 0.8687, "step": 11630 }, { "epoch": 0.42155050559965207, "grad_norm": 2.1426112835561772, "learning_rate": 6.490392194691324e-06, "loss": 0.7555, "step": 11631 }, { "epoch": 0.4215867493023087, "grad_norm": 2.3391348358874082, "learning_rate": 6.4898319370334885e-06, "loss": 0.846, "step": 11632 }, { "epoch": 0.42162299300496536, "grad_norm": 2.285952522229174, "learning_rate": 6.489271658846669e-06, "loss": 1.0804, "step": 11633 }, { "epoch": 0.42165923670762206, "grad_norm": 1.9903218910690597, "learning_rate": 6.488711360138586e-06, "loss": 0.9772, "step": 11634 }, { "epoch": 0.4216954804102787, "grad_norm": 2.2574767420850193, "learning_rate": 6.4881510409169626e-06, "loss": 0.714, "step": 11635 }, { "epoch": 0.42173172411293536, "grad_norm": 2.331079111247287, "learning_rate": 6.487590701189516e-06, "loss": 0.8332, "step": 11636 }, { "epoch": 0.42176796781559206, "grad_norm": 2.4172813876724306, "learning_rate": 6.487030340963971e-06, "loss": 0.8391, "step": 11637 }, { "epoch": 0.4218042115182487, "grad_norm": 2.3089350317990944, "learning_rate": 6.486469960248048e-06, "loss": 0.9627, "step": 11638 }, { "epoch": 0.42184045522090535, "grad_norm": 2.124721003248941, "learning_rate": 6.4859095590494655e-06, "loss": 0.7833, "step": 11639 }, { "epoch": 0.42187669892356205, "grad_norm": 1.8944274662119729, "learning_rate": 6.485349137375949e-06, "loss": 0.955, "step": 11640 }, { "epoch": 0.4219129426262187, "grad_norm": 1.9956099128995868, "learning_rate": 6.4847886952352205e-06, "loss": 0.6541, "step": 11641 }, { "epoch": 0.42194918632887535, "grad_norm": 2.445440081810052, "learning_rate": 6.484228232635001e-06, "loss": 0.7931, "step": 11642 }, { "epoch": 0.421985430031532, "grad_norm": 2.538412921907097, "learning_rate": 6.483667749583014e-06, "loss": 1.0381, "step": 11643 }, { "epoch": 0.4220216737341887, "grad_norm": 2.5535214067018277, "learning_rate": 6.483107246086982e-06, "loss": 0.8766, "step": 11644 }, { "epoch": 0.42205791743684534, "grad_norm": 2.1164507087371662, "learning_rate": 6.482546722154629e-06, "loss": 0.8359, "step": 11645 }, { "epoch": 0.422094161139502, "grad_norm": 2.5904960381395226, "learning_rate": 6.48198617779368e-06, "loss": 1.018, "step": 11646 }, { "epoch": 0.4221304048421587, "grad_norm": 2.255093516513117, "learning_rate": 6.481425613011857e-06, "loss": 0.8777, "step": 11647 }, { "epoch": 0.42216664854481534, "grad_norm": 2.3354401939038025, "learning_rate": 6.480865027816886e-06, "loss": 0.9861, "step": 11648 }, { "epoch": 0.422202892247472, "grad_norm": 2.2715569123729313, "learning_rate": 6.48030442221649e-06, "loss": 0.7997, "step": 11649 }, { "epoch": 0.4222391359501287, "grad_norm": 2.4594515628072684, "learning_rate": 6.479743796218394e-06, "loss": 0.9611, "step": 11650 }, { "epoch": 0.42227537965278533, "grad_norm": 2.401209292798868, "learning_rate": 6.479183149830324e-06, "loss": 1.0681, "step": 11651 }, { "epoch": 0.422311623355442, "grad_norm": 1.9221065104987423, "learning_rate": 6.4786224830600034e-06, "loss": 0.8063, "step": 11652 }, { "epoch": 0.4223478670580987, "grad_norm": 2.217648787024411, "learning_rate": 6.47806179591516e-06, "loss": 1.0336, "step": 11653 }, { "epoch": 0.4223841107607553, "grad_norm": 2.180466759826214, "learning_rate": 6.47750108840352e-06, "loss": 0.9751, "step": 11654 }, { "epoch": 0.422420354463412, "grad_norm": 2.5530274834951405, "learning_rate": 6.476940360532807e-06, "loss": 0.8453, "step": 11655 }, { "epoch": 0.4224565981660686, "grad_norm": 2.427134597900423, "learning_rate": 6.476379612310749e-06, "loss": 0.9433, "step": 11656 }, { "epoch": 0.4224928418687253, "grad_norm": 2.2398996194010263, "learning_rate": 6.4758188437450745e-06, "loss": 0.7423, "step": 11657 }, { "epoch": 0.42252908557138197, "grad_norm": 2.417236528700514, "learning_rate": 6.475258054843509e-06, "loss": 0.9131, "step": 11658 }, { "epoch": 0.4225653292740386, "grad_norm": 2.299352377016769, "learning_rate": 6.474697245613778e-06, "loss": 0.9024, "step": 11659 }, { "epoch": 0.4226015729766953, "grad_norm": 1.9827086385074828, "learning_rate": 6.474136416063611e-06, "loss": 1.0324, "step": 11660 }, { "epoch": 0.42263781667935196, "grad_norm": 2.690770438042257, "learning_rate": 6.473575566200736e-06, "loss": 0.8297, "step": 11661 }, { "epoch": 0.4226740603820086, "grad_norm": 2.3594663074814846, "learning_rate": 6.473014696032881e-06, "loss": 0.9248, "step": 11662 }, { "epoch": 0.4227103040846653, "grad_norm": 2.482004465314049, "learning_rate": 6.472453805567776e-06, "loss": 1.1023, "step": 11663 }, { "epoch": 0.42274654778732196, "grad_norm": 2.6625830615825476, "learning_rate": 6.471892894813145e-06, "loss": 0.9135, "step": 11664 }, { "epoch": 0.4227827914899786, "grad_norm": 2.4873901675557417, "learning_rate": 6.471331963776721e-06, "loss": 0.8236, "step": 11665 }, { "epoch": 0.4228190351926353, "grad_norm": 2.2808324223213496, "learning_rate": 6.470771012466233e-06, "loss": 0.9033, "step": 11666 }, { "epoch": 0.42285527889529195, "grad_norm": 2.4673563276451507, "learning_rate": 6.470210040889411e-06, "loss": 0.8685, "step": 11667 }, { "epoch": 0.4228915225979486, "grad_norm": 2.2687241131526297, "learning_rate": 6.469649049053983e-06, "loss": 0.7003, "step": 11668 }, { "epoch": 0.42292776630060525, "grad_norm": 2.8983392331359057, "learning_rate": 6.46908803696768e-06, "loss": 0.8675, "step": 11669 }, { "epoch": 0.42296401000326195, "grad_norm": 2.224328212697915, "learning_rate": 6.468527004638231e-06, "loss": 0.9858, "step": 11670 }, { "epoch": 0.4230002537059186, "grad_norm": 2.766449604603161, "learning_rate": 6.46796595207337e-06, "loss": 0.8359, "step": 11671 }, { "epoch": 0.42303649740857524, "grad_norm": 2.569482549934663, "learning_rate": 6.467404879280823e-06, "loss": 1.0347, "step": 11672 }, { "epoch": 0.42307274111123194, "grad_norm": 2.393662722676261, "learning_rate": 6.466843786268327e-06, "loss": 0.9996, "step": 11673 }, { "epoch": 0.4231089848138886, "grad_norm": 2.4865087582154652, "learning_rate": 6.466282673043609e-06, "loss": 0.7846, "step": 11674 }, { "epoch": 0.42314522851654524, "grad_norm": 2.3247758547776107, "learning_rate": 6.465721539614405e-06, "loss": 0.9582, "step": 11675 }, { "epoch": 0.42318147221920194, "grad_norm": 2.1080929449057475, "learning_rate": 6.465160385988442e-06, "loss": 0.8876, "step": 11676 }, { "epoch": 0.4232177159218586, "grad_norm": 2.6330314153576606, "learning_rate": 6.464599212173457e-06, "loss": 0.9091, "step": 11677 }, { "epoch": 0.42325395962451523, "grad_norm": 2.103400001483706, "learning_rate": 6.464038018177179e-06, "loss": 0.7018, "step": 11678 }, { "epoch": 0.4232902033271719, "grad_norm": 2.3520828746351636, "learning_rate": 6.463476804007344e-06, "loss": 0.9162, "step": 11679 }, { "epoch": 0.4233264470298286, "grad_norm": 2.1964457124604415, "learning_rate": 6.4629155696716815e-06, "loss": 0.9698, "step": 11680 }, { "epoch": 0.4233626907324852, "grad_norm": 2.4231889683523606, "learning_rate": 6.46235431517793e-06, "loss": 0.8993, "step": 11681 }, { "epoch": 0.42339893443514187, "grad_norm": 2.254643607960818, "learning_rate": 6.461793040533818e-06, "loss": 1.0346, "step": 11682 }, { "epoch": 0.4234351781377986, "grad_norm": 2.333206468139936, "learning_rate": 6.461231745747083e-06, "loss": 0.8093, "step": 11683 }, { "epoch": 0.4234714218404552, "grad_norm": 2.738061898893053, "learning_rate": 6.4606704308254585e-06, "loss": 0.8766, "step": 11684 }, { "epoch": 0.42350766554311187, "grad_norm": 2.2934708913901654, "learning_rate": 6.460109095776679e-06, "loss": 0.8997, "step": 11685 }, { "epoch": 0.42354390924576857, "grad_norm": 2.411016919839852, "learning_rate": 6.4595477406084785e-06, "loss": 0.886, "step": 11686 }, { "epoch": 0.4235801529484252, "grad_norm": 2.3834419260371544, "learning_rate": 6.458986365328594e-06, "loss": 0.8529, "step": 11687 }, { "epoch": 0.42361639665108186, "grad_norm": 2.2234938085170852, "learning_rate": 6.458424969944759e-06, "loss": 0.6578, "step": 11688 }, { "epoch": 0.42365264035373856, "grad_norm": 2.036051040560526, "learning_rate": 6.4578635544647096e-06, "loss": 0.8042, "step": 11689 }, { "epoch": 0.4236888840563952, "grad_norm": 2.2756332942002997, "learning_rate": 6.4573021188961815e-06, "loss": 0.8341, "step": 11690 }, { "epoch": 0.42372512775905186, "grad_norm": 2.2101030562429536, "learning_rate": 6.456740663246913e-06, "loss": 0.9035, "step": 11691 }, { "epoch": 0.4237613714617085, "grad_norm": 2.2333969727339054, "learning_rate": 6.456179187524638e-06, "loss": 0.7734, "step": 11692 }, { "epoch": 0.4237976151643652, "grad_norm": 2.326689135653885, "learning_rate": 6.455617691737096e-06, "loss": 0.855, "step": 11693 }, { "epoch": 0.42383385886702185, "grad_norm": 2.2863843078430968, "learning_rate": 6.455056175892021e-06, "loss": 0.8304, "step": 11694 }, { "epoch": 0.4238701025696785, "grad_norm": 2.159390920078917, "learning_rate": 6.454494639997153e-06, "loss": 0.8553, "step": 11695 }, { "epoch": 0.4239063462723352, "grad_norm": 2.3890499250474693, "learning_rate": 6.453933084060227e-06, "loss": 0.9133, "step": 11696 }, { "epoch": 0.42394258997499185, "grad_norm": 2.369986421996221, "learning_rate": 6.453371508088984e-06, "loss": 0.9702, "step": 11697 }, { "epoch": 0.4239788336776485, "grad_norm": 2.246439004449623, "learning_rate": 6.45280991209116e-06, "loss": 0.8527, "step": 11698 }, { "epoch": 0.4240150773803052, "grad_norm": 2.4369886632449798, "learning_rate": 6.4522482960744945e-06, "loss": 0.9846, "step": 11699 }, { "epoch": 0.42405132108296184, "grad_norm": 2.2289320403759842, "learning_rate": 6.451686660046726e-06, "loss": 0.9248, "step": 11700 }, { "epoch": 0.4240875647856185, "grad_norm": 2.302754648392089, "learning_rate": 6.451125004015592e-06, "loss": 0.7092, "step": 11701 }, { "epoch": 0.4241238084882752, "grad_norm": 1.9474753195292145, "learning_rate": 6.450563327988832e-06, "loss": 0.9574, "step": 11702 }, { "epoch": 0.42416005219093184, "grad_norm": 2.1926091422524636, "learning_rate": 6.45000163197419e-06, "loss": 0.9746, "step": 11703 }, { "epoch": 0.4241962958935885, "grad_norm": 1.9991857533664719, "learning_rate": 6.449439915979399e-06, "loss": 0.8753, "step": 11704 }, { "epoch": 0.42423253959624513, "grad_norm": 2.7945182189625686, "learning_rate": 6.448878180012206e-06, "loss": 0.8002, "step": 11705 }, { "epoch": 0.42426878329890183, "grad_norm": 2.272486195969606, "learning_rate": 6.448316424080345e-06, "loss": 0.8847, "step": 11706 }, { "epoch": 0.4243050270015585, "grad_norm": 2.2566904839181543, "learning_rate": 6.447754648191562e-06, "loss": 0.7806, "step": 11707 }, { "epoch": 0.4243412707042151, "grad_norm": 2.5145098930734386, "learning_rate": 6.4471928523535945e-06, "loss": 1.0485, "step": 11708 }, { "epoch": 0.4243775144068718, "grad_norm": 2.0769560011532455, "learning_rate": 6.446631036574185e-06, "loss": 1.0517, "step": 11709 }, { "epoch": 0.42441375810952847, "grad_norm": 2.244124361543557, "learning_rate": 6.446069200861073e-06, "loss": 0.8527, "step": 11710 }, { "epoch": 0.4244500018121851, "grad_norm": 2.4535161936597643, "learning_rate": 6.445507345222004e-06, "loss": 0.9014, "step": 11711 }, { "epoch": 0.4244862455148418, "grad_norm": 2.3747849270379398, "learning_rate": 6.444945469664716e-06, "loss": 0.819, "step": 11712 }, { "epoch": 0.42452248921749847, "grad_norm": 2.419421284616633, "learning_rate": 6.444383574196955e-06, "loss": 0.9546, "step": 11713 }, { "epoch": 0.4245587329201551, "grad_norm": 2.277253271802804, "learning_rate": 6.4438216588264615e-06, "loss": 0.9599, "step": 11714 }, { "epoch": 0.42459497662281176, "grad_norm": 2.369866268370819, "learning_rate": 6.443259723560978e-06, "loss": 0.994, "step": 11715 }, { "epoch": 0.42463122032546846, "grad_norm": 2.350777015643106, "learning_rate": 6.44269776840825e-06, "loss": 0.9093, "step": 11716 }, { "epoch": 0.4246674640281251, "grad_norm": 2.379282682695695, "learning_rate": 6.4421357933760185e-06, "loss": 0.9568, "step": 11717 }, { "epoch": 0.42470370773078175, "grad_norm": 2.312846185065548, "learning_rate": 6.441573798472027e-06, "loss": 0.8742, "step": 11718 }, { "epoch": 0.42473995143343846, "grad_norm": 1.8952504653924338, "learning_rate": 6.441011783704021e-06, "loss": 0.627, "step": 11719 }, { "epoch": 0.4247761951360951, "grad_norm": 2.58303604438747, "learning_rate": 6.440449749079745e-06, "loss": 0.866, "step": 11720 }, { "epoch": 0.42481243883875175, "grad_norm": 2.2218139945971087, "learning_rate": 6.439887694606941e-06, "loss": 1.0714, "step": 11721 }, { "epoch": 0.42484868254140845, "grad_norm": 2.4299437325852744, "learning_rate": 6.439325620293356e-06, "loss": 1.1699, "step": 11722 }, { "epoch": 0.4248849262440651, "grad_norm": 2.563997128330848, "learning_rate": 6.438763526146735e-06, "loss": 1.065, "step": 11723 }, { "epoch": 0.42492116994672174, "grad_norm": 2.342447301729836, "learning_rate": 6.438201412174822e-06, "loss": 1.2077, "step": 11724 }, { "epoch": 0.42495741364937845, "grad_norm": 2.3458087834863255, "learning_rate": 6.437639278385364e-06, "loss": 0.952, "step": 11725 }, { "epoch": 0.4249936573520351, "grad_norm": 2.0678875624320012, "learning_rate": 6.437077124786106e-06, "loss": 0.8388, "step": 11726 }, { "epoch": 0.42502990105469174, "grad_norm": 2.4932066986642885, "learning_rate": 6.436514951384795e-06, "loss": 0.8135, "step": 11727 }, { "epoch": 0.4250661447573484, "grad_norm": 2.0536725440642445, "learning_rate": 6.435952758189175e-06, "loss": 0.8875, "step": 11728 }, { "epoch": 0.4251023884600051, "grad_norm": 2.58193504391635, "learning_rate": 6.4353905452069955e-06, "loss": 0.9671, "step": 11729 }, { "epoch": 0.42513863216266173, "grad_norm": 2.206516742106112, "learning_rate": 6.434828312446002e-06, "loss": 0.8681, "step": 11730 }, { "epoch": 0.4251748758653184, "grad_norm": 2.482895263118608, "learning_rate": 6.434266059913942e-06, "loss": 0.8878, "step": 11731 }, { "epoch": 0.4252111195679751, "grad_norm": 2.524081850840146, "learning_rate": 6.433703787618564e-06, "loss": 0.9483, "step": 11732 }, { "epoch": 0.42524736327063173, "grad_norm": 1.932868094047656, "learning_rate": 6.433141495567614e-06, "loss": 0.8489, "step": 11733 }, { "epoch": 0.4252836069732884, "grad_norm": 2.4713932035649098, "learning_rate": 6.432579183768841e-06, "loss": 1.0212, "step": 11734 }, { "epoch": 0.4253198506759451, "grad_norm": 2.628008602537849, "learning_rate": 6.432016852229994e-06, "loss": 0.9939, "step": 11735 }, { "epoch": 0.4253560943786017, "grad_norm": 2.230101322037761, "learning_rate": 6.431454500958822e-06, "loss": 0.794, "step": 11736 }, { "epoch": 0.42539233808125837, "grad_norm": 2.1673426892772865, "learning_rate": 6.43089212996307e-06, "loss": 0.8146, "step": 11737 }, { "epoch": 0.42542858178391507, "grad_norm": 2.297595756706157, "learning_rate": 6.430329739250492e-06, "loss": 0.9493, "step": 11738 }, { "epoch": 0.4254648254865717, "grad_norm": 2.4612225770825775, "learning_rate": 6.4297673288288335e-06, "loss": 0.8921, "step": 11739 }, { "epoch": 0.42550106918922836, "grad_norm": 2.011388220878692, "learning_rate": 6.429204898705846e-06, "loss": 0.9947, "step": 11740 }, { "epoch": 0.425537312891885, "grad_norm": 2.554681688083589, "learning_rate": 6.42864244888928e-06, "loss": 0.9258, "step": 11741 }, { "epoch": 0.4255735565945417, "grad_norm": 2.0480672764850545, "learning_rate": 6.4280799793868855e-06, "loss": 0.758, "step": 11742 }, { "epoch": 0.42560980029719836, "grad_norm": 2.4237063041129847, "learning_rate": 6.4275174902064105e-06, "loss": 0.9548, "step": 11743 }, { "epoch": 0.425646043999855, "grad_norm": 2.41115992377566, "learning_rate": 6.426954981355611e-06, "loss": 0.9333, "step": 11744 }, { "epoch": 0.4256822877025117, "grad_norm": 2.2249871260158463, "learning_rate": 6.426392452842233e-06, "loss": 0.9266, "step": 11745 }, { "epoch": 0.42571853140516835, "grad_norm": 2.5962197070484296, "learning_rate": 6.42582990467403e-06, "loss": 1.13, "step": 11746 }, { "epoch": 0.425754775107825, "grad_norm": 2.7006464511402584, "learning_rate": 6.425267336858753e-06, "loss": 0.781, "step": 11747 }, { "epoch": 0.4257910188104817, "grad_norm": 2.425779071339193, "learning_rate": 6.424704749404154e-06, "loss": 0.9316, "step": 11748 }, { "epoch": 0.42582726251313835, "grad_norm": 2.0776943236091006, "learning_rate": 6.424142142317985e-06, "loss": 0.7815, "step": 11749 }, { "epoch": 0.425863506215795, "grad_norm": 2.5050976111909478, "learning_rate": 6.4235795156079995e-06, "loss": 1.0984, "step": 11750 }, { "epoch": 0.42589974991845164, "grad_norm": 2.717543422091926, "learning_rate": 6.423016869281949e-06, "loss": 0.9193, "step": 11751 }, { "epoch": 0.42593599362110834, "grad_norm": 2.4587542555786777, "learning_rate": 6.422454203347586e-06, "loss": 0.9464, "step": 11752 }, { "epoch": 0.425972237323765, "grad_norm": 2.477129150963291, "learning_rate": 6.421891517812664e-06, "loss": 0.7932, "step": 11753 }, { "epoch": 0.42600848102642164, "grad_norm": 2.358197061250116, "learning_rate": 6.421328812684937e-06, "loss": 0.9139, "step": 11754 }, { "epoch": 0.42604472472907834, "grad_norm": 2.4077382564549383, "learning_rate": 6.42076608797216e-06, "loss": 0.9273, "step": 11755 }, { "epoch": 0.426080968431735, "grad_norm": 2.331838289952793, "learning_rate": 6.420203343682085e-06, "loss": 0.9256, "step": 11756 }, { "epoch": 0.42611721213439163, "grad_norm": 2.084498350566661, "learning_rate": 6.419640579822466e-06, "loss": 0.7609, "step": 11757 }, { "epoch": 0.42615345583704833, "grad_norm": 2.546493560708225, "learning_rate": 6.419077796401059e-06, "loss": 0.8365, "step": 11758 }, { "epoch": 0.426189699539705, "grad_norm": 2.159888516438434, "learning_rate": 6.418514993425617e-06, "loss": 0.9834, "step": 11759 }, { "epoch": 0.4262259432423616, "grad_norm": 2.4396763233211427, "learning_rate": 6.417952170903899e-06, "loss": 1.0491, "step": 11760 }, { "epoch": 0.42626218694501833, "grad_norm": 2.239597673543556, "learning_rate": 6.417389328843655e-06, "loss": 0.8361, "step": 11761 }, { "epoch": 0.426298430647675, "grad_norm": 2.420361719798992, "learning_rate": 6.416826467252643e-06, "loss": 0.8568, "step": 11762 }, { "epoch": 0.4263346743503316, "grad_norm": 2.179789159676769, "learning_rate": 6.41626358613862e-06, "loss": 0.9072, "step": 11763 }, { "epoch": 0.42637091805298827, "grad_norm": 2.8086503798012985, "learning_rate": 6.415700685509343e-06, "loss": 0.8042, "step": 11764 }, { "epoch": 0.42640716175564497, "grad_norm": 2.4707932137460764, "learning_rate": 6.415137765372565e-06, "loss": 0.9637, "step": 11765 }, { "epoch": 0.4264434054583016, "grad_norm": 2.294557407578764, "learning_rate": 6.414574825736046e-06, "loss": 0.9359, "step": 11766 }, { "epoch": 0.42647964916095826, "grad_norm": 2.3055527077524087, "learning_rate": 6.414011866607539e-06, "loss": 0.8294, "step": 11767 }, { "epoch": 0.42651589286361496, "grad_norm": 2.420281851014904, "learning_rate": 6.413448887994807e-06, "loss": 0.9137, "step": 11768 }, { "epoch": 0.4265521365662716, "grad_norm": 2.3190596245370156, "learning_rate": 6.4128858899056e-06, "loss": 0.9574, "step": 11769 }, { "epoch": 0.42658838026892826, "grad_norm": 2.200329450734323, "learning_rate": 6.412322872347683e-06, "loss": 0.7986, "step": 11770 }, { "epoch": 0.42662462397158496, "grad_norm": 2.4589361001436933, "learning_rate": 6.411759835328809e-06, "loss": 0.8972, "step": 11771 }, { "epoch": 0.4266608676742416, "grad_norm": 2.540066767546825, "learning_rate": 6.41119677885674e-06, "loss": 1.0588, "step": 11772 }, { "epoch": 0.42669711137689825, "grad_norm": 2.3091158967099212, "learning_rate": 6.4106337029392305e-06, "loss": 0.8181, "step": 11773 }, { "epoch": 0.42673335507955495, "grad_norm": 2.400813494108008, "learning_rate": 6.410070607584045e-06, "loss": 1.0038, "step": 11774 }, { "epoch": 0.4267695987822116, "grad_norm": 2.35011752559516, "learning_rate": 6.4095074927989374e-06, "loss": 1.1164, "step": 11775 }, { "epoch": 0.42680584248486825, "grad_norm": 2.4486011109591, "learning_rate": 6.4089443585916685e-06, "loss": 0.8483, "step": 11776 }, { "epoch": 0.4268420861875249, "grad_norm": 2.403137479011346, "learning_rate": 6.4083812049700005e-06, "loss": 1.1073, "step": 11777 }, { "epoch": 0.4268783298901816, "grad_norm": 2.466459612567179, "learning_rate": 6.4078180319416895e-06, "loss": 0.9162, "step": 11778 }, { "epoch": 0.42691457359283824, "grad_norm": 2.1373781239253686, "learning_rate": 6.407254839514496e-06, "loss": 1.0083, "step": 11779 }, { "epoch": 0.4269508172954949, "grad_norm": 2.1504405943028937, "learning_rate": 6.406691627696184e-06, "loss": 0.843, "step": 11780 }, { "epoch": 0.4269870609981516, "grad_norm": 2.434064657575726, "learning_rate": 6.406128396494512e-06, "loss": 0.9289, "step": 11781 }, { "epoch": 0.42702330470080824, "grad_norm": 2.1924216477537892, "learning_rate": 6.405565145917241e-06, "loss": 0.9465, "step": 11782 }, { "epoch": 0.4270595484034649, "grad_norm": 2.152079837100436, "learning_rate": 6.405001875972132e-06, "loss": 1.002, "step": 11783 }, { "epoch": 0.4270957921061216, "grad_norm": 2.2421365254662815, "learning_rate": 6.404438586666948e-06, "loss": 0.8964, "step": 11784 }, { "epoch": 0.42713203580877823, "grad_norm": 1.983621014977547, "learning_rate": 6.403875278009448e-06, "loss": 0.8685, "step": 11785 }, { "epoch": 0.4271682795114349, "grad_norm": 2.28504389018723, "learning_rate": 6.4033119500073985e-06, "loss": 0.8117, "step": 11786 }, { "epoch": 0.4272045232140915, "grad_norm": 2.252815786328205, "learning_rate": 6.402748602668556e-06, "loss": 0.9921, "step": 11787 }, { "epoch": 0.4272407669167482, "grad_norm": 2.6210286270964698, "learning_rate": 6.402185236000687e-06, "loss": 0.7568, "step": 11788 }, { "epoch": 0.4272770106194049, "grad_norm": 2.2685720797023845, "learning_rate": 6.401621850011552e-06, "loss": 0.8806, "step": 11789 }, { "epoch": 0.4273132543220615, "grad_norm": 2.175647244395752, "learning_rate": 6.401058444708918e-06, "loss": 0.8762, "step": 11790 }, { "epoch": 0.4273494980247182, "grad_norm": 2.345795479719872, "learning_rate": 6.4004950201005455e-06, "loss": 0.9738, "step": 11791 }, { "epoch": 0.42738574172737487, "grad_norm": 1.973316741580343, "learning_rate": 6.399931576194198e-06, "loss": 0.663, "step": 11792 }, { "epoch": 0.4274219854300315, "grad_norm": 2.4562209678291147, "learning_rate": 6.39936811299764e-06, "loss": 0.858, "step": 11793 }, { "epoch": 0.4274582291326882, "grad_norm": 2.3006166964840986, "learning_rate": 6.398804630518635e-06, "loss": 0.8258, "step": 11794 }, { "epoch": 0.42749447283534486, "grad_norm": 2.2956903263578408, "learning_rate": 6.3982411287649505e-06, "loss": 0.9826, "step": 11795 }, { "epoch": 0.4275307165380015, "grad_norm": 2.6043575448741203, "learning_rate": 6.397677607744348e-06, "loss": 0.9805, "step": 11796 }, { "epoch": 0.4275669602406582, "grad_norm": 2.492211503547765, "learning_rate": 6.397114067464593e-06, "loss": 1.1334, "step": 11797 }, { "epoch": 0.42760320394331486, "grad_norm": 2.023646381200144, "learning_rate": 6.39655050793345e-06, "loss": 0.8497, "step": 11798 }, { "epoch": 0.4276394476459715, "grad_norm": 2.397442895329967, "learning_rate": 6.395986929158686e-06, "loss": 0.9878, "step": 11799 }, { "epoch": 0.42767569134862815, "grad_norm": 2.3827710692360586, "learning_rate": 6.395423331148067e-06, "loss": 0.9465, "step": 11800 }, { "epoch": 0.42771193505128485, "grad_norm": 2.54333087786318, "learning_rate": 6.394859713909358e-06, "loss": 0.9912, "step": 11801 }, { "epoch": 0.4277481787539415, "grad_norm": 2.3662731009994924, "learning_rate": 6.394296077450327e-06, "loss": 0.9865, "step": 11802 }, { "epoch": 0.42778442245659815, "grad_norm": 2.3344669288587654, "learning_rate": 6.393732421778739e-06, "loss": 0.9284, "step": 11803 }, { "epoch": 0.42782066615925485, "grad_norm": 2.4336057580328383, "learning_rate": 6.39316874690236e-06, "loss": 0.8853, "step": 11804 }, { "epoch": 0.4278569098619115, "grad_norm": 2.377064217584979, "learning_rate": 6.3926050528289595e-06, "loss": 0.8731, "step": 11805 }, { "epoch": 0.42789315356456814, "grad_norm": 2.269855646116128, "learning_rate": 6.392041339566303e-06, "loss": 0.9838, "step": 11806 }, { "epoch": 0.42792939726722484, "grad_norm": 2.61943696835084, "learning_rate": 6.391477607122158e-06, "loss": 0.8565, "step": 11807 }, { "epoch": 0.4279656409698815, "grad_norm": 2.9056081841600085, "learning_rate": 6.390913855504294e-06, "loss": 0.8569, "step": 11808 }, { "epoch": 0.42800188467253814, "grad_norm": 2.260808683699651, "learning_rate": 6.3903500847204765e-06, "loss": 1.0131, "step": 11809 }, { "epoch": 0.42803812837519484, "grad_norm": 2.2679527747552983, "learning_rate": 6.389786294778477e-06, "loss": 0.9101, "step": 11810 }, { "epoch": 0.4280743720778515, "grad_norm": 2.0249390638718787, "learning_rate": 6.389222485686064e-06, "loss": 0.6843, "step": 11811 }, { "epoch": 0.42811061578050813, "grad_norm": 2.1519943541720896, "learning_rate": 6.388658657451004e-06, "loss": 0.8118, "step": 11812 }, { "epoch": 0.4281468594831648, "grad_norm": 2.2135125332923065, "learning_rate": 6.388094810081069e-06, "loss": 0.9003, "step": 11813 }, { "epoch": 0.4281831031858215, "grad_norm": 2.4782756345475128, "learning_rate": 6.3875309435840245e-06, "loss": 0.999, "step": 11814 }, { "epoch": 0.4282193468884781, "grad_norm": 2.533467179832793, "learning_rate": 6.386967057967645e-06, "loss": 1.019, "step": 11815 }, { "epoch": 0.42825559059113477, "grad_norm": 2.1579660097408104, "learning_rate": 6.386403153239696e-06, "loss": 0.9533, "step": 11816 }, { "epoch": 0.4282918342937915, "grad_norm": 2.0949177775239503, "learning_rate": 6.385839229407953e-06, "loss": 0.8449, "step": 11817 }, { "epoch": 0.4283280779964481, "grad_norm": 2.2680985434443444, "learning_rate": 6.3852752864801804e-06, "loss": 1.013, "step": 11818 }, { "epoch": 0.42836432169910477, "grad_norm": 2.1262630918110603, "learning_rate": 6.384711324464155e-06, "loss": 1.0135, "step": 11819 }, { "epoch": 0.42840056540176147, "grad_norm": 2.4375429602985608, "learning_rate": 6.384147343367643e-06, "loss": 0.9229, "step": 11820 }, { "epoch": 0.4284368091044181, "grad_norm": 2.2674515182649353, "learning_rate": 6.383583343198419e-06, "loss": 0.9686, "step": 11821 }, { "epoch": 0.42847305280707476, "grad_norm": 2.376652423607245, "learning_rate": 6.383019323964253e-06, "loss": 1.0591, "step": 11822 }, { "epoch": 0.4285092965097314, "grad_norm": 2.2956151204837916, "learning_rate": 6.3824552856729175e-06, "loss": 0.9205, "step": 11823 }, { "epoch": 0.4285455402123881, "grad_norm": 2.289594861219346, "learning_rate": 6.381891228332182e-06, "loss": 0.8689, "step": 11824 }, { "epoch": 0.42858178391504476, "grad_norm": 2.1397023693820603, "learning_rate": 6.381327151949823e-06, "loss": 0.8416, "step": 11825 }, { "epoch": 0.4286180276177014, "grad_norm": 2.595229505863993, "learning_rate": 6.38076305653361e-06, "loss": 0.8813, "step": 11826 }, { "epoch": 0.4286542713203581, "grad_norm": 2.359569699520315, "learning_rate": 6.380198942091319e-06, "loss": 0.9855, "step": 11827 }, { "epoch": 0.42869051502301475, "grad_norm": 2.3133417082845145, "learning_rate": 6.379634808630719e-06, "loss": 1.0812, "step": 11828 }, { "epoch": 0.4287267587256714, "grad_norm": 2.127453451702666, "learning_rate": 6.3790706561595885e-06, "loss": 0.8267, "step": 11829 }, { "epoch": 0.4287630024283281, "grad_norm": 2.4296565484434964, "learning_rate": 6.378506484685696e-06, "loss": 1.0607, "step": 11830 }, { "epoch": 0.42879924613098475, "grad_norm": 2.4352068994774654, "learning_rate": 6.377942294216819e-06, "loss": 0.9783, "step": 11831 }, { "epoch": 0.4288354898336414, "grad_norm": 2.374113531790678, "learning_rate": 6.377378084760731e-06, "loss": 0.9959, "step": 11832 }, { "epoch": 0.4288717335362981, "grad_norm": 2.555101019858195, "learning_rate": 6.3768138563252055e-06, "loss": 0.854, "step": 11833 }, { "epoch": 0.42890797723895474, "grad_norm": 2.546363789020011, "learning_rate": 6.376249608918017e-06, "loss": 0.8409, "step": 11834 }, { "epoch": 0.4289442209416114, "grad_norm": 2.170288643793868, "learning_rate": 6.375685342546942e-06, "loss": 1.1101, "step": 11835 }, { "epoch": 0.42898046464426803, "grad_norm": 2.5245015573594225, "learning_rate": 6.375121057219755e-06, "loss": 0.9509, "step": 11836 }, { "epoch": 0.42901670834692474, "grad_norm": 2.320130910028483, "learning_rate": 6.3745567529442306e-06, "loss": 0.9759, "step": 11837 }, { "epoch": 0.4290529520495814, "grad_norm": 2.435357949740996, "learning_rate": 6.373992429728147e-06, "loss": 0.8483, "step": 11838 }, { "epoch": 0.42908919575223803, "grad_norm": 2.4411668957685695, "learning_rate": 6.373428087579278e-06, "loss": 0.8349, "step": 11839 }, { "epoch": 0.42912543945489473, "grad_norm": 2.2655375247147256, "learning_rate": 6.3728637265053995e-06, "loss": 0.916, "step": 11840 }, { "epoch": 0.4291616831575514, "grad_norm": 2.680055918375968, "learning_rate": 6.37229934651429e-06, "loss": 1.0077, "step": 11841 }, { "epoch": 0.429197926860208, "grad_norm": 2.4297929668962186, "learning_rate": 6.3717349476137255e-06, "loss": 0.826, "step": 11842 }, { "epoch": 0.4292341705628647, "grad_norm": 2.364981163584141, "learning_rate": 6.371170529811483e-06, "loss": 0.9873, "step": 11843 }, { "epoch": 0.42927041426552137, "grad_norm": 2.5944922939603363, "learning_rate": 6.37060609311534e-06, "loss": 0.9266, "step": 11844 }, { "epoch": 0.429306657968178, "grad_norm": 2.1983060264124092, "learning_rate": 6.370041637533074e-06, "loss": 0.9226, "step": 11845 }, { "epoch": 0.4293429016708347, "grad_norm": 2.7054332902893927, "learning_rate": 6.369477163072462e-06, "loss": 0.9859, "step": 11846 }, { "epoch": 0.42937914537349137, "grad_norm": 2.5130521899178473, "learning_rate": 6.368912669741284e-06, "loss": 0.9013, "step": 11847 }, { "epoch": 0.429415389076148, "grad_norm": 2.522179099008489, "learning_rate": 6.368348157547316e-06, "loss": 0.8637, "step": 11848 }, { "epoch": 0.42945163277880466, "grad_norm": 2.248670653546731, "learning_rate": 6.367783626498339e-06, "loss": 0.9629, "step": 11849 }, { "epoch": 0.42948787648146136, "grad_norm": 2.4151779768273456, "learning_rate": 6.36721907660213e-06, "loss": 0.9252, "step": 11850 }, { "epoch": 0.429524120184118, "grad_norm": 2.252281836314402, "learning_rate": 6.36665450786647e-06, "loss": 0.7969, "step": 11851 }, { "epoch": 0.42956036388677465, "grad_norm": 2.206979658669651, "learning_rate": 6.366089920299138e-06, "loss": 0.8285, "step": 11852 }, { "epoch": 0.42959660758943136, "grad_norm": 2.343410589648549, "learning_rate": 6.365525313907912e-06, "loss": 1.0034, "step": 11853 }, { "epoch": 0.429632851292088, "grad_norm": 2.2534160535922547, "learning_rate": 6.364960688700572e-06, "loss": 1.0052, "step": 11854 }, { "epoch": 0.42966909499474465, "grad_norm": 2.186699149525834, "learning_rate": 6.364396044684901e-06, "loss": 0.8486, "step": 11855 }, { "epoch": 0.42970533869740135, "grad_norm": 2.141215419422768, "learning_rate": 6.363831381868677e-06, "loss": 1.0387, "step": 11856 }, { "epoch": 0.429741582400058, "grad_norm": 2.1626603309149077, "learning_rate": 6.363266700259681e-06, "loss": 0.9424, "step": 11857 }, { "epoch": 0.42977782610271464, "grad_norm": 2.3773754329295573, "learning_rate": 6.362701999865695e-06, "loss": 1.0131, "step": 11858 }, { "epoch": 0.4298140698053713, "grad_norm": 2.46029366685602, "learning_rate": 6.3621372806944994e-06, "loss": 0.8065, "step": 11859 }, { "epoch": 0.429850313508028, "grad_norm": 2.81149761123803, "learning_rate": 6.3615725427538745e-06, "loss": 0.8017, "step": 11860 }, { "epoch": 0.42988655721068464, "grad_norm": 2.3046618932910916, "learning_rate": 6.361007786051605e-06, "loss": 0.9987, "step": 11861 }, { "epoch": 0.4299228009133413, "grad_norm": 2.2892906057840556, "learning_rate": 6.360443010595472e-06, "loss": 0.9577, "step": 11862 }, { "epoch": 0.429959044615998, "grad_norm": 2.37826944217189, "learning_rate": 6.359878216393256e-06, "loss": 0.9823, "step": 11863 }, { "epoch": 0.42999528831865463, "grad_norm": 2.3717552448513373, "learning_rate": 6.359313403452741e-06, "loss": 0.8905, "step": 11864 }, { "epoch": 0.4300315320213113, "grad_norm": 2.021142142864723, "learning_rate": 6.358748571781708e-06, "loss": 0.9401, "step": 11865 }, { "epoch": 0.430067775723968, "grad_norm": 2.4218782729914206, "learning_rate": 6.358183721387943e-06, "loss": 0.9041, "step": 11866 }, { "epoch": 0.43010401942662463, "grad_norm": 2.5064161387365123, "learning_rate": 6.357618852279227e-06, "loss": 0.823, "step": 11867 }, { "epoch": 0.4301402631292813, "grad_norm": 2.543452356065503, "learning_rate": 6.357053964463343e-06, "loss": 0.9585, "step": 11868 }, { "epoch": 0.430176506831938, "grad_norm": 2.2286155932319107, "learning_rate": 6.3564890579480785e-06, "loss": 0.912, "step": 11869 }, { "epoch": 0.4302127505345946, "grad_norm": 2.1135340749860667, "learning_rate": 6.355924132741213e-06, "loss": 0.8494, "step": 11870 }, { "epoch": 0.43024899423725127, "grad_norm": 2.3620775013353326, "learning_rate": 6.355359188850535e-06, "loss": 1.0642, "step": 11871 }, { "epoch": 0.4302852379399079, "grad_norm": 2.2552945999364034, "learning_rate": 6.354794226283827e-06, "loss": 0.9329, "step": 11872 }, { "epoch": 0.4303214816425646, "grad_norm": 2.431171812713195, "learning_rate": 6.354229245048873e-06, "loss": 0.8955, "step": 11873 }, { "epoch": 0.43035772534522126, "grad_norm": 2.2205057664554313, "learning_rate": 6.353664245153459e-06, "loss": 0.7796, "step": 11874 }, { "epoch": 0.4303939690478779, "grad_norm": 2.6821201914695454, "learning_rate": 6.353099226605371e-06, "loss": 0.8991, "step": 11875 }, { "epoch": 0.4304302127505346, "grad_norm": 2.2588357922204865, "learning_rate": 6.352534189412392e-06, "loss": 0.8104, "step": 11876 }, { "epoch": 0.43046645645319126, "grad_norm": 2.4528870723871474, "learning_rate": 6.351969133582313e-06, "loss": 0.8642, "step": 11877 }, { "epoch": 0.4305027001558479, "grad_norm": 2.3288446792802655, "learning_rate": 6.351404059122914e-06, "loss": 0.9579, "step": 11878 }, { "epoch": 0.4305389438585046, "grad_norm": 2.3299543956455437, "learning_rate": 6.350838966041987e-06, "loss": 0.8672, "step": 11879 }, { "epoch": 0.43057518756116125, "grad_norm": 2.369947366416494, "learning_rate": 6.350273854347314e-06, "loss": 0.8343, "step": 11880 }, { "epoch": 0.4306114312638179, "grad_norm": 2.2826074066836486, "learning_rate": 6.3497087240466845e-06, "loss": 0.9796, "step": 11881 }, { "epoch": 0.43064767496647455, "grad_norm": 2.276375641132575, "learning_rate": 6.349143575147886e-06, "loss": 1.1587, "step": 11882 }, { "epoch": 0.43068391866913125, "grad_norm": 2.3963136146912176, "learning_rate": 6.348578407658704e-06, "loss": 0.8598, "step": 11883 }, { "epoch": 0.4307201623717879, "grad_norm": 2.073640670609811, "learning_rate": 6.348013221586927e-06, "loss": 0.7874, "step": 11884 }, { "epoch": 0.43075640607444454, "grad_norm": 2.2031302615112467, "learning_rate": 6.347448016940344e-06, "loss": 0.8731, "step": 11885 }, { "epoch": 0.43079264977710124, "grad_norm": 2.1874241025495853, "learning_rate": 6.346882793726741e-06, "loss": 0.8684, "step": 11886 }, { "epoch": 0.4308288934797579, "grad_norm": 2.7941855098898163, "learning_rate": 6.3463175519539085e-06, "loss": 0.8745, "step": 11887 }, { "epoch": 0.43086513718241454, "grad_norm": 2.492891499938595, "learning_rate": 6.3457522916296345e-06, "loss": 0.9702, "step": 11888 }, { "epoch": 0.43090138088507124, "grad_norm": 2.3017225232628507, "learning_rate": 6.345187012761708e-06, "loss": 0.8818, "step": 11889 }, { "epoch": 0.4309376245877279, "grad_norm": 2.521074143846501, "learning_rate": 6.344621715357917e-06, "loss": 1.0285, "step": 11890 }, { "epoch": 0.43097386829038453, "grad_norm": 2.1497605113989326, "learning_rate": 6.344056399426053e-06, "loss": 0.7809, "step": 11891 }, { "epoch": 0.43101011199304123, "grad_norm": 2.1631827040776574, "learning_rate": 6.343491064973903e-06, "loss": 0.8944, "step": 11892 }, { "epoch": 0.4310463556956979, "grad_norm": 2.122486190252711, "learning_rate": 6.3429257120092606e-06, "loss": 0.8569, "step": 11893 }, { "epoch": 0.4310825993983545, "grad_norm": 2.3816896192338173, "learning_rate": 6.3423603405399125e-06, "loss": 1.0822, "step": 11894 }, { "epoch": 0.4311188431010112, "grad_norm": 2.1812381730774435, "learning_rate": 6.341794950573651e-06, "loss": 0.9497, "step": 11895 }, { "epoch": 0.4311550868036679, "grad_norm": 2.175975854767357, "learning_rate": 6.341229542118268e-06, "loss": 0.8454, "step": 11896 }, { "epoch": 0.4311913305063245, "grad_norm": 2.278486322361323, "learning_rate": 6.340664115181551e-06, "loss": 0.9754, "step": 11897 }, { "epoch": 0.43122757420898117, "grad_norm": 2.359834841085208, "learning_rate": 6.340098669771295e-06, "loss": 0.8393, "step": 11898 }, { "epoch": 0.43126381791163787, "grad_norm": 2.3086822996111827, "learning_rate": 6.339533205895289e-06, "loss": 0.8742, "step": 11899 }, { "epoch": 0.4313000616142945, "grad_norm": 2.2620283861386383, "learning_rate": 6.338967723561327e-06, "loss": 0.9843, "step": 11900 }, { "epoch": 0.43133630531695116, "grad_norm": 2.4477969780544373, "learning_rate": 6.338402222777197e-06, "loss": 1.0208, "step": 11901 }, { "epoch": 0.43137254901960786, "grad_norm": 2.381317150628307, "learning_rate": 6.337836703550696e-06, "loss": 0.9235, "step": 11902 }, { "epoch": 0.4314087927222645, "grad_norm": 2.6612636978534803, "learning_rate": 6.337271165889612e-06, "loss": 1.0259, "step": 11903 }, { "epoch": 0.43144503642492116, "grad_norm": 2.2452429409376653, "learning_rate": 6.336705609801742e-06, "loss": 0.8966, "step": 11904 }, { "epoch": 0.43148128012757786, "grad_norm": 2.4923493498124545, "learning_rate": 6.336140035294877e-06, "loss": 0.895, "step": 11905 }, { "epoch": 0.4315175238302345, "grad_norm": 2.445229502731462, "learning_rate": 6.33557444237681e-06, "loss": 0.8563, "step": 11906 }, { "epoch": 0.43155376753289115, "grad_norm": 2.2458536232144475, "learning_rate": 6.335008831055334e-06, "loss": 1.0447, "step": 11907 }, { "epoch": 0.4315900112355478, "grad_norm": 2.4747129345695225, "learning_rate": 6.334443201338245e-06, "loss": 0.8464, "step": 11908 }, { "epoch": 0.4316262549382045, "grad_norm": 2.5138086835911215, "learning_rate": 6.333877553233336e-06, "loss": 0.8558, "step": 11909 }, { "epoch": 0.43166249864086115, "grad_norm": 2.748609267507101, "learning_rate": 6.333311886748401e-06, "loss": 0.8018, "step": 11910 }, { "epoch": 0.4316987423435178, "grad_norm": 2.4793203464565514, "learning_rate": 6.332746201891233e-06, "loss": 0.9007, "step": 11911 }, { "epoch": 0.4317349860461745, "grad_norm": 2.459894998739425, "learning_rate": 6.33218049866963e-06, "loss": 0.8544, "step": 11912 }, { "epoch": 0.43177122974883114, "grad_norm": 1.995846030652194, "learning_rate": 6.331614777091384e-06, "loss": 0.6756, "step": 11913 }, { "epoch": 0.4318074734514878, "grad_norm": 2.4480774106559227, "learning_rate": 6.331049037164293e-06, "loss": 0.8469, "step": 11914 }, { "epoch": 0.4318437171541445, "grad_norm": 2.2567054188165563, "learning_rate": 6.330483278896149e-06, "loss": 0.9837, "step": 11915 }, { "epoch": 0.43187996085680114, "grad_norm": 2.197560077292689, "learning_rate": 6.329917502294753e-06, "loss": 0.9195, "step": 11916 }, { "epoch": 0.4319162045594578, "grad_norm": 2.3553835940338246, "learning_rate": 6.329351707367895e-06, "loss": 0.9719, "step": 11917 }, { "epoch": 0.43195244826211443, "grad_norm": 2.1158147820240565, "learning_rate": 6.328785894123377e-06, "loss": 0.7259, "step": 11918 }, { "epoch": 0.43198869196477113, "grad_norm": 2.396204001935558, "learning_rate": 6.3282200625689935e-06, "loss": 1.0518, "step": 11919 }, { "epoch": 0.4320249356674278, "grad_norm": 2.2655069024509182, "learning_rate": 6.327654212712539e-06, "loss": 0.9788, "step": 11920 }, { "epoch": 0.4320611793700844, "grad_norm": 2.0798422324257793, "learning_rate": 6.327088344561813e-06, "loss": 0.8984, "step": 11921 }, { "epoch": 0.4320974230727411, "grad_norm": 14.89818401860946, "learning_rate": 6.326522458124613e-06, "loss": 1.2984, "step": 11922 }, { "epoch": 0.4321336667753978, "grad_norm": 2.3747106112373593, "learning_rate": 6.325956553408734e-06, "loss": 0.742, "step": 11923 }, { "epoch": 0.4321699104780544, "grad_norm": 2.4834671040947742, "learning_rate": 6.325390630421977e-06, "loss": 0.7959, "step": 11924 }, { "epoch": 0.4322061541807111, "grad_norm": 2.4102787330874427, "learning_rate": 6.3248246891721375e-06, "loss": 0.7687, "step": 11925 }, { "epoch": 0.43224239788336777, "grad_norm": 2.2234794939475537, "learning_rate": 6.324258729667017e-06, "loss": 0.8557, "step": 11926 }, { "epoch": 0.4322786415860244, "grad_norm": 2.637582368242489, "learning_rate": 6.32369275191441e-06, "loss": 1.0808, "step": 11927 }, { "epoch": 0.4323148852886811, "grad_norm": 2.3109191096997703, "learning_rate": 6.3231267559221184e-06, "loss": 0.9965, "step": 11928 }, { "epoch": 0.43235112899133776, "grad_norm": 2.4675425798685127, "learning_rate": 6.322560741697941e-06, "loss": 1.0322, "step": 11929 }, { "epoch": 0.4323873726939944, "grad_norm": 2.0544637318999497, "learning_rate": 6.321994709249676e-06, "loss": 0.8159, "step": 11930 }, { "epoch": 0.43242361639665106, "grad_norm": 2.3798334190064425, "learning_rate": 6.3214286585851225e-06, "loss": 0.8775, "step": 11931 }, { "epoch": 0.43245986009930776, "grad_norm": 2.376817538906413, "learning_rate": 6.3208625897120824e-06, "loss": 1.1231, "step": 11932 }, { "epoch": 0.4324961038019644, "grad_norm": 2.282123197452601, "learning_rate": 6.320296502638353e-06, "loss": 0.8922, "step": 11933 }, { "epoch": 0.43253234750462105, "grad_norm": 2.229522171677987, "learning_rate": 6.319730397371738e-06, "loss": 0.8005, "step": 11934 }, { "epoch": 0.43256859120727775, "grad_norm": 2.5149422656501113, "learning_rate": 6.319164273920035e-06, "loss": 0.8805, "step": 11935 }, { "epoch": 0.4326048349099344, "grad_norm": 2.47994207296651, "learning_rate": 6.318598132291047e-06, "loss": 1.052, "step": 11936 }, { "epoch": 0.43264107861259105, "grad_norm": 2.166856188842662, "learning_rate": 6.318031972492573e-06, "loss": 1.0074, "step": 11937 }, { "epoch": 0.43267732231524775, "grad_norm": 2.784720782782216, "learning_rate": 6.317465794532418e-06, "loss": 1.0539, "step": 11938 }, { "epoch": 0.4327135660179044, "grad_norm": 2.2047505438319055, "learning_rate": 6.31689959841838e-06, "loss": 1.0886, "step": 11939 }, { "epoch": 0.43274980972056104, "grad_norm": 2.4195251557470283, "learning_rate": 6.316333384158262e-06, "loss": 0.9337, "step": 11940 }, { "epoch": 0.43278605342321774, "grad_norm": 2.3504090889386524, "learning_rate": 6.315767151759864e-06, "loss": 0.9875, "step": 11941 }, { "epoch": 0.4328222971258744, "grad_norm": 2.3930189501194574, "learning_rate": 6.315200901230993e-06, "loss": 0.9795, "step": 11942 }, { "epoch": 0.43285854082853104, "grad_norm": 2.52888665587569, "learning_rate": 6.3146346325794475e-06, "loss": 1.097, "step": 11943 }, { "epoch": 0.4328947845311877, "grad_norm": 2.328399004362713, "learning_rate": 6.314068345813032e-06, "loss": 1.0006, "step": 11944 }, { "epoch": 0.4329310282338444, "grad_norm": 2.541773655488741, "learning_rate": 6.31350204093955e-06, "loss": 0.873, "step": 11945 }, { "epoch": 0.43296727193650103, "grad_norm": 2.171078395295112, "learning_rate": 6.312935717966803e-06, "loss": 0.8729, "step": 11946 }, { "epoch": 0.4330035156391577, "grad_norm": 2.263514918593749, "learning_rate": 6.312369376902597e-06, "loss": 0.8654, "step": 11947 }, { "epoch": 0.4330397593418144, "grad_norm": 2.428019601611984, "learning_rate": 6.311803017754735e-06, "loss": 0.9209, "step": 11948 }, { "epoch": 0.433076003044471, "grad_norm": 2.5767398407910624, "learning_rate": 6.311236640531021e-06, "loss": 1.1125, "step": 11949 }, { "epoch": 0.43311224674712767, "grad_norm": 2.3821493379167267, "learning_rate": 6.310670245239259e-06, "loss": 1.0317, "step": 11950 }, { "epoch": 0.4331484904497844, "grad_norm": 2.408827887127606, "learning_rate": 6.3101038318872534e-06, "loss": 0.9836, "step": 11951 }, { "epoch": 0.433184734152441, "grad_norm": 2.3841955551239753, "learning_rate": 6.309537400482808e-06, "loss": 0.8957, "step": 11952 }, { "epoch": 0.43322097785509767, "grad_norm": 2.469184964100353, "learning_rate": 6.308970951033731e-06, "loss": 1.0099, "step": 11953 }, { "epoch": 0.4332572215577543, "grad_norm": 2.336798207521109, "learning_rate": 6.308404483547825e-06, "loss": 0.9719, "step": 11954 }, { "epoch": 0.433293465260411, "grad_norm": 2.214393410325118, "learning_rate": 6.307837998032896e-06, "loss": 0.885, "step": 11955 }, { "epoch": 0.43332970896306766, "grad_norm": 2.3371294395374527, "learning_rate": 6.307271494496752e-06, "loss": 0.8696, "step": 11956 }, { "epoch": 0.4333659526657243, "grad_norm": 2.767825824226115, "learning_rate": 6.306704972947195e-06, "loss": 0.9037, "step": 11957 }, { "epoch": 0.433402196368381, "grad_norm": 2.4825107757178992, "learning_rate": 6.306138433392036e-06, "loss": 0.8683, "step": 11958 }, { "epoch": 0.43343844007103766, "grad_norm": 2.6234495601168617, "learning_rate": 6.3055718758390784e-06, "loss": 0.9416, "step": 11959 }, { "epoch": 0.4334746837736943, "grad_norm": 2.4706647711750405, "learning_rate": 6.305005300296131e-06, "loss": 1.0699, "step": 11960 }, { "epoch": 0.433510927476351, "grad_norm": 2.1839251278679668, "learning_rate": 6.304438706770998e-06, "loss": 0.8933, "step": 11961 }, { "epoch": 0.43354717117900765, "grad_norm": 2.4717963927994333, "learning_rate": 6.30387209527149e-06, "loss": 0.9079, "step": 11962 }, { "epoch": 0.4335834148816643, "grad_norm": 2.5181434252451034, "learning_rate": 6.3033054658054115e-06, "loss": 0.8293, "step": 11963 }, { "epoch": 0.433619658584321, "grad_norm": 2.432312362141265, "learning_rate": 6.302738818380573e-06, "loss": 0.9554, "step": 11964 }, { "epoch": 0.43365590228697765, "grad_norm": 2.321656093003618, "learning_rate": 6.302172153004781e-06, "loss": 0.7704, "step": 11965 }, { "epoch": 0.4336921459896343, "grad_norm": 2.430714665710761, "learning_rate": 6.301605469685845e-06, "loss": 0.8694, "step": 11966 }, { "epoch": 0.43372838969229094, "grad_norm": 2.4207346396894116, "learning_rate": 6.301038768431572e-06, "loss": 0.8694, "step": 11967 }, { "epoch": 0.43376463339494764, "grad_norm": 2.117101697530336, "learning_rate": 6.300472049249773e-06, "loss": 0.873, "step": 11968 }, { "epoch": 0.4338008770976043, "grad_norm": 2.3067284969420703, "learning_rate": 6.299905312148255e-06, "loss": 0.8792, "step": 11969 }, { "epoch": 0.43383712080026093, "grad_norm": 2.4418305751706915, "learning_rate": 6.299338557134827e-06, "loss": 0.9955, "step": 11970 }, { "epoch": 0.43387336450291764, "grad_norm": 2.448078445793662, "learning_rate": 6.2987717842173e-06, "loss": 0.853, "step": 11971 }, { "epoch": 0.4339096082055743, "grad_norm": 2.496421147256257, "learning_rate": 6.298204993403484e-06, "loss": 0.9364, "step": 11972 }, { "epoch": 0.43394585190823093, "grad_norm": 2.678657219802098, "learning_rate": 6.297638184701187e-06, "loss": 1.019, "step": 11973 }, { "epoch": 0.43398209561088763, "grad_norm": 2.369037414344127, "learning_rate": 6.297071358118221e-06, "loss": 0.9356, "step": 11974 }, { "epoch": 0.4340183393135443, "grad_norm": 2.375806211391506, "learning_rate": 6.296504513662396e-06, "loss": 0.9829, "step": 11975 }, { "epoch": 0.4340545830162009, "grad_norm": 2.6084418950381587, "learning_rate": 6.295937651341524e-06, "loss": 0.8044, "step": 11976 }, { "epoch": 0.4340908267188576, "grad_norm": 2.188098479943317, "learning_rate": 6.2953707711634145e-06, "loss": 0.8472, "step": 11977 }, { "epoch": 0.43412707042151427, "grad_norm": 2.4458349830039143, "learning_rate": 6.294803873135879e-06, "loss": 0.8404, "step": 11978 }, { "epoch": 0.4341633141241709, "grad_norm": 2.4316402424704218, "learning_rate": 6.29423695726673e-06, "loss": 1.059, "step": 11979 }, { "epoch": 0.43419955782682756, "grad_norm": 2.3253138851083452, "learning_rate": 6.293670023563776e-06, "loss": 0.8952, "step": 11980 }, { "epoch": 0.43423580152948427, "grad_norm": 2.4391566841941925, "learning_rate": 6.293103072034834e-06, "loss": 0.8521, "step": 11981 }, { "epoch": 0.4342720452321409, "grad_norm": 2.3050181790113773, "learning_rate": 6.292536102687712e-06, "loss": 0.9362, "step": 11982 }, { "epoch": 0.43430828893479756, "grad_norm": 2.722165328534278, "learning_rate": 6.291969115530226e-06, "loss": 1.0012, "step": 11983 }, { "epoch": 0.43434453263745426, "grad_norm": 2.1058554300461783, "learning_rate": 6.291402110570184e-06, "loss": 0.9302, "step": 11984 }, { "epoch": 0.4343807763401109, "grad_norm": 2.413980413636235, "learning_rate": 6.290835087815405e-06, "loss": 0.8079, "step": 11985 }, { "epoch": 0.43441702004276755, "grad_norm": 2.580717615971714, "learning_rate": 6.290268047273698e-06, "loss": 1.0575, "step": 11986 }, { "epoch": 0.43445326374542426, "grad_norm": 2.275450078390766, "learning_rate": 6.289700988952878e-06, "loss": 0.9878, "step": 11987 }, { "epoch": 0.4344895074480809, "grad_norm": 2.6437488440797914, "learning_rate": 6.289133912860757e-06, "loss": 0.908, "step": 11988 }, { "epoch": 0.43452575115073755, "grad_norm": 2.0092715728793857, "learning_rate": 6.288566819005153e-06, "loss": 0.9059, "step": 11989 }, { "epoch": 0.4345619948533942, "grad_norm": 2.2645589120369936, "learning_rate": 6.287999707393875e-06, "loss": 0.9867, "step": 11990 }, { "epoch": 0.4345982385560509, "grad_norm": 2.231323616380465, "learning_rate": 6.2874325780347414e-06, "loss": 0.8462, "step": 11991 }, { "epoch": 0.43463448225870754, "grad_norm": 2.1329204794981225, "learning_rate": 6.286865430935564e-06, "loss": 1.002, "step": 11992 }, { "epoch": 0.4346707259613642, "grad_norm": 2.2953772094294345, "learning_rate": 6.28629826610416e-06, "loss": 0.9889, "step": 11993 }, { "epoch": 0.4347069696640209, "grad_norm": 2.3398397067414223, "learning_rate": 6.2857310835483446e-06, "loss": 0.9236, "step": 11994 }, { "epoch": 0.43474321336667754, "grad_norm": 2.406605451821755, "learning_rate": 6.285163883275932e-06, "loss": 0.9504, "step": 11995 }, { "epoch": 0.4347794570693342, "grad_norm": 2.089991610626985, "learning_rate": 6.284596665294739e-06, "loss": 0.8805, "step": 11996 }, { "epoch": 0.4348157007719909, "grad_norm": 2.3125227155816153, "learning_rate": 6.2840294296125795e-06, "loss": 1.0115, "step": 11997 }, { "epoch": 0.43485194447464753, "grad_norm": 2.33026787515273, "learning_rate": 6.283462176237272e-06, "loss": 0.8552, "step": 11998 }, { "epoch": 0.4348881881773042, "grad_norm": 2.1341688108643027, "learning_rate": 6.282894905176631e-06, "loss": 1.0082, "step": 11999 }, { "epoch": 0.4349244318799609, "grad_norm": 2.473218776211268, "learning_rate": 6.282327616438474e-06, "loss": 1.2892, "step": 12000 }, { "epoch": 0.43496067558261753, "grad_norm": 2.155552304753461, "learning_rate": 6.281760310030619e-06, "loss": 0.8536, "step": 12001 }, { "epoch": 0.4349969192852742, "grad_norm": 2.2448124312077744, "learning_rate": 6.281192985960881e-06, "loss": 1.0445, "step": 12002 }, { "epoch": 0.4350331629879308, "grad_norm": 2.0861389440909486, "learning_rate": 6.280625644237079e-06, "loss": 0.8396, "step": 12003 }, { "epoch": 0.4350694066905875, "grad_norm": 2.5006406182935725, "learning_rate": 6.28005828486703e-06, "loss": 0.7685, "step": 12004 }, { "epoch": 0.43510565039324417, "grad_norm": 2.2412939444054345, "learning_rate": 6.279490907858553e-06, "loss": 0.8437, "step": 12005 }, { "epoch": 0.4351418940959008, "grad_norm": 2.093521166489187, "learning_rate": 6.278923513219465e-06, "loss": 0.8534, "step": 12006 }, { "epoch": 0.4351781377985575, "grad_norm": 2.366803419148868, "learning_rate": 6.278356100957585e-06, "loss": 0.8365, "step": 12007 }, { "epoch": 0.43521438150121416, "grad_norm": 2.2047846912161737, "learning_rate": 6.277788671080729e-06, "loss": 0.9319, "step": 12008 }, { "epoch": 0.4352506252038708, "grad_norm": 2.502497091144632, "learning_rate": 6.277221223596719e-06, "loss": 0.9781, "step": 12009 }, { "epoch": 0.4352868689065275, "grad_norm": 2.1031312323246585, "learning_rate": 6.276653758513371e-06, "loss": 0.8506, "step": 12010 }, { "epoch": 0.43532311260918416, "grad_norm": 2.2231832216399257, "learning_rate": 6.276086275838509e-06, "loss": 0.781, "step": 12011 }, { "epoch": 0.4353593563118408, "grad_norm": 2.89676560242722, "learning_rate": 6.2755187755799485e-06, "loss": 0.9571, "step": 12012 }, { "epoch": 0.4353956000144975, "grad_norm": 2.324651249763456, "learning_rate": 6.2749512577455116e-06, "loss": 0.8217, "step": 12013 }, { "epoch": 0.43543184371715415, "grad_norm": 2.311500285417331, "learning_rate": 6.274383722343016e-06, "loss": 0.6728, "step": 12014 }, { "epoch": 0.4354680874198108, "grad_norm": 2.2458206536931113, "learning_rate": 6.273816169380284e-06, "loss": 0.836, "step": 12015 }, { "epoch": 0.43550433112246745, "grad_norm": 2.257025325034368, "learning_rate": 6.273248598865137e-06, "loss": 0.911, "step": 12016 }, { "epoch": 0.43554057482512415, "grad_norm": 2.3382168311577014, "learning_rate": 6.272681010805394e-06, "loss": 0.9766, "step": 12017 }, { "epoch": 0.4355768185277808, "grad_norm": 2.267382236312611, "learning_rate": 6.272113405208875e-06, "loss": 0.8757, "step": 12018 }, { "epoch": 0.43561306223043744, "grad_norm": 2.4481813317441308, "learning_rate": 6.271545782083403e-06, "loss": 1.0166, "step": 12019 }, { "epoch": 0.43564930593309414, "grad_norm": 2.277418383284721, "learning_rate": 6.270978141436799e-06, "loss": 0.891, "step": 12020 }, { "epoch": 0.4356855496357508, "grad_norm": 2.059540739526212, "learning_rate": 6.270410483276887e-06, "loss": 1.0397, "step": 12021 }, { "epoch": 0.43572179333840744, "grad_norm": 2.424726341029484, "learning_rate": 6.269842807611484e-06, "loss": 0.86, "step": 12022 }, { "epoch": 0.43575803704106414, "grad_norm": 2.3227447664165304, "learning_rate": 6.269275114448416e-06, "loss": 0.9396, "step": 12023 }, { "epoch": 0.4357942807437208, "grad_norm": 2.190694994963713, "learning_rate": 6.268707403795504e-06, "loss": 1.0749, "step": 12024 }, { "epoch": 0.43583052444637743, "grad_norm": 2.086161439597016, "learning_rate": 6.268139675660572e-06, "loss": 0.9594, "step": 12025 }, { "epoch": 0.4358667681490341, "grad_norm": 2.413150805354321, "learning_rate": 6.267571930051444e-06, "loss": 0.9642, "step": 12026 }, { "epoch": 0.4359030118516908, "grad_norm": 2.061301571632944, "learning_rate": 6.26700416697594e-06, "loss": 0.9754, "step": 12027 }, { "epoch": 0.4359392555543474, "grad_norm": 2.007873620869013, "learning_rate": 6.266436386441883e-06, "loss": 0.6964, "step": 12028 }, { "epoch": 0.4359754992570041, "grad_norm": 2.2388480266108473, "learning_rate": 6.2658685884571e-06, "loss": 0.8639, "step": 12029 }, { "epoch": 0.4360117429596608, "grad_norm": 2.049086223218974, "learning_rate": 6.265300773029413e-06, "loss": 0.8381, "step": 12030 }, { "epoch": 0.4360479866623174, "grad_norm": 2.4572219121985017, "learning_rate": 6.264732940166648e-06, "loss": 1.0049, "step": 12031 }, { "epoch": 0.43608423036497407, "grad_norm": 2.264654969190447, "learning_rate": 6.264165089876626e-06, "loss": 0.8217, "step": 12032 }, { "epoch": 0.43612047406763077, "grad_norm": 2.5796454282422143, "learning_rate": 6.263597222167175e-06, "loss": 0.9469, "step": 12033 }, { "epoch": 0.4361567177702874, "grad_norm": 2.5752229898227763, "learning_rate": 6.263029337046118e-06, "loss": 1.0234, "step": 12034 }, { "epoch": 0.43619296147294406, "grad_norm": 2.3989827669538353, "learning_rate": 6.262461434521281e-06, "loss": 1.0873, "step": 12035 }, { "epoch": 0.43622920517560076, "grad_norm": 2.3609367152821994, "learning_rate": 6.261893514600489e-06, "loss": 0.9447, "step": 12036 }, { "epoch": 0.4362654488782574, "grad_norm": 2.1730139038682403, "learning_rate": 6.261325577291567e-06, "loss": 0.8866, "step": 12037 }, { "epoch": 0.43630169258091406, "grad_norm": 2.4012849637113933, "learning_rate": 6.260757622602341e-06, "loss": 0.9505, "step": 12038 }, { "epoch": 0.4363379362835707, "grad_norm": 2.3915533253302366, "learning_rate": 6.260189650540639e-06, "loss": 1.0123, "step": 12039 }, { "epoch": 0.4363741799862274, "grad_norm": 2.0732938315782077, "learning_rate": 6.259621661114283e-06, "loss": 0.7456, "step": 12040 }, { "epoch": 0.43641042368888405, "grad_norm": 2.149592727224705, "learning_rate": 6.259053654331106e-06, "loss": 0.6697, "step": 12041 }, { "epoch": 0.4364466673915407, "grad_norm": 2.6665551723615746, "learning_rate": 6.2584856301989274e-06, "loss": 0.9015, "step": 12042 }, { "epoch": 0.4364829110941974, "grad_norm": 2.5585690361750726, "learning_rate": 6.257917588725581e-06, "loss": 0.961, "step": 12043 }, { "epoch": 0.43651915479685405, "grad_norm": 2.270576903303344, "learning_rate": 6.257349529918888e-06, "loss": 0.8617, "step": 12044 }, { "epoch": 0.4365553984995107, "grad_norm": 2.250439151599363, "learning_rate": 6.256781453786681e-06, "loss": 0.9039, "step": 12045 }, { "epoch": 0.4365916422021674, "grad_norm": 2.2075135824714933, "learning_rate": 6.256213360336786e-06, "loss": 0.9191, "step": 12046 }, { "epoch": 0.43662788590482404, "grad_norm": 1.875776536618274, "learning_rate": 6.25564524957703e-06, "loss": 0.5607, "step": 12047 }, { "epoch": 0.4366641296074807, "grad_norm": 2.1995008228869923, "learning_rate": 6.255077121515241e-06, "loss": 0.8035, "step": 12048 }, { "epoch": 0.4367003733101374, "grad_norm": 2.833821738469162, "learning_rate": 6.254508976159249e-06, "loss": 0.8382, "step": 12049 }, { "epoch": 0.43673661701279404, "grad_norm": 2.094131779401155, "learning_rate": 6.253940813516881e-06, "loss": 0.9684, "step": 12050 }, { "epoch": 0.4367728607154507, "grad_norm": 2.224463268526457, "learning_rate": 6.2533726335959684e-06, "loss": 0.8499, "step": 12051 }, { "epoch": 0.43680910441810733, "grad_norm": 2.3286475194855294, "learning_rate": 6.252804436404338e-06, "loss": 0.8893, "step": 12052 }, { "epoch": 0.43684534812076403, "grad_norm": 2.4578607228037823, "learning_rate": 6.252236221949821e-06, "loss": 1.0284, "step": 12053 }, { "epoch": 0.4368815918234207, "grad_norm": 2.13751886503353, "learning_rate": 6.251667990240246e-06, "loss": 0.9791, "step": 12054 }, { "epoch": 0.4369178355260773, "grad_norm": 2.335781516068434, "learning_rate": 6.251099741283443e-06, "loss": 0.9336, "step": 12055 }, { "epoch": 0.436954079228734, "grad_norm": 2.573471865193563, "learning_rate": 6.250531475087242e-06, "loss": 0.8881, "step": 12056 }, { "epoch": 0.4369903229313907, "grad_norm": 2.360488457418363, "learning_rate": 6.249963191659474e-06, "loss": 0.7857, "step": 12057 }, { "epoch": 0.4370265666340473, "grad_norm": 2.407384396171372, "learning_rate": 6.249394891007968e-06, "loss": 0.9219, "step": 12058 }, { "epoch": 0.437062810336704, "grad_norm": 2.4383363760360948, "learning_rate": 6.248826573140557e-06, "loss": 1.0056, "step": 12059 }, { "epoch": 0.43709905403936067, "grad_norm": 2.426681053625923, "learning_rate": 6.248258238065071e-06, "loss": 1.0915, "step": 12060 }, { "epoch": 0.4371352977420173, "grad_norm": 2.379807994508351, "learning_rate": 6.247689885789341e-06, "loss": 0.9192, "step": 12061 }, { "epoch": 0.43717154144467396, "grad_norm": 1.9757321030912556, "learning_rate": 6.2471215163211995e-06, "loss": 0.8168, "step": 12062 }, { "epoch": 0.43720778514733066, "grad_norm": 2.3029434316571593, "learning_rate": 6.246553129668478e-06, "loss": 0.9124, "step": 12063 }, { "epoch": 0.4372440288499873, "grad_norm": 2.272171527053659, "learning_rate": 6.2459847258390085e-06, "loss": 1.0063, "step": 12064 }, { "epoch": 0.43728027255264396, "grad_norm": 2.269478137275157, "learning_rate": 6.245416304840621e-06, "loss": 0.9228, "step": 12065 }, { "epoch": 0.43731651625530066, "grad_norm": 2.3203613804775722, "learning_rate": 6.244847866681152e-06, "loss": 0.9184, "step": 12066 }, { "epoch": 0.4373527599579573, "grad_norm": 2.255376936817162, "learning_rate": 6.24427941136843e-06, "loss": 0.7933, "step": 12067 }, { "epoch": 0.43738900366061395, "grad_norm": 2.5973677821202243, "learning_rate": 6.2437109389102924e-06, "loss": 0.7557, "step": 12068 }, { "epoch": 0.43742524736327065, "grad_norm": 2.1298344136376333, "learning_rate": 6.24314244931457e-06, "loss": 0.9058, "step": 12069 }, { "epoch": 0.4374614910659273, "grad_norm": 2.266079530874311, "learning_rate": 6.242573942589096e-06, "loss": 0.9621, "step": 12070 }, { "epoch": 0.43749773476858395, "grad_norm": 2.4404745052091186, "learning_rate": 6.242005418741703e-06, "loss": 0.9966, "step": 12071 }, { "epoch": 0.43753397847124065, "grad_norm": 2.280913039860179, "learning_rate": 6.241436877780229e-06, "loss": 0.7878, "step": 12072 }, { "epoch": 0.4375702221738973, "grad_norm": 1.9654533206178522, "learning_rate": 6.240868319712505e-06, "loss": 0.8386, "step": 12073 }, { "epoch": 0.43760646587655394, "grad_norm": 2.424843019169831, "learning_rate": 6.240299744546366e-06, "loss": 0.8779, "step": 12074 }, { "epoch": 0.4376427095792106, "grad_norm": 2.2947232156840527, "learning_rate": 6.239731152289646e-06, "loss": 0.8321, "step": 12075 }, { "epoch": 0.4376789532818673, "grad_norm": 2.300268154982412, "learning_rate": 6.2391625429501814e-06, "loss": 0.992, "step": 12076 }, { "epoch": 0.43771519698452394, "grad_norm": 2.3850071235311443, "learning_rate": 6.238593916535805e-06, "loss": 0.8769, "step": 12077 }, { "epoch": 0.4377514406871806, "grad_norm": 2.4910532760653887, "learning_rate": 6.238025273054356e-06, "loss": 1.0184, "step": 12078 }, { "epoch": 0.4377876843898373, "grad_norm": 2.6121864105194064, "learning_rate": 6.237456612513665e-06, "loss": 1.0333, "step": 12079 }, { "epoch": 0.43782392809249393, "grad_norm": 2.088062383158766, "learning_rate": 6.236887934921572e-06, "loss": 0.8937, "step": 12080 }, { "epoch": 0.4378601717951506, "grad_norm": 2.402030812369269, "learning_rate": 6.236319240285909e-06, "loss": 1.0749, "step": 12081 }, { "epoch": 0.4378964154978073, "grad_norm": 2.243381847540903, "learning_rate": 6.235750528614516e-06, "loss": 0.8295, "step": 12082 }, { "epoch": 0.4379326592004639, "grad_norm": 2.4655716260449805, "learning_rate": 6.235181799915229e-06, "loss": 1.0105, "step": 12083 }, { "epoch": 0.43796890290312057, "grad_norm": 2.3841629300710756, "learning_rate": 6.234613054195883e-06, "loss": 1.0689, "step": 12084 }, { "epoch": 0.4380051466057773, "grad_norm": 2.3139566502933224, "learning_rate": 6.234044291464315e-06, "loss": 0.8924, "step": 12085 }, { "epoch": 0.4380413903084339, "grad_norm": 2.274408097100957, "learning_rate": 6.233475511728364e-06, "loss": 0.9349, "step": 12086 }, { "epoch": 0.43807763401109057, "grad_norm": 2.329210892504006, "learning_rate": 6.232906714995866e-06, "loss": 0.973, "step": 12087 }, { "epoch": 0.4381138777137472, "grad_norm": 2.1655843007335553, "learning_rate": 6.232337901274659e-06, "loss": 1.0418, "step": 12088 }, { "epoch": 0.4381501214164039, "grad_norm": 2.4624547579471745, "learning_rate": 6.231769070572581e-06, "loss": 0.9043, "step": 12089 }, { "epoch": 0.43818636511906056, "grad_norm": 2.3179110588885634, "learning_rate": 6.23120022289747e-06, "loss": 0.7489, "step": 12090 }, { "epoch": 0.4382226088217172, "grad_norm": 2.569370621955225, "learning_rate": 6.230631358257164e-06, "loss": 0.9055, "step": 12091 }, { "epoch": 0.4382588525243739, "grad_norm": 2.29537627535095, "learning_rate": 6.230062476659504e-06, "loss": 0.849, "step": 12092 }, { "epoch": 0.43829509622703056, "grad_norm": 2.4264052800199245, "learning_rate": 6.229493578112326e-06, "loss": 0.902, "step": 12093 }, { "epoch": 0.4383313399296872, "grad_norm": 2.258557547613561, "learning_rate": 6.228924662623471e-06, "loss": 1.0522, "step": 12094 }, { "epoch": 0.4383675836323439, "grad_norm": 2.545314654744792, "learning_rate": 6.228355730200776e-06, "loss": 1.066, "step": 12095 }, { "epoch": 0.43840382733500055, "grad_norm": 14.344630464194738, "learning_rate": 6.227786780852082e-06, "loss": 1.2328, "step": 12096 }, { "epoch": 0.4384400710376572, "grad_norm": 2.214133368053411, "learning_rate": 6.227217814585229e-06, "loss": 0.9463, "step": 12097 }, { "epoch": 0.43847631474031384, "grad_norm": 2.4561903263922327, "learning_rate": 6.226648831408058e-06, "loss": 0.7691, "step": 12098 }, { "epoch": 0.43851255844297055, "grad_norm": 2.362622410508599, "learning_rate": 6.2260798313284055e-06, "loss": 0.8532, "step": 12099 }, { "epoch": 0.4385488021456272, "grad_norm": 2.3465907889321573, "learning_rate": 6.225510814354115e-06, "loss": 0.888, "step": 12100 }, { "epoch": 0.43858504584828384, "grad_norm": 2.2684941979159405, "learning_rate": 6.224941780493028e-06, "loss": 0.8413, "step": 12101 }, { "epoch": 0.43862128955094054, "grad_norm": 2.1681539714402054, "learning_rate": 6.224372729752983e-06, "loss": 0.831, "step": 12102 }, { "epoch": 0.4386575332535972, "grad_norm": 2.509667188171699, "learning_rate": 6.223803662141823e-06, "loss": 0.8339, "step": 12103 }, { "epoch": 0.43869377695625383, "grad_norm": 2.3044715330166397, "learning_rate": 6.2232345776673895e-06, "loss": 0.8286, "step": 12104 }, { "epoch": 0.43873002065891054, "grad_norm": 2.123323319916894, "learning_rate": 6.222665476337522e-06, "loss": 0.729, "step": 12105 }, { "epoch": 0.4387662643615672, "grad_norm": 2.4224368845289743, "learning_rate": 6.222096358160064e-06, "loss": 0.8317, "step": 12106 }, { "epoch": 0.43880250806422383, "grad_norm": 2.5836697723122013, "learning_rate": 6.221527223142856e-06, "loss": 0.9306, "step": 12107 }, { "epoch": 0.43883875176688053, "grad_norm": 2.0901731158658614, "learning_rate": 6.220958071293744e-06, "loss": 1.0027, "step": 12108 }, { "epoch": 0.4388749954695372, "grad_norm": 2.1577267238404523, "learning_rate": 6.2203889026205655e-06, "loss": 0.8499, "step": 12109 }, { "epoch": 0.4389112391721938, "grad_norm": 2.2400548501680766, "learning_rate": 6.219819717131168e-06, "loss": 0.8046, "step": 12110 }, { "epoch": 0.43894748287485047, "grad_norm": 2.197431731631565, "learning_rate": 6.21925051483339e-06, "loss": 0.8414, "step": 12111 }, { "epoch": 0.43898372657750717, "grad_norm": 2.0643634600104876, "learning_rate": 6.2186812957350805e-06, "loss": 0.9419, "step": 12112 }, { "epoch": 0.4390199702801638, "grad_norm": 2.3739914093196606, "learning_rate": 6.218112059844079e-06, "loss": 0.8352, "step": 12113 }, { "epoch": 0.43905621398282046, "grad_norm": 2.326713715409401, "learning_rate": 6.217542807168229e-06, "loss": 0.6197, "step": 12114 }, { "epoch": 0.43909245768547717, "grad_norm": 2.3124228006171985, "learning_rate": 6.216973537715376e-06, "loss": 0.9186, "step": 12115 }, { "epoch": 0.4391287013881338, "grad_norm": 2.468374254703877, "learning_rate": 6.216404251493364e-06, "loss": 1.0137, "step": 12116 }, { "epoch": 0.43916494509079046, "grad_norm": 2.3986894845053106, "learning_rate": 6.215834948510037e-06, "loss": 1.0035, "step": 12117 }, { "epoch": 0.43920118879344716, "grad_norm": 2.227471941979477, "learning_rate": 6.215265628773239e-06, "loss": 0.9069, "step": 12118 }, { "epoch": 0.4392374324961038, "grad_norm": 2.3635400373862154, "learning_rate": 6.214696292290816e-06, "loss": 1.1333, "step": 12119 }, { "epoch": 0.43927367619876045, "grad_norm": 2.283510167929073, "learning_rate": 6.214126939070613e-06, "loss": 0.916, "step": 12120 }, { "epoch": 0.43930991990141716, "grad_norm": 2.2473739259284264, "learning_rate": 6.213557569120474e-06, "loss": 0.9913, "step": 12121 }, { "epoch": 0.4393461636040738, "grad_norm": 2.6838229978877277, "learning_rate": 6.212988182448247e-06, "loss": 0.9758, "step": 12122 }, { "epoch": 0.43938240730673045, "grad_norm": 2.492413119734779, "learning_rate": 6.212418779061776e-06, "loss": 0.8685, "step": 12123 }, { "epoch": 0.4394186510093871, "grad_norm": 2.184899009543897, "learning_rate": 6.2118493589689075e-06, "loss": 0.8383, "step": 12124 }, { "epoch": 0.4394548947120438, "grad_norm": 2.4280499174852794, "learning_rate": 6.211279922177486e-06, "loss": 0.8751, "step": 12125 }, { "epoch": 0.43949113841470044, "grad_norm": 2.2025635963514576, "learning_rate": 6.210710468695362e-06, "loss": 0.9147, "step": 12126 }, { "epoch": 0.4395273821173571, "grad_norm": 2.390392293347547, "learning_rate": 6.21014099853038e-06, "loss": 0.8895, "step": 12127 }, { "epoch": 0.4395636258200138, "grad_norm": 2.360216280081611, "learning_rate": 6.209571511690386e-06, "loss": 0.8575, "step": 12128 }, { "epoch": 0.43959986952267044, "grad_norm": 2.395035329704679, "learning_rate": 6.209002008183228e-06, "loss": 0.9522, "step": 12129 }, { "epoch": 0.4396361132253271, "grad_norm": 2.6515327463965783, "learning_rate": 6.208432488016753e-06, "loss": 0.8648, "step": 12130 }, { "epoch": 0.4396723569279838, "grad_norm": 2.447396379728462, "learning_rate": 6.20786295119881e-06, "loss": 0.9903, "step": 12131 }, { "epoch": 0.43970860063064043, "grad_norm": 2.383741002879617, "learning_rate": 6.207293397737246e-06, "loss": 1.0661, "step": 12132 }, { "epoch": 0.4397448443332971, "grad_norm": 2.4631913314091998, "learning_rate": 6.20672382763991e-06, "loss": 0.9587, "step": 12133 }, { "epoch": 0.4397810880359537, "grad_norm": 2.4679973694172364, "learning_rate": 6.206154240914649e-06, "loss": 0.9115, "step": 12134 }, { "epoch": 0.43981733173861043, "grad_norm": 2.3150809646293173, "learning_rate": 6.205584637569311e-06, "loss": 1.1159, "step": 12135 }, { "epoch": 0.4398535754412671, "grad_norm": 2.09781337068452, "learning_rate": 6.2050150176117454e-06, "loss": 1.0145, "step": 12136 }, { "epoch": 0.4398898191439237, "grad_norm": 2.2458667171649336, "learning_rate": 6.204445381049803e-06, "loss": 0.9969, "step": 12137 }, { "epoch": 0.4399260628465804, "grad_norm": 2.4548522931236634, "learning_rate": 6.203875727891331e-06, "loss": 0.9722, "step": 12138 }, { "epoch": 0.43996230654923707, "grad_norm": 2.28849585775705, "learning_rate": 6.2033060581441795e-06, "loss": 0.8312, "step": 12139 }, { "epoch": 0.4399985502518937, "grad_norm": 2.7342746595289413, "learning_rate": 6.202736371816198e-06, "loss": 1.015, "step": 12140 }, { "epoch": 0.4400347939545504, "grad_norm": 2.534995633578344, "learning_rate": 6.202166668915238e-06, "loss": 0.9071, "step": 12141 }, { "epoch": 0.44007103765720706, "grad_norm": 2.481210129017651, "learning_rate": 6.201596949449148e-06, "loss": 0.9968, "step": 12142 }, { "epoch": 0.4401072813598637, "grad_norm": 2.150474450965911, "learning_rate": 6.201027213425779e-06, "loss": 0.8555, "step": 12143 }, { "epoch": 0.4401435250625204, "grad_norm": 2.422550692899572, "learning_rate": 6.2004574608529796e-06, "loss": 0.9171, "step": 12144 }, { "epoch": 0.44017976876517706, "grad_norm": 2.1463387231488475, "learning_rate": 6.199887691738604e-06, "loss": 0.9034, "step": 12145 }, { "epoch": 0.4402160124678337, "grad_norm": 2.223887548112764, "learning_rate": 6.199317906090501e-06, "loss": 0.9867, "step": 12146 }, { "epoch": 0.44025225617049035, "grad_norm": 2.6720465865094774, "learning_rate": 6.198748103916523e-06, "loss": 1.0088, "step": 12147 }, { "epoch": 0.44028849987314705, "grad_norm": 2.472440278698661, "learning_rate": 6.19817828522452e-06, "loss": 0.9532, "step": 12148 }, { "epoch": 0.4403247435758037, "grad_norm": 2.597296179976901, "learning_rate": 6.197608450022347e-06, "loss": 1.0079, "step": 12149 }, { "epoch": 0.44036098727846035, "grad_norm": 2.4602537662053097, "learning_rate": 6.197038598317853e-06, "loss": 0.9119, "step": 12150 }, { "epoch": 0.44039723098111705, "grad_norm": 2.268425196439824, "learning_rate": 6.19646873011889e-06, "loss": 1.0413, "step": 12151 }, { "epoch": 0.4404334746837737, "grad_norm": 2.271322976698051, "learning_rate": 6.195898845433313e-06, "loss": 0.8888, "step": 12152 }, { "epoch": 0.44046971838643034, "grad_norm": 2.472341797074303, "learning_rate": 6.195328944268971e-06, "loss": 0.9567, "step": 12153 }, { "epoch": 0.44050596208908704, "grad_norm": 2.288149526416128, "learning_rate": 6.194759026633721e-06, "loss": 0.8156, "step": 12154 }, { "epoch": 0.4405422057917437, "grad_norm": 2.408002230035455, "learning_rate": 6.1941890925354135e-06, "loss": 0.8587, "step": 12155 }, { "epoch": 0.44057844949440034, "grad_norm": 2.0557536034359885, "learning_rate": 6.193619141981901e-06, "loss": 0.7691, "step": 12156 }, { "epoch": 0.44061469319705704, "grad_norm": 2.3391448661419165, "learning_rate": 6.193049174981042e-06, "loss": 0.8468, "step": 12157 }, { "epoch": 0.4406509368997137, "grad_norm": 2.182836748127844, "learning_rate": 6.192479191540683e-06, "loss": 0.751, "step": 12158 }, { "epoch": 0.44068718060237033, "grad_norm": 2.7897041326985765, "learning_rate": 6.191909191668686e-06, "loss": 1.0165, "step": 12159 }, { "epoch": 0.440723424305027, "grad_norm": 2.06452635724336, "learning_rate": 6.191339175372899e-06, "loss": 0.9344, "step": 12160 }, { "epoch": 0.4407596680076837, "grad_norm": 2.399635478414126, "learning_rate": 6.19076914266118e-06, "loss": 0.9932, "step": 12161 }, { "epoch": 0.4407959117103403, "grad_norm": 2.2821969087002016, "learning_rate": 6.19019909354138e-06, "loss": 1.031, "step": 12162 }, { "epoch": 0.440832155412997, "grad_norm": 2.7481079937055504, "learning_rate": 6.189629028021358e-06, "loss": 0.7365, "step": 12163 }, { "epoch": 0.4408683991156537, "grad_norm": 1.8913988594600568, "learning_rate": 6.189058946108965e-06, "loss": 0.7024, "step": 12164 }, { "epoch": 0.4409046428183103, "grad_norm": 2.097084744887039, "learning_rate": 6.188488847812062e-06, "loss": 0.9745, "step": 12165 }, { "epoch": 0.44094088652096697, "grad_norm": 2.1920116977564508, "learning_rate": 6.187918733138499e-06, "loss": 1.1225, "step": 12166 }, { "epoch": 0.44097713022362367, "grad_norm": 2.466702689088615, "learning_rate": 6.187348602096137e-06, "loss": 0.9606, "step": 12167 }, { "epoch": 0.4410133739262803, "grad_norm": 2.5160081829120524, "learning_rate": 6.186778454692827e-06, "loss": 1.1889, "step": 12168 }, { "epoch": 0.44104961762893696, "grad_norm": 2.163650154601735, "learning_rate": 6.186208290936428e-06, "loss": 0.878, "step": 12169 }, { "epoch": 0.4410858613315936, "grad_norm": 1.9965290834238578, "learning_rate": 6.1856381108347985e-06, "loss": 0.9915, "step": 12170 }, { "epoch": 0.4411221050342503, "grad_norm": 2.2828708216224642, "learning_rate": 6.185067914395791e-06, "loss": 0.8524, "step": 12171 }, { "epoch": 0.44115834873690696, "grad_norm": 2.295190954096309, "learning_rate": 6.184497701627263e-06, "loss": 0.9391, "step": 12172 }, { "epoch": 0.4411945924395636, "grad_norm": 2.3271401379981613, "learning_rate": 6.1839274725370755e-06, "loss": 0.9793, "step": 12173 }, { "epoch": 0.4412308361422203, "grad_norm": 2.4825295485897114, "learning_rate": 6.183357227133082e-06, "loss": 1.0173, "step": 12174 }, { "epoch": 0.44126707984487695, "grad_norm": 2.1871614366111083, "learning_rate": 6.1827869654231425e-06, "loss": 0.975, "step": 12175 }, { "epoch": 0.4413033235475336, "grad_norm": 2.2092372195588403, "learning_rate": 6.182216687415112e-06, "loss": 0.783, "step": 12176 }, { "epoch": 0.4413395672501903, "grad_norm": 2.6445785379766362, "learning_rate": 6.1816463931168536e-06, "loss": 1.0324, "step": 12177 }, { "epoch": 0.44137581095284695, "grad_norm": 2.4234141570636485, "learning_rate": 6.1810760825362196e-06, "loss": 1.0787, "step": 12178 }, { "epoch": 0.4414120546555036, "grad_norm": 2.6216612405068243, "learning_rate": 6.180505755681073e-06, "loss": 0.9163, "step": 12179 }, { "epoch": 0.4414482983581603, "grad_norm": 2.313275442670879, "learning_rate": 6.179935412559271e-06, "loss": 0.9979, "step": 12180 }, { "epoch": 0.44148454206081694, "grad_norm": 2.309341949651776, "learning_rate": 6.179365053178673e-06, "loss": 0.8196, "step": 12181 }, { "epoch": 0.4415207857634736, "grad_norm": 2.368925798820939, "learning_rate": 6.178794677547138e-06, "loss": 0.9275, "step": 12182 }, { "epoch": 0.44155702946613024, "grad_norm": 2.0416639626190727, "learning_rate": 6.178224285672524e-06, "loss": 0.8134, "step": 12183 }, { "epoch": 0.44159327316878694, "grad_norm": 2.298046248937874, "learning_rate": 6.177653877562691e-06, "loss": 0.8535, "step": 12184 }, { "epoch": 0.4416295168714436, "grad_norm": 2.296225000397311, "learning_rate": 6.177083453225502e-06, "loss": 0.8343, "step": 12185 }, { "epoch": 0.44166576057410023, "grad_norm": 2.1351638026795396, "learning_rate": 6.176513012668813e-06, "loss": 0.8552, "step": 12186 }, { "epoch": 0.44170200427675693, "grad_norm": 2.456777136155901, "learning_rate": 6.175942555900488e-06, "loss": 0.8098, "step": 12187 }, { "epoch": 0.4417382479794136, "grad_norm": 2.894980063104722, "learning_rate": 6.175372082928385e-06, "loss": 1.0335, "step": 12188 }, { "epoch": 0.4417744916820702, "grad_norm": 2.4864855339043714, "learning_rate": 6.174801593760365e-06, "loss": 0.9362, "step": 12189 }, { "epoch": 0.4418107353847269, "grad_norm": 2.1977343583019766, "learning_rate": 6.174231088404291e-06, "loss": 0.8855, "step": 12190 }, { "epoch": 0.4418469790873836, "grad_norm": 2.4360022887698567, "learning_rate": 6.173660566868023e-06, "loss": 0.9094, "step": 12191 }, { "epoch": 0.4418832227900402, "grad_norm": 2.50381864076936, "learning_rate": 6.17309002915942e-06, "loss": 0.9534, "step": 12192 }, { "epoch": 0.44191946649269687, "grad_norm": 2.2214452254973187, "learning_rate": 6.172519475286347e-06, "loss": 0.8079, "step": 12193 }, { "epoch": 0.44195571019535357, "grad_norm": 2.2926132664848393, "learning_rate": 6.171948905256665e-06, "loss": 0.9563, "step": 12194 }, { "epoch": 0.4419919538980102, "grad_norm": 2.426262430492759, "learning_rate": 6.171378319078236e-06, "loss": 0.9524, "step": 12195 }, { "epoch": 0.44202819760066686, "grad_norm": 2.3670414419536137, "learning_rate": 6.170807716758921e-06, "loss": 0.9856, "step": 12196 }, { "epoch": 0.44206444130332356, "grad_norm": 2.137153630767078, "learning_rate": 6.170237098306585e-06, "loss": 0.8243, "step": 12197 }, { "epoch": 0.4421006850059802, "grad_norm": 2.3615170386671984, "learning_rate": 6.169666463729088e-06, "loss": 1.0316, "step": 12198 }, { "epoch": 0.44213692870863686, "grad_norm": 2.414258716434326, "learning_rate": 6.169095813034296e-06, "loss": 0.7681, "step": 12199 }, { "epoch": 0.44217317241129356, "grad_norm": 2.287552996039885, "learning_rate": 6.168525146230071e-06, "loss": 0.8577, "step": 12200 }, { "epoch": 0.4422094161139502, "grad_norm": 2.5236639347195036, "learning_rate": 6.167954463324276e-06, "loss": 0.8426, "step": 12201 }, { "epoch": 0.44224565981660685, "grad_norm": 2.331275992220633, "learning_rate": 6.167383764324773e-06, "loss": 0.8427, "step": 12202 }, { "epoch": 0.44228190351926355, "grad_norm": 2.6577880717345206, "learning_rate": 6.166813049239429e-06, "loss": 1.0525, "step": 12203 }, { "epoch": 0.4423181472219202, "grad_norm": 2.563025840875484, "learning_rate": 6.166242318076107e-06, "loss": 1.0445, "step": 12204 }, { "epoch": 0.44235439092457685, "grad_norm": 2.380881659577942, "learning_rate": 6.165671570842672e-06, "loss": 0.9103, "step": 12205 }, { "epoch": 0.4423906346272335, "grad_norm": 1.9529602655355636, "learning_rate": 6.165100807546986e-06, "loss": 0.8234, "step": 12206 }, { "epoch": 0.4424268783298902, "grad_norm": 2.2450691655018495, "learning_rate": 6.164530028196917e-06, "loss": 0.969, "step": 12207 }, { "epoch": 0.44246312203254684, "grad_norm": 2.35482314420041, "learning_rate": 6.163959232800326e-06, "loss": 1.1897, "step": 12208 }, { "epoch": 0.4424993657352035, "grad_norm": 2.534754859945205, "learning_rate": 6.163388421365084e-06, "loss": 1.1505, "step": 12209 }, { "epoch": 0.4425356094378602, "grad_norm": 2.1461783308551596, "learning_rate": 6.162817593899052e-06, "loss": 0.9236, "step": 12210 }, { "epoch": 0.44257185314051684, "grad_norm": 4.856602453253133, "learning_rate": 6.1622467504100965e-06, "loss": 0.9129, "step": 12211 }, { "epoch": 0.4426080968431735, "grad_norm": 2.2689530752481963, "learning_rate": 6.161675890906082e-06, "loss": 1.0041, "step": 12212 }, { "epoch": 0.4426443405458302, "grad_norm": 2.341775741715343, "learning_rate": 6.161105015394879e-06, "loss": 1.0097, "step": 12213 }, { "epoch": 0.44268058424848683, "grad_norm": 2.5327235217133146, "learning_rate": 6.16053412388435e-06, "loss": 1.1387, "step": 12214 }, { "epoch": 0.4427168279511435, "grad_norm": 2.4023177454810427, "learning_rate": 6.159963216382363e-06, "loss": 0.9553, "step": 12215 }, { "epoch": 0.4427530716538002, "grad_norm": 2.3774533320107962, "learning_rate": 6.159392292896783e-06, "loss": 0.7102, "step": 12216 }, { "epoch": 0.4427893153564568, "grad_norm": 2.171101901164448, "learning_rate": 6.158821353435479e-06, "loss": 0.9288, "step": 12217 }, { "epoch": 0.44282555905911347, "grad_norm": 2.283956448662625, "learning_rate": 6.158250398006319e-06, "loss": 0.8275, "step": 12218 }, { "epoch": 0.4428618027617701, "grad_norm": 2.3218578374985714, "learning_rate": 6.157679426617168e-06, "loss": 1.0425, "step": 12219 }, { "epoch": 0.4428980464644268, "grad_norm": 2.3889383010652314, "learning_rate": 6.157108439275895e-06, "loss": 0.7189, "step": 12220 }, { "epoch": 0.44293429016708347, "grad_norm": 2.2859651180642433, "learning_rate": 6.156537435990368e-06, "loss": 0.9676, "step": 12221 }, { "epoch": 0.4429705338697401, "grad_norm": 2.3948156275571293, "learning_rate": 6.155966416768453e-06, "loss": 1.1066, "step": 12222 }, { "epoch": 0.4430067775723968, "grad_norm": 2.232575915730869, "learning_rate": 6.15539538161802e-06, "loss": 0.8695, "step": 12223 }, { "epoch": 0.44304302127505346, "grad_norm": 2.5438601191773396, "learning_rate": 6.154824330546939e-06, "loss": 1.0688, "step": 12224 }, { "epoch": 0.4430792649777101, "grad_norm": 2.4201429817035107, "learning_rate": 6.1542532635630745e-06, "loss": 0.9604, "step": 12225 }, { "epoch": 0.4431155086803668, "grad_norm": 2.542375548237719, "learning_rate": 6.153682180674301e-06, "loss": 0.7777, "step": 12226 }, { "epoch": 0.44315175238302346, "grad_norm": 2.3690212307132508, "learning_rate": 6.153111081888483e-06, "loss": 0.8559, "step": 12227 }, { "epoch": 0.4431879960856801, "grad_norm": 2.1760628355749274, "learning_rate": 6.152539967213492e-06, "loss": 0.9188, "step": 12228 }, { "epoch": 0.44322423978833675, "grad_norm": 2.3767868171328645, "learning_rate": 6.151968836657198e-06, "loss": 0.9092, "step": 12229 }, { "epoch": 0.44326048349099345, "grad_norm": 2.3498995256340844, "learning_rate": 6.151397690227471e-06, "loss": 0.8735, "step": 12230 }, { "epoch": 0.4432967271936501, "grad_norm": 2.4965043289509703, "learning_rate": 6.150826527932177e-06, "loss": 1.0586, "step": 12231 }, { "epoch": 0.44333297089630674, "grad_norm": 2.1812652160675605, "learning_rate": 6.150255349779193e-06, "loss": 0.9569, "step": 12232 }, { "epoch": 0.44336921459896345, "grad_norm": 2.5005278603443424, "learning_rate": 6.149684155776383e-06, "loss": 0.8188, "step": 12233 }, { "epoch": 0.4434054583016201, "grad_norm": 2.253683787635495, "learning_rate": 6.149112945931623e-06, "loss": 0.7733, "step": 12234 }, { "epoch": 0.44344170200427674, "grad_norm": 2.3838442764062284, "learning_rate": 6.148541720252779e-06, "loss": 0.7799, "step": 12235 }, { "epoch": 0.44347794570693344, "grad_norm": 2.0574127739744443, "learning_rate": 6.147970478747727e-06, "loss": 0.8294, "step": 12236 }, { "epoch": 0.4435141894095901, "grad_norm": 2.0626959258213358, "learning_rate": 6.147399221424334e-06, "loss": 1.0386, "step": 12237 }, { "epoch": 0.44355043311224673, "grad_norm": 2.4975627122550463, "learning_rate": 6.146827948290476e-06, "loss": 1.0166, "step": 12238 }, { "epoch": 0.44358667681490344, "grad_norm": 2.326880314546361, "learning_rate": 6.14625665935402e-06, "loss": 0.7508, "step": 12239 }, { "epoch": 0.4436229205175601, "grad_norm": 2.6387503454410033, "learning_rate": 6.145685354622843e-06, "loss": 0.8872, "step": 12240 }, { "epoch": 0.44365916422021673, "grad_norm": 2.419565213050095, "learning_rate": 6.145114034104813e-06, "loss": 0.914, "step": 12241 }, { "epoch": 0.4436954079228734, "grad_norm": 2.0527682621745007, "learning_rate": 6.144542697807805e-06, "loss": 0.8479, "step": 12242 }, { "epoch": 0.4437316516255301, "grad_norm": 2.2010548879651752, "learning_rate": 6.14397134573969e-06, "loss": 0.9353, "step": 12243 }, { "epoch": 0.4437678953281867, "grad_norm": 2.332964311207894, "learning_rate": 6.1433999779083435e-06, "loss": 0.7647, "step": 12244 }, { "epoch": 0.44380413903084337, "grad_norm": 2.5142505066898195, "learning_rate": 6.142828594321634e-06, "loss": 0.8869, "step": 12245 }, { "epoch": 0.44384038273350007, "grad_norm": 2.336411177385572, "learning_rate": 6.14225719498744e-06, "loss": 0.7339, "step": 12246 }, { "epoch": 0.4438766264361567, "grad_norm": 2.4199859004374153, "learning_rate": 6.141685779913632e-06, "loss": 0.9432, "step": 12247 }, { "epoch": 0.44391287013881336, "grad_norm": 2.4236009863139936, "learning_rate": 6.141114349108084e-06, "loss": 0.8537, "step": 12248 }, { "epoch": 0.44394911384147007, "grad_norm": 2.4024713159768507, "learning_rate": 6.14054290257867e-06, "loss": 0.8921, "step": 12249 }, { "epoch": 0.4439853575441267, "grad_norm": 2.4517390922795697, "learning_rate": 6.139971440333265e-06, "loss": 1.0073, "step": 12250 }, { "epoch": 0.44402160124678336, "grad_norm": 1.992670228115994, "learning_rate": 6.139399962379743e-06, "loss": 0.7669, "step": 12251 }, { "epoch": 0.44405784494944006, "grad_norm": 2.1661899720534845, "learning_rate": 6.138828468725979e-06, "loss": 0.7869, "step": 12252 }, { "epoch": 0.4440940886520967, "grad_norm": 2.5795254219575305, "learning_rate": 6.138256959379847e-06, "loss": 0.9514, "step": 12253 }, { "epoch": 0.44413033235475335, "grad_norm": 2.306421261022008, "learning_rate": 6.137685434349223e-06, "loss": 0.8391, "step": 12254 }, { "epoch": 0.44416657605741, "grad_norm": 2.276668634048079, "learning_rate": 6.13711389364198e-06, "loss": 0.9627, "step": 12255 }, { "epoch": 0.4442028197600667, "grad_norm": 2.2864925595947936, "learning_rate": 6.136542337265998e-06, "loss": 0.8521, "step": 12256 }, { "epoch": 0.44423906346272335, "grad_norm": 2.1418544076678687, "learning_rate": 6.135970765229149e-06, "loss": 0.719, "step": 12257 }, { "epoch": 0.44427530716538, "grad_norm": 2.380189558775488, "learning_rate": 6.13539917753931e-06, "loss": 0.8829, "step": 12258 }, { "epoch": 0.4443115508680367, "grad_norm": 2.431323329798152, "learning_rate": 6.134827574204356e-06, "loss": 1.0476, "step": 12259 }, { "epoch": 0.44434779457069334, "grad_norm": 2.2756929413551257, "learning_rate": 6.134255955232165e-06, "loss": 0.9016, "step": 12260 }, { "epoch": 0.44438403827335, "grad_norm": 2.379642780147676, "learning_rate": 6.133684320630613e-06, "loss": 1.0002, "step": 12261 }, { "epoch": 0.4444202819760067, "grad_norm": 2.4954261729763396, "learning_rate": 6.133112670407576e-06, "loss": 0.9486, "step": 12262 }, { "epoch": 0.44445652567866334, "grad_norm": 2.341011075616104, "learning_rate": 6.132541004570932e-06, "loss": 0.8998, "step": 12263 }, { "epoch": 0.44449276938132, "grad_norm": 2.2889992208075545, "learning_rate": 6.131969323128558e-06, "loss": 0.9712, "step": 12264 }, { "epoch": 0.44452901308397663, "grad_norm": 2.2753920678314916, "learning_rate": 6.131397626088331e-06, "loss": 0.975, "step": 12265 }, { "epoch": 0.44456525678663333, "grad_norm": 2.5705678771665745, "learning_rate": 6.13082591345813e-06, "loss": 1.0809, "step": 12266 }, { "epoch": 0.44460150048929, "grad_norm": 2.268075247509269, "learning_rate": 6.1302541852458315e-06, "loss": 0.8919, "step": 12267 }, { "epoch": 0.4446377441919466, "grad_norm": 2.364682534385433, "learning_rate": 6.129682441459314e-06, "loss": 0.9617, "step": 12268 }, { "epoch": 0.44467398789460333, "grad_norm": 2.210523785825763, "learning_rate": 6.129110682106454e-06, "loss": 0.9213, "step": 12269 }, { "epoch": 0.44471023159726, "grad_norm": 2.3145017306620663, "learning_rate": 6.128538907195134e-06, "loss": 0.9121, "step": 12270 }, { "epoch": 0.4447464752999166, "grad_norm": 2.3338048593332763, "learning_rate": 6.127967116733229e-06, "loss": 0.9512, "step": 12271 }, { "epoch": 0.4447827190025733, "grad_norm": 2.3136326922814656, "learning_rate": 6.127395310728619e-06, "loss": 1.0783, "step": 12272 }, { "epoch": 0.44481896270522997, "grad_norm": 2.3204633310279967, "learning_rate": 6.126823489189184e-06, "loss": 0.8724, "step": 12273 }, { "epoch": 0.4448552064078866, "grad_norm": 2.2770172753700955, "learning_rate": 6.126251652122803e-06, "loss": 0.7709, "step": 12274 }, { "epoch": 0.4448914501105433, "grad_norm": 2.317861090539778, "learning_rate": 6.125679799537354e-06, "loss": 0.9322, "step": 12275 }, { "epoch": 0.44492769381319996, "grad_norm": 2.225751583904303, "learning_rate": 6.125107931440721e-06, "loss": 1.0709, "step": 12276 }, { "epoch": 0.4449639375158566, "grad_norm": 2.602568975051777, "learning_rate": 6.124536047840779e-06, "loss": 0.9118, "step": 12277 }, { "epoch": 0.44500018121851326, "grad_norm": 2.556522436463154, "learning_rate": 6.123964148745412e-06, "loss": 0.8851, "step": 12278 }, { "epoch": 0.44503642492116996, "grad_norm": 2.376181721580716, "learning_rate": 6.123392234162497e-06, "loss": 0.9311, "step": 12279 }, { "epoch": 0.4450726686238266, "grad_norm": 2.2324859269585686, "learning_rate": 6.122820304099917e-06, "loss": 1.022, "step": 12280 }, { "epoch": 0.44510891232648325, "grad_norm": 2.198472864500283, "learning_rate": 6.122248358565551e-06, "loss": 1.0848, "step": 12281 }, { "epoch": 0.44514515602913995, "grad_norm": 2.1359045033812705, "learning_rate": 6.121676397567283e-06, "loss": 1.0282, "step": 12282 }, { "epoch": 0.4451813997317966, "grad_norm": 2.6129881076205304, "learning_rate": 6.121104421112992e-06, "loss": 0.9055, "step": 12283 }, { "epoch": 0.44521764343445325, "grad_norm": 2.7975170845909725, "learning_rate": 6.1205324292105604e-06, "loss": 0.8787, "step": 12284 }, { "epoch": 0.44525388713710995, "grad_norm": 2.5211813241050973, "learning_rate": 6.1199604218678684e-06, "loss": 1.0585, "step": 12285 }, { "epoch": 0.4452901308397666, "grad_norm": 2.051554503935856, "learning_rate": 6.119388399092802e-06, "loss": 0.906, "step": 12286 }, { "epoch": 0.44532637454242324, "grad_norm": 2.318438931184099, "learning_rate": 6.118816360893238e-06, "loss": 1.0468, "step": 12287 }, { "epoch": 0.44536261824507994, "grad_norm": 2.094458959916219, "learning_rate": 6.118244307277063e-06, "loss": 0.8179, "step": 12288 }, { "epoch": 0.4453988619477366, "grad_norm": 2.326996191075698, "learning_rate": 6.117672238252156e-06, "loss": 0.7388, "step": 12289 }, { "epoch": 0.44543510565039324, "grad_norm": 2.291779364016477, "learning_rate": 6.117100153826403e-06, "loss": 1.0042, "step": 12290 }, { "epoch": 0.4454713493530499, "grad_norm": 2.4876031995975216, "learning_rate": 6.1165280540076836e-06, "loss": 1.0038, "step": 12291 }, { "epoch": 0.4455075930557066, "grad_norm": 2.1579499962043998, "learning_rate": 6.115955938803884e-06, "loss": 0.7542, "step": 12292 }, { "epoch": 0.44554383675836323, "grad_norm": 2.2825060004571602, "learning_rate": 6.115383808222885e-06, "loss": 0.9304, "step": 12293 }, { "epoch": 0.4455800804610199, "grad_norm": 2.221049041471867, "learning_rate": 6.114811662272574e-06, "loss": 0.7738, "step": 12294 }, { "epoch": 0.4456163241636766, "grad_norm": 2.229678911824283, "learning_rate": 6.114239500960831e-06, "loss": 0.8316, "step": 12295 }, { "epoch": 0.4456525678663332, "grad_norm": 2.398391104553211, "learning_rate": 6.1136673242955426e-06, "loss": 0.9101, "step": 12296 }, { "epoch": 0.4456888115689899, "grad_norm": 2.2960823614371026, "learning_rate": 6.113095132284593e-06, "loss": 0.8487, "step": 12297 }, { "epoch": 0.4457250552716466, "grad_norm": 2.5479867515331724, "learning_rate": 6.112522924935862e-06, "loss": 0.9031, "step": 12298 }, { "epoch": 0.4457612989743032, "grad_norm": 2.2611917312086702, "learning_rate": 6.111950702257241e-06, "loss": 0.8993, "step": 12299 }, { "epoch": 0.44579754267695987, "grad_norm": 2.0741797855621193, "learning_rate": 6.11137846425661e-06, "loss": 0.9444, "step": 12300 }, { "epoch": 0.4458337863796165, "grad_norm": 2.4924906366364277, "learning_rate": 6.110806210941858e-06, "loss": 0.8078, "step": 12301 }, { "epoch": 0.4458700300822732, "grad_norm": 2.482940303153453, "learning_rate": 6.1102339423208655e-06, "loss": 0.9947, "step": 12302 }, { "epoch": 0.44590627378492986, "grad_norm": 2.1212821138007016, "learning_rate": 6.109661658401523e-06, "loss": 0.8755, "step": 12303 }, { "epoch": 0.4459425174875865, "grad_norm": 2.600254662784627, "learning_rate": 6.109089359191712e-06, "loss": 0.9131, "step": 12304 }, { "epoch": 0.4459787611902432, "grad_norm": 2.4149722694955407, "learning_rate": 6.108517044699323e-06, "loss": 0.82, "step": 12305 }, { "epoch": 0.44601500489289986, "grad_norm": 2.613574815505817, "learning_rate": 6.107944714932238e-06, "loss": 0.9405, "step": 12306 }, { "epoch": 0.4460512485955565, "grad_norm": 2.3661321440161593, "learning_rate": 6.1073723698983456e-06, "loss": 0.9031, "step": 12307 }, { "epoch": 0.4460874922982132, "grad_norm": 12.977062368462327, "learning_rate": 6.10680000960553e-06, "loss": 1.1711, "step": 12308 }, { "epoch": 0.44612373600086985, "grad_norm": 2.231029421369332, "learning_rate": 6.106227634061681e-06, "loss": 0.9931, "step": 12309 }, { "epoch": 0.4461599797035265, "grad_norm": 2.3311407482836364, "learning_rate": 6.105655243274683e-06, "loss": 0.7887, "step": 12310 }, { "epoch": 0.4461962234061832, "grad_norm": 2.5014527709622065, "learning_rate": 6.1050828372524265e-06, "loss": 0.9549, "step": 12311 }, { "epoch": 0.44623246710883985, "grad_norm": 2.3324288426385014, "learning_rate": 6.104510416002795e-06, "loss": 0.9755, "step": 12312 }, { "epoch": 0.4462687108114965, "grad_norm": 2.562211075062781, "learning_rate": 6.103937979533678e-06, "loss": 1.1149, "step": 12313 }, { "epoch": 0.44630495451415314, "grad_norm": 2.20248216884218, "learning_rate": 6.103365527852964e-06, "loss": 0.8057, "step": 12314 }, { "epoch": 0.44634119821680984, "grad_norm": 2.494473872630301, "learning_rate": 6.102793060968541e-06, "loss": 0.8233, "step": 12315 }, { "epoch": 0.4463774419194665, "grad_norm": 2.2212478896331946, "learning_rate": 6.102220578888296e-06, "loss": 0.8562, "step": 12316 }, { "epoch": 0.44641368562212314, "grad_norm": 2.186093451710278, "learning_rate": 6.101648081620118e-06, "loss": 0.9771, "step": 12317 }, { "epoch": 0.44644992932477984, "grad_norm": 2.158070106356489, "learning_rate": 6.101075569171895e-06, "loss": 0.8642, "step": 12318 }, { "epoch": 0.4464861730274365, "grad_norm": 2.387355147693129, "learning_rate": 6.100503041551517e-06, "loss": 0.7992, "step": 12319 }, { "epoch": 0.44652241673009313, "grad_norm": 2.090830426978386, "learning_rate": 6.099930498766872e-06, "loss": 0.8391, "step": 12320 }, { "epoch": 0.44655866043274983, "grad_norm": 13.631171909755727, "learning_rate": 6.099357940825852e-06, "loss": 1.074, "step": 12321 }, { "epoch": 0.4465949041354065, "grad_norm": 2.305991059325409, "learning_rate": 6.098785367736342e-06, "loss": 1.0361, "step": 12322 }, { "epoch": 0.4466311478380631, "grad_norm": 1.8701216854183917, "learning_rate": 6.098212779506236e-06, "loss": 0.712, "step": 12323 }, { "epoch": 0.4466673915407198, "grad_norm": 2.4336400193995695, "learning_rate": 6.09764017614342e-06, "loss": 0.8399, "step": 12324 }, { "epoch": 0.4467036352433765, "grad_norm": 2.3688152194896017, "learning_rate": 6.097067557655788e-06, "loss": 1.0376, "step": 12325 }, { "epoch": 0.4467398789460331, "grad_norm": 2.3385874588370283, "learning_rate": 6.096494924051228e-06, "loss": 0.9598, "step": 12326 }, { "epoch": 0.44677612264868977, "grad_norm": 2.1156774632072866, "learning_rate": 6.095922275337632e-06, "loss": 0.8547, "step": 12327 }, { "epoch": 0.44681236635134647, "grad_norm": 2.481326666956651, "learning_rate": 6.095349611522889e-06, "loss": 1.0624, "step": 12328 }, { "epoch": 0.4468486100540031, "grad_norm": 2.5634589303273168, "learning_rate": 6.094776932614891e-06, "loss": 0.7304, "step": 12329 }, { "epoch": 0.44688485375665976, "grad_norm": 2.2757581849280704, "learning_rate": 6.094204238621529e-06, "loss": 0.9087, "step": 12330 }, { "epoch": 0.44692109745931646, "grad_norm": 2.3885833123942506, "learning_rate": 6.093631529550694e-06, "loss": 1.0212, "step": 12331 }, { "epoch": 0.4469573411619731, "grad_norm": 2.293071145220323, "learning_rate": 6.093058805410277e-06, "loss": 0.9825, "step": 12332 }, { "epoch": 0.44699358486462976, "grad_norm": 1.9443929971319938, "learning_rate": 6.092486066208172e-06, "loss": 0.7434, "step": 12333 }, { "epoch": 0.44702982856728646, "grad_norm": 2.6396570379144064, "learning_rate": 6.09191331195227e-06, "loss": 0.9618, "step": 12334 }, { "epoch": 0.4470660722699431, "grad_norm": 2.430439287796238, "learning_rate": 6.091340542650463e-06, "loss": 0.9192, "step": 12335 }, { "epoch": 0.44710231597259975, "grad_norm": 2.2873468064360267, "learning_rate": 6.090767758310643e-06, "loss": 0.9535, "step": 12336 }, { "epoch": 0.4471385596752564, "grad_norm": 2.5695648011847925, "learning_rate": 6.090194958940702e-06, "loss": 1.1344, "step": 12337 }, { "epoch": 0.4471748033779131, "grad_norm": 2.3535248599498693, "learning_rate": 6.089622144548535e-06, "loss": 0.8261, "step": 12338 }, { "epoch": 0.44721104708056975, "grad_norm": 2.3446698452005466, "learning_rate": 6.089049315142033e-06, "loss": 0.844, "step": 12339 }, { "epoch": 0.4472472907832264, "grad_norm": 2.423373198927019, "learning_rate": 6.08847647072909e-06, "loss": 0.8209, "step": 12340 }, { "epoch": 0.4472835344858831, "grad_norm": 2.1528078083221796, "learning_rate": 6.0879036113176e-06, "loss": 0.966, "step": 12341 }, { "epoch": 0.44731977818853974, "grad_norm": 2.531667632403804, "learning_rate": 6.087330736915455e-06, "loss": 0.9426, "step": 12342 }, { "epoch": 0.4473560218911964, "grad_norm": 2.5200182494310894, "learning_rate": 6.086757847530553e-06, "loss": 0.9556, "step": 12343 }, { "epoch": 0.4473922655938531, "grad_norm": 2.3691975150596085, "learning_rate": 6.086184943170783e-06, "loss": 0.9014, "step": 12344 }, { "epoch": 0.44742850929650974, "grad_norm": 2.732237270783054, "learning_rate": 6.085612023844042e-06, "loss": 0.8199, "step": 12345 }, { "epoch": 0.4474647529991664, "grad_norm": 2.1593914812506028, "learning_rate": 6.0850390895582235e-06, "loss": 0.9426, "step": 12346 }, { "epoch": 0.4475009967018231, "grad_norm": 2.6232583041999185, "learning_rate": 6.084466140321222e-06, "loss": 0.9876, "step": 12347 }, { "epoch": 0.44753724040447973, "grad_norm": 2.100923000321575, "learning_rate": 6.083893176140934e-06, "loss": 0.7874, "step": 12348 }, { "epoch": 0.4475734841071364, "grad_norm": 2.1924525156372066, "learning_rate": 6.083320197025253e-06, "loss": 0.8328, "step": 12349 }, { "epoch": 0.447609727809793, "grad_norm": 2.0284394141197004, "learning_rate": 6.082747202982075e-06, "loss": 0.7981, "step": 12350 }, { "epoch": 0.4476459715124497, "grad_norm": 2.3145992617945446, "learning_rate": 6.082174194019295e-06, "loss": 0.8544, "step": 12351 }, { "epoch": 0.44768221521510637, "grad_norm": 2.250236091337628, "learning_rate": 6.081601170144809e-06, "loss": 0.9734, "step": 12352 }, { "epoch": 0.447718458917763, "grad_norm": 2.5186819434666043, "learning_rate": 6.081028131366513e-06, "loss": 0.9774, "step": 12353 }, { "epoch": 0.4477547026204197, "grad_norm": 2.2747030788118687, "learning_rate": 6.080455077692305e-06, "loss": 0.7767, "step": 12354 }, { "epoch": 0.44779094632307637, "grad_norm": 2.2471239885969108, "learning_rate": 6.079882009130078e-06, "loss": 0.8602, "step": 12355 }, { "epoch": 0.447827190025733, "grad_norm": 2.124464697126724, "learning_rate": 6.079308925687729e-06, "loss": 0.8352, "step": 12356 }, { "epoch": 0.4478634337283897, "grad_norm": 2.2669640585411415, "learning_rate": 6.078735827373158e-06, "loss": 0.8215, "step": 12357 }, { "epoch": 0.44789967743104636, "grad_norm": 2.4827468603984677, "learning_rate": 6.078162714194258e-06, "loss": 0.9384, "step": 12358 }, { "epoch": 0.447935921133703, "grad_norm": 2.4877833936898948, "learning_rate": 6.077589586158928e-06, "loss": 0.9218, "step": 12359 }, { "epoch": 0.4479721648363597, "grad_norm": 1.923573800274434, "learning_rate": 6.0770164432750655e-06, "loss": 0.7144, "step": 12360 }, { "epoch": 0.44800840853901636, "grad_norm": 2.7042137795609915, "learning_rate": 6.076443285550568e-06, "loss": 1.1321, "step": 12361 }, { "epoch": 0.448044652241673, "grad_norm": 2.3603191152118335, "learning_rate": 6.075870112993331e-06, "loss": 0.8401, "step": 12362 }, { "epoch": 0.44808089594432965, "grad_norm": 2.324133630753798, "learning_rate": 6.075296925611258e-06, "loss": 0.9926, "step": 12363 }, { "epoch": 0.44811713964698635, "grad_norm": 2.3390807670607647, "learning_rate": 6.074723723412243e-06, "loss": 1.0214, "step": 12364 }, { "epoch": 0.448153383349643, "grad_norm": 2.425891919453234, "learning_rate": 6.074150506404184e-06, "loss": 0.7694, "step": 12365 }, { "epoch": 0.44818962705229964, "grad_norm": 2.3010448601191267, "learning_rate": 6.07357727459498e-06, "loss": 1.0901, "step": 12366 }, { "epoch": 0.44822587075495635, "grad_norm": 2.3750662196783887, "learning_rate": 6.073004027992532e-06, "loss": 0.7595, "step": 12367 }, { "epoch": 0.448262114457613, "grad_norm": 2.534381015437596, "learning_rate": 6.0724307666047355e-06, "loss": 0.966, "step": 12368 }, { "epoch": 0.44829835816026964, "grad_norm": 2.0611325270475955, "learning_rate": 6.071857490439493e-06, "loss": 0.8341, "step": 12369 }, { "epoch": 0.44833460186292634, "grad_norm": 2.346695226632441, "learning_rate": 6.071284199504701e-06, "loss": 1.0155, "step": 12370 }, { "epoch": 0.448370845565583, "grad_norm": 2.2894984136640613, "learning_rate": 6.070710893808262e-06, "loss": 0.8773, "step": 12371 }, { "epoch": 0.44840708926823963, "grad_norm": 2.2602407627992838, "learning_rate": 6.070137573358075e-06, "loss": 0.9004, "step": 12372 }, { "epoch": 0.4484433329708963, "grad_norm": 2.218397759816794, "learning_rate": 6.069564238162039e-06, "loss": 0.8281, "step": 12373 }, { "epoch": 0.448479576673553, "grad_norm": 2.5245514248263707, "learning_rate": 6.068990888228055e-06, "loss": 1.0223, "step": 12374 }, { "epoch": 0.44851582037620963, "grad_norm": 2.421513711934627, "learning_rate": 6.068417523564022e-06, "loss": 0.8882, "step": 12375 }, { "epoch": 0.4485520640788663, "grad_norm": 2.7788080383193234, "learning_rate": 6.067844144177841e-06, "loss": 0.9001, "step": 12376 }, { "epoch": 0.448588307781523, "grad_norm": 2.24125111676504, "learning_rate": 6.067270750077417e-06, "loss": 0.73, "step": 12377 }, { "epoch": 0.4486245514841796, "grad_norm": 2.334772883451395, "learning_rate": 6.066697341270643e-06, "loss": 0.7098, "step": 12378 }, { "epoch": 0.44866079518683627, "grad_norm": 2.3074372637322145, "learning_rate": 6.066123917765428e-06, "loss": 0.8325, "step": 12379 }, { "epoch": 0.44869703888949297, "grad_norm": 2.0491327696372483, "learning_rate": 6.065550479569668e-06, "loss": 0.9125, "step": 12380 }, { "epoch": 0.4487332825921496, "grad_norm": 2.417971294916951, "learning_rate": 6.064977026691268e-06, "loss": 0.9448, "step": 12381 }, { "epoch": 0.44876952629480626, "grad_norm": 2.33286060786151, "learning_rate": 6.064403559138129e-06, "loss": 0.9468, "step": 12382 }, { "epoch": 0.44880576999746297, "grad_norm": 2.543565427207695, "learning_rate": 6.063830076918152e-06, "loss": 1.0317, "step": 12383 }, { "epoch": 0.4488420137001196, "grad_norm": 2.2507943801461634, "learning_rate": 6.063256580039241e-06, "loss": 0.82, "step": 12384 }, { "epoch": 0.44887825740277626, "grad_norm": 2.3055295570009906, "learning_rate": 6.062683068509295e-06, "loss": 0.9654, "step": 12385 }, { "epoch": 0.4489145011054329, "grad_norm": 2.297804381757917, "learning_rate": 6.062109542336222e-06, "loss": 0.792, "step": 12386 }, { "epoch": 0.4489507448080896, "grad_norm": 2.5610062065675683, "learning_rate": 6.061536001527919e-06, "loss": 0.9414, "step": 12387 }, { "epoch": 0.44898698851074625, "grad_norm": 2.400417130230434, "learning_rate": 6.060962446092295e-06, "loss": 1.1223, "step": 12388 }, { "epoch": 0.4490232322134029, "grad_norm": 2.353395540494164, "learning_rate": 6.060388876037246e-06, "loss": 1.0322, "step": 12389 }, { "epoch": 0.4490594759160596, "grad_norm": 2.161371928224498, "learning_rate": 6.059815291370683e-06, "loss": 0.8595, "step": 12390 }, { "epoch": 0.44909571961871625, "grad_norm": 2.495189796795995, "learning_rate": 6.059241692100505e-06, "loss": 0.8851, "step": 12391 }, { "epoch": 0.4491319633213729, "grad_norm": 2.345477520920811, "learning_rate": 6.058668078234618e-06, "loss": 0.8773, "step": 12392 }, { "epoch": 0.4491682070240296, "grad_norm": 2.128969501263462, "learning_rate": 6.058094449780925e-06, "loss": 0.7881, "step": 12393 }, { "epoch": 0.44920445072668624, "grad_norm": 2.5080621243430183, "learning_rate": 6.05752080674733e-06, "loss": 0.7945, "step": 12394 }, { "epoch": 0.4492406944293429, "grad_norm": 2.190563991300362, "learning_rate": 6.056947149141738e-06, "loss": 0.8133, "step": 12395 }, { "epoch": 0.4492769381319996, "grad_norm": 2.217347294632327, "learning_rate": 6.0563734769720525e-06, "loss": 0.9065, "step": 12396 }, { "epoch": 0.44931318183465624, "grad_norm": 2.376954345311861, "learning_rate": 6.05579979024618e-06, "loss": 0.8983, "step": 12397 }, { "epoch": 0.4493494255373129, "grad_norm": 2.6157807423994455, "learning_rate": 6.055226088972025e-06, "loss": 0.8753, "step": 12398 }, { "epoch": 0.44938566923996953, "grad_norm": 2.2699712337208533, "learning_rate": 6.054652373157493e-06, "loss": 0.8359, "step": 12399 }, { "epoch": 0.44942191294262623, "grad_norm": 2.4155639800369455, "learning_rate": 6.054078642810488e-06, "loss": 1.0736, "step": 12400 }, { "epoch": 0.4494581566452829, "grad_norm": 2.2576145122707336, "learning_rate": 6.053504897938917e-06, "loss": 0.7669, "step": 12401 }, { "epoch": 0.4494944003479395, "grad_norm": 2.7003430134414947, "learning_rate": 6.052931138550688e-06, "loss": 1.019, "step": 12402 }, { "epoch": 0.44953064405059623, "grad_norm": 2.24312572264175, "learning_rate": 6.0523573646537025e-06, "loss": 0.9233, "step": 12403 }, { "epoch": 0.4495668877532529, "grad_norm": 2.4074067792778355, "learning_rate": 6.05178357625587e-06, "loss": 0.8734, "step": 12404 }, { "epoch": 0.4496031314559095, "grad_norm": 2.2914950943337766, "learning_rate": 6.0512097733650945e-06, "loss": 0.9143, "step": 12405 }, { "epoch": 0.4496393751585662, "grad_norm": 2.4067581174579242, "learning_rate": 6.050635955989286e-06, "loss": 0.9127, "step": 12406 }, { "epoch": 0.44967561886122287, "grad_norm": 2.457375484945097, "learning_rate": 6.050062124136347e-06, "loss": 0.9794, "step": 12407 }, { "epoch": 0.4497118625638795, "grad_norm": 2.337481991754067, "learning_rate": 6.049488277814189e-06, "loss": 0.8577, "step": 12408 }, { "epoch": 0.44974810626653616, "grad_norm": 2.2999350098664526, "learning_rate": 6.048914417030716e-06, "loss": 0.7569, "step": 12409 }, { "epoch": 0.44978434996919286, "grad_norm": 2.162179222190901, "learning_rate": 6.048340541793838e-06, "loss": 0.8576, "step": 12410 }, { "epoch": 0.4498205936718495, "grad_norm": 2.3081123391292566, "learning_rate": 6.04776665211146e-06, "loss": 0.9397, "step": 12411 }, { "epoch": 0.44985683737450616, "grad_norm": 2.404640149577572, "learning_rate": 6.047192747991491e-06, "loss": 0.9312, "step": 12412 }, { "epoch": 0.44989308107716286, "grad_norm": 2.226628106038393, "learning_rate": 6.0466188294418415e-06, "loss": 0.7665, "step": 12413 }, { "epoch": 0.4499293247798195, "grad_norm": 2.1507861167160534, "learning_rate": 6.046044896470416e-06, "loss": 0.7321, "step": 12414 }, { "epoch": 0.44996556848247615, "grad_norm": 2.6125857235344663, "learning_rate": 6.045470949085124e-06, "loss": 1.0499, "step": 12415 }, { "epoch": 0.45000181218513285, "grad_norm": 2.308079363875708, "learning_rate": 6.044896987293875e-06, "loss": 0.8959, "step": 12416 }, { "epoch": 0.4500380558877895, "grad_norm": 2.4263385877209793, "learning_rate": 6.044323011104578e-06, "loss": 1.042, "step": 12417 }, { "epoch": 0.45007429959044615, "grad_norm": 2.4611395568268533, "learning_rate": 6.0437490205251416e-06, "loss": 0.921, "step": 12418 }, { "epoch": 0.45011054329310285, "grad_norm": 2.6024882015699093, "learning_rate": 6.043175015563474e-06, "loss": 0.9234, "step": 12419 }, { "epoch": 0.4501467869957595, "grad_norm": 2.448945590539416, "learning_rate": 6.042600996227485e-06, "loss": 0.9841, "step": 12420 }, { "epoch": 0.45018303069841614, "grad_norm": 2.0493371393790647, "learning_rate": 6.042026962525087e-06, "loss": 0.9127, "step": 12421 }, { "epoch": 0.4502192744010728, "grad_norm": 2.2372314514603984, "learning_rate": 6.041452914464187e-06, "loss": 0.8492, "step": 12422 }, { "epoch": 0.4502555181037295, "grad_norm": 2.2990522132439457, "learning_rate": 6.040878852052695e-06, "loss": 0.9398, "step": 12423 }, { "epoch": 0.45029176180638614, "grad_norm": 2.3400829008010064, "learning_rate": 6.040304775298522e-06, "loss": 0.9972, "step": 12424 }, { "epoch": 0.4503280055090428, "grad_norm": 2.3946660785178095, "learning_rate": 6.039730684209578e-06, "loss": 0.9849, "step": 12425 }, { "epoch": 0.4503642492116995, "grad_norm": 2.22717445994408, "learning_rate": 6.0391565787937744e-06, "loss": 1.0195, "step": 12426 }, { "epoch": 0.45040049291435613, "grad_norm": 2.302176265126153, "learning_rate": 6.038582459059021e-06, "loss": 0.9857, "step": 12427 }, { "epoch": 0.4504367366170128, "grad_norm": 2.1099409970792813, "learning_rate": 6.0380083250132316e-06, "loss": 0.918, "step": 12428 }, { "epoch": 0.4504729803196695, "grad_norm": 2.3258137056998467, "learning_rate": 6.037434176664312e-06, "loss": 0.8826, "step": 12429 }, { "epoch": 0.4505092240223261, "grad_norm": 2.4380717144922643, "learning_rate": 6.0368600140201795e-06, "loss": 1.0173, "step": 12430 }, { "epoch": 0.4505454677249828, "grad_norm": 2.3061881468186396, "learning_rate": 6.036285837088742e-06, "loss": 0.9639, "step": 12431 }, { "epoch": 0.4505817114276395, "grad_norm": 2.1757713485652137, "learning_rate": 6.035711645877913e-06, "loss": 0.9478, "step": 12432 }, { "epoch": 0.4506179551302961, "grad_norm": 2.286767994025993, "learning_rate": 6.035137440395603e-06, "loss": 0.8283, "step": 12433 }, { "epoch": 0.45065419883295277, "grad_norm": 2.2143925945877085, "learning_rate": 6.0345632206497266e-06, "loss": 0.9696, "step": 12434 }, { "epoch": 0.4506904425356094, "grad_norm": 2.5492111188468556, "learning_rate": 6.033988986648193e-06, "loss": 0.908, "step": 12435 }, { "epoch": 0.4507266862382661, "grad_norm": 2.6179526653806913, "learning_rate": 6.033414738398917e-06, "loss": 0.806, "step": 12436 }, { "epoch": 0.45076292994092276, "grad_norm": 2.5232628520699922, "learning_rate": 6.03284047590981e-06, "loss": 0.8985, "step": 12437 }, { "epoch": 0.4507991736435794, "grad_norm": 2.404675222951757, "learning_rate": 6.032266199188787e-06, "loss": 0.7873, "step": 12438 }, { "epoch": 0.4508354173462361, "grad_norm": 2.333303084675628, "learning_rate": 6.03169190824376e-06, "loss": 0.8806, "step": 12439 }, { "epoch": 0.45087166104889276, "grad_norm": 2.4177608711919363, "learning_rate": 6.0311176030826435e-06, "loss": 1.0152, "step": 12440 }, { "epoch": 0.4509079047515494, "grad_norm": 2.1377252849146227, "learning_rate": 6.03054328371335e-06, "loss": 0.8517, "step": 12441 }, { "epoch": 0.4509441484542061, "grad_norm": 2.2124406054342565, "learning_rate": 6.0299689501437916e-06, "loss": 0.68, "step": 12442 }, { "epoch": 0.45098039215686275, "grad_norm": 2.267165672374363, "learning_rate": 6.029394602381884e-06, "loss": 0.8508, "step": 12443 }, { "epoch": 0.4510166358595194, "grad_norm": 2.366043203569411, "learning_rate": 6.028820240435542e-06, "loss": 1.0106, "step": 12444 }, { "epoch": 0.45105287956217605, "grad_norm": 2.5260157247119635, "learning_rate": 6.028245864312679e-06, "loss": 1.0035, "step": 12445 }, { "epoch": 0.45108912326483275, "grad_norm": 2.3434085346647504, "learning_rate": 6.027671474021211e-06, "loss": 1.114, "step": 12446 }, { "epoch": 0.4511253669674894, "grad_norm": 2.109359003024513, "learning_rate": 6.02709706956905e-06, "loss": 0.7367, "step": 12447 }, { "epoch": 0.45116161067014604, "grad_norm": 2.3111889382276134, "learning_rate": 6.026522650964114e-06, "loss": 0.7855, "step": 12448 }, { "epoch": 0.45119785437280274, "grad_norm": 2.4702288448426524, "learning_rate": 6.025948218214315e-06, "loss": 0.9475, "step": 12449 }, { "epoch": 0.4512340980754594, "grad_norm": 2.3230783687327285, "learning_rate": 6.025373771327572e-06, "loss": 0.9594, "step": 12450 }, { "epoch": 0.45127034177811604, "grad_norm": 2.5205972098784057, "learning_rate": 6.024799310311798e-06, "loss": 0.9759, "step": 12451 }, { "epoch": 0.45130658548077274, "grad_norm": 2.410944001970628, "learning_rate": 6.024224835174909e-06, "loss": 0.8093, "step": 12452 }, { "epoch": 0.4513428291834294, "grad_norm": 2.3399772139142967, "learning_rate": 6.02365034592482e-06, "loss": 0.828, "step": 12453 }, { "epoch": 0.45137907288608603, "grad_norm": 2.4866239114377957, "learning_rate": 6.02307584256945e-06, "loss": 0.9956, "step": 12454 }, { "epoch": 0.45141531658874273, "grad_norm": 2.1965105296252316, "learning_rate": 6.022501325116712e-06, "loss": 0.9517, "step": 12455 }, { "epoch": 0.4514515602913994, "grad_norm": 2.5659167040197195, "learning_rate": 6.021926793574526e-06, "loss": 1.0607, "step": 12456 }, { "epoch": 0.451487803994056, "grad_norm": 2.278336936802611, "learning_rate": 6.021352247950804e-06, "loss": 0.8683, "step": 12457 }, { "epoch": 0.45152404769671267, "grad_norm": 2.4410430418174114, "learning_rate": 6.02077768825347e-06, "loss": 0.945, "step": 12458 }, { "epoch": 0.4515602913993694, "grad_norm": 2.4648927676343586, "learning_rate": 6.0202031144904325e-06, "loss": 1.0779, "step": 12459 }, { "epoch": 0.451596535102026, "grad_norm": 2.361182420221938, "learning_rate": 6.019628526669616e-06, "loss": 1.0004, "step": 12460 }, { "epoch": 0.45163277880468267, "grad_norm": 2.7228609729279305, "learning_rate": 6.019053924798934e-06, "loss": 0.9865, "step": 12461 }, { "epoch": 0.45166902250733937, "grad_norm": 2.446524281084599, "learning_rate": 6.018479308886305e-06, "loss": 0.8672, "step": 12462 }, { "epoch": 0.451705266209996, "grad_norm": 2.467058273641017, "learning_rate": 6.017904678939648e-06, "loss": 0.8984, "step": 12463 }, { "epoch": 0.45174150991265266, "grad_norm": 2.386523758439346, "learning_rate": 6.01733003496688e-06, "loss": 0.9989, "step": 12464 }, { "epoch": 0.45177775361530936, "grad_norm": 2.1503641328732646, "learning_rate": 6.016755376975918e-06, "loss": 0.94, "step": 12465 }, { "epoch": 0.451813997317966, "grad_norm": 2.1101287757049336, "learning_rate": 6.016180704974682e-06, "loss": 0.8094, "step": 12466 }, { "epoch": 0.45185024102062266, "grad_norm": 2.624382115691781, "learning_rate": 6.015606018971092e-06, "loss": 0.8272, "step": 12467 }, { "epoch": 0.4518864847232793, "grad_norm": 1.908039760836438, "learning_rate": 6.015031318973063e-06, "loss": 0.7399, "step": 12468 }, { "epoch": 0.451922728425936, "grad_norm": 2.196902467905279, "learning_rate": 6.014456604988519e-06, "loss": 0.9974, "step": 12469 }, { "epoch": 0.45195897212859265, "grad_norm": 1.9257700494248036, "learning_rate": 6.013881877025376e-06, "loss": 0.858, "step": 12470 }, { "epoch": 0.4519952158312493, "grad_norm": 2.1760719556192094, "learning_rate": 6.013307135091553e-06, "loss": 0.9632, "step": 12471 }, { "epoch": 0.452031459533906, "grad_norm": 2.1004482839306973, "learning_rate": 6.01273237919497e-06, "loss": 0.7152, "step": 12472 }, { "epoch": 0.45206770323656265, "grad_norm": 2.380110624337888, "learning_rate": 6.012157609343549e-06, "loss": 0.8538, "step": 12473 }, { "epoch": 0.4521039469392193, "grad_norm": 2.098284325087803, "learning_rate": 6.0115828255452055e-06, "loss": 0.9177, "step": 12474 }, { "epoch": 0.452140190641876, "grad_norm": 2.11261791915281, "learning_rate": 6.0110080278078654e-06, "loss": 0.9323, "step": 12475 }, { "epoch": 0.45217643434453264, "grad_norm": 2.603474976036415, "learning_rate": 6.010433216139444e-06, "loss": 0.995, "step": 12476 }, { "epoch": 0.4522126780471893, "grad_norm": 2.6848119413658473, "learning_rate": 6.009858390547866e-06, "loss": 0.9408, "step": 12477 }, { "epoch": 0.452248921749846, "grad_norm": 2.808949315366922, "learning_rate": 6.0092835510410484e-06, "loss": 0.9955, "step": 12478 }, { "epoch": 0.45228516545250264, "grad_norm": 2.0591502463889437, "learning_rate": 6.008708697626915e-06, "loss": 0.9138, "step": 12479 }, { "epoch": 0.4523214091551593, "grad_norm": 2.1130310641198, "learning_rate": 6.0081338303133855e-06, "loss": 0.7939, "step": 12480 }, { "epoch": 0.45235765285781593, "grad_norm": 2.2408508116700054, "learning_rate": 6.007558949108382e-06, "loss": 0.9697, "step": 12481 }, { "epoch": 0.45239389656047263, "grad_norm": 2.497769532880473, "learning_rate": 6.006984054019824e-06, "loss": 0.8251, "step": 12482 }, { "epoch": 0.4524301402631293, "grad_norm": 2.4033895373780285, "learning_rate": 6.006409145055636e-06, "loss": 0.9418, "step": 12483 }, { "epoch": 0.4524663839657859, "grad_norm": 2.493596248686621, "learning_rate": 6.005834222223738e-06, "loss": 1.0144, "step": 12484 }, { "epoch": 0.4525026276684426, "grad_norm": 2.432675349963678, "learning_rate": 6.005259285532052e-06, "loss": 0.9337, "step": 12485 }, { "epoch": 0.45253887137109927, "grad_norm": 2.3766200618778996, "learning_rate": 6.004684334988502e-06, "loss": 0.994, "step": 12486 }, { "epoch": 0.4525751150737559, "grad_norm": 1.7393272287346095, "learning_rate": 6.00410937060101e-06, "loss": 0.6956, "step": 12487 }, { "epoch": 0.4526113587764126, "grad_norm": 2.315803874081975, "learning_rate": 6.003534392377497e-06, "loss": 0.9232, "step": 12488 }, { "epoch": 0.45264760247906927, "grad_norm": 2.653725769792768, "learning_rate": 6.0029594003258875e-06, "loss": 0.6784, "step": 12489 }, { "epoch": 0.4526838461817259, "grad_norm": 2.006622926643018, "learning_rate": 6.002384394454105e-06, "loss": 0.9699, "step": 12490 }, { "epoch": 0.4527200898843826, "grad_norm": 2.5623578568143706, "learning_rate": 6.001809374770071e-06, "loss": 0.8235, "step": 12491 }, { "epoch": 0.45275633358703926, "grad_norm": 2.175581024601274, "learning_rate": 6.001234341281709e-06, "loss": 0.7793, "step": 12492 }, { "epoch": 0.4527925772896959, "grad_norm": 2.355079505437231, "learning_rate": 6.000659293996945e-06, "loss": 0.8675, "step": 12493 }, { "epoch": 0.45282882099235255, "grad_norm": 2.2399316865891525, "learning_rate": 6.000084232923699e-06, "loss": 0.8159, "step": 12494 }, { "epoch": 0.45286506469500926, "grad_norm": 2.505575450948645, "learning_rate": 5.999509158069898e-06, "loss": 0.9003, "step": 12495 }, { "epoch": 0.4529013083976659, "grad_norm": 2.275450688670512, "learning_rate": 5.998934069443465e-06, "loss": 0.9771, "step": 12496 }, { "epoch": 0.45293755210032255, "grad_norm": 2.426051142947186, "learning_rate": 5.9983589670523246e-06, "loss": 0.8933, "step": 12497 }, { "epoch": 0.45297379580297925, "grad_norm": 2.3852393018563114, "learning_rate": 5.997783850904402e-06, "loss": 1.0567, "step": 12498 }, { "epoch": 0.4530100395056359, "grad_norm": 2.216343571718817, "learning_rate": 5.997208721007621e-06, "loss": 0.7988, "step": 12499 }, { "epoch": 0.45304628320829254, "grad_norm": 2.2652904829099834, "learning_rate": 5.996633577369908e-06, "loss": 0.7118, "step": 12500 }, { "epoch": 0.45308252691094925, "grad_norm": 2.596457174867509, "learning_rate": 5.9960584199991864e-06, "loss": 0.7526, "step": 12501 }, { "epoch": 0.4531187706136059, "grad_norm": 2.526550127746923, "learning_rate": 5.995483248903381e-06, "loss": 0.8845, "step": 12502 }, { "epoch": 0.45315501431626254, "grad_norm": 2.3843419072814642, "learning_rate": 5.99490806409042e-06, "loss": 0.8974, "step": 12503 }, { "epoch": 0.4531912580189192, "grad_norm": 2.4778787152263693, "learning_rate": 5.994332865568227e-06, "loss": 0.8039, "step": 12504 }, { "epoch": 0.4532275017215759, "grad_norm": 2.3101753184263862, "learning_rate": 5.9937576533447286e-06, "loss": 0.9597, "step": 12505 }, { "epoch": 0.45326374542423253, "grad_norm": 2.2560486421353136, "learning_rate": 5.993182427427849e-06, "loss": 0.8717, "step": 12506 }, { "epoch": 0.4532999891268892, "grad_norm": 2.4547340706887275, "learning_rate": 5.992607187825519e-06, "loss": 1.163, "step": 12507 }, { "epoch": 0.4533362328295459, "grad_norm": 2.3429184531240113, "learning_rate": 5.9920319345456616e-06, "loss": 0.8598, "step": 12508 }, { "epoch": 0.45337247653220253, "grad_norm": 2.5745367867930775, "learning_rate": 5.9914566675962025e-06, "loss": 0.9294, "step": 12509 }, { "epoch": 0.4534087202348592, "grad_norm": 2.286943625956494, "learning_rate": 5.990881386985071e-06, "loss": 0.8165, "step": 12510 }, { "epoch": 0.4534449639375159, "grad_norm": 2.183276441934429, "learning_rate": 5.990306092720195e-06, "loss": 0.701, "step": 12511 }, { "epoch": 0.4534812076401725, "grad_norm": 2.393768792881275, "learning_rate": 5.989730784809498e-06, "loss": 0.8384, "step": 12512 }, { "epoch": 0.45351745134282917, "grad_norm": 2.144484297460579, "learning_rate": 5.989155463260909e-06, "loss": 0.9753, "step": 12513 }, { "epoch": 0.45355369504548587, "grad_norm": 2.4825906178847905, "learning_rate": 5.988580128082357e-06, "loss": 0.9016, "step": 12514 }, { "epoch": 0.4535899387481425, "grad_norm": 2.3104516831680564, "learning_rate": 5.988004779281769e-06, "loss": 0.9819, "step": 12515 }, { "epoch": 0.45362618245079916, "grad_norm": 2.3907672891691987, "learning_rate": 5.987429416867071e-06, "loss": 1.1077, "step": 12516 }, { "epoch": 0.4536624261534558, "grad_norm": 2.1679535181098997, "learning_rate": 5.986854040846194e-06, "loss": 0.9779, "step": 12517 }, { "epoch": 0.4536986698561125, "grad_norm": 2.3043455628812923, "learning_rate": 5.986278651227065e-06, "loss": 0.8694, "step": 12518 }, { "epoch": 0.45373491355876916, "grad_norm": 2.632782890269471, "learning_rate": 5.985703248017613e-06, "loss": 1.1704, "step": 12519 }, { "epoch": 0.4537711572614258, "grad_norm": 2.2632855310499855, "learning_rate": 5.985127831225767e-06, "loss": 0.991, "step": 12520 }, { "epoch": 0.4538074009640825, "grad_norm": 2.4545517604451286, "learning_rate": 5.984552400859453e-06, "loss": 1.0276, "step": 12521 }, { "epoch": 0.45384364466673915, "grad_norm": 2.286824422770481, "learning_rate": 5.983976956926604e-06, "loss": 0.8833, "step": 12522 }, { "epoch": 0.4538798883693958, "grad_norm": 2.5050935167724404, "learning_rate": 5.983401499435148e-06, "loss": 0.9868, "step": 12523 }, { "epoch": 0.4539161320720525, "grad_norm": 2.3625576785144107, "learning_rate": 5.982826028393013e-06, "loss": 0.8964, "step": 12524 }, { "epoch": 0.45395237577470915, "grad_norm": 2.1729757779079315, "learning_rate": 5.9822505438081306e-06, "loss": 0.8736, "step": 12525 }, { "epoch": 0.4539886194773658, "grad_norm": 2.6506272770873442, "learning_rate": 5.981675045688428e-06, "loss": 0.9781, "step": 12526 }, { "epoch": 0.4540248631800225, "grad_norm": 2.3334046078441317, "learning_rate": 5.981099534041839e-06, "loss": 0.905, "step": 12527 }, { "epoch": 0.45406110688267914, "grad_norm": 2.218634140289595, "learning_rate": 5.980524008876292e-06, "loss": 0.7866, "step": 12528 }, { "epoch": 0.4540973505853358, "grad_norm": 2.48153556928066, "learning_rate": 5.979948470199716e-06, "loss": 1.0364, "step": 12529 }, { "epoch": 0.45413359428799244, "grad_norm": 2.617910385737885, "learning_rate": 5.979372918020042e-06, "loss": 0.9229, "step": 12530 }, { "epoch": 0.45416983799064914, "grad_norm": 2.2355689651539303, "learning_rate": 5.978797352345204e-06, "loss": 0.9486, "step": 12531 }, { "epoch": 0.4542060816933058, "grad_norm": 2.499529166897253, "learning_rate": 5.978221773183127e-06, "loss": 1.0069, "step": 12532 }, { "epoch": 0.45424232539596243, "grad_norm": 2.4180000886624597, "learning_rate": 5.977646180541748e-06, "loss": 0.9338, "step": 12533 }, { "epoch": 0.45427856909861913, "grad_norm": 2.394262792450578, "learning_rate": 5.977070574428994e-06, "loss": 0.9899, "step": 12534 }, { "epoch": 0.4543148128012758, "grad_norm": 2.111700132633614, "learning_rate": 5.976494954852801e-06, "loss": 0.7182, "step": 12535 }, { "epoch": 0.4543510565039324, "grad_norm": 2.275128494854504, "learning_rate": 5.975919321821097e-06, "loss": 0.8674, "step": 12536 }, { "epoch": 0.45438730020658913, "grad_norm": 2.2145950930844207, "learning_rate": 5.9753436753418146e-06, "loss": 1.0697, "step": 12537 }, { "epoch": 0.4544235439092458, "grad_norm": 2.4262961788976782, "learning_rate": 5.974768015422887e-06, "loss": 0.8984, "step": 12538 }, { "epoch": 0.4544597876119024, "grad_norm": 2.5255216841867862, "learning_rate": 5.974192342072245e-06, "loss": 0.8577, "step": 12539 }, { "epoch": 0.45449603131455907, "grad_norm": 2.004853230471578, "learning_rate": 5.973616655297822e-06, "loss": 0.927, "step": 12540 }, { "epoch": 0.45453227501721577, "grad_norm": 2.3513164460970875, "learning_rate": 5.973040955107549e-06, "loss": 0.867, "step": 12541 }, { "epoch": 0.4545685187198724, "grad_norm": 2.5163695259528995, "learning_rate": 5.972465241509362e-06, "loss": 0.8411, "step": 12542 }, { "epoch": 0.45460476242252906, "grad_norm": 2.5472209284112517, "learning_rate": 5.97188951451119e-06, "loss": 0.9066, "step": 12543 }, { "epoch": 0.45464100612518576, "grad_norm": 2.4054221473989728, "learning_rate": 5.97131377412097e-06, "loss": 0.9278, "step": 12544 }, { "epoch": 0.4546772498278424, "grad_norm": 2.3693458306980877, "learning_rate": 5.970738020346633e-06, "loss": 0.8053, "step": 12545 }, { "epoch": 0.45471349353049906, "grad_norm": 2.198941436429955, "learning_rate": 5.970162253196114e-06, "loss": 0.9979, "step": 12546 }, { "epoch": 0.45474973723315576, "grad_norm": 2.2749556661248764, "learning_rate": 5.969586472677345e-06, "loss": 0.9112, "step": 12547 }, { "epoch": 0.4547859809358124, "grad_norm": 2.454523047353171, "learning_rate": 5.9690106787982616e-06, "loss": 0.9729, "step": 12548 }, { "epoch": 0.45482222463846905, "grad_norm": 2.07810028135795, "learning_rate": 5.968434871566796e-06, "loss": 0.7795, "step": 12549 }, { "epoch": 0.45485846834112575, "grad_norm": 2.3196182167657464, "learning_rate": 5.967859050990884e-06, "loss": 1.013, "step": 12550 }, { "epoch": 0.4548947120437824, "grad_norm": 1.9708699409728254, "learning_rate": 5.967283217078459e-06, "loss": 0.7617, "step": 12551 }, { "epoch": 0.45493095574643905, "grad_norm": 2.4434359026954087, "learning_rate": 5.966707369837458e-06, "loss": 1.1131, "step": 12552 }, { "epoch": 0.4549671994490957, "grad_norm": 2.556749934881468, "learning_rate": 5.966131509275812e-06, "loss": 1.0619, "step": 12553 }, { "epoch": 0.4550034431517524, "grad_norm": 2.3393628317501802, "learning_rate": 5.96555563540146e-06, "loss": 0.9507, "step": 12554 }, { "epoch": 0.45503968685440904, "grad_norm": 2.2351854571271117, "learning_rate": 5.964979748222334e-06, "loss": 0.8103, "step": 12555 }, { "epoch": 0.4550759305570657, "grad_norm": 2.0934365232039496, "learning_rate": 5.964403847746372e-06, "loss": 0.8168, "step": 12556 }, { "epoch": 0.4551121742597224, "grad_norm": 2.439373113933823, "learning_rate": 5.963827933981508e-06, "loss": 0.893, "step": 12557 }, { "epoch": 0.45514841796237904, "grad_norm": 2.29439943744405, "learning_rate": 5.9632520069356765e-06, "loss": 0.6733, "step": 12558 }, { "epoch": 0.4551846616650357, "grad_norm": 2.2127260245690716, "learning_rate": 5.962676066616815e-06, "loss": 0.8735, "step": 12559 }, { "epoch": 0.4552209053676924, "grad_norm": 2.102793583977433, "learning_rate": 5.962100113032861e-06, "loss": 1.0144, "step": 12560 }, { "epoch": 0.45525714907034903, "grad_norm": 2.4543008836922184, "learning_rate": 5.9615241461917485e-06, "loss": 0.8885, "step": 12561 }, { "epoch": 0.4552933927730057, "grad_norm": 2.9290583381493036, "learning_rate": 5.960948166101415e-06, "loss": 0.9303, "step": 12562 }, { "epoch": 0.4553296364756624, "grad_norm": 2.0615702356749703, "learning_rate": 5.960372172769796e-06, "loss": 0.8731, "step": 12563 }, { "epoch": 0.455365880178319, "grad_norm": 2.26856079274969, "learning_rate": 5.9597961662048295e-06, "loss": 0.9121, "step": 12564 }, { "epoch": 0.4554021238809757, "grad_norm": 2.6381470192866217, "learning_rate": 5.959220146414453e-06, "loss": 0.9477, "step": 12565 }, { "epoch": 0.4554383675836323, "grad_norm": 2.203186688667628, "learning_rate": 5.958644113406603e-06, "loss": 0.7753, "step": 12566 }, { "epoch": 0.455474611286289, "grad_norm": 3.106225300569449, "learning_rate": 5.958068067189218e-06, "loss": 0.9215, "step": 12567 }, { "epoch": 0.45551085498894567, "grad_norm": 2.223331874114199, "learning_rate": 5.957492007770232e-06, "loss": 0.7789, "step": 12568 }, { "epoch": 0.4555470986916023, "grad_norm": 2.426869194651255, "learning_rate": 5.956915935157587e-06, "loss": 1.0172, "step": 12569 }, { "epoch": 0.455583342394259, "grad_norm": 2.4999706012893945, "learning_rate": 5.956339849359218e-06, "loss": 1.031, "step": 12570 }, { "epoch": 0.45561958609691566, "grad_norm": 2.437499876315305, "learning_rate": 5.9557637503830645e-06, "loss": 0.8119, "step": 12571 }, { "epoch": 0.4556558297995723, "grad_norm": 2.5420097861120436, "learning_rate": 5.9551876382370655e-06, "loss": 0.9801, "step": 12572 }, { "epoch": 0.455692073502229, "grad_norm": 2.396970781512218, "learning_rate": 5.954611512929157e-06, "loss": 1.0038, "step": 12573 }, { "epoch": 0.45572831720488566, "grad_norm": 2.6371690521199938, "learning_rate": 5.954035374467281e-06, "loss": 1.056, "step": 12574 }, { "epoch": 0.4557645609075423, "grad_norm": 2.423978232124655, "learning_rate": 5.953459222859372e-06, "loss": 0.9981, "step": 12575 }, { "epoch": 0.45580080461019895, "grad_norm": 2.3891861518422415, "learning_rate": 5.952883058113373e-06, "loss": 1.0303, "step": 12576 }, { "epoch": 0.45583704831285565, "grad_norm": 2.964001605263857, "learning_rate": 5.952306880237223e-06, "loss": 1.0691, "step": 12577 }, { "epoch": 0.4558732920155123, "grad_norm": 2.346777111569908, "learning_rate": 5.951730689238859e-06, "loss": 1.0249, "step": 12578 }, { "epoch": 0.45590953571816895, "grad_norm": 2.47770185722643, "learning_rate": 5.951154485126221e-06, "loss": 0.9494, "step": 12579 }, { "epoch": 0.45594577942082565, "grad_norm": 2.1342731633214953, "learning_rate": 5.950578267907251e-06, "loss": 0.8109, "step": 12580 }, { "epoch": 0.4559820231234823, "grad_norm": 2.624618358450552, "learning_rate": 5.950002037589886e-06, "loss": 0.9559, "step": 12581 }, { "epoch": 0.45601826682613894, "grad_norm": 2.355241970251997, "learning_rate": 5.9494257941820675e-06, "loss": 0.9817, "step": 12582 }, { "epoch": 0.45605451052879564, "grad_norm": 2.148101260896665, "learning_rate": 5.9488495376917355e-06, "loss": 1.0828, "step": 12583 }, { "epoch": 0.4560907542314523, "grad_norm": 2.1232997133440175, "learning_rate": 5.948273268126832e-06, "loss": 0.8289, "step": 12584 }, { "epoch": 0.45612699793410894, "grad_norm": 2.2013485321559, "learning_rate": 5.947696985495295e-06, "loss": 1.0107, "step": 12585 }, { "epoch": 0.45616324163676564, "grad_norm": 2.288048666403085, "learning_rate": 5.9471206898050686e-06, "loss": 0.8323, "step": 12586 }, { "epoch": 0.4561994853394223, "grad_norm": 2.6731555333804704, "learning_rate": 5.946544381064091e-06, "loss": 0.8696, "step": 12587 }, { "epoch": 0.45623572904207893, "grad_norm": 2.416902039161148, "learning_rate": 5.945968059280304e-06, "loss": 0.7478, "step": 12588 }, { "epoch": 0.4562719727447356, "grad_norm": 2.271332606214642, "learning_rate": 5.945391724461649e-06, "loss": 0.7682, "step": 12589 }, { "epoch": 0.4563082164473923, "grad_norm": 2.3720878199967217, "learning_rate": 5.9448153766160675e-06, "loss": 0.928, "step": 12590 }, { "epoch": 0.4563444601500489, "grad_norm": 2.024020628046339, "learning_rate": 5.9442390157515016e-06, "loss": 0.6325, "step": 12591 }, { "epoch": 0.45638070385270557, "grad_norm": 2.214672922046665, "learning_rate": 5.943662641875895e-06, "loss": 0.9632, "step": 12592 }, { "epoch": 0.4564169475553623, "grad_norm": 2.1031675645261574, "learning_rate": 5.943086254997186e-06, "loss": 0.793, "step": 12593 }, { "epoch": 0.4564531912580189, "grad_norm": 2.257267424560079, "learning_rate": 5.942509855123319e-06, "loss": 0.7972, "step": 12594 }, { "epoch": 0.45648943496067557, "grad_norm": 2.1678126594949205, "learning_rate": 5.941933442262235e-06, "loss": 0.8843, "step": 12595 }, { "epoch": 0.45652567866333227, "grad_norm": 2.4386132318793403, "learning_rate": 5.94135701642188e-06, "loss": 0.9416, "step": 12596 }, { "epoch": 0.4565619223659889, "grad_norm": 2.321657132852051, "learning_rate": 5.940780577610194e-06, "loss": 0.8077, "step": 12597 }, { "epoch": 0.45659816606864556, "grad_norm": 2.320033477601167, "learning_rate": 5.940204125835122e-06, "loss": 0.9061, "step": 12598 }, { "epoch": 0.45663440977130226, "grad_norm": 2.195289153412736, "learning_rate": 5.939627661104604e-06, "loss": 0.9537, "step": 12599 }, { "epoch": 0.4566706534739589, "grad_norm": 2.221562810574969, "learning_rate": 5.939051183426585e-06, "loss": 0.88, "step": 12600 }, { "epoch": 0.45670689717661556, "grad_norm": 2.2520371246391075, "learning_rate": 5.938474692809009e-06, "loss": 0.9443, "step": 12601 }, { "epoch": 0.4567431408792722, "grad_norm": 2.5209442014875165, "learning_rate": 5.9378981892598185e-06, "loss": 0.8828, "step": 12602 }, { "epoch": 0.4567793845819289, "grad_norm": 2.51204906416199, "learning_rate": 5.9373216727869586e-06, "loss": 0.905, "step": 12603 }, { "epoch": 0.45681562828458555, "grad_norm": 2.3090883355023193, "learning_rate": 5.936745143398373e-06, "loss": 0.8747, "step": 12604 }, { "epoch": 0.4568518719872422, "grad_norm": 2.0008274151437155, "learning_rate": 5.936168601102008e-06, "loss": 0.8418, "step": 12605 }, { "epoch": 0.4568881156898989, "grad_norm": 2.483761960847926, "learning_rate": 5.935592045905804e-06, "loss": 1.1201, "step": 12606 }, { "epoch": 0.45692435939255555, "grad_norm": 2.5172285239611116, "learning_rate": 5.935015477817707e-06, "loss": 0.9704, "step": 12607 }, { "epoch": 0.4569606030952122, "grad_norm": 2.261996136283127, "learning_rate": 5.934438896845662e-06, "loss": 0.8197, "step": 12608 }, { "epoch": 0.4569968467978689, "grad_norm": 2.2737756883431843, "learning_rate": 5.933862302997615e-06, "loss": 0.9468, "step": 12609 }, { "epoch": 0.45703309050052554, "grad_norm": 2.0415993652808546, "learning_rate": 5.93328569628151e-06, "loss": 0.8115, "step": 12610 }, { "epoch": 0.4570693342031822, "grad_norm": 2.278932302308333, "learning_rate": 5.932709076705292e-06, "loss": 1.0828, "step": 12611 }, { "epoch": 0.45710557790583883, "grad_norm": 2.490280867770657, "learning_rate": 5.932132444276908e-06, "loss": 0.9788, "step": 12612 }, { "epoch": 0.45714182160849554, "grad_norm": 2.4573228345494993, "learning_rate": 5.931555799004302e-06, "loss": 0.8729, "step": 12613 }, { "epoch": 0.4571780653111522, "grad_norm": 2.0676110380009702, "learning_rate": 5.930979140895421e-06, "loss": 0.8175, "step": 12614 }, { "epoch": 0.45721430901380883, "grad_norm": 2.692142494339093, "learning_rate": 5.930402469958211e-06, "loss": 0.9528, "step": 12615 }, { "epoch": 0.45725055271646553, "grad_norm": 2.256555474908557, "learning_rate": 5.929825786200617e-06, "loss": 0.8687, "step": 12616 }, { "epoch": 0.4572867964191222, "grad_norm": 2.2519388212587885, "learning_rate": 5.929249089630584e-06, "loss": 0.7015, "step": 12617 }, { "epoch": 0.4573230401217788, "grad_norm": 5.3506254314227695, "learning_rate": 5.928672380256063e-06, "loss": 0.7998, "step": 12618 }, { "epoch": 0.4573592838244355, "grad_norm": 2.2585954283793654, "learning_rate": 5.928095658084998e-06, "loss": 0.9298, "step": 12619 }, { "epoch": 0.45739552752709217, "grad_norm": 2.3119086043589365, "learning_rate": 5.927518923125335e-06, "loss": 1.0818, "step": 12620 }, { "epoch": 0.4574317712297488, "grad_norm": 2.51545326413701, "learning_rate": 5.926942175385023e-06, "loss": 0.6956, "step": 12621 }, { "epoch": 0.4574680149324055, "grad_norm": 2.3057631834322807, "learning_rate": 5.926365414872008e-06, "loss": 0.8269, "step": 12622 }, { "epoch": 0.45750425863506217, "grad_norm": 2.437851304938718, "learning_rate": 5.9257886415942375e-06, "loss": 0.9432, "step": 12623 }, { "epoch": 0.4575405023377188, "grad_norm": 2.06988985030744, "learning_rate": 5.925211855559661e-06, "loss": 0.8784, "step": 12624 }, { "epoch": 0.45757674604037546, "grad_norm": 2.2056504616599746, "learning_rate": 5.9246350567762245e-06, "loss": 0.8009, "step": 12625 }, { "epoch": 0.45761298974303216, "grad_norm": 2.333980629250678, "learning_rate": 5.924058245251874e-06, "loss": 0.9208, "step": 12626 }, { "epoch": 0.4576492334456888, "grad_norm": 2.6511591276894997, "learning_rate": 5.9234814209945624e-06, "loss": 0.9719, "step": 12627 }, { "epoch": 0.45768547714834545, "grad_norm": 2.004747380660327, "learning_rate": 5.922904584012232e-06, "loss": 0.8482, "step": 12628 }, { "epoch": 0.45772172085100216, "grad_norm": 2.154231717067288, "learning_rate": 5.922327734312838e-06, "loss": 0.9825, "step": 12629 }, { "epoch": 0.4577579645536588, "grad_norm": 2.091337986556327, "learning_rate": 5.921750871904324e-06, "loss": 0.7559, "step": 12630 }, { "epoch": 0.45779420825631545, "grad_norm": 2.1299827112716208, "learning_rate": 5.92117399679464e-06, "loss": 0.9348, "step": 12631 }, { "epoch": 0.45783045195897215, "grad_norm": 2.298182656023973, "learning_rate": 5.920597108991736e-06, "loss": 0.8775, "step": 12632 }, { "epoch": 0.4578666956616288, "grad_norm": 2.547782498116326, "learning_rate": 5.920020208503561e-06, "loss": 1.1804, "step": 12633 }, { "epoch": 0.45790293936428544, "grad_norm": 2.3372747091390567, "learning_rate": 5.919443295338064e-06, "loss": 0.7169, "step": 12634 }, { "epoch": 0.45793918306694215, "grad_norm": 2.4492012436245054, "learning_rate": 5.918866369503195e-06, "loss": 0.8032, "step": 12635 }, { "epoch": 0.4579754267695988, "grad_norm": 2.42810854532499, "learning_rate": 5.918289431006902e-06, "loss": 0.8858, "step": 12636 }, { "epoch": 0.45801167047225544, "grad_norm": 2.360379519877261, "learning_rate": 5.9177124798571365e-06, "loss": 0.9438, "step": 12637 }, { "epoch": 0.4580479141749121, "grad_norm": 2.421285382289667, "learning_rate": 5.917135516061847e-06, "loss": 1.0826, "step": 12638 }, { "epoch": 0.4580841578775688, "grad_norm": 2.128542368931728, "learning_rate": 5.916558539628986e-06, "loss": 0.8921, "step": 12639 }, { "epoch": 0.45812040158022543, "grad_norm": 2.7064576797804567, "learning_rate": 5.915981550566501e-06, "loss": 0.8713, "step": 12640 }, { "epoch": 0.4581566452828821, "grad_norm": 2.2453040641071067, "learning_rate": 5.915404548882347e-06, "loss": 0.9524, "step": 12641 }, { "epoch": 0.4581928889855388, "grad_norm": 2.2389866732843964, "learning_rate": 5.914827534584469e-06, "loss": 0.8414, "step": 12642 }, { "epoch": 0.45822913268819543, "grad_norm": 2.0884819433864146, "learning_rate": 5.914250507680822e-06, "loss": 0.7841, "step": 12643 }, { "epoch": 0.4582653763908521, "grad_norm": 2.3644993176848246, "learning_rate": 5.913673468179357e-06, "loss": 0.7338, "step": 12644 }, { "epoch": 0.4583016200935088, "grad_norm": 2.287394759130169, "learning_rate": 5.9130964160880236e-06, "loss": 1.1005, "step": 12645 }, { "epoch": 0.4583378637961654, "grad_norm": 2.505954939537654, "learning_rate": 5.912519351414773e-06, "loss": 0.9233, "step": 12646 }, { "epoch": 0.45837410749882207, "grad_norm": 2.6384505729225367, "learning_rate": 5.911942274167558e-06, "loss": 0.8895, "step": 12647 }, { "epoch": 0.4584103512014787, "grad_norm": 2.401422011726995, "learning_rate": 5.91136518435433e-06, "loss": 1.0858, "step": 12648 }, { "epoch": 0.4584465949041354, "grad_norm": 2.425361503172152, "learning_rate": 5.910788081983041e-06, "loss": 0.9308, "step": 12649 }, { "epoch": 0.45848283860679206, "grad_norm": 2.4281546202147597, "learning_rate": 5.910210967061642e-06, "loss": 0.749, "step": 12650 }, { "epoch": 0.4585190823094487, "grad_norm": 2.352774798438461, "learning_rate": 5.909633839598088e-06, "loss": 1.0876, "step": 12651 }, { "epoch": 0.4585553260121054, "grad_norm": 2.3736894713842887, "learning_rate": 5.909056699600328e-06, "loss": 0.912, "step": 12652 }, { "epoch": 0.45859156971476206, "grad_norm": 2.2669447166321226, "learning_rate": 5.908479547076319e-06, "loss": 0.9443, "step": 12653 }, { "epoch": 0.4586278134174187, "grad_norm": 2.2458098622214973, "learning_rate": 5.907902382034009e-06, "loss": 0.9051, "step": 12654 }, { "epoch": 0.4586640571200754, "grad_norm": 2.224954561670973, "learning_rate": 5.907325204481356e-06, "loss": 0.9843, "step": 12655 }, { "epoch": 0.45870030082273205, "grad_norm": 2.4975677224564907, "learning_rate": 5.906748014426308e-06, "loss": 0.9491, "step": 12656 }, { "epoch": 0.4587365445253887, "grad_norm": 2.3454371797059426, "learning_rate": 5.906170811876821e-06, "loss": 0.9706, "step": 12657 }, { "epoch": 0.4587727882280454, "grad_norm": 2.147546131008331, "learning_rate": 5.905593596840849e-06, "loss": 0.7779, "step": 12658 }, { "epoch": 0.45880903193070205, "grad_norm": 2.252607530607957, "learning_rate": 5.9050163693263455e-06, "loss": 0.829, "step": 12659 }, { "epoch": 0.4588452756333587, "grad_norm": 2.183368528646568, "learning_rate": 5.904439129341262e-06, "loss": 0.9479, "step": 12660 }, { "epoch": 0.45888151933601534, "grad_norm": 2.1773424388982705, "learning_rate": 5.903861876893555e-06, "loss": 0.9898, "step": 12661 }, { "epoch": 0.45891776303867204, "grad_norm": 2.2305910536515072, "learning_rate": 5.903284611991178e-06, "loss": 0.9209, "step": 12662 }, { "epoch": 0.4589540067413287, "grad_norm": 2.402469646146017, "learning_rate": 5.902707334642085e-06, "loss": 0.8762, "step": 12663 }, { "epoch": 0.45899025044398534, "grad_norm": 2.245912678363305, "learning_rate": 5.902130044854232e-06, "loss": 0.9563, "step": 12664 }, { "epoch": 0.45902649414664204, "grad_norm": 2.508809965287382, "learning_rate": 5.901552742635572e-06, "loss": 0.7751, "step": 12665 }, { "epoch": 0.4590627378492987, "grad_norm": 2.4545168475117345, "learning_rate": 5.90097542799406e-06, "loss": 0.9169, "step": 12666 }, { "epoch": 0.45909898155195533, "grad_norm": 2.336602850643546, "learning_rate": 5.900398100937652e-06, "loss": 0.8871, "step": 12667 }, { "epoch": 0.45913522525461203, "grad_norm": 2.2916583794826564, "learning_rate": 5.899820761474302e-06, "loss": 0.7866, "step": 12668 }, { "epoch": 0.4591714689572687, "grad_norm": 2.1770882384723933, "learning_rate": 5.8992434096119665e-06, "loss": 0.9501, "step": 12669 }, { "epoch": 0.4592077126599253, "grad_norm": 2.3838776085801414, "learning_rate": 5.898666045358601e-06, "loss": 0.9689, "step": 12670 }, { "epoch": 0.45924395636258203, "grad_norm": 2.288025793712244, "learning_rate": 5.89808866872216e-06, "loss": 0.8432, "step": 12671 }, { "epoch": 0.4592802000652387, "grad_norm": 2.0417402443149406, "learning_rate": 5.897511279710601e-06, "loss": 0.7302, "step": 12672 }, { "epoch": 0.4593164437678953, "grad_norm": 2.1327479329799583, "learning_rate": 5.89693387833188e-06, "loss": 0.844, "step": 12673 }, { "epoch": 0.45935268747055197, "grad_norm": 2.400462228431697, "learning_rate": 5.896356464593952e-06, "loss": 0.9619, "step": 12674 }, { "epoch": 0.45938893117320867, "grad_norm": 2.3302493986553867, "learning_rate": 5.895779038504774e-06, "loss": 0.8799, "step": 12675 }, { "epoch": 0.4594251748758653, "grad_norm": 2.3707640068447757, "learning_rate": 5.8952016000723025e-06, "loss": 0.9654, "step": 12676 }, { "epoch": 0.45946141857852196, "grad_norm": 2.3170032664617795, "learning_rate": 5.8946241493044945e-06, "loss": 0.944, "step": 12677 }, { "epoch": 0.45949766228117866, "grad_norm": 2.329282648398051, "learning_rate": 5.894046686209306e-06, "loss": 0.8088, "step": 12678 }, { "epoch": 0.4595339059838353, "grad_norm": 2.2471472096301492, "learning_rate": 5.893469210794696e-06, "loss": 0.9276, "step": 12679 }, { "epoch": 0.45957014968649196, "grad_norm": 2.532143479294896, "learning_rate": 5.89289172306862e-06, "loss": 0.9214, "step": 12680 }, { "epoch": 0.45960639338914866, "grad_norm": 2.391281187430242, "learning_rate": 5.892314223039036e-06, "loss": 0.9166, "step": 12681 }, { "epoch": 0.4596426370918053, "grad_norm": 2.396443770398343, "learning_rate": 5.891736710713901e-06, "loss": 0.9874, "step": 12682 }, { "epoch": 0.45967888079446195, "grad_norm": 2.038680847363587, "learning_rate": 5.891159186101175e-06, "loss": 0.8374, "step": 12683 }, { "epoch": 0.4597151244971186, "grad_norm": 2.5418302136198734, "learning_rate": 5.890581649208814e-06, "loss": 0.9317, "step": 12684 }, { "epoch": 0.4597513681997753, "grad_norm": 2.3603296738325974, "learning_rate": 5.890004100044777e-06, "loss": 0.9647, "step": 12685 }, { "epoch": 0.45978761190243195, "grad_norm": 2.2541192751707624, "learning_rate": 5.8894265386170205e-06, "loss": 0.9871, "step": 12686 }, { "epoch": 0.4598238556050886, "grad_norm": 2.1525319917435004, "learning_rate": 5.8888489649335044e-06, "loss": 0.8999, "step": 12687 }, { "epoch": 0.4598600993077453, "grad_norm": 2.037600039690924, "learning_rate": 5.8882713790021875e-06, "loss": 0.844, "step": 12688 }, { "epoch": 0.45989634301040194, "grad_norm": 2.31559667641633, "learning_rate": 5.887693780831028e-06, "loss": 0.8515, "step": 12689 }, { "epoch": 0.4599325867130586, "grad_norm": 2.490784043875931, "learning_rate": 5.8871161704279845e-06, "loss": 0.9858, "step": 12690 }, { "epoch": 0.4599688304157153, "grad_norm": 2.2670173534370535, "learning_rate": 5.8865385478010175e-06, "loss": 0.9675, "step": 12691 }, { "epoch": 0.46000507411837194, "grad_norm": 2.4381252499325132, "learning_rate": 5.885960912958086e-06, "loss": 1.0847, "step": 12692 }, { "epoch": 0.4600413178210286, "grad_norm": 2.394928647497178, "learning_rate": 5.885383265907149e-06, "loss": 0.8312, "step": 12693 }, { "epoch": 0.4600775615236853, "grad_norm": 2.2875934747394133, "learning_rate": 5.884805606656164e-06, "loss": 0.802, "step": 12694 }, { "epoch": 0.46011380522634193, "grad_norm": 2.088861473187985, "learning_rate": 5.8842279352130946e-06, "loss": 0.69, "step": 12695 }, { "epoch": 0.4601500489289986, "grad_norm": 2.3699150430282936, "learning_rate": 5.883650251585899e-06, "loss": 0.9592, "step": 12696 }, { "epoch": 0.4601862926316552, "grad_norm": 2.064637723085158, "learning_rate": 5.883072555782537e-06, "loss": 0.7424, "step": 12697 }, { "epoch": 0.4602225363343119, "grad_norm": 2.471413965731392, "learning_rate": 5.882494847810968e-06, "loss": 0.9285, "step": 12698 }, { "epoch": 0.4602587800369686, "grad_norm": 2.3362922026557094, "learning_rate": 5.881917127679156e-06, "loss": 0.8771, "step": 12699 }, { "epoch": 0.4602950237396252, "grad_norm": 2.214380657893014, "learning_rate": 5.881339395395056e-06, "loss": 0.9017, "step": 12700 }, { "epoch": 0.4603312674422819, "grad_norm": 2.247209352164152, "learning_rate": 5.880761650966636e-06, "loss": 0.9139, "step": 12701 }, { "epoch": 0.46036751114493857, "grad_norm": 2.3840758737901946, "learning_rate": 5.8801838944018505e-06, "loss": 0.9405, "step": 12702 }, { "epoch": 0.4604037548475952, "grad_norm": 2.245040070738221, "learning_rate": 5.8796061257086655e-06, "loss": 0.8295, "step": 12703 }, { "epoch": 0.4604399985502519, "grad_norm": 2.4599767162600563, "learning_rate": 5.879028344895038e-06, "loss": 0.9544, "step": 12704 }, { "epoch": 0.46047624225290856, "grad_norm": 2.4192827992722497, "learning_rate": 5.878450551968932e-06, "loss": 1.0679, "step": 12705 }, { "epoch": 0.4605124859555652, "grad_norm": 3.2776696638629703, "learning_rate": 5.877872746938309e-06, "loss": 0.9004, "step": 12706 }, { "epoch": 0.4605487296582219, "grad_norm": 2.3880492247781633, "learning_rate": 5.877294929811129e-06, "loss": 1.0301, "step": 12707 }, { "epoch": 0.46058497336087856, "grad_norm": 2.4737637993998693, "learning_rate": 5.876717100595358e-06, "loss": 1.1398, "step": 12708 }, { "epoch": 0.4606212170635352, "grad_norm": 2.376545916410227, "learning_rate": 5.876139259298954e-06, "loss": 0.9134, "step": 12709 }, { "epoch": 0.46065746076619185, "grad_norm": 2.152515435029567, "learning_rate": 5.875561405929882e-06, "loss": 0.9956, "step": 12710 }, { "epoch": 0.46069370446884855, "grad_norm": 2.4108336696550365, "learning_rate": 5.874983540496102e-06, "loss": 0.8347, "step": 12711 }, { "epoch": 0.4607299481715052, "grad_norm": 2.243953394630424, "learning_rate": 5.87440566300558e-06, "loss": 1.0066, "step": 12712 }, { "epoch": 0.46076619187416185, "grad_norm": 2.4346357184025296, "learning_rate": 5.873827773466274e-06, "loss": 0.9451, "step": 12713 }, { "epoch": 0.46080243557681855, "grad_norm": 2.431250739994441, "learning_rate": 5.873249871886152e-06, "loss": 0.9315, "step": 12714 }, { "epoch": 0.4608386792794752, "grad_norm": 2.45481782768371, "learning_rate": 5.872671958273174e-06, "loss": 0.9888, "step": 12715 }, { "epoch": 0.46087492298213184, "grad_norm": 2.4384253592665437, "learning_rate": 5.872094032635304e-06, "loss": 0.9064, "step": 12716 }, { "epoch": 0.46091116668478854, "grad_norm": 2.2663917826291535, "learning_rate": 5.871516094980505e-06, "loss": 0.8996, "step": 12717 }, { "epoch": 0.4609474103874452, "grad_norm": 2.161375917931183, "learning_rate": 5.8709381453167435e-06, "loss": 1.0461, "step": 12718 }, { "epoch": 0.46098365409010184, "grad_norm": 2.4046572831170923, "learning_rate": 5.870360183651979e-06, "loss": 1.0034, "step": 12719 }, { "epoch": 0.4610198977927585, "grad_norm": 2.3530265727911273, "learning_rate": 5.869782209994178e-06, "loss": 0.8254, "step": 12720 }, { "epoch": 0.4610561414954152, "grad_norm": 2.256666279110441, "learning_rate": 5.869204224351306e-06, "loss": 0.8246, "step": 12721 }, { "epoch": 0.46109238519807183, "grad_norm": 2.2401467532301003, "learning_rate": 5.868626226731324e-06, "loss": 0.9212, "step": 12722 }, { "epoch": 0.4611286289007285, "grad_norm": 2.4247672457247127, "learning_rate": 5.868048217142197e-06, "loss": 0.8059, "step": 12723 }, { "epoch": 0.4611648726033852, "grad_norm": 2.382042026766482, "learning_rate": 5.867470195591892e-06, "loss": 0.9041, "step": 12724 }, { "epoch": 0.4612011163060418, "grad_norm": 2.2101717677575503, "learning_rate": 5.866892162088371e-06, "loss": 0.858, "step": 12725 }, { "epoch": 0.46123736000869847, "grad_norm": 2.633169614440798, "learning_rate": 5.866314116639602e-06, "loss": 0.7742, "step": 12726 }, { "epoch": 0.4612736037113552, "grad_norm": 2.294941249195644, "learning_rate": 5.865736059253546e-06, "loss": 0.9214, "step": 12727 }, { "epoch": 0.4613098474140118, "grad_norm": 2.298412282925859, "learning_rate": 5.865157989938173e-06, "loss": 0.9058, "step": 12728 }, { "epoch": 0.46134609111666847, "grad_norm": 2.043186237258557, "learning_rate": 5.864579908701444e-06, "loss": 0.9099, "step": 12729 }, { "epoch": 0.46138233481932517, "grad_norm": 2.2766468077688278, "learning_rate": 5.864001815551329e-06, "loss": 0.6521, "step": 12730 }, { "epoch": 0.4614185785219818, "grad_norm": 2.498487629715798, "learning_rate": 5.8634237104957916e-06, "loss": 1.0379, "step": 12731 }, { "epoch": 0.46145482222463846, "grad_norm": 2.38390762718996, "learning_rate": 5.862845593542796e-06, "loss": 0.8649, "step": 12732 }, { "epoch": 0.4614910659272951, "grad_norm": 2.2919071236193407, "learning_rate": 5.862267464700311e-06, "loss": 0.8767, "step": 12733 }, { "epoch": 0.4615273096299518, "grad_norm": 2.3406712157193548, "learning_rate": 5.861689323976301e-06, "loss": 0.935, "step": 12734 }, { "epoch": 0.46156355333260846, "grad_norm": 2.6411047989156806, "learning_rate": 5.861111171378733e-06, "loss": 0.9399, "step": 12735 }, { "epoch": 0.4615997970352651, "grad_norm": 2.5643720429696257, "learning_rate": 5.860533006915575e-06, "loss": 1.1059, "step": 12736 }, { "epoch": 0.4616360407379218, "grad_norm": 2.464536412346546, "learning_rate": 5.859954830594792e-06, "loss": 0.9936, "step": 12737 }, { "epoch": 0.46167228444057845, "grad_norm": 2.318578312007752, "learning_rate": 5.859376642424352e-06, "loss": 0.8563, "step": 12738 }, { "epoch": 0.4617085281432351, "grad_norm": 2.561242296358319, "learning_rate": 5.85879844241222e-06, "loss": 0.8703, "step": 12739 }, { "epoch": 0.4617447718458918, "grad_norm": 2.1215620439606058, "learning_rate": 5.858220230566366e-06, "loss": 0.8078, "step": 12740 }, { "epoch": 0.46178101554854845, "grad_norm": 2.633830834078374, "learning_rate": 5.857642006894757e-06, "loss": 0.9025, "step": 12741 }, { "epoch": 0.4618172592512051, "grad_norm": 2.226306844469382, "learning_rate": 5.8570637714053605e-06, "loss": 0.8174, "step": 12742 }, { "epoch": 0.4618535029538618, "grad_norm": 2.3618468231901155, "learning_rate": 5.85648552410614e-06, "loss": 0.9985, "step": 12743 }, { "epoch": 0.46188974665651844, "grad_norm": 2.1489645256239545, "learning_rate": 5.85590726500507e-06, "loss": 0.9007, "step": 12744 }, { "epoch": 0.4619259903591751, "grad_norm": 2.324891416172715, "learning_rate": 5.855328994110115e-06, "loss": 0.9417, "step": 12745 }, { "epoch": 0.46196223406183173, "grad_norm": 2.399458368850474, "learning_rate": 5.854750711429243e-06, "loss": 0.9516, "step": 12746 }, { "epoch": 0.46199847776448844, "grad_norm": 2.429659648927801, "learning_rate": 5.8541724169704225e-06, "loss": 0.9424, "step": 12747 }, { "epoch": 0.4620347214671451, "grad_norm": 2.142104021723109, "learning_rate": 5.853594110741625e-06, "loss": 0.8201, "step": 12748 }, { "epoch": 0.46207096516980173, "grad_norm": 2.2699286583920886, "learning_rate": 5.8530157927508145e-06, "loss": 1.0205, "step": 12749 }, { "epoch": 0.46210720887245843, "grad_norm": 2.4975089875880525, "learning_rate": 5.852437463005964e-06, "loss": 1.1557, "step": 12750 }, { "epoch": 0.4621434525751151, "grad_norm": 2.1472027010890704, "learning_rate": 5.85185912151504e-06, "loss": 0.8906, "step": 12751 }, { "epoch": 0.4621796962777717, "grad_norm": 2.383163553052578, "learning_rate": 5.851280768286014e-06, "loss": 0.8714, "step": 12752 }, { "epoch": 0.4622159399804284, "grad_norm": 2.309092144784502, "learning_rate": 5.8507024033268515e-06, "loss": 1.0176, "step": 12753 }, { "epoch": 0.46225218368308507, "grad_norm": 2.6163423915572626, "learning_rate": 5.850124026645526e-06, "loss": 0.801, "step": 12754 }, { "epoch": 0.4622884273857417, "grad_norm": 2.5485016252776593, "learning_rate": 5.849545638250004e-06, "loss": 1.0547, "step": 12755 }, { "epoch": 0.46232467108839836, "grad_norm": 2.4411643333363946, "learning_rate": 5.848967238148259e-06, "loss": 0.9725, "step": 12756 }, { "epoch": 0.46236091479105507, "grad_norm": 2.413404080735577, "learning_rate": 5.848388826348257e-06, "loss": 1.0251, "step": 12757 }, { "epoch": 0.4623971584937117, "grad_norm": 2.242626664528722, "learning_rate": 5.847810402857971e-06, "loss": 0.7656, "step": 12758 }, { "epoch": 0.46243340219636836, "grad_norm": 1.8493493662991731, "learning_rate": 5.847231967685369e-06, "loss": 0.8184, "step": 12759 }, { "epoch": 0.46246964589902506, "grad_norm": 2.495456682344098, "learning_rate": 5.846653520838425e-06, "loss": 1.0849, "step": 12760 }, { "epoch": 0.4625058896016817, "grad_norm": 2.4342850378855414, "learning_rate": 5.8460750623251075e-06, "loss": 0.8826, "step": 12761 }, { "epoch": 0.46254213330433835, "grad_norm": 2.508752788519853, "learning_rate": 5.8454965921533855e-06, "loss": 0.9491, "step": 12762 }, { "epoch": 0.46257837700699506, "grad_norm": 2.58009414409589, "learning_rate": 5.844918110331233e-06, "loss": 0.9996, "step": 12763 }, { "epoch": 0.4626146207096517, "grad_norm": 2.5727535616746775, "learning_rate": 5.84433961686662e-06, "loss": 1.0032, "step": 12764 }, { "epoch": 0.46265086441230835, "grad_norm": 2.233682127247167, "learning_rate": 5.843761111767516e-06, "loss": 0.8738, "step": 12765 }, { "epoch": 0.46268710811496505, "grad_norm": 2.2170875004004835, "learning_rate": 5.843182595041897e-06, "loss": 0.9288, "step": 12766 }, { "epoch": 0.4627233518176217, "grad_norm": 1.9153000394590713, "learning_rate": 5.842604066697729e-06, "loss": 0.7034, "step": 12767 }, { "epoch": 0.46275959552027834, "grad_norm": 2.1138504268930123, "learning_rate": 5.842025526742988e-06, "loss": 0.689, "step": 12768 }, { "epoch": 0.462795839222935, "grad_norm": 2.6945815700344684, "learning_rate": 5.841446975185644e-06, "loss": 0.9098, "step": 12769 }, { "epoch": 0.4628320829255917, "grad_norm": 2.2741868709406194, "learning_rate": 5.84086841203367e-06, "loss": 0.8555, "step": 12770 }, { "epoch": 0.46286832662824834, "grad_norm": 2.5585789306718736, "learning_rate": 5.840289837295039e-06, "loss": 0.9696, "step": 12771 }, { "epoch": 0.462904570330905, "grad_norm": 2.406808336020717, "learning_rate": 5.839711250977721e-06, "loss": 0.9345, "step": 12772 }, { "epoch": 0.4629408140335617, "grad_norm": 2.3597046861554607, "learning_rate": 5.839132653089688e-06, "loss": 1.013, "step": 12773 }, { "epoch": 0.46297705773621833, "grad_norm": 2.5389560721824407, "learning_rate": 5.8385540436389175e-06, "loss": 0.9981, "step": 12774 }, { "epoch": 0.463013301438875, "grad_norm": 2.3820517314719303, "learning_rate": 5.837975422633377e-06, "loss": 0.9437, "step": 12775 }, { "epoch": 0.4630495451415317, "grad_norm": 2.5343430630162502, "learning_rate": 5.837396790081045e-06, "loss": 1.0347, "step": 12776 }, { "epoch": 0.46308578884418833, "grad_norm": 2.45595273567399, "learning_rate": 5.836818145989889e-06, "loss": 1.1275, "step": 12777 }, { "epoch": 0.463122032546845, "grad_norm": 1.902538497392138, "learning_rate": 5.836239490367885e-06, "loss": 0.8264, "step": 12778 }, { "epoch": 0.4631582762495016, "grad_norm": 2.3580853016062875, "learning_rate": 5.835660823223008e-06, "loss": 0.7457, "step": 12779 }, { "epoch": 0.4631945199521583, "grad_norm": 2.7985419911699663, "learning_rate": 5.83508214456323e-06, "loss": 0.8762, "step": 12780 }, { "epoch": 0.46323076365481497, "grad_norm": 2.5454565409022143, "learning_rate": 5.834503454396525e-06, "loss": 0.8863, "step": 12781 }, { "epoch": 0.4632670073574716, "grad_norm": 2.4699623440625493, "learning_rate": 5.833924752730868e-06, "loss": 1.0472, "step": 12782 }, { "epoch": 0.4633032510601283, "grad_norm": 2.580947022778572, "learning_rate": 5.83334603957423e-06, "loss": 0.8978, "step": 12783 }, { "epoch": 0.46333949476278496, "grad_norm": 2.528041603086815, "learning_rate": 5.832767314934588e-06, "loss": 0.9094, "step": 12784 }, { "epoch": 0.4633757384654416, "grad_norm": 2.1935902987783256, "learning_rate": 5.832188578819917e-06, "loss": 0.9751, "step": 12785 }, { "epoch": 0.4634119821680983, "grad_norm": 2.0963149744324387, "learning_rate": 5.83160983123819e-06, "loss": 0.9025, "step": 12786 }, { "epoch": 0.46344822587075496, "grad_norm": 2.3487479883904525, "learning_rate": 5.831031072197384e-06, "loss": 0.9259, "step": 12787 }, { "epoch": 0.4634844695734116, "grad_norm": 2.2853052846171784, "learning_rate": 5.830452301705472e-06, "loss": 0.9676, "step": 12788 }, { "epoch": 0.4635207132760683, "grad_norm": 2.3477196062652763, "learning_rate": 5.829873519770428e-06, "loss": 0.8208, "step": 12789 }, { "epoch": 0.46355695697872495, "grad_norm": 2.6529751396562626, "learning_rate": 5.82929472640023e-06, "loss": 0.9189, "step": 12790 }, { "epoch": 0.4635932006813816, "grad_norm": 2.29875054526882, "learning_rate": 5.828715921602852e-06, "loss": 1.0629, "step": 12791 }, { "epoch": 0.46362944438403825, "grad_norm": 2.2833523429060243, "learning_rate": 5.828137105386269e-06, "loss": 0.9375, "step": 12792 }, { "epoch": 0.46366568808669495, "grad_norm": 2.2904783496559324, "learning_rate": 5.827558277758458e-06, "loss": 0.9336, "step": 12793 }, { "epoch": 0.4637019317893516, "grad_norm": 1.9673151755845562, "learning_rate": 5.826979438727393e-06, "loss": 0.8992, "step": 12794 }, { "epoch": 0.46373817549200824, "grad_norm": 2.3418358668238417, "learning_rate": 5.826400588301054e-06, "loss": 0.7688, "step": 12795 }, { "epoch": 0.46377441919466494, "grad_norm": 2.4494006936187436, "learning_rate": 5.825821726487414e-06, "loss": 0.8286, "step": 12796 }, { "epoch": 0.4638106628973216, "grad_norm": 2.090931526432383, "learning_rate": 5.825242853294451e-06, "loss": 1.054, "step": 12797 }, { "epoch": 0.46384690659997824, "grad_norm": 1.9515711202345167, "learning_rate": 5.824663968730139e-06, "loss": 0.7535, "step": 12798 }, { "epoch": 0.46388315030263494, "grad_norm": 2.0857795935364183, "learning_rate": 5.824085072802457e-06, "loss": 1.0415, "step": 12799 }, { "epoch": 0.4639193940052916, "grad_norm": 2.1641290449918342, "learning_rate": 5.823506165519379e-06, "loss": 0.8821, "step": 12800 }, { "epoch": 0.46395563770794823, "grad_norm": 2.345733398723493, "learning_rate": 5.822927246888888e-06, "loss": 0.7383, "step": 12801 }, { "epoch": 0.46399188141060493, "grad_norm": 2.6340725106957765, "learning_rate": 5.822348316918953e-06, "loss": 0.9505, "step": 12802 }, { "epoch": 0.4640281251132616, "grad_norm": 2.3924467583464177, "learning_rate": 5.8217693756175585e-06, "loss": 0.9045, "step": 12803 }, { "epoch": 0.4640643688159182, "grad_norm": 2.4977201364292014, "learning_rate": 5.821190422992677e-06, "loss": 0.9553, "step": 12804 }, { "epoch": 0.4641006125185749, "grad_norm": 2.453674224273359, "learning_rate": 5.82061145905229e-06, "loss": 0.9625, "step": 12805 }, { "epoch": 0.4641368562212316, "grad_norm": 2.640488150925952, "learning_rate": 5.820032483804372e-06, "loss": 0.8575, "step": 12806 }, { "epoch": 0.4641730999238882, "grad_norm": 2.1841296703040487, "learning_rate": 5.819453497256903e-06, "loss": 0.727, "step": 12807 }, { "epoch": 0.46420934362654487, "grad_norm": 2.257693447966573, "learning_rate": 5.81887449941786e-06, "loss": 1.0365, "step": 12808 }, { "epoch": 0.46424558732920157, "grad_norm": 2.4315026805461613, "learning_rate": 5.8182954902952225e-06, "loss": 0.8963, "step": 12809 }, { "epoch": 0.4642818310318582, "grad_norm": 2.7813152447734284, "learning_rate": 5.817716469896967e-06, "loss": 0.944, "step": 12810 }, { "epoch": 0.46431807473451486, "grad_norm": 2.1746022825730362, "learning_rate": 5.817137438231074e-06, "loss": 0.9649, "step": 12811 }, { "epoch": 0.46435431843717156, "grad_norm": 2.1686673576272426, "learning_rate": 5.816558395305521e-06, "loss": 1.0011, "step": 12812 }, { "epoch": 0.4643905621398282, "grad_norm": 2.3585362637703002, "learning_rate": 5.815979341128286e-06, "loss": 0.9759, "step": 12813 }, { "epoch": 0.46442680584248486, "grad_norm": 2.1570891636594167, "learning_rate": 5.81540027570735e-06, "loss": 0.9034, "step": 12814 }, { "epoch": 0.4644630495451415, "grad_norm": 2.5738486328129295, "learning_rate": 5.814821199050692e-06, "loss": 0.9206, "step": 12815 }, { "epoch": 0.4644992932477982, "grad_norm": 2.3897189889459267, "learning_rate": 5.81424211116629e-06, "loss": 0.7751, "step": 12816 }, { "epoch": 0.46453553695045485, "grad_norm": 2.319339317399405, "learning_rate": 5.813663012062124e-06, "loss": 0.7897, "step": 12817 }, { "epoch": 0.4645717806531115, "grad_norm": 2.3558131086637153, "learning_rate": 5.813083901746175e-06, "loss": 1.0063, "step": 12818 }, { "epoch": 0.4646080243557682, "grad_norm": 2.132335952842218, "learning_rate": 5.812504780226421e-06, "loss": 0.915, "step": 12819 }, { "epoch": 0.46464426805842485, "grad_norm": 2.1345796331963656, "learning_rate": 5.811925647510841e-06, "loss": 0.8568, "step": 12820 }, { "epoch": 0.4646805117610815, "grad_norm": 2.712288873522964, "learning_rate": 5.811346503607418e-06, "loss": 0.9904, "step": 12821 }, { "epoch": 0.4647167554637382, "grad_norm": 2.1122380172582447, "learning_rate": 5.81076734852413e-06, "loss": 0.7717, "step": 12822 }, { "epoch": 0.46475299916639484, "grad_norm": 2.298620481822112, "learning_rate": 5.81018818226896e-06, "loss": 1.0714, "step": 12823 }, { "epoch": 0.4647892428690515, "grad_norm": 2.1375858332514577, "learning_rate": 5.809609004849884e-06, "loss": 0.9723, "step": 12824 }, { "epoch": 0.4648254865717082, "grad_norm": 2.207899099242037, "learning_rate": 5.809029816274888e-06, "loss": 0.8646, "step": 12825 }, { "epoch": 0.46486173027436484, "grad_norm": 2.0319709401738595, "learning_rate": 5.8084506165519495e-06, "loss": 0.9139, "step": 12826 }, { "epoch": 0.4648979739770215, "grad_norm": 2.422159745382059, "learning_rate": 5.8078714056890505e-06, "loss": 1.0153, "step": 12827 }, { "epoch": 0.46493421767967813, "grad_norm": 2.230379117668708, "learning_rate": 5.8072921836941736e-06, "loss": 0.6407, "step": 12828 }, { "epoch": 0.46497046138233483, "grad_norm": 2.255080359758366, "learning_rate": 5.8067129505752976e-06, "loss": 0.6164, "step": 12829 }, { "epoch": 0.4650067050849915, "grad_norm": 2.3368471196970426, "learning_rate": 5.806133706340403e-06, "loss": 0.947, "step": 12830 }, { "epoch": 0.4650429487876481, "grad_norm": 2.1900077538120044, "learning_rate": 5.805554450997476e-06, "loss": 0.9533, "step": 12831 }, { "epoch": 0.4650791924903048, "grad_norm": 2.547551046628358, "learning_rate": 5.804975184554494e-06, "loss": 1.0366, "step": 12832 }, { "epoch": 0.4651154361929615, "grad_norm": 2.711211083926163, "learning_rate": 5.804395907019443e-06, "loss": 0.9446, "step": 12833 }, { "epoch": 0.4651516798956181, "grad_norm": 2.25367791667971, "learning_rate": 5.803816618400301e-06, "loss": 0.8557, "step": 12834 }, { "epoch": 0.4651879235982748, "grad_norm": 2.5494575784741276, "learning_rate": 5.803237318705053e-06, "loss": 0.8748, "step": 12835 }, { "epoch": 0.46522416730093147, "grad_norm": 2.438882572592566, "learning_rate": 5.80265800794168e-06, "loss": 0.866, "step": 12836 }, { "epoch": 0.4652604110035881, "grad_norm": 2.4811845408086186, "learning_rate": 5.802078686118165e-06, "loss": 0.7939, "step": 12837 }, { "epoch": 0.4652966547062448, "grad_norm": 2.279645256872106, "learning_rate": 5.801499353242493e-06, "loss": 0.9598, "step": 12838 }, { "epoch": 0.46533289840890146, "grad_norm": 2.333971600646333, "learning_rate": 5.800920009322642e-06, "loss": 1.1617, "step": 12839 }, { "epoch": 0.4653691421115581, "grad_norm": 2.4317231030331583, "learning_rate": 5.800340654366599e-06, "loss": 0.9353, "step": 12840 }, { "epoch": 0.46540538581421476, "grad_norm": 2.580996705613037, "learning_rate": 5.799761288382345e-06, "loss": 0.996, "step": 12841 }, { "epoch": 0.46544162951687146, "grad_norm": 2.379736051089753, "learning_rate": 5.799181911377864e-06, "loss": 0.846, "step": 12842 }, { "epoch": 0.4654778732195281, "grad_norm": 2.221964462442074, "learning_rate": 5.798602523361141e-06, "loss": 0.8261, "step": 12843 }, { "epoch": 0.46551411692218475, "grad_norm": 1.9907033443751048, "learning_rate": 5.798023124340156e-06, "loss": 0.916, "step": 12844 }, { "epoch": 0.46555036062484145, "grad_norm": 2.523368008587828, "learning_rate": 5.797443714322897e-06, "loss": 0.9427, "step": 12845 }, { "epoch": 0.4655866043274981, "grad_norm": 1.8507478318090849, "learning_rate": 5.796864293317345e-06, "loss": 0.9043, "step": 12846 }, { "epoch": 0.46562284803015475, "grad_norm": 2.2087827380273817, "learning_rate": 5.7962848613314846e-06, "loss": 0.7675, "step": 12847 }, { "epoch": 0.46565909173281145, "grad_norm": 2.5493038785570024, "learning_rate": 5.795705418373302e-06, "loss": 1.0943, "step": 12848 }, { "epoch": 0.4656953354354681, "grad_norm": 2.59974383829944, "learning_rate": 5.795125964450779e-06, "loss": 1.1911, "step": 12849 }, { "epoch": 0.46573157913812474, "grad_norm": 2.543285762134051, "learning_rate": 5.7945464995719e-06, "loss": 0.9539, "step": 12850 }, { "epoch": 0.4657678228407814, "grad_norm": 2.2804172742139546, "learning_rate": 5.793967023744652e-06, "loss": 0.8585, "step": 12851 }, { "epoch": 0.4658040665434381, "grad_norm": 2.3890750331928667, "learning_rate": 5.793387536977018e-06, "loss": 0.9104, "step": 12852 }, { "epoch": 0.46584031024609474, "grad_norm": 2.2943805287396533, "learning_rate": 5.792808039276985e-06, "loss": 0.9018, "step": 12853 }, { "epoch": 0.4658765539487514, "grad_norm": 2.2120089973113792, "learning_rate": 5.792228530652534e-06, "loss": 0.8565, "step": 12854 }, { "epoch": 0.4659127976514081, "grad_norm": 1.8395296939014998, "learning_rate": 5.791649011111656e-06, "loss": 0.816, "step": 12855 }, { "epoch": 0.46594904135406473, "grad_norm": 2.287475763574693, "learning_rate": 5.79106948066233e-06, "loss": 0.9472, "step": 12856 }, { "epoch": 0.4659852850567214, "grad_norm": 2.360876467842985, "learning_rate": 5.790489939312548e-06, "loss": 1.0411, "step": 12857 }, { "epoch": 0.4660215287593781, "grad_norm": 2.593601801773173, "learning_rate": 5.789910387070292e-06, "loss": 0.9988, "step": 12858 }, { "epoch": 0.4660577724620347, "grad_norm": 2.3979629888898395, "learning_rate": 5.7893308239435484e-06, "loss": 0.9192, "step": 12859 }, { "epoch": 0.46609401616469137, "grad_norm": 2.1212112289484373, "learning_rate": 5.788751249940302e-06, "loss": 0.7618, "step": 12860 }, { "epoch": 0.4661302598673481, "grad_norm": 2.4988499665308024, "learning_rate": 5.788171665068543e-06, "loss": 0.8763, "step": 12861 }, { "epoch": 0.4661665035700047, "grad_norm": 2.3549602658978004, "learning_rate": 5.787592069336252e-06, "loss": 0.9575, "step": 12862 }, { "epoch": 0.46620274727266137, "grad_norm": 2.444517823297187, "learning_rate": 5.787012462751421e-06, "loss": 1.0807, "step": 12863 }, { "epoch": 0.466238990975318, "grad_norm": 2.0845432873340575, "learning_rate": 5.786432845322033e-06, "loss": 0.895, "step": 12864 }, { "epoch": 0.4662752346779747, "grad_norm": 2.281595944599325, "learning_rate": 5.785853217056077e-06, "loss": 0.9467, "step": 12865 }, { "epoch": 0.46631147838063136, "grad_norm": 2.2772316500165797, "learning_rate": 5.785273577961538e-06, "loss": 0.9719, "step": 12866 }, { "epoch": 0.466347722083288, "grad_norm": 2.15486917592673, "learning_rate": 5.784693928046405e-06, "loss": 0.8771, "step": 12867 }, { "epoch": 0.4663839657859447, "grad_norm": 2.3364488155509484, "learning_rate": 5.784114267318664e-06, "loss": 1.0694, "step": 12868 }, { "epoch": 0.46642020948860136, "grad_norm": 2.316718539313867, "learning_rate": 5.783534595786302e-06, "loss": 0.9192, "step": 12869 }, { "epoch": 0.466456453191258, "grad_norm": 2.328848721100613, "learning_rate": 5.782954913457309e-06, "loss": 0.8523, "step": 12870 }, { "epoch": 0.4664926968939147, "grad_norm": 2.5464512562825288, "learning_rate": 5.782375220339668e-06, "loss": 1.0142, "step": 12871 }, { "epoch": 0.46652894059657135, "grad_norm": 2.464437195706722, "learning_rate": 5.781795516441371e-06, "loss": 0.8245, "step": 12872 }, { "epoch": 0.466565184299228, "grad_norm": 2.432171758627649, "learning_rate": 5.781215801770403e-06, "loss": 0.9004, "step": 12873 }, { "epoch": 0.4666014280018847, "grad_norm": 2.4803800774817573, "learning_rate": 5.780636076334757e-06, "loss": 0.9481, "step": 12874 }, { "epoch": 0.46663767170454135, "grad_norm": 2.5467351798718836, "learning_rate": 5.780056340142416e-06, "loss": 0.8308, "step": 12875 }, { "epoch": 0.466673915407198, "grad_norm": 2.3940258460404804, "learning_rate": 5.7794765932013715e-06, "loss": 1.0497, "step": 12876 }, { "epoch": 0.46671015910985464, "grad_norm": 2.2436016102965297, "learning_rate": 5.778896835519609e-06, "loss": 0.884, "step": 12877 }, { "epoch": 0.46674640281251134, "grad_norm": 2.093339287487638, "learning_rate": 5.7783170671051214e-06, "loss": 0.8292, "step": 12878 }, { "epoch": 0.466782646515168, "grad_norm": 2.4443209111028943, "learning_rate": 5.777737287965893e-06, "loss": 0.8932, "step": 12879 }, { "epoch": 0.46681889021782463, "grad_norm": 2.3444652372123067, "learning_rate": 5.777157498109916e-06, "loss": 1.1401, "step": 12880 }, { "epoch": 0.46685513392048134, "grad_norm": 2.432979152338633, "learning_rate": 5.776577697545179e-06, "loss": 0.9846, "step": 12881 }, { "epoch": 0.466891377623138, "grad_norm": 2.3464952350075015, "learning_rate": 5.775997886279672e-06, "loss": 1.2279, "step": 12882 }, { "epoch": 0.46692762132579463, "grad_norm": 2.333629592028586, "learning_rate": 5.775418064321382e-06, "loss": 1.1488, "step": 12883 }, { "epoch": 0.46696386502845133, "grad_norm": 2.428889174633871, "learning_rate": 5.7748382316783e-06, "loss": 0.9133, "step": 12884 }, { "epoch": 0.467000108731108, "grad_norm": 2.5296755924081373, "learning_rate": 5.774258388358417e-06, "loss": 0.9096, "step": 12885 }, { "epoch": 0.4670363524337646, "grad_norm": 2.2185759506002434, "learning_rate": 5.773678534369722e-06, "loss": 0.6748, "step": 12886 }, { "epoch": 0.46707259613642127, "grad_norm": 2.345733267175591, "learning_rate": 5.773098669720203e-06, "loss": 1.0411, "step": 12887 }, { "epoch": 0.46710883983907797, "grad_norm": 2.6783843230371507, "learning_rate": 5.772518794417853e-06, "loss": 0.7737, "step": 12888 }, { "epoch": 0.4671450835417346, "grad_norm": 2.489968867930801, "learning_rate": 5.7719389084706605e-06, "loss": 0.9599, "step": 12889 }, { "epoch": 0.46718132724439126, "grad_norm": 2.1874950027706817, "learning_rate": 5.771359011886617e-06, "loss": 0.9308, "step": 12890 }, { "epoch": 0.46721757094704797, "grad_norm": 2.3475082384678445, "learning_rate": 5.770779104673712e-06, "loss": 0.9213, "step": 12891 }, { "epoch": 0.4672538146497046, "grad_norm": 2.3550130485362364, "learning_rate": 5.770199186839938e-06, "loss": 1.088, "step": 12892 }, { "epoch": 0.46729005835236126, "grad_norm": 2.2971741180761827, "learning_rate": 5.769619258393284e-06, "loss": 0.7574, "step": 12893 }, { "epoch": 0.46732630205501796, "grad_norm": 2.5206875125305093, "learning_rate": 5.769039319341742e-06, "loss": 1.1626, "step": 12894 }, { "epoch": 0.4673625457576746, "grad_norm": 2.3767627312290363, "learning_rate": 5.768459369693305e-06, "loss": 0.8957, "step": 12895 }, { "epoch": 0.46739878946033125, "grad_norm": 2.4534955928678417, "learning_rate": 5.767879409455963e-06, "loss": 0.9792, "step": 12896 }, { "epoch": 0.46743503316298796, "grad_norm": 2.326359148397866, "learning_rate": 5.767299438637705e-06, "loss": 1.1603, "step": 12897 }, { "epoch": 0.4674712768656446, "grad_norm": 2.4590779719886102, "learning_rate": 5.766719457246527e-06, "loss": 0.9445, "step": 12898 }, { "epoch": 0.46750752056830125, "grad_norm": 2.3963923679753183, "learning_rate": 5.766139465290417e-06, "loss": 1.0502, "step": 12899 }, { "epoch": 0.4675437642709579, "grad_norm": 2.4955401031314532, "learning_rate": 5.765559462777368e-06, "loss": 0.8376, "step": 12900 }, { "epoch": 0.4675800079736146, "grad_norm": 2.4707790817151967, "learning_rate": 5.764979449715373e-06, "loss": 0.9023, "step": 12901 }, { "epoch": 0.46761625167627124, "grad_norm": 2.5301619161654174, "learning_rate": 5.764399426112425e-06, "loss": 0.8507, "step": 12902 }, { "epoch": 0.4676524953789279, "grad_norm": 2.1609028420054743, "learning_rate": 5.763819391976514e-06, "loss": 0.7809, "step": 12903 }, { "epoch": 0.4676887390815846, "grad_norm": 2.369652539476258, "learning_rate": 5.763239347315635e-06, "loss": 0.7882, "step": 12904 }, { "epoch": 0.46772498278424124, "grad_norm": 2.305149163486506, "learning_rate": 5.7626592921377776e-06, "loss": 0.865, "step": 12905 }, { "epoch": 0.4677612264868979, "grad_norm": 2.606985418979031, "learning_rate": 5.762079226450938e-06, "loss": 0.9397, "step": 12906 }, { "epoch": 0.4677974701895546, "grad_norm": 2.0830725320905548, "learning_rate": 5.7614991502631065e-06, "loss": 0.8132, "step": 12907 }, { "epoch": 0.46783371389221123, "grad_norm": 13.84889165479798, "learning_rate": 5.760919063582279e-06, "loss": 1.0249, "step": 12908 }, { "epoch": 0.4678699575948679, "grad_norm": 2.2232533779939687, "learning_rate": 5.760338966416444e-06, "loss": 1.0091, "step": 12909 }, { "epoch": 0.4679062012975246, "grad_norm": 2.338355907097083, "learning_rate": 5.759758858773601e-06, "loss": 1.0677, "step": 12910 }, { "epoch": 0.46794244500018123, "grad_norm": 2.1057867652136615, "learning_rate": 5.759178740661737e-06, "loss": 0.8351, "step": 12911 }, { "epoch": 0.4679786887028379, "grad_norm": 2.3212610053990597, "learning_rate": 5.758598612088853e-06, "loss": 0.9493, "step": 12912 }, { "epoch": 0.4680149324054945, "grad_norm": 2.197313475480626, "learning_rate": 5.7580184730629356e-06, "loss": 0.8761, "step": 12913 }, { "epoch": 0.4680511761081512, "grad_norm": 2.478729452181917, "learning_rate": 5.757438323591984e-06, "loss": 0.98, "step": 12914 }, { "epoch": 0.46808741981080787, "grad_norm": 2.2063653374667664, "learning_rate": 5.756858163683989e-06, "loss": 0.8278, "step": 12915 }, { "epoch": 0.4681236635134645, "grad_norm": 2.155257398280636, "learning_rate": 5.7562779933469485e-06, "loss": 0.9583, "step": 12916 }, { "epoch": 0.4681599072161212, "grad_norm": 2.609250646479571, "learning_rate": 5.755697812588852e-06, "loss": 0.8736, "step": 12917 }, { "epoch": 0.46819615091877786, "grad_norm": 2.263584331271806, "learning_rate": 5.755117621417699e-06, "loss": 1.1188, "step": 12918 }, { "epoch": 0.4682323946214345, "grad_norm": 2.3905317103567936, "learning_rate": 5.754537419841479e-06, "loss": 0.7703, "step": 12919 }, { "epoch": 0.4682686383240912, "grad_norm": 2.2525462386150035, "learning_rate": 5.7539572078681924e-06, "loss": 0.8517, "step": 12920 }, { "epoch": 0.46830488202674786, "grad_norm": 2.287819029825206, "learning_rate": 5.75337698550583e-06, "loss": 0.9835, "step": 12921 }, { "epoch": 0.4683411257294045, "grad_norm": 2.10187409854845, "learning_rate": 5.752796752762389e-06, "loss": 0.797, "step": 12922 }, { "epoch": 0.46837736943206115, "grad_norm": 2.354854169449985, "learning_rate": 5.752216509645864e-06, "loss": 0.9362, "step": 12923 }, { "epoch": 0.46841361313471785, "grad_norm": 2.471834126480669, "learning_rate": 5.75163625616425e-06, "loss": 1.05, "step": 12924 }, { "epoch": 0.4684498568373745, "grad_norm": 2.524421695850654, "learning_rate": 5.751055992325544e-06, "loss": 0.9854, "step": 12925 }, { "epoch": 0.46848610054003115, "grad_norm": 2.356065178514739, "learning_rate": 5.75047571813774e-06, "loss": 0.9035, "step": 12926 }, { "epoch": 0.46852234424268785, "grad_norm": 2.3880953333632386, "learning_rate": 5.749895433608834e-06, "loss": 1.1111, "step": 12927 }, { "epoch": 0.4685585879453445, "grad_norm": 2.1742859051833396, "learning_rate": 5.749315138746823e-06, "loss": 0.9669, "step": 12928 }, { "epoch": 0.46859483164800114, "grad_norm": 2.175684116556864, "learning_rate": 5.748734833559702e-06, "loss": 0.9095, "step": 12929 }, { "epoch": 0.46863107535065784, "grad_norm": 2.1757497530154057, "learning_rate": 5.748154518055469e-06, "loss": 0.8861, "step": 12930 }, { "epoch": 0.4686673190533145, "grad_norm": 2.2317890014532096, "learning_rate": 5.747574192242117e-06, "loss": 0.9497, "step": 12931 }, { "epoch": 0.46870356275597114, "grad_norm": 2.4795987809752553, "learning_rate": 5.746993856127647e-06, "loss": 1.083, "step": 12932 }, { "epoch": 0.46873980645862784, "grad_norm": 2.3116303874140973, "learning_rate": 5.746413509720051e-06, "loss": 0.8954, "step": 12933 }, { "epoch": 0.4687760501612845, "grad_norm": 2.4486993559999957, "learning_rate": 5.745833153027331e-06, "loss": 1.0477, "step": 12934 }, { "epoch": 0.46881229386394113, "grad_norm": 2.451833582441553, "learning_rate": 5.74525278605748e-06, "loss": 0.8453, "step": 12935 }, { "epoch": 0.4688485375665978, "grad_norm": 2.276237195416575, "learning_rate": 5.7446724088184965e-06, "loss": 0.9078, "step": 12936 }, { "epoch": 0.4688847812692545, "grad_norm": 2.306956056383202, "learning_rate": 5.744092021318377e-06, "loss": 0.8411, "step": 12937 }, { "epoch": 0.4689210249719111, "grad_norm": 2.5856189127536773, "learning_rate": 5.743511623565121e-06, "loss": 0.8181, "step": 12938 }, { "epoch": 0.4689572686745678, "grad_norm": 2.485796041987137, "learning_rate": 5.742931215566723e-06, "loss": 1.0344, "step": 12939 }, { "epoch": 0.4689935123772245, "grad_norm": 2.5106484741644755, "learning_rate": 5.742350797331182e-06, "loss": 0.7955, "step": 12940 }, { "epoch": 0.4690297560798811, "grad_norm": 2.4428455957937003, "learning_rate": 5.741770368866496e-06, "loss": 0.9305, "step": 12941 }, { "epoch": 0.46906599978253777, "grad_norm": 2.2948037230658236, "learning_rate": 5.7411899301806635e-06, "loss": 1.0063, "step": 12942 }, { "epoch": 0.46910224348519447, "grad_norm": 2.546033902004449, "learning_rate": 5.740609481281681e-06, "loss": 0.7899, "step": 12943 }, { "epoch": 0.4691384871878511, "grad_norm": 2.1953841009420123, "learning_rate": 5.74002902217755e-06, "loss": 0.8644, "step": 12944 }, { "epoch": 0.46917473089050776, "grad_norm": 2.2211534353060114, "learning_rate": 5.739448552876265e-06, "loss": 0.922, "step": 12945 }, { "epoch": 0.46921097459316446, "grad_norm": 2.4486234972847267, "learning_rate": 5.7388680733858285e-06, "loss": 0.8682, "step": 12946 }, { "epoch": 0.4692472182958211, "grad_norm": 2.3081530639481675, "learning_rate": 5.7382875837142336e-06, "loss": 0.8371, "step": 12947 }, { "epoch": 0.46928346199847776, "grad_norm": 2.2572887545861793, "learning_rate": 5.737707083869484e-06, "loss": 0.8365, "step": 12948 }, { "epoch": 0.4693197057011344, "grad_norm": 2.406490216460052, "learning_rate": 5.737126573859575e-06, "loss": 0.8314, "step": 12949 }, { "epoch": 0.4693559494037911, "grad_norm": 2.1173367813019923, "learning_rate": 5.736546053692509e-06, "loss": 0.9256, "step": 12950 }, { "epoch": 0.46939219310644775, "grad_norm": 2.1983795521218625, "learning_rate": 5.7359655233762844e-06, "loss": 0.8708, "step": 12951 }, { "epoch": 0.4694284368091044, "grad_norm": 2.6690486253083696, "learning_rate": 5.735384982918899e-06, "loss": 0.9356, "step": 12952 }, { "epoch": 0.4694646805117611, "grad_norm": 2.3543663261543415, "learning_rate": 5.734804432328354e-06, "loss": 0.7856, "step": 12953 }, { "epoch": 0.46950092421441775, "grad_norm": 2.615117613625651, "learning_rate": 5.734223871612649e-06, "loss": 1.1224, "step": 12954 }, { "epoch": 0.4695371679170744, "grad_norm": 2.293125313976363, "learning_rate": 5.733643300779782e-06, "loss": 0.9356, "step": 12955 }, { "epoch": 0.4695734116197311, "grad_norm": 2.188902003206871, "learning_rate": 5.733062719837753e-06, "loss": 1.0522, "step": 12956 }, { "epoch": 0.46960965532238774, "grad_norm": 2.1921098102338306, "learning_rate": 5.732482128794565e-06, "loss": 0.8292, "step": 12957 }, { "epoch": 0.4696458990250444, "grad_norm": 2.3974459903366894, "learning_rate": 5.731901527658214e-06, "loss": 0.8817, "step": 12958 }, { "epoch": 0.46968214272770104, "grad_norm": 2.4911078921572742, "learning_rate": 5.7313209164367045e-06, "loss": 0.9446, "step": 12959 }, { "epoch": 0.46971838643035774, "grad_norm": 2.424903832917312, "learning_rate": 5.730740295138034e-06, "loss": 0.8147, "step": 12960 }, { "epoch": 0.4697546301330144, "grad_norm": 2.077975833242936, "learning_rate": 5.730159663770206e-06, "loss": 0.968, "step": 12961 }, { "epoch": 0.46979087383567103, "grad_norm": 2.207108047818031, "learning_rate": 5.729579022341218e-06, "loss": 0.8696, "step": 12962 }, { "epoch": 0.46982711753832773, "grad_norm": 2.247754348465448, "learning_rate": 5.728998370859074e-06, "loss": 0.8516, "step": 12963 }, { "epoch": 0.4698633612409844, "grad_norm": 2.4050268551020295, "learning_rate": 5.728417709331771e-06, "loss": 0.8967, "step": 12964 }, { "epoch": 0.469899604943641, "grad_norm": 2.1061512132887885, "learning_rate": 5.727837037767313e-06, "loss": 0.7791, "step": 12965 }, { "epoch": 0.4699358486462977, "grad_norm": 2.345500973257344, "learning_rate": 5.7272563561737e-06, "loss": 0.9487, "step": 12966 }, { "epoch": 0.4699720923489544, "grad_norm": 2.0782535625341976, "learning_rate": 5.726675664558936e-06, "loss": 0.8939, "step": 12967 }, { "epoch": 0.470008336051611, "grad_norm": 2.7076476078531737, "learning_rate": 5.72609496293102e-06, "loss": 0.9039, "step": 12968 }, { "epoch": 0.4700445797542677, "grad_norm": 2.286698030763547, "learning_rate": 5.7255142512979555e-06, "loss": 0.9707, "step": 12969 }, { "epoch": 0.47008082345692437, "grad_norm": 2.483074752485725, "learning_rate": 5.724933529667742e-06, "loss": 0.8727, "step": 12970 }, { "epoch": 0.470117067159581, "grad_norm": 2.3320143934830293, "learning_rate": 5.724352798048384e-06, "loss": 0.8534, "step": 12971 }, { "epoch": 0.47015331086223766, "grad_norm": 2.565316824436762, "learning_rate": 5.723772056447883e-06, "loss": 0.8849, "step": 12972 }, { "epoch": 0.47018955456489436, "grad_norm": 2.2820636868480304, "learning_rate": 5.723191304874239e-06, "loss": 0.9638, "step": 12973 }, { "epoch": 0.470225798267551, "grad_norm": 2.5177510293066967, "learning_rate": 5.722610543335457e-06, "loss": 0.908, "step": 12974 }, { "epoch": 0.47026204197020766, "grad_norm": 2.237280438900555, "learning_rate": 5.722029771839539e-06, "loss": 0.7567, "step": 12975 }, { "epoch": 0.47029828567286436, "grad_norm": 2.5039058343323704, "learning_rate": 5.721448990394487e-06, "loss": 0.898, "step": 12976 }, { "epoch": 0.470334529375521, "grad_norm": 2.2589902550866805, "learning_rate": 5.720868199008304e-06, "loss": 1.0353, "step": 12977 }, { "epoch": 0.47037077307817765, "grad_norm": 2.4043610750381457, "learning_rate": 5.7202873976889936e-06, "loss": 0.9568, "step": 12978 }, { "epoch": 0.47040701678083435, "grad_norm": 2.2020527723506635, "learning_rate": 5.719706586444559e-06, "loss": 0.9114, "step": 12979 }, { "epoch": 0.470443260483491, "grad_norm": 2.199734239729471, "learning_rate": 5.7191257652830015e-06, "loss": 0.9608, "step": 12980 }, { "epoch": 0.47047950418614765, "grad_norm": 2.5302330218166422, "learning_rate": 5.718544934212327e-06, "loss": 0.9788, "step": 12981 }, { "epoch": 0.47051574788880435, "grad_norm": 2.7675992143675603, "learning_rate": 5.717964093240538e-06, "loss": 1.0208, "step": 12982 }, { "epoch": 0.470551991591461, "grad_norm": 2.195184955539172, "learning_rate": 5.7173832423756384e-06, "loss": 0.7483, "step": 12983 }, { "epoch": 0.47058823529411764, "grad_norm": 2.0171939433918977, "learning_rate": 5.7168023816256305e-06, "loss": 0.7754, "step": 12984 }, { "epoch": 0.4706244789967743, "grad_norm": 2.2722992115385936, "learning_rate": 5.71622151099852e-06, "loss": 1.0848, "step": 12985 }, { "epoch": 0.470660722699431, "grad_norm": 2.1814236206603965, "learning_rate": 5.7156406305023085e-06, "loss": 0.8828, "step": 12986 }, { "epoch": 0.47069696640208764, "grad_norm": 2.5499903580218404, "learning_rate": 5.715059740145003e-06, "loss": 0.9101, "step": 12987 }, { "epoch": 0.4707332101047443, "grad_norm": 2.259384945148144, "learning_rate": 5.714478839934608e-06, "loss": 0.8361, "step": 12988 }, { "epoch": 0.470769453807401, "grad_norm": 2.3459545184164794, "learning_rate": 5.713897929879125e-06, "loss": 0.9745, "step": 12989 }, { "epoch": 0.47080569751005763, "grad_norm": 2.4702189759197046, "learning_rate": 5.713317009986561e-06, "loss": 1.0126, "step": 12990 }, { "epoch": 0.4708419412127143, "grad_norm": 2.4339185857287275, "learning_rate": 5.71273608026492e-06, "loss": 0.9444, "step": 12991 }, { "epoch": 0.470878184915371, "grad_norm": 2.5318912236348132, "learning_rate": 5.712155140722206e-06, "loss": 1.1178, "step": 12992 }, { "epoch": 0.4709144286180276, "grad_norm": 2.1716121604904006, "learning_rate": 5.711574191366427e-06, "loss": 0.8513, "step": 12993 }, { "epoch": 0.47095067232068427, "grad_norm": 2.2029524428550493, "learning_rate": 5.710993232205584e-06, "loss": 0.9536, "step": 12994 }, { "epoch": 0.4709869160233409, "grad_norm": 2.5738672983056934, "learning_rate": 5.710412263247686e-06, "loss": 0.861, "step": 12995 }, { "epoch": 0.4710231597259976, "grad_norm": 2.334729819914677, "learning_rate": 5.709831284500734e-06, "loss": 0.8429, "step": 12996 }, { "epoch": 0.47105940342865427, "grad_norm": 2.2623863280774104, "learning_rate": 5.709250295972739e-06, "loss": 1.0604, "step": 12997 }, { "epoch": 0.4710956471313109, "grad_norm": 1.9403563614589598, "learning_rate": 5.708669297671701e-06, "loss": 0.7105, "step": 12998 }, { "epoch": 0.4711318908339676, "grad_norm": 2.2808933215733322, "learning_rate": 5.708088289605631e-06, "loss": 0.8938, "step": 12999 }, { "epoch": 0.47116813453662426, "grad_norm": 2.485217964349591, "learning_rate": 5.707507271782531e-06, "loss": 0.9957, "step": 13000 }, { "epoch": 0.4712043782392809, "grad_norm": 2.6484671601970726, "learning_rate": 5.70692624421041e-06, "loss": 0.8692, "step": 13001 }, { "epoch": 0.4712406219419376, "grad_norm": 2.4578773366401525, "learning_rate": 5.706345206897274e-06, "loss": 0.9857, "step": 13002 }, { "epoch": 0.47127686564459426, "grad_norm": 2.5234102020736247, "learning_rate": 5.7057641598511256e-06, "loss": 0.9621, "step": 13003 }, { "epoch": 0.4713131093472509, "grad_norm": 2.24641433087518, "learning_rate": 5.705183103079975e-06, "loss": 0.7794, "step": 13004 }, { "epoch": 0.4713493530499076, "grad_norm": 2.272151250499551, "learning_rate": 5.7046020365918284e-06, "loss": 0.9493, "step": 13005 }, { "epoch": 0.47138559675256425, "grad_norm": 2.6082815501643255, "learning_rate": 5.704020960394691e-06, "loss": 0.8881, "step": 13006 }, { "epoch": 0.4714218404552209, "grad_norm": 1.9819958926330976, "learning_rate": 5.7034398744965704e-06, "loss": 1.0089, "step": 13007 }, { "epoch": 0.47145808415787754, "grad_norm": 2.1859820429353833, "learning_rate": 5.702858778905473e-06, "loss": 0.8824, "step": 13008 }, { "epoch": 0.47149432786053425, "grad_norm": 2.3038775047347855, "learning_rate": 5.702277673629408e-06, "loss": 1.0156, "step": 13009 }, { "epoch": 0.4715305715631909, "grad_norm": 2.462785426208272, "learning_rate": 5.701696558676382e-06, "loss": 0.898, "step": 13010 }, { "epoch": 0.47156681526584754, "grad_norm": 2.659098665760859, "learning_rate": 5.701115434054401e-06, "loss": 0.9663, "step": 13011 }, { "epoch": 0.47160305896850424, "grad_norm": 2.2175757937304073, "learning_rate": 5.700534299771474e-06, "loss": 0.9024, "step": 13012 }, { "epoch": 0.4716393026711609, "grad_norm": 2.5397673592028895, "learning_rate": 5.6999531558356065e-06, "loss": 0.9184, "step": 13013 }, { "epoch": 0.47167554637381753, "grad_norm": 2.1853674806574417, "learning_rate": 5.6993720022548086e-06, "loss": 0.8926, "step": 13014 }, { "epoch": 0.47171179007647424, "grad_norm": 2.5179034675072667, "learning_rate": 5.698790839037088e-06, "loss": 0.9128, "step": 13015 }, { "epoch": 0.4717480337791309, "grad_norm": 2.2059968708451096, "learning_rate": 5.69820966619045e-06, "loss": 0.9483, "step": 13016 }, { "epoch": 0.47178427748178753, "grad_norm": 2.448012750587831, "learning_rate": 5.697628483722909e-06, "loss": 0.9886, "step": 13017 }, { "epoch": 0.47182052118444423, "grad_norm": 2.628206341781087, "learning_rate": 5.6970472916424656e-06, "loss": 0.8423, "step": 13018 }, { "epoch": 0.4718567648871009, "grad_norm": 2.4310383062173124, "learning_rate": 5.696466089957134e-06, "loss": 0.8967, "step": 13019 }, { "epoch": 0.4718930085897575, "grad_norm": 2.2434926517661773, "learning_rate": 5.695884878674921e-06, "loss": 0.9909, "step": 13020 }, { "epoch": 0.47192925229241417, "grad_norm": 2.4939753037767463, "learning_rate": 5.695303657803835e-06, "loss": 0.9842, "step": 13021 }, { "epoch": 0.47196549599507087, "grad_norm": 2.271369375942732, "learning_rate": 5.6947224273518855e-06, "loss": 1.0573, "step": 13022 }, { "epoch": 0.4720017396977275, "grad_norm": 2.7317040594270403, "learning_rate": 5.694141187327082e-06, "loss": 1.0364, "step": 13023 }, { "epoch": 0.47203798340038416, "grad_norm": 2.395800492640936, "learning_rate": 5.693559937737429e-06, "loss": 1.0396, "step": 13024 }, { "epoch": 0.47207422710304087, "grad_norm": 1.8768685377329122, "learning_rate": 5.692978678590942e-06, "loss": 0.7311, "step": 13025 }, { "epoch": 0.4721104708056975, "grad_norm": 2.3922589165997903, "learning_rate": 5.692397409895628e-06, "loss": 0.9637, "step": 13026 }, { "epoch": 0.47214671450835416, "grad_norm": 2.3376837751185877, "learning_rate": 5.691816131659497e-06, "loss": 0.9313, "step": 13027 }, { "epoch": 0.47218295821101086, "grad_norm": 2.3956570065779, "learning_rate": 5.691234843890556e-06, "loss": 1.0506, "step": 13028 }, { "epoch": 0.4722192019136675, "grad_norm": 2.127242633704787, "learning_rate": 5.690653546596818e-06, "loss": 0.8166, "step": 13029 }, { "epoch": 0.47225544561632415, "grad_norm": 2.2559844006022507, "learning_rate": 5.690072239786292e-06, "loss": 0.9285, "step": 13030 }, { "epoch": 0.4722916893189808, "grad_norm": 2.265681701891605, "learning_rate": 5.6894909234669884e-06, "loss": 0.9396, "step": 13031 }, { "epoch": 0.4723279330216375, "grad_norm": 2.1006945628963827, "learning_rate": 5.688909597646917e-06, "loss": 0.9512, "step": 13032 }, { "epoch": 0.47236417672429415, "grad_norm": 2.353693085951948, "learning_rate": 5.688328262334086e-06, "loss": 0.8499, "step": 13033 }, { "epoch": 0.4724004204269508, "grad_norm": 2.7789635683898735, "learning_rate": 5.6877469175365094e-06, "loss": 1.0139, "step": 13034 }, { "epoch": 0.4724366641296075, "grad_norm": 2.5117274533703124, "learning_rate": 5.687165563262196e-06, "loss": 0.985, "step": 13035 }, { "epoch": 0.47247290783226414, "grad_norm": 2.6405190482481693, "learning_rate": 5.6865841995191565e-06, "loss": 0.9217, "step": 13036 }, { "epoch": 0.4725091515349208, "grad_norm": 2.272679543508585, "learning_rate": 5.686002826315402e-06, "loss": 0.8836, "step": 13037 }, { "epoch": 0.4725453952375775, "grad_norm": 2.313958603645058, "learning_rate": 5.685421443658944e-06, "loss": 0.8896, "step": 13038 }, { "epoch": 0.47258163894023414, "grad_norm": 2.7525985497474497, "learning_rate": 5.684840051557791e-06, "loss": 0.9559, "step": 13039 }, { "epoch": 0.4726178826428908, "grad_norm": 2.488096045766316, "learning_rate": 5.684258650019959e-06, "loss": 0.8615, "step": 13040 }, { "epoch": 0.4726541263455475, "grad_norm": 2.3108176148815387, "learning_rate": 5.683677239053456e-06, "loss": 0.7387, "step": 13041 }, { "epoch": 0.47269037004820413, "grad_norm": 2.201615448666385, "learning_rate": 5.6830958186662945e-06, "loss": 0.8507, "step": 13042 }, { "epoch": 0.4727266137508608, "grad_norm": 2.6468402500635313, "learning_rate": 5.682514388866484e-06, "loss": 0.853, "step": 13043 }, { "epoch": 0.4727628574535174, "grad_norm": 2.5100867851561173, "learning_rate": 5.681932949662039e-06, "loss": 0.8675, "step": 13044 }, { "epoch": 0.47279910115617413, "grad_norm": 2.3856646893628812, "learning_rate": 5.68135150106097e-06, "loss": 0.8675, "step": 13045 }, { "epoch": 0.4728353448588308, "grad_norm": 2.121111037733684, "learning_rate": 5.6807700430712894e-06, "loss": 0.6885, "step": 13046 }, { "epoch": 0.4728715885614874, "grad_norm": 2.1852770974174227, "learning_rate": 5.680188575701009e-06, "loss": 0.922, "step": 13047 }, { "epoch": 0.4729078322641441, "grad_norm": 2.312318423351676, "learning_rate": 5.679607098958142e-06, "loss": 0.8334, "step": 13048 }, { "epoch": 0.47294407596680077, "grad_norm": 2.146176490520405, "learning_rate": 5.679025612850702e-06, "loss": 0.9154, "step": 13049 }, { "epoch": 0.4729803196694574, "grad_norm": 2.534930782880492, "learning_rate": 5.678444117386698e-06, "loss": 1.1967, "step": 13050 }, { "epoch": 0.4730165633721141, "grad_norm": 2.3853334203223313, "learning_rate": 5.677862612574143e-06, "loss": 0.8245, "step": 13051 }, { "epoch": 0.47305280707477076, "grad_norm": 2.0170650279473903, "learning_rate": 5.677281098421052e-06, "loss": 0.7464, "step": 13052 }, { "epoch": 0.4730890507774274, "grad_norm": 2.278427577829143, "learning_rate": 5.676699574935438e-06, "loss": 0.9927, "step": 13053 }, { "epoch": 0.47312529448008406, "grad_norm": 2.240751146950154, "learning_rate": 5.676118042125312e-06, "loss": 1.0615, "step": 13054 }, { "epoch": 0.47316153818274076, "grad_norm": 2.414856137066312, "learning_rate": 5.675536499998689e-06, "loss": 0.9677, "step": 13055 }, { "epoch": 0.4731977818853974, "grad_norm": 2.333737439616243, "learning_rate": 5.674954948563581e-06, "loss": 1.0784, "step": 13056 }, { "epoch": 0.47323402558805405, "grad_norm": 2.3436960121960766, "learning_rate": 5.674373387828003e-06, "loss": 1.0416, "step": 13057 }, { "epoch": 0.47327026929071075, "grad_norm": 2.296221108004971, "learning_rate": 5.673791817799968e-06, "loss": 0.8034, "step": 13058 }, { "epoch": 0.4733065129933674, "grad_norm": 2.3259236479459218, "learning_rate": 5.673210238487488e-06, "loss": 0.8536, "step": 13059 }, { "epoch": 0.47334275669602405, "grad_norm": 2.1884932616414927, "learning_rate": 5.672628649898578e-06, "loss": 0.7031, "step": 13060 }, { "epoch": 0.47337900039868075, "grad_norm": 2.324028151964799, "learning_rate": 5.6720470520412514e-06, "loss": 0.962, "step": 13061 }, { "epoch": 0.4734152441013374, "grad_norm": 2.496203189415569, "learning_rate": 5.671465444923525e-06, "loss": 0.9986, "step": 13062 }, { "epoch": 0.47345148780399404, "grad_norm": 2.2529312629863143, "learning_rate": 5.6708838285534085e-06, "loss": 0.7947, "step": 13063 }, { "epoch": 0.47348773150665074, "grad_norm": 2.4625366336740555, "learning_rate": 5.67030220293892e-06, "loss": 0.9574, "step": 13064 }, { "epoch": 0.4735239752093074, "grad_norm": 2.5340824537009725, "learning_rate": 5.669720568088072e-06, "loss": 0.9055, "step": 13065 }, { "epoch": 0.47356021891196404, "grad_norm": 2.1919490717772754, "learning_rate": 5.66913892400888e-06, "loss": 0.9228, "step": 13066 }, { "epoch": 0.4735964626146207, "grad_norm": 2.4343820053059866, "learning_rate": 5.6685572707093575e-06, "loss": 0.9943, "step": 13067 }, { "epoch": 0.4736327063172774, "grad_norm": 2.168875560117138, "learning_rate": 5.6679756081975215e-06, "loss": 0.8688, "step": 13068 }, { "epoch": 0.47366895001993403, "grad_norm": 2.536460035299749, "learning_rate": 5.667393936481384e-06, "loss": 0.8924, "step": 13069 }, { "epoch": 0.4737051937225907, "grad_norm": 2.3922257667767446, "learning_rate": 5.6668122555689645e-06, "loss": 1.0177, "step": 13070 }, { "epoch": 0.4737414374252474, "grad_norm": 2.230911108683529, "learning_rate": 5.666230565468273e-06, "loss": 0.7982, "step": 13071 }, { "epoch": 0.473777681127904, "grad_norm": 2.241175216038542, "learning_rate": 5.665648866187328e-06, "loss": 1.0077, "step": 13072 }, { "epoch": 0.4738139248305607, "grad_norm": 2.302117543572371, "learning_rate": 5.665067157734143e-06, "loss": 0.8327, "step": 13073 }, { "epoch": 0.4738501685332174, "grad_norm": 2.1881035994789295, "learning_rate": 5.664485440116736e-06, "loss": 0.9479, "step": 13074 }, { "epoch": 0.473886412235874, "grad_norm": 2.2311520027718603, "learning_rate": 5.66390371334312e-06, "loss": 0.8009, "step": 13075 }, { "epoch": 0.47392265593853067, "grad_norm": 2.1197595457084835, "learning_rate": 5.663321977421314e-06, "loss": 0.9904, "step": 13076 }, { "epoch": 0.47395889964118737, "grad_norm": 2.418280847224085, "learning_rate": 5.662740232359332e-06, "loss": 0.7999, "step": 13077 }, { "epoch": 0.473995143343844, "grad_norm": 2.2624744053405306, "learning_rate": 5.662158478165189e-06, "loss": 0.7935, "step": 13078 }, { "epoch": 0.47403138704650066, "grad_norm": 2.554800626923028, "learning_rate": 5.661576714846906e-06, "loss": 0.93, "step": 13079 }, { "epoch": 0.4740676307491573, "grad_norm": 2.2623434804651925, "learning_rate": 5.660994942412493e-06, "loss": 0.8944, "step": 13080 }, { "epoch": 0.474103874451814, "grad_norm": 2.3276419018160825, "learning_rate": 5.66041316086997e-06, "loss": 0.9638, "step": 13081 }, { "epoch": 0.47414011815447066, "grad_norm": 2.532172006724039, "learning_rate": 5.659831370227354e-06, "loss": 1.077, "step": 13082 }, { "epoch": 0.4741763618571273, "grad_norm": 2.333744686325664, "learning_rate": 5.65924957049266e-06, "loss": 0.952, "step": 13083 }, { "epoch": 0.474212605559784, "grad_norm": 2.3059999004598604, "learning_rate": 5.658667761673906e-06, "loss": 1.0764, "step": 13084 }, { "epoch": 0.47424884926244065, "grad_norm": 2.2443737264488193, "learning_rate": 5.658085943779107e-06, "loss": 0.9605, "step": 13085 }, { "epoch": 0.4742850929650973, "grad_norm": 2.450230295008834, "learning_rate": 5.657504116816284e-06, "loss": 0.8688, "step": 13086 }, { "epoch": 0.474321336667754, "grad_norm": 2.5448033580335627, "learning_rate": 5.65692228079345e-06, "loss": 1.1836, "step": 13087 }, { "epoch": 0.47435758037041065, "grad_norm": 2.379487301208567, "learning_rate": 5.656340435718626e-06, "loss": 0.9205, "step": 13088 }, { "epoch": 0.4743938240730673, "grad_norm": 2.094815844819581, "learning_rate": 5.655758581599827e-06, "loss": 0.8498, "step": 13089 }, { "epoch": 0.47443006777572394, "grad_norm": 2.238026983273859, "learning_rate": 5.655176718445073e-06, "loss": 1.0184, "step": 13090 }, { "epoch": 0.47446631147838064, "grad_norm": 2.281343820937297, "learning_rate": 5.654594846262377e-06, "loss": 0.9869, "step": 13091 }, { "epoch": 0.4745025551810373, "grad_norm": 2.4820213548358265, "learning_rate": 5.654012965059761e-06, "loss": 0.9531, "step": 13092 }, { "epoch": 0.47453879888369394, "grad_norm": 2.7772580365164363, "learning_rate": 5.653431074845241e-06, "loss": 1.0936, "step": 13093 }, { "epoch": 0.47457504258635064, "grad_norm": 2.341373057094766, "learning_rate": 5.652849175626838e-06, "loss": 1.0563, "step": 13094 }, { "epoch": 0.4746112862890073, "grad_norm": 2.2387804579152912, "learning_rate": 5.652267267412567e-06, "loss": 0.9833, "step": 13095 }, { "epoch": 0.47464752999166393, "grad_norm": 2.1836633031261883, "learning_rate": 5.651685350210447e-06, "loss": 0.6527, "step": 13096 }, { "epoch": 0.47468377369432063, "grad_norm": 2.288275489198817, "learning_rate": 5.651103424028498e-06, "loss": 1.1031, "step": 13097 }, { "epoch": 0.4747200173969773, "grad_norm": 2.1450245236428924, "learning_rate": 5.650521488874738e-06, "loss": 0.8534, "step": 13098 }, { "epoch": 0.4747562610996339, "grad_norm": 2.3207721972825803, "learning_rate": 5.649939544757185e-06, "loss": 0.997, "step": 13099 }, { "epoch": 0.4747925048022906, "grad_norm": 2.4953075139988403, "learning_rate": 5.6493575916838585e-06, "loss": 0.8172, "step": 13100 }, { "epoch": 0.4748287485049473, "grad_norm": 2.4331900743563306, "learning_rate": 5.648775629662776e-06, "loss": 0.9356, "step": 13101 }, { "epoch": 0.4748649922076039, "grad_norm": 2.5525600717595025, "learning_rate": 5.648193658701958e-06, "loss": 0.8424, "step": 13102 }, { "epoch": 0.47490123591026057, "grad_norm": 2.206287368525507, "learning_rate": 5.647611678809424e-06, "loss": 0.7413, "step": 13103 }, { "epoch": 0.47493747961291727, "grad_norm": 2.3722647780848085, "learning_rate": 5.647029689993192e-06, "loss": 0.7369, "step": 13104 }, { "epoch": 0.4749737233155739, "grad_norm": 2.1186291547234597, "learning_rate": 5.646447692261282e-06, "loss": 0.915, "step": 13105 }, { "epoch": 0.47500996701823056, "grad_norm": 2.1407658911133334, "learning_rate": 5.645865685621714e-06, "loss": 0.9177, "step": 13106 }, { "epoch": 0.47504621072088726, "grad_norm": 2.2477636665402256, "learning_rate": 5.645283670082507e-06, "loss": 0.9983, "step": 13107 }, { "epoch": 0.4750824544235439, "grad_norm": 2.2085112058426497, "learning_rate": 5.644701645651682e-06, "loss": 0.6833, "step": 13108 }, { "epoch": 0.47511869812620056, "grad_norm": 2.1086458892675646, "learning_rate": 5.64411961233726e-06, "loss": 0.868, "step": 13109 }, { "epoch": 0.47515494182885726, "grad_norm": 2.1821258952007034, "learning_rate": 5.6435375701472575e-06, "loss": 0.7355, "step": 13110 }, { "epoch": 0.4751911855315139, "grad_norm": 2.096811535333514, "learning_rate": 5.642955519089696e-06, "loss": 0.8571, "step": 13111 }, { "epoch": 0.47522742923417055, "grad_norm": 2.841637126923472, "learning_rate": 5.642373459172596e-06, "loss": 0.9158, "step": 13112 }, { "epoch": 0.47526367293682725, "grad_norm": 2.5441814174973416, "learning_rate": 5.641791390403979e-06, "loss": 0.9732, "step": 13113 }, { "epoch": 0.4752999166394839, "grad_norm": 2.417746746604413, "learning_rate": 5.641209312791864e-06, "loss": 0.8584, "step": 13114 }, { "epoch": 0.47533616034214055, "grad_norm": 2.194763087592848, "learning_rate": 5.640627226344274e-06, "loss": 0.8286, "step": 13115 }, { "epoch": 0.4753724040447972, "grad_norm": 2.5012984955327564, "learning_rate": 5.640045131069227e-06, "loss": 0.7986, "step": 13116 }, { "epoch": 0.4754086477474539, "grad_norm": 2.3174470344895126, "learning_rate": 5.6394630269747455e-06, "loss": 0.9748, "step": 13117 }, { "epoch": 0.47544489145011054, "grad_norm": 2.4861559232919705, "learning_rate": 5.638880914068851e-06, "loss": 0.7819, "step": 13118 }, { "epoch": 0.4754811351527672, "grad_norm": 2.0799999467992984, "learning_rate": 5.638298792359564e-06, "loss": 0.831, "step": 13119 }, { "epoch": 0.4755173788554239, "grad_norm": 1.9313028218565804, "learning_rate": 5.637716661854904e-06, "loss": 0.9177, "step": 13120 }, { "epoch": 0.47555362255808054, "grad_norm": 2.5737757570543516, "learning_rate": 5.637134522562896e-06, "loss": 0.9554, "step": 13121 }, { "epoch": 0.4755898662607372, "grad_norm": 2.1571243020472655, "learning_rate": 5.6365523744915576e-06, "loss": 0.8094, "step": 13122 }, { "epoch": 0.4756261099633939, "grad_norm": 2.6232890697952094, "learning_rate": 5.635970217648914e-06, "loss": 1.0801, "step": 13123 }, { "epoch": 0.47566235366605053, "grad_norm": 2.2831974442807272, "learning_rate": 5.635388052042984e-06, "loss": 0.894, "step": 13124 }, { "epoch": 0.4756985973687072, "grad_norm": 2.2654771174509256, "learning_rate": 5.634805877681793e-06, "loss": 0.8279, "step": 13125 }, { "epoch": 0.4757348410713638, "grad_norm": 2.420990358213516, "learning_rate": 5.634223694573358e-06, "loss": 0.9676, "step": 13126 }, { "epoch": 0.4757710847740205, "grad_norm": 2.4344629537102596, "learning_rate": 5.633641502725706e-06, "loss": 1.0216, "step": 13127 }, { "epoch": 0.47580732847667717, "grad_norm": 2.611902260594871, "learning_rate": 5.633059302146858e-06, "loss": 0.8938, "step": 13128 }, { "epoch": 0.4758435721793338, "grad_norm": 2.2173810363002397, "learning_rate": 5.632477092844836e-06, "loss": 0.9571, "step": 13129 }, { "epoch": 0.4758798158819905, "grad_norm": 2.143059869027112, "learning_rate": 5.6318948748276605e-06, "loss": 0.8798, "step": 13130 }, { "epoch": 0.47591605958464717, "grad_norm": 2.802252124317863, "learning_rate": 5.6313126481033566e-06, "loss": 0.8484, "step": 13131 }, { "epoch": 0.4759523032873038, "grad_norm": 2.380847406077807, "learning_rate": 5.630730412679945e-06, "loss": 0.8879, "step": 13132 }, { "epoch": 0.4759885469899605, "grad_norm": 2.139535075862546, "learning_rate": 5.63014816856545e-06, "loss": 0.8565, "step": 13133 }, { "epoch": 0.47602479069261716, "grad_norm": 2.400751608158161, "learning_rate": 5.629565915767895e-06, "loss": 0.8601, "step": 13134 }, { "epoch": 0.4760610343952738, "grad_norm": 2.1151932017842388, "learning_rate": 5.628983654295303e-06, "loss": 1.0778, "step": 13135 }, { "epoch": 0.4760972780979305, "grad_norm": 2.135296823473277, "learning_rate": 5.628401384155696e-06, "loss": 0.8671, "step": 13136 }, { "epoch": 0.47613352180058716, "grad_norm": 2.162405035120395, "learning_rate": 5.627819105357099e-06, "loss": 0.8483, "step": 13137 }, { "epoch": 0.4761697655032438, "grad_norm": 2.435305227453366, "learning_rate": 5.627236817907532e-06, "loss": 0.948, "step": 13138 }, { "epoch": 0.47620600920590045, "grad_norm": 2.3745685386455473, "learning_rate": 5.626654521815024e-06, "loss": 0.8503, "step": 13139 }, { "epoch": 0.47624225290855715, "grad_norm": 2.315724232277515, "learning_rate": 5.626072217087592e-06, "loss": 0.8117, "step": 13140 }, { "epoch": 0.4762784966112138, "grad_norm": 2.4401687472980083, "learning_rate": 5.625489903733266e-06, "loss": 1.0084, "step": 13141 }, { "epoch": 0.47631474031387044, "grad_norm": 2.2281323318903348, "learning_rate": 5.624907581760068e-06, "loss": 0.9617, "step": 13142 }, { "epoch": 0.47635098401652715, "grad_norm": 2.409409725484712, "learning_rate": 5.62432525117602e-06, "loss": 0.9262, "step": 13143 }, { "epoch": 0.4763872277191838, "grad_norm": 2.280830045385092, "learning_rate": 5.623742911989148e-06, "loss": 0.8322, "step": 13144 }, { "epoch": 0.47642347142184044, "grad_norm": 2.602978299696452, "learning_rate": 5.623160564207476e-06, "loss": 0.9993, "step": 13145 }, { "epoch": 0.47645971512449714, "grad_norm": 2.5221748943883315, "learning_rate": 5.622578207839029e-06, "loss": 0.964, "step": 13146 }, { "epoch": 0.4764959588271538, "grad_norm": 2.362540679706184, "learning_rate": 5.62199584289183e-06, "loss": 0.852, "step": 13147 }, { "epoch": 0.47653220252981043, "grad_norm": 2.4556950124182744, "learning_rate": 5.621413469373905e-06, "loss": 0.809, "step": 13148 }, { "epoch": 0.47656844623246714, "grad_norm": 2.423812377583571, "learning_rate": 5.620831087293277e-06, "loss": 0.8577, "step": 13149 }, { "epoch": 0.4766046899351238, "grad_norm": 2.2927481450210108, "learning_rate": 5.620248696657971e-06, "loss": 0.9623, "step": 13150 }, { "epoch": 0.47664093363778043, "grad_norm": 2.346686803584875, "learning_rate": 5.619666297476016e-06, "loss": 1.0215, "step": 13151 }, { "epoch": 0.4766771773404371, "grad_norm": 1.9978732681470757, "learning_rate": 5.6190838897554336e-06, "loss": 0.8521, "step": 13152 }, { "epoch": 0.4767134210430938, "grad_norm": 2.1155251640207933, "learning_rate": 5.618501473504248e-06, "loss": 0.8339, "step": 13153 }, { "epoch": 0.4767496647457504, "grad_norm": 2.0357550639211865, "learning_rate": 5.617919048730488e-06, "loss": 0.6807, "step": 13154 }, { "epoch": 0.47678590844840707, "grad_norm": 2.1699287140159513, "learning_rate": 5.617336615442176e-06, "loss": 0.7288, "step": 13155 }, { "epoch": 0.47682215215106377, "grad_norm": 2.4079807719106188, "learning_rate": 5.61675417364734e-06, "loss": 0.9174, "step": 13156 }, { "epoch": 0.4768583958537204, "grad_norm": 2.304195242717419, "learning_rate": 5.6161717233540044e-06, "loss": 1.0085, "step": 13157 }, { "epoch": 0.47689463955637706, "grad_norm": 2.0632491473805943, "learning_rate": 5.615589264570194e-06, "loss": 0.801, "step": 13158 }, { "epoch": 0.47693088325903377, "grad_norm": 2.2482727760737715, "learning_rate": 5.615006797303937e-06, "loss": 1.0058, "step": 13159 }, { "epoch": 0.4769671269616904, "grad_norm": 2.3060478830034516, "learning_rate": 5.614424321563256e-06, "loss": 0.9207, "step": 13160 }, { "epoch": 0.47700337066434706, "grad_norm": 2.2653186942279877, "learning_rate": 5.613841837356182e-06, "loss": 0.7581, "step": 13161 }, { "epoch": 0.4770396143670037, "grad_norm": 2.225793723964129, "learning_rate": 5.613259344690737e-06, "loss": 0.8983, "step": 13162 }, { "epoch": 0.4770758580696604, "grad_norm": 2.4394170372167894, "learning_rate": 5.61267684357495e-06, "loss": 0.9759, "step": 13163 }, { "epoch": 0.47711210177231705, "grad_norm": 2.6516215017107365, "learning_rate": 5.612094334016846e-06, "loss": 1.0851, "step": 13164 }, { "epoch": 0.4771483454749737, "grad_norm": 2.289338544639, "learning_rate": 5.611511816024453e-06, "loss": 0.845, "step": 13165 }, { "epoch": 0.4771845891776304, "grad_norm": 2.5728046307904693, "learning_rate": 5.610929289605798e-06, "loss": 0.973, "step": 13166 }, { "epoch": 0.47722083288028705, "grad_norm": 2.085675156963769, "learning_rate": 5.610346754768905e-06, "loss": 0.9913, "step": 13167 }, { "epoch": 0.4772570765829437, "grad_norm": 2.4281918140781378, "learning_rate": 5.6097642115218036e-06, "loss": 0.9597, "step": 13168 }, { "epoch": 0.4772933202856004, "grad_norm": 2.186791991655955, "learning_rate": 5.60918165987252e-06, "loss": 0.8791, "step": 13169 }, { "epoch": 0.47732956398825704, "grad_norm": 2.448150807593704, "learning_rate": 5.60859909982908e-06, "loss": 0.875, "step": 13170 }, { "epoch": 0.4773658076909137, "grad_norm": 2.6088078818363214, "learning_rate": 5.608016531399515e-06, "loss": 0.8995, "step": 13171 }, { "epoch": 0.4774020513935704, "grad_norm": 2.334138586644, "learning_rate": 5.607433954591849e-06, "loss": 0.8935, "step": 13172 }, { "epoch": 0.47743829509622704, "grad_norm": 2.389023086335079, "learning_rate": 5.60685136941411e-06, "loss": 0.9433, "step": 13173 }, { "epoch": 0.4774745387988837, "grad_norm": 2.306979348958477, "learning_rate": 5.606268775874326e-06, "loss": 0.9346, "step": 13174 }, { "epoch": 0.47751078250154033, "grad_norm": 2.1365912122921067, "learning_rate": 5.6056861739805254e-06, "loss": 0.7837, "step": 13175 }, { "epoch": 0.47754702620419703, "grad_norm": 2.419129057546014, "learning_rate": 5.605103563740737e-06, "loss": 0.8629, "step": 13176 }, { "epoch": 0.4775832699068537, "grad_norm": 2.196875229365187, "learning_rate": 5.6045209451629864e-06, "loss": 0.982, "step": 13177 }, { "epoch": 0.4776195136095103, "grad_norm": 2.452353083647991, "learning_rate": 5.603938318255302e-06, "loss": 0.8871, "step": 13178 }, { "epoch": 0.47765575731216703, "grad_norm": 2.184746335823777, "learning_rate": 5.603355683025713e-06, "loss": 0.9183, "step": 13179 }, { "epoch": 0.4776920010148237, "grad_norm": 2.1396059281837507, "learning_rate": 5.602773039482247e-06, "loss": 0.9616, "step": 13180 }, { "epoch": 0.4777282447174803, "grad_norm": 2.6437501306893565, "learning_rate": 5.6021903876329355e-06, "loss": 0.9331, "step": 13181 }, { "epoch": 0.477764488420137, "grad_norm": 2.2034286263960596, "learning_rate": 5.601607727485803e-06, "loss": 0.7519, "step": 13182 }, { "epoch": 0.47780073212279367, "grad_norm": 2.2367100388035612, "learning_rate": 5.601025059048879e-06, "loss": 0.6698, "step": 13183 }, { "epoch": 0.4778369758254503, "grad_norm": 2.2004805130060454, "learning_rate": 5.600442382330194e-06, "loss": 0.8538, "step": 13184 }, { "epoch": 0.477873219528107, "grad_norm": 2.6717399347123196, "learning_rate": 5.5998596973377774e-06, "loss": 0.9939, "step": 13185 }, { "epoch": 0.47790946323076366, "grad_norm": 2.037183251310718, "learning_rate": 5.599277004079655e-06, "loss": 0.8397, "step": 13186 }, { "epoch": 0.4779457069334203, "grad_norm": 2.4049913738763893, "learning_rate": 5.59869430256386e-06, "loss": 0.9903, "step": 13187 }, { "epoch": 0.47798195063607696, "grad_norm": 2.658379585774338, "learning_rate": 5.598111592798418e-06, "loss": 1.1571, "step": 13188 }, { "epoch": 0.47801819433873366, "grad_norm": 2.3629900969786424, "learning_rate": 5.5975288747913606e-06, "loss": 0.7443, "step": 13189 }, { "epoch": 0.4780544380413903, "grad_norm": 2.4904409617597665, "learning_rate": 5.596946148550715e-06, "loss": 0.7693, "step": 13190 }, { "epoch": 0.47809068174404695, "grad_norm": 2.3744760912475185, "learning_rate": 5.596363414084514e-06, "loss": 0.9249, "step": 13191 }, { "epoch": 0.47812692544670365, "grad_norm": 2.6521042154881282, "learning_rate": 5.595780671400785e-06, "loss": 0.9476, "step": 13192 }, { "epoch": 0.4781631691493603, "grad_norm": 2.2168873840049654, "learning_rate": 5.595197920507559e-06, "loss": 0.9477, "step": 13193 }, { "epoch": 0.47819941285201695, "grad_norm": 2.3429860569803136, "learning_rate": 5.594615161412866e-06, "loss": 1.0585, "step": 13194 }, { "epoch": 0.47823565655467365, "grad_norm": 2.466594034542539, "learning_rate": 5.594032394124736e-06, "loss": 0.9942, "step": 13195 }, { "epoch": 0.4782719002573303, "grad_norm": 2.223719372634302, "learning_rate": 5.5934496186512e-06, "loss": 0.7417, "step": 13196 }, { "epoch": 0.47830814395998694, "grad_norm": 2.383707035715744, "learning_rate": 5.592866835000285e-06, "loss": 0.8164, "step": 13197 }, { "epoch": 0.4783443876626436, "grad_norm": 2.5361814502361035, "learning_rate": 5.5922840431800255e-06, "loss": 0.9206, "step": 13198 }, { "epoch": 0.4783806313653003, "grad_norm": 2.223567821288852, "learning_rate": 5.591701243198448e-06, "loss": 0.963, "step": 13199 }, { "epoch": 0.47841687506795694, "grad_norm": 2.3824250941730454, "learning_rate": 5.591118435063586e-06, "loss": 0.8643, "step": 13200 }, { "epoch": 0.4784531187706136, "grad_norm": 2.3652114631410215, "learning_rate": 5.590535618783469e-06, "loss": 1.0811, "step": 13201 }, { "epoch": 0.4784893624732703, "grad_norm": 2.2569046018345125, "learning_rate": 5.5899527943661295e-06, "loss": 0.8041, "step": 13202 }, { "epoch": 0.47852560617592693, "grad_norm": 2.5741471535717526, "learning_rate": 5.589369961819595e-06, "loss": 1.0065, "step": 13203 }, { "epoch": 0.4785618498785836, "grad_norm": 2.2358832222062723, "learning_rate": 5.588787121151902e-06, "loss": 0.8144, "step": 13204 }, { "epoch": 0.4785980935812403, "grad_norm": 2.2993772926740483, "learning_rate": 5.588204272371077e-06, "loss": 0.8423, "step": 13205 }, { "epoch": 0.4786343372838969, "grad_norm": 2.388849497914532, "learning_rate": 5.587621415485154e-06, "loss": 0.8629, "step": 13206 }, { "epoch": 0.4786705809865536, "grad_norm": 2.2494929420233434, "learning_rate": 5.5870385505021604e-06, "loss": 0.8806, "step": 13207 }, { "epoch": 0.4787068246892103, "grad_norm": 2.4140425475176492, "learning_rate": 5.586455677430133e-06, "loss": 1.1653, "step": 13208 }, { "epoch": 0.4787430683918669, "grad_norm": 2.27169698866403, "learning_rate": 5.585872796277099e-06, "loss": 0.8731, "step": 13209 }, { "epoch": 0.47877931209452357, "grad_norm": 2.2469131244733553, "learning_rate": 5.585289907051096e-06, "loss": 0.7737, "step": 13210 }, { "epoch": 0.4788155557971802, "grad_norm": 2.464018247128446, "learning_rate": 5.584707009760149e-06, "loss": 0.9768, "step": 13211 }, { "epoch": 0.4788517994998369, "grad_norm": 2.419593584735393, "learning_rate": 5.584124104412294e-06, "loss": 0.9448, "step": 13212 }, { "epoch": 0.47888804320249356, "grad_norm": 2.425333429231832, "learning_rate": 5.58354119101556e-06, "loss": 0.8835, "step": 13213 }, { "epoch": 0.4789242869051502, "grad_norm": 2.282902185712389, "learning_rate": 5.582958269577984e-06, "loss": 1.0756, "step": 13214 }, { "epoch": 0.4789605306078069, "grad_norm": 2.211212074923366, "learning_rate": 5.582375340107595e-06, "loss": 0.8388, "step": 13215 }, { "epoch": 0.47899677431046356, "grad_norm": 2.498172901709637, "learning_rate": 5.581792402612427e-06, "loss": 0.8569, "step": 13216 }, { "epoch": 0.4790330180131202, "grad_norm": 2.479967007443184, "learning_rate": 5.58120945710051e-06, "loss": 0.9706, "step": 13217 }, { "epoch": 0.4790692617157769, "grad_norm": 2.42293945537442, "learning_rate": 5.5806265035798776e-06, "loss": 0.7726, "step": 13218 }, { "epoch": 0.47910550541843355, "grad_norm": 2.440962861969104, "learning_rate": 5.580043542058564e-06, "loss": 0.9459, "step": 13219 }, { "epoch": 0.4791417491210902, "grad_norm": 2.133558720164773, "learning_rate": 5.579460572544602e-06, "loss": 0.899, "step": 13220 }, { "epoch": 0.4791779928237469, "grad_norm": 2.3616040978267527, "learning_rate": 5.578877595046022e-06, "loss": 0.9551, "step": 13221 }, { "epoch": 0.47921423652640355, "grad_norm": 2.434965308384585, "learning_rate": 5.57829460957086e-06, "loss": 0.7763, "step": 13222 }, { "epoch": 0.4792504802290602, "grad_norm": 2.243669341260235, "learning_rate": 5.577711616127148e-06, "loss": 0.9277, "step": 13223 }, { "epoch": 0.47928672393171684, "grad_norm": 2.331833030774739, "learning_rate": 5.577128614722919e-06, "loss": 0.8669, "step": 13224 }, { "epoch": 0.47932296763437354, "grad_norm": 2.343607769980096, "learning_rate": 5.576545605366207e-06, "loss": 0.9211, "step": 13225 }, { "epoch": 0.4793592113370302, "grad_norm": 2.5405927076683184, "learning_rate": 5.575962588065046e-06, "loss": 0.9298, "step": 13226 }, { "epoch": 0.47939545503968684, "grad_norm": 2.6867679187814613, "learning_rate": 5.5753795628274675e-06, "loss": 0.8596, "step": 13227 }, { "epoch": 0.47943169874234354, "grad_norm": 2.415422125753844, "learning_rate": 5.574796529661507e-06, "loss": 1.1284, "step": 13228 }, { "epoch": 0.4794679424450002, "grad_norm": 2.086711200890263, "learning_rate": 5.574213488575198e-06, "loss": 0.7918, "step": 13229 }, { "epoch": 0.47950418614765683, "grad_norm": 2.136360371914725, "learning_rate": 5.573630439576574e-06, "loss": 0.7721, "step": 13230 }, { "epoch": 0.47954042985031353, "grad_norm": 2.28181325422062, "learning_rate": 5.57304738267367e-06, "loss": 0.756, "step": 13231 }, { "epoch": 0.4795766735529702, "grad_norm": 2.925927409996156, "learning_rate": 5.57246431787452e-06, "loss": 0.8498, "step": 13232 }, { "epoch": 0.4796129172556268, "grad_norm": 1.9961839358415814, "learning_rate": 5.5718812451871574e-06, "loss": 0.8412, "step": 13233 }, { "epoch": 0.47964916095828347, "grad_norm": 2.451346947395166, "learning_rate": 5.571298164619617e-06, "loss": 1.0539, "step": 13234 }, { "epoch": 0.4796854046609402, "grad_norm": 2.0351885034947497, "learning_rate": 5.5707150761799334e-06, "loss": 0.9751, "step": 13235 }, { "epoch": 0.4797216483635968, "grad_norm": 2.216776929578549, "learning_rate": 5.570131979876142e-06, "loss": 0.8595, "step": 13236 }, { "epoch": 0.47975789206625347, "grad_norm": 2.123183547404638, "learning_rate": 5.569548875716275e-06, "loss": 0.8341, "step": 13237 }, { "epoch": 0.47979413576891017, "grad_norm": 2.1937228959129924, "learning_rate": 5.56896576370837e-06, "loss": 0.9841, "step": 13238 }, { "epoch": 0.4798303794715668, "grad_norm": 2.0695598204103436, "learning_rate": 5.5683826438604615e-06, "loss": 0.9531, "step": 13239 }, { "epoch": 0.47986662317422346, "grad_norm": 2.626382846116115, "learning_rate": 5.567799516180583e-06, "loss": 0.9338, "step": 13240 }, { "epoch": 0.47990286687688016, "grad_norm": 2.1378764770344634, "learning_rate": 5.567216380676771e-06, "loss": 0.8195, "step": 13241 }, { "epoch": 0.4799391105795368, "grad_norm": 2.160048544504518, "learning_rate": 5.5666332373570595e-06, "loss": 0.8923, "step": 13242 }, { "epoch": 0.47997535428219346, "grad_norm": 1.9227908221393122, "learning_rate": 5.566050086229486e-06, "loss": 0.8792, "step": 13243 }, { "epoch": 0.48001159798485016, "grad_norm": 2.2618733096049954, "learning_rate": 5.5654669273020845e-06, "loss": 0.8408, "step": 13244 }, { "epoch": 0.4800478416875068, "grad_norm": 2.1272223162655672, "learning_rate": 5.56488376058289e-06, "loss": 0.8403, "step": 13245 }, { "epoch": 0.48008408539016345, "grad_norm": 2.1869335227002673, "learning_rate": 5.564300586079939e-06, "loss": 0.9427, "step": 13246 }, { "epoch": 0.4801203290928201, "grad_norm": 2.423082564246349, "learning_rate": 5.563717403801267e-06, "loss": 0.8933, "step": 13247 }, { "epoch": 0.4801565727954768, "grad_norm": 2.182714826229956, "learning_rate": 5.563134213754912e-06, "loss": 0.9538, "step": 13248 }, { "epoch": 0.48019281649813345, "grad_norm": 2.826264592373469, "learning_rate": 5.5625510159489045e-06, "loss": 0.9358, "step": 13249 }, { "epoch": 0.4802290602007901, "grad_norm": 2.223356562852013, "learning_rate": 5.561967810391288e-06, "loss": 0.83, "step": 13250 }, { "epoch": 0.4802653039034468, "grad_norm": 2.590481732430025, "learning_rate": 5.561384597090092e-06, "loss": 0.9168, "step": 13251 }, { "epoch": 0.48030154760610344, "grad_norm": 2.4303264616967906, "learning_rate": 5.560801376053357e-06, "loss": 0.9247, "step": 13252 }, { "epoch": 0.4803377913087601, "grad_norm": 2.455768446400147, "learning_rate": 5.560218147289119e-06, "loss": 0.9382, "step": 13253 }, { "epoch": 0.4803740350114168, "grad_norm": 2.304494254487195, "learning_rate": 5.559634910805414e-06, "loss": 0.9726, "step": 13254 }, { "epoch": 0.48041027871407344, "grad_norm": 2.33638702114803, "learning_rate": 5.559051666610276e-06, "loss": 1.0228, "step": 13255 }, { "epoch": 0.4804465224167301, "grad_norm": 2.106085652462591, "learning_rate": 5.558468414711746e-06, "loss": 0.9581, "step": 13256 }, { "epoch": 0.4804827661193868, "grad_norm": 2.0409502772227577, "learning_rate": 5.557885155117858e-06, "loss": 0.6636, "step": 13257 }, { "epoch": 0.48051900982204343, "grad_norm": 2.5310851028617103, "learning_rate": 5.55730188783665e-06, "loss": 0.8167, "step": 13258 }, { "epoch": 0.4805552535247001, "grad_norm": 2.272819157525183, "learning_rate": 5.55671861287616e-06, "loss": 0.859, "step": 13259 }, { "epoch": 0.4805914972273567, "grad_norm": 2.3421569553717063, "learning_rate": 5.556135330244423e-06, "loss": 0.9338, "step": 13260 }, { "epoch": 0.4806277409300134, "grad_norm": 2.457224160014958, "learning_rate": 5.555552039949478e-06, "loss": 0.846, "step": 13261 }, { "epoch": 0.48066398463267007, "grad_norm": 2.194194676758374, "learning_rate": 5.554968741999363e-06, "loss": 0.7563, "step": 13262 }, { "epoch": 0.4807002283353267, "grad_norm": 2.092873252297937, "learning_rate": 5.5543854364021135e-06, "loss": 0.825, "step": 13263 }, { "epoch": 0.4807364720379834, "grad_norm": 2.3656153135398, "learning_rate": 5.553802123165768e-06, "loss": 0.9326, "step": 13264 }, { "epoch": 0.48077271574064007, "grad_norm": 2.3689573875890573, "learning_rate": 5.553218802298364e-06, "loss": 0.9471, "step": 13265 }, { "epoch": 0.4808089594432967, "grad_norm": 2.769174393012481, "learning_rate": 5.552635473807939e-06, "loss": 1.0567, "step": 13266 }, { "epoch": 0.4808452031459534, "grad_norm": 2.5086804028727836, "learning_rate": 5.552052137702532e-06, "loss": 1.0195, "step": 13267 }, { "epoch": 0.48088144684861006, "grad_norm": 2.4206661691646048, "learning_rate": 5.55146879399018e-06, "loss": 0.8382, "step": 13268 }, { "epoch": 0.4809176905512667, "grad_norm": 2.3862066274151243, "learning_rate": 5.5508854426789215e-06, "loss": 0.8405, "step": 13269 }, { "epoch": 0.48095393425392335, "grad_norm": 2.1594848991743394, "learning_rate": 5.550302083776795e-06, "loss": 0.8522, "step": 13270 }, { "epoch": 0.48099017795658006, "grad_norm": 2.3407443652910698, "learning_rate": 5.549718717291839e-06, "loss": 0.9474, "step": 13271 }, { "epoch": 0.4810264216592367, "grad_norm": 2.140835519674162, "learning_rate": 5.549135343232091e-06, "loss": 0.7649, "step": 13272 }, { "epoch": 0.48106266536189335, "grad_norm": 2.329409904893923, "learning_rate": 5.548551961605591e-06, "loss": 0.894, "step": 13273 }, { "epoch": 0.48109890906455005, "grad_norm": 2.1524250020399207, "learning_rate": 5.5479685724203765e-06, "loss": 0.827, "step": 13274 }, { "epoch": 0.4811351527672067, "grad_norm": 2.4322078075758315, "learning_rate": 5.547385175684485e-06, "loss": 1.0057, "step": 13275 }, { "epoch": 0.48117139646986334, "grad_norm": 2.5679574517918704, "learning_rate": 5.546801771405957e-06, "loss": 0.9185, "step": 13276 }, { "epoch": 0.48120764017252005, "grad_norm": 2.3466241067534352, "learning_rate": 5.546218359592833e-06, "loss": 0.8137, "step": 13277 }, { "epoch": 0.4812438838751767, "grad_norm": 2.5064434663883945, "learning_rate": 5.5456349402531485e-06, "loss": 0.9473, "step": 13278 }, { "epoch": 0.48128012757783334, "grad_norm": 2.2790624807625703, "learning_rate": 5.545051513394947e-06, "loss": 0.8265, "step": 13279 }, { "epoch": 0.48131637128049004, "grad_norm": 2.1397213844303296, "learning_rate": 5.544468079026263e-06, "loss": 0.8113, "step": 13280 }, { "epoch": 0.4813526149831467, "grad_norm": 2.285774372151136, "learning_rate": 5.543884637155139e-06, "loss": 0.8791, "step": 13281 }, { "epoch": 0.48138885868580333, "grad_norm": 2.55125910056716, "learning_rate": 5.5433011877896135e-06, "loss": 1.058, "step": 13282 }, { "epoch": 0.48142510238846, "grad_norm": 2.5751807823083195, "learning_rate": 5.5427177309377265e-06, "loss": 0.8088, "step": 13283 }, { "epoch": 0.4814613460911167, "grad_norm": 2.543436567728829, "learning_rate": 5.542134266607518e-06, "loss": 1.0473, "step": 13284 }, { "epoch": 0.48149758979377333, "grad_norm": 2.1449990453056906, "learning_rate": 5.5415507948070255e-06, "loss": 0.8805, "step": 13285 }, { "epoch": 0.48153383349643, "grad_norm": 2.3010624063197285, "learning_rate": 5.540967315544291e-06, "loss": 0.9802, "step": 13286 }, { "epoch": 0.4815700771990867, "grad_norm": 2.3550647211653004, "learning_rate": 5.540383828827355e-06, "loss": 0.8325, "step": 13287 }, { "epoch": 0.4816063209017433, "grad_norm": 2.370687439833856, "learning_rate": 5.5398003346642556e-06, "loss": 0.7779, "step": 13288 }, { "epoch": 0.48164256460439997, "grad_norm": 2.0395763301567835, "learning_rate": 5.539216833063034e-06, "loss": 0.8299, "step": 13289 }, { "epoch": 0.48167880830705667, "grad_norm": 2.188249530588926, "learning_rate": 5.5386333240317315e-06, "loss": 0.9254, "step": 13290 }, { "epoch": 0.4817150520097133, "grad_norm": 2.6402244296263966, "learning_rate": 5.538049807578387e-06, "loss": 0.8685, "step": 13291 }, { "epoch": 0.48175129571236996, "grad_norm": 2.333286824124119, "learning_rate": 5.537466283711042e-06, "loss": 0.7761, "step": 13292 }, { "epoch": 0.48178753941502667, "grad_norm": 2.1115985015962506, "learning_rate": 5.536882752437738e-06, "loss": 0.6998, "step": 13293 }, { "epoch": 0.4818237831176833, "grad_norm": 2.156495483103929, "learning_rate": 5.536299213766512e-06, "loss": 0.9785, "step": 13294 }, { "epoch": 0.48186002682033996, "grad_norm": 2.3439169638097255, "learning_rate": 5.535715667705408e-06, "loss": 0.994, "step": 13295 }, { "epoch": 0.4818962705229966, "grad_norm": 2.371950878209076, "learning_rate": 5.535132114262465e-06, "loss": 1.041, "step": 13296 }, { "epoch": 0.4819325142256533, "grad_norm": 2.354954130374955, "learning_rate": 5.534548553445727e-06, "loss": 0.9216, "step": 13297 }, { "epoch": 0.48196875792830995, "grad_norm": 2.514866796872639, "learning_rate": 5.533964985263232e-06, "loss": 0.9489, "step": 13298 }, { "epoch": 0.4820050016309666, "grad_norm": 2.5835740616014875, "learning_rate": 5.533381409723022e-06, "loss": 0.9195, "step": 13299 }, { "epoch": 0.4820412453336233, "grad_norm": 2.2334994442245106, "learning_rate": 5.532797826833139e-06, "loss": 0.7238, "step": 13300 }, { "epoch": 0.48207748903627995, "grad_norm": 2.219783682483179, "learning_rate": 5.532214236601625e-06, "loss": 0.8583, "step": 13301 }, { "epoch": 0.4821137327389366, "grad_norm": 2.0997165559407884, "learning_rate": 5.531630639036522e-06, "loss": 0.7943, "step": 13302 }, { "epoch": 0.4821499764415933, "grad_norm": 2.36042725631266, "learning_rate": 5.531047034145869e-06, "loss": 0.8359, "step": 13303 }, { "epoch": 0.48218622014424994, "grad_norm": 2.2511627218440378, "learning_rate": 5.530463421937707e-06, "loss": 0.8296, "step": 13304 }, { "epoch": 0.4822224638469066, "grad_norm": 2.190320315195248, "learning_rate": 5.529879802420083e-06, "loss": 0.8792, "step": 13305 }, { "epoch": 0.48225870754956324, "grad_norm": 2.075865220287561, "learning_rate": 5.529296175601033e-06, "loss": 0.8637, "step": 13306 }, { "epoch": 0.48229495125221994, "grad_norm": 2.0988469965705923, "learning_rate": 5.528712541488605e-06, "loss": 0.9865, "step": 13307 }, { "epoch": 0.4823311949548766, "grad_norm": 2.4994009658005134, "learning_rate": 5.528128900090835e-06, "loss": 0.8777, "step": 13308 }, { "epoch": 0.48236743865753323, "grad_norm": 2.3155955549243896, "learning_rate": 5.527545251415769e-06, "loss": 1.0818, "step": 13309 }, { "epoch": 0.48240368236018993, "grad_norm": 2.1661196304330153, "learning_rate": 5.526961595471449e-06, "loss": 0.8713, "step": 13310 }, { "epoch": 0.4824399260628466, "grad_norm": 2.205010343245265, "learning_rate": 5.526377932265916e-06, "loss": 0.8447, "step": 13311 }, { "epoch": 0.4824761697655032, "grad_norm": 2.3830445449861615, "learning_rate": 5.525794261807215e-06, "loss": 0.966, "step": 13312 }, { "epoch": 0.48251241346815993, "grad_norm": 2.2327422572371423, "learning_rate": 5.525210584103387e-06, "loss": 0.9681, "step": 13313 }, { "epoch": 0.4825486571708166, "grad_norm": 2.237684195088811, "learning_rate": 5.524626899162473e-06, "loss": 0.6523, "step": 13314 }, { "epoch": 0.4825849008734732, "grad_norm": 2.5332124581618674, "learning_rate": 5.524043206992519e-06, "loss": 0.914, "step": 13315 }, { "epoch": 0.4826211445761299, "grad_norm": 2.2681514887536887, "learning_rate": 5.523459507601565e-06, "loss": 0.821, "step": 13316 }, { "epoch": 0.48265738827878657, "grad_norm": 2.303418375200668, "learning_rate": 5.522875800997658e-06, "loss": 0.778, "step": 13317 }, { "epoch": 0.4826936319814432, "grad_norm": 2.6114008260214625, "learning_rate": 5.522292087188836e-06, "loss": 1.1276, "step": 13318 }, { "epoch": 0.48272987568409986, "grad_norm": 2.2994631605511153, "learning_rate": 5.521708366183147e-06, "loss": 1.0443, "step": 13319 }, { "epoch": 0.48276611938675656, "grad_norm": 2.094883419641054, "learning_rate": 5.521124637988632e-06, "loss": 0.7466, "step": 13320 }, { "epoch": 0.4828023630894132, "grad_norm": 2.338191359937974, "learning_rate": 5.520540902613334e-06, "loss": 0.9834, "step": 13321 }, { "epoch": 0.48283860679206986, "grad_norm": 2.3043581099348964, "learning_rate": 5.519957160065297e-06, "loss": 0.8581, "step": 13322 }, { "epoch": 0.48287485049472656, "grad_norm": 2.3320586204134415, "learning_rate": 5.5193734103525655e-06, "loss": 0.8902, "step": 13323 }, { "epoch": 0.4829110941973832, "grad_norm": 2.174295077443462, "learning_rate": 5.518789653483182e-06, "loss": 0.6959, "step": 13324 }, { "epoch": 0.48294733790003985, "grad_norm": 2.355516069518221, "learning_rate": 5.518205889465191e-06, "loss": 0.8822, "step": 13325 }, { "epoch": 0.48298358160269655, "grad_norm": 2.572560726316608, "learning_rate": 5.5176221183066356e-06, "loss": 0.9106, "step": 13326 }, { "epoch": 0.4830198253053532, "grad_norm": 2.0546013763639457, "learning_rate": 5.517038340015561e-06, "loss": 0.6876, "step": 13327 }, { "epoch": 0.48305606900800985, "grad_norm": 2.3781100118916174, "learning_rate": 5.516454554600011e-06, "loss": 1.0177, "step": 13328 }, { "epoch": 0.48309231271066655, "grad_norm": 2.224039799085338, "learning_rate": 5.51587076206803e-06, "loss": 0.8883, "step": 13329 }, { "epoch": 0.4831285564133232, "grad_norm": 2.724696062957116, "learning_rate": 5.515286962427661e-06, "loss": 0.8308, "step": 13330 }, { "epoch": 0.48316480011597984, "grad_norm": 2.6643813140070147, "learning_rate": 5.51470315568695e-06, "loss": 0.9222, "step": 13331 }, { "epoch": 0.4832010438186365, "grad_norm": 2.300685752821877, "learning_rate": 5.51411934185394e-06, "loss": 0.8779, "step": 13332 }, { "epoch": 0.4832372875212932, "grad_norm": 2.397840573677662, "learning_rate": 5.513535520936677e-06, "loss": 0.9775, "step": 13333 }, { "epoch": 0.48327353122394984, "grad_norm": 2.5804113698858235, "learning_rate": 5.512951692943205e-06, "loss": 0.9831, "step": 13334 }, { "epoch": 0.4833097749266065, "grad_norm": 2.5159871821762594, "learning_rate": 5.512367857881569e-06, "loss": 0.902, "step": 13335 }, { "epoch": 0.4833460186292632, "grad_norm": 2.6091799504002045, "learning_rate": 5.511784015759813e-06, "loss": 0.8627, "step": 13336 }, { "epoch": 0.48338226233191983, "grad_norm": 2.2486308782029796, "learning_rate": 5.511200166585984e-06, "loss": 0.8381, "step": 13337 }, { "epoch": 0.4834185060345765, "grad_norm": 2.647638398610565, "learning_rate": 5.510616310368124e-06, "loss": 0.9819, "step": 13338 }, { "epoch": 0.4834547497372332, "grad_norm": 2.3581504504038766, "learning_rate": 5.510032447114282e-06, "loss": 0.9376, "step": 13339 }, { "epoch": 0.4834909934398898, "grad_norm": 2.122879926159256, "learning_rate": 5.509448576832501e-06, "loss": 0.8198, "step": 13340 }, { "epoch": 0.4835272371425465, "grad_norm": 2.299679601313908, "learning_rate": 5.508864699530827e-06, "loss": 1.0133, "step": 13341 }, { "epoch": 0.4835634808452031, "grad_norm": 2.1808947616842493, "learning_rate": 5.5082808152173036e-06, "loss": 0.841, "step": 13342 }, { "epoch": 0.4835997245478598, "grad_norm": 2.0837152257230724, "learning_rate": 5.507696923899979e-06, "loss": 0.8551, "step": 13343 }, { "epoch": 0.48363596825051647, "grad_norm": 2.3090959022702346, "learning_rate": 5.507113025586898e-06, "loss": 0.9894, "step": 13344 }, { "epoch": 0.4836722119531731, "grad_norm": 2.094014109018107, "learning_rate": 5.506529120286106e-06, "loss": 1.0406, "step": 13345 }, { "epoch": 0.4837084556558298, "grad_norm": 2.5071040599854313, "learning_rate": 5.505945208005648e-06, "loss": 1.0109, "step": 13346 }, { "epoch": 0.48374469935848646, "grad_norm": 2.5909308371013435, "learning_rate": 5.505361288753572e-06, "loss": 0.9211, "step": 13347 }, { "epoch": 0.4837809430611431, "grad_norm": 2.2275440362481436, "learning_rate": 5.504777362537923e-06, "loss": 0.9065, "step": 13348 }, { "epoch": 0.4838171867637998, "grad_norm": 2.332968529053729, "learning_rate": 5.504193429366748e-06, "loss": 1.1082, "step": 13349 }, { "epoch": 0.48385343046645646, "grad_norm": 2.194496590544334, "learning_rate": 5.503609489248092e-06, "loss": 1.0253, "step": 13350 }, { "epoch": 0.4838896741691131, "grad_norm": 2.4149940683158753, "learning_rate": 5.5030255421900015e-06, "loss": 0.8798, "step": 13351 }, { "epoch": 0.4839259178717698, "grad_norm": 2.2828068082393917, "learning_rate": 5.502441588200524e-06, "loss": 0.9896, "step": 13352 }, { "epoch": 0.48396216157442645, "grad_norm": 2.4342732388924, "learning_rate": 5.501857627287704e-06, "loss": 0.9164, "step": 13353 }, { "epoch": 0.4839984052770831, "grad_norm": 2.6201233515935876, "learning_rate": 5.5012736594595895e-06, "loss": 1.0231, "step": 13354 }, { "epoch": 0.48403464897973975, "grad_norm": 2.3122800928564593, "learning_rate": 5.5006896847242275e-06, "loss": 0.9251, "step": 13355 }, { "epoch": 0.48407089268239645, "grad_norm": 2.2956905366704556, "learning_rate": 5.5001057030896645e-06, "loss": 0.8885, "step": 13356 }, { "epoch": 0.4841071363850531, "grad_norm": 2.437639930364442, "learning_rate": 5.499521714563947e-06, "loss": 0.8926, "step": 13357 }, { "epoch": 0.48414338008770974, "grad_norm": 1.9989989910286572, "learning_rate": 5.498937719155123e-06, "loss": 0.8387, "step": 13358 }, { "epoch": 0.48417962379036644, "grad_norm": 2.277579600146389, "learning_rate": 5.4983537168712394e-06, "loss": 0.8006, "step": 13359 }, { "epoch": 0.4842158674930231, "grad_norm": 2.2023842612547218, "learning_rate": 5.497769707720343e-06, "loss": 0.766, "step": 13360 }, { "epoch": 0.48425211119567974, "grad_norm": 1.9762795770433315, "learning_rate": 5.49718569171048e-06, "loss": 0.7953, "step": 13361 }, { "epoch": 0.48428835489833644, "grad_norm": 2.075685737475599, "learning_rate": 5.496601668849699e-06, "loss": 0.8289, "step": 13362 }, { "epoch": 0.4843245986009931, "grad_norm": 2.130323390618705, "learning_rate": 5.496017639146048e-06, "loss": 0.9056, "step": 13363 }, { "epoch": 0.48436084230364973, "grad_norm": 2.470575031190308, "learning_rate": 5.495433602607574e-06, "loss": 0.9049, "step": 13364 }, { "epoch": 0.4843970860063064, "grad_norm": 2.715266099865092, "learning_rate": 5.494849559242324e-06, "loss": 0.7783, "step": 13365 }, { "epoch": 0.4844333297089631, "grad_norm": 2.2956450194119524, "learning_rate": 5.494265509058346e-06, "loss": 1.0276, "step": 13366 }, { "epoch": 0.4844695734116197, "grad_norm": 2.6986750920193776, "learning_rate": 5.493681452063688e-06, "loss": 0.9317, "step": 13367 }, { "epoch": 0.48450581711427637, "grad_norm": 2.057845189807518, "learning_rate": 5.4930973882664e-06, "loss": 0.7881, "step": 13368 }, { "epoch": 0.4845420608169331, "grad_norm": 2.3105195018520335, "learning_rate": 5.492513317674527e-06, "loss": 0.787, "step": 13369 }, { "epoch": 0.4845783045195897, "grad_norm": 2.2653434833703257, "learning_rate": 5.4919292402961186e-06, "loss": 0.8223, "step": 13370 }, { "epoch": 0.48461454822224637, "grad_norm": 2.315291820604995, "learning_rate": 5.491345156139222e-06, "loss": 0.7845, "step": 13371 }, { "epoch": 0.48465079192490307, "grad_norm": 2.82483357330677, "learning_rate": 5.490761065211887e-06, "loss": 0.8765, "step": 13372 }, { "epoch": 0.4846870356275597, "grad_norm": 2.261517693348945, "learning_rate": 5.490176967522161e-06, "loss": 0.9743, "step": 13373 }, { "epoch": 0.48472327933021636, "grad_norm": 2.548720736491549, "learning_rate": 5.489592863078094e-06, "loss": 0.9757, "step": 13374 }, { "epoch": 0.48475952303287306, "grad_norm": 2.2824774670233197, "learning_rate": 5.489008751887731e-06, "loss": 0.8795, "step": 13375 }, { "epoch": 0.4847957667355297, "grad_norm": 2.5365589113046534, "learning_rate": 5.488424633959125e-06, "loss": 0.803, "step": 13376 }, { "epoch": 0.48483201043818636, "grad_norm": 2.2556588705335314, "learning_rate": 5.487840509300323e-06, "loss": 0.8474, "step": 13377 }, { "epoch": 0.484868254140843, "grad_norm": 2.7028563746079293, "learning_rate": 5.487256377919373e-06, "loss": 0.8907, "step": 13378 }, { "epoch": 0.4849044978434997, "grad_norm": 2.5778506488846196, "learning_rate": 5.486672239824326e-06, "loss": 0.8887, "step": 13379 }, { "epoch": 0.48494074154615635, "grad_norm": 2.414497894742498, "learning_rate": 5.4860880950232294e-06, "loss": 0.9968, "step": 13380 }, { "epoch": 0.484976985248813, "grad_norm": 2.700408481729486, "learning_rate": 5.485503943524132e-06, "loss": 0.8001, "step": 13381 }, { "epoch": 0.4850132289514697, "grad_norm": 2.3250599229375126, "learning_rate": 5.484919785335084e-06, "loss": 0.9432, "step": 13382 }, { "epoch": 0.48504947265412635, "grad_norm": 2.231233979168895, "learning_rate": 5.484335620464135e-06, "loss": 1.0048, "step": 13383 }, { "epoch": 0.485085716356783, "grad_norm": 2.197480913825723, "learning_rate": 5.483751448919335e-06, "loss": 0.9102, "step": 13384 }, { "epoch": 0.4851219600594397, "grad_norm": 2.2858287778955155, "learning_rate": 5.483167270708731e-06, "loss": 0.7497, "step": 13385 }, { "epoch": 0.48515820376209634, "grad_norm": 2.0882877597672937, "learning_rate": 5.482583085840375e-06, "loss": 0.9411, "step": 13386 }, { "epoch": 0.485194447464753, "grad_norm": 2.379292775346779, "learning_rate": 5.4819988943223145e-06, "loss": 0.8356, "step": 13387 }, { "epoch": 0.4852306911674097, "grad_norm": 2.203539127240516, "learning_rate": 5.481414696162604e-06, "loss": 0.7588, "step": 13388 }, { "epoch": 0.48526693487006634, "grad_norm": 2.320156048157955, "learning_rate": 5.480830491369288e-06, "loss": 1.0126, "step": 13389 }, { "epoch": 0.485303178572723, "grad_norm": 2.2635451893736045, "learning_rate": 5.480246279950419e-06, "loss": 0.9602, "step": 13390 }, { "epoch": 0.48533942227537963, "grad_norm": 2.5301361355214596, "learning_rate": 5.4796620619140465e-06, "loss": 1.0216, "step": 13391 }, { "epoch": 0.48537566597803633, "grad_norm": 2.10920864257503, "learning_rate": 5.47907783726822e-06, "loss": 0.8824, "step": 13392 }, { "epoch": 0.485411909680693, "grad_norm": 2.3180372459821244, "learning_rate": 5.478493606020991e-06, "loss": 0.715, "step": 13393 }, { "epoch": 0.4854481533833496, "grad_norm": 2.3364192432723274, "learning_rate": 5.477909368180411e-06, "loss": 0.8203, "step": 13394 }, { "epoch": 0.4854843970860063, "grad_norm": 2.2706793998885773, "learning_rate": 5.4773251237545274e-06, "loss": 0.9275, "step": 13395 }, { "epoch": 0.48552064078866297, "grad_norm": 2.227528562027094, "learning_rate": 5.476740872751394e-06, "loss": 0.9372, "step": 13396 }, { "epoch": 0.4855568844913196, "grad_norm": 2.0471079115000888, "learning_rate": 5.476156615179057e-06, "loss": 0.7271, "step": 13397 }, { "epoch": 0.4855931281939763, "grad_norm": 2.1383863356316413, "learning_rate": 5.475572351045572e-06, "loss": 0.9328, "step": 13398 }, { "epoch": 0.48562937189663297, "grad_norm": 2.3782074931098967, "learning_rate": 5.474988080358987e-06, "loss": 1.0399, "step": 13399 }, { "epoch": 0.4856656155992896, "grad_norm": 2.4808409850827604, "learning_rate": 5.474403803127354e-06, "loss": 0.9617, "step": 13400 }, { "epoch": 0.48570185930194626, "grad_norm": 2.359074969138292, "learning_rate": 5.473819519358723e-06, "loss": 0.7127, "step": 13401 }, { "epoch": 0.48573810300460296, "grad_norm": 2.4472446534620795, "learning_rate": 5.473235229061146e-06, "loss": 1.0658, "step": 13402 }, { "epoch": 0.4857743467072596, "grad_norm": 2.067812199037759, "learning_rate": 5.472650932242672e-06, "loss": 0.9647, "step": 13403 }, { "epoch": 0.48581059040991625, "grad_norm": 2.293060423162963, "learning_rate": 5.472066628911357e-06, "loss": 0.8922, "step": 13404 }, { "epoch": 0.48584683411257296, "grad_norm": 2.3746966366217497, "learning_rate": 5.471482319075246e-06, "loss": 0.8308, "step": 13405 }, { "epoch": 0.4858830778152296, "grad_norm": 2.156397394162315, "learning_rate": 5.470898002742396e-06, "loss": 0.9754, "step": 13406 }, { "epoch": 0.48591932151788625, "grad_norm": 2.7694393565573687, "learning_rate": 5.470313679920857e-06, "loss": 1.1876, "step": 13407 }, { "epoch": 0.48595556522054295, "grad_norm": 2.485505154128563, "learning_rate": 5.469729350618679e-06, "loss": 0.8935, "step": 13408 }, { "epoch": 0.4859918089231996, "grad_norm": 2.432353358192485, "learning_rate": 5.469145014843913e-06, "loss": 0.9328, "step": 13409 }, { "epoch": 0.48602805262585624, "grad_norm": 2.34331361483817, "learning_rate": 5.468560672604615e-06, "loss": 0.8382, "step": 13410 }, { "epoch": 0.48606429632851295, "grad_norm": 2.24608324778421, "learning_rate": 5.467976323908832e-06, "loss": 0.8781, "step": 13411 }, { "epoch": 0.4861005400311696, "grad_norm": 2.3363417304112994, "learning_rate": 5.467391968764619e-06, "loss": 0.9218, "step": 13412 }, { "epoch": 0.48613678373382624, "grad_norm": 2.6088074900033296, "learning_rate": 5.466807607180028e-06, "loss": 0.899, "step": 13413 }, { "epoch": 0.4861730274364829, "grad_norm": 2.5498006404564477, "learning_rate": 5.46622323916311e-06, "loss": 0.9771, "step": 13414 }, { "epoch": 0.4862092711391396, "grad_norm": 2.12530204434483, "learning_rate": 5.465638864721917e-06, "loss": 0.8855, "step": 13415 }, { "epoch": 0.48624551484179623, "grad_norm": 2.3001261507778086, "learning_rate": 5.465054483864504e-06, "loss": 0.954, "step": 13416 }, { "epoch": 0.4862817585444529, "grad_norm": 2.6379208142220967, "learning_rate": 5.464470096598921e-06, "loss": 0.9331, "step": 13417 }, { "epoch": 0.4863180022471096, "grad_norm": 2.1269234512809354, "learning_rate": 5.463885702933221e-06, "loss": 0.741, "step": 13418 }, { "epoch": 0.48635424594976623, "grad_norm": 2.221989072486782, "learning_rate": 5.463301302875455e-06, "loss": 0.7342, "step": 13419 }, { "epoch": 0.4863904896524229, "grad_norm": 2.219160373666071, "learning_rate": 5.462716896433679e-06, "loss": 1.0057, "step": 13420 }, { "epoch": 0.4864267333550796, "grad_norm": 2.2817083870506805, "learning_rate": 5.462132483615942e-06, "loss": 0.9408, "step": 13421 }, { "epoch": 0.4864629770577362, "grad_norm": 2.1393069145794357, "learning_rate": 5.461548064430301e-06, "loss": 1.0795, "step": 13422 }, { "epoch": 0.48649922076039287, "grad_norm": 2.5305548721662428, "learning_rate": 5.460963638884805e-06, "loss": 0.8088, "step": 13423 }, { "epoch": 0.48653546446304957, "grad_norm": 2.333068171026458, "learning_rate": 5.46037920698751e-06, "loss": 0.9775, "step": 13424 }, { "epoch": 0.4865717081657062, "grad_norm": 2.246728276984142, "learning_rate": 5.459794768746467e-06, "loss": 0.8444, "step": 13425 }, { "epoch": 0.48660795186836286, "grad_norm": 2.2785924355500256, "learning_rate": 5.459210324169732e-06, "loss": 0.9049, "step": 13426 }, { "epoch": 0.4866441955710195, "grad_norm": 2.5165366296482565, "learning_rate": 5.458625873265355e-06, "loss": 0.8419, "step": 13427 }, { "epoch": 0.4866804392736762, "grad_norm": 2.378140364816818, "learning_rate": 5.4580414160413915e-06, "loss": 0.9909, "step": 13428 }, { "epoch": 0.48671668297633286, "grad_norm": 2.3115132603978243, "learning_rate": 5.457456952505894e-06, "loss": 0.9195, "step": 13429 }, { "epoch": 0.4867529266789895, "grad_norm": 2.634404508764211, "learning_rate": 5.456872482666916e-06, "loss": 0.9628, "step": 13430 }, { "epoch": 0.4867891703816462, "grad_norm": 2.260766241884725, "learning_rate": 5.4562880065325115e-06, "loss": 0.7769, "step": 13431 }, { "epoch": 0.48682541408430285, "grad_norm": 2.346761227638879, "learning_rate": 5.455703524110735e-06, "loss": 0.93, "step": 13432 }, { "epoch": 0.4868616577869595, "grad_norm": 2.3226747075284586, "learning_rate": 5.455119035409638e-06, "loss": 0.8464, "step": 13433 }, { "epoch": 0.4868979014896162, "grad_norm": 2.2313908758961136, "learning_rate": 5.454534540437278e-06, "loss": 1.0619, "step": 13434 }, { "epoch": 0.48693414519227285, "grad_norm": 2.338700711613156, "learning_rate": 5.453950039201705e-06, "loss": 0.9655, "step": 13435 }, { "epoch": 0.4869703888949295, "grad_norm": 2.4973676876569333, "learning_rate": 5.453365531710977e-06, "loss": 0.9773, "step": 13436 }, { "epoch": 0.48700663259758614, "grad_norm": 1.9185694418260277, "learning_rate": 5.452781017973144e-06, "loss": 0.7869, "step": 13437 }, { "epoch": 0.48704287630024284, "grad_norm": 2.293030447874793, "learning_rate": 5.452196497996264e-06, "loss": 0.6457, "step": 13438 }, { "epoch": 0.4870791200028995, "grad_norm": 2.3547597353101266, "learning_rate": 5.451611971788389e-06, "loss": 0.85, "step": 13439 }, { "epoch": 0.48711536370555614, "grad_norm": 2.564139736189394, "learning_rate": 5.451027439357574e-06, "loss": 0.9436, "step": 13440 }, { "epoch": 0.48715160740821284, "grad_norm": 2.3418249392693467, "learning_rate": 5.450442900711874e-06, "loss": 0.8914, "step": 13441 }, { "epoch": 0.4871878511108695, "grad_norm": 2.527512486159585, "learning_rate": 5.449858355859343e-06, "loss": 0.979, "step": 13442 }, { "epoch": 0.48722409481352613, "grad_norm": 2.3592407500973374, "learning_rate": 5.449273804808037e-06, "loss": 0.8874, "step": 13443 }, { "epoch": 0.48726033851618283, "grad_norm": 2.655782369954939, "learning_rate": 5.448689247566008e-06, "loss": 1.047, "step": 13444 }, { "epoch": 0.4872965822188395, "grad_norm": 2.2602343766089854, "learning_rate": 5.448104684141314e-06, "loss": 1.0003, "step": 13445 }, { "epoch": 0.4873328259214961, "grad_norm": 2.5330382360761132, "learning_rate": 5.447520114542009e-06, "loss": 1.0173, "step": 13446 }, { "epoch": 0.48736906962415283, "grad_norm": 1.9908838553016055, "learning_rate": 5.446935538776146e-06, "loss": 0.7878, "step": 13447 }, { "epoch": 0.4874053133268095, "grad_norm": 1.9837174784267573, "learning_rate": 5.446350956851781e-06, "loss": 0.7211, "step": 13448 }, { "epoch": 0.4874415570294661, "grad_norm": 2.250085930920413, "learning_rate": 5.445766368776971e-06, "loss": 0.854, "step": 13449 }, { "epoch": 0.48747780073212277, "grad_norm": 2.4437450121633058, "learning_rate": 5.445181774559769e-06, "loss": 0.9538, "step": 13450 }, { "epoch": 0.48751404443477947, "grad_norm": 2.3971036639334056, "learning_rate": 5.4445971742082325e-06, "loss": 0.8928, "step": 13451 }, { "epoch": 0.4875502881374361, "grad_norm": 2.461590106318001, "learning_rate": 5.444012567730415e-06, "loss": 0.9399, "step": 13452 }, { "epoch": 0.48758653184009276, "grad_norm": 2.366131142215092, "learning_rate": 5.4434279551343734e-06, "loss": 1.1984, "step": 13453 }, { "epoch": 0.48762277554274946, "grad_norm": 2.327230046729105, "learning_rate": 5.442843336428163e-06, "loss": 0.9084, "step": 13454 }, { "epoch": 0.4876590192454061, "grad_norm": 2.354798619024205, "learning_rate": 5.442258711619839e-06, "loss": 0.8695, "step": 13455 }, { "epoch": 0.48769526294806276, "grad_norm": 2.2584969114161453, "learning_rate": 5.441674080717457e-06, "loss": 0.7875, "step": 13456 }, { "epoch": 0.48773150665071946, "grad_norm": 2.3335909189170394, "learning_rate": 5.441089443729074e-06, "loss": 1.0445, "step": 13457 }, { "epoch": 0.4877677503533761, "grad_norm": 2.284052955722099, "learning_rate": 5.440504800662745e-06, "loss": 0.837, "step": 13458 }, { "epoch": 0.48780399405603275, "grad_norm": 2.3265032212842525, "learning_rate": 5.439920151526527e-06, "loss": 0.8357, "step": 13459 }, { "epoch": 0.48784023775868945, "grad_norm": 2.233856584107977, "learning_rate": 5.439335496328473e-06, "loss": 0.8538, "step": 13460 }, { "epoch": 0.4878764814613461, "grad_norm": 2.272477210608807, "learning_rate": 5.438750835076643e-06, "loss": 0.7953, "step": 13461 }, { "epoch": 0.48791272516400275, "grad_norm": 2.205952575382059, "learning_rate": 5.438166167779091e-06, "loss": 0.9415, "step": 13462 }, { "epoch": 0.4879489688666594, "grad_norm": 2.30594818224955, "learning_rate": 5.437581494443875e-06, "loss": 0.8855, "step": 13463 }, { "epoch": 0.4879852125693161, "grad_norm": 2.435351515762882, "learning_rate": 5.436996815079051e-06, "loss": 0.9342, "step": 13464 }, { "epoch": 0.48802145627197274, "grad_norm": 2.094090383951582, "learning_rate": 5.436412129692675e-06, "loss": 0.9184, "step": 13465 }, { "epoch": 0.4880576999746294, "grad_norm": 2.930378640394813, "learning_rate": 5.435827438292804e-06, "loss": 1.1859, "step": 13466 }, { "epoch": 0.4880939436772861, "grad_norm": 2.225147177654198, "learning_rate": 5.435242740887495e-06, "loss": 0.8751, "step": 13467 }, { "epoch": 0.48813018737994274, "grad_norm": 2.265188191262025, "learning_rate": 5.434658037484803e-06, "loss": 0.9075, "step": 13468 }, { "epoch": 0.4881664310825994, "grad_norm": 2.45797695366993, "learning_rate": 5.434073328092786e-06, "loss": 1.02, "step": 13469 }, { "epoch": 0.4882026747852561, "grad_norm": 2.326777324170541, "learning_rate": 5.4334886127195015e-06, "loss": 0.8604, "step": 13470 }, { "epoch": 0.48823891848791273, "grad_norm": 2.2284054617549365, "learning_rate": 5.432903891373007e-06, "loss": 0.9207, "step": 13471 }, { "epoch": 0.4882751621905694, "grad_norm": 2.4233903210899106, "learning_rate": 5.432319164061357e-06, "loss": 0.9409, "step": 13472 }, { "epoch": 0.488311405893226, "grad_norm": 2.5727389968148646, "learning_rate": 5.431734430792611e-06, "loss": 0.7288, "step": 13473 }, { "epoch": 0.4883476495958827, "grad_norm": 2.387376997189805, "learning_rate": 5.431149691574825e-06, "loss": 0.8866, "step": 13474 }, { "epoch": 0.4883838932985394, "grad_norm": 2.290670615306912, "learning_rate": 5.430564946416059e-06, "loss": 0.9458, "step": 13475 }, { "epoch": 0.488420137001196, "grad_norm": 2.514386948885061, "learning_rate": 5.429980195324367e-06, "loss": 0.9366, "step": 13476 }, { "epoch": 0.4884563807038527, "grad_norm": 2.4546263824215213, "learning_rate": 5.42939543830781e-06, "loss": 0.9034, "step": 13477 }, { "epoch": 0.48849262440650937, "grad_norm": 2.245076733004955, "learning_rate": 5.4288106753744405e-06, "loss": 0.6717, "step": 13478 }, { "epoch": 0.488528868109166, "grad_norm": 2.560797949267334, "learning_rate": 5.428225906532321e-06, "loss": 0.8522, "step": 13479 }, { "epoch": 0.4885651118118227, "grad_norm": 2.3331511585368303, "learning_rate": 5.4276411317895064e-06, "loss": 0.9336, "step": 13480 }, { "epoch": 0.48860135551447936, "grad_norm": 2.236521012812803, "learning_rate": 5.4270563511540565e-06, "loss": 0.7342, "step": 13481 }, { "epoch": 0.488637599217136, "grad_norm": 2.05246252691255, "learning_rate": 5.426471564634027e-06, "loss": 0.8579, "step": 13482 }, { "epoch": 0.4886738429197927, "grad_norm": 2.3527598717031903, "learning_rate": 5.425886772237478e-06, "loss": 1.0837, "step": 13483 }, { "epoch": 0.48871008662244936, "grad_norm": 2.3384412632614406, "learning_rate": 5.425301973972467e-06, "loss": 0.8919, "step": 13484 }, { "epoch": 0.488746330325106, "grad_norm": 2.249137659311285, "learning_rate": 5.424717169847054e-06, "loss": 0.788, "step": 13485 }, { "epoch": 0.48878257402776265, "grad_norm": 2.130435093079823, "learning_rate": 5.424132359869293e-06, "loss": 0.9701, "step": 13486 }, { "epoch": 0.48881881773041935, "grad_norm": 2.958572539628902, "learning_rate": 5.423547544047245e-06, "loss": 0.9744, "step": 13487 }, { "epoch": 0.488855061433076, "grad_norm": 2.1932193409967264, "learning_rate": 5.422962722388968e-06, "loss": 0.821, "step": 13488 }, { "epoch": 0.48889130513573265, "grad_norm": 2.187249432386999, "learning_rate": 5.422377894902521e-06, "loss": 0.8327, "step": 13489 }, { "epoch": 0.48892754883838935, "grad_norm": 2.4177088512647167, "learning_rate": 5.421793061595961e-06, "loss": 0.9286, "step": 13490 }, { "epoch": 0.488963792541046, "grad_norm": 2.7571331385860707, "learning_rate": 5.421208222477349e-06, "loss": 0.8868, "step": 13491 }, { "epoch": 0.48900003624370264, "grad_norm": 2.3649482332529144, "learning_rate": 5.420623377554741e-06, "loss": 0.8442, "step": 13492 }, { "epoch": 0.48903627994635934, "grad_norm": 2.416428496144609, "learning_rate": 5.420038526836198e-06, "loss": 0.9399, "step": 13493 }, { "epoch": 0.489072523649016, "grad_norm": 2.261035102626131, "learning_rate": 5.419453670329778e-06, "loss": 0.8913, "step": 13494 }, { "epoch": 0.48910876735167264, "grad_norm": 2.439109665298984, "learning_rate": 5.41886880804354e-06, "loss": 0.9333, "step": 13495 }, { "epoch": 0.48914501105432934, "grad_norm": 2.556947692479671, "learning_rate": 5.418283939985544e-06, "loss": 0.9533, "step": 13496 }, { "epoch": 0.489181254756986, "grad_norm": 2.2637028225191873, "learning_rate": 5.417699066163849e-06, "loss": 0.8467, "step": 13497 }, { "epoch": 0.48921749845964263, "grad_norm": 2.2341996945184817, "learning_rate": 5.417114186586511e-06, "loss": 0.9471, "step": 13498 }, { "epoch": 0.4892537421622993, "grad_norm": 2.2535746759272945, "learning_rate": 5.416529301261593e-06, "loss": 0.927, "step": 13499 }, { "epoch": 0.489289985864956, "grad_norm": 2.335065705132027, "learning_rate": 5.415944410197153e-06, "loss": 0.9913, "step": 13500 }, { "epoch": 0.4893262295676126, "grad_norm": 2.1289972682970353, "learning_rate": 5.41535951340125e-06, "loss": 0.8702, "step": 13501 }, { "epoch": 0.48936247327026927, "grad_norm": 2.051063559006756, "learning_rate": 5.4147746108819445e-06, "loss": 0.8852, "step": 13502 }, { "epoch": 0.489398716972926, "grad_norm": 2.3091074524345876, "learning_rate": 5.4141897026472965e-06, "loss": 0.9282, "step": 13503 }, { "epoch": 0.4894349606755826, "grad_norm": 2.3627269996445683, "learning_rate": 5.413604788705364e-06, "loss": 0.8729, "step": 13504 }, { "epoch": 0.48947120437823927, "grad_norm": 2.48816085640977, "learning_rate": 5.413019869064209e-06, "loss": 1.1148, "step": 13505 }, { "epoch": 0.48950744808089597, "grad_norm": 2.38820104261499, "learning_rate": 5.4124349437318876e-06, "loss": 0.9572, "step": 13506 }, { "epoch": 0.4895436917835526, "grad_norm": 2.3958071116852686, "learning_rate": 5.411850012716464e-06, "loss": 0.9932, "step": 13507 }, { "epoch": 0.48957993548620926, "grad_norm": 2.603933024842531, "learning_rate": 5.4112650760259956e-06, "loss": 0.9804, "step": 13508 }, { "epoch": 0.4896161791888659, "grad_norm": 2.4528059898107903, "learning_rate": 5.410680133668543e-06, "loss": 1.0049, "step": 13509 }, { "epoch": 0.4896524228915226, "grad_norm": 2.385000549023952, "learning_rate": 5.410095185652167e-06, "loss": 0.8214, "step": 13510 }, { "epoch": 0.48968866659417926, "grad_norm": 2.310748110324933, "learning_rate": 5.409510231984928e-06, "loss": 0.8525, "step": 13511 }, { "epoch": 0.4897249102968359, "grad_norm": 2.1484158131496076, "learning_rate": 5.408925272674884e-06, "loss": 0.9574, "step": 13512 }, { "epoch": 0.4897611539994926, "grad_norm": 2.109920899243705, "learning_rate": 5.408340307730098e-06, "loss": 0.7225, "step": 13513 }, { "epoch": 0.48979739770214925, "grad_norm": 2.3071940965351163, "learning_rate": 5.40775533715863e-06, "loss": 0.9859, "step": 13514 }, { "epoch": 0.4898336414048059, "grad_norm": 1.9800279004428725, "learning_rate": 5.40717036096854e-06, "loss": 0.8828, "step": 13515 }, { "epoch": 0.4898698851074626, "grad_norm": 2.3750274066956067, "learning_rate": 5.406585379167888e-06, "loss": 0.9264, "step": 13516 }, { "epoch": 0.48990612881011925, "grad_norm": 2.7954302881296624, "learning_rate": 5.406000391764735e-06, "loss": 1.0734, "step": 13517 }, { "epoch": 0.4899423725127759, "grad_norm": 3.151943586015455, "learning_rate": 5.4054153987671435e-06, "loss": 0.8451, "step": 13518 }, { "epoch": 0.4899786162154326, "grad_norm": 2.134857466031036, "learning_rate": 5.4048304001831705e-06, "loss": 0.779, "step": 13519 }, { "epoch": 0.49001485991808924, "grad_norm": 2.200694061495311, "learning_rate": 5.404245396020882e-06, "loss": 0.9925, "step": 13520 }, { "epoch": 0.4900511036207459, "grad_norm": 2.376238780878242, "learning_rate": 5.403660386288336e-06, "loss": 0.9222, "step": 13521 }, { "epoch": 0.49008734732340253, "grad_norm": 2.2240642675017726, "learning_rate": 5.403075370993593e-06, "loss": 1.123, "step": 13522 }, { "epoch": 0.49012359102605924, "grad_norm": 2.6185639884898957, "learning_rate": 5.402490350144716e-06, "loss": 0.9022, "step": 13523 }, { "epoch": 0.4901598347287159, "grad_norm": 2.2584370783634964, "learning_rate": 5.4019053237497654e-06, "loss": 0.8723, "step": 13524 }, { "epoch": 0.49019607843137253, "grad_norm": 2.4616027845272352, "learning_rate": 5.401320291816802e-06, "loss": 1.0074, "step": 13525 }, { "epoch": 0.49023232213402923, "grad_norm": 2.2456919987263686, "learning_rate": 5.400735254353888e-06, "loss": 0.8311, "step": 13526 }, { "epoch": 0.4902685658366859, "grad_norm": 2.4722288842515896, "learning_rate": 5.400150211369084e-06, "loss": 1.0197, "step": 13527 }, { "epoch": 0.4903048095393425, "grad_norm": 2.080816231673259, "learning_rate": 5.399565162870453e-06, "loss": 0.8915, "step": 13528 }, { "epoch": 0.4903410532419992, "grad_norm": 2.2941487218412733, "learning_rate": 5.3989801088660545e-06, "loss": 0.9259, "step": 13529 }, { "epoch": 0.49037729694465587, "grad_norm": 2.491247914515551, "learning_rate": 5.398395049363952e-06, "loss": 0.8602, "step": 13530 }, { "epoch": 0.4904135406473125, "grad_norm": 2.2047552395717154, "learning_rate": 5.397809984372207e-06, "loss": 0.7844, "step": 13531 }, { "epoch": 0.4904497843499692, "grad_norm": 13.479053681854671, "learning_rate": 5.3972249138988805e-06, "loss": 1.185, "step": 13532 }, { "epoch": 0.49048602805262587, "grad_norm": 2.265588336997043, "learning_rate": 5.396639837952036e-06, "loss": 0.8844, "step": 13533 }, { "epoch": 0.4905222717552825, "grad_norm": 2.268014841723196, "learning_rate": 5.396054756539733e-06, "loss": 0.7621, "step": 13534 }, { "epoch": 0.49055851545793916, "grad_norm": 2.116131701950456, "learning_rate": 5.395469669670036e-06, "loss": 0.8146, "step": 13535 }, { "epoch": 0.49059475916059586, "grad_norm": 2.419355163346127, "learning_rate": 5.394884577351006e-06, "loss": 1.1227, "step": 13536 }, { "epoch": 0.4906310028632525, "grad_norm": 2.236487888289974, "learning_rate": 5.394299479590704e-06, "loss": 0.9436, "step": 13537 }, { "epoch": 0.49066724656590915, "grad_norm": 2.2463865961942115, "learning_rate": 5.393714376397195e-06, "loss": 0.7434, "step": 13538 }, { "epoch": 0.49070349026856586, "grad_norm": 2.513103922092902, "learning_rate": 5.39312926777854e-06, "loss": 1.1925, "step": 13539 }, { "epoch": 0.4907397339712225, "grad_norm": 2.3628032051373813, "learning_rate": 5.392544153742801e-06, "loss": 0.9621, "step": 13540 }, { "epoch": 0.49077597767387915, "grad_norm": 2.3336217367492913, "learning_rate": 5.391959034298041e-06, "loss": 0.8951, "step": 13541 }, { "epoch": 0.49081222137653585, "grad_norm": 2.3480360513584966, "learning_rate": 5.391373909452323e-06, "loss": 1.0397, "step": 13542 }, { "epoch": 0.4908484650791925, "grad_norm": 2.1916186812829745, "learning_rate": 5.390788779213709e-06, "loss": 0.7431, "step": 13543 }, { "epoch": 0.49088470878184914, "grad_norm": 2.2385112877726714, "learning_rate": 5.390203643590263e-06, "loss": 0.9192, "step": 13544 }, { "epoch": 0.4909209524845058, "grad_norm": 2.606320570748042, "learning_rate": 5.389618502590045e-06, "loss": 1.0427, "step": 13545 }, { "epoch": 0.4909571961871625, "grad_norm": 2.2978031209228806, "learning_rate": 5.38903335622112e-06, "loss": 0.9869, "step": 13546 }, { "epoch": 0.49099343988981914, "grad_norm": 2.5241745146724295, "learning_rate": 5.388448204491551e-06, "loss": 1.1105, "step": 13547 }, { "epoch": 0.4910296835924758, "grad_norm": 2.340793826453275, "learning_rate": 5.387863047409401e-06, "loss": 0.9073, "step": 13548 }, { "epoch": 0.4910659272951325, "grad_norm": 2.191108578935485, "learning_rate": 5.387277884982731e-06, "loss": 1.0452, "step": 13549 }, { "epoch": 0.49110217099778913, "grad_norm": 2.601755093843234, "learning_rate": 5.386692717219608e-06, "loss": 0.846, "step": 13550 }, { "epoch": 0.4911384147004458, "grad_norm": 2.4456167773071367, "learning_rate": 5.386107544128092e-06, "loss": 0.815, "step": 13551 }, { "epoch": 0.4911746584031025, "grad_norm": 2.46755557441115, "learning_rate": 5.3855223657162494e-06, "loss": 1.0413, "step": 13552 }, { "epoch": 0.49121090210575913, "grad_norm": 3.5720336658369662, "learning_rate": 5.384937181992141e-06, "loss": 0.8974, "step": 13553 }, { "epoch": 0.4912471458084158, "grad_norm": 2.2753259106262034, "learning_rate": 5.38435199296383e-06, "loss": 0.9723, "step": 13554 }, { "epoch": 0.4912833895110725, "grad_norm": 2.370702209430782, "learning_rate": 5.383766798639381e-06, "loss": 0.7843, "step": 13555 }, { "epoch": 0.4913196332137291, "grad_norm": 2.703021360437218, "learning_rate": 5.383181599026859e-06, "loss": 1.0229, "step": 13556 }, { "epoch": 0.49135587691638577, "grad_norm": 2.452202064977085, "learning_rate": 5.382596394134325e-06, "loss": 1.0296, "step": 13557 }, { "epoch": 0.4913921206190424, "grad_norm": 2.362469866862499, "learning_rate": 5.382011183969844e-06, "loss": 0.8268, "step": 13558 }, { "epoch": 0.4914283643216991, "grad_norm": 2.066199769695132, "learning_rate": 5.38142596854148e-06, "loss": 0.7033, "step": 13559 }, { "epoch": 0.49146460802435576, "grad_norm": 2.2217554833375672, "learning_rate": 5.380840747857298e-06, "loss": 0.9804, "step": 13560 }, { "epoch": 0.4915008517270124, "grad_norm": 2.3967192430869018, "learning_rate": 5.38025552192536e-06, "loss": 0.8629, "step": 13561 }, { "epoch": 0.4915370954296691, "grad_norm": 2.1138964476397506, "learning_rate": 5.379670290753731e-06, "loss": 0.8755, "step": 13562 }, { "epoch": 0.49157333913232576, "grad_norm": 2.486788666215568, "learning_rate": 5.379085054350476e-06, "loss": 0.8064, "step": 13563 }, { "epoch": 0.4916095828349824, "grad_norm": 2.6180913614875845, "learning_rate": 5.378499812723657e-06, "loss": 0.9627, "step": 13564 }, { "epoch": 0.4916458265376391, "grad_norm": 2.6624275037284995, "learning_rate": 5.3779145658813395e-06, "loss": 0.9426, "step": 13565 }, { "epoch": 0.49168207024029575, "grad_norm": 2.689445959197727, "learning_rate": 5.377329313831587e-06, "loss": 1.0884, "step": 13566 }, { "epoch": 0.4917183139429524, "grad_norm": 2.2113541343498113, "learning_rate": 5.376744056582466e-06, "loss": 0.8147, "step": 13567 }, { "epoch": 0.4917545576456091, "grad_norm": 2.0660992655705486, "learning_rate": 5.37615879414204e-06, "loss": 0.6814, "step": 13568 }, { "epoch": 0.49179080134826575, "grad_norm": 2.3651828885152475, "learning_rate": 5.375573526518372e-06, "loss": 0.9404, "step": 13569 }, { "epoch": 0.4918270450509224, "grad_norm": 2.4918332546809507, "learning_rate": 5.37498825371953e-06, "loss": 0.9127, "step": 13570 }, { "epoch": 0.49186328875357904, "grad_norm": 1.925875944599728, "learning_rate": 5.374402975753574e-06, "loss": 0.7929, "step": 13571 }, { "epoch": 0.49189953245623574, "grad_norm": 2.112379530970029, "learning_rate": 5.373817692628573e-06, "loss": 0.9082, "step": 13572 }, { "epoch": 0.4919357761588924, "grad_norm": 2.3916885678431807, "learning_rate": 5.373232404352591e-06, "loss": 0.9195, "step": 13573 }, { "epoch": 0.49197201986154904, "grad_norm": 2.521757990925758, "learning_rate": 5.372647110933692e-06, "loss": 0.9008, "step": 13574 }, { "epoch": 0.49200826356420574, "grad_norm": 2.090669826384162, "learning_rate": 5.37206181237994e-06, "loss": 0.7812, "step": 13575 }, { "epoch": 0.4920445072668624, "grad_norm": 1.9469733953707762, "learning_rate": 5.371476508699401e-06, "loss": 0.9117, "step": 13576 }, { "epoch": 0.49208075096951903, "grad_norm": 1.9913422827455531, "learning_rate": 5.3708911999001415e-06, "loss": 1.0554, "step": 13577 }, { "epoch": 0.49211699467217573, "grad_norm": 2.3754498198535074, "learning_rate": 5.370305885990226e-06, "loss": 0.9517, "step": 13578 }, { "epoch": 0.4921532383748324, "grad_norm": 2.2206683842958825, "learning_rate": 5.369720566977717e-06, "loss": 0.9178, "step": 13579 }, { "epoch": 0.492189482077489, "grad_norm": 2.442223864340292, "learning_rate": 5.369135242870684e-06, "loss": 0.9102, "step": 13580 }, { "epoch": 0.4922257257801457, "grad_norm": 2.3135107066984726, "learning_rate": 5.36854991367719e-06, "loss": 0.9255, "step": 13581 }, { "epoch": 0.4922619694828024, "grad_norm": 1.9258630274930753, "learning_rate": 5.3679645794053015e-06, "loss": 0.8144, "step": 13582 }, { "epoch": 0.492298213185459, "grad_norm": 2.4517775186756974, "learning_rate": 5.367379240063084e-06, "loss": 0.8626, "step": 13583 }, { "epoch": 0.49233445688811567, "grad_norm": 2.5714832633311375, "learning_rate": 5.366793895658603e-06, "loss": 0.8869, "step": 13584 }, { "epoch": 0.49237070059077237, "grad_norm": 2.4246144281889808, "learning_rate": 5.3662085461999225e-06, "loss": 1.0381, "step": 13585 }, { "epoch": 0.492406944293429, "grad_norm": 2.3960570409104327, "learning_rate": 5.365623191695111e-06, "loss": 0.9404, "step": 13586 }, { "epoch": 0.49244318799608566, "grad_norm": 2.4232371508128088, "learning_rate": 5.3650378321522325e-06, "loss": 1.1218, "step": 13587 }, { "epoch": 0.49247943169874236, "grad_norm": 2.30908619197988, "learning_rate": 5.364452467579353e-06, "loss": 0.943, "step": 13588 }, { "epoch": 0.492515675401399, "grad_norm": 2.186907695897818, "learning_rate": 5.3638670979845386e-06, "loss": 1.0153, "step": 13589 }, { "epoch": 0.49255191910405566, "grad_norm": 2.3005246266592936, "learning_rate": 5.363281723375857e-06, "loss": 0.7244, "step": 13590 }, { "epoch": 0.49258816280671236, "grad_norm": 2.2935852761195687, "learning_rate": 5.362696343761374e-06, "loss": 0.9251, "step": 13591 }, { "epoch": 0.492624406509369, "grad_norm": 2.080154175694238, "learning_rate": 5.362110959149153e-06, "loss": 0.8837, "step": 13592 }, { "epoch": 0.49266065021202565, "grad_norm": 2.0177092063594335, "learning_rate": 5.361525569547261e-06, "loss": 0.7347, "step": 13593 }, { "epoch": 0.4926968939146823, "grad_norm": 2.5001161009222153, "learning_rate": 5.360940174963767e-06, "loss": 0.959, "step": 13594 }, { "epoch": 0.492733137617339, "grad_norm": 2.297666036401401, "learning_rate": 5.3603547754067345e-06, "loss": 0.9373, "step": 13595 }, { "epoch": 0.49276938131999565, "grad_norm": 2.1635006346954677, "learning_rate": 5.359769370884232e-06, "loss": 1.0485, "step": 13596 }, { "epoch": 0.4928056250226523, "grad_norm": 2.587070191285583, "learning_rate": 5.359183961404324e-06, "loss": 0.9244, "step": 13597 }, { "epoch": 0.492841868725309, "grad_norm": 2.41359739899821, "learning_rate": 5.358598546975079e-06, "loss": 0.8988, "step": 13598 }, { "epoch": 0.49287811242796564, "grad_norm": 2.4725847320699215, "learning_rate": 5.358013127604562e-06, "loss": 0.8069, "step": 13599 }, { "epoch": 0.4929143561306223, "grad_norm": 2.3200806647892156, "learning_rate": 5.357427703300842e-06, "loss": 1.0165, "step": 13600 }, { "epoch": 0.492950599833279, "grad_norm": 2.4131432385679883, "learning_rate": 5.356842274071985e-06, "loss": 0.9914, "step": 13601 }, { "epoch": 0.49298684353593564, "grad_norm": 2.4778439004919064, "learning_rate": 5.356256839926055e-06, "loss": 1.0637, "step": 13602 }, { "epoch": 0.4930230872385923, "grad_norm": 2.284294731464105, "learning_rate": 5.355671400871123e-06, "loss": 0.8704, "step": 13603 }, { "epoch": 0.493059330941249, "grad_norm": 2.6086114981018538, "learning_rate": 5.355085956915254e-06, "loss": 0.8681, "step": 13604 }, { "epoch": 0.49309557464390563, "grad_norm": 1.9444632578094712, "learning_rate": 5.354500508066515e-06, "loss": 0.6834, "step": 13605 }, { "epoch": 0.4931318183465623, "grad_norm": 2.1726672561135216, "learning_rate": 5.353915054332972e-06, "loss": 0.9935, "step": 13606 }, { "epoch": 0.4931680620492189, "grad_norm": 2.637310967383103, "learning_rate": 5.353329595722696e-06, "loss": 0.8657, "step": 13607 }, { "epoch": 0.4932043057518756, "grad_norm": 2.796748016782181, "learning_rate": 5.35274413224375e-06, "loss": 0.9054, "step": 13608 }, { "epoch": 0.4932405494545323, "grad_norm": 2.0819779407255163, "learning_rate": 5.352158663904204e-06, "loss": 0.9565, "step": 13609 }, { "epoch": 0.4932767931571889, "grad_norm": 2.1737134697804086, "learning_rate": 5.351573190712125e-06, "loss": 0.9174, "step": 13610 }, { "epoch": 0.4933130368598456, "grad_norm": 2.288542254573611, "learning_rate": 5.35098771267558e-06, "loss": 0.8116, "step": 13611 }, { "epoch": 0.49334928056250227, "grad_norm": 2.359842078890191, "learning_rate": 5.350402229802636e-06, "loss": 0.7243, "step": 13612 }, { "epoch": 0.4933855242651589, "grad_norm": 2.656589655741785, "learning_rate": 5.349816742101361e-06, "loss": 0.8138, "step": 13613 }, { "epoch": 0.4934217679678156, "grad_norm": 2.492727927210602, "learning_rate": 5.3492312495798226e-06, "loss": 0.7641, "step": 13614 }, { "epoch": 0.49345801167047226, "grad_norm": 2.296802264662162, "learning_rate": 5.34864575224609e-06, "loss": 0.8304, "step": 13615 }, { "epoch": 0.4934942553731289, "grad_norm": 2.348235799113906, "learning_rate": 5.348060250108229e-06, "loss": 0.8438, "step": 13616 }, { "epoch": 0.49353049907578556, "grad_norm": 2.379933304277483, "learning_rate": 5.347474743174309e-06, "loss": 0.823, "step": 13617 }, { "epoch": 0.49356674277844226, "grad_norm": 2.0896870036876947, "learning_rate": 5.346889231452396e-06, "loss": 0.7822, "step": 13618 }, { "epoch": 0.4936029864810989, "grad_norm": 2.079514267259227, "learning_rate": 5.34630371495056e-06, "loss": 0.8155, "step": 13619 }, { "epoch": 0.49363923018375555, "grad_norm": 2.0493949037980097, "learning_rate": 5.345718193676869e-06, "loss": 0.8591, "step": 13620 }, { "epoch": 0.49367547388641225, "grad_norm": 2.569963012073258, "learning_rate": 5.34513266763939e-06, "loss": 0.9668, "step": 13621 }, { "epoch": 0.4937117175890689, "grad_norm": 2.5079119087584285, "learning_rate": 5.34454713684619e-06, "loss": 0.8051, "step": 13622 }, { "epoch": 0.49374796129172555, "grad_norm": 2.4160633148874786, "learning_rate": 5.34396160130534e-06, "loss": 0.9595, "step": 13623 }, { "epoch": 0.49378420499438225, "grad_norm": 2.4586257129482765, "learning_rate": 5.343376061024906e-06, "loss": 1.0859, "step": 13624 }, { "epoch": 0.4938204486970389, "grad_norm": 2.3419455425116302, "learning_rate": 5.34279051601296e-06, "loss": 0.8113, "step": 13625 }, { "epoch": 0.49385669239969554, "grad_norm": 2.3717225718176884, "learning_rate": 5.342204966277566e-06, "loss": 1.086, "step": 13626 }, { "epoch": 0.49389293610235224, "grad_norm": 2.0181916724968145, "learning_rate": 5.341619411826795e-06, "loss": 0.9026, "step": 13627 }, { "epoch": 0.4939291798050089, "grad_norm": 2.4504006472443725, "learning_rate": 5.3410338526687145e-06, "loss": 0.8513, "step": 13628 }, { "epoch": 0.49396542350766554, "grad_norm": 2.3042758751296746, "learning_rate": 5.340448288811395e-06, "loss": 0.8125, "step": 13629 }, { "epoch": 0.4940016672103222, "grad_norm": 2.4428925430625674, "learning_rate": 5.3398627202629035e-06, "loss": 0.8535, "step": 13630 }, { "epoch": 0.4940379109129789, "grad_norm": 2.3871720350630357, "learning_rate": 5.33927714703131e-06, "loss": 0.9024, "step": 13631 }, { "epoch": 0.49407415461563553, "grad_norm": 2.3263712951024806, "learning_rate": 5.338691569124681e-06, "loss": 0.9918, "step": 13632 }, { "epoch": 0.4941103983182922, "grad_norm": 2.2633350708809115, "learning_rate": 5.338105986551089e-06, "loss": 0.9297, "step": 13633 }, { "epoch": 0.4941466420209489, "grad_norm": 2.3599310919302376, "learning_rate": 5.337520399318599e-06, "loss": 0.9885, "step": 13634 }, { "epoch": 0.4941828857236055, "grad_norm": 2.269238479574631, "learning_rate": 5.336934807435282e-06, "loss": 0.8994, "step": 13635 }, { "epoch": 0.49421912942626217, "grad_norm": 2.3255071741291413, "learning_rate": 5.336349210909207e-06, "loss": 0.7688, "step": 13636 }, { "epoch": 0.4942553731289189, "grad_norm": 2.2170008383728343, "learning_rate": 5.335763609748445e-06, "loss": 0.9537, "step": 13637 }, { "epoch": 0.4942916168315755, "grad_norm": 2.315020974608413, "learning_rate": 5.335178003961062e-06, "loss": 0.8737, "step": 13638 }, { "epoch": 0.49432786053423217, "grad_norm": 2.349602790194532, "learning_rate": 5.3345923935551305e-06, "loss": 1.0071, "step": 13639 }, { "epoch": 0.49436410423688887, "grad_norm": 11.154309377987946, "learning_rate": 5.334006778538719e-06, "loss": 0.9652, "step": 13640 }, { "epoch": 0.4944003479395455, "grad_norm": 2.1684701434605205, "learning_rate": 5.333421158919893e-06, "loss": 0.8161, "step": 13641 }, { "epoch": 0.49443659164220216, "grad_norm": 2.328676087237522, "learning_rate": 5.3328355347067265e-06, "loss": 1.1014, "step": 13642 }, { "epoch": 0.4944728353448588, "grad_norm": 2.3910725188406516, "learning_rate": 5.332249905907287e-06, "loss": 0.985, "step": 13643 }, { "epoch": 0.4945090790475155, "grad_norm": 2.2901425528931654, "learning_rate": 5.331664272529646e-06, "loss": 0.8773, "step": 13644 }, { "epoch": 0.49454532275017216, "grad_norm": 2.0181234387154348, "learning_rate": 5.331078634581871e-06, "loss": 0.9022, "step": 13645 }, { "epoch": 0.4945815664528288, "grad_norm": 2.2702854178300758, "learning_rate": 5.330492992072032e-06, "loss": 1.0231, "step": 13646 }, { "epoch": 0.4946178101554855, "grad_norm": 2.290297498349394, "learning_rate": 5.3299073450082e-06, "loss": 0.8168, "step": 13647 }, { "epoch": 0.49465405385814215, "grad_norm": 2.4183062861518074, "learning_rate": 5.329321693398445e-06, "loss": 0.935, "step": 13648 }, { "epoch": 0.4946902975607988, "grad_norm": 2.277712875137398, "learning_rate": 5.328736037250836e-06, "loss": 0.9864, "step": 13649 }, { "epoch": 0.4947265412634555, "grad_norm": 2.2987451136080463, "learning_rate": 5.3281503765734435e-06, "loss": 0.9305, "step": 13650 }, { "epoch": 0.49476278496611215, "grad_norm": 2.162905487806661, "learning_rate": 5.327564711374337e-06, "loss": 0.791, "step": 13651 }, { "epoch": 0.4947990286687688, "grad_norm": 2.2397138622100115, "learning_rate": 5.326979041661587e-06, "loss": 0.9194, "step": 13652 }, { "epoch": 0.49483527237142544, "grad_norm": 2.601972465655329, "learning_rate": 5.326393367443263e-06, "loss": 0.9749, "step": 13653 }, { "epoch": 0.49487151607408214, "grad_norm": 2.3196933138640268, "learning_rate": 5.325807688727436e-06, "loss": 0.8554, "step": 13654 }, { "epoch": 0.4949077597767388, "grad_norm": 2.3018360636670567, "learning_rate": 5.325222005522176e-06, "loss": 1.0817, "step": 13655 }, { "epoch": 0.49494400347939543, "grad_norm": 2.385966780798419, "learning_rate": 5.324636317835553e-06, "loss": 0.9121, "step": 13656 }, { "epoch": 0.49498024718205214, "grad_norm": 2.18499567357861, "learning_rate": 5.324050625675639e-06, "loss": 0.7156, "step": 13657 }, { "epoch": 0.4950164908847088, "grad_norm": 2.1814991644633923, "learning_rate": 5.323464929050501e-06, "loss": 0.8045, "step": 13658 }, { "epoch": 0.49505273458736543, "grad_norm": 2.124853933738911, "learning_rate": 5.322879227968215e-06, "loss": 0.8397, "step": 13659 }, { "epoch": 0.49508897829002213, "grad_norm": 2.537206815303605, "learning_rate": 5.322293522436848e-06, "loss": 0.7586, "step": 13660 }, { "epoch": 0.4951252219926788, "grad_norm": 2.0568440662420038, "learning_rate": 5.321707812464471e-06, "loss": 0.774, "step": 13661 }, { "epoch": 0.4951614656953354, "grad_norm": 2.19365468037031, "learning_rate": 5.321122098059153e-06, "loss": 1.0316, "step": 13662 }, { "epoch": 0.4951977093979921, "grad_norm": 2.526206455808396, "learning_rate": 5.320536379228969e-06, "loss": 0.9719, "step": 13663 }, { "epoch": 0.49523395310064877, "grad_norm": 2.4440666297192783, "learning_rate": 5.319950655981985e-06, "loss": 0.8694, "step": 13664 }, { "epoch": 0.4952701968033054, "grad_norm": 2.110112765285394, "learning_rate": 5.319364928326277e-06, "loss": 0.8489, "step": 13665 }, { "epoch": 0.49530644050596206, "grad_norm": 2.3019585134748595, "learning_rate": 5.318779196269912e-06, "loss": 0.9622, "step": 13666 }, { "epoch": 0.49534268420861877, "grad_norm": 2.0912077278909917, "learning_rate": 5.318193459820962e-06, "loss": 0.9616, "step": 13667 }, { "epoch": 0.4953789279112754, "grad_norm": 2.2429994825858124, "learning_rate": 5.3176077189875e-06, "loss": 0.939, "step": 13668 }, { "epoch": 0.49541517161393206, "grad_norm": 2.3101570747099864, "learning_rate": 5.317021973777595e-06, "loss": 0.9341, "step": 13669 }, { "epoch": 0.49545141531658876, "grad_norm": 2.473037194181068, "learning_rate": 5.316436224199319e-06, "loss": 0.8792, "step": 13670 }, { "epoch": 0.4954876590192454, "grad_norm": 2.692802920586446, "learning_rate": 5.315850470260745e-06, "loss": 0.9898, "step": 13671 }, { "epoch": 0.49552390272190205, "grad_norm": 2.2334868909905277, "learning_rate": 5.315264711969939e-06, "loss": 0.8962, "step": 13672 }, { "epoch": 0.49556014642455876, "grad_norm": 2.353389897818036, "learning_rate": 5.314678949334979e-06, "loss": 0.99, "step": 13673 }, { "epoch": 0.4955963901272154, "grad_norm": 2.3491958179010703, "learning_rate": 5.314093182363932e-06, "loss": 0.8661, "step": 13674 }, { "epoch": 0.49563263382987205, "grad_norm": 2.398734157046812, "learning_rate": 5.313507411064871e-06, "loss": 0.9499, "step": 13675 }, { "epoch": 0.4956688775325287, "grad_norm": 2.328102258522357, "learning_rate": 5.3129216354458665e-06, "loss": 0.7754, "step": 13676 }, { "epoch": 0.4957051212351854, "grad_norm": 2.2935609378859954, "learning_rate": 5.312335855514993e-06, "loss": 0.9794, "step": 13677 }, { "epoch": 0.49574136493784204, "grad_norm": 2.2839734003946996, "learning_rate": 5.311750071280319e-06, "loss": 0.8539, "step": 13678 }, { "epoch": 0.4957776086404987, "grad_norm": 2.232919473502645, "learning_rate": 5.31116428274992e-06, "loss": 0.8125, "step": 13679 }, { "epoch": 0.4958138523431554, "grad_norm": 2.226848797298004, "learning_rate": 5.310578489931862e-06, "loss": 0.9447, "step": 13680 }, { "epoch": 0.49585009604581204, "grad_norm": 2.0836207917274256, "learning_rate": 5.309992692834222e-06, "loss": 0.87, "step": 13681 }, { "epoch": 0.4958863397484687, "grad_norm": 2.329925851866384, "learning_rate": 5.309406891465069e-06, "loss": 0.8127, "step": 13682 }, { "epoch": 0.4959225834511254, "grad_norm": 2.201253187865317, "learning_rate": 5.308821085832476e-06, "loss": 0.92, "step": 13683 }, { "epoch": 0.49595882715378203, "grad_norm": 2.380832323031156, "learning_rate": 5.3082352759445176e-06, "loss": 0.8393, "step": 13684 }, { "epoch": 0.4959950708564387, "grad_norm": 2.2511251005606643, "learning_rate": 5.307649461809261e-06, "loss": 0.8389, "step": 13685 }, { "epoch": 0.4960313145590954, "grad_norm": 2.811214549201666, "learning_rate": 5.307063643434782e-06, "loss": 1.0966, "step": 13686 }, { "epoch": 0.49606755826175203, "grad_norm": 2.5370079168906146, "learning_rate": 5.3064778208291524e-06, "loss": 0.9495, "step": 13687 }, { "epoch": 0.4961038019644087, "grad_norm": 2.3776946043553693, "learning_rate": 5.305891994000444e-06, "loss": 0.7554, "step": 13688 }, { "epoch": 0.4961400456670653, "grad_norm": 2.449589314485527, "learning_rate": 5.305306162956727e-06, "loss": 0.9241, "step": 13689 }, { "epoch": 0.496176289369722, "grad_norm": 2.534862684984053, "learning_rate": 5.304720327706077e-06, "loss": 0.8588, "step": 13690 }, { "epoch": 0.49621253307237867, "grad_norm": 2.4189257103601265, "learning_rate": 5.304134488256565e-06, "loss": 0.888, "step": 13691 }, { "epoch": 0.4962487767750353, "grad_norm": 2.273346557296423, "learning_rate": 5.303548644616263e-06, "loss": 1.1903, "step": 13692 }, { "epoch": 0.496285020477692, "grad_norm": 2.2474035332307363, "learning_rate": 5.302962796793244e-06, "loss": 0.7204, "step": 13693 }, { "epoch": 0.49632126418034866, "grad_norm": 2.370246660195392, "learning_rate": 5.302376944795583e-06, "loss": 1.1155, "step": 13694 }, { "epoch": 0.4963575078830053, "grad_norm": 2.051752063379744, "learning_rate": 5.301791088631348e-06, "loss": 1.0039, "step": 13695 }, { "epoch": 0.496393751585662, "grad_norm": 2.2564173003979033, "learning_rate": 5.3012052283086165e-06, "loss": 0.7618, "step": 13696 }, { "epoch": 0.49642999528831866, "grad_norm": 2.531422172925002, "learning_rate": 5.300619363835459e-06, "loss": 1.0884, "step": 13697 }, { "epoch": 0.4964662389909753, "grad_norm": 2.4325364766590893, "learning_rate": 5.3000334952199475e-06, "loss": 0.9226, "step": 13698 }, { "epoch": 0.496502482693632, "grad_norm": 2.226409651727136, "learning_rate": 5.299447622470156e-06, "loss": 0.837, "step": 13699 }, { "epoch": 0.49653872639628865, "grad_norm": 2.384873574474108, "learning_rate": 5.298861745594157e-06, "loss": 0.8648, "step": 13700 }, { "epoch": 0.4965749700989453, "grad_norm": 2.228063533621774, "learning_rate": 5.298275864600024e-06, "loss": 0.7961, "step": 13701 }, { "epoch": 0.49661121380160195, "grad_norm": 2.4065725792324675, "learning_rate": 5.2976899794958305e-06, "loss": 1.017, "step": 13702 }, { "epoch": 0.49664745750425865, "grad_norm": 2.281237542338123, "learning_rate": 5.297104090289649e-06, "loss": 0.9976, "step": 13703 }, { "epoch": 0.4966837012069153, "grad_norm": 2.0342845870824884, "learning_rate": 5.2965181969895526e-06, "loss": 0.848, "step": 13704 }, { "epoch": 0.49671994490957194, "grad_norm": 2.139895320738342, "learning_rate": 5.295932299603614e-06, "loss": 0.8725, "step": 13705 }, { "epoch": 0.49675618861222864, "grad_norm": 2.223371822542198, "learning_rate": 5.2953463981399086e-06, "loss": 0.9355, "step": 13706 }, { "epoch": 0.4967924323148853, "grad_norm": 1.9980835920716924, "learning_rate": 5.2947604926065086e-06, "loss": 0.7843, "step": 13707 }, { "epoch": 0.49682867601754194, "grad_norm": 2.3091458624855834, "learning_rate": 5.294174583011487e-06, "loss": 0.9538, "step": 13708 }, { "epoch": 0.49686491972019864, "grad_norm": 2.2993407626231557, "learning_rate": 5.293588669362917e-06, "loss": 0.8798, "step": 13709 }, { "epoch": 0.4969011634228553, "grad_norm": 2.26321395000814, "learning_rate": 5.2930027516688734e-06, "loss": 0.896, "step": 13710 }, { "epoch": 0.49693740712551193, "grad_norm": 2.938545026298346, "learning_rate": 5.292416829937428e-06, "loss": 0.9463, "step": 13711 }, { "epoch": 0.4969736508281686, "grad_norm": 2.2837445586775518, "learning_rate": 5.2918309041766565e-06, "loss": 0.9492, "step": 13712 }, { "epoch": 0.4970098945308253, "grad_norm": 2.3451300027756323, "learning_rate": 5.29124497439463e-06, "loss": 0.8568, "step": 13713 }, { "epoch": 0.4970461382334819, "grad_norm": 2.509329111941279, "learning_rate": 5.290659040599426e-06, "loss": 0.8239, "step": 13714 }, { "epoch": 0.4970823819361386, "grad_norm": 2.350679882550357, "learning_rate": 5.290073102799115e-06, "loss": 0.7965, "step": 13715 }, { "epoch": 0.4971186256387953, "grad_norm": 2.130263298309371, "learning_rate": 5.289487161001773e-06, "loss": 0.8154, "step": 13716 }, { "epoch": 0.4971548693414519, "grad_norm": 2.4170307977941046, "learning_rate": 5.288901215215473e-06, "loss": 0.883, "step": 13717 }, { "epoch": 0.49719111304410857, "grad_norm": 2.240932953904835, "learning_rate": 5.288315265448289e-06, "loss": 0.725, "step": 13718 }, { "epoch": 0.49722735674676527, "grad_norm": 2.1652129804187403, "learning_rate": 5.287729311708294e-06, "loss": 0.679, "step": 13719 }, { "epoch": 0.4972636004494219, "grad_norm": 2.407215825706009, "learning_rate": 5.2871433540035636e-06, "loss": 0.8822, "step": 13720 }, { "epoch": 0.49729984415207856, "grad_norm": 2.30222153147342, "learning_rate": 5.286557392342171e-06, "loss": 0.8482, "step": 13721 }, { "epoch": 0.49733608785473526, "grad_norm": 2.1567674009477007, "learning_rate": 5.2859714267321916e-06, "loss": 0.7393, "step": 13722 }, { "epoch": 0.4973723315573919, "grad_norm": 2.5223736588490646, "learning_rate": 5.285385457181697e-06, "loss": 0.9019, "step": 13723 }, { "epoch": 0.49740857526004856, "grad_norm": 2.2697731247061603, "learning_rate": 5.284799483698766e-06, "loss": 0.8118, "step": 13724 }, { "epoch": 0.4974448189627052, "grad_norm": 2.5618779552901114, "learning_rate": 5.284213506291469e-06, "loss": 0.7922, "step": 13725 }, { "epoch": 0.4974810626653619, "grad_norm": 2.1810954835158003, "learning_rate": 5.2836275249678825e-06, "loss": 0.9007, "step": 13726 }, { "epoch": 0.49751730636801855, "grad_norm": 2.249886637934967, "learning_rate": 5.28304153973608e-06, "loss": 0.8487, "step": 13727 }, { "epoch": 0.4975535500706752, "grad_norm": 2.4963929057024217, "learning_rate": 5.282455550604137e-06, "loss": 0.9627, "step": 13728 }, { "epoch": 0.4975897937733319, "grad_norm": 2.2861619457859867, "learning_rate": 5.281869557580126e-06, "loss": 0.8403, "step": 13729 }, { "epoch": 0.49762603747598855, "grad_norm": 2.04493861905193, "learning_rate": 5.281283560672123e-06, "loss": 0.8844, "step": 13730 }, { "epoch": 0.4976622811786452, "grad_norm": 2.5495838851808728, "learning_rate": 5.280697559888202e-06, "loss": 0.9994, "step": 13731 }, { "epoch": 0.4976985248813019, "grad_norm": 2.3820067120409094, "learning_rate": 5.28011155523644e-06, "loss": 0.8371, "step": 13732 }, { "epoch": 0.49773476858395854, "grad_norm": 2.129687451120516, "learning_rate": 5.27952554672491e-06, "loss": 0.8903, "step": 13733 }, { "epoch": 0.4977710122866152, "grad_norm": 2.446258419590579, "learning_rate": 5.2789395343616865e-06, "loss": 0.9427, "step": 13734 }, { "epoch": 0.4978072559892719, "grad_norm": 2.2946625219214414, "learning_rate": 5.278353518154844e-06, "loss": 0.9405, "step": 13735 }, { "epoch": 0.49784349969192854, "grad_norm": 2.0886661094601524, "learning_rate": 5.277767498112461e-06, "loss": 0.8446, "step": 13736 }, { "epoch": 0.4978797433945852, "grad_norm": 2.557708324903387, "learning_rate": 5.277181474242608e-06, "loss": 0.8694, "step": 13737 }, { "epoch": 0.49791598709724183, "grad_norm": 2.563597707797016, "learning_rate": 5.276595446553362e-06, "loss": 0.9487, "step": 13738 }, { "epoch": 0.49795223079989853, "grad_norm": 2.370907133834135, "learning_rate": 5.276009415052797e-06, "loss": 0.9586, "step": 13739 }, { "epoch": 0.4979884745025552, "grad_norm": 2.293170707543203, "learning_rate": 5.275423379748992e-06, "loss": 0.89, "step": 13740 }, { "epoch": 0.4980247182052118, "grad_norm": 2.230582507779243, "learning_rate": 5.274837340650017e-06, "loss": 1.0568, "step": 13741 }, { "epoch": 0.4980609619078685, "grad_norm": 2.3502592318234767, "learning_rate": 5.27425129776395e-06, "loss": 0.993, "step": 13742 }, { "epoch": 0.4980972056105252, "grad_norm": 2.369061500390403, "learning_rate": 5.273665251098866e-06, "loss": 0.9138, "step": 13743 }, { "epoch": 0.4981334493131818, "grad_norm": 2.692590837674375, "learning_rate": 5.273079200662842e-06, "loss": 1.1368, "step": 13744 }, { "epoch": 0.4981696930158385, "grad_norm": 2.3002831548017375, "learning_rate": 5.27249314646395e-06, "loss": 0.9667, "step": 13745 }, { "epoch": 0.49820593671849517, "grad_norm": 2.3444019627713297, "learning_rate": 5.271907088510268e-06, "loss": 0.9105, "step": 13746 }, { "epoch": 0.4982421804211518, "grad_norm": 2.0821677374032084, "learning_rate": 5.27132102680987e-06, "loss": 0.9084, "step": 13747 }, { "epoch": 0.49827842412380846, "grad_norm": 2.1151321235427933, "learning_rate": 5.270734961370834e-06, "loss": 0.9616, "step": 13748 }, { "epoch": 0.49831466782646516, "grad_norm": 2.1938797012449727, "learning_rate": 5.270148892201233e-06, "loss": 1.0757, "step": 13749 }, { "epoch": 0.4983509115291218, "grad_norm": 2.2812619539678876, "learning_rate": 5.269562819309143e-06, "loss": 0.9791, "step": 13750 }, { "epoch": 0.49838715523177846, "grad_norm": 2.4176155106182606, "learning_rate": 5.2689767427026405e-06, "loss": 0.883, "step": 13751 }, { "epoch": 0.49842339893443516, "grad_norm": 2.3004559674562035, "learning_rate": 5.268390662389802e-06, "loss": 1.0117, "step": 13752 }, { "epoch": 0.4984596426370918, "grad_norm": 2.209692108966258, "learning_rate": 5.267804578378701e-06, "loss": 0.844, "step": 13753 }, { "epoch": 0.49849588633974845, "grad_norm": 2.1721985352000486, "learning_rate": 5.267218490677416e-06, "loss": 0.8413, "step": 13754 }, { "epoch": 0.49853213004240515, "grad_norm": 1.9966015625240117, "learning_rate": 5.266632399294021e-06, "loss": 0.818, "step": 13755 }, { "epoch": 0.4985683737450618, "grad_norm": 2.5420240074797293, "learning_rate": 5.2660463042365936e-06, "loss": 0.8596, "step": 13756 }, { "epoch": 0.49860461744771845, "grad_norm": 2.2292120501246346, "learning_rate": 5.26546020551321e-06, "loss": 0.7455, "step": 13757 }, { "epoch": 0.49864086115037515, "grad_norm": 2.0851026974869966, "learning_rate": 5.264874103131945e-06, "loss": 0.7065, "step": 13758 }, { "epoch": 0.4986771048530318, "grad_norm": 2.4497172770973954, "learning_rate": 5.264287997100873e-06, "loss": 1.1002, "step": 13759 }, { "epoch": 0.49871334855568844, "grad_norm": 2.662545213740071, "learning_rate": 5.263701887428074e-06, "loss": 0.9086, "step": 13760 }, { "epoch": 0.4987495922583451, "grad_norm": 2.5683490858942726, "learning_rate": 5.263115774121621e-06, "loss": 0.952, "step": 13761 }, { "epoch": 0.4987858359610018, "grad_norm": 2.1478499297480744, "learning_rate": 5.262529657189593e-06, "loss": 0.7562, "step": 13762 }, { "epoch": 0.49882207966365844, "grad_norm": 2.2482461711732546, "learning_rate": 5.2619435366400655e-06, "loss": 0.8896, "step": 13763 }, { "epoch": 0.4988583233663151, "grad_norm": 2.1017749666464356, "learning_rate": 5.261357412481114e-06, "loss": 0.8801, "step": 13764 }, { "epoch": 0.4988945670689718, "grad_norm": 2.5055768495549184, "learning_rate": 5.260771284720815e-06, "loss": 0.9323, "step": 13765 }, { "epoch": 0.49893081077162843, "grad_norm": 2.291876811498956, "learning_rate": 5.260185153367245e-06, "loss": 0.9658, "step": 13766 }, { "epoch": 0.4989670544742851, "grad_norm": 2.3512255847282946, "learning_rate": 5.259599018428482e-06, "loss": 0.8317, "step": 13767 }, { "epoch": 0.4990032981769418, "grad_norm": 1.979679481107765, "learning_rate": 5.259012879912601e-06, "loss": 0.9216, "step": 13768 }, { "epoch": 0.4990395418795984, "grad_norm": 2.281621672666546, "learning_rate": 5.25842673782768e-06, "loss": 0.8694, "step": 13769 }, { "epoch": 0.49907578558225507, "grad_norm": 2.3115406481205194, "learning_rate": 5.257840592181793e-06, "loss": 0.9155, "step": 13770 }, { "epoch": 0.4991120292849118, "grad_norm": 2.289220739892545, "learning_rate": 5.25725444298302e-06, "loss": 1.0301, "step": 13771 }, { "epoch": 0.4991482729875684, "grad_norm": 2.129541627271553, "learning_rate": 5.256668290239435e-06, "loss": 0.9782, "step": 13772 }, { "epoch": 0.49918451669022507, "grad_norm": 2.3750400960258933, "learning_rate": 5.256082133959118e-06, "loss": 0.9399, "step": 13773 }, { "epoch": 0.4992207603928817, "grad_norm": 2.2103166788382316, "learning_rate": 5.255495974150143e-06, "loss": 0.7073, "step": 13774 }, { "epoch": 0.4992570040955384, "grad_norm": 2.2438429162576066, "learning_rate": 5.2549098108205895e-06, "loss": 0.8232, "step": 13775 }, { "epoch": 0.49929324779819506, "grad_norm": 2.492845401347325, "learning_rate": 5.254323643978531e-06, "loss": 0.8979, "step": 13776 }, { "epoch": 0.4993294915008517, "grad_norm": 2.3522356920340077, "learning_rate": 5.253737473632048e-06, "loss": 0.9774, "step": 13777 }, { "epoch": 0.4993657352035084, "grad_norm": 2.3760971030570928, "learning_rate": 5.253151299789214e-06, "loss": 0.8924, "step": 13778 }, { "epoch": 0.49940197890616506, "grad_norm": 2.030906719008291, "learning_rate": 5.252565122458111e-06, "loss": 0.7937, "step": 13779 }, { "epoch": 0.4994382226088217, "grad_norm": 2.4779151011476896, "learning_rate": 5.25197894164681e-06, "loss": 0.8567, "step": 13780 }, { "epoch": 0.4994744663114784, "grad_norm": 2.3613946218918542, "learning_rate": 5.251392757363395e-06, "loss": 1.095, "step": 13781 }, { "epoch": 0.49951071001413505, "grad_norm": 2.247436712147705, "learning_rate": 5.250806569615937e-06, "loss": 0.9639, "step": 13782 }, { "epoch": 0.4995469537167917, "grad_norm": 2.450142650174557, "learning_rate": 5.250220378412518e-06, "loss": 0.9095, "step": 13783 }, { "epoch": 0.49958319741944834, "grad_norm": 2.5844395025555733, "learning_rate": 5.249634183761213e-06, "loss": 1.1172, "step": 13784 }, { "epoch": 0.49961944112210505, "grad_norm": 2.547589528990396, "learning_rate": 5.2490479856701e-06, "loss": 0.9111, "step": 13785 }, { "epoch": 0.4996556848247617, "grad_norm": 2.3970528254881454, "learning_rate": 5.248461784147256e-06, "loss": 0.9333, "step": 13786 }, { "epoch": 0.49969192852741834, "grad_norm": 1.9567395229958444, "learning_rate": 5.247875579200759e-06, "loss": 0.8082, "step": 13787 }, { "epoch": 0.49972817223007504, "grad_norm": 2.35307752765531, "learning_rate": 5.247289370838687e-06, "loss": 0.8888, "step": 13788 }, { "epoch": 0.4997644159327317, "grad_norm": 2.4620246179866743, "learning_rate": 5.246703159069116e-06, "loss": 0.9598, "step": 13789 }, { "epoch": 0.49980065963538833, "grad_norm": 2.386686085966184, "learning_rate": 5.246116943900126e-06, "loss": 0.8103, "step": 13790 }, { "epoch": 0.49983690333804504, "grad_norm": 2.6367485226033622, "learning_rate": 5.245530725339793e-06, "loss": 1.0418, "step": 13791 }, { "epoch": 0.4998731470407017, "grad_norm": 2.4352119770648843, "learning_rate": 5.2449445033961935e-06, "loss": 0.8236, "step": 13792 }, { "epoch": 0.49990939074335833, "grad_norm": 2.5510163905354517, "learning_rate": 5.244358278077408e-06, "loss": 0.9759, "step": 13793 }, { "epoch": 0.49994563444601503, "grad_norm": 2.5279812194898, "learning_rate": 5.243772049391514e-06, "loss": 1.0701, "step": 13794 }, { "epoch": 0.4999818781486717, "grad_norm": 2.2334652375908672, "learning_rate": 5.243185817346589e-06, "loss": 0.8155, "step": 13795 }, { "epoch": 0.5000181218513283, "grad_norm": 2.342423277232948, "learning_rate": 5.242599581950708e-06, "loss": 0.8669, "step": 13796 }, { "epoch": 0.500054365553985, "grad_norm": 2.4072888791246614, "learning_rate": 5.242013343211954e-06, "loss": 0.9134, "step": 13797 }, { "epoch": 0.5000906092566416, "grad_norm": 2.227196950953239, "learning_rate": 5.241427101138401e-06, "loss": 0.8541, "step": 13798 }, { "epoch": 0.5001268529592984, "grad_norm": 2.453464386060543, "learning_rate": 5.240840855738129e-06, "loss": 0.8416, "step": 13799 }, { "epoch": 0.500163096661955, "grad_norm": 2.738042984696872, "learning_rate": 5.240254607019215e-06, "loss": 1.0854, "step": 13800 }, { "epoch": 0.5001993403646117, "grad_norm": 2.51425121294893, "learning_rate": 5.239668354989739e-06, "loss": 1.1731, "step": 13801 }, { "epoch": 0.5002355840672683, "grad_norm": 2.47038745426397, "learning_rate": 5.239082099657776e-06, "loss": 1.1971, "step": 13802 }, { "epoch": 0.500271827769925, "grad_norm": 2.4586141269932305, "learning_rate": 5.238495841031409e-06, "loss": 0.9993, "step": 13803 }, { "epoch": 0.5003080714725816, "grad_norm": 2.201295911595947, "learning_rate": 5.237909579118713e-06, "loss": 0.8004, "step": 13804 }, { "epoch": 0.5003443151752383, "grad_norm": 2.395910849014937, "learning_rate": 5.237323313927765e-06, "loss": 0.7382, "step": 13805 }, { "epoch": 0.500380558877895, "grad_norm": 2.2236302624841615, "learning_rate": 5.236737045466646e-06, "loss": 0.7282, "step": 13806 }, { "epoch": 0.5004168025805517, "grad_norm": 2.404008268077767, "learning_rate": 5.236150773743436e-06, "loss": 0.8589, "step": 13807 }, { "epoch": 0.5004530462832083, "grad_norm": 2.2628059490284262, "learning_rate": 5.235564498766208e-06, "loss": 0.917, "step": 13808 }, { "epoch": 0.500489289985865, "grad_norm": 2.2197265225195486, "learning_rate": 5.2349782205430456e-06, "loss": 0.988, "step": 13809 }, { "epoch": 0.5005255336885216, "grad_norm": 2.5605391962284374, "learning_rate": 5.234391939082024e-06, "loss": 1.0822, "step": 13810 }, { "epoch": 0.5005617773911782, "grad_norm": 2.116729394405107, "learning_rate": 5.233805654391224e-06, "loss": 0.6681, "step": 13811 }, { "epoch": 0.500598021093835, "grad_norm": 1.9768331362765568, "learning_rate": 5.233219366478724e-06, "loss": 0.7625, "step": 13812 }, { "epoch": 0.5006342647964916, "grad_norm": 2.5396395067918296, "learning_rate": 5.2326330753526015e-06, "loss": 0.7991, "step": 13813 }, { "epoch": 0.5006705084991483, "grad_norm": 2.351033473958706, "learning_rate": 5.232046781020936e-06, "loss": 0.9948, "step": 13814 }, { "epoch": 0.5007067522018049, "grad_norm": 2.259834827616085, "learning_rate": 5.231460483491807e-06, "loss": 0.8423, "step": 13815 }, { "epoch": 0.5007429959044616, "grad_norm": 2.3859985870839333, "learning_rate": 5.230874182773291e-06, "loss": 0.893, "step": 13816 }, { "epoch": 0.5007792396071182, "grad_norm": 2.2708695015624034, "learning_rate": 5.230287878873469e-06, "loss": 0.8277, "step": 13817 }, { "epoch": 0.5008154833097749, "grad_norm": 2.450248942276503, "learning_rate": 5.22970157180042e-06, "loss": 0.613, "step": 13818 }, { "epoch": 0.5008517270124316, "grad_norm": 2.208193697642062, "learning_rate": 5.229115261562221e-06, "loss": 0.9308, "step": 13819 }, { "epoch": 0.5008879707150883, "grad_norm": 2.287928164332552, "learning_rate": 5.228528948166953e-06, "loss": 0.9681, "step": 13820 }, { "epoch": 0.5009242144177449, "grad_norm": 2.23694657147039, "learning_rate": 5.227942631622694e-06, "loss": 0.9445, "step": 13821 }, { "epoch": 0.5009604581204016, "grad_norm": 2.6723737453567056, "learning_rate": 5.227356311937524e-06, "loss": 0.9567, "step": 13822 }, { "epoch": 0.5009967018230582, "grad_norm": 2.342931812019287, "learning_rate": 5.226769989119521e-06, "loss": 0.8037, "step": 13823 }, { "epoch": 0.5010329455257149, "grad_norm": 2.332883596510988, "learning_rate": 5.2261836631767645e-06, "loss": 0.9129, "step": 13824 }, { "epoch": 0.5010691892283716, "grad_norm": 2.5340789000800807, "learning_rate": 5.225597334117336e-06, "loss": 0.9077, "step": 13825 }, { "epoch": 0.5011054329310283, "grad_norm": 2.306775294626522, "learning_rate": 5.225011001949309e-06, "loss": 0.9879, "step": 13826 }, { "epoch": 0.5011416766336849, "grad_norm": 2.4837211326910524, "learning_rate": 5.2244246666807695e-06, "loss": 1.0609, "step": 13827 }, { "epoch": 0.5011779203363416, "grad_norm": 2.0247321401877834, "learning_rate": 5.223838328319792e-06, "loss": 0.8183, "step": 13828 }, { "epoch": 0.5012141640389982, "grad_norm": 2.31224750000084, "learning_rate": 5.223251986874459e-06, "loss": 0.8992, "step": 13829 }, { "epoch": 0.5012504077416549, "grad_norm": 2.263160592757438, "learning_rate": 5.222665642352847e-06, "loss": 1.0665, "step": 13830 }, { "epoch": 0.5012866514443115, "grad_norm": 2.2929732853619664, "learning_rate": 5.2220792947630395e-06, "loss": 0.8687, "step": 13831 }, { "epoch": 0.5013228951469683, "grad_norm": 2.695735261614467, "learning_rate": 5.221492944113112e-06, "loss": 0.9202, "step": 13832 }, { "epoch": 0.5013591388496249, "grad_norm": 2.230699630062037, "learning_rate": 5.220906590411147e-06, "loss": 1.0645, "step": 13833 }, { "epoch": 0.5013953825522816, "grad_norm": 2.4759858278311158, "learning_rate": 5.220320233665221e-06, "loss": 0.895, "step": 13834 }, { "epoch": 0.5014316262549382, "grad_norm": 2.5249549341521673, "learning_rate": 5.219733873883417e-06, "loss": 0.9224, "step": 13835 }, { "epoch": 0.5014678699575948, "grad_norm": 2.253620839440305, "learning_rate": 5.219147511073812e-06, "loss": 0.9166, "step": 13836 }, { "epoch": 0.5015041136602515, "grad_norm": 2.5058250242676534, "learning_rate": 5.218561145244487e-06, "loss": 0.9605, "step": 13837 }, { "epoch": 0.5015403573629081, "grad_norm": 2.2247466994668583, "learning_rate": 5.217974776403521e-06, "loss": 0.8409, "step": 13838 }, { "epoch": 0.5015766010655649, "grad_norm": 2.341369562600416, "learning_rate": 5.217388404558996e-06, "loss": 1.0059, "step": 13839 }, { "epoch": 0.5016128447682215, "grad_norm": 2.458597243420475, "learning_rate": 5.216802029718989e-06, "loss": 0.9259, "step": 13840 }, { "epoch": 0.5016490884708782, "grad_norm": 2.3779158356023777, "learning_rate": 5.216215651891582e-06, "loss": 0.8723, "step": 13841 }, { "epoch": 0.5016853321735348, "grad_norm": 1.7733499236799493, "learning_rate": 5.215629271084853e-06, "loss": 0.6739, "step": 13842 }, { "epoch": 0.5017215758761915, "grad_norm": 2.6979477925100324, "learning_rate": 5.2150428873068846e-06, "loss": 0.8052, "step": 13843 }, { "epoch": 0.5017578195788481, "grad_norm": 2.1908620436826096, "learning_rate": 5.2144565005657545e-06, "loss": 0.861, "step": 13844 }, { "epoch": 0.5017940632815049, "grad_norm": 2.8637470513990606, "learning_rate": 5.213870110869543e-06, "loss": 0.8194, "step": 13845 }, { "epoch": 0.5018303069841615, "grad_norm": 2.725392565274935, "learning_rate": 5.213283718226332e-06, "loss": 1.3818, "step": 13846 }, { "epoch": 0.5018665506868182, "grad_norm": 2.3647601556633555, "learning_rate": 5.212697322644198e-06, "loss": 1.0404, "step": 13847 }, { "epoch": 0.5019027943894748, "grad_norm": 2.6338332649528353, "learning_rate": 5.2121109241312264e-06, "loss": 1.0838, "step": 13848 }, { "epoch": 0.5019390380921315, "grad_norm": 2.1336361837198092, "learning_rate": 5.211524522695493e-06, "loss": 1.0594, "step": 13849 }, { "epoch": 0.5019752817947881, "grad_norm": 2.444701232305251, "learning_rate": 5.2109381183450795e-06, "loss": 0.9146, "step": 13850 }, { "epoch": 0.5020115254974448, "grad_norm": 2.4696365399685356, "learning_rate": 5.210351711088066e-06, "loss": 0.9956, "step": 13851 }, { "epoch": 0.5020477692001015, "grad_norm": 2.2932272248567913, "learning_rate": 5.2097653009325346e-06, "loss": 0.9665, "step": 13852 }, { "epoch": 0.5020840129027582, "grad_norm": 2.34372245459189, "learning_rate": 5.209178887886563e-06, "loss": 0.9244, "step": 13853 }, { "epoch": 0.5021202566054148, "grad_norm": 2.2593246688620097, "learning_rate": 5.208592471958234e-06, "loss": 0.9753, "step": 13854 }, { "epoch": 0.5021565003080715, "grad_norm": 2.3425722670376334, "learning_rate": 5.208006053155625e-06, "loss": 0.9395, "step": 13855 }, { "epoch": 0.5021927440107281, "grad_norm": 2.2712830243900446, "learning_rate": 5.2074196314868195e-06, "loss": 0.9452, "step": 13856 }, { "epoch": 0.5022289877133848, "grad_norm": 2.345691890068185, "learning_rate": 5.206833206959897e-06, "loss": 0.9061, "step": 13857 }, { "epoch": 0.5022652314160415, "grad_norm": 2.227095570231292, "learning_rate": 5.206246779582937e-06, "loss": 0.9586, "step": 13858 }, { "epoch": 0.5023014751186982, "grad_norm": 2.5158529007735746, "learning_rate": 5.205660349364023e-06, "loss": 0.726, "step": 13859 }, { "epoch": 0.5023377188213548, "grad_norm": 2.1797946205987166, "learning_rate": 5.2050739163112314e-06, "loss": 0.8376, "step": 13860 }, { "epoch": 0.5023739625240115, "grad_norm": 2.515460352063862, "learning_rate": 5.2044874804326486e-06, "loss": 1.0019, "step": 13861 }, { "epoch": 0.5024102062266681, "grad_norm": 2.5265705189893066, "learning_rate": 5.2039010417363504e-06, "loss": 0.9074, "step": 13862 }, { "epoch": 0.5024464499293247, "grad_norm": 2.045307922545364, "learning_rate": 5.203314600230418e-06, "loss": 0.6915, "step": 13863 }, { "epoch": 0.5024826936319814, "grad_norm": 2.1803095568445, "learning_rate": 5.202728155922934e-06, "loss": 1.0357, "step": 13864 }, { "epoch": 0.5025189373346381, "grad_norm": 2.5569321684647464, "learning_rate": 5.202141708821978e-06, "loss": 0.7183, "step": 13865 }, { "epoch": 0.5025551810372948, "grad_norm": 2.632354260081071, "learning_rate": 5.201555258935633e-06, "loss": 1.0676, "step": 13866 }, { "epoch": 0.5025914247399514, "grad_norm": 2.2875019596243042, "learning_rate": 5.200968806271977e-06, "loss": 0.7694, "step": 13867 }, { "epoch": 0.5026276684426081, "grad_norm": 2.1273387065246947, "learning_rate": 5.200382350839094e-06, "loss": 0.9746, "step": 13868 }, { "epoch": 0.5026639121452647, "grad_norm": 2.240930799540965, "learning_rate": 5.199795892645061e-06, "loss": 1.071, "step": 13869 }, { "epoch": 0.5027001558479214, "grad_norm": 2.490456491107236, "learning_rate": 5.199209431697964e-06, "loss": 1.01, "step": 13870 }, { "epoch": 0.5027363995505781, "grad_norm": 2.1972450547232847, "learning_rate": 5.19862296800588e-06, "loss": 0.7264, "step": 13871 }, { "epoch": 0.5027726432532348, "grad_norm": 2.3973844082185485, "learning_rate": 5.198036501576893e-06, "loss": 0.879, "step": 13872 }, { "epoch": 0.5028088869558914, "grad_norm": 2.1887275576196807, "learning_rate": 5.197450032419081e-06, "loss": 0.8748, "step": 13873 }, { "epoch": 0.5028451306585481, "grad_norm": 2.47965798416955, "learning_rate": 5.196863560540528e-06, "loss": 1.073, "step": 13874 }, { "epoch": 0.5028813743612047, "grad_norm": 2.202375791892802, "learning_rate": 5.196277085949314e-06, "loss": 0.7242, "step": 13875 }, { "epoch": 0.5029176180638614, "grad_norm": 2.151912869341704, "learning_rate": 5.19569060865352e-06, "loss": 0.9679, "step": 13876 }, { "epoch": 0.502953861766518, "grad_norm": 2.4256586054922975, "learning_rate": 5.195104128661228e-06, "loss": 0.9232, "step": 13877 }, { "epoch": 0.5029901054691748, "grad_norm": 2.4548434088257616, "learning_rate": 5.194517645980519e-06, "loss": 1.0403, "step": 13878 }, { "epoch": 0.5030263491718314, "grad_norm": 2.3442432927661856, "learning_rate": 5.193931160619474e-06, "loss": 0.9051, "step": 13879 }, { "epoch": 0.5030625928744881, "grad_norm": 2.4950229303402245, "learning_rate": 5.193344672586177e-06, "loss": 1.0886, "step": 13880 }, { "epoch": 0.5030988365771447, "grad_norm": 2.310102654772083, "learning_rate": 5.1927581818887064e-06, "loss": 0.9313, "step": 13881 }, { "epoch": 0.5031350802798014, "grad_norm": 2.1546314645479647, "learning_rate": 5.192171688535145e-06, "loss": 0.8477, "step": 13882 }, { "epoch": 0.503171323982458, "grad_norm": 2.413224065457596, "learning_rate": 5.1915851925335715e-06, "loss": 0.9125, "step": 13883 }, { "epoch": 0.5032075676851148, "grad_norm": 2.2617843839527967, "learning_rate": 5.190998693892072e-06, "loss": 0.9039, "step": 13884 }, { "epoch": 0.5032438113877714, "grad_norm": 2.098618614162239, "learning_rate": 5.190412192618726e-06, "loss": 0.8218, "step": 13885 }, { "epoch": 0.5032800550904281, "grad_norm": 2.2906164599830476, "learning_rate": 5.189825688721614e-06, "loss": 0.9702, "step": 13886 }, { "epoch": 0.5033162987930847, "grad_norm": 2.380718937883655, "learning_rate": 5.189239182208819e-06, "loss": 0.9377, "step": 13887 }, { "epoch": 0.5033525424957414, "grad_norm": 1.8953308231709152, "learning_rate": 5.188652673088423e-06, "loss": 0.6639, "step": 13888 }, { "epoch": 0.503388786198398, "grad_norm": 2.265750754370008, "learning_rate": 5.188066161368508e-06, "loss": 0.9038, "step": 13889 }, { "epoch": 0.5034250299010546, "grad_norm": 2.2054987159341364, "learning_rate": 5.187479647057154e-06, "loss": 0.9001, "step": 13890 }, { "epoch": 0.5034612736037114, "grad_norm": 2.261713653745732, "learning_rate": 5.1868931301624445e-06, "loss": 0.7359, "step": 13891 }, { "epoch": 0.503497517306368, "grad_norm": 2.26112673450536, "learning_rate": 5.186306610692462e-06, "loss": 0.872, "step": 13892 }, { "epoch": 0.5035337610090247, "grad_norm": 2.733215384205989, "learning_rate": 5.1857200886552835e-06, "loss": 1.034, "step": 13893 }, { "epoch": 0.5035700047116813, "grad_norm": 2.111198013205502, "learning_rate": 5.1851335640589974e-06, "loss": 0.9944, "step": 13894 }, { "epoch": 0.503606248414338, "grad_norm": 2.341814536311885, "learning_rate": 5.184547036911681e-06, "loss": 0.7426, "step": 13895 }, { "epoch": 0.5036424921169946, "grad_norm": 1.8932094301535374, "learning_rate": 5.183960507221419e-06, "loss": 0.8013, "step": 13896 }, { "epoch": 0.5036787358196514, "grad_norm": 2.44010587558347, "learning_rate": 5.183373974996293e-06, "loss": 0.9213, "step": 13897 }, { "epoch": 0.503714979522308, "grad_norm": 2.424808729310384, "learning_rate": 5.182787440244383e-06, "loss": 0.9299, "step": 13898 }, { "epoch": 0.5037512232249647, "grad_norm": 2.2630147959826665, "learning_rate": 5.182200902973774e-06, "loss": 0.8548, "step": 13899 }, { "epoch": 0.5037874669276213, "grad_norm": 2.4507465126330366, "learning_rate": 5.181614363192546e-06, "loss": 1.054, "step": 13900 }, { "epoch": 0.503823710630278, "grad_norm": 2.1797907362326483, "learning_rate": 5.181027820908783e-06, "loss": 0.8203, "step": 13901 }, { "epoch": 0.5038599543329346, "grad_norm": 2.2817325798373473, "learning_rate": 5.180441276130566e-06, "loss": 0.8998, "step": 13902 }, { "epoch": 0.5038961980355913, "grad_norm": 2.470747364681711, "learning_rate": 5.179854728865975e-06, "loss": 0.867, "step": 13903 }, { "epoch": 0.503932441738248, "grad_norm": 2.4511900239839677, "learning_rate": 5.179268179123097e-06, "loss": 0.9737, "step": 13904 }, { "epoch": 0.5039686854409047, "grad_norm": 2.3205992209349566, "learning_rate": 5.178681626910011e-06, "loss": 0.9043, "step": 13905 }, { "epoch": 0.5040049291435613, "grad_norm": 2.530751680295997, "learning_rate": 5.1780950722348004e-06, "loss": 0.951, "step": 13906 }, { "epoch": 0.504041172846218, "grad_norm": 2.3415028386613304, "learning_rate": 5.177508515105547e-06, "loss": 0.8886, "step": 13907 }, { "epoch": 0.5040774165488746, "grad_norm": 2.399084613966584, "learning_rate": 5.176921955530334e-06, "loss": 0.7546, "step": 13908 }, { "epoch": 0.5041136602515313, "grad_norm": 2.1469418117314816, "learning_rate": 5.176335393517243e-06, "loss": 0.9148, "step": 13909 }, { "epoch": 0.5041499039541879, "grad_norm": 2.272752685664592, "learning_rate": 5.175748829074357e-06, "loss": 1.138, "step": 13910 }, { "epoch": 0.5041861476568447, "grad_norm": 2.364960886976031, "learning_rate": 5.17516226220976e-06, "loss": 1.0354, "step": 13911 }, { "epoch": 0.5042223913595013, "grad_norm": 2.508418512509002, "learning_rate": 5.174575692931533e-06, "loss": 0.8984, "step": 13912 }, { "epoch": 0.504258635062158, "grad_norm": 2.474023820345672, "learning_rate": 5.173989121247755e-06, "loss": 0.904, "step": 13913 }, { "epoch": 0.5042948787648146, "grad_norm": 2.3368159881971082, "learning_rate": 5.173402547166516e-06, "loss": 0.762, "step": 13914 }, { "epoch": 0.5043311224674712, "grad_norm": 2.365572842414237, "learning_rate": 5.172815970695893e-06, "loss": 0.774, "step": 13915 }, { "epoch": 0.5043673661701279, "grad_norm": 2.1069560067603534, "learning_rate": 5.172229391843971e-06, "loss": 0.8215, "step": 13916 }, { "epoch": 0.5044036098727847, "grad_norm": 2.1281581433821137, "learning_rate": 5.171642810618831e-06, "loss": 0.9839, "step": 13917 }, { "epoch": 0.5044398535754413, "grad_norm": 2.466872875052344, "learning_rate": 5.171056227028558e-06, "loss": 1.0675, "step": 13918 }, { "epoch": 0.504476097278098, "grad_norm": 2.3719820501410744, "learning_rate": 5.170469641081232e-06, "loss": 0.8534, "step": 13919 }, { "epoch": 0.5045123409807546, "grad_norm": 2.2991898217931763, "learning_rate": 5.16988305278494e-06, "loss": 0.8693, "step": 13920 }, { "epoch": 0.5045485846834112, "grad_norm": 2.0692381451702344, "learning_rate": 5.169296462147761e-06, "loss": 0.955, "step": 13921 }, { "epoch": 0.5045848283860679, "grad_norm": 2.530193269435186, "learning_rate": 5.168709869177779e-06, "loss": 0.7584, "step": 13922 }, { "epoch": 0.5046210720887245, "grad_norm": 1.8741503968679492, "learning_rate": 5.1681232738830765e-06, "loss": 0.7565, "step": 13923 }, { "epoch": 0.5046573157913813, "grad_norm": 2.2514296005248164, "learning_rate": 5.167536676271738e-06, "loss": 0.9852, "step": 13924 }, { "epoch": 0.5046935594940379, "grad_norm": 2.2683397447742317, "learning_rate": 5.166950076351845e-06, "loss": 1.0365, "step": 13925 }, { "epoch": 0.5047298031966946, "grad_norm": 2.1303195128878096, "learning_rate": 5.16636347413148e-06, "loss": 0.91, "step": 13926 }, { "epoch": 0.5047660468993512, "grad_norm": 2.3571503681956205, "learning_rate": 5.165776869618728e-06, "loss": 1.0379, "step": 13927 }, { "epoch": 0.5048022906020079, "grad_norm": 2.4023939277932573, "learning_rate": 5.165190262821671e-06, "loss": 1.1321, "step": 13928 }, { "epoch": 0.5048385343046645, "grad_norm": 2.501976565390073, "learning_rate": 5.164603653748393e-06, "loss": 0.8373, "step": 13929 }, { "epoch": 0.5048747780073213, "grad_norm": 2.4250934957010686, "learning_rate": 5.1640170424069746e-06, "loss": 0.8094, "step": 13930 }, { "epoch": 0.5049110217099779, "grad_norm": 2.365952281262708, "learning_rate": 5.163430428805502e-06, "loss": 0.6927, "step": 13931 }, { "epoch": 0.5049472654126346, "grad_norm": 2.896397782978333, "learning_rate": 5.162843812952055e-06, "loss": 0.8156, "step": 13932 }, { "epoch": 0.5049835091152912, "grad_norm": 2.379431158841279, "learning_rate": 5.162257194854721e-06, "loss": 0.8242, "step": 13933 }, { "epoch": 0.5050197528179479, "grad_norm": 2.207743212822863, "learning_rate": 5.161670574521579e-06, "loss": 1.0133, "step": 13934 }, { "epoch": 0.5050559965206045, "grad_norm": 2.4365938554286166, "learning_rate": 5.161083951960716e-06, "loss": 0.9226, "step": 13935 }, { "epoch": 0.5050922402232612, "grad_norm": 2.297822233092408, "learning_rate": 5.1604973271802115e-06, "loss": 1.0749, "step": 13936 }, { "epoch": 0.5051284839259179, "grad_norm": 2.1328455415035386, "learning_rate": 5.159910700188152e-06, "loss": 0.5845, "step": 13937 }, { "epoch": 0.5051647276285746, "grad_norm": 2.6691674991294843, "learning_rate": 5.1593240709926194e-06, "loss": 0.9167, "step": 13938 }, { "epoch": 0.5052009713312312, "grad_norm": 2.309898854190929, "learning_rate": 5.1587374396016986e-06, "loss": 0.893, "step": 13939 }, { "epoch": 0.5052372150338879, "grad_norm": 2.4850971464742426, "learning_rate": 5.158150806023472e-06, "loss": 0.9854, "step": 13940 }, { "epoch": 0.5052734587365445, "grad_norm": 2.3942192319907907, "learning_rate": 5.1575641702660215e-06, "loss": 0.9833, "step": 13941 }, { "epoch": 0.5053097024392011, "grad_norm": 2.5473049445175087, "learning_rate": 5.156977532337433e-06, "loss": 1.0819, "step": 13942 }, { "epoch": 0.5053459461418579, "grad_norm": 2.104211962634373, "learning_rate": 5.1563908922457885e-06, "loss": 0.99, "step": 13943 }, { "epoch": 0.5053821898445146, "grad_norm": 2.1523205462292965, "learning_rate": 5.155804249999171e-06, "loss": 0.8379, "step": 13944 }, { "epoch": 0.5054184335471712, "grad_norm": 2.4561894348650792, "learning_rate": 5.155217605605667e-06, "loss": 0.962, "step": 13945 }, { "epoch": 0.5054546772498278, "grad_norm": 2.355272607410357, "learning_rate": 5.154630959073358e-06, "loss": 0.966, "step": 13946 }, { "epoch": 0.5054909209524845, "grad_norm": 2.35770727307218, "learning_rate": 5.1540443104103275e-06, "loss": 0.8033, "step": 13947 }, { "epoch": 0.5055271646551411, "grad_norm": 2.3556049893054407, "learning_rate": 5.15345765962466e-06, "loss": 1.0, "step": 13948 }, { "epoch": 0.5055634083577978, "grad_norm": 2.5702328841265545, "learning_rate": 5.152871006724439e-06, "loss": 1.0185, "step": 13949 }, { "epoch": 0.5055996520604545, "grad_norm": 2.4467685554477008, "learning_rate": 5.152284351717746e-06, "loss": 0.9855, "step": 13950 }, { "epoch": 0.5056358957631112, "grad_norm": 2.3105241853742937, "learning_rate": 5.1516976946126696e-06, "loss": 1.0064, "step": 13951 }, { "epoch": 0.5056721394657678, "grad_norm": 2.536360579063046, "learning_rate": 5.151111035417287e-06, "loss": 0.9124, "step": 13952 }, { "epoch": 0.5057083831684245, "grad_norm": 2.3176829012445386, "learning_rate": 5.150524374139689e-06, "loss": 0.8261, "step": 13953 }, { "epoch": 0.5057446268710811, "grad_norm": 2.637249051380305, "learning_rate": 5.149937710787954e-06, "loss": 0.8079, "step": 13954 }, { "epoch": 0.5057808705737378, "grad_norm": 2.198915054996894, "learning_rate": 5.14935104537017e-06, "loss": 0.7785, "step": 13955 }, { "epoch": 0.5058171142763945, "grad_norm": 2.7766145847609005, "learning_rate": 5.148764377894417e-06, "loss": 0.8378, "step": 13956 }, { "epoch": 0.5058533579790512, "grad_norm": 2.478890946609666, "learning_rate": 5.148177708368783e-06, "loss": 0.9158, "step": 13957 }, { "epoch": 0.5058896016817078, "grad_norm": 2.359085066906838, "learning_rate": 5.14759103680135e-06, "loss": 0.9846, "step": 13958 }, { "epoch": 0.5059258453843645, "grad_norm": 2.4378720048592113, "learning_rate": 5.1470043632001995e-06, "loss": 1.0062, "step": 13959 }, { "epoch": 0.5059620890870211, "grad_norm": 2.301524893146273, "learning_rate": 5.146417687573418e-06, "loss": 0.8928, "step": 13960 }, { "epoch": 0.5059983327896778, "grad_norm": 2.28667261085251, "learning_rate": 5.145831009929091e-06, "loss": 0.9143, "step": 13961 }, { "epoch": 0.5060345764923344, "grad_norm": 2.4519542198723685, "learning_rate": 5.1452443302753e-06, "loss": 0.8991, "step": 13962 }, { "epoch": 0.5060708201949912, "grad_norm": 2.268070571299265, "learning_rate": 5.1446576486201295e-06, "loss": 0.7954, "step": 13963 }, { "epoch": 0.5061070638976478, "grad_norm": 2.9335708747463753, "learning_rate": 5.144070964971663e-06, "loss": 0.9585, "step": 13964 }, { "epoch": 0.5061433076003045, "grad_norm": 2.233288290815785, "learning_rate": 5.143484279337989e-06, "loss": 0.8597, "step": 13965 }, { "epoch": 0.5061795513029611, "grad_norm": 2.132217145828121, "learning_rate": 5.142897591727186e-06, "loss": 0.8737, "step": 13966 }, { "epoch": 0.5062157950056178, "grad_norm": 2.3377101608430957, "learning_rate": 5.142310902147342e-06, "loss": 1.0226, "step": 13967 }, { "epoch": 0.5062520387082744, "grad_norm": 2.3942696690027727, "learning_rate": 5.141724210606539e-06, "loss": 0.8376, "step": 13968 }, { "epoch": 0.506288282410931, "grad_norm": 2.2449059844308685, "learning_rate": 5.141137517112862e-06, "loss": 0.849, "step": 13969 }, { "epoch": 0.5063245261135878, "grad_norm": 2.4739526076685485, "learning_rate": 5.140550821674395e-06, "loss": 0.9547, "step": 13970 }, { "epoch": 0.5063607698162444, "grad_norm": 2.2395264115186797, "learning_rate": 5.1399641242992235e-06, "loss": 0.7507, "step": 13971 }, { "epoch": 0.5063970135189011, "grad_norm": 2.298038259327468, "learning_rate": 5.139377424995431e-06, "loss": 0.9738, "step": 13972 }, { "epoch": 0.5064332572215577, "grad_norm": 2.664205123023052, "learning_rate": 5.138790723771101e-06, "loss": 1.0869, "step": 13973 }, { "epoch": 0.5064695009242144, "grad_norm": 2.1561355269390634, "learning_rate": 5.138204020634318e-06, "loss": 0.8684, "step": 13974 }, { "epoch": 0.506505744626871, "grad_norm": 2.2612716387810363, "learning_rate": 5.1376173155931695e-06, "loss": 1.0995, "step": 13975 }, { "epoch": 0.5065419883295278, "grad_norm": 2.199350241984703, "learning_rate": 5.137030608655737e-06, "loss": 0.8874, "step": 13976 }, { "epoch": 0.5065782320321844, "grad_norm": 2.381826660536105, "learning_rate": 5.136443899830106e-06, "loss": 0.8638, "step": 13977 }, { "epoch": 0.5066144757348411, "grad_norm": 2.206272093846804, "learning_rate": 5.1358571891243595e-06, "loss": 0.8606, "step": 13978 }, { "epoch": 0.5066507194374977, "grad_norm": 2.552320867612671, "learning_rate": 5.135270476546584e-06, "loss": 1.0284, "step": 13979 }, { "epoch": 0.5066869631401544, "grad_norm": 2.55100843062401, "learning_rate": 5.134683762104862e-06, "loss": 0.855, "step": 13980 }, { "epoch": 0.506723206842811, "grad_norm": 2.397922009717845, "learning_rate": 5.134097045807281e-06, "loss": 0.82, "step": 13981 }, { "epoch": 0.5067594505454677, "grad_norm": 2.2329683290870226, "learning_rate": 5.1335103276619215e-06, "loss": 0.9535, "step": 13982 }, { "epoch": 0.5067956942481244, "grad_norm": 2.359479017926262, "learning_rate": 5.132923607676872e-06, "loss": 0.8426, "step": 13983 }, { "epoch": 0.5068319379507811, "grad_norm": 2.241600412182931, "learning_rate": 5.132336885860214e-06, "loss": 1.0016, "step": 13984 }, { "epoch": 0.5068681816534377, "grad_norm": 1.9500460122895347, "learning_rate": 5.131750162220035e-06, "loss": 0.7361, "step": 13985 }, { "epoch": 0.5069044253560944, "grad_norm": 2.45489827335927, "learning_rate": 5.1311634367644185e-06, "loss": 1.0175, "step": 13986 }, { "epoch": 0.506940669058751, "grad_norm": 2.1164854185777005, "learning_rate": 5.130576709501449e-06, "loss": 0.8112, "step": 13987 }, { "epoch": 0.5069769127614077, "grad_norm": 2.325031253886746, "learning_rate": 5.129989980439212e-06, "loss": 0.8807, "step": 13988 }, { "epoch": 0.5070131564640644, "grad_norm": 2.351160754605359, "learning_rate": 5.129403249585792e-06, "loss": 0.9564, "step": 13989 }, { "epoch": 0.5070494001667211, "grad_norm": 2.157384141357731, "learning_rate": 5.128816516949271e-06, "loss": 1.0471, "step": 13990 }, { "epoch": 0.5070856438693777, "grad_norm": 2.319941681672839, "learning_rate": 5.128229782537738e-06, "loss": 0.9176, "step": 13991 }, { "epoch": 0.5071218875720344, "grad_norm": 2.3043240378274317, "learning_rate": 5.127643046359275e-06, "loss": 0.9817, "step": 13992 }, { "epoch": 0.507158131274691, "grad_norm": 2.151248044312411, "learning_rate": 5.12705630842197e-06, "loss": 0.9323, "step": 13993 }, { "epoch": 0.5071943749773477, "grad_norm": 2.3215249599191745, "learning_rate": 5.126469568733904e-06, "loss": 0.9343, "step": 13994 }, { "epoch": 0.5072306186800043, "grad_norm": 2.167039288955735, "learning_rate": 5.125882827303166e-06, "loss": 0.8046, "step": 13995 }, { "epoch": 0.507266862382661, "grad_norm": 2.245846631324258, "learning_rate": 5.1252960841378365e-06, "loss": 0.9641, "step": 13996 }, { "epoch": 0.5073031060853177, "grad_norm": 2.6297326461582275, "learning_rate": 5.124709339246003e-06, "loss": 0.8544, "step": 13997 }, { "epoch": 0.5073393497879743, "grad_norm": 2.5805924614071523, "learning_rate": 5.124122592635752e-06, "loss": 1.0072, "step": 13998 }, { "epoch": 0.507375593490631, "grad_norm": 2.215109297761457, "learning_rate": 5.123535844315166e-06, "loss": 0.7734, "step": 13999 }, { "epoch": 0.5074118371932876, "grad_norm": 2.472214195261964, "learning_rate": 5.12294909429233e-06, "loss": 0.7281, "step": 14000 }, { "epoch": 0.5074480808959443, "grad_norm": 2.389384601095289, "learning_rate": 5.122362342575331e-06, "loss": 0.8054, "step": 14001 }, { "epoch": 0.507484324598601, "grad_norm": 2.1903623325788666, "learning_rate": 5.121775589172252e-06, "loss": 0.9354, "step": 14002 }, { "epoch": 0.5075205683012577, "grad_norm": 2.902500654136071, "learning_rate": 5.12118883409118e-06, "loss": 0.8877, "step": 14003 }, { "epoch": 0.5075568120039143, "grad_norm": 2.416901684543631, "learning_rate": 5.120602077340197e-06, "loss": 0.9027, "step": 14004 }, { "epoch": 0.507593055706571, "grad_norm": 2.189028219011816, "learning_rate": 5.120015318927393e-06, "loss": 0.7199, "step": 14005 }, { "epoch": 0.5076292994092276, "grad_norm": 2.3936984125254304, "learning_rate": 5.119428558860848e-06, "loss": 0.8719, "step": 14006 }, { "epoch": 0.5076655431118843, "grad_norm": 2.330813087375309, "learning_rate": 5.118841797148652e-06, "loss": 0.7768, "step": 14007 }, { "epoch": 0.5077017868145409, "grad_norm": 2.3551609525249977, "learning_rate": 5.118255033798887e-06, "loss": 0.886, "step": 14008 }, { "epoch": 0.5077380305171977, "grad_norm": 2.511983369808601, "learning_rate": 5.117668268819638e-06, "loss": 0.8226, "step": 14009 }, { "epoch": 0.5077742742198543, "grad_norm": 2.1889784279560236, "learning_rate": 5.1170815022189924e-06, "loss": 0.7885, "step": 14010 }, { "epoch": 0.507810517922511, "grad_norm": 2.5361373357075414, "learning_rate": 5.1164947340050346e-06, "loss": 0.9076, "step": 14011 }, { "epoch": 0.5078467616251676, "grad_norm": 1.962174796064002, "learning_rate": 5.11590796418585e-06, "loss": 0.8013, "step": 14012 }, { "epoch": 0.5078830053278243, "grad_norm": 2.605810861309942, "learning_rate": 5.115321192769522e-06, "loss": 1.0416, "step": 14013 }, { "epoch": 0.5079192490304809, "grad_norm": 2.1973106512681424, "learning_rate": 5.1147344197641406e-06, "loss": 0.88, "step": 14014 }, { "epoch": 0.5079554927331377, "grad_norm": 2.366996298031582, "learning_rate": 5.114147645177786e-06, "loss": 0.8183, "step": 14015 }, { "epoch": 0.5079917364357943, "grad_norm": 2.5050605524022327, "learning_rate": 5.1135608690185465e-06, "loss": 0.897, "step": 14016 }, { "epoch": 0.508027980138451, "grad_norm": 2.409565024064988, "learning_rate": 5.112974091294508e-06, "loss": 0.727, "step": 14017 }, { "epoch": 0.5080642238411076, "grad_norm": 2.337237621785782, "learning_rate": 5.112387312013754e-06, "loss": 1.0302, "step": 14018 }, { "epoch": 0.5081004675437643, "grad_norm": 2.138082471703287, "learning_rate": 5.111800531184371e-06, "loss": 0.8686, "step": 14019 }, { "epoch": 0.5081367112464209, "grad_norm": 2.2129157855530033, "learning_rate": 5.111213748814444e-06, "loss": 0.8362, "step": 14020 }, { "epoch": 0.5081729549490775, "grad_norm": 2.3254113108674015, "learning_rate": 5.110626964912059e-06, "loss": 0.8216, "step": 14021 }, { "epoch": 0.5082091986517343, "grad_norm": 2.1297293643129764, "learning_rate": 5.1100401794853e-06, "loss": 1.0088, "step": 14022 }, { "epoch": 0.508245442354391, "grad_norm": 2.3456940882575736, "learning_rate": 5.1094533925422555e-06, "loss": 0.9553, "step": 14023 }, { "epoch": 0.5082816860570476, "grad_norm": 2.5669091000793167, "learning_rate": 5.108866604091009e-06, "loss": 0.8838, "step": 14024 }, { "epoch": 0.5083179297597042, "grad_norm": 2.1873753564645364, "learning_rate": 5.108279814139646e-06, "loss": 0.9016, "step": 14025 }, { "epoch": 0.5083541734623609, "grad_norm": 2.8292026689878758, "learning_rate": 5.107693022696254e-06, "loss": 1.0658, "step": 14026 }, { "epoch": 0.5083904171650175, "grad_norm": 2.437438599881244, "learning_rate": 5.107106229768916e-06, "loss": 0.9053, "step": 14027 }, { "epoch": 0.5084266608676743, "grad_norm": 2.0563562952553798, "learning_rate": 5.106519435365721e-06, "loss": 0.9707, "step": 14028 }, { "epoch": 0.5084629045703309, "grad_norm": 2.255038184021684, "learning_rate": 5.10593263949475e-06, "loss": 0.9619, "step": 14029 }, { "epoch": 0.5084991482729876, "grad_norm": 2.341129524931674, "learning_rate": 5.105345842164093e-06, "loss": 1.1611, "step": 14030 }, { "epoch": 0.5085353919756442, "grad_norm": 2.4892887758061177, "learning_rate": 5.104759043381833e-06, "loss": 1.089, "step": 14031 }, { "epoch": 0.5085716356783009, "grad_norm": 2.4041764864455795, "learning_rate": 5.104172243156057e-06, "loss": 0.8884, "step": 14032 }, { "epoch": 0.5086078793809575, "grad_norm": 2.5111693091174114, "learning_rate": 5.103585441494851e-06, "loss": 0.9941, "step": 14033 }, { "epoch": 0.5086441230836142, "grad_norm": 2.3889281545823637, "learning_rate": 5.1029986384063e-06, "loss": 0.7801, "step": 14034 }, { "epoch": 0.5086803667862709, "grad_norm": 2.520314064448483, "learning_rate": 5.10241183389849e-06, "loss": 1.0642, "step": 14035 }, { "epoch": 0.5087166104889276, "grad_norm": 2.3179828787475323, "learning_rate": 5.101825027979507e-06, "loss": 0.8086, "step": 14036 }, { "epoch": 0.5087528541915842, "grad_norm": 2.186394038380065, "learning_rate": 5.101238220657436e-06, "loss": 0.8902, "step": 14037 }, { "epoch": 0.5087890978942409, "grad_norm": 2.363649002429017, "learning_rate": 5.100651411940365e-06, "loss": 0.9221, "step": 14038 }, { "epoch": 0.5088253415968975, "grad_norm": 2.1209103363312236, "learning_rate": 5.100064601836377e-06, "loss": 0.8145, "step": 14039 }, { "epoch": 0.5088615852995542, "grad_norm": 2.542223069935013, "learning_rate": 5.099477790353559e-06, "loss": 0.934, "step": 14040 }, { "epoch": 0.5088978290022108, "grad_norm": 2.44416651552489, "learning_rate": 5.098890977499997e-06, "loss": 0.8637, "step": 14041 }, { "epoch": 0.5089340727048676, "grad_norm": 2.47085367469587, "learning_rate": 5.09830416328378e-06, "loss": 0.9241, "step": 14042 }, { "epoch": 0.5089703164075242, "grad_norm": 2.384365158001719, "learning_rate": 5.097717347712988e-06, "loss": 0.8477, "step": 14043 }, { "epoch": 0.5090065601101809, "grad_norm": 2.253129750232515, "learning_rate": 5.097130530795711e-06, "loss": 0.9244, "step": 14044 }, { "epoch": 0.5090428038128375, "grad_norm": 2.564753738719061, "learning_rate": 5.096543712540036e-06, "loss": 0.827, "step": 14045 }, { "epoch": 0.5090790475154942, "grad_norm": 2.290631559483585, "learning_rate": 5.0959568929540445e-06, "loss": 0.9082, "step": 14046 }, { "epoch": 0.5091152912181508, "grad_norm": 2.201255761563462, "learning_rate": 5.0953700720458256e-06, "loss": 0.8584, "step": 14047 }, { "epoch": 0.5091515349208076, "grad_norm": 2.660378107071721, "learning_rate": 5.094783249823465e-06, "loss": 1.0267, "step": 14048 }, { "epoch": 0.5091877786234642, "grad_norm": 2.104558620719099, "learning_rate": 5.0941964262950485e-06, "loss": 0.7581, "step": 14049 }, { "epoch": 0.5092240223261209, "grad_norm": 2.5981285256292943, "learning_rate": 5.093609601468662e-06, "loss": 0.9715, "step": 14050 }, { "epoch": 0.5092602660287775, "grad_norm": 2.3488287239548473, "learning_rate": 5.093022775352392e-06, "loss": 0.8508, "step": 14051 }, { "epoch": 0.5092965097314341, "grad_norm": 2.368722530586308, "learning_rate": 5.092435947954325e-06, "loss": 0.9261, "step": 14052 }, { "epoch": 0.5093327534340908, "grad_norm": 2.219076583741576, "learning_rate": 5.091849119282545e-06, "loss": 0.8924, "step": 14053 }, { "epoch": 0.5093689971367474, "grad_norm": 2.399816792081811, "learning_rate": 5.091262289345141e-06, "loss": 0.8744, "step": 14054 }, { "epoch": 0.5094052408394042, "grad_norm": 2.341463232027036, "learning_rate": 5.090675458150198e-06, "loss": 0.9758, "step": 14055 }, { "epoch": 0.5094414845420608, "grad_norm": 2.4307157518964595, "learning_rate": 5.090088625705802e-06, "loss": 0.8653, "step": 14056 }, { "epoch": 0.5094777282447175, "grad_norm": 2.3459366978619958, "learning_rate": 5.089501792020038e-06, "loss": 0.897, "step": 14057 }, { "epoch": 0.5095139719473741, "grad_norm": 2.2948554808294945, "learning_rate": 5.088914957100995e-06, "loss": 0.9045, "step": 14058 }, { "epoch": 0.5095502156500308, "grad_norm": 2.3788107712732423, "learning_rate": 5.0883281209567555e-06, "loss": 0.942, "step": 14059 }, { "epoch": 0.5095864593526874, "grad_norm": 2.0512063546394526, "learning_rate": 5.0877412835954095e-06, "loss": 0.8554, "step": 14060 }, { "epoch": 0.5096227030553442, "grad_norm": 2.2202229674703053, "learning_rate": 5.087154445025042e-06, "loss": 0.9095, "step": 14061 }, { "epoch": 0.5096589467580008, "grad_norm": 2.3534178419939975, "learning_rate": 5.086567605253738e-06, "loss": 0.7075, "step": 14062 }, { "epoch": 0.5096951904606575, "grad_norm": 2.2499211206168486, "learning_rate": 5.0859807642895855e-06, "loss": 0.8419, "step": 14063 }, { "epoch": 0.5097314341633141, "grad_norm": 2.173627104396468, "learning_rate": 5.08539392214067e-06, "loss": 0.9486, "step": 14064 }, { "epoch": 0.5097676778659708, "grad_norm": 2.3818755835439527, "learning_rate": 5.084807078815078e-06, "loss": 0.852, "step": 14065 }, { "epoch": 0.5098039215686274, "grad_norm": 2.2117270853000415, "learning_rate": 5.084220234320896e-06, "loss": 0.8797, "step": 14066 }, { "epoch": 0.5098401652712841, "grad_norm": 2.742273080803452, "learning_rate": 5.083633388666208e-06, "loss": 0.8606, "step": 14067 }, { "epoch": 0.5098764089739408, "grad_norm": 2.25234205551746, "learning_rate": 5.0830465418591045e-06, "loss": 0.915, "step": 14068 }, { "epoch": 0.5099126526765975, "grad_norm": 2.307422480992753, "learning_rate": 5.082459693907669e-06, "loss": 0.6441, "step": 14069 }, { "epoch": 0.5099488963792541, "grad_norm": 2.411047129687015, "learning_rate": 5.0818728448199895e-06, "loss": 0.9168, "step": 14070 }, { "epoch": 0.5099851400819108, "grad_norm": 2.473201323248988, "learning_rate": 5.081285994604151e-06, "loss": 0.9304, "step": 14071 }, { "epoch": 0.5100213837845674, "grad_norm": 2.193697837138655, "learning_rate": 5.080699143268242e-06, "loss": 0.7876, "step": 14072 }, { "epoch": 0.510057627487224, "grad_norm": 2.393027814659311, "learning_rate": 5.080112290820346e-06, "loss": 0.9423, "step": 14073 }, { "epoch": 0.5100938711898808, "grad_norm": 2.4982805599893405, "learning_rate": 5.079525437268552e-06, "loss": 0.8802, "step": 14074 }, { "epoch": 0.5101301148925375, "grad_norm": 2.522008653004893, "learning_rate": 5.078938582620946e-06, "loss": 1.0167, "step": 14075 }, { "epoch": 0.5101663585951941, "grad_norm": 2.38283642111847, "learning_rate": 5.078351726885613e-06, "loss": 0.9435, "step": 14076 }, { "epoch": 0.5102026022978507, "grad_norm": 2.6838067216941828, "learning_rate": 5.077764870070641e-06, "loss": 0.8089, "step": 14077 }, { "epoch": 0.5102388460005074, "grad_norm": 2.1346054895853195, "learning_rate": 5.077178012184117e-06, "loss": 0.6327, "step": 14078 }, { "epoch": 0.510275089703164, "grad_norm": 2.14889407470736, "learning_rate": 5.0765911532341235e-06, "loss": 0.8095, "step": 14079 }, { "epoch": 0.5103113334058207, "grad_norm": 2.2031184738827227, "learning_rate": 5.0760042932287535e-06, "loss": 1.0098, "step": 14080 }, { "epoch": 0.5103475771084774, "grad_norm": 2.8104092959868225, "learning_rate": 5.075417432176088e-06, "loss": 0.8891, "step": 14081 }, { "epoch": 0.5103838208111341, "grad_norm": 2.1655147039072857, "learning_rate": 5.074830570084219e-06, "loss": 0.6685, "step": 14082 }, { "epoch": 0.5104200645137907, "grad_norm": 2.674172714682978, "learning_rate": 5.074243706961227e-06, "loss": 1.1329, "step": 14083 }, { "epoch": 0.5104563082164474, "grad_norm": 2.2128920243015022, "learning_rate": 5.073656842815203e-06, "loss": 0.9344, "step": 14084 }, { "epoch": 0.510492551919104, "grad_norm": 2.0322012571659784, "learning_rate": 5.073069977654233e-06, "loss": 0.7809, "step": 14085 }, { "epoch": 0.5105287956217607, "grad_norm": 2.4409642500188964, "learning_rate": 5.0724831114864014e-06, "loss": 1.0798, "step": 14086 }, { "epoch": 0.5105650393244174, "grad_norm": 2.391469272927573, "learning_rate": 5.0718962443197965e-06, "loss": 0.9904, "step": 14087 }, { "epoch": 0.5106012830270741, "grad_norm": 2.260934486508254, "learning_rate": 5.071309376162504e-06, "loss": 0.8997, "step": 14088 }, { "epoch": 0.5106375267297307, "grad_norm": 2.812390197525517, "learning_rate": 5.070722507022614e-06, "loss": 1.0592, "step": 14089 }, { "epoch": 0.5106737704323874, "grad_norm": 2.496432602826272, "learning_rate": 5.070135636908209e-06, "loss": 0.9987, "step": 14090 }, { "epoch": 0.510710014135044, "grad_norm": 2.3115096344560846, "learning_rate": 5.069548765827378e-06, "loss": 0.843, "step": 14091 }, { "epoch": 0.5107462578377007, "grad_norm": 2.3641923066127353, "learning_rate": 5.0689618937882055e-06, "loss": 1.0184, "step": 14092 }, { "epoch": 0.5107825015403573, "grad_norm": 2.128243235463212, "learning_rate": 5.068375020798782e-06, "loss": 0.8696, "step": 14093 }, { "epoch": 0.5108187452430141, "grad_norm": 2.256617051968881, "learning_rate": 5.067788146867191e-06, "loss": 0.8698, "step": 14094 }, { "epoch": 0.5108549889456707, "grad_norm": 2.4382126392511996, "learning_rate": 5.06720127200152e-06, "loss": 0.8576, "step": 14095 }, { "epoch": 0.5108912326483274, "grad_norm": 2.5619196460563356, "learning_rate": 5.066614396209857e-06, "loss": 1.0092, "step": 14096 }, { "epoch": 0.510927476350984, "grad_norm": 2.325139790783323, "learning_rate": 5.066027519500287e-06, "loss": 0.8928, "step": 14097 }, { "epoch": 0.5109637200536407, "grad_norm": 2.391456087012062, "learning_rate": 5.0654406418808975e-06, "loss": 0.8806, "step": 14098 }, { "epoch": 0.5109999637562973, "grad_norm": 2.139881122556129, "learning_rate": 5.064853763359777e-06, "loss": 0.965, "step": 14099 }, { "epoch": 0.5110362074589541, "grad_norm": 2.4962907979454343, "learning_rate": 5.0642668839450094e-06, "loss": 0.8787, "step": 14100 }, { "epoch": 0.5110724511616107, "grad_norm": 2.4029925811608943, "learning_rate": 5.063680003644684e-06, "loss": 0.9256, "step": 14101 }, { "epoch": 0.5111086948642674, "grad_norm": 2.567568882213462, "learning_rate": 5.0630931224668855e-06, "loss": 0.9598, "step": 14102 }, { "epoch": 0.511144938566924, "grad_norm": 2.3103004552735458, "learning_rate": 5.062506240419703e-06, "loss": 0.883, "step": 14103 }, { "epoch": 0.5111811822695806, "grad_norm": 2.4271294991455914, "learning_rate": 5.061919357511222e-06, "loss": 0.9316, "step": 14104 }, { "epoch": 0.5112174259722373, "grad_norm": 2.3118696675928163, "learning_rate": 5.061332473749531e-06, "loss": 1.0038, "step": 14105 }, { "epoch": 0.5112536696748939, "grad_norm": 2.424002899136256, "learning_rate": 5.060745589142714e-06, "loss": 0.8745, "step": 14106 }, { "epoch": 0.5112899133775507, "grad_norm": 2.5942574527740536, "learning_rate": 5.06015870369886e-06, "loss": 0.9809, "step": 14107 }, { "epoch": 0.5113261570802073, "grad_norm": 2.50031043084694, "learning_rate": 5.0595718174260546e-06, "loss": 0.9588, "step": 14108 }, { "epoch": 0.511362400782864, "grad_norm": 2.1658410436705964, "learning_rate": 5.058984930332387e-06, "loss": 0.7699, "step": 14109 }, { "epoch": 0.5113986444855206, "grad_norm": 2.3626999608506125, "learning_rate": 5.0583980424259416e-06, "loss": 0.8343, "step": 14110 }, { "epoch": 0.5114348881881773, "grad_norm": 2.67058967321599, "learning_rate": 5.057811153714807e-06, "loss": 1.0823, "step": 14111 }, { "epoch": 0.5114711318908339, "grad_norm": 2.337556186514168, "learning_rate": 5.057224264207069e-06, "loss": 0.7728, "step": 14112 }, { "epoch": 0.5115073755934906, "grad_norm": 2.1037584696221168, "learning_rate": 5.056637373910816e-06, "loss": 0.8666, "step": 14113 }, { "epoch": 0.5115436192961473, "grad_norm": 2.1321189108701777, "learning_rate": 5.056050482834135e-06, "loss": 0.8025, "step": 14114 }, { "epoch": 0.511579862998804, "grad_norm": 2.2641045595495743, "learning_rate": 5.055463590985112e-06, "loss": 0.8378, "step": 14115 }, { "epoch": 0.5116161067014606, "grad_norm": 2.313718609954632, "learning_rate": 5.054876698371832e-06, "loss": 0.926, "step": 14116 }, { "epoch": 0.5116523504041173, "grad_norm": 2.297282830844215, "learning_rate": 5.054289805002387e-06, "loss": 0.7758, "step": 14117 }, { "epoch": 0.5116885941067739, "grad_norm": 2.2651359583244184, "learning_rate": 5.053702910884859e-06, "loss": 0.9342, "step": 14118 }, { "epoch": 0.5117248378094306, "grad_norm": 2.2072681807373637, "learning_rate": 5.053116016027339e-06, "loss": 0.7433, "step": 14119 }, { "epoch": 0.5117610815120873, "grad_norm": 2.61097372340841, "learning_rate": 5.052529120437911e-06, "loss": 0.8871, "step": 14120 }, { "epoch": 0.511797325214744, "grad_norm": 2.3724955357956286, "learning_rate": 5.051942224124665e-06, "loss": 0.8432, "step": 14121 }, { "epoch": 0.5118335689174006, "grad_norm": 2.644894077225921, "learning_rate": 5.0513553270956875e-06, "loss": 0.8516, "step": 14122 }, { "epoch": 0.5118698126200573, "grad_norm": 2.3621389623025255, "learning_rate": 5.050768429359063e-06, "loss": 0.9912, "step": 14123 }, { "epoch": 0.5119060563227139, "grad_norm": 2.185069538459281, "learning_rate": 5.05018153092288e-06, "loss": 0.88, "step": 14124 }, { "epoch": 0.5119423000253706, "grad_norm": 2.2170353784212105, "learning_rate": 5.049594631795226e-06, "loss": 0.7523, "step": 14125 }, { "epoch": 0.5119785437280272, "grad_norm": 2.375939985910542, "learning_rate": 5.0490077319841875e-06, "loss": 0.9113, "step": 14126 }, { "epoch": 0.512014787430684, "grad_norm": 2.366257494976697, "learning_rate": 5.0484208314978534e-06, "loss": 0.985, "step": 14127 }, { "epoch": 0.5120510311333406, "grad_norm": 2.24375655469565, "learning_rate": 5.047833930344308e-06, "loss": 0.9594, "step": 14128 }, { "epoch": 0.5120872748359973, "grad_norm": 2.2273849684249587, "learning_rate": 5.047247028531642e-06, "loss": 0.8995, "step": 14129 }, { "epoch": 0.5121235185386539, "grad_norm": 2.1871700391562094, "learning_rate": 5.046660126067938e-06, "loss": 0.8369, "step": 14130 }, { "epoch": 0.5121597622413105, "grad_norm": 2.366402555881616, "learning_rate": 5.046073222961289e-06, "loss": 0.9939, "step": 14131 }, { "epoch": 0.5121960059439672, "grad_norm": 2.215169910043237, "learning_rate": 5.0454863192197765e-06, "loss": 0.8703, "step": 14132 }, { "epoch": 0.512232249646624, "grad_norm": 2.176849256249793, "learning_rate": 5.04489941485149e-06, "loss": 1.0344, "step": 14133 }, { "epoch": 0.5122684933492806, "grad_norm": 2.472556916703328, "learning_rate": 5.044312509864517e-06, "loss": 1.0207, "step": 14134 }, { "epoch": 0.5123047370519372, "grad_norm": 2.415011156796801, "learning_rate": 5.043725604266946e-06, "loss": 0.9026, "step": 14135 }, { "epoch": 0.5123409807545939, "grad_norm": 2.3205991912617607, "learning_rate": 5.043138698066861e-06, "loss": 1.0441, "step": 14136 }, { "epoch": 0.5123772244572505, "grad_norm": 2.304500559822961, "learning_rate": 5.042551791272351e-06, "loss": 0.8172, "step": 14137 }, { "epoch": 0.5124134681599072, "grad_norm": 2.24668567050068, "learning_rate": 5.041964883891502e-06, "loss": 0.9277, "step": 14138 }, { "epoch": 0.5124497118625638, "grad_norm": 2.3943948679991776, "learning_rate": 5.0413779759324045e-06, "loss": 0.905, "step": 14139 }, { "epoch": 0.5124859555652206, "grad_norm": 2.2708376481174386, "learning_rate": 5.040791067403141e-06, "loss": 1.0458, "step": 14140 }, { "epoch": 0.5125221992678772, "grad_norm": 2.5342257716895245, "learning_rate": 5.0402041583118035e-06, "loss": 1.0316, "step": 14141 }, { "epoch": 0.5125584429705339, "grad_norm": 2.1797441935041624, "learning_rate": 5.039617248666478e-06, "loss": 1.0368, "step": 14142 }, { "epoch": 0.5125946866731905, "grad_norm": 2.162078738379898, "learning_rate": 5.039030338475248e-06, "loss": 0.8017, "step": 14143 }, { "epoch": 0.5126309303758472, "grad_norm": 2.2479340005648663, "learning_rate": 5.038443427746205e-06, "loss": 0.9098, "step": 14144 }, { "epoch": 0.5126671740785038, "grad_norm": 2.837251184711762, "learning_rate": 5.0378565164874335e-06, "loss": 0.9664, "step": 14145 }, { "epoch": 0.5127034177811606, "grad_norm": 1.9827585216774712, "learning_rate": 5.037269604707022e-06, "loss": 0.8303, "step": 14146 }, { "epoch": 0.5127396614838172, "grad_norm": 2.3720531223652466, "learning_rate": 5.03668269241306e-06, "loss": 0.9053, "step": 14147 }, { "epoch": 0.5127759051864739, "grad_norm": 2.461528953018108, "learning_rate": 5.036095779613631e-06, "loss": 0.8888, "step": 14148 }, { "epoch": 0.5128121488891305, "grad_norm": 2.7587340593938126, "learning_rate": 5.035508866316825e-06, "loss": 0.9564, "step": 14149 }, { "epoch": 0.5128483925917872, "grad_norm": 2.534690240682824, "learning_rate": 5.034921952530727e-06, "loss": 0.8068, "step": 14150 }, { "epoch": 0.5128846362944438, "grad_norm": 2.5416666680107975, "learning_rate": 5.034335038263427e-06, "loss": 0.8174, "step": 14151 }, { "epoch": 0.5129208799971005, "grad_norm": 2.263444173001367, "learning_rate": 5.03374812352301e-06, "loss": 0.8657, "step": 14152 }, { "epoch": 0.5129571236997572, "grad_norm": 2.1161122831061583, "learning_rate": 5.033161208317565e-06, "loss": 0.8142, "step": 14153 }, { "epoch": 0.5129933674024139, "grad_norm": 2.572988114155531, "learning_rate": 5.032574292655178e-06, "loss": 0.9031, "step": 14154 }, { "epoch": 0.5130296111050705, "grad_norm": 2.2537437344905693, "learning_rate": 5.031987376543938e-06, "loss": 0.8778, "step": 14155 }, { "epoch": 0.5130658548077272, "grad_norm": 2.197234276987219, "learning_rate": 5.03140045999193e-06, "loss": 0.7462, "step": 14156 }, { "epoch": 0.5131020985103838, "grad_norm": 2.1990412310445784, "learning_rate": 5.030813543007244e-06, "loss": 0.9082, "step": 14157 }, { "epoch": 0.5131383422130404, "grad_norm": 2.325389739857435, "learning_rate": 5.030226625597964e-06, "loss": 0.9565, "step": 14158 }, { "epoch": 0.5131745859156972, "grad_norm": 2.197622064904453, "learning_rate": 5.02963970777218e-06, "loss": 1.0054, "step": 14159 }, { "epoch": 0.5132108296183538, "grad_norm": 2.285522422648596, "learning_rate": 5.029052789537979e-06, "loss": 0.905, "step": 14160 }, { "epoch": 0.5132470733210105, "grad_norm": 2.2922422446040196, "learning_rate": 5.02846587090345e-06, "loss": 1.0864, "step": 14161 }, { "epoch": 0.5132833170236671, "grad_norm": 2.285427334297163, "learning_rate": 5.027878951876677e-06, "loss": 1.0954, "step": 14162 }, { "epoch": 0.5133195607263238, "grad_norm": 2.547178149187528, "learning_rate": 5.027292032465748e-06, "loss": 1.0604, "step": 14163 }, { "epoch": 0.5133558044289804, "grad_norm": 2.518306020123469, "learning_rate": 5.0267051126787525e-06, "loss": 0.9258, "step": 14164 }, { "epoch": 0.5133920481316371, "grad_norm": 2.3933693945166326, "learning_rate": 5.026118192523776e-06, "loss": 0.9795, "step": 14165 }, { "epoch": 0.5134282918342938, "grad_norm": 2.2264726651473, "learning_rate": 5.025531272008907e-06, "loss": 0.7645, "step": 14166 }, { "epoch": 0.5134645355369505, "grad_norm": 2.5657302955645656, "learning_rate": 5.0249443511422325e-06, "loss": 1.0469, "step": 14167 }, { "epoch": 0.5135007792396071, "grad_norm": 2.230814879570143, "learning_rate": 5.024357429931839e-06, "loss": 0.9588, "step": 14168 }, { "epoch": 0.5135370229422638, "grad_norm": 2.5461316152426208, "learning_rate": 5.0237705083858144e-06, "loss": 0.9104, "step": 14169 }, { "epoch": 0.5135732666449204, "grad_norm": 2.654907651251826, "learning_rate": 5.023183586512249e-06, "loss": 0.8199, "step": 14170 }, { "epoch": 0.5136095103475771, "grad_norm": 2.493626375690492, "learning_rate": 5.022596664319227e-06, "loss": 0.8875, "step": 14171 }, { "epoch": 0.5136457540502338, "grad_norm": 2.3918221734853664, "learning_rate": 5.022009741814837e-06, "loss": 1.0083, "step": 14172 }, { "epoch": 0.5136819977528905, "grad_norm": 2.3986468575818174, "learning_rate": 5.021422819007165e-06, "loss": 1.0103, "step": 14173 }, { "epoch": 0.5137182414555471, "grad_norm": 2.140804044174313, "learning_rate": 5.0208358959043006e-06, "loss": 0.8207, "step": 14174 }, { "epoch": 0.5137544851582038, "grad_norm": 2.317165271059128, "learning_rate": 5.020248972514328e-06, "loss": 0.9854, "step": 14175 }, { "epoch": 0.5137907288608604, "grad_norm": 2.2540110693737043, "learning_rate": 5.019662048845339e-06, "loss": 1.0818, "step": 14176 }, { "epoch": 0.5138269725635171, "grad_norm": 2.69269431737093, "learning_rate": 5.019075124905417e-06, "loss": 0.7918, "step": 14177 }, { "epoch": 0.5138632162661737, "grad_norm": 2.427752188089587, "learning_rate": 5.018488200702653e-06, "loss": 0.9552, "step": 14178 }, { "epoch": 0.5138994599688305, "grad_norm": 2.4666938602484616, "learning_rate": 5.017901276245132e-06, "loss": 1.0322, "step": 14179 }, { "epoch": 0.5139357036714871, "grad_norm": 2.3649203274005037, "learning_rate": 5.017314351540943e-06, "loss": 0.9046, "step": 14180 }, { "epoch": 0.5139719473741438, "grad_norm": 2.2634669498749402, "learning_rate": 5.016727426598172e-06, "loss": 0.8212, "step": 14181 }, { "epoch": 0.5140081910768004, "grad_norm": 2.1169649995321898, "learning_rate": 5.016140501424908e-06, "loss": 0.7405, "step": 14182 }, { "epoch": 0.514044434779457, "grad_norm": 2.5544816921041753, "learning_rate": 5.015553576029236e-06, "loss": 0.9188, "step": 14183 }, { "epoch": 0.5140806784821137, "grad_norm": 2.3088602165835574, "learning_rate": 5.014966650419246e-06, "loss": 0.8295, "step": 14184 }, { "epoch": 0.5141169221847703, "grad_norm": 2.1931208673500184, "learning_rate": 5.014379724603024e-06, "loss": 0.865, "step": 14185 }, { "epoch": 0.5141531658874271, "grad_norm": 2.2170423315952674, "learning_rate": 5.0137927985886605e-06, "loss": 0.9335, "step": 14186 }, { "epoch": 0.5141894095900837, "grad_norm": 2.7166459514544763, "learning_rate": 5.013205872384238e-06, "loss": 0.9096, "step": 14187 }, { "epoch": 0.5142256532927404, "grad_norm": 2.108192423084039, "learning_rate": 5.012618945997847e-06, "loss": 1.0075, "step": 14188 }, { "epoch": 0.514261896995397, "grad_norm": 2.1968383615197835, "learning_rate": 5.0120320194375745e-06, "loss": 0.9571, "step": 14189 }, { "epoch": 0.5142981406980537, "grad_norm": 2.249743887968591, "learning_rate": 5.011445092711509e-06, "loss": 1.0724, "step": 14190 }, { "epoch": 0.5143343844007103, "grad_norm": 2.2044025337569497, "learning_rate": 5.010858165827738e-06, "loss": 0.768, "step": 14191 }, { "epoch": 0.5143706281033671, "grad_norm": 2.4620704824611277, "learning_rate": 5.010271238794346e-06, "loss": 0.9543, "step": 14192 }, { "epoch": 0.5144068718060237, "grad_norm": 2.4069214420743767, "learning_rate": 5.0096843116194225e-06, "loss": 0.8925, "step": 14193 }, { "epoch": 0.5144431155086804, "grad_norm": 2.2415962863692154, "learning_rate": 5.009097384311057e-06, "loss": 0.8795, "step": 14194 }, { "epoch": 0.514479359211337, "grad_norm": 2.398820520485892, "learning_rate": 5.008510456877332e-06, "loss": 1.0051, "step": 14195 }, { "epoch": 0.5145156029139937, "grad_norm": 2.5543518485518986, "learning_rate": 5.007923529326341e-06, "loss": 0.9734, "step": 14196 }, { "epoch": 0.5145518466166503, "grad_norm": 2.1748147493550807, "learning_rate": 5.0073366016661664e-06, "loss": 0.7234, "step": 14197 }, { "epoch": 0.514588090319307, "grad_norm": 2.420441514390176, "learning_rate": 5.0067496739049e-06, "loss": 0.8388, "step": 14198 }, { "epoch": 0.5146243340219637, "grad_norm": 2.4432599875620395, "learning_rate": 5.006162746050624e-06, "loss": 1.116, "step": 14199 }, { "epoch": 0.5146605777246204, "grad_norm": 2.7602322192352395, "learning_rate": 5.005575818111432e-06, "loss": 1.0955, "step": 14200 }, { "epoch": 0.514696821427277, "grad_norm": 2.291138862796338, "learning_rate": 5.004988890095408e-06, "loss": 0.7401, "step": 14201 }, { "epoch": 0.5147330651299337, "grad_norm": 2.2470374162226405, "learning_rate": 5.004401962010641e-06, "loss": 1.0071, "step": 14202 }, { "epoch": 0.5147693088325903, "grad_norm": 2.175465104056799, "learning_rate": 5.003815033865215e-06, "loss": 0.9113, "step": 14203 }, { "epoch": 0.514805552535247, "grad_norm": 2.1258997509364876, "learning_rate": 5.003228105667223e-06, "loss": 0.8047, "step": 14204 }, { "epoch": 0.5148417962379037, "grad_norm": 2.430679032398323, "learning_rate": 5.002641177424747e-06, "loss": 0.8911, "step": 14205 }, { "epoch": 0.5148780399405604, "grad_norm": 2.4567296815404847, "learning_rate": 5.002054249145879e-06, "loss": 1.0002, "step": 14206 }, { "epoch": 0.514914283643217, "grad_norm": 2.2018419353851946, "learning_rate": 5.0014673208387036e-06, "loss": 1.0301, "step": 14207 }, { "epoch": 0.5149505273458737, "grad_norm": 2.3805129900233655, "learning_rate": 5.0008803925113105e-06, "loss": 0.8846, "step": 14208 }, { "epoch": 0.5149867710485303, "grad_norm": 2.2297494906545894, "learning_rate": 5.000293464171786e-06, "loss": 0.8693, "step": 14209 }, { "epoch": 0.515023014751187, "grad_norm": 2.3938680431405284, "learning_rate": 4.999706535828216e-06, "loss": 1.0985, "step": 14210 }, { "epoch": 0.5150592584538436, "grad_norm": 2.445581373755204, "learning_rate": 4.999119607488691e-06, "loss": 1.0377, "step": 14211 }, { "epoch": 0.5150955021565004, "grad_norm": 2.509274624429846, "learning_rate": 4.998532679161298e-06, "loss": 0.9108, "step": 14212 }, { "epoch": 0.515131745859157, "grad_norm": 2.057939066136151, "learning_rate": 4.997945750854124e-06, "loss": 0.7674, "step": 14213 }, { "epoch": 0.5151679895618136, "grad_norm": 2.2630209465960114, "learning_rate": 4.997358822575254e-06, "loss": 0.9166, "step": 14214 }, { "epoch": 0.5152042332644703, "grad_norm": 2.1953320808339676, "learning_rate": 4.996771894332779e-06, "loss": 0.8163, "step": 14215 }, { "epoch": 0.5152404769671269, "grad_norm": 2.2793899856413016, "learning_rate": 4.996184966134786e-06, "loss": 0.9246, "step": 14216 }, { "epoch": 0.5152767206697836, "grad_norm": 2.4137065997594287, "learning_rate": 4.995598037989362e-06, "loss": 0.9268, "step": 14217 }, { "epoch": 0.5153129643724403, "grad_norm": 2.449689193909185, "learning_rate": 4.995011109904593e-06, "loss": 0.805, "step": 14218 }, { "epoch": 0.515349208075097, "grad_norm": 2.4605752473161715, "learning_rate": 4.994424181888569e-06, "loss": 0.8428, "step": 14219 }, { "epoch": 0.5153854517777536, "grad_norm": 2.3750572193926334, "learning_rate": 4.993837253949376e-06, "loss": 0.8228, "step": 14220 }, { "epoch": 0.5154216954804103, "grad_norm": 2.5636873543154537, "learning_rate": 4.993250326095103e-06, "loss": 1.0617, "step": 14221 }, { "epoch": 0.5154579391830669, "grad_norm": 2.1990962275557493, "learning_rate": 4.992663398333834e-06, "loss": 0.9893, "step": 14222 }, { "epoch": 0.5154941828857236, "grad_norm": 2.5366102384899016, "learning_rate": 4.992076470673661e-06, "loss": 0.8751, "step": 14223 }, { "epoch": 0.5155304265883802, "grad_norm": 2.3790563390593626, "learning_rate": 4.991489543122669e-06, "loss": 0.9462, "step": 14224 }, { "epoch": 0.515566670291037, "grad_norm": 2.400595381403837, "learning_rate": 4.990902615688946e-06, "loss": 0.7903, "step": 14225 }, { "epoch": 0.5156029139936936, "grad_norm": 2.3767595544335847, "learning_rate": 4.9903156883805775e-06, "loss": 0.9456, "step": 14226 }, { "epoch": 0.5156391576963503, "grad_norm": 2.280491714389846, "learning_rate": 4.989728761205655e-06, "loss": 0.9121, "step": 14227 }, { "epoch": 0.5156754013990069, "grad_norm": 2.3476103206378673, "learning_rate": 4.989141834172264e-06, "loss": 1.1048, "step": 14228 }, { "epoch": 0.5157116451016636, "grad_norm": 2.1126233565605754, "learning_rate": 4.988554907288492e-06, "loss": 0.8491, "step": 14229 }, { "epoch": 0.5157478888043202, "grad_norm": 2.5652903152684603, "learning_rate": 4.9879679805624255e-06, "loss": 0.9059, "step": 14230 }, { "epoch": 0.515784132506977, "grad_norm": 2.563284706970078, "learning_rate": 4.987381054002154e-06, "loss": 0.9073, "step": 14231 }, { "epoch": 0.5158203762096336, "grad_norm": 2.701667965338545, "learning_rate": 4.986794127615764e-06, "loss": 0.9198, "step": 14232 }, { "epoch": 0.5158566199122903, "grad_norm": 2.715518126103249, "learning_rate": 4.986207201411343e-06, "loss": 0.9188, "step": 14233 }, { "epoch": 0.5158928636149469, "grad_norm": 2.423080396739086, "learning_rate": 4.985620275396976e-06, "loss": 1.0272, "step": 14234 }, { "epoch": 0.5159291073176036, "grad_norm": 2.282517020853978, "learning_rate": 4.985033349580755e-06, "loss": 0.9698, "step": 14235 }, { "epoch": 0.5159653510202602, "grad_norm": 2.66568335441586, "learning_rate": 4.9844464239707654e-06, "loss": 0.855, "step": 14236 }, { "epoch": 0.5160015947229168, "grad_norm": 2.3527473522382167, "learning_rate": 4.983859498575095e-06, "loss": 0.8441, "step": 14237 }, { "epoch": 0.5160378384255736, "grad_norm": 2.177511074869009, "learning_rate": 4.98327257340183e-06, "loss": 0.9261, "step": 14238 }, { "epoch": 0.5160740821282302, "grad_norm": 2.2689730030035564, "learning_rate": 4.982685648459059e-06, "loss": 1.0351, "step": 14239 }, { "epoch": 0.5161103258308869, "grad_norm": 2.491312277450007, "learning_rate": 4.9820987237548705e-06, "loss": 0.8996, "step": 14240 }, { "epoch": 0.5161465695335435, "grad_norm": 2.346217273414353, "learning_rate": 4.981511799297349e-06, "loss": 0.8099, "step": 14241 }, { "epoch": 0.5161828132362002, "grad_norm": 2.596375999691347, "learning_rate": 4.980924875094583e-06, "loss": 0.8547, "step": 14242 }, { "epoch": 0.5162190569388568, "grad_norm": 2.5647608912048607, "learning_rate": 4.980337951154663e-06, "loss": 0.8418, "step": 14243 }, { "epoch": 0.5162553006415135, "grad_norm": 2.382656628720259, "learning_rate": 4.979751027485673e-06, "loss": 0.9135, "step": 14244 }, { "epoch": 0.5162915443441702, "grad_norm": 2.2356239207287345, "learning_rate": 4.979164104095703e-06, "loss": 0.8859, "step": 14245 }, { "epoch": 0.5163277880468269, "grad_norm": 2.4159910151214827, "learning_rate": 4.978577180992836e-06, "loss": 0.9107, "step": 14246 }, { "epoch": 0.5163640317494835, "grad_norm": 2.3113240119940883, "learning_rate": 4.977990258185164e-06, "loss": 0.8901, "step": 14247 }, { "epoch": 0.5164002754521402, "grad_norm": 2.3600309382625584, "learning_rate": 4.977403335680775e-06, "loss": 0.9746, "step": 14248 }, { "epoch": 0.5164365191547968, "grad_norm": 2.170836728984713, "learning_rate": 4.9768164134877526e-06, "loss": 0.9561, "step": 14249 }, { "epoch": 0.5164727628574535, "grad_norm": 2.65536413586112, "learning_rate": 4.9762294916141855e-06, "loss": 1.0546, "step": 14250 }, { "epoch": 0.5165090065601102, "grad_norm": 2.515488560655913, "learning_rate": 4.975642570068162e-06, "loss": 0.9158, "step": 14251 }, { "epoch": 0.5165452502627669, "grad_norm": 2.4100134979411716, "learning_rate": 4.97505564885777e-06, "loss": 0.9821, "step": 14252 }, { "epoch": 0.5165814939654235, "grad_norm": 2.231094412999685, "learning_rate": 4.974468727991094e-06, "loss": 1.0277, "step": 14253 }, { "epoch": 0.5166177376680802, "grad_norm": 2.575185425627319, "learning_rate": 4.973881807476225e-06, "loss": 0.9591, "step": 14254 }, { "epoch": 0.5166539813707368, "grad_norm": 2.4079527917596413, "learning_rate": 4.973294887321249e-06, "loss": 0.9673, "step": 14255 }, { "epoch": 0.5166902250733935, "grad_norm": 2.6107637900162937, "learning_rate": 4.972707967534254e-06, "loss": 0.8981, "step": 14256 }, { "epoch": 0.5167264687760501, "grad_norm": 2.4444959659030054, "learning_rate": 4.972121048123325e-06, "loss": 0.944, "step": 14257 }, { "epoch": 0.5167627124787069, "grad_norm": 2.21552902741244, "learning_rate": 4.971534129096553e-06, "loss": 0.9061, "step": 14258 }, { "epoch": 0.5167989561813635, "grad_norm": 2.358647077110916, "learning_rate": 4.970947210462022e-06, "loss": 1.0242, "step": 14259 }, { "epoch": 0.5168351998840202, "grad_norm": 1.9421253898212127, "learning_rate": 4.9703602922278225e-06, "loss": 0.7568, "step": 14260 }, { "epoch": 0.5168714435866768, "grad_norm": 2.547763617712718, "learning_rate": 4.969773374402036e-06, "loss": 0.9609, "step": 14261 }, { "epoch": 0.5169076872893335, "grad_norm": 2.569230383089362, "learning_rate": 4.969186456992758e-06, "loss": 0.9109, "step": 14262 }, { "epoch": 0.5169439309919901, "grad_norm": 2.433589873259262, "learning_rate": 4.968599540008072e-06, "loss": 0.7894, "step": 14263 }, { "epoch": 0.5169801746946469, "grad_norm": 2.2769222420518944, "learning_rate": 4.968012623456065e-06, "loss": 0.9296, "step": 14264 }, { "epoch": 0.5170164183973035, "grad_norm": 2.5336963618336017, "learning_rate": 4.967425707344822e-06, "loss": 0.8673, "step": 14265 }, { "epoch": 0.5170526620999601, "grad_norm": 2.4577792349366594, "learning_rate": 4.966838791682436e-06, "loss": 0.9421, "step": 14266 }, { "epoch": 0.5170889058026168, "grad_norm": 2.3210609134735685, "learning_rate": 4.966251876476991e-06, "loss": 0.9262, "step": 14267 }, { "epoch": 0.5171251495052734, "grad_norm": 2.090034397307058, "learning_rate": 4.965664961736575e-06, "loss": 0.7866, "step": 14268 }, { "epoch": 0.5171613932079301, "grad_norm": 2.7191946997360783, "learning_rate": 4.965078047469273e-06, "loss": 0.9105, "step": 14269 }, { "epoch": 0.5171976369105867, "grad_norm": 2.4969344508129425, "learning_rate": 4.9644911336831765e-06, "loss": 1.0149, "step": 14270 }, { "epoch": 0.5172338806132435, "grad_norm": 2.4139925809659744, "learning_rate": 4.96390422038637e-06, "loss": 0.9647, "step": 14271 }, { "epoch": 0.5172701243159001, "grad_norm": 2.082513692830565, "learning_rate": 4.963317307586942e-06, "loss": 0.7132, "step": 14272 }, { "epoch": 0.5173063680185568, "grad_norm": 2.594896985516421, "learning_rate": 4.962730395292977e-06, "loss": 0.9932, "step": 14273 }, { "epoch": 0.5173426117212134, "grad_norm": 2.350338566646956, "learning_rate": 4.962143483512567e-06, "loss": 0.9453, "step": 14274 }, { "epoch": 0.5173788554238701, "grad_norm": 2.5195438136038786, "learning_rate": 4.961556572253797e-06, "loss": 0.9958, "step": 14275 }, { "epoch": 0.5174150991265267, "grad_norm": 2.256657318778609, "learning_rate": 4.960969661524754e-06, "loss": 0.9417, "step": 14276 }, { "epoch": 0.5174513428291835, "grad_norm": 2.4120947334538223, "learning_rate": 4.960382751333525e-06, "loss": 1.0494, "step": 14277 }, { "epoch": 0.5174875865318401, "grad_norm": 2.692788190706875, "learning_rate": 4.959795841688197e-06, "loss": 0.842, "step": 14278 }, { "epoch": 0.5175238302344968, "grad_norm": 2.3678085301375944, "learning_rate": 4.95920893259686e-06, "loss": 0.6361, "step": 14279 }, { "epoch": 0.5175600739371534, "grad_norm": 2.284778798727217, "learning_rate": 4.958622024067598e-06, "loss": 0.756, "step": 14280 }, { "epoch": 0.5175963176398101, "grad_norm": 2.539242364454596, "learning_rate": 4.958035116108498e-06, "loss": 0.8912, "step": 14281 }, { "epoch": 0.5176325613424667, "grad_norm": 2.4003736479153956, "learning_rate": 4.95744820872765e-06, "loss": 1.0114, "step": 14282 }, { "epoch": 0.5176688050451234, "grad_norm": 2.2187330138191013, "learning_rate": 4.956861301933141e-06, "loss": 0.7761, "step": 14283 }, { "epoch": 0.5177050487477801, "grad_norm": 2.1864520514888834, "learning_rate": 4.956274395733057e-06, "loss": 0.8344, "step": 14284 }, { "epoch": 0.5177412924504368, "grad_norm": 2.332850665344494, "learning_rate": 4.955687490135482e-06, "loss": 1.0705, "step": 14285 }, { "epoch": 0.5177775361530934, "grad_norm": 2.3335224833447756, "learning_rate": 4.95510058514851e-06, "loss": 0.9597, "step": 14286 }, { "epoch": 0.5178137798557501, "grad_norm": 2.032318106890457, "learning_rate": 4.954513680780224e-06, "loss": 0.8283, "step": 14287 }, { "epoch": 0.5178500235584067, "grad_norm": 2.5079698421844583, "learning_rate": 4.953926777038714e-06, "loss": 1.1508, "step": 14288 }, { "epoch": 0.5178862672610633, "grad_norm": 2.2156474671090916, "learning_rate": 4.953339873932061e-06, "loss": 0.9576, "step": 14289 }, { "epoch": 0.5179225109637201, "grad_norm": 2.5881934471726344, "learning_rate": 4.952752971468359e-06, "loss": 0.9287, "step": 14290 }, { "epoch": 0.5179587546663768, "grad_norm": 2.2701382628692968, "learning_rate": 4.952166069655693e-06, "loss": 0.674, "step": 14291 }, { "epoch": 0.5179949983690334, "grad_norm": 2.2980676201931827, "learning_rate": 4.951579168502148e-06, "loss": 0.9918, "step": 14292 }, { "epoch": 0.51803124207169, "grad_norm": 2.5967710525052463, "learning_rate": 4.9509922680158124e-06, "loss": 0.9463, "step": 14293 }, { "epoch": 0.5180674857743467, "grad_norm": 2.1845592820109396, "learning_rate": 4.950405368204775e-06, "loss": 0.9397, "step": 14294 }, { "epoch": 0.5181037294770033, "grad_norm": 2.338984813725121, "learning_rate": 4.949818469077122e-06, "loss": 0.8565, "step": 14295 }, { "epoch": 0.51813997317966, "grad_norm": 2.5309787202401557, "learning_rate": 4.94923157064094e-06, "loss": 0.9437, "step": 14296 }, { "epoch": 0.5181762168823167, "grad_norm": 2.2601539182931876, "learning_rate": 4.948644672904315e-06, "loss": 0.9829, "step": 14297 }, { "epoch": 0.5182124605849734, "grad_norm": 2.4530272865801757, "learning_rate": 4.9480577758753365e-06, "loss": 1.1326, "step": 14298 }, { "epoch": 0.51824870428763, "grad_norm": 2.530903457743868, "learning_rate": 4.94747087956209e-06, "loss": 0.9072, "step": 14299 }, { "epoch": 0.5182849479902867, "grad_norm": 2.065931741095794, "learning_rate": 4.946883983972663e-06, "loss": 0.9549, "step": 14300 }, { "epoch": 0.5183211916929433, "grad_norm": 2.146708187315295, "learning_rate": 4.946297089115141e-06, "loss": 0.8249, "step": 14301 }, { "epoch": 0.5183574353956, "grad_norm": 2.3698836850434972, "learning_rate": 4.945710194997615e-06, "loss": 0.9437, "step": 14302 }, { "epoch": 0.5183936790982567, "grad_norm": 2.6515716691955418, "learning_rate": 4.945123301628169e-06, "loss": 0.7965, "step": 14303 }, { "epoch": 0.5184299228009134, "grad_norm": 2.3115967530009405, "learning_rate": 4.944536409014891e-06, "loss": 0.8435, "step": 14304 }, { "epoch": 0.51846616650357, "grad_norm": 2.3284333990474426, "learning_rate": 4.943949517165867e-06, "loss": 0.8402, "step": 14305 }, { "epoch": 0.5185024102062267, "grad_norm": 2.486350326214606, "learning_rate": 4.9433626260891855e-06, "loss": 0.9362, "step": 14306 }, { "epoch": 0.5185386539088833, "grad_norm": 2.4527286250891116, "learning_rate": 4.942775735792933e-06, "loss": 0.8838, "step": 14307 }, { "epoch": 0.51857489761154, "grad_norm": 2.162868930139635, "learning_rate": 4.9421888462851954e-06, "loss": 0.8695, "step": 14308 }, { "epoch": 0.5186111413141966, "grad_norm": 2.159265478726755, "learning_rate": 4.941601957574059e-06, "loss": 0.8378, "step": 14309 }, { "epoch": 0.5186473850168534, "grad_norm": 2.178661789889223, "learning_rate": 4.9410150696676146e-06, "loss": 0.8473, "step": 14310 }, { "epoch": 0.51868362871951, "grad_norm": 2.514053043700534, "learning_rate": 4.940428182573947e-06, "loss": 0.7146, "step": 14311 }, { "epoch": 0.5187198724221667, "grad_norm": 2.3701955964242543, "learning_rate": 4.939841296301143e-06, "loss": 0.8265, "step": 14312 }, { "epoch": 0.5187561161248233, "grad_norm": 2.2080803045301693, "learning_rate": 4.939254410857286e-06, "loss": 0.6951, "step": 14313 }, { "epoch": 0.51879235982748, "grad_norm": 2.462631920570466, "learning_rate": 4.93866752625047e-06, "loss": 0.9985, "step": 14314 }, { "epoch": 0.5188286035301366, "grad_norm": 2.3387492230358493, "learning_rate": 4.938080642488779e-06, "loss": 0.701, "step": 14315 }, { "epoch": 0.5188648472327932, "grad_norm": 2.3497145726890487, "learning_rate": 4.937493759580298e-06, "loss": 0.8212, "step": 14316 }, { "epoch": 0.51890109093545, "grad_norm": 2.739823782190797, "learning_rate": 4.9369068775331145e-06, "loss": 0.8984, "step": 14317 }, { "epoch": 0.5189373346381067, "grad_norm": 2.3777564183273365, "learning_rate": 4.936319996355317e-06, "loss": 0.7704, "step": 14318 }, { "epoch": 0.5189735783407633, "grad_norm": 2.159200371280563, "learning_rate": 4.935733116054992e-06, "loss": 0.8924, "step": 14319 }, { "epoch": 0.5190098220434199, "grad_norm": 2.309126590941614, "learning_rate": 4.935146236640226e-06, "loss": 0.9672, "step": 14320 }, { "epoch": 0.5190460657460766, "grad_norm": 2.3000334184055298, "learning_rate": 4.9345593581191025e-06, "loss": 0.9401, "step": 14321 }, { "epoch": 0.5190823094487332, "grad_norm": 2.6092468410022227, "learning_rate": 4.9339724804997145e-06, "loss": 0.8807, "step": 14322 }, { "epoch": 0.51911855315139, "grad_norm": 2.0992647143216994, "learning_rate": 4.933385603790145e-06, "loss": 0.7746, "step": 14323 }, { "epoch": 0.5191547968540466, "grad_norm": 2.618245516517088, "learning_rate": 4.932798727998482e-06, "loss": 0.9003, "step": 14324 }, { "epoch": 0.5191910405567033, "grad_norm": 2.462462950190726, "learning_rate": 4.93221185313281e-06, "loss": 0.9311, "step": 14325 }, { "epoch": 0.5192272842593599, "grad_norm": 2.1747690772191266, "learning_rate": 4.93162497920122e-06, "loss": 1.0702, "step": 14326 }, { "epoch": 0.5192635279620166, "grad_norm": 2.0932316309870522, "learning_rate": 4.931038106211796e-06, "loss": 1.0062, "step": 14327 }, { "epoch": 0.5192997716646732, "grad_norm": 1.8332286271071816, "learning_rate": 4.930451234172625e-06, "loss": 0.7633, "step": 14328 }, { "epoch": 0.5193360153673299, "grad_norm": 2.388983767367144, "learning_rate": 4.929864363091792e-06, "loss": 0.9662, "step": 14329 }, { "epoch": 0.5193722590699866, "grad_norm": 2.227618663174622, "learning_rate": 4.9292774929773876e-06, "loss": 0.7806, "step": 14330 }, { "epoch": 0.5194085027726433, "grad_norm": 3.564693273329622, "learning_rate": 4.928690623837497e-06, "loss": 0.8571, "step": 14331 }, { "epoch": 0.5194447464752999, "grad_norm": 2.4407013358848513, "learning_rate": 4.9281037556802035e-06, "loss": 0.8972, "step": 14332 }, { "epoch": 0.5194809901779566, "grad_norm": 2.259621545538515, "learning_rate": 4.9275168885136e-06, "loss": 0.8063, "step": 14333 }, { "epoch": 0.5195172338806132, "grad_norm": 2.444104884921334, "learning_rate": 4.926930022345769e-06, "loss": 0.924, "step": 14334 }, { "epoch": 0.5195534775832699, "grad_norm": 2.676656720821838, "learning_rate": 4.926343157184798e-06, "loss": 0.9815, "step": 14335 }, { "epoch": 0.5195897212859266, "grad_norm": 2.5014143502350077, "learning_rate": 4.925756293038774e-06, "loss": 0.9771, "step": 14336 }, { "epoch": 0.5196259649885833, "grad_norm": 2.516187021522267, "learning_rate": 4.925169429915783e-06, "loss": 1.0405, "step": 14337 }, { "epoch": 0.5196622086912399, "grad_norm": 2.593785529764987, "learning_rate": 4.9245825678239126e-06, "loss": 1.0459, "step": 14338 }, { "epoch": 0.5196984523938966, "grad_norm": 2.257740692944927, "learning_rate": 4.923995706771249e-06, "loss": 1.0007, "step": 14339 }, { "epoch": 0.5197346960965532, "grad_norm": 2.2468903560858817, "learning_rate": 4.9234088467658765e-06, "loss": 0.5976, "step": 14340 }, { "epoch": 0.5197709397992099, "grad_norm": 2.4514448270513136, "learning_rate": 4.922821987815886e-06, "loss": 1.1164, "step": 14341 }, { "epoch": 0.5198071835018665, "grad_norm": 2.311181917346219, "learning_rate": 4.922235129929361e-06, "loss": 0.8208, "step": 14342 }, { "epoch": 0.5198434272045233, "grad_norm": 2.4329676755747682, "learning_rate": 4.92164827311439e-06, "loss": 1.0263, "step": 14343 }, { "epoch": 0.5198796709071799, "grad_norm": 2.6389759732256555, "learning_rate": 4.921061417379056e-06, "loss": 0.8321, "step": 14344 }, { "epoch": 0.5199159146098365, "grad_norm": 2.06961392949438, "learning_rate": 4.92047456273145e-06, "loss": 0.8384, "step": 14345 }, { "epoch": 0.5199521583124932, "grad_norm": 2.009061257225061, "learning_rate": 4.919887709179656e-06, "loss": 0.6649, "step": 14346 }, { "epoch": 0.5199884020151498, "grad_norm": 2.2831677115773465, "learning_rate": 4.919300856731761e-06, "loss": 0.7398, "step": 14347 }, { "epoch": 0.5200246457178065, "grad_norm": 2.474947460170464, "learning_rate": 4.91871400539585e-06, "loss": 0.7714, "step": 14348 }, { "epoch": 0.5200608894204632, "grad_norm": 2.065721820957101, "learning_rate": 4.918127155180011e-06, "loss": 0.9304, "step": 14349 }, { "epoch": 0.5200971331231199, "grad_norm": 2.2936016803756876, "learning_rate": 4.917540306092332e-06, "loss": 0.8353, "step": 14350 }, { "epoch": 0.5201333768257765, "grad_norm": 2.636347309499178, "learning_rate": 4.916953458140897e-06, "loss": 1.0093, "step": 14351 }, { "epoch": 0.5201696205284332, "grad_norm": 2.573725638450822, "learning_rate": 4.916366611333791e-06, "loss": 0.9028, "step": 14352 }, { "epoch": 0.5202058642310898, "grad_norm": 2.282791732775774, "learning_rate": 4.915779765679106e-06, "loss": 0.7713, "step": 14353 }, { "epoch": 0.5202421079337465, "grad_norm": 2.2258580593018658, "learning_rate": 4.915192921184923e-06, "loss": 0.9146, "step": 14354 }, { "epoch": 0.5202783516364031, "grad_norm": 2.3553571725435676, "learning_rate": 4.914606077859332e-06, "loss": 0.8926, "step": 14355 }, { "epoch": 0.5203145953390599, "grad_norm": 2.1550283720801175, "learning_rate": 4.914019235710415e-06, "loss": 0.8845, "step": 14356 }, { "epoch": 0.5203508390417165, "grad_norm": 2.3513311800554013, "learning_rate": 4.913432394746263e-06, "loss": 0.9399, "step": 14357 }, { "epoch": 0.5203870827443732, "grad_norm": 2.3207842148557645, "learning_rate": 4.91284555497496e-06, "loss": 0.784, "step": 14358 }, { "epoch": 0.5204233264470298, "grad_norm": 2.3673558011626668, "learning_rate": 4.912258716404592e-06, "loss": 0.9243, "step": 14359 }, { "epoch": 0.5204595701496865, "grad_norm": 2.1159925374900466, "learning_rate": 4.9116718790432445e-06, "loss": 1.0549, "step": 14360 }, { "epoch": 0.5204958138523431, "grad_norm": 2.211967308773751, "learning_rate": 4.911085042899006e-06, "loss": 0.775, "step": 14361 }, { "epoch": 0.5205320575549999, "grad_norm": 2.6076703564157935, "learning_rate": 4.9104982079799635e-06, "loss": 0.85, "step": 14362 }, { "epoch": 0.5205683012576565, "grad_norm": 2.2714220928785, "learning_rate": 4.909911374294201e-06, "loss": 1.0601, "step": 14363 }, { "epoch": 0.5206045449603132, "grad_norm": 2.1861568568180827, "learning_rate": 4.909324541849803e-06, "loss": 0.7521, "step": 14364 }, { "epoch": 0.5206407886629698, "grad_norm": 2.1279011487639217, "learning_rate": 4.9087377106548606e-06, "loss": 0.793, "step": 14365 }, { "epoch": 0.5206770323656265, "grad_norm": 2.2349721107580787, "learning_rate": 4.908150880717457e-06, "loss": 0.7639, "step": 14366 }, { "epoch": 0.5207132760682831, "grad_norm": 2.2909170540987027, "learning_rate": 4.907564052045678e-06, "loss": 0.9643, "step": 14367 }, { "epoch": 0.5207495197709398, "grad_norm": 2.3707601187156717, "learning_rate": 4.9069772246476085e-06, "loss": 0.9777, "step": 14368 }, { "epoch": 0.5207857634735965, "grad_norm": 2.467004607337852, "learning_rate": 4.906390398531339e-06, "loss": 0.8908, "step": 14369 }, { "epoch": 0.5208220071762532, "grad_norm": 2.550028848976231, "learning_rate": 4.905803573704953e-06, "loss": 0.9058, "step": 14370 }, { "epoch": 0.5208582508789098, "grad_norm": 2.228122241621395, "learning_rate": 4.905216750176538e-06, "loss": 0.8455, "step": 14371 }, { "epoch": 0.5208944945815664, "grad_norm": 2.3576150706865064, "learning_rate": 4.904629927954174e-06, "loss": 0.9348, "step": 14372 }, { "epoch": 0.5209307382842231, "grad_norm": 2.2936913809188018, "learning_rate": 4.904043107045956e-06, "loss": 0.7123, "step": 14373 }, { "epoch": 0.5209669819868797, "grad_norm": 2.497704536689406, "learning_rate": 4.903456287459966e-06, "loss": 0.9184, "step": 14374 }, { "epoch": 0.5210032256895365, "grad_norm": 2.1898363624406536, "learning_rate": 4.90286946920429e-06, "loss": 0.8739, "step": 14375 }, { "epoch": 0.5210394693921931, "grad_norm": 2.1388899281328033, "learning_rate": 4.902282652287013e-06, "loss": 0.8684, "step": 14376 }, { "epoch": 0.5210757130948498, "grad_norm": 2.6274263842917627, "learning_rate": 4.901695836716222e-06, "loss": 1.1809, "step": 14377 }, { "epoch": 0.5211119567975064, "grad_norm": 2.492561217063335, "learning_rate": 4.901109022500004e-06, "loss": 0.8152, "step": 14378 }, { "epoch": 0.5211482005001631, "grad_norm": 2.3278663878318797, "learning_rate": 4.900522209646443e-06, "loss": 0.8517, "step": 14379 }, { "epoch": 0.5211844442028197, "grad_norm": 2.550524202242878, "learning_rate": 4.899935398163624e-06, "loss": 0.848, "step": 14380 }, { "epoch": 0.5212206879054764, "grad_norm": 2.277494135884801, "learning_rate": 4.899348588059637e-06, "loss": 1.2141, "step": 14381 }, { "epoch": 0.5212569316081331, "grad_norm": 2.3172526450406163, "learning_rate": 4.8987617793425655e-06, "loss": 1.0681, "step": 14382 }, { "epoch": 0.5212931753107898, "grad_norm": 2.5916369235376875, "learning_rate": 4.898174972020495e-06, "loss": 0.9552, "step": 14383 }, { "epoch": 0.5213294190134464, "grad_norm": 2.483266207178151, "learning_rate": 4.897588166101511e-06, "loss": 0.8661, "step": 14384 }, { "epoch": 0.5213656627161031, "grad_norm": 2.239461243017372, "learning_rate": 4.8970013615937015e-06, "loss": 0.7158, "step": 14385 }, { "epoch": 0.5214019064187597, "grad_norm": 2.3703823797406782, "learning_rate": 4.896414558505151e-06, "loss": 1.0861, "step": 14386 }, { "epoch": 0.5214381501214164, "grad_norm": 2.4078294520923897, "learning_rate": 4.895827756843945e-06, "loss": 1.0102, "step": 14387 }, { "epoch": 0.521474393824073, "grad_norm": 2.0466119839635417, "learning_rate": 4.895240956618167e-06, "loss": 0.8739, "step": 14388 }, { "epoch": 0.5215106375267298, "grad_norm": 2.5270026182103456, "learning_rate": 4.894654157835908e-06, "loss": 0.8276, "step": 14389 }, { "epoch": 0.5215468812293864, "grad_norm": 2.3035298091694725, "learning_rate": 4.8940673605052505e-06, "loss": 0.8879, "step": 14390 }, { "epoch": 0.5215831249320431, "grad_norm": 2.5836292711987063, "learning_rate": 4.893480564634281e-06, "loss": 1.1014, "step": 14391 }, { "epoch": 0.5216193686346997, "grad_norm": 2.5636332921843428, "learning_rate": 4.892893770231084e-06, "loss": 0.9093, "step": 14392 }, { "epoch": 0.5216556123373564, "grad_norm": 2.6371699849552983, "learning_rate": 4.892306977303748e-06, "loss": 0.9506, "step": 14393 }, { "epoch": 0.521691856040013, "grad_norm": 2.295328524812903, "learning_rate": 4.891720185860355e-06, "loss": 0.8055, "step": 14394 }, { "epoch": 0.5217280997426698, "grad_norm": 2.2206266015421696, "learning_rate": 4.891133395908993e-06, "loss": 0.7944, "step": 14395 }, { "epoch": 0.5217643434453264, "grad_norm": 2.3926839735319256, "learning_rate": 4.8905466074577444e-06, "loss": 0.8831, "step": 14396 }, { "epoch": 0.521800587147983, "grad_norm": 2.5214300186186893, "learning_rate": 4.8899598205147005e-06, "loss": 0.8469, "step": 14397 }, { "epoch": 0.5218368308506397, "grad_norm": 2.258249447833562, "learning_rate": 4.889373035087943e-06, "loss": 0.756, "step": 14398 }, { "epoch": 0.5218730745532963, "grad_norm": 2.530239737987764, "learning_rate": 4.8887862511855585e-06, "loss": 0.9855, "step": 14399 }, { "epoch": 0.521909318255953, "grad_norm": 2.78696678558298, "learning_rate": 4.8881994688156294e-06, "loss": 0.9477, "step": 14400 }, { "epoch": 0.5219455619586096, "grad_norm": 2.1863456155300782, "learning_rate": 4.887612687986247e-06, "loss": 0.8558, "step": 14401 }, { "epoch": 0.5219818056612664, "grad_norm": 2.080980029330357, "learning_rate": 4.887025908705493e-06, "loss": 0.7886, "step": 14402 }, { "epoch": 0.522018049363923, "grad_norm": 2.517830669029936, "learning_rate": 4.886439130981454e-06, "loss": 0.8708, "step": 14403 }, { "epoch": 0.5220542930665797, "grad_norm": 2.4254806859063454, "learning_rate": 4.885852354822215e-06, "loss": 0.9739, "step": 14404 }, { "epoch": 0.5220905367692363, "grad_norm": 2.296630905209192, "learning_rate": 4.885265580235861e-06, "loss": 0.7727, "step": 14405 }, { "epoch": 0.522126780471893, "grad_norm": 2.538556835574727, "learning_rate": 4.884678807230479e-06, "loss": 0.9516, "step": 14406 }, { "epoch": 0.5221630241745496, "grad_norm": 2.362701509537404, "learning_rate": 4.884092035814153e-06, "loss": 0.8935, "step": 14407 }, { "epoch": 0.5221992678772064, "grad_norm": 2.309685973430335, "learning_rate": 4.883505265994966e-06, "loss": 1.1745, "step": 14408 }, { "epoch": 0.522235511579863, "grad_norm": 2.1343917393858516, "learning_rate": 4.882918497781009e-06, "loss": 0.9134, "step": 14409 }, { "epoch": 0.5222717552825197, "grad_norm": 2.537570956187119, "learning_rate": 4.882331731180364e-06, "loss": 0.782, "step": 14410 }, { "epoch": 0.5223079989851763, "grad_norm": 2.4767796913010782, "learning_rate": 4.881744966201116e-06, "loss": 1.0507, "step": 14411 }, { "epoch": 0.522344242687833, "grad_norm": 2.342820867850111, "learning_rate": 4.881158202851351e-06, "loss": 1.13, "step": 14412 }, { "epoch": 0.5223804863904896, "grad_norm": 2.379568371311947, "learning_rate": 4.8805714411391535e-06, "loss": 0.6748, "step": 14413 }, { "epoch": 0.5224167300931463, "grad_norm": 2.5218086844857073, "learning_rate": 4.879984681072611e-06, "loss": 0.8426, "step": 14414 }, { "epoch": 0.522452973795803, "grad_norm": 2.4888204471558057, "learning_rate": 4.879397922659803e-06, "loss": 0.8616, "step": 14415 }, { "epoch": 0.5224892174984597, "grad_norm": 2.184358096528996, "learning_rate": 4.878811165908822e-06, "loss": 0.9001, "step": 14416 }, { "epoch": 0.5225254612011163, "grad_norm": 2.531807205857388, "learning_rate": 4.87822441082775e-06, "loss": 0.826, "step": 14417 }, { "epoch": 0.522561704903773, "grad_norm": 2.4046237346031236, "learning_rate": 4.877637657424671e-06, "loss": 0.8248, "step": 14418 }, { "epoch": 0.5225979486064296, "grad_norm": 2.378781039733528, "learning_rate": 4.87705090570767e-06, "loss": 0.9407, "step": 14419 }, { "epoch": 0.5226341923090863, "grad_norm": 2.3652355663148827, "learning_rate": 4.876464155684835e-06, "loss": 1.1264, "step": 14420 }, { "epoch": 0.522670436011743, "grad_norm": 2.4442857996324343, "learning_rate": 4.875877407364249e-06, "loss": 0.9121, "step": 14421 }, { "epoch": 0.5227066797143997, "grad_norm": 2.0702829365580766, "learning_rate": 4.8752906607539975e-06, "loss": 0.8812, "step": 14422 }, { "epoch": 0.5227429234170563, "grad_norm": 2.597723120155602, "learning_rate": 4.874703915862164e-06, "loss": 0.9973, "step": 14423 }, { "epoch": 0.522779167119713, "grad_norm": 2.4033836353594706, "learning_rate": 4.874117172696836e-06, "loss": 1.0259, "step": 14424 }, { "epoch": 0.5228154108223696, "grad_norm": 2.222709880404161, "learning_rate": 4.873530431266097e-06, "loss": 0.856, "step": 14425 }, { "epoch": 0.5228516545250262, "grad_norm": 2.2898565366938297, "learning_rate": 4.872943691578033e-06, "loss": 1.0835, "step": 14426 }, { "epoch": 0.5228878982276829, "grad_norm": 2.0963492681982983, "learning_rate": 4.872356953640725e-06, "loss": 0.8337, "step": 14427 }, { "epoch": 0.5229241419303396, "grad_norm": 2.5553410941757777, "learning_rate": 4.8717702174622635e-06, "loss": 0.9187, "step": 14428 }, { "epoch": 0.5229603856329963, "grad_norm": 2.398219138293455, "learning_rate": 4.87118348305073e-06, "loss": 0.7274, "step": 14429 }, { "epoch": 0.5229966293356529, "grad_norm": 1.990485884203292, "learning_rate": 4.870596750414212e-06, "loss": 0.6899, "step": 14430 }, { "epoch": 0.5230328730383096, "grad_norm": 1.9692759582151012, "learning_rate": 4.87001001956079e-06, "loss": 0.9721, "step": 14431 }, { "epoch": 0.5230691167409662, "grad_norm": 2.2686635527952825, "learning_rate": 4.8694232904985525e-06, "loss": 0.7889, "step": 14432 }, { "epoch": 0.5231053604436229, "grad_norm": 2.4703213312475936, "learning_rate": 4.868836563235584e-06, "loss": 1.0139, "step": 14433 }, { "epoch": 0.5231416041462796, "grad_norm": 2.643921893887973, "learning_rate": 4.868249837779968e-06, "loss": 1.0285, "step": 14434 }, { "epoch": 0.5231778478489363, "grad_norm": 2.289972086759855, "learning_rate": 4.8676631141397865e-06, "loss": 0.9087, "step": 14435 }, { "epoch": 0.5232140915515929, "grad_norm": 2.622598342471225, "learning_rate": 4.86707639232313e-06, "loss": 1.0149, "step": 14436 }, { "epoch": 0.5232503352542496, "grad_norm": 2.3343713738203316, "learning_rate": 4.86648967233808e-06, "loss": 0.8278, "step": 14437 }, { "epoch": 0.5232865789569062, "grad_norm": 2.16452875260967, "learning_rate": 4.865902954192722e-06, "loss": 0.8047, "step": 14438 }, { "epoch": 0.5233228226595629, "grad_norm": 2.288669179322053, "learning_rate": 4.865316237895139e-06, "loss": 0.8601, "step": 14439 }, { "epoch": 0.5233590663622195, "grad_norm": 2.2057442732521877, "learning_rate": 4.864729523453417e-06, "loss": 0.7751, "step": 14440 }, { "epoch": 0.5233953100648763, "grad_norm": 2.2227169340883606, "learning_rate": 4.864142810875641e-06, "loss": 0.6834, "step": 14441 }, { "epoch": 0.5234315537675329, "grad_norm": 2.3154060318865426, "learning_rate": 4.863556100169896e-06, "loss": 0.8513, "step": 14442 }, { "epoch": 0.5234677974701896, "grad_norm": 2.3136368095040503, "learning_rate": 4.862969391344264e-06, "loss": 1.0486, "step": 14443 }, { "epoch": 0.5235040411728462, "grad_norm": 2.4720545756565238, "learning_rate": 4.862382684406831e-06, "loss": 1.0028, "step": 14444 }, { "epoch": 0.5235402848755029, "grad_norm": 2.0612391524625253, "learning_rate": 4.8617959793656824e-06, "loss": 0.78, "step": 14445 }, { "epoch": 0.5235765285781595, "grad_norm": 2.207919863054136, "learning_rate": 4.861209276228901e-06, "loss": 1.0264, "step": 14446 }, { "epoch": 0.5236127722808163, "grad_norm": 2.32666458451896, "learning_rate": 4.86062257500457e-06, "loss": 0.918, "step": 14447 }, { "epoch": 0.5236490159834729, "grad_norm": 2.49519928628186, "learning_rate": 4.860035875700777e-06, "loss": 0.9428, "step": 14448 }, { "epoch": 0.5236852596861296, "grad_norm": 2.196271308086045, "learning_rate": 4.859449178325606e-06, "loss": 0.8448, "step": 14449 }, { "epoch": 0.5237215033887862, "grad_norm": 2.330753614634883, "learning_rate": 4.85886248288714e-06, "loss": 0.9337, "step": 14450 }, { "epoch": 0.5237577470914428, "grad_norm": 2.4384440421279834, "learning_rate": 4.858275789393463e-06, "loss": 0.9809, "step": 14451 }, { "epoch": 0.5237939907940995, "grad_norm": 2.2756496258309467, "learning_rate": 4.8576890978526605e-06, "loss": 0.9453, "step": 14452 }, { "epoch": 0.5238302344967561, "grad_norm": 2.2123049004245945, "learning_rate": 4.857102408272816e-06, "loss": 0.9947, "step": 14453 }, { "epoch": 0.5238664781994129, "grad_norm": 2.1409692881074944, "learning_rate": 4.856515720662014e-06, "loss": 0.8565, "step": 14454 }, { "epoch": 0.5239027219020695, "grad_norm": 2.061934696426614, "learning_rate": 4.855929035028336e-06, "loss": 0.8685, "step": 14455 }, { "epoch": 0.5239389656047262, "grad_norm": 2.3153011246598476, "learning_rate": 4.855342351379872e-06, "loss": 1.0661, "step": 14456 }, { "epoch": 0.5239752093073828, "grad_norm": 2.167545581940757, "learning_rate": 4.8547556697247024e-06, "loss": 0.7611, "step": 14457 }, { "epoch": 0.5240114530100395, "grad_norm": 2.4820715323553384, "learning_rate": 4.8541689900709115e-06, "loss": 0.8945, "step": 14458 }, { "epoch": 0.5240476967126961, "grad_norm": 2.657651327322043, "learning_rate": 4.853582312426582e-06, "loss": 1.0006, "step": 14459 }, { "epoch": 0.5240839404153528, "grad_norm": 2.532108976520636, "learning_rate": 4.852995636799801e-06, "loss": 1.0216, "step": 14460 }, { "epoch": 0.5241201841180095, "grad_norm": 2.4065330329544135, "learning_rate": 4.852408963198652e-06, "loss": 0.833, "step": 14461 }, { "epoch": 0.5241564278206662, "grad_norm": 2.6191478265573593, "learning_rate": 4.851822291631219e-06, "loss": 0.9066, "step": 14462 }, { "epoch": 0.5241926715233228, "grad_norm": 2.5211840475044554, "learning_rate": 4.851235622105582e-06, "loss": 0.8208, "step": 14463 }, { "epoch": 0.5242289152259795, "grad_norm": 2.391756818007987, "learning_rate": 4.850648954629831e-06, "loss": 0.8921, "step": 14464 }, { "epoch": 0.5242651589286361, "grad_norm": 2.6525189180090316, "learning_rate": 4.8500622892120465e-06, "loss": 1.0518, "step": 14465 }, { "epoch": 0.5243014026312928, "grad_norm": 2.0871990485421215, "learning_rate": 4.8494756258603135e-06, "loss": 0.7829, "step": 14466 }, { "epoch": 0.5243376463339495, "grad_norm": 2.2108815776377817, "learning_rate": 4.8488889645827126e-06, "loss": 0.7582, "step": 14467 }, { "epoch": 0.5243738900366062, "grad_norm": 2.3209136229731335, "learning_rate": 4.848302305387332e-06, "loss": 0.8198, "step": 14468 }, { "epoch": 0.5244101337392628, "grad_norm": 2.5054230368945714, "learning_rate": 4.847715648282255e-06, "loss": 0.9381, "step": 14469 }, { "epoch": 0.5244463774419195, "grad_norm": 2.3978819635440476, "learning_rate": 4.847128993275564e-06, "loss": 1.0359, "step": 14470 }, { "epoch": 0.5244826211445761, "grad_norm": 2.4087883786644775, "learning_rate": 4.8465423403753414e-06, "loss": 0.9588, "step": 14471 }, { "epoch": 0.5245188648472328, "grad_norm": 2.2591971215382234, "learning_rate": 4.845955689589674e-06, "loss": 0.8976, "step": 14472 }, { "epoch": 0.5245551085498894, "grad_norm": 2.8985287513399456, "learning_rate": 4.8453690409266445e-06, "loss": 1.1101, "step": 14473 }, { "epoch": 0.5245913522525462, "grad_norm": 2.4909523373666986, "learning_rate": 4.844782394394335e-06, "loss": 0.8373, "step": 14474 }, { "epoch": 0.5246275959552028, "grad_norm": 2.252666748117036, "learning_rate": 4.844195750000829e-06, "loss": 1.0761, "step": 14475 }, { "epoch": 0.5246638396578595, "grad_norm": 2.4665177644633594, "learning_rate": 4.843609107754213e-06, "loss": 0.6511, "step": 14476 }, { "epoch": 0.5247000833605161, "grad_norm": 2.4208483410512636, "learning_rate": 4.843022467662569e-06, "loss": 1.1319, "step": 14477 }, { "epoch": 0.5247363270631727, "grad_norm": 2.4887241641227607, "learning_rate": 4.842435829733981e-06, "loss": 0.9553, "step": 14478 }, { "epoch": 0.5247725707658294, "grad_norm": 2.5152813225249573, "learning_rate": 4.84184919397653e-06, "loss": 0.825, "step": 14479 }, { "epoch": 0.5248088144684862, "grad_norm": 2.1404437001743872, "learning_rate": 4.841262560398303e-06, "loss": 0.7847, "step": 14480 }, { "epoch": 0.5248450581711428, "grad_norm": 2.5052397446659485, "learning_rate": 4.840675929007382e-06, "loss": 0.837, "step": 14481 }, { "epoch": 0.5248813018737994, "grad_norm": 2.2213131784003566, "learning_rate": 4.840089299811849e-06, "loss": 0.8348, "step": 14482 }, { "epoch": 0.5249175455764561, "grad_norm": 2.809132644419528, "learning_rate": 4.839502672819789e-06, "loss": 1.0368, "step": 14483 }, { "epoch": 0.5249537892791127, "grad_norm": 2.1235789453640335, "learning_rate": 4.838916048039286e-06, "loss": 0.9025, "step": 14484 }, { "epoch": 0.5249900329817694, "grad_norm": 2.347608871210303, "learning_rate": 4.838329425478422e-06, "loss": 1.0299, "step": 14485 }, { "epoch": 0.525026276684426, "grad_norm": 2.538354821293393, "learning_rate": 4.837742805145282e-06, "loss": 0.9019, "step": 14486 }, { "epoch": 0.5250625203870828, "grad_norm": 2.5015837088912285, "learning_rate": 4.837156187047945e-06, "loss": 0.8784, "step": 14487 }, { "epoch": 0.5250987640897394, "grad_norm": 2.5998721402318803, "learning_rate": 4.836569571194499e-06, "loss": 0.9872, "step": 14488 }, { "epoch": 0.5251350077923961, "grad_norm": 2.4669936623698026, "learning_rate": 4.835982957593026e-06, "loss": 0.9616, "step": 14489 }, { "epoch": 0.5251712514950527, "grad_norm": 2.4232311155113333, "learning_rate": 4.835396346251609e-06, "loss": 0.9985, "step": 14490 }, { "epoch": 0.5252074951977094, "grad_norm": 2.393944133946011, "learning_rate": 4.834809737178329e-06, "loss": 0.8464, "step": 14491 }, { "epoch": 0.525243738900366, "grad_norm": 2.4726564306780183, "learning_rate": 4.8342231303812725e-06, "loss": 0.896, "step": 14492 }, { "epoch": 0.5252799826030228, "grad_norm": 2.2383784844981545, "learning_rate": 4.8336365258685205e-06, "loss": 0.8778, "step": 14493 }, { "epoch": 0.5253162263056794, "grad_norm": 2.7184320268208304, "learning_rate": 4.833049923648157e-06, "loss": 0.9197, "step": 14494 }, { "epoch": 0.5253524700083361, "grad_norm": 2.2529260485808384, "learning_rate": 4.832463323728263e-06, "loss": 1.0617, "step": 14495 }, { "epoch": 0.5253887137109927, "grad_norm": 2.451987260726036, "learning_rate": 4.831876726116924e-06, "loss": 0.9644, "step": 14496 }, { "epoch": 0.5254249574136494, "grad_norm": 2.484118057392161, "learning_rate": 4.8312901308222236e-06, "loss": 0.887, "step": 14497 }, { "epoch": 0.525461201116306, "grad_norm": 2.459284981955665, "learning_rate": 4.83070353785224e-06, "loss": 0.9754, "step": 14498 }, { "epoch": 0.5254974448189627, "grad_norm": 2.4624597468094493, "learning_rate": 4.830116947215062e-06, "loss": 0.8674, "step": 14499 }, { "epoch": 0.5255336885216194, "grad_norm": 2.327605189687891, "learning_rate": 4.829530358918769e-06, "loss": 0.8403, "step": 14500 }, { "epoch": 0.5255699322242761, "grad_norm": 2.123258140045257, "learning_rate": 4.828943772971445e-06, "loss": 1.0455, "step": 14501 }, { "epoch": 0.5256061759269327, "grad_norm": 2.4384872668090947, "learning_rate": 4.82835718938117e-06, "loss": 1.0571, "step": 14502 }, { "epoch": 0.5256424196295894, "grad_norm": 1.9688633985341135, "learning_rate": 4.827770608156031e-06, "loss": 0.8121, "step": 14503 }, { "epoch": 0.525678663332246, "grad_norm": 2.3774059358979716, "learning_rate": 4.827184029304109e-06, "loss": 0.8496, "step": 14504 }, { "epoch": 0.5257149070349026, "grad_norm": 2.431978097057484, "learning_rate": 4.8265974528334865e-06, "loss": 0.8292, "step": 14505 }, { "epoch": 0.5257511507375594, "grad_norm": 2.2954829059068147, "learning_rate": 4.826010878752244e-06, "loss": 0.9625, "step": 14506 }, { "epoch": 0.525787394440216, "grad_norm": 2.3720380364034015, "learning_rate": 4.825424307068469e-06, "loss": 0.8963, "step": 14507 }, { "epoch": 0.5258236381428727, "grad_norm": 2.4093910534463965, "learning_rate": 4.824837737790241e-06, "loss": 0.8499, "step": 14508 }, { "epoch": 0.5258598818455293, "grad_norm": 2.301087783568079, "learning_rate": 4.824251170925644e-06, "loss": 1.01, "step": 14509 }, { "epoch": 0.525896125548186, "grad_norm": 2.3461199853231265, "learning_rate": 4.823664606482758e-06, "loss": 0.8677, "step": 14510 }, { "epoch": 0.5259323692508426, "grad_norm": 2.4052419204555617, "learning_rate": 4.823078044469667e-06, "loss": 0.6843, "step": 14511 }, { "epoch": 0.5259686129534993, "grad_norm": 2.168754627112524, "learning_rate": 4.822491484894455e-06, "loss": 0.9058, "step": 14512 }, { "epoch": 0.526004856656156, "grad_norm": 2.397278462507834, "learning_rate": 4.821904927765202e-06, "loss": 1.0072, "step": 14513 }, { "epoch": 0.5260411003588127, "grad_norm": 2.429271956079689, "learning_rate": 4.82131837308999e-06, "loss": 0.8656, "step": 14514 }, { "epoch": 0.5260773440614693, "grad_norm": 2.514974497834975, "learning_rate": 4.8207318208769045e-06, "loss": 0.9407, "step": 14515 }, { "epoch": 0.526113587764126, "grad_norm": 2.3198021952362904, "learning_rate": 4.820145271134025e-06, "loss": 0.8317, "step": 14516 }, { "epoch": 0.5261498314667826, "grad_norm": 2.1285916640488862, "learning_rate": 4.819558723869437e-06, "loss": 0.9559, "step": 14517 }, { "epoch": 0.5261860751694393, "grad_norm": 2.522175211112658, "learning_rate": 4.818972179091219e-06, "loss": 0.8464, "step": 14518 }, { "epoch": 0.526222318872096, "grad_norm": 2.284071531350935, "learning_rate": 4.818385636807455e-06, "loss": 1.0943, "step": 14519 }, { "epoch": 0.5262585625747527, "grad_norm": 2.4262798396678726, "learning_rate": 4.817799097026228e-06, "loss": 0.8627, "step": 14520 }, { "epoch": 0.5262948062774093, "grad_norm": 2.435520229311886, "learning_rate": 4.817212559755618e-06, "loss": 0.7934, "step": 14521 }, { "epoch": 0.526331049980066, "grad_norm": 2.428207971446923, "learning_rate": 4.816626025003708e-06, "loss": 0.9111, "step": 14522 }, { "epoch": 0.5263672936827226, "grad_norm": 2.23590183238311, "learning_rate": 4.8160394927785815e-06, "loss": 0.7939, "step": 14523 }, { "epoch": 0.5264035373853793, "grad_norm": 2.1610591204527463, "learning_rate": 4.81545296308832e-06, "loss": 0.8498, "step": 14524 }, { "epoch": 0.5264397810880359, "grad_norm": 2.466311629759377, "learning_rate": 4.814866435941005e-06, "loss": 1.176, "step": 14525 }, { "epoch": 0.5264760247906927, "grad_norm": 2.706513526189649, "learning_rate": 4.814279911344716e-06, "loss": 1.0391, "step": 14526 }, { "epoch": 0.5265122684933493, "grad_norm": 2.1736079331261524, "learning_rate": 4.81369338930754e-06, "loss": 0.8641, "step": 14527 }, { "epoch": 0.526548512196006, "grad_norm": 2.246818161637913, "learning_rate": 4.813106869837556e-06, "loss": 0.8802, "step": 14528 }, { "epoch": 0.5265847558986626, "grad_norm": 2.403676047062153, "learning_rate": 4.812520352942847e-06, "loss": 0.7878, "step": 14529 }, { "epoch": 0.5266209996013193, "grad_norm": 2.2919044929270846, "learning_rate": 4.8119338386314925e-06, "loss": 0.9355, "step": 14530 }, { "epoch": 0.5266572433039759, "grad_norm": 2.248206079189635, "learning_rate": 4.811347326911578e-06, "loss": 0.8904, "step": 14531 }, { "epoch": 0.5266934870066325, "grad_norm": 2.1745357809170893, "learning_rate": 4.8107608177911815e-06, "loss": 0.8294, "step": 14532 }, { "epoch": 0.5267297307092893, "grad_norm": 2.292335479632923, "learning_rate": 4.810174311278387e-06, "loss": 0.966, "step": 14533 }, { "epoch": 0.526765974411946, "grad_norm": 2.2400922856793763, "learning_rate": 4.809587807381275e-06, "loss": 1.0703, "step": 14534 }, { "epoch": 0.5268022181146026, "grad_norm": 2.6360409803905096, "learning_rate": 4.809001306107928e-06, "loss": 1.0191, "step": 14535 }, { "epoch": 0.5268384618172592, "grad_norm": 2.333194798342903, "learning_rate": 4.808414807466429e-06, "loss": 0.926, "step": 14536 }, { "epoch": 0.5268747055199159, "grad_norm": 2.25687648808372, "learning_rate": 4.807828311464858e-06, "loss": 1.0325, "step": 14537 }, { "epoch": 0.5269109492225725, "grad_norm": 2.434730257883377, "learning_rate": 4.807241818111295e-06, "loss": 0.9571, "step": 14538 }, { "epoch": 0.5269471929252293, "grad_norm": 2.4071876359687794, "learning_rate": 4.806655327413825e-06, "loss": 0.7963, "step": 14539 }, { "epoch": 0.5269834366278859, "grad_norm": 2.6794320366882194, "learning_rate": 4.806068839380527e-06, "loss": 0.8096, "step": 14540 }, { "epoch": 0.5270196803305426, "grad_norm": 2.295877043685994, "learning_rate": 4.805482354019483e-06, "loss": 1.0196, "step": 14541 }, { "epoch": 0.5270559240331992, "grad_norm": 2.403975500402365, "learning_rate": 4.804895871338773e-06, "loss": 0.9227, "step": 14542 }, { "epoch": 0.5270921677358559, "grad_norm": 2.11499331491376, "learning_rate": 4.804309391346481e-06, "loss": 0.8443, "step": 14543 }, { "epoch": 0.5271284114385125, "grad_norm": 2.2153476849987417, "learning_rate": 4.803722914050688e-06, "loss": 1.0409, "step": 14544 }, { "epoch": 0.5271646551411692, "grad_norm": 2.697314063826776, "learning_rate": 4.803136439459474e-06, "loss": 0.8281, "step": 14545 }, { "epoch": 0.5272008988438259, "grad_norm": 2.3325671061449933, "learning_rate": 4.80254996758092e-06, "loss": 0.9827, "step": 14546 }, { "epoch": 0.5272371425464826, "grad_norm": 2.21181990682194, "learning_rate": 4.801963498423109e-06, "loss": 0.9361, "step": 14547 }, { "epoch": 0.5272733862491392, "grad_norm": 2.2042528411812867, "learning_rate": 4.801377031994121e-06, "loss": 0.9621, "step": 14548 }, { "epoch": 0.5273096299517959, "grad_norm": 2.681458793453666, "learning_rate": 4.800790568302038e-06, "loss": 0.7631, "step": 14549 }, { "epoch": 0.5273458736544525, "grad_norm": 2.2670585517461337, "learning_rate": 4.800204107354939e-06, "loss": 0.8749, "step": 14550 }, { "epoch": 0.5273821173571092, "grad_norm": 2.654041826627427, "learning_rate": 4.799617649160908e-06, "loss": 0.9314, "step": 14551 }, { "epoch": 0.5274183610597659, "grad_norm": 2.445910694747756, "learning_rate": 4.799031193728025e-06, "loss": 0.9119, "step": 14552 }, { "epoch": 0.5274546047624226, "grad_norm": 2.328025565515379, "learning_rate": 4.7984447410643695e-06, "loss": 0.9017, "step": 14553 }, { "epoch": 0.5274908484650792, "grad_norm": 2.235314016250899, "learning_rate": 4.797858291178022e-06, "loss": 0.6759, "step": 14554 }, { "epoch": 0.5275270921677359, "grad_norm": 2.241520763223795, "learning_rate": 4.797271844077067e-06, "loss": 0.8338, "step": 14555 }, { "epoch": 0.5275633358703925, "grad_norm": 2.1523014808611363, "learning_rate": 4.796685399769584e-06, "loss": 0.8747, "step": 14556 }, { "epoch": 0.5275995795730491, "grad_norm": 2.612334420617527, "learning_rate": 4.796098958263652e-06, "loss": 0.9241, "step": 14557 }, { "epoch": 0.5276358232757058, "grad_norm": 2.0348567841690577, "learning_rate": 4.795512519567353e-06, "loss": 0.9196, "step": 14558 }, { "epoch": 0.5276720669783626, "grad_norm": 2.505581345223791, "learning_rate": 4.794926083688769e-06, "loss": 0.7733, "step": 14559 }, { "epoch": 0.5277083106810192, "grad_norm": 2.398647581168274, "learning_rate": 4.794339650635979e-06, "loss": 0.7987, "step": 14560 }, { "epoch": 0.5277445543836758, "grad_norm": 2.522959614321977, "learning_rate": 4.7937532204170645e-06, "loss": 0.8987, "step": 14561 }, { "epoch": 0.5277807980863325, "grad_norm": 2.1697687414139555, "learning_rate": 4.793166793040104e-06, "loss": 0.8973, "step": 14562 }, { "epoch": 0.5278170417889891, "grad_norm": 2.1101602685410024, "learning_rate": 4.792580368513181e-06, "loss": 0.9218, "step": 14563 }, { "epoch": 0.5278532854916458, "grad_norm": 2.5388414434502287, "learning_rate": 4.7919939468443765e-06, "loss": 0.8298, "step": 14564 }, { "epoch": 0.5278895291943025, "grad_norm": 2.360145962554131, "learning_rate": 4.791407528041769e-06, "loss": 1.0029, "step": 14565 }, { "epoch": 0.5279257728969592, "grad_norm": 2.4012983127306025, "learning_rate": 4.790821112113438e-06, "loss": 0.9364, "step": 14566 }, { "epoch": 0.5279620165996158, "grad_norm": 2.1457170470614546, "learning_rate": 4.790234699067466e-06, "loss": 0.9668, "step": 14567 }, { "epoch": 0.5279982603022725, "grad_norm": 2.187640199535646, "learning_rate": 4.789648288911935e-06, "loss": 0.9226, "step": 14568 }, { "epoch": 0.5280345040049291, "grad_norm": 2.3718466238283575, "learning_rate": 4.789061881654922e-06, "loss": 0.9324, "step": 14569 }, { "epoch": 0.5280707477075858, "grad_norm": 2.54312838389608, "learning_rate": 4.788475477304508e-06, "loss": 0.8745, "step": 14570 }, { "epoch": 0.5281069914102424, "grad_norm": 2.381115021961478, "learning_rate": 4.787889075868776e-06, "loss": 0.9, "step": 14571 }, { "epoch": 0.5281432351128992, "grad_norm": 10.42034369590416, "learning_rate": 4.787302677355803e-06, "loss": 0.9981, "step": 14572 }, { "epoch": 0.5281794788155558, "grad_norm": 2.780760249954231, "learning_rate": 4.786716281773671e-06, "loss": 0.8978, "step": 14573 }, { "epoch": 0.5282157225182125, "grad_norm": 2.484618099293958, "learning_rate": 4.786129889130458e-06, "loss": 0.8968, "step": 14574 }, { "epoch": 0.5282519662208691, "grad_norm": 2.11082438087428, "learning_rate": 4.785543499434246e-06, "loss": 0.8957, "step": 14575 }, { "epoch": 0.5282882099235258, "grad_norm": 2.410777805973936, "learning_rate": 4.784957112693117e-06, "loss": 0.8233, "step": 14576 }, { "epoch": 0.5283244536261824, "grad_norm": 2.020720810267847, "learning_rate": 4.784370728915147e-06, "loss": 0.8132, "step": 14577 }, { "epoch": 0.5283606973288392, "grad_norm": 2.066829679302168, "learning_rate": 4.783784348108419e-06, "loss": 0.6899, "step": 14578 }, { "epoch": 0.5283969410314958, "grad_norm": 2.1472215482019257, "learning_rate": 4.783197970281012e-06, "loss": 0.6336, "step": 14579 }, { "epoch": 0.5284331847341525, "grad_norm": 2.4345162511290246, "learning_rate": 4.782611595441006e-06, "loss": 0.8576, "step": 14580 }, { "epoch": 0.5284694284368091, "grad_norm": 1.933432552283013, "learning_rate": 4.782025223596479e-06, "loss": 0.6099, "step": 14581 }, { "epoch": 0.5285056721394658, "grad_norm": 1.9194823705176416, "learning_rate": 4.781438854755514e-06, "loss": 0.8837, "step": 14582 }, { "epoch": 0.5285419158421224, "grad_norm": 2.4150629695808656, "learning_rate": 4.780852488926189e-06, "loss": 0.9022, "step": 14583 }, { "epoch": 0.528578159544779, "grad_norm": 2.358221415954772, "learning_rate": 4.7802661261165856e-06, "loss": 0.8686, "step": 14584 }, { "epoch": 0.5286144032474358, "grad_norm": 2.1237444603030053, "learning_rate": 4.77967976633478e-06, "loss": 1.0357, "step": 14585 }, { "epoch": 0.5286506469500925, "grad_norm": 2.54605270939765, "learning_rate": 4.7790934095888554e-06, "loss": 0.9358, "step": 14586 }, { "epoch": 0.5286868906527491, "grad_norm": 2.4816296197291896, "learning_rate": 4.778507055886889e-06, "loss": 0.9914, "step": 14587 }, { "epoch": 0.5287231343554057, "grad_norm": 2.1108245194168447, "learning_rate": 4.777920705236963e-06, "loss": 1.0501, "step": 14588 }, { "epoch": 0.5287593780580624, "grad_norm": 2.3533225651374536, "learning_rate": 4.777334357647153e-06, "loss": 0.8673, "step": 14589 }, { "epoch": 0.528795621760719, "grad_norm": 2.6133375840187543, "learning_rate": 4.776748013125542e-06, "loss": 0.8726, "step": 14590 }, { "epoch": 0.5288318654633757, "grad_norm": 2.1050913932442885, "learning_rate": 4.776161671680209e-06, "loss": 0.8898, "step": 14591 }, { "epoch": 0.5288681091660324, "grad_norm": 2.0576435251914957, "learning_rate": 4.775575333319233e-06, "loss": 0.8354, "step": 14592 }, { "epoch": 0.5289043528686891, "grad_norm": 2.5337648614975765, "learning_rate": 4.77498899805069e-06, "loss": 0.9498, "step": 14593 }, { "epoch": 0.5289405965713457, "grad_norm": 2.5586517959459956, "learning_rate": 4.774402665882666e-06, "loss": 0.8572, "step": 14594 }, { "epoch": 0.5289768402740024, "grad_norm": 2.339834485776401, "learning_rate": 4.773816336823236e-06, "loss": 1.1107, "step": 14595 }, { "epoch": 0.529013083976659, "grad_norm": 2.3324615905666066, "learning_rate": 4.77323001088048e-06, "loss": 0.9408, "step": 14596 }, { "epoch": 0.5290493276793157, "grad_norm": 2.223697789239143, "learning_rate": 4.772643688062477e-06, "loss": 0.8096, "step": 14597 }, { "epoch": 0.5290855713819724, "grad_norm": 2.4596415602101946, "learning_rate": 4.7720573683773065e-06, "loss": 0.9092, "step": 14598 }, { "epoch": 0.5291218150846291, "grad_norm": 2.6111418835934397, "learning_rate": 4.7714710518330485e-06, "loss": 0.7801, "step": 14599 }, { "epoch": 0.5291580587872857, "grad_norm": 2.2016061763540615, "learning_rate": 4.770884738437781e-06, "loss": 1.034, "step": 14600 }, { "epoch": 0.5291943024899424, "grad_norm": 2.4241432737690882, "learning_rate": 4.770298428199581e-06, "loss": 0.9191, "step": 14601 }, { "epoch": 0.529230546192599, "grad_norm": 2.389275520160856, "learning_rate": 4.769712121126532e-06, "loss": 0.8549, "step": 14602 }, { "epoch": 0.5292667898952557, "grad_norm": 2.3358044481295104, "learning_rate": 4.769125817226711e-06, "loss": 0.8485, "step": 14603 }, { "epoch": 0.5293030335979123, "grad_norm": 2.3078286180506016, "learning_rate": 4.768539516508196e-06, "loss": 0.7791, "step": 14604 }, { "epoch": 0.5293392773005691, "grad_norm": 2.133531945246966, "learning_rate": 4.767953218979065e-06, "loss": 0.9024, "step": 14605 }, { "epoch": 0.5293755210032257, "grad_norm": 2.2997332473636334, "learning_rate": 4.7673669246474e-06, "loss": 0.8218, "step": 14606 }, { "epoch": 0.5294117647058824, "grad_norm": 2.2701324120620088, "learning_rate": 4.766780633521279e-06, "loss": 0.9407, "step": 14607 }, { "epoch": 0.529448008408539, "grad_norm": 2.3032272333725707, "learning_rate": 4.766194345608778e-06, "loss": 0.9508, "step": 14608 }, { "epoch": 0.5294842521111957, "grad_norm": 2.417553822993956, "learning_rate": 4.765608060917977e-06, "loss": 1.0034, "step": 14609 }, { "epoch": 0.5295204958138523, "grad_norm": 2.294051234484525, "learning_rate": 4.765021779456956e-06, "loss": 0.7979, "step": 14610 }, { "epoch": 0.5295567395165091, "grad_norm": 2.4257306657120155, "learning_rate": 4.764435501233793e-06, "loss": 0.9169, "step": 14611 }, { "epoch": 0.5295929832191657, "grad_norm": 2.418095155103007, "learning_rate": 4.763849226256567e-06, "loss": 0.904, "step": 14612 }, { "epoch": 0.5296292269218223, "grad_norm": 2.566870029229033, "learning_rate": 4.763262954533354e-06, "loss": 0.8239, "step": 14613 }, { "epoch": 0.529665470624479, "grad_norm": 2.6232973813287654, "learning_rate": 4.7626766860722355e-06, "loss": 0.9301, "step": 14614 }, { "epoch": 0.5297017143271356, "grad_norm": 2.4259639262496804, "learning_rate": 4.762090420881289e-06, "loss": 0.9794, "step": 14615 }, { "epoch": 0.5297379580297923, "grad_norm": 2.091747540165067, "learning_rate": 4.7615041589685925e-06, "loss": 0.8207, "step": 14616 }, { "epoch": 0.5297742017324489, "grad_norm": 2.3331554167124016, "learning_rate": 4.760917900342223e-06, "loss": 0.8625, "step": 14617 }, { "epoch": 0.5298104454351057, "grad_norm": 2.3138339179208947, "learning_rate": 4.760331645010263e-06, "loss": 0.8768, "step": 14618 }, { "epoch": 0.5298466891377623, "grad_norm": 2.47022776136361, "learning_rate": 4.759745392980786e-06, "loss": 0.9402, "step": 14619 }, { "epoch": 0.529882932840419, "grad_norm": 2.387232439390014, "learning_rate": 4.759159144261873e-06, "loss": 0.7411, "step": 14620 }, { "epoch": 0.5299191765430756, "grad_norm": 2.2006598951529726, "learning_rate": 4.7585728988616e-06, "loss": 0.7688, "step": 14621 }, { "epoch": 0.5299554202457323, "grad_norm": 2.176351854975814, "learning_rate": 4.757986656788047e-06, "loss": 0.9384, "step": 14622 }, { "epoch": 0.5299916639483889, "grad_norm": 2.4709449631295506, "learning_rate": 4.757400418049293e-06, "loss": 1.1713, "step": 14623 }, { "epoch": 0.5300279076510457, "grad_norm": 2.2757850983963968, "learning_rate": 4.756814182653414e-06, "loss": 0.9374, "step": 14624 }, { "epoch": 0.5300641513537023, "grad_norm": 2.563461333044138, "learning_rate": 4.756227950608487e-06, "loss": 0.7934, "step": 14625 }, { "epoch": 0.530100395056359, "grad_norm": 2.407615039444616, "learning_rate": 4.755641721922592e-06, "loss": 0.7424, "step": 14626 }, { "epoch": 0.5301366387590156, "grad_norm": 2.357137297858506, "learning_rate": 4.755055496603807e-06, "loss": 0.9401, "step": 14627 }, { "epoch": 0.5301728824616723, "grad_norm": 2.3611225748950933, "learning_rate": 4.75446927466021e-06, "loss": 0.8426, "step": 14628 }, { "epoch": 0.5302091261643289, "grad_norm": 2.4695167224317096, "learning_rate": 4.753883056099875e-06, "loss": 1.0151, "step": 14629 }, { "epoch": 0.5302453698669856, "grad_norm": 2.5705170197829728, "learning_rate": 4.753296840930884e-06, "loss": 1.0819, "step": 14630 }, { "epoch": 0.5302816135696423, "grad_norm": 2.138099590287485, "learning_rate": 4.752710629161315e-06, "loss": 0.9968, "step": 14631 }, { "epoch": 0.530317857272299, "grad_norm": 2.5734759882219076, "learning_rate": 4.752124420799242e-06, "loss": 0.83, "step": 14632 }, { "epoch": 0.5303541009749556, "grad_norm": 2.2067578052570633, "learning_rate": 4.751538215852744e-06, "loss": 0.8236, "step": 14633 }, { "epoch": 0.5303903446776123, "grad_norm": 2.135033373599966, "learning_rate": 4.7509520143299e-06, "loss": 0.8962, "step": 14634 }, { "epoch": 0.5304265883802689, "grad_norm": 2.098518648473976, "learning_rate": 4.750365816238788e-06, "loss": 0.9214, "step": 14635 }, { "epoch": 0.5304628320829256, "grad_norm": 1.963388414300831, "learning_rate": 4.749779621587484e-06, "loss": 0.6194, "step": 14636 }, { "epoch": 0.5304990757855823, "grad_norm": 2.486089387685216, "learning_rate": 4.749193430384063e-06, "loss": 0.8224, "step": 14637 }, { "epoch": 0.530535319488239, "grad_norm": 2.3768856998222114, "learning_rate": 4.748607242636607e-06, "loss": 1.0036, "step": 14638 }, { "epoch": 0.5305715631908956, "grad_norm": 2.375930673100631, "learning_rate": 4.748021058353191e-06, "loss": 0.8244, "step": 14639 }, { "epoch": 0.5306078068935522, "grad_norm": 2.853745709530543, "learning_rate": 4.7474348775418926e-06, "loss": 0.7942, "step": 14640 }, { "epoch": 0.5306440505962089, "grad_norm": 2.37832761184846, "learning_rate": 4.746848700210786e-06, "loss": 0.9251, "step": 14641 }, { "epoch": 0.5306802942988655, "grad_norm": 1.9378034134833269, "learning_rate": 4.746262526367953e-06, "loss": 0.807, "step": 14642 }, { "epoch": 0.5307165380015222, "grad_norm": 2.689461047004436, "learning_rate": 4.74567635602147e-06, "loss": 0.9472, "step": 14643 }, { "epoch": 0.5307527817041789, "grad_norm": 2.166806943221328, "learning_rate": 4.745090189179413e-06, "loss": 0.871, "step": 14644 }, { "epoch": 0.5307890254068356, "grad_norm": 2.395101611034687, "learning_rate": 4.7445040258498575e-06, "loss": 0.9059, "step": 14645 }, { "epoch": 0.5308252691094922, "grad_norm": 2.4113720209261857, "learning_rate": 4.7439178660408836e-06, "loss": 0.8985, "step": 14646 }, { "epoch": 0.5308615128121489, "grad_norm": 2.2573060562507683, "learning_rate": 4.743331709760567e-06, "loss": 0.9549, "step": 14647 }, { "epoch": 0.5308977565148055, "grad_norm": 2.5672463615763323, "learning_rate": 4.7427455570169825e-06, "loss": 1.1004, "step": 14648 }, { "epoch": 0.5309340002174622, "grad_norm": 1.9858223388287721, "learning_rate": 4.7421594078182075e-06, "loss": 0.7975, "step": 14649 }, { "epoch": 0.5309702439201189, "grad_norm": 2.3776915912998624, "learning_rate": 4.741573262172322e-06, "loss": 0.8197, "step": 14650 }, { "epoch": 0.5310064876227756, "grad_norm": 2.6341880977284697, "learning_rate": 4.740987120087401e-06, "loss": 0.9178, "step": 14651 }, { "epoch": 0.5310427313254322, "grad_norm": 2.524012769725439, "learning_rate": 4.74040098157152e-06, "loss": 0.8136, "step": 14652 }, { "epoch": 0.5310789750280889, "grad_norm": 2.057115401157495, "learning_rate": 4.739814846632755e-06, "loss": 0.8647, "step": 14653 }, { "epoch": 0.5311152187307455, "grad_norm": 2.161887311946621, "learning_rate": 4.739228715279186e-06, "loss": 0.8936, "step": 14654 }, { "epoch": 0.5311514624334022, "grad_norm": 2.4065746464642026, "learning_rate": 4.7386425875188876e-06, "loss": 0.9866, "step": 14655 }, { "epoch": 0.5311877061360588, "grad_norm": 2.2622749329895084, "learning_rate": 4.738056463359937e-06, "loss": 0.8874, "step": 14656 }, { "epoch": 0.5312239498387156, "grad_norm": 2.7280936830868234, "learning_rate": 4.737470342810408e-06, "loss": 1.0184, "step": 14657 }, { "epoch": 0.5312601935413722, "grad_norm": 2.104056657458776, "learning_rate": 4.73688422587838e-06, "loss": 0.7551, "step": 14658 }, { "epoch": 0.5312964372440289, "grad_norm": 2.4687796995492985, "learning_rate": 4.736298112571929e-06, "loss": 0.7942, "step": 14659 }, { "epoch": 0.5313326809466855, "grad_norm": 2.4755921571751016, "learning_rate": 4.7357120028991275e-06, "loss": 1.0477, "step": 14660 }, { "epoch": 0.5313689246493422, "grad_norm": 2.334287216343327, "learning_rate": 4.735125896868057e-06, "loss": 1.0948, "step": 14661 }, { "epoch": 0.5314051683519988, "grad_norm": 2.499562544888578, "learning_rate": 4.734539794486792e-06, "loss": 0.9151, "step": 14662 }, { "epoch": 0.5314414120546554, "grad_norm": 2.495574264146785, "learning_rate": 4.733953695763407e-06, "loss": 0.953, "step": 14663 }, { "epoch": 0.5314776557573122, "grad_norm": 2.655411192351143, "learning_rate": 4.73336760070598e-06, "loss": 0.8471, "step": 14664 }, { "epoch": 0.5315138994599689, "grad_norm": 2.685657592080013, "learning_rate": 4.732781509322585e-06, "loss": 0.9228, "step": 14665 }, { "epoch": 0.5315501431626255, "grad_norm": 2.3071992495146305, "learning_rate": 4.732195421621301e-06, "loss": 0.8149, "step": 14666 }, { "epoch": 0.5315863868652821, "grad_norm": 2.4156649459545676, "learning_rate": 4.731609337610201e-06, "loss": 0.8108, "step": 14667 }, { "epoch": 0.5316226305679388, "grad_norm": 2.2012457703028168, "learning_rate": 4.73102325729736e-06, "loss": 0.912, "step": 14668 }, { "epoch": 0.5316588742705954, "grad_norm": 2.1878819055137275, "learning_rate": 4.730437180690858e-06, "loss": 0.8227, "step": 14669 }, { "epoch": 0.5316951179732522, "grad_norm": 2.2333510302054607, "learning_rate": 4.72985110779877e-06, "loss": 0.8148, "step": 14670 }, { "epoch": 0.5317313616759088, "grad_norm": 2.087553002238421, "learning_rate": 4.729265038629169e-06, "loss": 0.7096, "step": 14671 }, { "epoch": 0.5317676053785655, "grad_norm": 2.691868391937889, "learning_rate": 4.728678973190131e-06, "loss": 1.0653, "step": 14672 }, { "epoch": 0.5318038490812221, "grad_norm": 2.628560913752477, "learning_rate": 4.728092911489734e-06, "loss": 0.8713, "step": 14673 }, { "epoch": 0.5318400927838788, "grad_norm": 2.494519400109795, "learning_rate": 4.727506853536052e-06, "loss": 0.933, "step": 14674 }, { "epoch": 0.5318763364865354, "grad_norm": 2.72217208217422, "learning_rate": 4.726920799337162e-06, "loss": 0.8211, "step": 14675 }, { "epoch": 0.5319125801891921, "grad_norm": 2.6558753472127394, "learning_rate": 4.726334748901134e-06, "loss": 1.103, "step": 14676 }, { "epoch": 0.5319488238918488, "grad_norm": 2.225335203394591, "learning_rate": 4.725748702236051e-06, "loss": 0.8153, "step": 14677 }, { "epoch": 0.5319850675945055, "grad_norm": 2.2995972132510802, "learning_rate": 4.725162659349985e-06, "loss": 0.9096, "step": 14678 }, { "epoch": 0.5320213112971621, "grad_norm": 2.232459100710003, "learning_rate": 4.7245766202510105e-06, "loss": 0.8725, "step": 14679 }, { "epoch": 0.5320575549998188, "grad_norm": 2.5012135516924494, "learning_rate": 4.723990584947202e-06, "loss": 0.9296, "step": 14680 }, { "epoch": 0.5320937987024754, "grad_norm": 2.3864898358437268, "learning_rate": 4.723404553446639e-06, "loss": 0.8297, "step": 14681 }, { "epoch": 0.5321300424051321, "grad_norm": 2.403228456068857, "learning_rate": 4.722818525757394e-06, "loss": 0.8812, "step": 14682 }, { "epoch": 0.5321662861077888, "grad_norm": 2.430922913149853, "learning_rate": 4.722232501887542e-06, "loss": 0.9113, "step": 14683 }, { "epoch": 0.5322025298104455, "grad_norm": 2.3067413305493734, "learning_rate": 4.721646481845156e-06, "loss": 0.8697, "step": 14684 }, { "epoch": 0.5322387735131021, "grad_norm": 2.3963846763410728, "learning_rate": 4.721060465638315e-06, "loss": 0.9289, "step": 14685 }, { "epoch": 0.5322750172157588, "grad_norm": 2.235756340659869, "learning_rate": 4.720474453275092e-06, "loss": 0.8583, "step": 14686 }, { "epoch": 0.5323112609184154, "grad_norm": 2.1607749622674643, "learning_rate": 4.7198884447635624e-06, "loss": 1.0172, "step": 14687 }, { "epoch": 0.532347504621072, "grad_norm": 2.5692234178784563, "learning_rate": 4.719302440111798e-06, "loss": 0.7912, "step": 14688 }, { "epoch": 0.5323837483237287, "grad_norm": 2.2009719623845085, "learning_rate": 4.718716439327878e-06, "loss": 0.9845, "step": 14689 }, { "epoch": 0.5324199920263855, "grad_norm": 2.3498539679668413, "learning_rate": 4.718130442419876e-06, "loss": 0.7781, "step": 14690 }, { "epoch": 0.5324562357290421, "grad_norm": 2.5106301193829386, "learning_rate": 4.717544449395866e-06, "loss": 1.0521, "step": 14691 }, { "epoch": 0.5324924794316988, "grad_norm": 2.187154444325901, "learning_rate": 4.7169584602639215e-06, "loss": 1.0094, "step": 14692 }, { "epoch": 0.5325287231343554, "grad_norm": 2.6983511103616857, "learning_rate": 4.716372475032119e-06, "loss": 0.8695, "step": 14693 }, { "epoch": 0.532564966837012, "grad_norm": 2.8238326101247178, "learning_rate": 4.715786493708533e-06, "loss": 1.0342, "step": 14694 }, { "epoch": 0.5326012105396687, "grad_norm": 2.207108996623796, "learning_rate": 4.715200516301236e-06, "loss": 0.877, "step": 14695 }, { "epoch": 0.5326374542423254, "grad_norm": 2.480900558235492, "learning_rate": 4.714614542818303e-06, "loss": 0.9652, "step": 14696 }, { "epoch": 0.5326736979449821, "grad_norm": 2.5821185426129594, "learning_rate": 4.71402857326781e-06, "loss": 0.7835, "step": 14697 }, { "epoch": 0.5327099416476387, "grad_norm": 2.2143170200258036, "learning_rate": 4.71344260765783e-06, "loss": 0.858, "step": 14698 }, { "epoch": 0.5327461853502954, "grad_norm": 2.64063683685296, "learning_rate": 4.712856645996439e-06, "loss": 0.8824, "step": 14699 }, { "epoch": 0.532782429052952, "grad_norm": 2.1717057923820877, "learning_rate": 4.7122706882917064e-06, "loss": 0.8956, "step": 14700 }, { "epoch": 0.5328186727556087, "grad_norm": 2.37598504261956, "learning_rate": 4.711684734551712e-06, "loss": 0.8974, "step": 14701 }, { "epoch": 0.5328549164582653, "grad_norm": 2.179473298079349, "learning_rate": 4.711098784784528e-06, "loss": 0.9022, "step": 14702 }, { "epoch": 0.5328911601609221, "grad_norm": 2.4550351606337077, "learning_rate": 4.7105128389982286e-06, "loss": 1.1216, "step": 14703 }, { "epoch": 0.5329274038635787, "grad_norm": 2.0515492271815217, "learning_rate": 4.709926897200885e-06, "loss": 0.9083, "step": 14704 }, { "epoch": 0.5329636475662354, "grad_norm": 2.0966914199414086, "learning_rate": 4.709340959400575e-06, "loss": 0.7796, "step": 14705 }, { "epoch": 0.532999891268892, "grad_norm": 2.4647538909866333, "learning_rate": 4.7087550256053706e-06, "loss": 0.8216, "step": 14706 }, { "epoch": 0.5330361349715487, "grad_norm": 2.6800370435660086, "learning_rate": 4.708169095823346e-06, "loss": 0.8801, "step": 14707 }, { "epoch": 0.5330723786742053, "grad_norm": 2.325639453571554, "learning_rate": 4.707583170062572e-06, "loss": 0.83, "step": 14708 }, { "epoch": 0.5331086223768621, "grad_norm": 2.123818642587422, "learning_rate": 4.706997248331128e-06, "loss": 0.7566, "step": 14709 }, { "epoch": 0.5331448660795187, "grad_norm": 2.430294293790102, "learning_rate": 4.706411330637085e-06, "loss": 0.9719, "step": 14710 }, { "epoch": 0.5331811097821754, "grad_norm": 2.53319736642367, "learning_rate": 4.705825416988515e-06, "loss": 0.9292, "step": 14711 }, { "epoch": 0.533217353484832, "grad_norm": 2.6731000022342233, "learning_rate": 4.705239507393492e-06, "loss": 1.0514, "step": 14712 }, { "epoch": 0.5332535971874887, "grad_norm": 2.331106478089813, "learning_rate": 4.704653601860092e-06, "loss": 0.9848, "step": 14713 }, { "epoch": 0.5332898408901453, "grad_norm": 1.9662767600708908, "learning_rate": 4.7040677003963866e-06, "loss": 0.8512, "step": 14714 }, { "epoch": 0.533326084592802, "grad_norm": 2.5785977904655706, "learning_rate": 4.70348180301045e-06, "loss": 1.0353, "step": 14715 }, { "epoch": 0.5333623282954587, "grad_norm": 2.4007442187670267, "learning_rate": 4.702895909710352e-06, "loss": 1.0344, "step": 14716 }, { "epoch": 0.5333985719981154, "grad_norm": 2.297927937035076, "learning_rate": 4.70231002050417e-06, "loss": 0.8433, "step": 14717 }, { "epoch": 0.533434815700772, "grad_norm": 2.3392966681614396, "learning_rate": 4.701724135399978e-06, "loss": 0.872, "step": 14718 }, { "epoch": 0.5334710594034286, "grad_norm": 2.362771928771571, "learning_rate": 4.7011382544058455e-06, "loss": 0.9208, "step": 14719 }, { "epoch": 0.5335073031060853, "grad_norm": 2.359217584107195, "learning_rate": 4.700552377529845e-06, "loss": 1.0222, "step": 14720 }, { "epoch": 0.5335435468087419, "grad_norm": 2.29154083964404, "learning_rate": 4.699966504780054e-06, "loss": 1.0403, "step": 14721 }, { "epoch": 0.5335797905113987, "grad_norm": 2.4540878780875977, "learning_rate": 4.699380636164543e-06, "loss": 0.9966, "step": 14722 }, { "epoch": 0.5336160342140553, "grad_norm": 2.54060602371747, "learning_rate": 4.698794771691385e-06, "loss": 0.9358, "step": 14723 }, { "epoch": 0.533652277916712, "grad_norm": 2.1565722489227914, "learning_rate": 4.698208911368652e-06, "loss": 1.0888, "step": 14724 }, { "epoch": 0.5336885216193686, "grad_norm": 2.3252217326342146, "learning_rate": 4.697623055204419e-06, "loss": 0.8918, "step": 14725 }, { "epoch": 0.5337247653220253, "grad_norm": 2.7996138317081707, "learning_rate": 4.697037203206757e-06, "loss": 0.8531, "step": 14726 }, { "epoch": 0.5337610090246819, "grad_norm": 2.2114596130007187, "learning_rate": 4.6964513553837395e-06, "loss": 0.7888, "step": 14727 }, { "epoch": 0.5337972527273386, "grad_norm": 2.5745157221631203, "learning_rate": 4.695865511743436e-06, "loss": 0.8246, "step": 14728 }, { "epoch": 0.5338334964299953, "grad_norm": 2.36154457340456, "learning_rate": 4.695279672293924e-06, "loss": 1.0435, "step": 14729 }, { "epoch": 0.533869740132652, "grad_norm": 2.6826281146851807, "learning_rate": 4.694693837043275e-06, "loss": 0.8483, "step": 14730 }, { "epoch": 0.5339059838353086, "grad_norm": 2.2015186700377347, "learning_rate": 4.694108005999559e-06, "loss": 0.933, "step": 14731 }, { "epoch": 0.5339422275379653, "grad_norm": 2.3052792246839577, "learning_rate": 4.693522179170848e-06, "loss": 0.8514, "step": 14732 }, { "epoch": 0.5339784712406219, "grad_norm": 2.4971320184751473, "learning_rate": 4.692936356565219e-06, "loss": 0.8455, "step": 14733 }, { "epoch": 0.5340147149432786, "grad_norm": 2.3284435230810336, "learning_rate": 4.69235053819074e-06, "loss": 0.8167, "step": 14734 }, { "epoch": 0.5340509586459352, "grad_norm": 2.476832421597194, "learning_rate": 4.691764724055485e-06, "loss": 1.0048, "step": 14735 }, { "epoch": 0.534087202348592, "grad_norm": 2.392528175194471, "learning_rate": 4.691178914167523e-06, "loss": 0.9487, "step": 14736 }, { "epoch": 0.5341234460512486, "grad_norm": 2.01577423763639, "learning_rate": 4.690593108534932e-06, "loss": 0.6207, "step": 14737 }, { "epoch": 0.5341596897539053, "grad_norm": 2.3607180261041942, "learning_rate": 4.69000730716578e-06, "loss": 0.9611, "step": 14738 }, { "epoch": 0.5341959334565619, "grad_norm": 2.267499182671002, "learning_rate": 4.6894215100681386e-06, "loss": 0.8167, "step": 14739 }, { "epoch": 0.5342321771592186, "grad_norm": 2.3773910107213414, "learning_rate": 4.688835717250082e-06, "loss": 0.8146, "step": 14740 }, { "epoch": 0.5342684208618752, "grad_norm": 2.3944420358537992, "learning_rate": 4.688249928719682e-06, "loss": 0.7533, "step": 14741 }, { "epoch": 0.534304664564532, "grad_norm": 2.3664073097590057, "learning_rate": 4.687664144485009e-06, "loss": 0.9309, "step": 14742 }, { "epoch": 0.5343409082671886, "grad_norm": 2.5759853811581195, "learning_rate": 4.6870783645541335e-06, "loss": 1.1525, "step": 14743 }, { "epoch": 0.5343771519698453, "grad_norm": 2.3096398181625584, "learning_rate": 4.68649258893513e-06, "loss": 0.8325, "step": 14744 }, { "epoch": 0.5344133956725019, "grad_norm": 2.216191314444986, "learning_rate": 4.68590681763607e-06, "loss": 0.8958, "step": 14745 }, { "epoch": 0.5344496393751585, "grad_norm": 1.9856775741484054, "learning_rate": 4.685321050665024e-06, "loss": 0.8868, "step": 14746 }, { "epoch": 0.5344858830778152, "grad_norm": 2.3418498639718894, "learning_rate": 4.684735288030061e-06, "loss": 0.9726, "step": 14747 }, { "epoch": 0.5345221267804718, "grad_norm": 2.3501719668881513, "learning_rate": 4.684149529739257e-06, "loss": 0.8111, "step": 14748 }, { "epoch": 0.5345583704831286, "grad_norm": 2.743765915654433, "learning_rate": 4.6835637758006815e-06, "loss": 0.8364, "step": 14749 }, { "epoch": 0.5345946141857852, "grad_norm": 2.4165899262011377, "learning_rate": 4.682978026222406e-06, "loss": 0.9146, "step": 14750 }, { "epoch": 0.5346308578884419, "grad_norm": 2.1832653376310955, "learning_rate": 4.6823922810125e-06, "loss": 0.9471, "step": 14751 }, { "epoch": 0.5346671015910985, "grad_norm": 2.4318553540287415, "learning_rate": 4.6818065401790384e-06, "loss": 0.9366, "step": 14752 }, { "epoch": 0.5347033452937552, "grad_norm": 1.988414060114986, "learning_rate": 4.68122080373009e-06, "loss": 0.7902, "step": 14753 }, { "epoch": 0.5347395889964118, "grad_norm": 2.118841527444736, "learning_rate": 4.680635071673726e-06, "loss": 0.8213, "step": 14754 }, { "epoch": 0.5347758326990686, "grad_norm": 2.4471285890999726, "learning_rate": 4.680049344018015e-06, "loss": 0.9283, "step": 14755 }, { "epoch": 0.5348120764017252, "grad_norm": 2.200910465116874, "learning_rate": 4.679463620771033e-06, "loss": 0.7544, "step": 14756 }, { "epoch": 0.5348483201043819, "grad_norm": 2.3759713113627896, "learning_rate": 4.678877901940849e-06, "loss": 0.8432, "step": 14757 }, { "epoch": 0.5348845638070385, "grad_norm": 2.5665727924159274, "learning_rate": 4.678292187535533e-06, "loss": 0.9461, "step": 14758 }, { "epoch": 0.5349208075096952, "grad_norm": 2.38661588422073, "learning_rate": 4.677706477563154e-06, "loss": 0.9207, "step": 14759 }, { "epoch": 0.5349570512123518, "grad_norm": 2.3628079595692495, "learning_rate": 4.677120772031787e-06, "loss": 1.0141, "step": 14760 }, { "epoch": 0.5349932949150085, "grad_norm": 2.1508876797650576, "learning_rate": 4.6765350709495e-06, "loss": 0.8306, "step": 14761 }, { "epoch": 0.5350295386176652, "grad_norm": 2.1241512790948947, "learning_rate": 4.675949374324364e-06, "loss": 0.8885, "step": 14762 }, { "epoch": 0.5350657823203219, "grad_norm": 2.431277164753486, "learning_rate": 4.6753636821644476e-06, "loss": 1.0355, "step": 14763 }, { "epoch": 0.5351020260229785, "grad_norm": 2.409267415600695, "learning_rate": 4.674777994477826e-06, "loss": 0.7751, "step": 14764 }, { "epoch": 0.5351382697256352, "grad_norm": 2.3768990685775218, "learning_rate": 4.6741923112725665e-06, "loss": 0.8094, "step": 14765 }, { "epoch": 0.5351745134282918, "grad_norm": 2.859556660383968, "learning_rate": 4.6736066325567395e-06, "loss": 0.9662, "step": 14766 }, { "epoch": 0.5352107571309485, "grad_norm": 2.631787848560728, "learning_rate": 4.673020958338414e-06, "loss": 1.1346, "step": 14767 }, { "epoch": 0.5352470008336052, "grad_norm": 2.231328535345594, "learning_rate": 4.672435288625664e-06, "loss": 0.8948, "step": 14768 }, { "epoch": 0.5352832445362619, "grad_norm": 2.0538987971110627, "learning_rate": 4.671849623426558e-06, "loss": 0.8807, "step": 14769 }, { "epoch": 0.5353194882389185, "grad_norm": 2.245333539596758, "learning_rate": 4.671263962749165e-06, "loss": 0.9113, "step": 14770 }, { "epoch": 0.5353557319415752, "grad_norm": 2.369174724254404, "learning_rate": 4.670678306601555e-06, "loss": 0.971, "step": 14771 }, { "epoch": 0.5353919756442318, "grad_norm": 2.173201498140514, "learning_rate": 4.6700926549918e-06, "loss": 0.8579, "step": 14772 }, { "epoch": 0.5354282193468884, "grad_norm": 2.2901930690749226, "learning_rate": 4.669507007927969e-06, "loss": 0.8905, "step": 14773 }, { "epoch": 0.5354644630495451, "grad_norm": 2.37504818243722, "learning_rate": 4.668921365418132e-06, "loss": 0.9031, "step": 14774 }, { "epoch": 0.5355007067522018, "grad_norm": 2.224227362837779, "learning_rate": 4.668335727470355e-06, "loss": 0.7184, "step": 14775 }, { "epoch": 0.5355369504548585, "grad_norm": 2.037034804418075, "learning_rate": 4.667750094092714e-06, "loss": 0.8053, "step": 14776 }, { "epoch": 0.5355731941575151, "grad_norm": 2.6580313927235815, "learning_rate": 4.667164465293275e-06, "loss": 1.0198, "step": 14777 }, { "epoch": 0.5356094378601718, "grad_norm": 2.3923937950996983, "learning_rate": 4.666578841080109e-06, "loss": 0.8541, "step": 14778 }, { "epoch": 0.5356456815628284, "grad_norm": 2.409283794610406, "learning_rate": 4.665993221461284e-06, "loss": 0.9074, "step": 14779 }, { "epoch": 0.5356819252654851, "grad_norm": 2.2215381899224993, "learning_rate": 4.66540760644487e-06, "loss": 0.7713, "step": 14780 }, { "epoch": 0.5357181689681418, "grad_norm": 2.366777326906182, "learning_rate": 4.6648219960389385e-06, "loss": 0.8564, "step": 14781 }, { "epoch": 0.5357544126707985, "grad_norm": 2.310829003273911, "learning_rate": 4.664236390251557e-06, "loss": 1.1118, "step": 14782 }, { "epoch": 0.5357906563734551, "grad_norm": 2.406997897258567, "learning_rate": 4.663650789090793e-06, "loss": 0.8364, "step": 14783 }, { "epoch": 0.5358269000761118, "grad_norm": 2.2714114709858455, "learning_rate": 4.663065192564719e-06, "loss": 0.9732, "step": 14784 }, { "epoch": 0.5358631437787684, "grad_norm": 2.160549873335689, "learning_rate": 4.662479600681404e-06, "loss": 0.815, "step": 14785 }, { "epoch": 0.5358993874814251, "grad_norm": 2.052070933646599, "learning_rate": 4.661894013448914e-06, "loss": 0.7762, "step": 14786 }, { "epoch": 0.5359356311840817, "grad_norm": 2.4494992553699655, "learning_rate": 4.66130843087532e-06, "loss": 0.9154, "step": 14787 }, { "epoch": 0.5359718748867385, "grad_norm": 2.2609105530424336, "learning_rate": 4.660722852968692e-06, "loss": 0.9622, "step": 14788 }, { "epoch": 0.5360081185893951, "grad_norm": 2.4250307499337365, "learning_rate": 4.660137279737097e-06, "loss": 0.8842, "step": 14789 }, { "epoch": 0.5360443622920518, "grad_norm": 2.607800352699722, "learning_rate": 4.659551711188607e-06, "loss": 0.9127, "step": 14790 }, { "epoch": 0.5360806059947084, "grad_norm": 2.366727788965681, "learning_rate": 4.6589661473312855e-06, "loss": 0.9583, "step": 14791 }, { "epoch": 0.5361168496973651, "grad_norm": 2.1489941231101097, "learning_rate": 4.658380588173206e-06, "loss": 0.9314, "step": 14792 }, { "epoch": 0.5361530934000217, "grad_norm": 2.2966591037657427, "learning_rate": 4.657795033722436e-06, "loss": 0.707, "step": 14793 }, { "epoch": 0.5361893371026785, "grad_norm": 2.756278837435035, "learning_rate": 4.657209483987043e-06, "loss": 1.0668, "step": 14794 }, { "epoch": 0.5362255808053351, "grad_norm": 2.2053635176987134, "learning_rate": 4.656623938975094e-06, "loss": 0.9514, "step": 14795 }, { "epoch": 0.5362618245079918, "grad_norm": 2.475968234237644, "learning_rate": 4.656038398694661e-06, "loss": 0.8325, "step": 14796 }, { "epoch": 0.5362980682106484, "grad_norm": 2.2195026672604934, "learning_rate": 4.655452863153811e-06, "loss": 0.8632, "step": 14797 }, { "epoch": 0.536334311913305, "grad_norm": 2.1655890979624153, "learning_rate": 4.654867332360613e-06, "loss": 0.8131, "step": 14798 }, { "epoch": 0.5363705556159617, "grad_norm": 2.171095001120524, "learning_rate": 4.654281806323133e-06, "loss": 0.9853, "step": 14799 }, { "epoch": 0.5364067993186183, "grad_norm": 2.5317778957187174, "learning_rate": 4.653696285049441e-06, "loss": 0.9493, "step": 14800 }, { "epoch": 0.5364430430212751, "grad_norm": 2.1909716812528592, "learning_rate": 4.653110768547606e-06, "loss": 0.9338, "step": 14801 }, { "epoch": 0.5364792867239317, "grad_norm": 2.193047810543774, "learning_rate": 4.652525256825694e-06, "loss": 0.8404, "step": 14802 }, { "epoch": 0.5365155304265884, "grad_norm": 2.2116432233686996, "learning_rate": 4.651939749891771e-06, "loss": 0.9111, "step": 14803 }, { "epoch": 0.536551774129245, "grad_norm": 2.5491828487484747, "learning_rate": 4.651354247753911e-06, "loss": 0.8396, "step": 14804 }, { "epoch": 0.5365880178319017, "grad_norm": 2.7575295643153352, "learning_rate": 4.650768750420178e-06, "loss": 0.9485, "step": 14805 }, { "epoch": 0.5366242615345583, "grad_norm": 2.329062461822689, "learning_rate": 4.6501832578986415e-06, "loss": 0.9436, "step": 14806 }, { "epoch": 0.536660505237215, "grad_norm": 2.4801241209078264, "learning_rate": 4.649597770197364e-06, "loss": 0.8396, "step": 14807 }, { "epoch": 0.5366967489398717, "grad_norm": 2.4214693817177935, "learning_rate": 4.649012287324421e-06, "loss": 0.8872, "step": 14808 }, { "epoch": 0.5367329926425284, "grad_norm": 2.0851564312690183, "learning_rate": 4.648426809287876e-06, "loss": 0.8716, "step": 14809 }, { "epoch": 0.536769236345185, "grad_norm": 2.2055995541220232, "learning_rate": 4.6478413360957975e-06, "loss": 0.8285, "step": 14810 }, { "epoch": 0.5368054800478417, "grad_norm": 2.10687726858094, "learning_rate": 4.647255867756251e-06, "loss": 0.856, "step": 14811 }, { "epoch": 0.5368417237504983, "grad_norm": 2.495644333449079, "learning_rate": 4.646670404277306e-06, "loss": 1.008, "step": 14812 }, { "epoch": 0.536877967453155, "grad_norm": 2.4471939030466454, "learning_rate": 4.646084945667029e-06, "loss": 0.9841, "step": 14813 }, { "epoch": 0.5369142111558117, "grad_norm": 2.4489827255760743, "learning_rate": 4.645499491933488e-06, "loss": 1.0598, "step": 14814 }, { "epoch": 0.5369504548584684, "grad_norm": 2.047445537893204, "learning_rate": 4.644914043084747e-06, "loss": 0.8975, "step": 14815 }, { "epoch": 0.536986698561125, "grad_norm": 2.591789018818455, "learning_rate": 4.6443285991288776e-06, "loss": 1.0201, "step": 14816 }, { "epoch": 0.5370229422637817, "grad_norm": 2.187268515178758, "learning_rate": 4.643743160073946e-06, "loss": 1.0017, "step": 14817 }, { "epoch": 0.5370591859664383, "grad_norm": 2.306809585908038, "learning_rate": 4.6431577259280175e-06, "loss": 0.9929, "step": 14818 }, { "epoch": 0.537095429669095, "grad_norm": 2.440791141263507, "learning_rate": 4.6425722966991585e-06, "loss": 0.8293, "step": 14819 }, { "epoch": 0.5371316733717516, "grad_norm": 2.1342911588616644, "learning_rate": 4.6419868723954395e-06, "loss": 0.8129, "step": 14820 }, { "epoch": 0.5371679170744084, "grad_norm": 2.2615184032480005, "learning_rate": 4.641401453024923e-06, "loss": 0.8106, "step": 14821 }, { "epoch": 0.537204160777065, "grad_norm": 2.4203309511245212, "learning_rate": 4.640816038595677e-06, "loss": 0.8566, "step": 14822 }, { "epoch": 0.5372404044797217, "grad_norm": 2.4325876850482913, "learning_rate": 4.64023062911577e-06, "loss": 0.8598, "step": 14823 }, { "epoch": 0.5372766481823783, "grad_norm": 2.3474356671392185, "learning_rate": 4.639645224593268e-06, "loss": 0.9349, "step": 14824 }, { "epoch": 0.537312891885035, "grad_norm": 2.1636102476194283, "learning_rate": 4.6390598250362365e-06, "loss": 0.9033, "step": 14825 }, { "epoch": 0.5373491355876916, "grad_norm": 2.5575751665574487, "learning_rate": 4.6384744304527395e-06, "loss": 1.0018, "step": 14826 }, { "epoch": 0.5373853792903484, "grad_norm": 2.2953757039782774, "learning_rate": 4.637889040850849e-06, "loss": 0.8733, "step": 14827 }, { "epoch": 0.537421622993005, "grad_norm": 2.546042435619719, "learning_rate": 4.637303656238629e-06, "loss": 0.9884, "step": 14828 }, { "epoch": 0.5374578666956616, "grad_norm": 2.6332624925830244, "learning_rate": 4.6367182766241445e-06, "loss": 0.8965, "step": 14829 }, { "epoch": 0.5374941103983183, "grad_norm": 2.4914288133395233, "learning_rate": 4.636132902015461e-06, "loss": 0.816, "step": 14830 }, { "epoch": 0.5375303541009749, "grad_norm": 2.329872053522303, "learning_rate": 4.635547532420648e-06, "loss": 0.9947, "step": 14831 }, { "epoch": 0.5375665978036316, "grad_norm": 2.421826974243083, "learning_rate": 4.63496216784777e-06, "loss": 1.0128, "step": 14832 }, { "epoch": 0.5376028415062882, "grad_norm": 2.095560674755993, "learning_rate": 4.634376808304891e-06, "loss": 0.8995, "step": 14833 }, { "epoch": 0.537639085208945, "grad_norm": 2.7934517297739516, "learning_rate": 4.6337914538000775e-06, "loss": 1.1102, "step": 14834 }, { "epoch": 0.5376753289116016, "grad_norm": 2.4599537484179126, "learning_rate": 4.633206104341398e-06, "loss": 0.7297, "step": 14835 }, { "epoch": 0.5377115726142583, "grad_norm": 2.40051207934866, "learning_rate": 4.6326207599369175e-06, "loss": 0.8526, "step": 14836 }, { "epoch": 0.5377478163169149, "grad_norm": 2.043163865478239, "learning_rate": 4.632035420594699e-06, "loss": 0.8857, "step": 14837 }, { "epoch": 0.5377840600195716, "grad_norm": 2.709116037571736, "learning_rate": 4.63145008632281e-06, "loss": 0.9835, "step": 14838 }, { "epoch": 0.5378203037222282, "grad_norm": 2.4339759794911915, "learning_rate": 4.630864757129317e-06, "loss": 0.8611, "step": 14839 }, { "epoch": 0.537856547424885, "grad_norm": 1.9447152901400129, "learning_rate": 4.630279433022284e-06, "loss": 0.7269, "step": 14840 }, { "epoch": 0.5378927911275416, "grad_norm": 2.4013253625941693, "learning_rate": 4.629694114009778e-06, "loss": 0.9501, "step": 14841 }, { "epoch": 0.5379290348301983, "grad_norm": 2.7495377515359234, "learning_rate": 4.629108800099859e-06, "loss": 0.8924, "step": 14842 }, { "epoch": 0.5379652785328549, "grad_norm": 2.6549182419349395, "learning_rate": 4.6285234913006e-06, "loss": 0.9787, "step": 14843 }, { "epoch": 0.5380015222355116, "grad_norm": 2.4330726871420985, "learning_rate": 4.627938187620062e-06, "loss": 1.0383, "step": 14844 }, { "epoch": 0.5380377659381682, "grad_norm": 2.412077340221453, "learning_rate": 4.6273528890663104e-06, "loss": 0.9597, "step": 14845 }, { "epoch": 0.5380740096408249, "grad_norm": 2.4507026568271644, "learning_rate": 4.626767595647411e-06, "loss": 0.8719, "step": 14846 }, { "epoch": 0.5381102533434816, "grad_norm": 2.53677879298202, "learning_rate": 4.626182307371428e-06, "loss": 0.8975, "step": 14847 }, { "epoch": 0.5381464970461383, "grad_norm": 2.1616007791923417, "learning_rate": 4.625597024246427e-06, "loss": 0.7997, "step": 14848 }, { "epoch": 0.5381827407487949, "grad_norm": 2.7154381175684636, "learning_rate": 4.625011746280474e-06, "loss": 0.9424, "step": 14849 }, { "epoch": 0.5382189844514516, "grad_norm": 2.6829283714921517, "learning_rate": 4.624426473481628e-06, "loss": 1.027, "step": 14850 }, { "epoch": 0.5382552281541082, "grad_norm": 2.5528228605259007, "learning_rate": 4.623841205857961e-06, "loss": 0.9954, "step": 14851 }, { "epoch": 0.5382914718567648, "grad_norm": 2.204854312316786, "learning_rate": 4.623255943417536e-06, "loss": 0.8125, "step": 14852 }, { "epoch": 0.5383277155594216, "grad_norm": 2.164667252993174, "learning_rate": 4.622670686168414e-06, "loss": 0.865, "step": 14853 }, { "epoch": 0.5383639592620783, "grad_norm": 2.0941033023452977, "learning_rate": 4.622085434118661e-06, "loss": 0.8712, "step": 14854 }, { "epoch": 0.5384002029647349, "grad_norm": 2.2219270552428614, "learning_rate": 4.621500187276344e-06, "loss": 0.8722, "step": 14855 }, { "epoch": 0.5384364466673915, "grad_norm": 2.1869992791416433, "learning_rate": 4.6209149456495265e-06, "loss": 0.7935, "step": 14856 }, { "epoch": 0.5384726903700482, "grad_norm": 2.3030465155562996, "learning_rate": 4.62032970924627e-06, "loss": 0.7853, "step": 14857 }, { "epoch": 0.5385089340727048, "grad_norm": 2.758712645651685, "learning_rate": 4.619744478074641e-06, "loss": 0.7275, "step": 14858 }, { "epoch": 0.5385451777753615, "grad_norm": 2.466768697791232, "learning_rate": 4.619159252142703e-06, "loss": 0.8753, "step": 14859 }, { "epoch": 0.5385814214780182, "grad_norm": 2.1855578081980194, "learning_rate": 4.618574031458521e-06, "loss": 0.9053, "step": 14860 }, { "epoch": 0.5386176651806749, "grad_norm": 2.536705227138019, "learning_rate": 4.6179888160301575e-06, "loss": 0.9921, "step": 14861 }, { "epoch": 0.5386539088833315, "grad_norm": 2.4725194843264573, "learning_rate": 4.617403605865676e-06, "loss": 1.0406, "step": 14862 }, { "epoch": 0.5386901525859882, "grad_norm": 2.312601388134119, "learning_rate": 4.616818400973143e-06, "loss": 0.9255, "step": 14863 }, { "epoch": 0.5387263962886448, "grad_norm": 2.3150886042519985, "learning_rate": 4.6162332013606205e-06, "loss": 1.0784, "step": 14864 }, { "epoch": 0.5387626399913015, "grad_norm": 2.317094675510074, "learning_rate": 4.615648007036172e-06, "loss": 0.8511, "step": 14865 }, { "epoch": 0.5387988836939581, "grad_norm": 2.321189600979558, "learning_rate": 4.615062818007861e-06, "loss": 0.7916, "step": 14866 }, { "epoch": 0.5388351273966149, "grad_norm": 2.4248502111662726, "learning_rate": 4.614477634283753e-06, "loss": 0.8973, "step": 14867 }, { "epoch": 0.5388713710992715, "grad_norm": 2.0621327710912145, "learning_rate": 4.613892455871909e-06, "loss": 0.7932, "step": 14868 }, { "epoch": 0.5389076148019282, "grad_norm": 2.1692956481319605, "learning_rate": 4.613307282780395e-06, "loss": 0.8015, "step": 14869 }, { "epoch": 0.5389438585045848, "grad_norm": 2.334803157949283, "learning_rate": 4.612722115017268e-06, "loss": 0.8861, "step": 14870 }, { "epoch": 0.5389801022072415, "grad_norm": 2.3153937131626052, "learning_rate": 4.612136952590601e-06, "loss": 0.9639, "step": 14871 }, { "epoch": 0.5390163459098981, "grad_norm": 2.513624680772078, "learning_rate": 4.611551795508451e-06, "loss": 0.8252, "step": 14872 }, { "epoch": 0.5390525896125549, "grad_norm": 2.139673491063074, "learning_rate": 4.610966643778882e-06, "loss": 0.8988, "step": 14873 }, { "epoch": 0.5390888333152115, "grad_norm": 2.3685770131666577, "learning_rate": 4.6103814974099555e-06, "loss": 1.019, "step": 14874 }, { "epoch": 0.5391250770178682, "grad_norm": 1.965206426709289, "learning_rate": 4.609796356409739e-06, "loss": 0.8436, "step": 14875 }, { "epoch": 0.5391613207205248, "grad_norm": 2.246216906379095, "learning_rate": 4.6092112207862925e-06, "loss": 1.0232, "step": 14876 }, { "epoch": 0.5391975644231815, "grad_norm": 2.28008352808245, "learning_rate": 4.608626090547679e-06, "loss": 0.7492, "step": 14877 }, { "epoch": 0.5392338081258381, "grad_norm": 2.5840122600978956, "learning_rate": 4.608040965701959e-06, "loss": 1.0343, "step": 14878 }, { "epoch": 0.5392700518284947, "grad_norm": 2.1729676335981933, "learning_rate": 4.6074558462572e-06, "loss": 0.8048, "step": 14879 }, { "epoch": 0.5393062955311515, "grad_norm": 2.075578545372313, "learning_rate": 4.606870732221462e-06, "loss": 0.8621, "step": 14880 }, { "epoch": 0.5393425392338081, "grad_norm": 2.4102366589587536, "learning_rate": 4.6062856236028066e-06, "loss": 0.8498, "step": 14881 }, { "epoch": 0.5393787829364648, "grad_norm": 2.9395222250486372, "learning_rate": 4.605700520409296e-06, "loss": 0.8815, "step": 14882 }, { "epoch": 0.5394150266391214, "grad_norm": 2.4683846268020138, "learning_rate": 4.605115422648995e-06, "loss": 0.9916, "step": 14883 }, { "epoch": 0.5394512703417781, "grad_norm": 2.2386464504865957, "learning_rate": 4.604530330329965e-06, "loss": 0.7931, "step": 14884 }, { "epoch": 0.5394875140444347, "grad_norm": 2.3294389735613783, "learning_rate": 4.603945243460268e-06, "loss": 0.8635, "step": 14885 }, { "epoch": 0.5395237577470915, "grad_norm": 2.2781387377233973, "learning_rate": 4.603360162047966e-06, "loss": 1.0695, "step": 14886 }, { "epoch": 0.5395600014497481, "grad_norm": 2.258205740179219, "learning_rate": 4.60277508610112e-06, "loss": 0.9126, "step": 14887 }, { "epoch": 0.5395962451524048, "grad_norm": 2.5692126636867725, "learning_rate": 4.602190015627795e-06, "loss": 0.9295, "step": 14888 }, { "epoch": 0.5396324888550614, "grad_norm": 2.232555977696353, "learning_rate": 4.60160495063605e-06, "loss": 1.0951, "step": 14889 }, { "epoch": 0.5396687325577181, "grad_norm": 2.1360149066907943, "learning_rate": 4.601019891133946e-06, "loss": 0.9278, "step": 14890 }, { "epoch": 0.5397049762603747, "grad_norm": 1.986502705938324, "learning_rate": 4.600434837129549e-06, "loss": 0.8161, "step": 14891 }, { "epoch": 0.5397412199630314, "grad_norm": 2.5092105500590907, "learning_rate": 4.599849788630918e-06, "loss": 0.8545, "step": 14892 }, { "epoch": 0.5397774636656881, "grad_norm": 2.042167793440208, "learning_rate": 4.599264745646115e-06, "loss": 1.0832, "step": 14893 }, { "epoch": 0.5398137073683448, "grad_norm": 2.4013410943697817, "learning_rate": 4.598679708183198e-06, "loss": 0.813, "step": 14894 }, { "epoch": 0.5398499510710014, "grad_norm": 2.1885451992310325, "learning_rate": 4.598094676250236e-06, "loss": 0.8175, "step": 14895 }, { "epoch": 0.5398861947736581, "grad_norm": 2.487329631042984, "learning_rate": 4.597509649855286e-06, "loss": 0.8975, "step": 14896 }, { "epoch": 0.5399224384763147, "grad_norm": 9.576619301840475, "learning_rate": 4.596924629006408e-06, "loss": 0.8218, "step": 14897 }, { "epoch": 0.5399586821789714, "grad_norm": 2.337581975158061, "learning_rate": 4.596339613711665e-06, "loss": 0.7993, "step": 14898 }, { "epoch": 0.5399949258816281, "grad_norm": 12.087302628049596, "learning_rate": 4.595754603979119e-06, "loss": 1.1671, "step": 14899 }, { "epoch": 0.5400311695842848, "grad_norm": 2.5383087846180556, "learning_rate": 4.59516959981683e-06, "loss": 0.9613, "step": 14900 }, { "epoch": 0.5400674132869414, "grad_norm": 2.5314091354526296, "learning_rate": 4.594584601232857e-06, "loss": 0.7596, "step": 14901 }, { "epoch": 0.5401036569895981, "grad_norm": 2.4143158707505137, "learning_rate": 4.593999608235266e-06, "loss": 0.9372, "step": 14902 }, { "epoch": 0.5401399006922547, "grad_norm": 2.325591816352583, "learning_rate": 4.593414620832114e-06, "loss": 0.7974, "step": 14903 }, { "epoch": 0.5401761443949114, "grad_norm": 2.0094652477124013, "learning_rate": 4.592829639031462e-06, "loss": 0.8755, "step": 14904 }, { "epoch": 0.540212388097568, "grad_norm": 2.1934590443718753, "learning_rate": 4.592244662841372e-06, "loss": 0.9206, "step": 14905 }, { "epoch": 0.5402486318002248, "grad_norm": 2.0213767818740482, "learning_rate": 4.591659692269903e-06, "loss": 0.9861, "step": 14906 }, { "epoch": 0.5402848755028814, "grad_norm": 2.335351116349105, "learning_rate": 4.591074727325118e-06, "loss": 0.9222, "step": 14907 }, { "epoch": 0.540321119205538, "grad_norm": 2.118724005206112, "learning_rate": 4.590489768015075e-06, "loss": 0.8592, "step": 14908 }, { "epoch": 0.5403573629081947, "grad_norm": 2.2876257069175963, "learning_rate": 4.589904814347833e-06, "loss": 1.0146, "step": 14909 }, { "epoch": 0.5403936066108513, "grad_norm": 2.3899046393472814, "learning_rate": 4.589319866331458e-06, "loss": 1.1376, "step": 14910 }, { "epoch": 0.540429850313508, "grad_norm": 2.1441690500271986, "learning_rate": 4.588734923974006e-06, "loss": 0.9118, "step": 14911 }, { "epoch": 0.5404660940161647, "grad_norm": 2.6808075770591113, "learning_rate": 4.588149987283538e-06, "loss": 0.949, "step": 14912 }, { "epoch": 0.5405023377188214, "grad_norm": 2.2515436822740544, "learning_rate": 4.587565056268112e-06, "loss": 0.9306, "step": 14913 }, { "epoch": 0.540538581421478, "grad_norm": 2.0644849163799712, "learning_rate": 4.586980130935793e-06, "loss": 0.8493, "step": 14914 }, { "epoch": 0.5405748251241347, "grad_norm": 2.4634348657870797, "learning_rate": 4.5863952112946365e-06, "loss": 0.9933, "step": 14915 }, { "epoch": 0.5406110688267913, "grad_norm": 2.430471954353434, "learning_rate": 4.585810297352705e-06, "loss": 1.0308, "step": 14916 }, { "epoch": 0.540647312529448, "grad_norm": 2.4952237016980288, "learning_rate": 4.585225389118056e-06, "loss": 0.7926, "step": 14917 }, { "epoch": 0.5406835562321046, "grad_norm": 2.5289441901483936, "learning_rate": 4.584640486598751e-06, "loss": 0.9117, "step": 14918 }, { "epoch": 0.5407197999347614, "grad_norm": 2.3190168251263295, "learning_rate": 4.58405558980285e-06, "loss": 0.908, "step": 14919 }, { "epoch": 0.540756043637418, "grad_norm": 2.519762689187549, "learning_rate": 4.58347069873841e-06, "loss": 0.8766, "step": 14920 }, { "epoch": 0.5407922873400747, "grad_norm": 2.0823938061761456, "learning_rate": 4.582885813413489e-06, "loss": 0.8683, "step": 14921 }, { "epoch": 0.5408285310427313, "grad_norm": 2.251333444587145, "learning_rate": 4.582300933836154e-06, "loss": 0.9104, "step": 14922 }, { "epoch": 0.540864774745388, "grad_norm": 2.1071106747735837, "learning_rate": 4.581716060014457e-06, "loss": 0.8258, "step": 14923 }, { "epoch": 0.5409010184480446, "grad_norm": 2.2083434273015476, "learning_rate": 4.581131191956461e-06, "loss": 0.9713, "step": 14924 }, { "epoch": 0.5409372621507014, "grad_norm": 2.381918334901359, "learning_rate": 4.580546329670223e-06, "loss": 0.8778, "step": 14925 }, { "epoch": 0.540973505853358, "grad_norm": 2.45172420250587, "learning_rate": 4.579961473163803e-06, "loss": 0.9413, "step": 14926 }, { "epoch": 0.5410097495560147, "grad_norm": 2.1048237275036135, "learning_rate": 4.5793766224452605e-06, "loss": 0.7407, "step": 14927 }, { "epoch": 0.5410459932586713, "grad_norm": 2.623065626258453, "learning_rate": 4.5787917775226534e-06, "loss": 1.0071, "step": 14928 }, { "epoch": 0.541082236961328, "grad_norm": 2.384925699064458, "learning_rate": 4.57820693840404e-06, "loss": 0.8529, "step": 14929 }, { "epoch": 0.5411184806639846, "grad_norm": 2.3621984663767304, "learning_rate": 4.577622105097481e-06, "loss": 0.8064, "step": 14930 }, { "epoch": 0.5411547243666412, "grad_norm": 2.4586558823711666, "learning_rate": 4.5770372776110335e-06, "loss": 0.8943, "step": 14931 }, { "epoch": 0.541190968069298, "grad_norm": 2.4163424501945894, "learning_rate": 4.576452455952756e-06, "loss": 0.7813, "step": 14932 }, { "epoch": 0.5412272117719547, "grad_norm": 2.163098646437831, "learning_rate": 4.575867640130708e-06, "loss": 0.8926, "step": 14933 }, { "epoch": 0.5412634554746113, "grad_norm": 2.1020922070261228, "learning_rate": 4.5752828301529486e-06, "loss": 0.7771, "step": 14934 }, { "epoch": 0.541299699177268, "grad_norm": 2.360023492935558, "learning_rate": 4.574698026027534e-06, "loss": 0.9137, "step": 14935 }, { "epoch": 0.5413359428799246, "grad_norm": 2.1875980461220577, "learning_rate": 4.574113227762523e-06, "loss": 0.8959, "step": 14936 }, { "epoch": 0.5413721865825812, "grad_norm": 2.776118136498302, "learning_rate": 4.5735284353659735e-06, "loss": 0.9492, "step": 14937 }, { "epoch": 0.5414084302852379, "grad_norm": 2.536136726133585, "learning_rate": 4.572943648845945e-06, "loss": 0.7923, "step": 14938 }, { "epoch": 0.5414446739878946, "grad_norm": 1.9211564621114423, "learning_rate": 4.572358868210495e-06, "loss": 0.8052, "step": 14939 }, { "epoch": 0.5414809176905513, "grad_norm": 2.156153868272648, "learning_rate": 4.5717740934676816e-06, "loss": 0.8726, "step": 14940 }, { "epoch": 0.5415171613932079, "grad_norm": 2.4685328289671133, "learning_rate": 4.57118932462556e-06, "loss": 0.9061, "step": 14941 }, { "epoch": 0.5415534050958646, "grad_norm": 2.1796181199852667, "learning_rate": 4.570604561692193e-06, "loss": 0.7448, "step": 14942 }, { "epoch": 0.5415896487985212, "grad_norm": 2.159342986551997, "learning_rate": 4.570019804675634e-06, "loss": 0.7461, "step": 14943 }, { "epoch": 0.5416258925011779, "grad_norm": 2.0972789712678033, "learning_rate": 4.569435053583943e-06, "loss": 0.8842, "step": 14944 }, { "epoch": 0.5416621362038346, "grad_norm": 2.162730311094091, "learning_rate": 4.568850308425175e-06, "loss": 0.9263, "step": 14945 }, { "epoch": 0.5416983799064913, "grad_norm": 2.537896790488595, "learning_rate": 4.56826556920739e-06, "loss": 0.8851, "step": 14946 }, { "epoch": 0.5417346236091479, "grad_norm": 1.8536673586142594, "learning_rate": 4.567680835938645e-06, "loss": 0.545, "step": 14947 }, { "epoch": 0.5417708673118046, "grad_norm": 2.2956214036294607, "learning_rate": 4.567096108626996e-06, "loss": 0.9594, "step": 14948 }, { "epoch": 0.5418071110144612, "grad_norm": 2.4787267653252134, "learning_rate": 4.566511387280499e-06, "loss": 1.0199, "step": 14949 }, { "epoch": 0.5418433547171179, "grad_norm": 2.220049566729696, "learning_rate": 4.565926671907215e-06, "loss": 0.8659, "step": 14950 }, { "epoch": 0.5418795984197745, "grad_norm": 2.472577137724509, "learning_rate": 4.565341962515199e-06, "loss": 0.8789, "step": 14951 }, { "epoch": 0.5419158421224313, "grad_norm": 2.389520380103359, "learning_rate": 4.564757259112508e-06, "loss": 0.8569, "step": 14952 }, { "epoch": 0.5419520858250879, "grad_norm": 2.493177593927846, "learning_rate": 4.564172561707197e-06, "loss": 0.547, "step": 14953 }, { "epoch": 0.5419883295277446, "grad_norm": 2.7605442883130134, "learning_rate": 4.5635878703073265e-06, "loss": 1.0005, "step": 14954 }, { "epoch": 0.5420245732304012, "grad_norm": 2.36731215166559, "learning_rate": 4.563003184920951e-06, "loss": 0.9667, "step": 14955 }, { "epoch": 0.5420608169330579, "grad_norm": 2.402471797276528, "learning_rate": 4.5624185055561265e-06, "loss": 0.8988, "step": 14956 }, { "epoch": 0.5420970606357145, "grad_norm": 2.3915198508696056, "learning_rate": 4.561833832220909e-06, "loss": 0.8494, "step": 14957 }, { "epoch": 0.5421333043383713, "grad_norm": 2.2971874476317407, "learning_rate": 4.561249164923358e-06, "loss": 0.8214, "step": 14958 }, { "epoch": 0.5421695480410279, "grad_norm": 2.6331310590802013, "learning_rate": 4.5606645036715285e-06, "loss": 0.8783, "step": 14959 }, { "epoch": 0.5422057917436846, "grad_norm": 2.5176634295498594, "learning_rate": 4.5600798484734765e-06, "loss": 0.8955, "step": 14960 }, { "epoch": 0.5422420354463412, "grad_norm": 2.1811932404305483, "learning_rate": 4.559495199337256e-06, "loss": 0.8098, "step": 14961 }, { "epoch": 0.5422782791489978, "grad_norm": 2.448342754137361, "learning_rate": 4.558910556270927e-06, "loss": 0.7692, "step": 14962 }, { "epoch": 0.5423145228516545, "grad_norm": 2.405124208635544, "learning_rate": 4.5583259192825445e-06, "loss": 0.886, "step": 14963 }, { "epoch": 0.5423507665543111, "grad_norm": 2.407376120486928, "learning_rate": 4.557741288380162e-06, "loss": 0.8668, "step": 14964 }, { "epoch": 0.5423870102569679, "grad_norm": 2.4045885551401964, "learning_rate": 4.557156663571838e-06, "loss": 1.0531, "step": 14965 }, { "epoch": 0.5424232539596245, "grad_norm": 2.444584146544225, "learning_rate": 4.556572044865627e-06, "loss": 0.9153, "step": 14966 }, { "epoch": 0.5424594976622812, "grad_norm": 2.3209827332604855, "learning_rate": 4.555987432269586e-06, "loss": 0.7968, "step": 14967 }, { "epoch": 0.5424957413649378, "grad_norm": 2.2038384501530626, "learning_rate": 4.55540282579177e-06, "loss": 0.7738, "step": 14968 }, { "epoch": 0.5425319850675945, "grad_norm": 2.19266486913914, "learning_rate": 4.5548182254402305e-06, "loss": 0.8799, "step": 14969 }, { "epoch": 0.5425682287702511, "grad_norm": 1.9700320663320492, "learning_rate": 4.55423363122303e-06, "loss": 0.8984, "step": 14970 }, { "epoch": 0.5426044724729079, "grad_norm": 2.317987594162152, "learning_rate": 4.55364904314822e-06, "loss": 0.8278, "step": 14971 }, { "epoch": 0.5426407161755645, "grad_norm": 2.4632954125159743, "learning_rate": 4.553064461223857e-06, "loss": 0.884, "step": 14972 }, { "epoch": 0.5426769598782212, "grad_norm": 2.431807162274852, "learning_rate": 4.552479885457993e-06, "loss": 1.0069, "step": 14973 }, { "epoch": 0.5427132035808778, "grad_norm": 2.6980177258870675, "learning_rate": 4.551895315858688e-06, "loss": 0.9135, "step": 14974 }, { "epoch": 0.5427494472835345, "grad_norm": 2.6392505591738495, "learning_rate": 4.551310752433993e-06, "loss": 0.9533, "step": 14975 }, { "epoch": 0.5427856909861911, "grad_norm": 2.4180176631364776, "learning_rate": 4.550726195191965e-06, "loss": 0.9827, "step": 14976 }, { "epoch": 0.5428219346888478, "grad_norm": 2.203155016965688, "learning_rate": 4.550141644140657e-06, "loss": 0.7999, "step": 14977 }, { "epoch": 0.5428581783915045, "grad_norm": 2.2498573293330972, "learning_rate": 4.549557099288127e-06, "loss": 0.8757, "step": 14978 }, { "epoch": 0.5428944220941612, "grad_norm": 2.392003639843895, "learning_rate": 4.5489725606424275e-06, "loss": 0.7476, "step": 14979 }, { "epoch": 0.5429306657968178, "grad_norm": 2.022781920459095, "learning_rate": 4.548388028211613e-06, "loss": 0.6955, "step": 14980 }, { "epoch": 0.5429669094994745, "grad_norm": 2.372914606760311, "learning_rate": 4.547803502003736e-06, "loss": 0.9172, "step": 14981 }, { "epoch": 0.5430031532021311, "grad_norm": 2.7376015373300984, "learning_rate": 4.547218982026857e-06, "loss": 0.822, "step": 14982 }, { "epoch": 0.5430393969047878, "grad_norm": 2.332807745943726, "learning_rate": 4.546634468289025e-06, "loss": 0.9402, "step": 14983 }, { "epoch": 0.5430756406074445, "grad_norm": 2.443127031180328, "learning_rate": 4.546049960798296e-06, "loss": 0.9691, "step": 14984 }, { "epoch": 0.5431118843101012, "grad_norm": 2.3170608024561012, "learning_rate": 4.545465459562723e-06, "loss": 0.82, "step": 14985 }, { "epoch": 0.5431481280127578, "grad_norm": 2.2670498680859352, "learning_rate": 4.544880964590363e-06, "loss": 0.8977, "step": 14986 }, { "epoch": 0.5431843717154144, "grad_norm": 2.211648947756761, "learning_rate": 4.5442964758892676e-06, "loss": 0.9119, "step": 14987 }, { "epoch": 0.5432206154180711, "grad_norm": 2.404959346458572, "learning_rate": 4.543711993467489e-06, "loss": 0.9754, "step": 14988 }, { "epoch": 0.5432568591207277, "grad_norm": 2.240787138053084, "learning_rate": 4.543127517333085e-06, "loss": 0.7843, "step": 14989 }, { "epoch": 0.5432931028233844, "grad_norm": 2.2282358673301843, "learning_rate": 4.542543047494108e-06, "loss": 0.955, "step": 14990 }, { "epoch": 0.5433293465260411, "grad_norm": 2.5327493032179906, "learning_rate": 4.54195858395861e-06, "loss": 1.0084, "step": 14991 }, { "epoch": 0.5433655902286978, "grad_norm": 2.295345384076072, "learning_rate": 4.541374126734646e-06, "loss": 0.9932, "step": 14992 }, { "epoch": 0.5434018339313544, "grad_norm": 2.0123090489278534, "learning_rate": 4.54078967583027e-06, "loss": 1.031, "step": 14993 }, { "epoch": 0.5434380776340111, "grad_norm": 2.206925603272727, "learning_rate": 4.540205231253534e-06, "loss": 0.9645, "step": 14994 }, { "epoch": 0.5434743213366677, "grad_norm": 2.2900269357218273, "learning_rate": 4.539620793012492e-06, "loss": 0.7965, "step": 14995 }, { "epoch": 0.5435105650393244, "grad_norm": 2.305385495341328, "learning_rate": 4.539036361115196e-06, "loss": 0.7737, "step": 14996 }, { "epoch": 0.5435468087419811, "grad_norm": 2.1312604648208517, "learning_rate": 4.538451935569701e-06, "loss": 0.8989, "step": 14997 }, { "epoch": 0.5435830524446378, "grad_norm": 2.5259506105392173, "learning_rate": 4.537867516384059e-06, "loss": 0.9476, "step": 14998 }, { "epoch": 0.5436192961472944, "grad_norm": 2.3816147174240103, "learning_rate": 4.537283103566324e-06, "loss": 0.8153, "step": 14999 }, { "epoch": 0.5436555398499511, "grad_norm": 2.350771112964901, "learning_rate": 4.536698697124545e-06, "loss": 1.0261, "step": 15000 }, { "epoch": 0.5436917835526077, "grad_norm": 2.436592037600383, "learning_rate": 4.536114297066781e-06, "loss": 0.9661, "step": 15001 }, { "epoch": 0.5437280272552644, "grad_norm": 2.415725380075174, "learning_rate": 4.535529903401081e-06, "loss": 0.843, "step": 15002 }, { "epoch": 0.543764270957921, "grad_norm": 2.1950217875969886, "learning_rate": 4.534945516135497e-06, "loss": 0.9624, "step": 15003 }, { "epoch": 0.5438005146605778, "grad_norm": 2.3322315511746936, "learning_rate": 4.534361135278083e-06, "loss": 0.8682, "step": 15004 }, { "epoch": 0.5438367583632344, "grad_norm": 2.656285445787656, "learning_rate": 4.533776760836891e-06, "loss": 0.8366, "step": 15005 }, { "epoch": 0.5438730020658911, "grad_norm": 1.9946812376320842, "learning_rate": 4.533192392819974e-06, "loss": 0.851, "step": 15006 }, { "epoch": 0.5439092457685477, "grad_norm": 2.506724662198597, "learning_rate": 4.5326080312353824e-06, "loss": 0.9978, "step": 15007 }, { "epoch": 0.5439454894712044, "grad_norm": 2.0430015961908867, "learning_rate": 4.532023676091169e-06, "loss": 0.8681, "step": 15008 }, { "epoch": 0.543981733173861, "grad_norm": 2.338845612098733, "learning_rate": 4.531439327395387e-06, "loss": 0.9457, "step": 15009 }, { "epoch": 0.5440179768765177, "grad_norm": 2.0733181904921416, "learning_rate": 4.5308549851560885e-06, "loss": 0.862, "step": 15010 }, { "epoch": 0.5440542205791744, "grad_norm": 2.2201669580880434, "learning_rate": 4.530270649381324e-06, "loss": 0.7295, "step": 15011 }, { "epoch": 0.544090464281831, "grad_norm": 2.23691025797187, "learning_rate": 4.5296863200791454e-06, "loss": 0.9872, "step": 15012 }, { "epoch": 0.5441267079844877, "grad_norm": 2.4635041659575467, "learning_rate": 4.5291019972576055e-06, "loss": 0.7191, "step": 15013 }, { "epoch": 0.5441629516871443, "grad_norm": 2.107712619265381, "learning_rate": 4.528517680924755e-06, "loss": 0.9982, "step": 15014 }, { "epoch": 0.544199195389801, "grad_norm": 2.4753466434475646, "learning_rate": 4.5279333710886466e-06, "loss": 1.0289, "step": 15015 }, { "epoch": 0.5442354390924576, "grad_norm": 2.568401428962832, "learning_rate": 4.527349067757328e-06, "loss": 0.9787, "step": 15016 }, { "epoch": 0.5442716827951144, "grad_norm": 2.6497968745739744, "learning_rate": 4.526764770938856e-06, "loss": 0.8884, "step": 15017 }, { "epoch": 0.544307926497771, "grad_norm": 2.493352177735052, "learning_rate": 4.526180480641279e-06, "loss": 1.0089, "step": 15018 }, { "epoch": 0.5443441702004277, "grad_norm": 2.80338136852535, "learning_rate": 4.525596196872648e-06, "loss": 0.9205, "step": 15019 }, { "epoch": 0.5443804139030843, "grad_norm": 2.282643459696967, "learning_rate": 4.525011919641014e-06, "loss": 0.8077, "step": 15020 }, { "epoch": 0.544416657605741, "grad_norm": 2.176205582278444, "learning_rate": 4.524427648954429e-06, "loss": 0.9267, "step": 15021 }, { "epoch": 0.5444529013083976, "grad_norm": 2.291899262125774, "learning_rate": 4.523843384820944e-06, "loss": 1.019, "step": 15022 }, { "epoch": 0.5444891450110543, "grad_norm": 2.2609548232003394, "learning_rate": 4.523259127248609e-06, "loss": 0.8411, "step": 15023 }, { "epoch": 0.544525388713711, "grad_norm": 2.084409395413061, "learning_rate": 4.522674876245473e-06, "loss": 0.9708, "step": 15024 }, { "epoch": 0.5445616324163677, "grad_norm": 2.598972442180985, "learning_rate": 4.52209063181959e-06, "loss": 0.9643, "step": 15025 }, { "epoch": 0.5445978761190243, "grad_norm": 2.2942919612567545, "learning_rate": 4.52150639397901e-06, "loss": 0.7978, "step": 15026 }, { "epoch": 0.544634119821681, "grad_norm": 2.236251827290659, "learning_rate": 4.520922162731782e-06, "loss": 1.0098, "step": 15027 }, { "epoch": 0.5446703635243376, "grad_norm": 2.4778713534704364, "learning_rate": 4.520337938085954e-06, "loss": 0.769, "step": 15028 }, { "epoch": 0.5447066072269943, "grad_norm": 2.4796505009114354, "learning_rate": 4.519753720049582e-06, "loss": 0.9416, "step": 15029 }, { "epoch": 0.544742850929651, "grad_norm": 2.471460774524064, "learning_rate": 4.519169508630713e-06, "loss": 0.9784, "step": 15030 }, { "epoch": 0.5447790946323077, "grad_norm": 2.329192269134944, "learning_rate": 4.518585303837399e-06, "loss": 0.9448, "step": 15031 }, { "epoch": 0.5448153383349643, "grad_norm": 2.3334707939134542, "learning_rate": 4.5180011056776855e-06, "loss": 0.9346, "step": 15032 }, { "epoch": 0.544851582037621, "grad_norm": 2.3295165589564504, "learning_rate": 4.517416914159626e-06, "loss": 1.0129, "step": 15033 }, { "epoch": 0.5448878257402776, "grad_norm": 2.423892485215591, "learning_rate": 4.51683272929127e-06, "loss": 0.9428, "step": 15034 }, { "epoch": 0.5449240694429343, "grad_norm": 2.540848567815439, "learning_rate": 4.516248551080668e-06, "loss": 1.0358, "step": 15035 }, { "epoch": 0.5449603131455909, "grad_norm": 2.440567082903412, "learning_rate": 4.5156643795358655e-06, "loss": 0.9974, "step": 15036 }, { "epoch": 0.5449965568482477, "grad_norm": 2.396640010382316, "learning_rate": 4.515080214664917e-06, "loss": 0.8813, "step": 15037 }, { "epoch": 0.5450328005509043, "grad_norm": 2.335336037927896, "learning_rate": 4.51449605647587e-06, "loss": 0.8709, "step": 15038 }, { "epoch": 0.545069044253561, "grad_norm": 2.0429800102272377, "learning_rate": 4.513911904976773e-06, "loss": 0.7271, "step": 15039 }, { "epoch": 0.5451052879562176, "grad_norm": 2.5183521234022046, "learning_rate": 4.513327760175675e-06, "loss": 1.1325, "step": 15040 }, { "epoch": 0.5451415316588742, "grad_norm": 2.402064103662282, "learning_rate": 4.512743622080628e-06, "loss": 0.9057, "step": 15041 }, { "epoch": 0.5451777753615309, "grad_norm": 2.298306754633605, "learning_rate": 4.512159490699679e-06, "loss": 0.9192, "step": 15042 }, { "epoch": 0.5452140190641876, "grad_norm": 2.0098644548602604, "learning_rate": 4.511575366040877e-06, "loss": 0.8339, "step": 15043 }, { "epoch": 0.5452502627668443, "grad_norm": 2.2168853469566723, "learning_rate": 4.510991248112269e-06, "loss": 0.9537, "step": 15044 }, { "epoch": 0.5452865064695009, "grad_norm": 2.5792000496741094, "learning_rate": 4.510407136921909e-06, "loss": 1.0051, "step": 15045 }, { "epoch": 0.5453227501721576, "grad_norm": 2.2988664600622215, "learning_rate": 4.50982303247784e-06, "loss": 1.0081, "step": 15046 }, { "epoch": 0.5453589938748142, "grad_norm": 2.1824956731164287, "learning_rate": 4.509238934788115e-06, "loss": 0.8436, "step": 15047 }, { "epoch": 0.5453952375774709, "grad_norm": 2.2377389542280532, "learning_rate": 4.508654843860778e-06, "loss": 0.8056, "step": 15048 }, { "epoch": 0.5454314812801275, "grad_norm": 2.199057373354416, "learning_rate": 4.508070759703882e-06, "loss": 0.7409, "step": 15049 }, { "epoch": 0.5454677249827843, "grad_norm": 2.4388394450857893, "learning_rate": 4.507486682325475e-06, "loss": 1.0832, "step": 15050 }, { "epoch": 0.5455039686854409, "grad_norm": 2.456267554113492, "learning_rate": 4.506902611733602e-06, "loss": 0.9558, "step": 15051 }, { "epoch": 0.5455402123880976, "grad_norm": 2.2540907658860907, "learning_rate": 4.506318547936313e-06, "loss": 0.852, "step": 15052 }, { "epoch": 0.5455764560907542, "grad_norm": 2.1995133962187565, "learning_rate": 4.505734490941655e-06, "loss": 0.7831, "step": 15053 }, { "epoch": 0.5456126997934109, "grad_norm": 2.347163134029558, "learning_rate": 4.505150440757679e-06, "loss": 0.9799, "step": 15054 }, { "epoch": 0.5456489434960675, "grad_norm": 2.0259849355632635, "learning_rate": 4.5045663973924295e-06, "loss": 0.9168, "step": 15055 }, { "epoch": 0.5456851871987243, "grad_norm": 2.2028606814823295, "learning_rate": 4.503982360853953e-06, "loss": 0.9505, "step": 15056 }, { "epoch": 0.5457214309013809, "grad_norm": 2.483919039687193, "learning_rate": 4.5033983311503025e-06, "loss": 0.9922, "step": 15057 }, { "epoch": 0.5457576746040376, "grad_norm": 2.32083054664696, "learning_rate": 4.502814308289522e-06, "loss": 1.011, "step": 15058 }, { "epoch": 0.5457939183066942, "grad_norm": 2.152163703127924, "learning_rate": 4.50223029227966e-06, "loss": 0.7802, "step": 15059 }, { "epoch": 0.5458301620093509, "grad_norm": 2.286189531640463, "learning_rate": 4.501646283128762e-06, "loss": 0.7595, "step": 15060 }, { "epoch": 0.5458664057120075, "grad_norm": 2.034249610171475, "learning_rate": 4.5010622808448785e-06, "loss": 0.5859, "step": 15061 }, { "epoch": 0.5459026494146642, "grad_norm": 2.636055407095513, "learning_rate": 4.5004782854360545e-06, "loss": 1.0002, "step": 15062 }, { "epoch": 0.5459388931173209, "grad_norm": 2.42413808473887, "learning_rate": 4.499894296910338e-06, "loss": 1.0423, "step": 15063 }, { "epoch": 0.5459751368199776, "grad_norm": 2.2894518253622396, "learning_rate": 4.499310315275773e-06, "loss": 1.0789, "step": 15064 }, { "epoch": 0.5460113805226342, "grad_norm": 2.4984122274832834, "learning_rate": 4.498726340540412e-06, "loss": 0.8906, "step": 15065 }, { "epoch": 0.5460476242252909, "grad_norm": 2.075317669163314, "learning_rate": 4.498142372712299e-06, "loss": 1.0714, "step": 15066 }, { "epoch": 0.5460838679279475, "grad_norm": 2.4638064897402203, "learning_rate": 4.497558411799477e-06, "loss": 1.011, "step": 15067 }, { "epoch": 0.5461201116306041, "grad_norm": 2.3405437006245102, "learning_rate": 4.496974457809999e-06, "loss": 0.8594, "step": 15068 }, { "epoch": 0.5461563553332609, "grad_norm": 2.321445307934634, "learning_rate": 4.49639051075191e-06, "loss": 0.7595, "step": 15069 }, { "epoch": 0.5461925990359175, "grad_norm": 2.487710888252201, "learning_rate": 4.495806570633254e-06, "loss": 0.7914, "step": 15070 }, { "epoch": 0.5462288427385742, "grad_norm": 2.6472808701614, "learning_rate": 4.495222637462078e-06, "loss": 0.9943, "step": 15071 }, { "epoch": 0.5462650864412308, "grad_norm": 2.359303377807047, "learning_rate": 4.494638711246429e-06, "loss": 0.9209, "step": 15072 }, { "epoch": 0.5463013301438875, "grad_norm": 2.382079347869366, "learning_rate": 4.494054791994353e-06, "loss": 0.8381, "step": 15073 }, { "epoch": 0.5463375738465441, "grad_norm": 2.30933847424579, "learning_rate": 4.493470879713897e-06, "loss": 0.802, "step": 15074 }, { "epoch": 0.5463738175492008, "grad_norm": 2.098452684830265, "learning_rate": 4.492886974413103e-06, "loss": 0.7494, "step": 15075 }, { "epoch": 0.5464100612518575, "grad_norm": 2.290518173136347, "learning_rate": 4.492303076100022e-06, "loss": 0.9889, "step": 15076 }, { "epoch": 0.5464463049545142, "grad_norm": 2.987543611852758, "learning_rate": 4.491719184782698e-06, "loss": 0.9997, "step": 15077 }, { "epoch": 0.5464825486571708, "grad_norm": 2.4294750466292285, "learning_rate": 4.491135300469176e-06, "loss": 0.9782, "step": 15078 }, { "epoch": 0.5465187923598275, "grad_norm": 2.3120074739242558, "learning_rate": 4.490551423167501e-06, "loss": 0.8324, "step": 15079 }, { "epoch": 0.5465550360624841, "grad_norm": 2.0175919338060453, "learning_rate": 4.48996755288572e-06, "loss": 0.8517, "step": 15080 }, { "epoch": 0.5465912797651408, "grad_norm": 2.382555763764439, "learning_rate": 4.4893836896318774e-06, "loss": 0.9841, "step": 15081 }, { "epoch": 0.5466275234677974, "grad_norm": 2.33434939898392, "learning_rate": 4.488799833414019e-06, "loss": 0.9976, "step": 15082 }, { "epoch": 0.5466637671704542, "grad_norm": 2.329923387143701, "learning_rate": 4.488215984240187e-06, "loss": 0.9854, "step": 15083 }, { "epoch": 0.5467000108731108, "grad_norm": 2.22846989670257, "learning_rate": 4.487632142118433e-06, "loss": 1.0054, "step": 15084 }, { "epoch": 0.5467362545757675, "grad_norm": 2.2404595848171778, "learning_rate": 4.487048307056797e-06, "loss": 0.9718, "step": 15085 }, { "epoch": 0.5467724982784241, "grad_norm": 2.249133220674372, "learning_rate": 4.486464479063325e-06, "loss": 0.9227, "step": 15086 }, { "epoch": 0.5468087419810808, "grad_norm": 2.177332286504613, "learning_rate": 4.48588065814606e-06, "loss": 0.9902, "step": 15087 }, { "epoch": 0.5468449856837374, "grad_norm": 2.8314743467498102, "learning_rate": 4.4852968443130516e-06, "loss": 0.9377, "step": 15088 }, { "epoch": 0.5468812293863942, "grad_norm": 2.4507258850125284, "learning_rate": 4.48471303757234e-06, "loss": 0.9071, "step": 15089 }, { "epoch": 0.5469174730890508, "grad_norm": 2.587641640641993, "learning_rate": 4.484129237931972e-06, "loss": 1.1056, "step": 15090 }, { "epoch": 0.5469537167917075, "grad_norm": 2.157291238379342, "learning_rate": 4.483545445399989e-06, "loss": 0.8517, "step": 15091 }, { "epoch": 0.5469899604943641, "grad_norm": 2.37291702582595, "learning_rate": 4.48296165998444e-06, "loss": 0.8822, "step": 15092 }, { "epoch": 0.5470262041970207, "grad_norm": 2.4678718641280937, "learning_rate": 4.482377881693366e-06, "loss": 0.7351, "step": 15093 }, { "epoch": 0.5470624478996774, "grad_norm": 2.4308091065006336, "learning_rate": 4.4817941105348116e-06, "loss": 0.8944, "step": 15094 }, { "epoch": 0.547098691602334, "grad_norm": 2.0639859812416135, "learning_rate": 4.4812103465168184e-06, "loss": 0.7885, "step": 15095 }, { "epoch": 0.5471349353049908, "grad_norm": 2.227548235424295, "learning_rate": 4.480626589647436e-06, "loss": 0.788, "step": 15096 }, { "epoch": 0.5471711790076474, "grad_norm": 2.366440819009303, "learning_rate": 4.480042839934705e-06, "loss": 0.973, "step": 15097 }, { "epoch": 0.5472074227103041, "grad_norm": 2.3923196562744313, "learning_rate": 4.479459097386668e-06, "loss": 0.861, "step": 15098 }, { "epoch": 0.5472436664129607, "grad_norm": 2.391114509760376, "learning_rate": 4.4788753620113695e-06, "loss": 0.8927, "step": 15099 }, { "epoch": 0.5472799101156174, "grad_norm": 2.4688427652596197, "learning_rate": 4.4782916338168544e-06, "loss": 0.9169, "step": 15100 }, { "epoch": 0.547316153818274, "grad_norm": 2.4252388309202777, "learning_rate": 4.477707912811166e-06, "loss": 1.0032, "step": 15101 }, { "epoch": 0.5473523975209308, "grad_norm": 2.3358433631330224, "learning_rate": 4.477124199002345e-06, "loss": 0.9329, "step": 15102 }, { "epoch": 0.5473886412235874, "grad_norm": 2.4248225490186757, "learning_rate": 4.476540492398435e-06, "loss": 0.9502, "step": 15103 }, { "epoch": 0.5474248849262441, "grad_norm": 2.2071200542050575, "learning_rate": 4.475956793007483e-06, "loss": 0.8926, "step": 15104 }, { "epoch": 0.5474611286289007, "grad_norm": 2.3067155183830774, "learning_rate": 4.475373100837528e-06, "loss": 0.9304, "step": 15105 }, { "epoch": 0.5474973723315574, "grad_norm": 2.4898029876059886, "learning_rate": 4.474789415896616e-06, "loss": 0.969, "step": 15106 }, { "epoch": 0.547533616034214, "grad_norm": 2.297195241691885, "learning_rate": 4.474205738192787e-06, "loss": 0.7834, "step": 15107 }, { "epoch": 0.5475698597368707, "grad_norm": 2.0748404849076256, "learning_rate": 4.473622067734085e-06, "loss": 0.8081, "step": 15108 }, { "epoch": 0.5476061034395274, "grad_norm": 2.2102008615885995, "learning_rate": 4.473038404528553e-06, "loss": 0.8857, "step": 15109 }, { "epoch": 0.5476423471421841, "grad_norm": 2.398301484454588, "learning_rate": 4.472454748584233e-06, "loss": 0.8349, "step": 15110 }, { "epoch": 0.5476785908448407, "grad_norm": 2.288019876630744, "learning_rate": 4.471871099909166e-06, "loss": 0.7906, "step": 15111 }, { "epoch": 0.5477148345474974, "grad_norm": 2.2040971501548428, "learning_rate": 4.471287458511398e-06, "loss": 0.9921, "step": 15112 }, { "epoch": 0.547751078250154, "grad_norm": 2.3705630780904876, "learning_rate": 4.470703824398968e-06, "loss": 0.849, "step": 15113 }, { "epoch": 0.5477873219528107, "grad_norm": 2.4706253901004924, "learning_rate": 4.47012019757992e-06, "loss": 0.8915, "step": 15114 }, { "epoch": 0.5478235656554674, "grad_norm": 2.428130656288515, "learning_rate": 4.469536578062293e-06, "loss": 0.9766, "step": 15115 }, { "epoch": 0.5478598093581241, "grad_norm": 2.0560453874549434, "learning_rate": 4.468952965854133e-06, "loss": 0.8282, "step": 15116 }, { "epoch": 0.5478960530607807, "grad_norm": 2.2851145167843443, "learning_rate": 4.46836936096348e-06, "loss": 0.7851, "step": 15117 }, { "epoch": 0.5479322967634374, "grad_norm": 1.9859747057099872, "learning_rate": 4.467785763398376e-06, "loss": 0.6423, "step": 15118 }, { "epoch": 0.547968540466094, "grad_norm": 2.1459095238645496, "learning_rate": 4.467202173166861e-06, "loss": 0.9458, "step": 15119 }, { "epoch": 0.5480047841687506, "grad_norm": 2.0473755075058504, "learning_rate": 4.4666185902769785e-06, "loss": 0.7412, "step": 15120 }, { "epoch": 0.5480410278714073, "grad_norm": 2.539532473316606, "learning_rate": 4.466035014736771e-06, "loss": 0.8777, "step": 15121 }, { "epoch": 0.548077271574064, "grad_norm": 2.402332402659364, "learning_rate": 4.465451446554276e-06, "loss": 0.8425, "step": 15122 }, { "epoch": 0.5481135152767207, "grad_norm": 2.4571683196231535, "learning_rate": 4.464867885737535e-06, "loss": 0.8934, "step": 15123 }, { "epoch": 0.5481497589793773, "grad_norm": 2.5710764880778556, "learning_rate": 4.464284332294594e-06, "loss": 1.0615, "step": 15124 }, { "epoch": 0.548186002682034, "grad_norm": 2.1719825229771756, "learning_rate": 4.46370078623349e-06, "loss": 0.902, "step": 15125 }, { "epoch": 0.5482222463846906, "grad_norm": 2.5653871380060265, "learning_rate": 4.463117247562266e-06, "loss": 0.9417, "step": 15126 }, { "epoch": 0.5482584900873473, "grad_norm": 2.426259933207284, "learning_rate": 4.4625337162889585e-06, "loss": 1.0625, "step": 15127 }, { "epoch": 0.548294733790004, "grad_norm": 2.2569712413635346, "learning_rate": 4.4619501924216144e-06, "loss": 0.8027, "step": 15128 }, { "epoch": 0.5483309774926607, "grad_norm": 2.3036538392828008, "learning_rate": 4.46136667596827e-06, "loss": 0.8596, "step": 15129 }, { "epoch": 0.5483672211953173, "grad_norm": 2.445186348015942, "learning_rate": 4.460783166936967e-06, "loss": 0.9475, "step": 15130 }, { "epoch": 0.548403464897974, "grad_norm": 2.4666215619550567, "learning_rate": 4.460199665335745e-06, "loss": 0.881, "step": 15131 }, { "epoch": 0.5484397086006306, "grad_norm": 2.0839370872270435, "learning_rate": 4.4596161711726464e-06, "loss": 0.8444, "step": 15132 }, { "epoch": 0.5484759523032873, "grad_norm": 2.2691385994697297, "learning_rate": 4.45903268445571e-06, "loss": 0.7977, "step": 15133 }, { "epoch": 0.5485121960059439, "grad_norm": 2.1012852251043665, "learning_rate": 4.458449205192977e-06, "loss": 0.8719, "step": 15134 }, { "epoch": 0.5485484397086007, "grad_norm": 2.2972327917780455, "learning_rate": 4.457865733392483e-06, "loss": 0.8124, "step": 15135 }, { "epoch": 0.5485846834112573, "grad_norm": 2.3783963479216834, "learning_rate": 4.457282269062274e-06, "loss": 0.9085, "step": 15136 }, { "epoch": 0.548620927113914, "grad_norm": 2.383047078649458, "learning_rate": 4.456698812210387e-06, "loss": 0.7251, "step": 15137 }, { "epoch": 0.5486571708165706, "grad_norm": 2.6570492044757406, "learning_rate": 4.456115362844863e-06, "loss": 0.9494, "step": 15138 }, { "epoch": 0.5486934145192273, "grad_norm": 2.4227994969243536, "learning_rate": 4.455531920973738e-06, "loss": 0.8514, "step": 15139 }, { "epoch": 0.5487296582218839, "grad_norm": 2.436356758569994, "learning_rate": 4.454948486605055e-06, "loss": 0.8283, "step": 15140 }, { "epoch": 0.5487659019245406, "grad_norm": 2.4059515470337396, "learning_rate": 4.454365059746852e-06, "loss": 0.9355, "step": 15141 }, { "epoch": 0.5488021456271973, "grad_norm": 2.1505274064161872, "learning_rate": 4.4537816404071695e-06, "loss": 0.8216, "step": 15142 }, { "epoch": 0.548838389329854, "grad_norm": 2.4696229685592557, "learning_rate": 4.453198228594043e-06, "loss": 0.7855, "step": 15143 }, { "epoch": 0.5488746330325106, "grad_norm": 2.670125511699528, "learning_rate": 4.452614824315516e-06, "loss": 1.0807, "step": 15144 }, { "epoch": 0.5489108767351673, "grad_norm": 2.6235371936178717, "learning_rate": 4.452031427579626e-06, "loss": 0.8382, "step": 15145 }, { "epoch": 0.5489471204378239, "grad_norm": 2.5332210292191, "learning_rate": 4.45144803839441e-06, "loss": 0.848, "step": 15146 }, { "epoch": 0.5489833641404805, "grad_norm": 2.2456943014320476, "learning_rate": 4.4508646567679105e-06, "loss": 0.8355, "step": 15147 }, { "epoch": 0.5490196078431373, "grad_norm": 2.2715101796656043, "learning_rate": 4.450281282708163e-06, "loss": 0.8203, "step": 15148 }, { "epoch": 0.549055851545794, "grad_norm": 2.252639783056003, "learning_rate": 4.449697916223207e-06, "loss": 0.7377, "step": 15149 }, { "epoch": 0.5490920952484506, "grad_norm": 2.502503152388279, "learning_rate": 4.4491145573210785e-06, "loss": 0.9398, "step": 15150 }, { "epoch": 0.5491283389511072, "grad_norm": 2.4167851206014292, "learning_rate": 4.448531206009821e-06, "loss": 0.9851, "step": 15151 }, { "epoch": 0.5491645826537639, "grad_norm": 2.586438987150333, "learning_rate": 4.44794786229747e-06, "loss": 1.0351, "step": 15152 }, { "epoch": 0.5492008263564205, "grad_norm": 2.5452750798464425, "learning_rate": 4.447364526192063e-06, "loss": 0.9194, "step": 15153 }, { "epoch": 0.5492370700590772, "grad_norm": 2.5730477829469827, "learning_rate": 4.446781197701637e-06, "loss": 1.0178, "step": 15154 }, { "epoch": 0.5492733137617339, "grad_norm": 2.214653272068231, "learning_rate": 4.446197876834234e-06, "loss": 0.7793, "step": 15155 }, { "epoch": 0.5493095574643906, "grad_norm": 2.265491022341017, "learning_rate": 4.445614563597888e-06, "loss": 0.8883, "step": 15156 }, { "epoch": 0.5493458011670472, "grad_norm": 2.3691573580980494, "learning_rate": 4.445031258000639e-06, "loss": 0.8948, "step": 15157 }, { "epoch": 0.5493820448697039, "grad_norm": 2.3476587434356904, "learning_rate": 4.444447960050523e-06, "loss": 1.0394, "step": 15158 }, { "epoch": 0.5494182885723605, "grad_norm": 2.331520435366051, "learning_rate": 4.443864669755578e-06, "loss": 0.7724, "step": 15159 }, { "epoch": 0.5494545322750172, "grad_norm": 2.4301527030395196, "learning_rate": 4.443281387123842e-06, "loss": 1.0067, "step": 15160 }, { "epoch": 0.5494907759776739, "grad_norm": 2.8951263597265258, "learning_rate": 4.442698112163352e-06, "loss": 1.0001, "step": 15161 }, { "epoch": 0.5495270196803306, "grad_norm": 2.3297952759498024, "learning_rate": 4.442114844882143e-06, "loss": 0.7732, "step": 15162 }, { "epoch": 0.5495632633829872, "grad_norm": 2.1697720348472864, "learning_rate": 4.441531585288256e-06, "loss": 0.9054, "step": 15163 }, { "epoch": 0.5495995070856439, "grad_norm": 2.606725169698489, "learning_rate": 4.440948333389725e-06, "loss": 0.9739, "step": 15164 }, { "epoch": 0.5496357507883005, "grad_norm": 2.5019205837675993, "learning_rate": 4.4403650891945895e-06, "loss": 0.7644, "step": 15165 }, { "epoch": 0.5496719944909572, "grad_norm": 2.41038712300562, "learning_rate": 4.439781852710883e-06, "loss": 0.807, "step": 15166 }, { "epoch": 0.5497082381936138, "grad_norm": 2.2904601868563574, "learning_rate": 4.4391986239466436e-06, "loss": 0.9559, "step": 15167 }, { "epoch": 0.5497444818962706, "grad_norm": 2.2132973362102097, "learning_rate": 4.438615402909909e-06, "loss": 0.8493, "step": 15168 }, { "epoch": 0.5497807255989272, "grad_norm": 2.1247232960110907, "learning_rate": 4.4380321896087155e-06, "loss": 0.8777, "step": 15169 }, { "epoch": 0.5498169693015839, "grad_norm": 2.5124593842422094, "learning_rate": 4.4374489840510955e-06, "loss": 1.0123, "step": 15170 }, { "epoch": 0.5498532130042405, "grad_norm": 1.9900848241014697, "learning_rate": 4.436865786245091e-06, "loss": 0.9872, "step": 15171 }, { "epoch": 0.5498894567068972, "grad_norm": 2.2665463900652876, "learning_rate": 4.4362825961987344e-06, "loss": 0.857, "step": 15172 }, { "epoch": 0.5499257004095538, "grad_norm": 2.374992856722115, "learning_rate": 4.4356994139200625e-06, "loss": 0.8388, "step": 15173 }, { "epoch": 0.5499619441122106, "grad_norm": 2.387666152680364, "learning_rate": 4.435116239417111e-06, "loss": 0.7315, "step": 15174 }, { "epoch": 0.5499981878148672, "grad_norm": 2.3837631570439037, "learning_rate": 4.434533072697916e-06, "loss": 0.728, "step": 15175 }, { "epoch": 0.5500344315175238, "grad_norm": 2.3149857615587868, "learning_rate": 4.433949913770515e-06, "loss": 0.779, "step": 15176 }, { "epoch": 0.5500706752201805, "grad_norm": 2.1226032913679185, "learning_rate": 4.433366762642941e-06, "loss": 0.7278, "step": 15177 }, { "epoch": 0.5501069189228371, "grad_norm": 2.5324896680744473, "learning_rate": 4.43278361932323e-06, "loss": 1.0645, "step": 15178 }, { "epoch": 0.5501431626254938, "grad_norm": 2.158716237578665, "learning_rate": 4.432200483819419e-06, "loss": 1.0361, "step": 15179 }, { "epoch": 0.5501794063281504, "grad_norm": 2.444912835284261, "learning_rate": 4.43161735613954e-06, "loss": 0.9454, "step": 15180 }, { "epoch": 0.5502156500308072, "grad_norm": 2.705206934246323, "learning_rate": 4.4310342362916314e-06, "loss": 1.0605, "step": 15181 }, { "epoch": 0.5502518937334638, "grad_norm": 2.1730604269395286, "learning_rate": 4.430451124283725e-06, "loss": 0.7381, "step": 15182 }, { "epoch": 0.5502881374361205, "grad_norm": 2.4386333697669196, "learning_rate": 4.429868020123859e-06, "loss": 0.9431, "step": 15183 }, { "epoch": 0.5503243811387771, "grad_norm": 1.8718917279587455, "learning_rate": 4.429284923820068e-06, "loss": 0.8246, "step": 15184 }, { "epoch": 0.5503606248414338, "grad_norm": 2.2834726186096654, "learning_rate": 4.428701835380384e-06, "loss": 0.9354, "step": 15185 }, { "epoch": 0.5503968685440904, "grad_norm": 2.215969605844777, "learning_rate": 4.428118754812844e-06, "loss": 0.7716, "step": 15186 }, { "epoch": 0.5504331122467472, "grad_norm": 2.505448941969211, "learning_rate": 4.427535682125482e-06, "loss": 0.8452, "step": 15187 }, { "epoch": 0.5504693559494038, "grad_norm": 2.2917690093395517, "learning_rate": 4.426952617326332e-06, "loss": 1.0174, "step": 15188 }, { "epoch": 0.5505055996520605, "grad_norm": 1.941454009554878, "learning_rate": 4.426369560423428e-06, "loss": 0.7654, "step": 15189 }, { "epoch": 0.5505418433547171, "grad_norm": 2.37855891589621, "learning_rate": 4.425786511424803e-06, "loss": 0.8158, "step": 15190 }, { "epoch": 0.5505780870573738, "grad_norm": 2.1606608833632146, "learning_rate": 4.425203470338494e-06, "loss": 0.8973, "step": 15191 }, { "epoch": 0.5506143307600304, "grad_norm": 2.5163699118483436, "learning_rate": 4.424620437172534e-06, "loss": 0.8281, "step": 15192 }, { "epoch": 0.5506505744626871, "grad_norm": 7.641885104872358, "learning_rate": 4.4240374119349564e-06, "loss": 0.9754, "step": 15193 }, { "epoch": 0.5506868181653438, "grad_norm": 2.415776908318598, "learning_rate": 4.4234543946337946e-06, "loss": 0.8611, "step": 15194 }, { "epoch": 0.5507230618680005, "grad_norm": 2.418386266759532, "learning_rate": 4.422871385277082e-06, "loss": 0.893, "step": 15195 }, { "epoch": 0.5507593055706571, "grad_norm": 2.308210899354969, "learning_rate": 4.422288383872853e-06, "loss": 0.9339, "step": 15196 }, { "epoch": 0.5507955492733138, "grad_norm": 2.362501033153657, "learning_rate": 4.4217053904291414e-06, "loss": 0.8846, "step": 15197 }, { "epoch": 0.5508317929759704, "grad_norm": 2.3559918982737047, "learning_rate": 4.421122404953979e-06, "loss": 0.883, "step": 15198 }, { "epoch": 0.550868036678627, "grad_norm": 2.278043665769082, "learning_rate": 4.4205394274554e-06, "loss": 0.9688, "step": 15199 }, { "epoch": 0.5509042803812838, "grad_norm": 2.6722333343753357, "learning_rate": 4.419956457941437e-06, "loss": 0.9313, "step": 15200 }, { "epoch": 0.5509405240839405, "grad_norm": 2.1358076181903707, "learning_rate": 4.419373496420124e-06, "loss": 0.942, "step": 15201 }, { "epoch": 0.5509767677865971, "grad_norm": 2.2470429644829775, "learning_rate": 4.418790542899491e-06, "loss": 1.002, "step": 15202 }, { "epoch": 0.5510130114892537, "grad_norm": 2.177786483187431, "learning_rate": 4.418207597387575e-06, "loss": 0.8176, "step": 15203 }, { "epoch": 0.5510492551919104, "grad_norm": 2.6544771082539613, "learning_rate": 4.4176246598924064e-06, "loss": 1.1046, "step": 15204 }, { "epoch": 0.551085498894567, "grad_norm": 2.262422556666611, "learning_rate": 4.417041730422017e-06, "loss": 1.1482, "step": 15205 }, { "epoch": 0.5511217425972237, "grad_norm": 2.0741933147729292, "learning_rate": 4.41645880898444e-06, "loss": 1.0426, "step": 15206 }, { "epoch": 0.5511579862998804, "grad_norm": 2.3975122973769287, "learning_rate": 4.415875895587708e-06, "loss": 0.9604, "step": 15207 }, { "epoch": 0.5511942300025371, "grad_norm": 2.2267065370367223, "learning_rate": 4.4152929902398536e-06, "loss": 0.8538, "step": 15208 }, { "epoch": 0.5512304737051937, "grad_norm": 2.363918577353424, "learning_rate": 4.414710092948907e-06, "loss": 0.8648, "step": 15209 }, { "epoch": 0.5512667174078504, "grad_norm": 2.035384960120789, "learning_rate": 4.4141272037229e-06, "loss": 0.9651, "step": 15210 }, { "epoch": 0.551302961110507, "grad_norm": 2.2124899496995227, "learning_rate": 4.413544322569868e-06, "loss": 0.7105, "step": 15211 }, { "epoch": 0.5513392048131637, "grad_norm": 2.270632898269331, "learning_rate": 4.41296144949784e-06, "loss": 0.9826, "step": 15212 }, { "epoch": 0.5513754485158203, "grad_norm": 2.387130687207375, "learning_rate": 4.412378584514849e-06, "loss": 0.8681, "step": 15213 }, { "epoch": 0.5514116922184771, "grad_norm": 2.1949680942975838, "learning_rate": 4.411795727628924e-06, "loss": 0.9016, "step": 15214 }, { "epoch": 0.5514479359211337, "grad_norm": 2.1546920022227987, "learning_rate": 4.4112128788481e-06, "loss": 0.8343, "step": 15215 }, { "epoch": 0.5514841796237904, "grad_norm": 2.4560215010038475, "learning_rate": 4.410630038180406e-06, "loss": 0.879, "step": 15216 }, { "epoch": 0.551520423326447, "grad_norm": 2.28651818188565, "learning_rate": 4.410047205633873e-06, "loss": 0.8885, "step": 15217 }, { "epoch": 0.5515566670291037, "grad_norm": 2.359770583334334, "learning_rate": 4.4094643812165314e-06, "loss": 0.7795, "step": 15218 }, { "epoch": 0.5515929107317603, "grad_norm": 2.3051247063970934, "learning_rate": 4.408881564936415e-06, "loss": 0.7882, "step": 15219 }, { "epoch": 0.5516291544344171, "grad_norm": 2.2227476700035136, "learning_rate": 4.408298756801554e-06, "loss": 0.9309, "step": 15220 }, { "epoch": 0.5516653981370737, "grad_norm": 2.0991091660233274, "learning_rate": 4.407715956819978e-06, "loss": 0.9445, "step": 15221 }, { "epoch": 0.5517016418397304, "grad_norm": 2.657422811399363, "learning_rate": 4.407133164999715e-06, "loss": 0.9513, "step": 15222 }, { "epoch": 0.551737885542387, "grad_norm": 2.361934376248106, "learning_rate": 4.406550381348801e-06, "loss": 0.7141, "step": 15223 }, { "epoch": 0.5517741292450437, "grad_norm": 2.332491269964839, "learning_rate": 4.4059676058752645e-06, "loss": 0.9915, "step": 15224 }, { "epoch": 0.5518103729477003, "grad_norm": 2.349392653204833, "learning_rate": 4.405384838587135e-06, "loss": 0.9415, "step": 15225 }, { "epoch": 0.551846616650357, "grad_norm": 2.5677311771400317, "learning_rate": 4.404802079492441e-06, "loss": 0.975, "step": 15226 }, { "epoch": 0.5518828603530137, "grad_norm": 2.3418502057887984, "learning_rate": 4.404219328599216e-06, "loss": 0.8681, "step": 15227 }, { "epoch": 0.5519191040556704, "grad_norm": 2.5804133890540863, "learning_rate": 4.403636585915488e-06, "loss": 0.9811, "step": 15228 }, { "epoch": 0.551955347758327, "grad_norm": 2.3432974068305947, "learning_rate": 4.403053851449285e-06, "loss": 0.7759, "step": 15229 }, { "epoch": 0.5519915914609836, "grad_norm": 2.234204802611146, "learning_rate": 4.402471125208641e-06, "loss": 0.927, "step": 15230 }, { "epoch": 0.5520278351636403, "grad_norm": 2.498369237721181, "learning_rate": 4.401888407201584e-06, "loss": 0.8422, "step": 15231 }, { "epoch": 0.5520640788662969, "grad_norm": 2.251401837832164, "learning_rate": 4.401305697436143e-06, "loss": 1.077, "step": 15232 }, { "epoch": 0.5521003225689537, "grad_norm": 2.5767956705955526, "learning_rate": 4.400722995920346e-06, "loss": 1.0108, "step": 15233 }, { "epoch": 0.5521365662716103, "grad_norm": 1.9573377907582519, "learning_rate": 4.400140302662225e-06, "loss": 0.8976, "step": 15234 }, { "epoch": 0.552172809974267, "grad_norm": 1.9421578119723766, "learning_rate": 4.3995576176698075e-06, "loss": 0.7234, "step": 15235 }, { "epoch": 0.5522090536769236, "grad_norm": 2.172339267350209, "learning_rate": 4.398974940951123e-06, "loss": 0.7918, "step": 15236 }, { "epoch": 0.5522452973795803, "grad_norm": 2.5111140844422652, "learning_rate": 4.398392272514198e-06, "loss": 0.8634, "step": 15237 }, { "epoch": 0.5522815410822369, "grad_norm": 2.378584465939287, "learning_rate": 4.397809612367066e-06, "loss": 0.8871, "step": 15238 }, { "epoch": 0.5523177847848936, "grad_norm": 2.0970780652963197, "learning_rate": 4.3972269605177535e-06, "loss": 0.8016, "step": 15239 }, { "epoch": 0.5523540284875503, "grad_norm": 2.2970734120582827, "learning_rate": 4.396644316974289e-06, "loss": 1.0187, "step": 15240 }, { "epoch": 0.552390272190207, "grad_norm": 2.535783693183041, "learning_rate": 4.396061681744699e-06, "loss": 0.8892, "step": 15241 }, { "epoch": 0.5524265158928636, "grad_norm": 2.0157614258920913, "learning_rate": 4.395479054837015e-06, "loss": 0.7574, "step": 15242 }, { "epoch": 0.5524627595955203, "grad_norm": 2.084062523545129, "learning_rate": 4.394896436259265e-06, "loss": 0.7696, "step": 15243 }, { "epoch": 0.5524990032981769, "grad_norm": 2.1859950608579073, "learning_rate": 4.394313826019475e-06, "loss": 0.7621, "step": 15244 }, { "epoch": 0.5525352470008336, "grad_norm": 2.362266560304314, "learning_rate": 4.393731224125674e-06, "loss": 0.7375, "step": 15245 }, { "epoch": 0.5525714907034903, "grad_norm": 2.2899671802752963, "learning_rate": 4.393148630585891e-06, "loss": 0.889, "step": 15246 }, { "epoch": 0.552607734406147, "grad_norm": 2.278338530296892, "learning_rate": 4.3925660454081535e-06, "loss": 0.7312, "step": 15247 }, { "epoch": 0.5526439781088036, "grad_norm": 2.501275952402552, "learning_rate": 4.391983468600487e-06, "loss": 1.0122, "step": 15248 }, { "epoch": 0.5526802218114603, "grad_norm": 2.510269314278432, "learning_rate": 4.39140090017092e-06, "loss": 0.9871, "step": 15249 }, { "epoch": 0.5527164655141169, "grad_norm": 2.2511980893613486, "learning_rate": 4.390818340127481e-06, "loss": 0.8658, "step": 15250 }, { "epoch": 0.5527527092167736, "grad_norm": 2.2159699219292914, "learning_rate": 4.390235788478198e-06, "loss": 0.7995, "step": 15251 }, { "epoch": 0.5527889529194302, "grad_norm": 2.635744045639725, "learning_rate": 4.389653245231098e-06, "loss": 0.8282, "step": 15252 }, { "epoch": 0.552825196622087, "grad_norm": 2.3434246047949827, "learning_rate": 4.389070710394204e-06, "loss": 0.7388, "step": 15253 }, { "epoch": 0.5528614403247436, "grad_norm": 2.173127626352417, "learning_rate": 4.388488183975549e-06, "loss": 0.8722, "step": 15254 }, { "epoch": 0.5528976840274002, "grad_norm": 2.567317902082714, "learning_rate": 4.387905665983155e-06, "loss": 0.9234, "step": 15255 }, { "epoch": 0.5529339277300569, "grad_norm": 2.235108177152971, "learning_rate": 4.387323156425052e-06, "loss": 0.9421, "step": 15256 }, { "epoch": 0.5529701714327135, "grad_norm": 2.4379999751979726, "learning_rate": 4.386740655309263e-06, "loss": 0.7481, "step": 15257 }, { "epoch": 0.5530064151353702, "grad_norm": 2.3331481485196064, "learning_rate": 4.38615816264382e-06, "loss": 0.8515, "step": 15258 }, { "epoch": 0.553042658838027, "grad_norm": 2.2654680517112773, "learning_rate": 4.3855756784367455e-06, "loss": 0.7836, "step": 15259 }, { "epoch": 0.5530789025406836, "grad_norm": 2.2748340134485443, "learning_rate": 4.384993202696066e-06, "loss": 0.8067, "step": 15260 }, { "epoch": 0.5531151462433402, "grad_norm": 2.3941076137812125, "learning_rate": 4.384410735429807e-06, "loss": 0.8377, "step": 15261 }, { "epoch": 0.5531513899459969, "grad_norm": 2.1570797857800823, "learning_rate": 4.383828276645997e-06, "loss": 0.8895, "step": 15262 }, { "epoch": 0.5531876336486535, "grad_norm": 2.1155009052649003, "learning_rate": 4.383245826352661e-06, "loss": 0.584, "step": 15263 }, { "epoch": 0.5532238773513102, "grad_norm": 2.3530019055356224, "learning_rate": 4.382663384557825e-06, "loss": 0.8289, "step": 15264 }, { "epoch": 0.5532601210539668, "grad_norm": 2.6751807013293165, "learning_rate": 4.382080951269513e-06, "loss": 0.7903, "step": 15265 }, { "epoch": 0.5532963647566236, "grad_norm": 2.009008940867999, "learning_rate": 4.3814985264957526e-06, "loss": 0.8464, "step": 15266 }, { "epoch": 0.5533326084592802, "grad_norm": 2.5703489183641746, "learning_rate": 4.380916110244568e-06, "loss": 0.841, "step": 15267 }, { "epoch": 0.5533688521619369, "grad_norm": 1.9814027060080144, "learning_rate": 4.380333702523986e-06, "loss": 0.6562, "step": 15268 }, { "epoch": 0.5534050958645935, "grad_norm": 2.509509779386283, "learning_rate": 4.379751303342028e-06, "loss": 0.8803, "step": 15269 }, { "epoch": 0.5534413395672502, "grad_norm": 2.5054229530101013, "learning_rate": 4.379168912706725e-06, "loss": 0.9394, "step": 15270 }, { "epoch": 0.5534775832699068, "grad_norm": 2.5110521957795577, "learning_rate": 4.378586530626098e-06, "loss": 1.0677, "step": 15271 }, { "epoch": 0.5535138269725636, "grad_norm": 2.547144522309681, "learning_rate": 4.378004157108173e-06, "loss": 0.9579, "step": 15272 }, { "epoch": 0.5535500706752202, "grad_norm": 2.5865409170367983, "learning_rate": 4.377421792160973e-06, "loss": 1.0876, "step": 15273 }, { "epoch": 0.5535863143778769, "grad_norm": 2.5792391558914307, "learning_rate": 4.376839435792526e-06, "loss": 1.0093, "step": 15274 }, { "epoch": 0.5536225580805335, "grad_norm": 2.380954228986495, "learning_rate": 4.376257088010854e-06, "loss": 0.9192, "step": 15275 }, { "epoch": 0.5536588017831902, "grad_norm": 2.1642911668545914, "learning_rate": 4.375674748823982e-06, "loss": 0.8369, "step": 15276 }, { "epoch": 0.5536950454858468, "grad_norm": 2.77831917879684, "learning_rate": 4.375092418239933e-06, "loss": 0.9071, "step": 15277 }, { "epoch": 0.5537312891885035, "grad_norm": 2.245450842408566, "learning_rate": 4.3745100962667345e-06, "loss": 0.8223, "step": 15278 }, { "epoch": 0.5537675328911602, "grad_norm": 2.764080013737827, "learning_rate": 4.373927782912409e-06, "loss": 0.949, "step": 15279 }, { "epoch": 0.5538037765938169, "grad_norm": 2.1123706489041782, "learning_rate": 4.3733454781849794e-06, "loss": 0.7562, "step": 15280 }, { "epoch": 0.5538400202964735, "grad_norm": 2.2814294004333746, "learning_rate": 4.372763182092468e-06, "loss": 1.0108, "step": 15281 }, { "epoch": 0.5538762639991301, "grad_norm": 2.907959806936116, "learning_rate": 4.372180894642903e-06, "loss": 0.9606, "step": 15282 }, { "epoch": 0.5539125077017868, "grad_norm": 2.520930825736117, "learning_rate": 4.371598615844305e-06, "loss": 0.918, "step": 15283 }, { "epoch": 0.5539487514044434, "grad_norm": 2.387561675514586, "learning_rate": 4.371016345704698e-06, "loss": 0.9252, "step": 15284 }, { "epoch": 0.5539849951071001, "grad_norm": 2.520504621486017, "learning_rate": 4.370434084232105e-06, "loss": 0.8437, "step": 15285 }, { "epoch": 0.5540212388097568, "grad_norm": 2.34513844554007, "learning_rate": 4.3698518314345505e-06, "loss": 0.893, "step": 15286 }, { "epoch": 0.5540574825124135, "grad_norm": 2.4035187737533525, "learning_rate": 4.369269587320057e-06, "loss": 0.9159, "step": 15287 }, { "epoch": 0.5540937262150701, "grad_norm": 2.4045090759782926, "learning_rate": 4.368687351896647e-06, "loss": 0.9681, "step": 15288 }, { "epoch": 0.5541299699177268, "grad_norm": 2.348621067920709, "learning_rate": 4.36810512517234e-06, "loss": 0.8983, "step": 15289 }, { "epoch": 0.5541662136203834, "grad_norm": 2.2711781859387985, "learning_rate": 4.367522907155166e-06, "loss": 0.795, "step": 15290 }, { "epoch": 0.5542024573230401, "grad_norm": 1.9984710339673322, "learning_rate": 4.366940697853143e-06, "loss": 0.7124, "step": 15291 }, { "epoch": 0.5542387010256968, "grad_norm": 2.3010741835472452, "learning_rate": 4.366358497274295e-06, "loss": 0.9419, "step": 15292 }, { "epoch": 0.5542749447283535, "grad_norm": 2.1229198664811646, "learning_rate": 4.365776305426642e-06, "loss": 0.9205, "step": 15293 }, { "epoch": 0.5543111884310101, "grad_norm": 2.4018812917182406, "learning_rate": 4.365194122318209e-06, "loss": 0.8068, "step": 15294 }, { "epoch": 0.5543474321336668, "grad_norm": 2.239037342438304, "learning_rate": 4.364611947957017e-06, "loss": 0.7758, "step": 15295 }, { "epoch": 0.5543836758363234, "grad_norm": 2.911744761840645, "learning_rate": 4.364029782351088e-06, "loss": 0.9081, "step": 15296 }, { "epoch": 0.5544199195389801, "grad_norm": 2.3151219871323994, "learning_rate": 4.363447625508443e-06, "loss": 0.9358, "step": 15297 }, { "epoch": 0.5544561632416367, "grad_norm": 2.091935959304818, "learning_rate": 4.362865477437106e-06, "loss": 0.7457, "step": 15298 }, { "epoch": 0.5544924069442935, "grad_norm": 2.264095545320175, "learning_rate": 4.362283338145098e-06, "loss": 0.8394, "step": 15299 }, { "epoch": 0.5545286506469501, "grad_norm": 2.046241681722285, "learning_rate": 4.361701207640439e-06, "loss": 0.7287, "step": 15300 }, { "epoch": 0.5545648943496068, "grad_norm": 2.258835021321223, "learning_rate": 4.36111908593115e-06, "loss": 0.8703, "step": 15301 }, { "epoch": 0.5546011380522634, "grad_norm": 2.209551032792348, "learning_rate": 4.360536973025255e-06, "loss": 0.8513, "step": 15302 }, { "epoch": 0.5546373817549201, "grad_norm": 2.3864967432107047, "learning_rate": 4.359954868930775e-06, "loss": 1.0382, "step": 15303 }, { "epoch": 0.5546736254575767, "grad_norm": 2.490089106799214, "learning_rate": 4.359372773655729e-06, "loss": 0.9051, "step": 15304 }, { "epoch": 0.5547098691602335, "grad_norm": 2.3299150886620144, "learning_rate": 4.358790687208136e-06, "loss": 0.8932, "step": 15305 }, { "epoch": 0.5547461128628901, "grad_norm": 2.4785653022273837, "learning_rate": 4.358208609596023e-06, "loss": 0.7549, "step": 15306 }, { "epoch": 0.5547823565655468, "grad_norm": 2.143640955126918, "learning_rate": 4.357626540827405e-06, "loss": 0.9184, "step": 15307 }, { "epoch": 0.5548186002682034, "grad_norm": 2.2047306547928134, "learning_rate": 4.357044480910305e-06, "loss": 0.9411, "step": 15308 }, { "epoch": 0.55485484397086, "grad_norm": 2.3468094903635777, "learning_rate": 4.356462429852744e-06, "loss": 0.9384, "step": 15309 }, { "epoch": 0.5548910876735167, "grad_norm": 2.398006314203839, "learning_rate": 4.355880387662742e-06, "loss": 0.9862, "step": 15310 }, { "epoch": 0.5549273313761733, "grad_norm": 2.464677716672724, "learning_rate": 4.355298354348319e-06, "loss": 0.9236, "step": 15311 }, { "epoch": 0.5549635750788301, "grad_norm": 2.132434074844139, "learning_rate": 4.354716329917494e-06, "loss": 0.6561, "step": 15312 }, { "epoch": 0.5549998187814867, "grad_norm": 2.4536815047349467, "learning_rate": 4.354134314378287e-06, "loss": 0.8549, "step": 15313 }, { "epoch": 0.5550360624841434, "grad_norm": 2.1521057413560527, "learning_rate": 4.3535523077387195e-06, "loss": 0.9785, "step": 15314 }, { "epoch": 0.5550723061868, "grad_norm": 2.7435814038897446, "learning_rate": 4.352970310006811e-06, "loss": 0.875, "step": 15315 }, { "epoch": 0.5551085498894567, "grad_norm": 2.255956610203247, "learning_rate": 4.352388321190577e-06, "loss": 0.8213, "step": 15316 }, { "epoch": 0.5551447935921133, "grad_norm": 2.836916137578997, "learning_rate": 4.351806341298043e-06, "loss": 0.8296, "step": 15317 }, { "epoch": 0.5551810372947701, "grad_norm": 2.4091733608103736, "learning_rate": 4.351224370337226e-06, "loss": 0.9547, "step": 15318 }, { "epoch": 0.5552172809974267, "grad_norm": 2.5718932386455937, "learning_rate": 4.350642408316144e-06, "loss": 0.7453, "step": 15319 }, { "epoch": 0.5552535247000834, "grad_norm": 2.6316385679233814, "learning_rate": 4.350060455242816e-06, "loss": 0.9325, "step": 15320 }, { "epoch": 0.55528976840274, "grad_norm": 2.1181637456164712, "learning_rate": 4.349478511125264e-06, "loss": 1.021, "step": 15321 }, { "epoch": 0.5553260121053967, "grad_norm": 2.230957691412414, "learning_rate": 4.3488965759715035e-06, "loss": 1.0312, "step": 15322 }, { "epoch": 0.5553622558080533, "grad_norm": 2.5037233214838364, "learning_rate": 4.348314649789555e-06, "loss": 0.9079, "step": 15323 }, { "epoch": 0.55539849951071, "grad_norm": 2.384640851227362, "learning_rate": 4.347732732587434e-06, "loss": 0.7102, "step": 15324 }, { "epoch": 0.5554347432133667, "grad_norm": 2.4650677813862414, "learning_rate": 4.347150824373163e-06, "loss": 0.8707, "step": 15325 }, { "epoch": 0.5554709869160234, "grad_norm": 2.2820842947513613, "learning_rate": 4.3465689251547595e-06, "loss": 0.9083, "step": 15326 }, { "epoch": 0.55550723061868, "grad_norm": 2.152718713045436, "learning_rate": 4.345987034940241e-06, "loss": 0.9354, "step": 15327 }, { "epoch": 0.5555434743213367, "grad_norm": 2.202139942963153, "learning_rate": 4.345405153737624e-06, "loss": 0.8817, "step": 15328 }, { "epoch": 0.5555797180239933, "grad_norm": 2.512533656254397, "learning_rate": 4.34482328155493e-06, "loss": 1.0926, "step": 15329 }, { "epoch": 0.55561596172665, "grad_norm": 2.350898215162919, "learning_rate": 4.344241418400175e-06, "loss": 0.926, "step": 15330 }, { "epoch": 0.5556522054293067, "grad_norm": 2.0452997229547387, "learning_rate": 4.343659564281376e-06, "loss": 0.7851, "step": 15331 }, { "epoch": 0.5556884491319634, "grad_norm": 2.503619492953047, "learning_rate": 4.343077719206551e-06, "loss": 1.0545, "step": 15332 }, { "epoch": 0.55572469283462, "grad_norm": 2.337991719170835, "learning_rate": 4.342495883183718e-06, "loss": 0.9514, "step": 15333 }, { "epoch": 0.5557609365372767, "grad_norm": 2.4456225147504407, "learning_rate": 4.3419140562208935e-06, "loss": 0.9876, "step": 15334 }, { "epoch": 0.5557971802399333, "grad_norm": 2.142859606371198, "learning_rate": 4.341332238326097e-06, "loss": 0.9683, "step": 15335 }, { "epoch": 0.5558334239425899, "grad_norm": 2.2673018618316596, "learning_rate": 4.340750429507341e-06, "loss": 0.8022, "step": 15336 }, { "epoch": 0.5558696676452466, "grad_norm": 2.171850681004812, "learning_rate": 4.340168629772647e-06, "loss": 0.9789, "step": 15337 }, { "epoch": 0.5559059113479033, "grad_norm": 2.2714599028946973, "learning_rate": 4.339586839130031e-06, "loss": 0.9811, "step": 15338 }, { "epoch": 0.55594215505056, "grad_norm": 2.3188225412948347, "learning_rate": 4.339005057587508e-06, "loss": 0.882, "step": 15339 }, { "epoch": 0.5559783987532166, "grad_norm": 2.3147470701934814, "learning_rate": 4.3384232851530965e-06, "loss": 0.9035, "step": 15340 }, { "epoch": 0.5560146424558733, "grad_norm": 2.4137341575283346, "learning_rate": 4.3378415218348115e-06, "loss": 1.0164, "step": 15341 }, { "epoch": 0.5560508861585299, "grad_norm": 2.3060065182608387, "learning_rate": 4.3372597676406705e-06, "loss": 0.9275, "step": 15342 }, { "epoch": 0.5560871298611866, "grad_norm": 2.414055999337832, "learning_rate": 4.336678022578688e-06, "loss": 0.9368, "step": 15343 }, { "epoch": 0.5561233735638433, "grad_norm": 1.996548331305633, "learning_rate": 4.33609628665688e-06, "loss": 0.9747, "step": 15344 }, { "epoch": 0.5561596172665, "grad_norm": 2.5850960409815045, "learning_rate": 4.335514559883266e-06, "loss": 0.7845, "step": 15345 }, { "epoch": 0.5561958609691566, "grad_norm": 2.687244289113262, "learning_rate": 4.334932842265858e-06, "loss": 1.1029, "step": 15346 }, { "epoch": 0.5562321046718133, "grad_norm": 2.34162792467246, "learning_rate": 4.334351133812676e-06, "loss": 0.7245, "step": 15347 }, { "epoch": 0.5562683483744699, "grad_norm": 2.598129205276028, "learning_rate": 4.333769434531728e-06, "loss": 0.9064, "step": 15348 }, { "epoch": 0.5563045920771266, "grad_norm": 2.248152744937758, "learning_rate": 4.333187744431038e-06, "loss": 0.794, "step": 15349 }, { "epoch": 0.5563408357797832, "grad_norm": 2.0862813384742336, "learning_rate": 4.3326060635186166e-06, "loss": 0.8764, "step": 15350 }, { "epoch": 0.55637707948244, "grad_norm": 2.270933809527558, "learning_rate": 4.33202439180248e-06, "loss": 0.9248, "step": 15351 }, { "epoch": 0.5564133231850966, "grad_norm": 2.132734908139209, "learning_rate": 4.331442729290643e-06, "loss": 0.918, "step": 15352 }, { "epoch": 0.5564495668877533, "grad_norm": 2.1891268868469402, "learning_rate": 4.3308610759911216e-06, "loss": 0.9677, "step": 15353 }, { "epoch": 0.5564858105904099, "grad_norm": 2.602764855551996, "learning_rate": 4.3302794319119295e-06, "loss": 0.9703, "step": 15354 }, { "epoch": 0.5565220542930666, "grad_norm": 2.345870338599647, "learning_rate": 4.329697797061082e-06, "loss": 0.682, "step": 15355 }, { "epoch": 0.5565582979957232, "grad_norm": 2.1259410325582615, "learning_rate": 4.3291161714465914e-06, "loss": 0.9127, "step": 15356 }, { "epoch": 0.5565945416983799, "grad_norm": 2.1200466421377735, "learning_rate": 4.328534555076477e-06, "loss": 0.7665, "step": 15357 }, { "epoch": 0.5566307854010366, "grad_norm": 2.2883374085816195, "learning_rate": 4.327952947958749e-06, "loss": 0.9611, "step": 15358 }, { "epoch": 0.5566670291036933, "grad_norm": 2.4048062645474086, "learning_rate": 4.3273713501014245e-06, "loss": 0.8653, "step": 15359 }, { "epoch": 0.5567032728063499, "grad_norm": 2.613306923072979, "learning_rate": 4.3267897615125145e-06, "loss": 1.0102, "step": 15360 }, { "epoch": 0.5567395165090065, "grad_norm": 2.158523141952872, "learning_rate": 4.326208182200035e-06, "loss": 0.9199, "step": 15361 }, { "epoch": 0.5567757602116632, "grad_norm": 2.24314847447097, "learning_rate": 4.3256266121719995e-06, "loss": 0.8374, "step": 15362 }, { "epoch": 0.5568120039143198, "grad_norm": 2.0205981344510215, "learning_rate": 4.325045051436421e-06, "loss": 0.7314, "step": 15363 }, { "epoch": 0.5568482476169766, "grad_norm": 2.2334611596878258, "learning_rate": 4.324463500001311e-06, "loss": 0.9677, "step": 15364 }, { "epoch": 0.5568844913196332, "grad_norm": 2.4286004820409737, "learning_rate": 4.3238819578746885e-06, "loss": 0.7561, "step": 15365 }, { "epoch": 0.5569207350222899, "grad_norm": 2.257615640000352, "learning_rate": 4.323300425064564e-06, "loss": 0.7211, "step": 15366 }, { "epoch": 0.5569569787249465, "grad_norm": 2.5948160593466874, "learning_rate": 4.3227189015789495e-06, "loss": 0.8662, "step": 15367 }, { "epoch": 0.5569932224276032, "grad_norm": 2.4465753501957774, "learning_rate": 4.3221373874258575e-06, "loss": 0.9321, "step": 15368 }, { "epoch": 0.5570294661302598, "grad_norm": 2.4175561199420144, "learning_rate": 4.3215558826133045e-06, "loss": 0.8769, "step": 15369 }, { "epoch": 0.5570657098329165, "grad_norm": 2.3411213603622767, "learning_rate": 4.320974387149301e-06, "loss": 0.7669, "step": 15370 }, { "epoch": 0.5571019535355732, "grad_norm": 2.3321970740256095, "learning_rate": 4.320392901041858e-06, "loss": 0.9391, "step": 15371 }, { "epoch": 0.5571381972382299, "grad_norm": 2.3683409274006464, "learning_rate": 4.31981142429899e-06, "loss": 0.9184, "step": 15372 }, { "epoch": 0.5571744409408865, "grad_norm": 2.4224798694358887, "learning_rate": 4.319229956928711e-06, "loss": 0.9176, "step": 15373 }, { "epoch": 0.5572106846435432, "grad_norm": 2.5685794385224106, "learning_rate": 4.318648498939032e-06, "loss": 1.0419, "step": 15374 }, { "epoch": 0.5572469283461998, "grad_norm": 2.3151158359795754, "learning_rate": 4.3180670503379635e-06, "loss": 0.8735, "step": 15375 }, { "epoch": 0.5572831720488565, "grad_norm": 2.380133495327206, "learning_rate": 4.317485611133517e-06, "loss": 0.9244, "step": 15376 }, { "epoch": 0.5573194157515132, "grad_norm": 2.5317553330377627, "learning_rate": 4.316904181333707e-06, "loss": 1.0464, "step": 15377 }, { "epoch": 0.5573556594541699, "grad_norm": 2.2259174283569054, "learning_rate": 4.316322760946545e-06, "loss": 0.939, "step": 15378 }, { "epoch": 0.5573919031568265, "grad_norm": 2.5964817728958187, "learning_rate": 4.3157413499800425e-06, "loss": 0.9345, "step": 15379 }, { "epoch": 0.5574281468594832, "grad_norm": 2.385530991524391, "learning_rate": 4.3151599484422086e-06, "loss": 0.9279, "step": 15380 }, { "epoch": 0.5574643905621398, "grad_norm": 2.0221726529081834, "learning_rate": 4.314578556341058e-06, "loss": 0.8918, "step": 15381 }, { "epoch": 0.5575006342647965, "grad_norm": 2.522996871784111, "learning_rate": 4.3139971736845995e-06, "loss": 0.932, "step": 15382 }, { "epoch": 0.5575368779674531, "grad_norm": 2.4157230341007385, "learning_rate": 4.313415800480846e-06, "loss": 1.0039, "step": 15383 }, { "epoch": 0.5575731216701099, "grad_norm": 2.5646989281499266, "learning_rate": 4.312834436737804e-06, "loss": 0.9652, "step": 15384 }, { "epoch": 0.5576093653727665, "grad_norm": 2.367072764196778, "learning_rate": 4.312253082463491e-06, "loss": 0.9204, "step": 15385 }, { "epoch": 0.5576456090754232, "grad_norm": 2.0563312793577397, "learning_rate": 4.311671737665915e-06, "loss": 0.8779, "step": 15386 }, { "epoch": 0.5576818527780798, "grad_norm": 2.297953504702273, "learning_rate": 4.3110904023530855e-06, "loss": 0.9173, "step": 15387 }, { "epoch": 0.5577180964807364, "grad_norm": 2.382350089254143, "learning_rate": 4.310509076533013e-06, "loss": 0.828, "step": 15388 }, { "epoch": 0.5577543401833931, "grad_norm": 2.301976173344842, "learning_rate": 4.30992776021371e-06, "loss": 0.9866, "step": 15389 }, { "epoch": 0.5577905838860499, "grad_norm": 2.3151443054784884, "learning_rate": 4.309346453403184e-06, "loss": 0.8589, "step": 15390 }, { "epoch": 0.5578268275887065, "grad_norm": 2.2817540059986894, "learning_rate": 4.308765156109445e-06, "loss": 0.7944, "step": 15391 }, { "epoch": 0.5578630712913631, "grad_norm": 2.4915486593555123, "learning_rate": 4.308183868340506e-06, "loss": 0.9262, "step": 15392 }, { "epoch": 0.5578993149940198, "grad_norm": 2.201373835813109, "learning_rate": 4.307602590104374e-06, "loss": 0.8044, "step": 15393 }, { "epoch": 0.5579355586966764, "grad_norm": 2.717196984380684, "learning_rate": 4.3070213214090595e-06, "loss": 0.963, "step": 15394 }, { "epoch": 0.5579718023993331, "grad_norm": 2.477802269512425, "learning_rate": 4.306440062262571e-06, "loss": 1.041, "step": 15395 }, { "epoch": 0.5580080461019897, "grad_norm": 2.376961505711413, "learning_rate": 4.305858812672921e-06, "loss": 0.9462, "step": 15396 }, { "epoch": 0.5580442898046465, "grad_norm": 2.217454696476828, "learning_rate": 4.305277572648116e-06, "loss": 0.8195, "step": 15397 }, { "epoch": 0.5580805335073031, "grad_norm": 2.713060329678366, "learning_rate": 4.304696342196166e-06, "loss": 1.0502, "step": 15398 }, { "epoch": 0.5581167772099598, "grad_norm": 2.389218629719136, "learning_rate": 4.3041151213250795e-06, "loss": 0.8451, "step": 15399 }, { "epoch": 0.5581530209126164, "grad_norm": 2.267389114931648, "learning_rate": 4.303533910042867e-06, "loss": 1.0321, "step": 15400 }, { "epoch": 0.5581892646152731, "grad_norm": 2.4884878724948587, "learning_rate": 4.302952708357535e-06, "loss": 0.8675, "step": 15401 }, { "epoch": 0.5582255083179297, "grad_norm": 2.457054360582619, "learning_rate": 4.302371516277094e-06, "loss": 1.051, "step": 15402 }, { "epoch": 0.5582617520205865, "grad_norm": 2.1827091242336274, "learning_rate": 4.301790333809549e-06, "loss": 1.0891, "step": 15403 }, { "epoch": 0.5582979957232431, "grad_norm": 2.277383007550685, "learning_rate": 4.301209160962913e-06, "loss": 0.825, "step": 15404 }, { "epoch": 0.5583342394258998, "grad_norm": 2.2823576807516877, "learning_rate": 4.300627997745192e-06, "loss": 0.9072, "step": 15405 }, { "epoch": 0.5583704831285564, "grad_norm": 2.4183781096969303, "learning_rate": 4.300046844164395e-06, "loss": 1.0272, "step": 15406 }, { "epoch": 0.5584067268312131, "grad_norm": 2.096294677251607, "learning_rate": 4.2994657002285275e-06, "loss": 0.8232, "step": 15407 }, { "epoch": 0.5584429705338697, "grad_norm": 2.7054947120091, "learning_rate": 4.298884565945601e-06, "loss": 0.9343, "step": 15408 }, { "epoch": 0.5584792142365264, "grad_norm": 2.188762069630064, "learning_rate": 4.29830344132362e-06, "loss": 0.7272, "step": 15409 }, { "epoch": 0.5585154579391831, "grad_norm": 2.181359070746349, "learning_rate": 4.297722326370593e-06, "loss": 0.8761, "step": 15410 }, { "epoch": 0.5585517016418398, "grad_norm": 2.4188531097762787, "learning_rate": 4.297141221094527e-06, "loss": 0.9375, "step": 15411 }, { "epoch": 0.5585879453444964, "grad_norm": 2.340265016973712, "learning_rate": 4.29656012550343e-06, "loss": 0.9781, "step": 15412 }, { "epoch": 0.558624189047153, "grad_norm": 2.279618515199376, "learning_rate": 4.295979039605311e-06, "loss": 0.6976, "step": 15413 }, { "epoch": 0.5586604327498097, "grad_norm": 2.2946561470801496, "learning_rate": 4.295397963408174e-06, "loss": 0.8327, "step": 15414 }, { "epoch": 0.5586966764524663, "grad_norm": 2.3005284932384678, "learning_rate": 4.294816896920025e-06, "loss": 0.8008, "step": 15415 }, { "epoch": 0.5587329201551231, "grad_norm": 2.1373551669482618, "learning_rate": 4.294235840148875e-06, "loss": 0.895, "step": 15416 }, { "epoch": 0.5587691638577797, "grad_norm": 2.2357690681209657, "learning_rate": 4.293654793102728e-06, "loss": 0.8548, "step": 15417 }, { "epoch": 0.5588054075604364, "grad_norm": 2.335023700767328, "learning_rate": 4.293073755789591e-06, "loss": 1.0055, "step": 15418 }, { "epoch": 0.558841651263093, "grad_norm": 2.4863442196683607, "learning_rate": 4.292492728217469e-06, "loss": 0.9011, "step": 15419 }, { "epoch": 0.5588778949657497, "grad_norm": 2.174053790501343, "learning_rate": 4.29191171039437e-06, "loss": 0.8543, "step": 15420 }, { "epoch": 0.5589141386684063, "grad_norm": 2.2704514179245883, "learning_rate": 4.2913307023283e-06, "loss": 0.7766, "step": 15421 }, { "epoch": 0.558950382371063, "grad_norm": 2.3226962873292547, "learning_rate": 4.290749704027264e-06, "loss": 0.7934, "step": 15422 }, { "epoch": 0.5589866260737197, "grad_norm": 2.476528772083828, "learning_rate": 4.290168715499266e-06, "loss": 1.0336, "step": 15423 }, { "epoch": 0.5590228697763764, "grad_norm": 2.368890783709347, "learning_rate": 4.289587736752316e-06, "loss": 0.7787, "step": 15424 }, { "epoch": 0.559059113479033, "grad_norm": 2.377159978043614, "learning_rate": 4.289006767794417e-06, "loss": 1.0458, "step": 15425 }, { "epoch": 0.5590953571816897, "grad_norm": 2.40344546869866, "learning_rate": 4.2884258086335755e-06, "loss": 0.9685, "step": 15426 }, { "epoch": 0.5591316008843463, "grad_norm": 3.1169916509771807, "learning_rate": 4.287844859277795e-06, "loss": 0.8847, "step": 15427 }, { "epoch": 0.559167844587003, "grad_norm": 2.225758998636139, "learning_rate": 4.287263919735082e-06, "loss": 0.9641, "step": 15428 }, { "epoch": 0.5592040882896596, "grad_norm": 2.3376138497082626, "learning_rate": 4.286682990013441e-06, "loss": 0.9507, "step": 15429 }, { "epoch": 0.5592403319923164, "grad_norm": 2.5814997708449403, "learning_rate": 4.286102070120877e-06, "loss": 0.9043, "step": 15430 }, { "epoch": 0.559276575694973, "grad_norm": 3.0415753823752696, "learning_rate": 4.285521160065393e-06, "loss": 1.1209, "step": 15431 }, { "epoch": 0.5593128193976297, "grad_norm": 2.818375200368629, "learning_rate": 4.2849402598549975e-06, "loss": 0.95, "step": 15432 }, { "epoch": 0.5593490631002863, "grad_norm": 2.1586763326319045, "learning_rate": 4.284359369497692e-06, "loss": 0.7998, "step": 15433 }, { "epoch": 0.559385306802943, "grad_norm": 2.160483285147935, "learning_rate": 4.283778489001483e-06, "loss": 0.8946, "step": 15434 }, { "epoch": 0.5594215505055996, "grad_norm": 2.1634446703188814, "learning_rate": 4.28319761837437e-06, "loss": 0.8206, "step": 15435 }, { "epoch": 0.5594577942082564, "grad_norm": 2.9514106344892053, "learning_rate": 4.282616757624363e-06, "loss": 1.0026, "step": 15436 }, { "epoch": 0.559494037910913, "grad_norm": 2.084498661711327, "learning_rate": 4.282035906759463e-06, "loss": 0.7657, "step": 15437 }, { "epoch": 0.5595302816135697, "grad_norm": 2.398558723275672, "learning_rate": 4.281455065787674e-06, "loss": 0.9137, "step": 15438 }, { "epoch": 0.5595665253162263, "grad_norm": 2.484742801587363, "learning_rate": 4.280874234716999e-06, "loss": 0.7958, "step": 15439 }, { "epoch": 0.559602769018883, "grad_norm": 2.332824210600877, "learning_rate": 4.280293413555443e-06, "loss": 0.9269, "step": 15440 }, { "epoch": 0.5596390127215396, "grad_norm": 2.2011693878362157, "learning_rate": 4.279712602311008e-06, "loss": 1.0808, "step": 15441 }, { "epoch": 0.5596752564241962, "grad_norm": 2.609412148024497, "learning_rate": 4.279131800991698e-06, "loss": 1.0698, "step": 15442 }, { "epoch": 0.559711500126853, "grad_norm": 2.609309076348709, "learning_rate": 4.278551009605514e-06, "loss": 0.8083, "step": 15443 }, { "epoch": 0.5597477438295096, "grad_norm": 2.6899547963051402, "learning_rate": 4.277970228160462e-06, "loss": 0.9363, "step": 15444 }, { "epoch": 0.5597839875321663, "grad_norm": 2.4116449810030045, "learning_rate": 4.2773894566645446e-06, "loss": 0.8986, "step": 15445 }, { "epoch": 0.5598202312348229, "grad_norm": 2.593719660966548, "learning_rate": 4.2768086951257625e-06, "loss": 0.9134, "step": 15446 }, { "epoch": 0.5598564749374796, "grad_norm": 2.361534302885196, "learning_rate": 4.276227943552119e-06, "loss": 1.019, "step": 15447 }, { "epoch": 0.5598927186401362, "grad_norm": 2.3798000791241076, "learning_rate": 4.275647201951618e-06, "loss": 0.9634, "step": 15448 }, { "epoch": 0.559928962342793, "grad_norm": 2.4426153637304053, "learning_rate": 4.275066470332259e-06, "loss": 0.8813, "step": 15449 }, { "epoch": 0.5599652060454496, "grad_norm": 2.2555499193207313, "learning_rate": 4.274485748702047e-06, "loss": 1.0732, "step": 15450 }, { "epoch": 0.5600014497481063, "grad_norm": 2.5565587258992992, "learning_rate": 4.27390503706898e-06, "loss": 0.8424, "step": 15451 }, { "epoch": 0.5600376934507629, "grad_norm": 2.3213619623698394, "learning_rate": 4.2733243354410646e-06, "loss": 0.9228, "step": 15452 }, { "epoch": 0.5600739371534196, "grad_norm": 2.10608593158286, "learning_rate": 4.272743643826301e-06, "loss": 0.8723, "step": 15453 }, { "epoch": 0.5601101808560762, "grad_norm": 2.2091590981852347, "learning_rate": 4.27216296223269e-06, "loss": 0.8496, "step": 15454 }, { "epoch": 0.5601464245587329, "grad_norm": 2.5050206077518626, "learning_rate": 4.27158229066823e-06, "loss": 0.8144, "step": 15455 }, { "epoch": 0.5601826682613896, "grad_norm": 2.3978983963129172, "learning_rate": 4.271001629140928e-06, "loss": 0.9408, "step": 15456 }, { "epoch": 0.5602189119640463, "grad_norm": 2.2397197186800866, "learning_rate": 4.270420977658783e-06, "loss": 1.0702, "step": 15457 }, { "epoch": 0.5602551556667029, "grad_norm": 2.7680264788688667, "learning_rate": 4.269840336229796e-06, "loss": 0.9354, "step": 15458 }, { "epoch": 0.5602913993693596, "grad_norm": 2.2264947629385765, "learning_rate": 4.269259704861966e-06, "loss": 0.9494, "step": 15459 }, { "epoch": 0.5603276430720162, "grad_norm": 2.5417905939728525, "learning_rate": 4.268679083563296e-06, "loss": 0.996, "step": 15460 }, { "epoch": 0.5603638867746729, "grad_norm": 2.145559617196184, "learning_rate": 4.2680984723417865e-06, "loss": 0.8633, "step": 15461 }, { "epoch": 0.5604001304773296, "grad_norm": 2.367833874770119, "learning_rate": 4.267517871205438e-06, "loss": 0.9368, "step": 15462 }, { "epoch": 0.5604363741799863, "grad_norm": 2.3474942701141313, "learning_rate": 4.266937280162247e-06, "loss": 0.8542, "step": 15463 }, { "epoch": 0.5604726178826429, "grad_norm": 2.2560564463556654, "learning_rate": 4.26635669922022e-06, "loss": 0.8376, "step": 15464 }, { "epoch": 0.5605088615852996, "grad_norm": 2.147668264813152, "learning_rate": 4.265776128387353e-06, "loss": 0.7157, "step": 15465 }, { "epoch": 0.5605451052879562, "grad_norm": 2.5760185381606506, "learning_rate": 4.265195567671648e-06, "loss": 0.9422, "step": 15466 }, { "epoch": 0.5605813489906128, "grad_norm": 2.213740547708355, "learning_rate": 4.264615017081102e-06, "loss": 1.005, "step": 15467 }, { "epoch": 0.5606175926932695, "grad_norm": 2.5306751582908014, "learning_rate": 4.264034476623717e-06, "loss": 1.1314, "step": 15468 }, { "epoch": 0.5606538363959263, "grad_norm": 2.268966373232828, "learning_rate": 4.263453946307493e-06, "loss": 0.9852, "step": 15469 }, { "epoch": 0.5606900800985829, "grad_norm": 2.57855671782441, "learning_rate": 4.262873426140427e-06, "loss": 0.8877, "step": 15470 }, { "epoch": 0.5607263238012395, "grad_norm": 2.328975571771351, "learning_rate": 4.262292916130518e-06, "loss": 0.7133, "step": 15471 }, { "epoch": 0.5607625675038962, "grad_norm": 2.6381217908465104, "learning_rate": 4.261712416285768e-06, "loss": 1.0026, "step": 15472 }, { "epoch": 0.5607988112065528, "grad_norm": 2.238857799850009, "learning_rate": 4.261131926614175e-06, "loss": 0.8276, "step": 15473 }, { "epoch": 0.5608350549092095, "grad_norm": 2.4108324358227216, "learning_rate": 4.2605514471237355e-06, "loss": 1.1283, "step": 15474 }, { "epoch": 0.5608712986118662, "grad_norm": 2.638882502004746, "learning_rate": 4.259970977822451e-06, "loss": 1.0272, "step": 15475 }, { "epoch": 0.5609075423145229, "grad_norm": 2.2915761593315036, "learning_rate": 4.2593905187183195e-06, "loss": 0.8706, "step": 15476 }, { "epoch": 0.5609437860171795, "grad_norm": 2.2370527043750084, "learning_rate": 4.258810069819338e-06, "loss": 0.9963, "step": 15477 }, { "epoch": 0.5609800297198362, "grad_norm": 2.3159093791344834, "learning_rate": 4.258229631133504e-06, "loss": 0.7879, "step": 15478 }, { "epoch": 0.5610162734224928, "grad_norm": 2.5622786457645206, "learning_rate": 4.257649202668819e-06, "loss": 0.968, "step": 15479 }, { "epoch": 0.5610525171251495, "grad_norm": 2.2967114645682924, "learning_rate": 4.257068784433279e-06, "loss": 0.9597, "step": 15480 }, { "epoch": 0.5610887608278061, "grad_norm": 2.293053728528811, "learning_rate": 4.256488376434882e-06, "loss": 0.7657, "step": 15481 }, { "epoch": 0.5611250045304629, "grad_norm": 2.2866161315575377, "learning_rate": 4.255907978681623e-06, "loss": 0.8315, "step": 15482 }, { "epoch": 0.5611612482331195, "grad_norm": 2.326796103908929, "learning_rate": 4.255327591181504e-06, "loss": 0.8913, "step": 15483 }, { "epoch": 0.5611974919357762, "grad_norm": 2.291567264789597, "learning_rate": 4.254747213942521e-06, "loss": 0.7503, "step": 15484 }, { "epoch": 0.5612337356384328, "grad_norm": 2.5525172501292386, "learning_rate": 4.25416684697267e-06, "loss": 0.8456, "step": 15485 }, { "epoch": 0.5612699793410895, "grad_norm": 2.377126278730296, "learning_rate": 4.253586490279949e-06, "loss": 0.7578, "step": 15486 }, { "epoch": 0.5613062230437461, "grad_norm": 2.2804491844868906, "learning_rate": 4.253006143872355e-06, "loss": 1.0551, "step": 15487 }, { "epoch": 0.5613424667464028, "grad_norm": 2.4656855281921066, "learning_rate": 4.252425807757885e-06, "loss": 0.9253, "step": 15488 }, { "epoch": 0.5613787104490595, "grad_norm": 2.640454916670241, "learning_rate": 4.251845481944534e-06, "loss": 0.8589, "step": 15489 }, { "epoch": 0.5614149541517162, "grad_norm": 2.780412830798004, "learning_rate": 4.251265166440299e-06, "loss": 0.907, "step": 15490 }, { "epoch": 0.5614511978543728, "grad_norm": 2.2858829039722983, "learning_rate": 4.250684861253178e-06, "loss": 0.8551, "step": 15491 }, { "epoch": 0.5614874415570295, "grad_norm": 1.9629333827218585, "learning_rate": 4.2501045663911675e-06, "loss": 0.8063, "step": 15492 }, { "epoch": 0.5615236852596861, "grad_norm": 2.472319218599712, "learning_rate": 4.249524281862263e-06, "loss": 0.9754, "step": 15493 }, { "epoch": 0.5615599289623427, "grad_norm": 2.2417892514317117, "learning_rate": 4.248944007674458e-06, "loss": 0.7975, "step": 15494 }, { "epoch": 0.5615961726649995, "grad_norm": 2.43846030818467, "learning_rate": 4.2483637438357515e-06, "loss": 0.6665, "step": 15495 }, { "epoch": 0.5616324163676562, "grad_norm": 2.1335643156581483, "learning_rate": 4.247783490354138e-06, "loss": 0.76, "step": 15496 }, { "epoch": 0.5616686600703128, "grad_norm": 2.726276624494346, "learning_rate": 4.247203247237613e-06, "loss": 0.9694, "step": 15497 }, { "epoch": 0.5617049037729694, "grad_norm": 2.4822705311920616, "learning_rate": 4.24662301449417e-06, "loss": 0.8653, "step": 15498 }, { "epoch": 0.5617411474756261, "grad_norm": 2.0079132516323326, "learning_rate": 4.246042792131808e-06, "loss": 0.9093, "step": 15499 }, { "epoch": 0.5617773911782827, "grad_norm": 2.1025542099260957, "learning_rate": 4.245462580158522e-06, "loss": 0.8811, "step": 15500 }, { "epoch": 0.5618136348809394, "grad_norm": 2.428742242759399, "learning_rate": 4.244882378582304e-06, "loss": 0.9041, "step": 15501 }, { "epoch": 0.5618498785835961, "grad_norm": 2.171607562100504, "learning_rate": 4.244302187411148e-06, "loss": 0.7788, "step": 15502 }, { "epoch": 0.5618861222862528, "grad_norm": 2.0238753624704087, "learning_rate": 4.243722006653053e-06, "loss": 0.822, "step": 15503 }, { "epoch": 0.5619223659889094, "grad_norm": 2.541649397462669, "learning_rate": 4.243141836316012e-06, "loss": 0.9233, "step": 15504 }, { "epoch": 0.5619586096915661, "grad_norm": 2.1021998918755416, "learning_rate": 4.242561676408018e-06, "loss": 0.7779, "step": 15505 }, { "epoch": 0.5619948533942227, "grad_norm": 2.461948505244314, "learning_rate": 4.241981526937065e-06, "loss": 0.8922, "step": 15506 }, { "epoch": 0.5620310970968794, "grad_norm": 2.151167715328639, "learning_rate": 4.241401387911149e-06, "loss": 1.0503, "step": 15507 }, { "epoch": 0.5620673407995361, "grad_norm": 2.3891280009527565, "learning_rate": 4.240821259338264e-06, "loss": 0.8066, "step": 15508 }, { "epoch": 0.5621035845021928, "grad_norm": 2.4652210491692044, "learning_rate": 4.240241141226402e-06, "loss": 0.8443, "step": 15509 }, { "epoch": 0.5621398282048494, "grad_norm": 2.5312160175526337, "learning_rate": 4.2396610335835555e-06, "loss": 0.8566, "step": 15510 }, { "epoch": 0.5621760719075061, "grad_norm": 2.4615972116300315, "learning_rate": 4.239080936417723e-06, "loss": 0.9408, "step": 15511 }, { "epoch": 0.5622123156101627, "grad_norm": 2.378241622253818, "learning_rate": 4.238500849736894e-06, "loss": 0.8516, "step": 15512 }, { "epoch": 0.5622485593128194, "grad_norm": 2.6233672816721003, "learning_rate": 4.237920773549064e-06, "loss": 0.9661, "step": 15513 }, { "epoch": 0.562284803015476, "grad_norm": 1.9066793272811375, "learning_rate": 4.237340707862223e-06, "loss": 0.7949, "step": 15514 }, { "epoch": 0.5623210467181328, "grad_norm": 2.6714125821692485, "learning_rate": 4.236760652684368e-06, "loss": 0.946, "step": 15515 }, { "epoch": 0.5623572904207894, "grad_norm": 2.490019149110787, "learning_rate": 4.236180608023488e-06, "loss": 0.9081, "step": 15516 }, { "epoch": 0.5623935341234461, "grad_norm": 2.5487666615259834, "learning_rate": 4.235600573887578e-06, "loss": 1.0351, "step": 15517 }, { "epoch": 0.5624297778261027, "grad_norm": 2.291622320768891, "learning_rate": 4.235020550284627e-06, "loss": 0.8909, "step": 15518 }, { "epoch": 0.5624660215287594, "grad_norm": 2.2855939392030957, "learning_rate": 4.234440537222633e-06, "loss": 0.7923, "step": 15519 }, { "epoch": 0.562502265231416, "grad_norm": 2.395202184653925, "learning_rate": 4.233860534709585e-06, "loss": 0.848, "step": 15520 }, { "epoch": 0.5625385089340728, "grad_norm": 2.3605630760918683, "learning_rate": 4.233280542753476e-06, "loss": 0.8577, "step": 15521 }, { "epoch": 0.5625747526367294, "grad_norm": 2.1424659886342683, "learning_rate": 4.2327005613622954e-06, "loss": 0.9005, "step": 15522 }, { "epoch": 0.562610996339386, "grad_norm": 2.5874078799971176, "learning_rate": 4.232120590544038e-06, "loss": 0.829, "step": 15523 }, { "epoch": 0.5626472400420427, "grad_norm": 2.4174543764810608, "learning_rate": 4.231540630306696e-06, "loss": 0.9646, "step": 15524 }, { "epoch": 0.5626834837446993, "grad_norm": 2.077094046515802, "learning_rate": 4.2309606806582584e-06, "loss": 0.7238, "step": 15525 }, { "epoch": 0.562719727447356, "grad_norm": 2.5243572858839918, "learning_rate": 4.230380741606717e-06, "loss": 0.8058, "step": 15526 }, { "epoch": 0.5627559711500126, "grad_norm": 2.489144363418138, "learning_rate": 4.229800813160064e-06, "loss": 0.9871, "step": 15527 }, { "epoch": 0.5627922148526694, "grad_norm": 2.2092883385333395, "learning_rate": 4.22922089532629e-06, "loss": 0.9102, "step": 15528 }, { "epoch": 0.562828458555326, "grad_norm": 2.6935516785102602, "learning_rate": 4.228640988113386e-06, "loss": 0.9568, "step": 15529 }, { "epoch": 0.5628647022579827, "grad_norm": 2.4798373347993787, "learning_rate": 4.22806109152934e-06, "loss": 0.8389, "step": 15530 }, { "epoch": 0.5629009459606393, "grad_norm": 2.436973994976563, "learning_rate": 4.227481205582149e-06, "loss": 0.9267, "step": 15531 }, { "epoch": 0.562937189663296, "grad_norm": 2.2921861142285422, "learning_rate": 4.226901330279799e-06, "loss": 0.7589, "step": 15532 }, { "epoch": 0.5629734333659526, "grad_norm": 2.2266665110248107, "learning_rate": 4.2263214656302805e-06, "loss": 0.9617, "step": 15533 }, { "epoch": 0.5630096770686094, "grad_norm": 2.18719010038041, "learning_rate": 4.225741611641584e-06, "loss": 0.881, "step": 15534 }, { "epoch": 0.563045920771266, "grad_norm": 2.32801915115162, "learning_rate": 4.2251617683217014e-06, "loss": 1.2183, "step": 15535 }, { "epoch": 0.5630821644739227, "grad_norm": 2.3757182555934517, "learning_rate": 4.22458193567862e-06, "loss": 0.8195, "step": 15536 }, { "epoch": 0.5631184081765793, "grad_norm": 3.1426062933689694, "learning_rate": 4.2240021137203304e-06, "loss": 0.9344, "step": 15537 }, { "epoch": 0.563154651879236, "grad_norm": 2.477633363146414, "learning_rate": 4.223422302454821e-06, "loss": 0.9836, "step": 15538 }, { "epoch": 0.5631908955818926, "grad_norm": 2.303254195388714, "learning_rate": 4.222842501890085e-06, "loss": 0.969, "step": 15539 }, { "epoch": 0.5632271392845493, "grad_norm": 1.925419663969825, "learning_rate": 4.222262712034108e-06, "loss": 0.7605, "step": 15540 }, { "epoch": 0.563263382987206, "grad_norm": 2.2953734368113143, "learning_rate": 4.221682932894882e-06, "loss": 0.9361, "step": 15541 }, { "epoch": 0.5632996266898627, "grad_norm": 2.614811014813178, "learning_rate": 4.2211031644803915e-06, "loss": 0.8907, "step": 15542 }, { "epoch": 0.5633358703925193, "grad_norm": 3.1570154937691006, "learning_rate": 4.22052340679863e-06, "loss": 0.8968, "step": 15543 }, { "epoch": 0.563372114095176, "grad_norm": 2.2674567554445932, "learning_rate": 4.2199436598575846e-06, "loss": 0.9297, "step": 15544 }, { "epoch": 0.5634083577978326, "grad_norm": 2.4963501335423546, "learning_rate": 4.219363923665245e-06, "loss": 0.8888, "step": 15545 }, { "epoch": 0.5634446015004893, "grad_norm": 2.2941279684546854, "learning_rate": 4.2187841982295966e-06, "loss": 0.8583, "step": 15546 }, { "epoch": 0.563480845203146, "grad_norm": 2.449972903671118, "learning_rate": 4.218204483558631e-06, "loss": 0.8917, "step": 15547 }, { "epoch": 0.5635170889058027, "grad_norm": 2.392844027945295, "learning_rate": 4.217624779660334e-06, "loss": 0.9046, "step": 15548 }, { "epoch": 0.5635533326084593, "grad_norm": 2.3432098644365187, "learning_rate": 4.217045086542695e-06, "loss": 0.8766, "step": 15549 }, { "epoch": 0.563589576311116, "grad_norm": 1.9819015835842522, "learning_rate": 4.2164654042136984e-06, "loss": 0.9058, "step": 15550 }, { "epoch": 0.5636258200137726, "grad_norm": 2.1083258324762055, "learning_rate": 4.215885732681337e-06, "loss": 1.0034, "step": 15551 }, { "epoch": 0.5636620637164292, "grad_norm": 2.307317744691491, "learning_rate": 4.215306071953596e-06, "loss": 0.9509, "step": 15552 }, { "epoch": 0.5636983074190859, "grad_norm": 2.4176071679663154, "learning_rate": 4.214726422038462e-06, "loss": 1.0136, "step": 15553 }, { "epoch": 0.5637345511217426, "grad_norm": 2.1234469525604345, "learning_rate": 4.214146782943924e-06, "loss": 0.833, "step": 15554 }, { "epoch": 0.5637707948243993, "grad_norm": 2.9174448234341455, "learning_rate": 4.213567154677968e-06, "loss": 0.7243, "step": 15555 }, { "epoch": 0.5638070385270559, "grad_norm": 1.9685104399875377, "learning_rate": 4.2129875372485815e-06, "loss": 0.8995, "step": 15556 }, { "epoch": 0.5638432822297126, "grad_norm": 2.5106755522706865, "learning_rate": 4.212407930663748e-06, "loss": 0.9161, "step": 15557 }, { "epoch": 0.5638795259323692, "grad_norm": 2.304204609939467, "learning_rate": 4.211828334931459e-06, "loss": 0.8199, "step": 15558 }, { "epoch": 0.5639157696350259, "grad_norm": 2.4109221994855177, "learning_rate": 4.2112487500596985e-06, "loss": 0.8807, "step": 15559 }, { "epoch": 0.5639520133376825, "grad_norm": 2.0345756220994993, "learning_rate": 4.210669176056454e-06, "loss": 0.7752, "step": 15560 }, { "epoch": 0.5639882570403393, "grad_norm": 2.763735114002893, "learning_rate": 4.21008961292971e-06, "loss": 0.8603, "step": 15561 }, { "epoch": 0.5640245007429959, "grad_norm": 2.7725346881725144, "learning_rate": 4.209510060687454e-06, "loss": 0.989, "step": 15562 }, { "epoch": 0.5640607444456526, "grad_norm": 2.3296486483728676, "learning_rate": 4.208930519337671e-06, "loss": 1.005, "step": 15563 }, { "epoch": 0.5640969881483092, "grad_norm": 2.525510059043541, "learning_rate": 4.208350988888348e-06, "loss": 0.824, "step": 15564 }, { "epoch": 0.5641332318509659, "grad_norm": 2.254765742921057, "learning_rate": 4.207771469347466e-06, "loss": 0.887, "step": 15565 }, { "epoch": 0.5641694755536225, "grad_norm": 2.5271260121673267, "learning_rate": 4.207191960723017e-06, "loss": 0.9183, "step": 15566 }, { "epoch": 0.5642057192562793, "grad_norm": 2.2580954126230166, "learning_rate": 4.206612463022983e-06, "loss": 1.0499, "step": 15567 }, { "epoch": 0.5642419629589359, "grad_norm": 2.278768657749863, "learning_rate": 4.20603297625535e-06, "loss": 0.6445, "step": 15568 }, { "epoch": 0.5642782066615926, "grad_norm": 2.599004714861308, "learning_rate": 4.205453500428101e-06, "loss": 0.8489, "step": 15569 }, { "epoch": 0.5643144503642492, "grad_norm": 2.506626927752088, "learning_rate": 4.204874035549222e-06, "loss": 0.856, "step": 15570 }, { "epoch": 0.5643506940669059, "grad_norm": 2.1327892435478044, "learning_rate": 4.2042945816267e-06, "loss": 0.742, "step": 15571 }, { "epoch": 0.5643869377695625, "grad_norm": 2.2799118952193553, "learning_rate": 4.203715138668516e-06, "loss": 0.7674, "step": 15572 }, { "epoch": 0.5644231814722191, "grad_norm": 2.430173706795157, "learning_rate": 4.203135706682656e-06, "loss": 0.9443, "step": 15573 }, { "epoch": 0.5644594251748759, "grad_norm": 2.0501731048520138, "learning_rate": 4.2025562856771045e-06, "loss": 0.85, "step": 15574 }, { "epoch": 0.5644956688775326, "grad_norm": 2.233903708452999, "learning_rate": 4.201976875659845e-06, "loss": 0.7215, "step": 15575 }, { "epoch": 0.5645319125801892, "grad_norm": 2.112775772117442, "learning_rate": 4.201397476638862e-06, "loss": 0.9581, "step": 15576 }, { "epoch": 0.5645681562828458, "grad_norm": 2.510992069759499, "learning_rate": 4.2008180886221364e-06, "loss": 0.878, "step": 15577 }, { "epoch": 0.5646043999855025, "grad_norm": 2.187212025468156, "learning_rate": 4.200238711617656e-06, "loss": 0.9736, "step": 15578 }, { "epoch": 0.5646406436881591, "grad_norm": 2.1266139047493087, "learning_rate": 4.199659345633403e-06, "loss": 0.9574, "step": 15579 }, { "epoch": 0.5646768873908159, "grad_norm": 2.2132775142214474, "learning_rate": 4.19907999067736e-06, "loss": 0.8508, "step": 15580 }, { "epoch": 0.5647131310934725, "grad_norm": 2.18818033356068, "learning_rate": 4.198500646757509e-06, "loss": 0.9411, "step": 15581 }, { "epoch": 0.5647493747961292, "grad_norm": 2.196220290099176, "learning_rate": 4.197921313881836e-06, "loss": 0.9776, "step": 15582 }, { "epoch": 0.5647856184987858, "grad_norm": 2.503247614948205, "learning_rate": 4.197341992058321e-06, "loss": 0.8717, "step": 15583 }, { "epoch": 0.5648218622014425, "grad_norm": 2.4149815667283376, "learning_rate": 4.196762681294949e-06, "loss": 0.9753, "step": 15584 }, { "epoch": 0.5648581059040991, "grad_norm": 2.4288376739317616, "learning_rate": 4.1961833815997e-06, "loss": 0.9646, "step": 15585 }, { "epoch": 0.5648943496067558, "grad_norm": 2.5598568917540803, "learning_rate": 4.1956040929805585e-06, "loss": 0.9439, "step": 15586 }, { "epoch": 0.5649305933094125, "grad_norm": 2.255655789557023, "learning_rate": 4.195024815445507e-06, "loss": 0.9955, "step": 15587 }, { "epoch": 0.5649668370120692, "grad_norm": 2.078453859648802, "learning_rate": 4.194445549002526e-06, "loss": 1.0015, "step": 15588 }, { "epoch": 0.5650030807147258, "grad_norm": 2.2301159980508514, "learning_rate": 4.193866293659597e-06, "loss": 0.7677, "step": 15589 }, { "epoch": 0.5650393244173825, "grad_norm": 2.4239809834484296, "learning_rate": 4.193287049424704e-06, "loss": 0.9159, "step": 15590 }, { "epoch": 0.5650755681200391, "grad_norm": 2.2970274013347187, "learning_rate": 4.192707816305828e-06, "loss": 0.8993, "step": 15591 }, { "epoch": 0.5651118118226958, "grad_norm": 2.494697088247408, "learning_rate": 4.19212859431095e-06, "loss": 0.9495, "step": 15592 }, { "epoch": 0.5651480555253525, "grad_norm": 2.7867835140385084, "learning_rate": 4.1915493834480505e-06, "loss": 0.9612, "step": 15593 }, { "epoch": 0.5651842992280092, "grad_norm": 2.3680574232779437, "learning_rate": 4.190970183725113e-06, "loss": 0.8644, "step": 15594 }, { "epoch": 0.5652205429306658, "grad_norm": 2.4830441602768634, "learning_rate": 4.1903909951501165e-06, "loss": 0.9736, "step": 15595 }, { "epoch": 0.5652567866333225, "grad_norm": 2.0795338355951833, "learning_rate": 4.189811817731043e-06, "loss": 1.0036, "step": 15596 }, { "epoch": 0.5652930303359791, "grad_norm": 2.39646145372658, "learning_rate": 4.18923265147587e-06, "loss": 0.872, "step": 15597 }, { "epoch": 0.5653292740386358, "grad_norm": 2.4753133565635017, "learning_rate": 4.188653496392583e-06, "loss": 0.9894, "step": 15598 }, { "epoch": 0.5653655177412924, "grad_norm": 2.2644246617001493, "learning_rate": 4.188074352489161e-06, "loss": 1.0929, "step": 15599 }, { "epoch": 0.5654017614439492, "grad_norm": 2.070270604910965, "learning_rate": 4.1874952197735815e-06, "loss": 0.8834, "step": 15600 }, { "epoch": 0.5654380051466058, "grad_norm": 2.296572004896761, "learning_rate": 4.186916098253827e-06, "loss": 0.8839, "step": 15601 }, { "epoch": 0.5654742488492625, "grad_norm": 2.1767449484396137, "learning_rate": 4.186336987937878e-06, "loss": 0.7022, "step": 15602 }, { "epoch": 0.5655104925519191, "grad_norm": 2.420616260526114, "learning_rate": 4.185757888833712e-06, "loss": 0.9445, "step": 15603 }, { "epoch": 0.5655467362545757, "grad_norm": 2.4094232922343433, "learning_rate": 4.185178800949311e-06, "loss": 0.8287, "step": 15604 }, { "epoch": 0.5655829799572324, "grad_norm": 2.4146496180731134, "learning_rate": 4.18459972429265e-06, "loss": 0.9229, "step": 15605 }, { "epoch": 0.5656192236598891, "grad_norm": 2.238671315113632, "learning_rate": 4.184020658871715e-06, "loss": 0.8849, "step": 15606 }, { "epoch": 0.5656554673625458, "grad_norm": 2.4480892538889614, "learning_rate": 4.183441604694481e-06, "loss": 1.0697, "step": 15607 }, { "epoch": 0.5656917110652024, "grad_norm": 2.2658975728355117, "learning_rate": 4.182862561768928e-06, "loss": 0.6902, "step": 15608 }, { "epoch": 0.5657279547678591, "grad_norm": 2.2745901710572376, "learning_rate": 4.182283530103033e-06, "loss": 0.7794, "step": 15609 }, { "epoch": 0.5657641984705157, "grad_norm": 2.271128612912743, "learning_rate": 4.181704509704778e-06, "loss": 0.8692, "step": 15610 }, { "epoch": 0.5658004421731724, "grad_norm": 2.453902868416134, "learning_rate": 4.181125500582141e-06, "loss": 0.8544, "step": 15611 }, { "epoch": 0.565836685875829, "grad_norm": 2.324184453127793, "learning_rate": 4.180546502743099e-06, "loss": 0.9278, "step": 15612 }, { "epoch": 0.5658729295784858, "grad_norm": 2.3351953658697027, "learning_rate": 4.179967516195629e-06, "loss": 0.9433, "step": 15613 }, { "epoch": 0.5659091732811424, "grad_norm": 2.4211314598057245, "learning_rate": 4.179388540947712e-06, "loss": 1.0068, "step": 15614 }, { "epoch": 0.5659454169837991, "grad_norm": 2.5575608054451404, "learning_rate": 4.178809577007324e-06, "loss": 0.9174, "step": 15615 }, { "epoch": 0.5659816606864557, "grad_norm": 2.2312156731387702, "learning_rate": 4.178230624382444e-06, "loss": 0.7142, "step": 15616 }, { "epoch": 0.5660179043891124, "grad_norm": 2.6665156621033046, "learning_rate": 4.177651683081047e-06, "loss": 0.9225, "step": 15617 }, { "epoch": 0.566054148091769, "grad_norm": 2.4849785472873465, "learning_rate": 4.1770727531111146e-06, "loss": 0.7897, "step": 15618 }, { "epoch": 0.5660903917944258, "grad_norm": 2.072476598741413, "learning_rate": 4.176493834480622e-06, "loss": 0.798, "step": 15619 }, { "epoch": 0.5661266354970824, "grad_norm": 2.362136745379298, "learning_rate": 4.175914927197546e-06, "loss": 0.8423, "step": 15620 }, { "epoch": 0.5661628791997391, "grad_norm": 2.489029550060739, "learning_rate": 4.175336031269863e-06, "loss": 0.978, "step": 15621 }, { "epoch": 0.5661991229023957, "grad_norm": 2.3155423284926395, "learning_rate": 4.174757146705552e-06, "loss": 1.0118, "step": 15622 }, { "epoch": 0.5662353666050524, "grad_norm": 2.3939308943347495, "learning_rate": 4.174178273512588e-06, "loss": 1.0437, "step": 15623 }, { "epoch": 0.566271610307709, "grad_norm": 2.471170064725668, "learning_rate": 4.173599411698948e-06, "loss": 0.8584, "step": 15624 }, { "epoch": 0.5663078540103657, "grad_norm": 2.3936482576996454, "learning_rate": 4.173020561272606e-06, "loss": 0.9951, "step": 15625 }, { "epoch": 0.5663440977130224, "grad_norm": 2.3946823110633777, "learning_rate": 4.172441722241543e-06, "loss": 1.0237, "step": 15626 }, { "epoch": 0.5663803414156791, "grad_norm": 2.550592533289819, "learning_rate": 4.171862894613733e-06, "loss": 0.7761, "step": 15627 }, { "epoch": 0.5664165851183357, "grad_norm": 2.5704272553601677, "learning_rate": 4.171284078397151e-06, "loss": 0.8436, "step": 15628 }, { "epoch": 0.5664528288209923, "grad_norm": 2.330662147612999, "learning_rate": 4.170705273599771e-06, "loss": 0.868, "step": 15629 }, { "epoch": 0.566489072523649, "grad_norm": 2.524613034272548, "learning_rate": 4.1701264802295735e-06, "loss": 0.9749, "step": 15630 }, { "epoch": 0.5665253162263056, "grad_norm": 2.440757318321808, "learning_rate": 4.169547698294531e-06, "loss": 0.9737, "step": 15631 }, { "epoch": 0.5665615599289623, "grad_norm": 2.3692328981931525, "learning_rate": 4.168968927802618e-06, "loss": 0.9282, "step": 15632 }, { "epoch": 0.566597803631619, "grad_norm": 2.4085533484074384, "learning_rate": 4.16839016876181e-06, "loss": 0.8755, "step": 15633 }, { "epoch": 0.5666340473342757, "grad_norm": 2.4524754458747395, "learning_rate": 4.167811421180084e-06, "loss": 0.8941, "step": 15634 }, { "epoch": 0.5666702910369323, "grad_norm": 2.302145545932792, "learning_rate": 4.167232685065413e-06, "loss": 0.844, "step": 15635 }, { "epoch": 0.566706534739589, "grad_norm": 2.3229897349806556, "learning_rate": 4.1666539604257706e-06, "loss": 0.8741, "step": 15636 }, { "epoch": 0.5667427784422456, "grad_norm": 2.316661937041517, "learning_rate": 4.166075247269134e-06, "loss": 1.0667, "step": 15637 }, { "epoch": 0.5667790221449023, "grad_norm": 2.1975471028924516, "learning_rate": 4.165496545603476e-06, "loss": 0.9813, "step": 15638 }, { "epoch": 0.566815265847559, "grad_norm": 2.1087020733845563, "learning_rate": 4.1649178554367715e-06, "loss": 0.7936, "step": 15639 }, { "epoch": 0.5668515095502157, "grad_norm": 2.3338005237506034, "learning_rate": 4.1643391767769925e-06, "loss": 0.7404, "step": 15640 }, { "epoch": 0.5668877532528723, "grad_norm": 2.5479841593364827, "learning_rate": 4.163760509632116e-06, "loss": 0.835, "step": 15641 }, { "epoch": 0.566923996955529, "grad_norm": 2.244910401513323, "learning_rate": 4.1631818540101126e-06, "loss": 0.7996, "step": 15642 }, { "epoch": 0.5669602406581856, "grad_norm": 2.526663098345338, "learning_rate": 4.162603209918959e-06, "loss": 0.8811, "step": 15643 }, { "epoch": 0.5669964843608423, "grad_norm": 2.3900146810907743, "learning_rate": 4.162024577366622e-06, "loss": 0.8291, "step": 15644 }, { "epoch": 0.5670327280634989, "grad_norm": 2.4066552158980774, "learning_rate": 4.161445956361084e-06, "loss": 0.8091, "step": 15645 }, { "epoch": 0.5670689717661557, "grad_norm": 2.3560857136101157, "learning_rate": 4.160867346910312e-06, "loss": 0.817, "step": 15646 }, { "epoch": 0.5671052154688123, "grad_norm": 2.524058857059252, "learning_rate": 4.160288749022282e-06, "loss": 0.9132, "step": 15647 }, { "epoch": 0.567141459171469, "grad_norm": 2.6819178401469603, "learning_rate": 4.159710162704963e-06, "loss": 0.9004, "step": 15648 }, { "epoch": 0.5671777028741256, "grad_norm": 2.4442764786417257, "learning_rate": 4.159131587966332e-06, "loss": 0.9061, "step": 15649 }, { "epoch": 0.5672139465767823, "grad_norm": 1.921059220867596, "learning_rate": 4.158553024814357e-06, "loss": 0.8309, "step": 15650 }, { "epoch": 0.5672501902794389, "grad_norm": 2.486966595751172, "learning_rate": 4.157974473257014e-06, "loss": 1.06, "step": 15651 }, { "epoch": 0.5672864339820957, "grad_norm": 2.5327944836322933, "learning_rate": 4.157395933302271e-06, "loss": 0.8802, "step": 15652 }, { "epoch": 0.5673226776847523, "grad_norm": 2.044292468973641, "learning_rate": 4.156817404958105e-06, "loss": 1.0182, "step": 15653 }, { "epoch": 0.567358921387409, "grad_norm": 2.3030100298254497, "learning_rate": 4.156238888232485e-06, "loss": 0.739, "step": 15654 }, { "epoch": 0.5673951650900656, "grad_norm": 2.3699941168947034, "learning_rate": 4.155660383133383e-06, "loss": 0.7323, "step": 15655 }, { "epoch": 0.5674314087927222, "grad_norm": 2.3236210919709066, "learning_rate": 4.155081889668768e-06, "loss": 1.0317, "step": 15656 }, { "epoch": 0.5674676524953789, "grad_norm": 2.408631053515815, "learning_rate": 4.154503407846615e-06, "loss": 0.9439, "step": 15657 }, { "epoch": 0.5675038961980355, "grad_norm": 2.2962336464473756, "learning_rate": 4.153924937674895e-06, "loss": 0.8392, "step": 15658 }, { "epoch": 0.5675401399006923, "grad_norm": 2.3758445217786406, "learning_rate": 4.153346479161577e-06, "loss": 0.9486, "step": 15659 }, { "epoch": 0.5675763836033489, "grad_norm": 2.3206884606348517, "learning_rate": 4.152768032314632e-06, "loss": 1.0683, "step": 15660 }, { "epoch": 0.5676126273060056, "grad_norm": 2.1316163249803073, "learning_rate": 4.152189597142031e-06, "loss": 0.8601, "step": 15661 }, { "epoch": 0.5676488710086622, "grad_norm": 2.3231306715239213, "learning_rate": 4.151611173651745e-06, "loss": 1.0007, "step": 15662 }, { "epoch": 0.5676851147113189, "grad_norm": 2.494576612918634, "learning_rate": 4.1510327618517444e-06, "loss": 0.7383, "step": 15663 }, { "epoch": 0.5677213584139755, "grad_norm": 2.3615106300682775, "learning_rate": 4.150454361749996e-06, "loss": 1.1171, "step": 15664 }, { "epoch": 0.5677576021166323, "grad_norm": 2.050995199657615, "learning_rate": 4.1498759733544755e-06, "loss": 0.7823, "step": 15665 }, { "epoch": 0.5677938458192889, "grad_norm": 2.4996259187020162, "learning_rate": 4.149297596673149e-06, "loss": 0.824, "step": 15666 }, { "epoch": 0.5678300895219456, "grad_norm": 2.625767489467702, "learning_rate": 4.148719231713989e-06, "loss": 0.9644, "step": 15667 }, { "epoch": 0.5678663332246022, "grad_norm": 2.4688275854015393, "learning_rate": 4.148140878484961e-06, "loss": 0.941, "step": 15668 }, { "epoch": 0.5679025769272589, "grad_norm": 2.634731761830498, "learning_rate": 4.147562536994038e-06, "loss": 0.9508, "step": 15669 }, { "epoch": 0.5679388206299155, "grad_norm": 2.7213244132261503, "learning_rate": 4.146984207249186e-06, "loss": 1.1546, "step": 15670 }, { "epoch": 0.5679750643325722, "grad_norm": 2.3388102086627933, "learning_rate": 4.146405889258378e-06, "loss": 0.9739, "step": 15671 }, { "epoch": 0.5680113080352289, "grad_norm": 2.494947158385749, "learning_rate": 4.1458275830295775e-06, "loss": 0.8771, "step": 15672 }, { "epoch": 0.5680475517378856, "grad_norm": 2.34438713058875, "learning_rate": 4.1452492885707585e-06, "loss": 0.7646, "step": 15673 }, { "epoch": 0.5680837954405422, "grad_norm": 2.1015242178567686, "learning_rate": 4.144671005889887e-06, "loss": 0.7409, "step": 15674 }, { "epoch": 0.5681200391431989, "grad_norm": 2.1389953937314417, "learning_rate": 4.1440927349949325e-06, "loss": 0.8831, "step": 15675 }, { "epoch": 0.5681562828458555, "grad_norm": 2.513218808448584, "learning_rate": 4.14351447589386e-06, "loss": 1.0251, "step": 15676 }, { "epoch": 0.5681925265485122, "grad_norm": 2.703384334841451, "learning_rate": 4.142936228594642e-06, "loss": 0.9682, "step": 15677 }, { "epoch": 0.5682287702511689, "grad_norm": 2.3776155482186065, "learning_rate": 4.142357993105245e-06, "loss": 1.1066, "step": 15678 }, { "epoch": 0.5682650139538256, "grad_norm": 2.3835100648488288, "learning_rate": 4.141779769433635e-06, "loss": 0.888, "step": 15679 }, { "epoch": 0.5683012576564822, "grad_norm": 2.5009110625729494, "learning_rate": 4.141201557587781e-06, "loss": 1.0525, "step": 15680 }, { "epoch": 0.5683375013591389, "grad_norm": 2.2392980634278357, "learning_rate": 4.14062335757565e-06, "loss": 0.8681, "step": 15681 }, { "epoch": 0.5683737450617955, "grad_norm": 2.119443359066216, "learning_rate": 4.14004516940521e-06, "loss": 0.7896, "step": 15682 }, { "epoch": 0.5684099887644521, "grad_norm": 2.704342981939034, "learning_rate": 4.139466993084428e-06, "loss": 0.9274, "step": 15683 }, { "epoch": 0.5684462324671088, "grad_norm": 1.9560288104693684, "learning_rate": 4.138888828621267e-06, "loss": 0.9389, "step": 15684 }, { "epoch": 0.5684824761697655, "grad_norm": 2.0317471244685352, "learning_rate": 4.1383106760237e-06, "loss": 0.9517, "step": 15685 }, { "epoch": 0.5685187198724222, "grad_norm": 2.2381122337307104, "learning_rate": 4.1377325352996914e-06, "loss": 0.8169, "step": 15686 }, { "epoch": 0.5685549635750788, "grad_norm": 2.5635475150569915, "learning_rate": 4.137154406457206e-06, "loss": 0.891, "step": 15687 }, { "epoch": 0.5685912072777355, "grad_norm": 2.3712182876896972, "learning_rate": 4.13657628950421e-06, "loss": 0.8995, "step": 15688 }, { "epoch": 0.5686274509803921, "grad_norm": 2.147301861114278, "learning_rate": 4.1359981844486726e-06, "loss": 0.8688, "step": 15689 }, { "epoch": 0.5686636946830488, "grad_norm": 2.2742025807021244, "learning_rate": 4.135420091298557e-06, "loss": 0.6987, "step": 15690 }, { "epoch": 0.5686999383857055, "grad_norm": 2.350501077252757, "learning_rate": 4.1348420100618295e-06, "loss": 0.7849, "step": 15691 }, { "epoch": 0.5687361820883622, "grad_norm": 1.931547866851067, "learning_rate": 4.134263940746454e-06, "loss": 0.8711, "step": 15692 }, { "epoch": 0.5687724257910188, "grad_norm": 2.4133069050525817, "learning_rate": 4.1336858833604e-06, "loss": 0.9179, "step": 15693 }, { "epoch": 0.5688086694936755, "grad_norm": 2.617564583063603, "learning_rate": 4.13310783791163e-06, "loss": 0.9853, "step": 15694 }, { "epoch": 0.5688449131963321, "grad_norm": 2.1820194719093426, "learning_rate": 4.1325298044081106e-06, "loss": 0.9872, "step": 15695 }, { "epoch": 0.5688811568989888, "grad_norm": 2.3232528485167596, "learning_rate": 4.131951782857803e-06, "loss": 0.9907, "step": 15696 }, { "epoch": 0.5689174006016454, "grad_norm": 2.2377928529839295, "learning_rate": 4.131373773268678e-06, "loss": 0.8769, "step": 15697 }, { "epoch": 0.5689536443043022, "grad_norm": 2.300908889102708, "learning_rate": 4.130795775648696e-06, "loss": 0.7104, "step": 15698 }, { "epoch": 0.5689898880069588, "grad_norm": 2.0728404197685255, "learning_rate": 4.130217790005823e-06, "loss": 0.839, "step": 15699 }, { "epoch": 0.5690261317096155, "grad_norm": 2.421726282433656, "learning_rate": 4.129639816348022e-06, "loss": 0.9006, "step": 15700 }, { "epoch": 0.5690623754122721, "grad_norm": 2.301312923017026, "learning_rate": 4.129061854683258e-06, "loss": 0.7704, "step": 15701 }, { "epoch": 0.5690986191149288, "grad_norm": 2.4055065432681144, "learning_rate": 4.128483905019496e-06, "loss": 0.8783, "step": 15702 }, { "epoch": 0.5691348628175854, "grad_norm": 2.5743541492161035, "learning_rate": 4.1279059673646986e-06, "loss": 0.9997, "step": 15703 }, { "epoch": 0.569171106520242, "grad_norm": 2.2392483341985896, "learning_rate": 4.127328041726827e-06, "loss": 0.9451, "step": 15704 }, { "epoch": 0.5692073502228988, "grad_norm": 2.2242884749473673, "learning_rate": 4.126750128113849e-06, "loss": 0.7588, "step": 15705 }, { "epoch": 0.5692435939255555, "grad_norm": 2.6345416633560994, "learning_rate": 4.126172226533727e-06, "loss": 0.8779, "step": 15706 }, { "epoch": 0.5692798376282121, "grad_norm": 2.4667177118526165, "learning_rate": 4.125594336994423e-06, "loss": 0.9016, "step": 15707 }, { "epoch": 0.5693160813308688, "grad_norm": 2.4235027386589456, "learning_rate": 4.125016459503899e-06, "loss": 1.0552, "step": 15708 }, { "epoch": 0.5693523250335254, "grad_norm": 2.3945455665631847, "learning_rate": 4.12443859407012e-06, "loss": 0.9245, "step": 15709 }, { "epoch": 0.569388568736182, "grad_norm": 2.376979458439775, "learning_rate": 4.1238607407010476e-06, "loss": 0.9504, "step": 15710 }, { "epoch": 0.5694248124388388, "grad_norm": 2.608496886905393, "learning_rate": 4.1232828994046445e-06, "loss": 0.9215, "step": 15711 }, { "epoch": 0.5694610561414954, "grad_norm": 2.337599461452684, "learning_rate": 4.12270507018887e-06, "loss": 0.9939, "step": 15712 }, { "epoch": 0.5694972998441521, "grad_norm": 2.478692653028947, "learning_rate": 4.122127253061692e-06, "loss": 0.9929, "step": 15713 }, { "epoch": 0.5695335435468087, "grad_norm": 2.4693484780242065, "learning_rate": 4.12154944803107e-06, "loss": 0.8619, "step": 15714 }, { "epoch": 0.5695697872494654, "grad_norm": 2.4078168534753934, "learning_rate": 4.120971655104962e-06, "loss": 0.8987, "step": 15715 }, { "epoch": 0.569606030952122, "grad_norm": 2.6249809276086626, "learning_rate": 4.120393874291336e-06, "loss": 1.0167, "step": 15716 }, { "epoch": 0.5696422746547787, "grad_norm": 2.1942853734687873, "learning_rate": 4.11981610559815e-06, "loss": 0.894, "step": 15717 }, { "epoch": 0.5696785183574354, "grad_norm": 2.451324372738457, "learning_rate": 4.119238349033366e-06, "loss": 0.9102, "step": 15718 }, { "epoch": 0.5697147620600921, "grad_norm": 2.3817115136819775, "learning_rate": 4.118660604604944e-06, "loss": 0.7524, "step": 15719 }, { "epoch": 0.5697510057627487, "grad_norm": 2.534949698648742, "learning_rate": 4.118082872320847e-06, "loss": 1.0072, "step": 15720 }, { "epoch": 0.5697872494654054, "grad_norm": 2.5214840219788384, "learning_rate": 4.117505152189033e-06, "loss": 1.0071, "step": 15721 }, { "epoch": 0.569823493168062, "grad_norm": 2.434151114828999, "learning_rate": 4.116927444217466e-06, "loss": 0.9029, "step": 15722 }, { "epoch": 0.5698597368707187, "grad_norm": 2.260298292105406, "learning_rate": 4.116349748414102e-06, "loss": 1.0389, "step": 15723 }, { "epoch": 0.5698959805733754, "grad_norm": 2.378412517772549, "learning_rate": 4.115772064786906e-06, "loss": 0.784, "step": 15724 }, { "epoch": 0.5699322242760321, "grad_norm": 2.0490743805111555, "learning_rate": 4.115194393343837e-06, "loss": 0.8374, "step": 15725 }, { "epoch": 0.5699684679786887, "grad_norm": 2.4282252010247225, "learning_rate": 4.114616734092854e-06, "loss": 0.8339, "step": 15726 }, { "epoch": 0.5700047116813454, "grad_norm": 3.141954444328495, "learning_rate": 4.114039087041915e-06, "loss": 0.9331, "step": 15727 }, { "epoch": 0.570040955384002, "grad_norm": 2.0736528578661972, "learning_rate": 4.113461452198983e-06, "loss": 0.8508, "step": 15728 }, { "epoch": 0.5700771990866587, "grad_norm": 2.0059943165465755, "learning_rate": 4.112883829572017e-06, "loss": 0.701, "step": 15729 }, { "epoch": 0.5701134427893153, "grad_norm": 9.163552345624584, "learning_rate": 4.112306219168975e-06, "loss": 0.798, "step": 15730 }, { "epoch": 0.5701496864919721, "grad_norm": 2.2060618264983063, "learning_rate": 4.111728620997813e-06, "loss": 1.027, "step": 15731 }, { "epoch": 0.5701859301946287, "grad_norm": 2.5055995576013714, "learning_rate": 4.111151035066497e-06, "loss": 0.9397, "step": 15732 }, { "epoch": 0.5702221738972854, "grad_norm": 2.632316017266309, "learning_rate": 4.110573461382981e-06, "loss": 0.9952, "step": 15733 }, { "epoch": 0.570258417599942, "grad_norm": 2.425477478436061, "learning_rate": 4.109995899955226e-06, "loss": 0.8255, "step": 15734 }, { "epoch": 0.5702946613025986, "grad_norm": 2.054484952736166, "learning_rate": 4.109418350791187e-06, "loss": 0.8315, "step": 15735 }, { "epoch": 0.5703309050052553, "grad_norm": 2.5906586352937766, "learning_rate": 4.1088408138988266e-06, "loss": 0.8888, "step": 15736 }, { "epoch": 0.570367148707912, "grad_norm": 2.1848472759058297, "learning_rate": 4.1082632892861e-06, "loss": 0.8797, "step": 15737 }, { "epoch": 0.5704033924105687, "grad_norm": 2.5934255425598822, "learning_rate": 4.107685776960966e-06, "loss": 0.864, "step": 15738 }, { "epoch": 0.5704396361132253, "grad_norm": 2.3511507804553093, "learning_rate": 4.1071082769313805e-06, "loss": 0.8501, "step": 15739 }, { "epoch": 0.570475879815882, "grad_norm": 2.35001570346359, "learning_rate": 4.106530789205305e-06, "loss": 0.8797, "step": 15740 }, { "epoch": 0.5705121235185386, "grad_norm": 2.1166664715388794, "learning_rate": 4.105953313790696e-06, "loss": 0.9505, "step": 15741 }, { "epoch": 0.5705483672211953, "grad_norm": 2.301365503091867, "learning_rate": 4.105375850695508e-06, "loss": 0.8687, "step": 15742 }, { "epoch": 0.5705846109238519, "grad_norm": 2.381036428029499, "learning_rate": 4.104798399927698e-06, "loss": 0.7698, "step": 15743 }, { "epoch": 0.5706208546265087, "grad_norm": 2.3027601570482688, "learning_rate": 4.104220961495227e-06, "loss": 0.8516, "step": 15744 }, { "epoch": 0.5706570983291653, "grad_norm": 2.6606324706287094, "learning_rate": 4.10364353540605e-06, "loss": 0.8662, "step": 15745 }, { "epoch": 0.570693342031822, "grad_norm": 2.20744520250344, "learning_rate": 4.103066121668122e-06, "loss": 0.736, "step": 15746 }, { "epoch": 0.5707295857344786, "grad_norm": 2.280167951566041, "learning_rate": 4.1024887202894e-06, "loss": 0.9025, "step": 15747 }, { "epoch": 0.5707658294371353, "grad_norm": 2.406194908913587, "learning_rate": 4.1019113312778406e-06, "loss": 0.8483, "step": 15748 }, { "epoch": 0.5708020731397919, "grad_norm": 2.294718329316949, "learning_rate": 4.1013339546414015e-06, "loss": 0.8084, "step": 15749 }, { "epoch": 0.5708383168424487, "grad_norm": 2.725427185224086, "learning_rate": 4.100756590388035e-06, "loss": 1.1522, "step": 15750 }, { "epoch": 0.5708745605451053, "grad_norm": 2.346674741433296, "learning_rate": 4.100179238525699e-06, "loss": 0.9046, "step": 15751 }, { "epoch": 0.570910804247762, "grad_norm": 2.4714977207969757, "learning_rate": 4.099601899062349e-06, "loss": 0.9544, "step": 15752 }, { "epoch": 0.5709470479504186, "grad_norm": 2.4941840928749284, "learning_rate": 4.099024572005942e-06, "loss": 0.9276, "step": 15753 }, { "epoch": 0.5709832916530753, "grad_norm": 2.0071578838742052, "learning_rate": 4.09844725736443e-06, "loss": 0.8227, "step": 15754 }, { "epoch": 0.5710195353557319, "grad_norm": 2.2094594237720857, "learning_rate": 4.09786995514577e-06, "loss": 0.7909, "step": 15755 }, { "epoch": 0.5710557790583886, "grad_norm": 2.4350931902373563, "learning_rate": 4.097292665357916e-06, "loss": 0.8014, "step": 15756 }, { "epoch": 0.5710920227610453, "grad_norm": 2.1489122581505073, "learning_rate": 4.096715388008824e-06, "loss": 0.7523, "step": 15757 }, { "epoch": 0.571128266463702, "grad_norm": 2.365863403118794, "learning_rate": 4.096138123106448e-06, "loss": 0.9976, "step": 15758 }, { "epoch": 0.5711645101663586, "grad_norm": 2.1827599250620424, "learning_rate": 4.095560870658739e-06, "loss": 0.9712, "step": 15759 }, { "epoch": 0.5712007538690153, "grad_norm": 2.1886716825468215, "learning_rate": 4.094983630673657e-06, "loss": 0.7371, "step": 15760 }, { "epoch": 0.5712369975716719, "grad_norm": 2.3582446313065693, "learning_rate": 4.094406403159152e-06, "loss": 0.9919, "step": 15761 }, { "epoch": 0.5712732412743285, "grad_norm": 2.57118308477675, "learning_rate": 4.093829188123181e-06, "loss": 0.9447, "step": 15762 }, { "epoch": 0.5713094849769852, "grad_norm": 2.5423067328869156, "learning_rate": 4.093251985573692e-06, "loss": 0.7184, "step": 15763 }, { "epoch": 0.571345728679642, "grad_norm": 2.3480250168356234, "learning_rate": 4.092674795518645e-06, "loss": 1.0948, "step": 15764 }, { "epoch": 0.5713819723822986, "grad_norm": 2.3572040825904743, "learning_rate": 4.092097617965991e-06, "loss": 0.9771, "step": 15765 }, { "epoch": 0.5714182160849552, "grad_norm": 2.203251249521907, "learning_rate": 4.0915204529236824e-06, "loss": 0.9892, "step": 15766 }, { "epoch": 0.5714544597876119, "grad_norm": 2.4983199202920265, "learning_rate": 4.090943300399671e-06, "loss": 0.9901, "step": 15767 }, { "epoch": 0.5714907034902685, "grad_norm": 2.2406372984882714, "learning_rate": 4.090366160401913e-06, "loss": 0.9389, "step": 15768 }, { "epoch": 0.5715269471929252, "grad_norm": 2.4567395912436467, "learning_rate": 4.089789032938359e-06, "loss": 0.8545, "step": 15769 }, { "epoch": 0.5715631908955819, "grad_norm": 2.2130755911594764, "learning_rate": 4.089211918016962e-06, "loss": 1.0091, "step": 15770 }, { "epoch": 0.5715994345982386, "grad_norm": 2.31008718908134, "learning_rate": 4.088634815645671e-06, "loss": 0.8733, "step": 15771 }, { "epoch": 0.5716356783008952, "grad_norm": 2.463440069965549, "learning_rate": 4.088057725832443e-06, "loss": 0.8085, "step": 15772 }, { "epoch": 0.5716719220035519, "grad_norm": 2.277044420585743, "learning_rate": 4.087480648585229e-06, "loss": 1.032, "step": 15773 }, { "epoch": 0.5717081657062085, "grad_norm": 2.2745284115461177, "learning_rate": 4.086903583911979e-06, "loss": 0.7353, "step": 15774 }, { "epoch": 0.5717444094088652, "grad_norm": 2.374825393601774, "learning_rate": 4.0863265318206444e-06, "loss": 0.9289, "step": 15775 }, { "epoch": 0.5717806531115218, "grad_norm": 2.6298004122042915, "learning_rate": 4.085749492319179e-06, "loss": 0.897, "step": 15776 }, { "epoch": 0.5718168968141786, "grad_norm": 2.2214677353727863, "learning_rate": 4.085172465415533e-06, "loss": 0.859, "step": 15777 }, { "epoch": 0.5718531405168352, "grad_norm": 2.4180557479343, "learning_rate": 4.084595451117657e-06, "loss": 0.9684, "step": 15778 }, { "epoch": 0.5718893842194919, "grad_norm": 2.106298322021948, "learning_rate": 4.084018449433499e-06, "loss": 0.7849, "step": 15779 }, { "epoch": 0.5719256279221485, "grad_norm": 2.3402663493955975, "learning_rate": 4.083441460371016e-06, "loss": 0.889, "step": 15780 }, { "epoch": 0.5719618716248052, "grad_norm": 2.297441156257891, "learning_rate": 4.0828644839381546e-06, "loss": 0.7924, "step": 15781 }, { "epoch": 0.5719981153274618, "grad_norm": 2.417825926718445, "learning_rate": 4.082287520142866e-06, "loss": 1.0626, "step": 15782 }, { "epoch": 0.5720343590301186, "grad_norm": 2.8666165978729796, "learning_rate": 4.081710568993099e-06, "loss": 0.8835, "step": 15783 }, { "epoch": 0.5720706027327752, "grad_norm": 2.204446227112286, "learning_rate": 4.081133630496806e-06, "loss": 1.0503, "step": 15784 }, { "epoch": 0.5721068464354319, "grad_norm": 2.240655943407681, "learning_rate": 4.080556704661937e-06, "loss": 0.7529, "step": 15785 }, { "epoch": 0.5721430901380885, "grad_norm": 2.2142208218619777, "learning_rate": 4.07997979149644e-06, "loss": 0.7013, "step": 15786 }, { "epoch": 0.5721793338407452, "grad_norm": 2.4826216479525747, "learning_rate": 4.079402891008264e-06, "loss": 0.9131, "step": 15787 }, { "epoch": 0.5722155775434018, "grad_norm": 2.755826702087086, "learning_rate": 4.078826003205361e-06, "loss": 0.8555, "step": 15788 }, { "epoch": 0.5722518212460584, "grad_norm": 2.446243091105124, "learning_rate": 4.078249128095678e-06, "loss": 1.0281, "step": 15789 }, { "epoch": 0.5722880649487152, "grad_norm": 2.3559796678034264, "learning_rate": 4.077672265687164e-06, "loss": 0.8827, "step": 15790 }, { "epoch": 0.5723243086513718, "grad_norm": 2.2324317356345826, "learning_rate": 4.077095415987768e-06, "loss": 0.883, "step": 15791 }, { "epoch": 0.5723605523540285, "grad_norm": 2.632083093467418, "learning_rate": 4.076518579005439e-06, "loss": 0.8959, "step": 15792 }, { "epoch": 0.5723967960566851, "grad_norm": 2.347209411288345, "learning_rate": 4.075941754748127e-06, "loss": 0.8819, "step": 15793 }, { "epoch": 0.5724330397593418, "grad_norm": 2.598973769354688, "learning_rate": 4.075364943223779e-06, "loss": 0.9621, "step": 15794 }, { "epoch": 0.5724692834619984, "grad_norm": 2.1401186151815708, "learning_rate": 4.07478814444034e-06, "loss": 0.8339, "step": 15795 }, { "epoch": 0.5725055271646552, "grad_norm": 1.7980445505329654, "learning_rate": 4.074211358405763e-06, "loss": 0.6406, "step": 15796 }, { "epoch": 0.5725417708673118, "grad_norm": 2.2182572449906486, "learning_rate": 4.073634585127994e-06, "loss": 0.8189, "step": 15797 }, { "epoch": 0.5725780145699685, "grad_norm": 2.327796636323115, "learning_rate": 4.073057824614978e-06, "loss": 0.9473, "step": 15798 }, { "epoch": 0.5726142582726251, "grad_norm": 2.030738488302088, "learning_rate": 4.072481076874666e-06, "loss": 0.7422, "step": 15799 }, { "epoch": 0.5726505019752818, "grad_norm": 2.1631326492868403, "learning_rate": 4.071904341915004e-06, "loss": 0.864, "step": 15800 }, { "epoch": 0.5726867456779384, "grad_norm": 2.1816611582920404, "learning_rate": 4.071327619743939e-06, "loss": 0.7774, "step": 15801 }, { "epoch": 0.5727229893805951, "grad_norm": 2.4227014329934478, "learning_rate": 4.070750910369416e-06, "loss": 0.9148, "step": 15802 }, { "epoch": 0.5727592330832518, "grad_norm": 2.389056900142237, "learning_rate": 4.070174213799385e-06, "loss": 0.929, "step": 15803 }, { "epoch": 0.5727954767859085, "grad_norm": 2.4637611770143146, "learning_rate": 4.069597530041791e-06, "loss": 1.0701, "step": 15804 }, { "epoch": 0.5728317204885651, "grad_norm": 2.4637392648600835, "learning_rate": 4.069020859104581e-06, "loss": 1.0837, "step": 15805 }, { "epoch": 0.5728679641912218, "grad_norm": 2.2078282824133937, "learning_rate": 4.068444200995698e-06, "loss": 0.9521, "step": 15806 }, { "epoch": 0.5729042078938784, "grad_norm": 2.335030232519515, "learning_rate": 4.067867555723093e-06, "loss": 0.971, "step": 15807 }, { "epoch": 0.5729404515965351, "grad_norm": 2.3215986772825454, "learning_rate": 4.067290923294709e-06, "loss": 0.8838, "step": 15808 }, { "epoch": 0.5729766952991918, "grad_norm": 2.204437847245866, "learning_rate": 4.066714303718492e-06, "loss": 0.8332, "step": 15809 }, { "epoch": 0.5730129390018485, "grad_norm": 2.4787817913141157, "learning_rate": 4.066137697002386e-06, "loss": 0.8964, "step": 15810 }, { "epoch": 0.5730491827045051, "grad_norm": 2.5315645714977375, "learning_rate": 4.0655611031543384e-06, "loss": 0.8598, "step": 15811 }, { "epoch": 0.5730854264071618, "grad_norm": 3.5380991350140754, "learning_rate": 4.064984522182295e-06, "loss": 0.8516, "step": 15812 }, { "epoch": 0.5731216701098184, "grad_norm": 2.2283358672705984, "learning_rate": 4.064407954094198e-06, "loss": 0.8662, "step": 15813 }, { "epoch": 0.573157913812475, "grad_norm": 2.150298223346587, "learning_rate": 4.0638313988979935e-06, "loss": 0.8604, "step": 15814 }, { "epoch": 0.5731941575151317, "grad_norm": 2.445819549504272, "learning_rate": 4.0632548566016274e-06, "loss": 0.8597, "step": 15815 }, { "epoch": 0.5732304012177885, "grad_norm": 2.3875651463990586, "learning_rate": 4.062678327213042e-06, "loss": 0.8213, "step": 15816 }, { "epoch": 0.5732666449204451, "grad_norm": 2.1653889592274806, "learning_rate": 4.062101810740183e-06, "loss": 0.843, "step": 15817 }, { "epoch": 0.5733028886231017, "grad_norm": 2.4335371357490345, "learning_rate": 4.061525307190992e-06, "loss": 0.9945, "step": 15818 }, { "epoch": 0.5733391323257584, "grad_norm": 2.4440259591493327, "learning_rate": 4.060948816573416e-06, "loss": 0.8893, "step": 15819 }, { "epoch": 0.573375376028415, "grad_norm": 2.358017265274796, "learning_rate": 4.060372338895398e-06, "loss": 1.0224, "step": 15820 }, { "epoch": 0.5734116197310717, "grad_norm": 2.6200990596974547, "learning_rate": 4.059795874164881e-06, "loss": 0.9278, "step": 15821 }, { "epoch": 0.5734478634337284, "grad_norm": 2.7204366500982275, "learning_rate": 4.059219422389807e-06, "loss": 0.9418, "step": 15822 }, { "epoch": 0.5734841071363851, "grad_norm": 2.31784269318023, "learning_rate": 4.058642983578121e-06, "loss": 0.9041, "step": 15823 }, { "epoch": 0.5735203508390417, "grad_norm": 2.273875209023896, "learning_rate": 4.0580665577377656e-06, "loss": 0.9216, "step": 15824 }, { "epoch": 0.5735565945416984, "grad_norm": 2.214617895904908, "learning_rate": 4.057490144876682e-06, "loss": 0.8751, "step": 15825 }, { "epoch": 0.573592838244355, "grad_norm": 2.6266153611135468, "learning_rate": 4.056913745002815e-06, "loss": 1.0545, "step": 15826 }, { "epoch": 0.5736290819470117, "grad_norm": 2.104663289096677, "learning_rate": 4.056337358124107e-06, "loss": 0.8104, "step": 15827 }, { "epoch": 0.5736653256496683, "grad_norm": 2.574864439334805, "learning_rate": 4.055760984248499e-06, "loss": 0.9529, "step": 15828 }, { "epoch": 0.5737015693523251, "grad_norm": 2.465511322326519, "learning_rate": 4.055184623383934e-06, "loss": 0.8624, "step": 15829 }, { "epoch": 0.5737378130549817, "grad_norm": 2.712080780237317, "learning_rate": 4.054608275538352e-06, "loss": 0.9235, "step": 15830 }, { "epoch": 0.5737740567576384, "grad_norm": 2.308300237109859, "learning_rate": 4.054031940719697e-06, "loss": 0.8836, "step": 15831 }, { "epoch": 0.573810300460295, "grad_norm": 2.0582118679874797, "learning_rate": 4.053455618935911e-06, "loss": 0.7417, "step": 15832 }, { "epoch": 0.5738465441629517, "grad_norm": 2.2817423443403024, "learning_rate": 4.052879310194933e-06, "loss": 1.0434, "step": 15833 }, { "epoch": 0.5738827878656083, "grad_norm": 2.7074223016623593, "learning_rate": 4.052303014504705e-06, "loss": 0.8828, "step": 15834 }, { "epoch": 0.573919031568265, "grad_norm": 2.457934111187455, "learning_rate": 4.051726731873169e-06, "loss": 1.0589, "step": 15835 }, { "epoch": 0.5739552752709217, "grad_norm": 2.2359157171396578, "learning_rate": 4.051150462308265e-06, "loss": 0.8859, "step": 15836 }, { "epoch": 0.5739915189735784, "grad_norm": 2.3495591711792088, "learning_rate": 4.050574205817935e-06, "loss": 1.0996, "step": 15837 }, { "epoch": 0.574027762676235, "grad_norm": 2.2777485314564436, "learning_rate": 4.049997962410115e-06, "loss": 0.8889, "step": 15838 }, { "epoch": 0.5740640063788917, "grad_norm": 2.357560742564973, "learning_rate": 4.049421732092751e-06, "loss": 0.85, "step": 15839 }, { "epoch": 0.5741002500815483, "grad_norm": 2.230771463545941, "learning_rate": 4.04884551487378e-06, "loss": 0.9122, "step": 15840 }, { "epoch": 0.574136493784205, "grad_norm": 2.2466312008358167, "learning_rate": 4.0482693107611435e-06, "loss": 1.0835, "step": 15841 }, { "epoch": 0.5741727374868617, "grad_norm": 2.5189067200992814, "learning_rate": 4.047693119762778e-06, "loss": 0.939, "step": 15842 }, { "epoch": 0.5742089811895184, "grad_norm": 2.525735923377507, "learning_rate": 4.047116941886628e-06, "loss": 1.0752, "step": 15843 }, { "epoch": 0.574245224892175, "grad_norm": 2.1846595439333796, "learning_rate": 4.04654077714063e-06, "loss": 0.9621, "step": 15844 }, { "epoch": 0.5742814685948316, "grad_norm": 2.1105325647105135, "learning_rate": 4.0459646255327225e-06, "loss": 0.7501, "step": 15845 }, { "epoch": 0.5743177122974883, "grad_norm": 2.242910019321918, "learning_rate": 4.045388487070844e-06, "loss": 0.9307, "step": 15846 }, { "epoch": 0.5743539560001449, "grad_norm": 2.6054955810352065, "learning_rate": 4.044812361762936e-06, "loss": 0.8762, "step": 15847 }, { "epoch": 0.5743901997028016, "grad_norm": 2.4447525743254532, "learning_rate": 4.044236249616937e-06, "loss": 1.0954, "step": 15848 }, { "epoch": 0.5744264434054583, "grad_norm": 2.3504609691132967, "learning_rate": 4.043660150640784e-06, "loss": 0.8164, "step": 15849 }, { "epoch": 0.574462687108115, "grad_norm": 2.296657338594644, "learning_rate": 4.043084064842414e-06, "loss": 0.8251, "step": 15850 }, { "epoch": 0.5744989308107716, "grad_norm": 2.558950992222268, "learning_rate": 4.042507992229769e-06, "loss": 0.9429, "step": 15851 }, { "epoch": 0.5745351745134283, "grad_norm": 2.327086638440183, "learning_rate": 4.041931932810785e-06, "loss": 0.8795, "step": 15852 }, { "epoch": 0.5745714182160849, "grad_norm": 2.420705153407607, "learning_rate": 4.0413558865933985e-06, "loss": 0.9716, "step": 15853 }, { "epoch": 0.5746076619187416, "grad_norm": 2.2314069115379964, "learning_rate": 4.040779853585547e-06, "loss": 0.7634, "step": 15854 }, { "epoch": 0.5746439056213983, "grad_norm": 2.4526520663789504, "learning_rate": 4.040203833795171e-06, "loss": 0.9272, "step": 15855 }, { "epoch": 0.574680149324055, "grad_norm": 2.139279068659998, "learning_rate": 4.039627827230206e-06, "loss": 0.9193, "step": 15856 }, { "epoch": 0.5747163930267116, "grad_norm": 2.1683109273909116, "learning_rate": 4.039051833898588e-06, "loss": 0.8541, "step": 15857 }, { "epoch": 0.5747526367293683, "grad_norm": 2.125756548389189, "learning_rate": 4.038475853808252e-06, "loss": 0.9061, "step": 15858 }, { "epoch": 0.5747888804320249, "grad_norm": 1.8939658116716531, "learning_rate": 4.03789988696714e-06, "loss": 0.7037, "step": 15859 }, { "epoch": 0.5748251241346816, "grad_norm": 2.1639253779659784, "learning_rate": 4.037323933383186e-06, "loss": 0.7421, "step": 15860 }, { "epoch": 0.5748613678373382, "grad_norm": 2.0767511945460435, "learning_rate": 4.036747993064326e-06, "loss": 0.7015, "step": 15861 }, { "epoch": 0.574897611539995, "grad_norm": 2.346993890544212, "learning_rate": 4.036172066018495e-06, "loss": 1.0268, "step": 15862 }, { "epoch": 0.5749338552426516, "grad_norm": 2.225735281782396, "learning_rate": 4.03559615225363e-06, "loss": 0.908, "step": 15863 }, { "epoch": 0.5749700989453083, "grad_norm": 2.168860380220014, "learning_rate": 4.035020251777668e-06, "loss": 0.7933, "step": 15864 }, { "epoch": 0.5750063426479649, "grad_norm": 2.3304977459904337, "learning_rate": 4.0344443645985426e-06, "loss": 0.9607, "step": 15865 }, { "epoch": 0.5750425863506216, "grad_norm": 2.4829901616078223, "learning_rate": 4.033868490724188e-06, "loss": 0.9667, "step": 15866 }, { "epoch": 0.5750788300532782, "grad_norm": 2.2819950615317657, "learning_rate": 4.033292630162544e-06, "loss": 0.9759, "step": 15867 }, { "epoch": 0.575115073755935, "grad_norm": 2.0529576631063975, "learning_rate": 4.032716782921542e-06, "loss": 0.9853, "step": 15868 }, { "epoch": 0.5751513174585916, "grad_norm": 2.4358907907403324, "learning_rate": 4.0321409490091176e-06, "loss": 0.7754, "step": 15869 }, { "epoch": 0.5751875611612483, "grad_norm": 2.45142143527198, "learning_rate": 4.031565128433205e-06, "loss": 0.842, "step": 15870 }, { "epoch": 0.5752238048639049, "grad_norm": 2.551335203020935, "learning_rate": 4.03098932120174e-06, "loss": 0.9186, "step": 15871 }, { "epoch": 0.5752600485665615, "grad_norm": 2.2481125234683135, "learning_rate": 4.030413527322656e-06, "loss": 1.0721, "step": 15872 }, { "epoch": 0.5752962922692182, "grad_norm": 2.3104773769608755, "learning_rate": 4.029837746803888e-06, "loss": 0.7062, "step": 15873 }, { "epoch": 0.5753325359718748, "grad_norm": 2.1033260272680128, "learning_rate": 4.029261979653368e-06, "loss": 0.9514, "step": 15874 }, { "epoch": 0.5753687796745316, "grad_norm": 2.839246179537938, "learning_rate": 4.028686225879032e-06, "loss": 0.8989, "step": 15875 }, { "epoch": 0.5754050233771882, "grad_norm": 2.391495284418216, "learning_rate": 4.028110485488811e-06, "loss": 0.7938, "step": 15876 }, { "epoch": 0.5754412670798449, "grad_norm": 2.4487775496344417, "learning_rate": 4.027534758490641e-06, "loss": 1.0187, "step": 15877 }, { "epoch": 0.5754775107825015, "grad_norm": 2.4520029529429537, "learning_rate": 4.0269590448924515e-06, "loss": 0.9172, "step": 15878 }, { "epoch": 0.5755137544851582, "grad_norm": 2.277072233933701, "learning_rate": 4.02638334470218e-06, "loss": 0.9999, "step": 15879 }, { "epoch": 0.5755499981878148, "grad_norm": 2.3637762354114322, "learning_rate": 4.0258076579277575e-06, "loss": 0.9078, "step": 15880 }, { "epoch": 0.5755862418904716, "grad_norm": 2.5821281447589914, "learning_rate": 4.025231984577115e-06, "loss": 1.0164, "step": 15881 }, { "epoch": 0.5756224855931282, "grad_norm": 2.0901130920829942, "learning_rate": 4.024656324658187e-06, "loss": 0.868, "step": 15882 }, { "epoch": 0.5756587292957849, "grad_norm": 2.5709967906709954, "learning_rate": 4.024080678178906e-06, "loss": 0.9392, "step": 15883 }, { "epoch": 0.5756949729984415, "grad_norm": 2.3059116890825435, "learning_rate": 4.023505045147202e-06, "loss": 0.8622, "step": 15884 }, { "epoch": 0.5757312167010982, "grad_norm": 2.1743839019474533, "learning_rate": 4.022929425571006e-06, "loss": 0.8573, "step": 15885 }, { "epoch": 0.5757674604037548, "grad_norm": 2.4644645383590205, "learning_rate": 4.022353819458253e-06, "loss": 0.9921, "step": 15886 }, { "epoch": 0.5758037041064115, "grad_norm": 2.349050633849656, "learning_rate": 4.021778226816874e-06, "loss": 0.7004, "step": 15887 }, { "epoch": 0.5758399478090682, "grad_norm": 2.388939466740027, "learning_rate": 4.021202647654799e-06, "loss": 0.8945, "step": 15888 }, { "epoch": 0.5758761915117249, "grad_norm": 2.60772841252514, "learning_rate": 4.020627081979958e-06, "loss": 0.8596, "step": 15889 }, { "epoch": 0.5759124352143815, "grad_norm": 2.581527448430819, "learning_rate": 4.020051529800285e-06, "loss": 1.0236, "step": 15890 }, { "epoch": 0.5759486789170382, "grad_norm": 2.2133479478755005, "learning_rate": 4.01947599112371e-06, "loss": 0.7892, "step": 15891 }, { "epoch": 0.5759849226196948, "grad_norm": 2.2204932316928803, "learning_rate": 4.018900465958163e-06, "loss": 0.8223, "step": 15892 }, { "epoch": 0.5760211663223515, "grad_norm": 2.137228202899006, "learning_rate": 4.018324954311572e-06, "loss": 0.8607, "step": 15893 }, { "epoch": 0.5760574100250082, "grad_norm": 2.1856905044064217, "learning_rate": 4.017749456191871e-06, "loss": 0.8567, "step": 15894 }, { "epoch": 0.5760936537276649, "grad_norm": 2.4965789901588633, "learning_rate": 4.017173971606989e-06, "loss": 0.7949, "step": 15895 }, { "epoch": 0.5761298974303215, "grad_norm": 2.437796799837089, "learning_rate": 4.016598500564855e-06, "loss": 0.8691, "step": 15896 }, { "epoch": 0.5761661411329781, "grad_norm": 2.3180954918136676, "learning_rate": 4.016023043073396e-06, "loss": 1.0137, "step": 15897 }, { "epoch": 0.5762023848356348, "grad_norm": 2.588476607047244, "learning_rate": 4.0154475991405474e-06, "loss": 0.9123, "step": 15898 }, { "epoch": 0.5762386285382914, "grad_norm": 2.234891322132569, "learning_rate": 4.014872168774236e-06, "loss": 0.8915, "step": 15899 }, { "epoch": 0.5762748722409481, "grad_norm": 2.5547244704526557, "learning_rate": 4.014296751982388e-06, "loss": 0.9249, "step": 15900 }, { "epoch": 0.5763111159436048, "grad_norm": 2.6947350405436823, "learning_rate": 4.0137213487729355e-06, "loss": 0.8962, "step": 15901 }, { "epoch": 0.5763473596462615, "grad_norm": 2.193950050309319, "learning_rate": 4.013145959153807e-06, "loss": 0.8917, "step": 15902 }, { "epoch": 0.5763836033489181, "grad_norm": 2.2877749722526595, "learning_rate": 4.01257058313293e-06, "loss": 0.9891, "step": 15903 }, { "epoch": 0.5764198470515748, "grad_norm": 2.4865339306040646, "learning_rate": 4.011995220718234e-06, "loss": 0.9652, "step": 15904 }, { "epoch": 0.5764560907542314, "grad_norm": 2.488763600145449, "learning_rate": 4.011419871917644e-06, "loss": 0.9485, "step": 15905 }, { "epoch": 0.5764923344568881, "grad_norm": 2.405823782806797, "learning_rate": 4.010844536739092e-06, "loss": 0.8358, "step": 15906 }, { "epoch": 0.5765285781595447, "grad_norm": 2.5374827890263996, "learning_rate": 4.010269215190505e-06, "loss": 0.9545, "step": 15907 }, { "epoch": 0.5765648218622015, "grad_norm": 2.3255780324810638, "learning_rate": 4.009693907279808e-06, "loss": 1.0332, "step": 15908 }, { "epoch": 0.5766010655648581, "grad_norm": 2.6440742075996897, "learning_rate": 4.00911861301493e-06, "loss": 0.9785, "step": 15909 }, { "epoch": 0.5766373092675148, "grad_norm": 2.236573068301172, "learning_rate": 4.008543332403798e-06, "loss": 0.8064, "step": 15910 }, { "epoch": 0.5766735529701714, "grad_norm": 2.4257092547341306, "learning_rate": 4.00796806545434e-06, "loss": 0.9048, "step": 15911 }, { "epoch": 0.5767097966728281, "grad_norm": 2.142987074439833, "learning_rate": 4.0073928121744826e-06, "loss": 0.9899, "step": 15912 }, { "epoch": 0.5767460403754847, "grad_norm": 2.3066956403275625, "learning_rate": 4.0068175725721505e-06, "loss": 1.0316, "step": 15913 }, { "epoch": 0.5767822840781415, "grad_norm": 2.4238426619517695, "learning_rate": 4.006242346655273e-06, "loss": 0.9339, "step": 15914 }, { "epoch": 0.5768185277807981, "grad_norm": 2.4819260401104177, "learning_rate": 4.005667134431775e-06, "loss": 0.7869, "step": 15915 }, { "epoch": 0.5768547714834548, "grad_norm": 2.072068086153226, "learning_rate": 4.005091935909582e-06, "loss": 0.7044, "step": 15916 }, { "epoch": 0.5768910151861114, "grad_norm": 2.4190854444697623, "learning_rate": 4.004516751096619e-06, "loss": 0.8455, "step": 15917 }, { "epoch": 0.5769272588887681, "grad_norm": 2.296166737584943, "learning_rate": 4.003941580000815e-06, "loss": 0.8728, "step": 15918 }, { "epoch": 0.5769635025914247, "grad_norm": 2.4000772502818126, "learning_rate": 4.003366422630094e-06, "loss": 0.8589, "step": 15919 }, { "epoch": 0.5769997462940814, "grad_norm": 2.383043101369357, "learning_rate": 4.00279127899238e-06, "loss": 0.9138, "step": 15920 }, { "epoch": 0.5770359899967381, "grad_norm": 2.3614716608547086, "learning_rate": 4.002216149095599e-06, "loss": 0.749, "step": 15921 }, { "epoch": 0.5770722336993948, "grad_norm": 2.767779029543281, "learning_rate": 4.001641032947676e-06, "loss": 0.8108, "step": 15922 }, { "epoch": 0.5771084774020514, "grad_norm": 1.830107631601093, "learning_rate": 4.001065930556537e-06, "loss": 0.7766, "step": 15923 }, { "epoch": 0.577144721104708, "grad_norm": 2.2184085852085507, "learning_rate": 4.000490841930105e-06, "loss": 0.8889, "step": 15924 }, { "epoch": 0.5771809648073647, "grad_norm": 2.4365969839223656, "learning_rate": 3.999915767076302e-06, "loss": 0.9365, "step": 15925 }, { "epoch": 0.5772172085100213, "grad_norm": 2.5211963465246376, "learning_rate": 3.999340706003058e-06, "loss": 0.9052, "step": 15926 }, { "epoch": 0.5772534522126781, "grad_norm": 2.345104580795324, "learning_rate": 3.998765658718293e-06, "loss": 0.9343, "step": 15927 }, { "epoch": 0.5772896959153347, "grad_norm": 2.35203982289588, "learning_rate": 3.998190625229932e-06, "loss": 0.7396, "step": 15928 }, { "epoch": 0.5773259396179914, "grad_norm": 2.3367414143294303, "learning_rate": 3.997615605545897e-06, "loss": 0.7239, "step": 15929 }, { "epoch": 0.577362183320648, "grad_norm": 2.135694985731762, "learning_rate": 3.997040599674113e-06, "loss": 0.8849, "step": 15930 }, { "epoch": 0.5773984270233047, "grad_norm": 2.0544005071269584, "learning_rate": 3.996465607622505e-06, "loss": 0.8504, "step": 15931 }, { "epoch": 0.5774346707259613, "grad_norm": 2.304199749064313, "learning_rate": 3.995890629398993e-06, "loss": 1.0055, "step": 15932 }, { "epoch": 0.577470914428618, "grad_norm": 2.286602388483411, "learning_rate": 3.9953156650114985e-06, "loss": 0.8427, "step": 15933 }, { "epoch": 0.5775071581312747, "grad_norm": 2.2909401025728418, "learning_rate": 3.994740714467949e-06, "loss": 1.15, "step": 15934 }, { "epoch": 0.5775434018339314, "grad_norm": 2.3971817429392126, "learning_rate": 3.994165777776265e-06, "loss": 0.8285, "step": 15935 }, { "epoch": 0.577579645536588, "grad_norm": 2.229981953493479, "learning_rate": 3.993590854944367e-06, "loss": 0.8727, "step": 15936 }, { "epoch": 0.5776158892392447, "grad_norm": 2.4885067630272024, "learning_rate": 3.993015945980177e-06, "loss": 0.9373, "step": 15937 }, { "epoch": 0.5776521329419013, "grad_norm": 2.363024701365878, "learning_rate": 3.99244105089162e-06, "loss": 0.8557, "step": 15938 }, { "epoch": 0.577688376644558, "grad_norm": 2.3145497015701264, "learning_rate": 3.991866169686616e-06, "loss": 0.7882, "step": 15939 }, { "epoch": 0.5777246203472147, "grad_norm": 2.4894412549987193, "learning_rate": 3.991291302373087e-06, "loss": 0.8012, "step": 15940 }, { "epoch": 0.5777608640498714, "grad_norm": 2.831784136540687, "learning_rate": 3.990716448958952e-06, "loss": 0.9307, "step": 15941 }, { "epoch": 0.577797107752528, "grad_norm": 2.1511195983667797, "learning_rate": 3.990141609452136e-06, "loss": 0.9249, "step": 15942 }, { "epoch": 0.5778333514551847, "grad_norm": 2.21612767541498, "learning_rate": 3.989566783860557e-06, "loss": 0.8243, "step": 15943 }, { "epoch": 0.5778695951578413, "grad_norm": 2.0596488346211035, "learning_rate": 3.988991972192136e-06, "loss": 0.7509, "step": 15944 }, { "epoch": 0.577905838860498, "grad_norm": 2.2631005153222006, "learning_rate": 3.988417174454794e-06, "loss": 0.7645, "step": 15945 }, { "epoch": 0.5779420825631546, "grad_norm": 2.4079695354375885, "learning_rate": 3.987842390656452e-06, "loss": 0.9375, "step": 15946 }, { "epoch": 0.5779783262658114, "grad_norm": 2.127897177539687, "learning_rate": 3.987267620805031e-06, "loss": 0.8733, "step": 15947 }, { "epoch": 0.578014569968468, "grad_norm": 2.594853952966222, "learning_rate": 3.9866928649084494e-06, "loss": 1.0109, "step": 15948 }, { "epoch": 0.5780508136711247, "grad_norm": 2.558330655232465, "learning_rate": 3.986118122974626e-06, "loss": 0.9918, "step": 15949 }, { "epoch": 0.5780870573737813, "grad_norm": 2.4234329974867337, "learning_rate": 3.9855433950114826e-06, "loss": 1.0116, "step": 15950 }, { "epoch": 0.578123301076438, "grad_norm": 2.361845411757975, "learning_rate": 3.9849686810269375e-06, "loss": 0.8581, "step": 15951 }, { "epoch": 0.5781595447790946, "grad_norm": 2.2269733492419967, "learning_rate": 3.9843939810289105e-06, "loss": 0.8841, "step": 15952 }, { "epoch": 0.5781957884817513, "grad_norm": 2.884413958318961, "learning_rate": 3.983819295025318e-06, "loss": 0.8353, "step": 15953 }, { "epoch": 0.578232032184408, "grad_norm": 2.1075552759802254, "learning_rate": 3.983244623024083e-06, "loss": 0.9551, "step": 15954 }, { "epoch": 0.5782682758870646, "grad_norm": 2.287797055897598, "learning_rate": 3.982669965033123e-06, "loss": 0.8893, "step": 15955 }, { "epoch": 0.5783045195897213, "grad_norm": 2.3251654068595533, "learning_rate": 3.982095321060354e-06, "loss": 1.0927, "step": 15956 }, { "epoch": 0.5783407632923779, "grad_norm": 2.8246996428956015, "learning_rate": 3.981520691113695e-06, "loss": 1.0067, "step": 15957 }, { "epoch": 0.5783770069950346, "grad_norm": 2.4573930317617014, "learning_rate": 3.980946075201067e-06, "loss": 0.853, "step": 15958 }, { "epoch": 0.5784132506976912, "grad_norm": 2.3320480296438117, "learning_rate": 3.980371473330385e-06, "loss": 0.8157, "step": 15959 }, { "epoch": 0.578449494400348, "grad_norm": 2.189191956001956, "learning_rate": 3.9797968855095666e-06, "loss": 0.904, "step": 15960 }, { "epoch": 0.5784857381030046, "grad_norm": 2.3370862999287887, "learning_rate": 3.979222311746533e-06, "loss": 0.9812, "step": 15961 }, { "epoch": 0.5785219818056613, "grad_norm": 2.2298535674500854, "learning_rate": 3.9786477520491964e-06, "loss": 0.8901, "step": 15962 }, { "epoch": 0.5785582255083179, "grad_norm": 2.139928628959304, "learning_rate": 3.978073206425477e-06, "loss": 0.8037, "step": 15963 }, { "epoch": 0.5785944692109746, "grad_norm": 2.3407875630429813, "learning_rate": 3.977498674883288e-06, "loss": 0.7167, "step": 15964 }, { "epoch": 0.5786307129136312, "grad_norm": 2.2439353939572633, "learning_rate": 3.976924157430551e-06, "loss": 0.9433, "step": 15965 }, { "epoch": 0.578666956616288, "grad_norm": 2.0999853366865895, "learning_rate": 3.976349654075181e-06, "loss": 0.8648, "step": 15966 }, { "epoch": 0.5787032003189446, "grad_norm": 2.3478145255387552, "learning_rate": 3.975775164825093e-06, "loss": 0.8225, "step": 15967 }, { "epoch": 0.5787394440216013, "grad_norm": 2.5941026611918536, "learning_rate": 3.975200689688204e-06, "loss": 0.8407, "step": 15968 }, { "epoch": 0.5787756877242579, "grad_norm": 2.4764714799681564, "learning_rate": 3.9746262286724305e-06, "loss": 0.7318, "step": 15969 }, { "epoch": 0.5788119314269146, "grad_norm": 2.274476401600847, "learning_rate": 3.974051781785687e-06, "loss": 0.9549, "step": 15970 }, { "epoch": 0.5788481751295712, "grad_norm": 2.0629846267843055, "learning_rate": 3.973477349035889e-06, "loss": 0.8264, "step": 15971 }, { "epoch": 0.5788844188322279, "grad_norm": 2.281342806134926, "learning_rate": 3.97290293043095e-06, "loss": 0.6025, "step": 15972 }, { "epoch": 0.5789206625348846, "grad_norm": 2.3532356637780243, "learning_rate": 3.97232852597879e-06, "loss": 0.8676, "step": 15973 }, { "epoch": 0.5789569062375413, "grad_norm": 2.3327293002622023, "learning_rate": 3.971754135687322e-06, "loss": 1.0748, "step": 15974 }, { "epoch": 0.5789931499401979, "grad_norm": 2.3496090690174993, "learning_rate": 3.9711797595644595e-06, "loss": 0.8022, "step": 15975 }, { "epoch": 0.5790293936428546, "grad_norm": 2.723892489284766, "learning_rate": 3.970605397618116e-06, "loss": 1.0737, "step": 15976 }, { "epoch": 0.5790656373455112, "grad_norm": 2.218541398739273, "learning_rate": 3.970031049856209e-06, "loss": 0.8316, "step": 15977 }, { "epoch": 0.5791018810481678, "grad_norm": 2.3450590231041417, "learning_rate": 3.969456716286653e-06, "loss": 0.7359, "step": 15978 }, { "epoch": 0.5791381247508245, "grad_norm": 2.3435780459884903, "learning_rate": 3.968882396917358e-06, "loss": 0.8783, "step": 15979 }, { "epoch": 0.5791743684534812, "grad_norm": 2.183960926799566, "learning_rate": 3.96830809175624e-06, "loss": 0.9464, "step": 15980 }, { "epoch": 0.5792106121561379, "grad_norm": 2.250081813485667, "learning_rate": 3.9677338008112136e-06, "loss": 1.0066, "step": 15981 }, { "epoch": 0.5792468558587945, "grad_norm": 2.5544968492279327, "learning_rate": 3.96715952409019e-06, "loss": 0.9586, "step": 15982 }, { "epoch": 0.5792830995614512, "grad_norm": 2.7363586791879673, "learning_rate": 3.966585261601085e-06, "loss": 0.9947, "step": 15983 }, { "epoch": 0.5793193432641078, "grad_norm": 2.345309584947349, "learning_rate": 3.966011013351807e-06, "loss": 0.8751, "step": 15984 }, { "epoch": 0.5793555869667645, "grad_norm": 2.1792781842061966, "learning_rate": 3.965436779350275e-06, "loss": 0.896, "step": 15985 }, { "epoch": 0.5793918306694212, "grad_norm": 2.7294780468872464, "learning_rate": 3.964862559604398e-06, "loss": 1.0354, "step": 15986 }, { "epoch": 0.5794280743720779, "grad_norm": 2.4062574857953067, "learning_rate": 3.964288354122089e-06, "loss": 0.8894, "step": 15987 }, { "epoch": 0.5794643180747345, "grad_norm": 2.1318577709771125, "learning_rate": 3.9637141629112586e-06, "loss": 0.7742, "step": 15988 }, { "epoch": 0.5795005617773912, "grad_norm": 2.193206203889963, "learning_rate": 3.963139985979822e-06, "loss": 0.7068, "step": 15989 }, { "epoch": 0.5795368054800478, "grad_norm": 2.244135743448678, "learning_rate": 3.962565823335689e-06, "loss": 0.9185, "step": 15990 }, { "epoch": 0.5795730491827045, "grad_norm": 2.1020727188544237, "learning_rate": 3.961991674986772e-06, "loss": 0.7694, "step": 15991 }, { "epoch": 0.5796092928853611, "grad_norm": 2.534187181365332, "learning_rate": 3.961417540940979e-06, "loss": 0.994, "step": 15992 }, { "epoch": 0.5796455365880179, "grad_norm": 2.3531569085789488, "learning_rate": 3.960843421206226e-06, "loss": 0.8911, "step": 15993 }, { "epoch": 0.5796817802906745, "grad_norm": 2.0849308478247317, "learning_rate": 3.960269315790423e-06, "loss": 0.7434, "step": 15994 }, { "epoch": 0.5797180239933312, "grad_norm": 2.4518893644570534, "learning_rate": 3.9596952247014805e-06, "loss": 0.9495, "step": 15995 }, { "epoch": 0.5797542676959878, "grad_norm": 2.4145417747327413, "learning_rate": 3.959121147947307e-06, "loss": 0.8478, "step": 15996 }, { "epoch": 0.5797905113986445, "grad_norm": 1.9984738437099066, "learning_rate": 3.958547085535814e-06, "loss": 0.8942, "step": 15997 }, { "epoch": 0.5798267551013011, "grad_norm": 2.900711338904753, "learning_rate": 3.957973037474914e-06, "loss": 0.7463, "step": 15998 }, { "epoch": 0.5798629988039579, "grad_norm": 2.3313354476913353, "learning_rate": 3.9573990037725155e-06, "loss": 0.9446, "step": 15999 }, { "epoch": 0.5798992425066145, "grad_norm": 1.943271397019556, "learning_rate": 3.956824984436527e-06, "loss": 0.7655, "step": 16000 }, { "epoch": 0.5799354862092712, "grad_norm": 2.1467926817557004, "learning_rate": 3.95625097947486e-06, "loss": 0.9417, "step": 16001 }, { "epoch": 0.5799717299119278, "grad_norm": 2.1183539911169307, "learning_rate": 3.955676988895424e-06, "loss": 0.9095, "step": 16002 }, { "epoch": 0.5800079736145844, "grad_norm": 2.4204513617535772, "learning_rate": 3.9551030127061264e-06, "loss": 0.9468, "step": 16003 }, { "epoch": 0.5800442173172411, "grad_norm": 2.30127522971179, "learning_rate": 3.954529050914876e-06, "loss": 0.8901, "step": 16004 }, { "epoch": 0.5800804610198977, "grad_norm": 2.1521481073179154, "learning_rate": 3.9539551035295845e-06, "loss": 0.8376, "step": 16005 }, { "epoch": 0.5801167047225545, "grad_norm": 1.9702745145983447, "learning_rate": 3.95338117055816e-06, "loss": 0.7321, "step": 16006 }, { "epoch": 0.5801529484252111, "grad_norm": 2.0950028777079543, "learning_rate": 3.9528072520085094e-06, "loss": 0.7334, "step": 16007 }, { "epoch": 0.5801891921278678, "grad_norm": 2.2878831917471767, "learning_rate": 3.952233347888541e-06, "loss": 0.9593, "step": 16008 }, { "epoch": 0.5802254358305244, "grad_norm": 1.8810070637329208, "learning_rate": 3.951659458206164e-06, "loss": 0.8314, "step": 16009 }, { "epoch": 0.5802616795331811, "grad_norm": 2.528384036057606, "learning_rate": 3.951085582969285e-06, "loss": 0.9532, "step": 16010 }, { "epoch": 0.5802979232358377, "grad_norm": 2.132114973081922, "learning_rate": 3.950511722185813e-06, "loss": 0.8437, "step": 16011 }, { "epoch": 0.5803341669384945, "grad_norm": 2.163181548245646, "learning_rate": 3.949937875863653e-06, "loss": 0.7937, "step": 16012 }, { "epoch": 0.5803704106411511, "grad_norm": 2.4925338744908254, "learning_rate": 3.949364044010716e-06, "loss": 0.9054, "step": 16013 }, { "epoch": 0.5804066543438078, "grad_norm": 2.3977695600404987, "learning_rate": 3.948790226634906e-06, "loss": 0.9795, "step": 16014 }, { "epoch": 0.5804428980464644, "grad_norm": 2.3343514545185053, "learning_rate": 3.9482164237441325e-06, "loss": 0.8991, "step": 16015 }, { "epoch": 0.5804791417491211, "grad_norm": 2.424536655401255, "learning_rate": 3.947642635346299e-06, "loss": 0.8895, "step": 16016 }, { "epoch": 0.5805153854517777, "grad_norm": 2.4042670535788395, "learning_rate": 3.947068861449314e-06, "loss": 0.8819, "step": 16017 }, { "epoch": 0.5805516291544344, "grad_norm": 2.187336782328684, "learning_rate": 3.946495102061084e-06, "loss": 0.8846, "step": 16018 }, { "epoch": 0.5805878728570911, "grad_norm": 2.1559471396296397, "learning_rate": 3.9459213571895135e-06, "loss": 0.7719, "step": 16019 }, { "epoch": 0.5806241165597478, "grad_norm": 2.337119769020179, "learning_rate": 3.945347626842508e-06, "loss": 0.9126, "step": 16020 }, { "epoch": 0.5806603602624044, "grad_norm": 2.569272366351837, "learning_rate": 3.944773911027976e-06, "loss": 0.8201, "step": 16021 }, { "epoch": 0.5806966039650611, "grad_norm": 2.346136133275619, "learning_rate": 3.944200209753822e-06, "loss": 0.84, "step": 16022 }, { "epoch": 0.5807328476677177, "grad_norm": 2.640014097541969, "learning_rate": 3.94362652302795e-06, "loss": 1.1383, "step": 16023 }, { "epoch": 0.5807690913703744, "grad_norm": 2.0557528195238137, "learning_rate": 3.943052850858263e-06, "loss": 0.9017, "step": 16024 }, { "epoch": 0.5808053350730311, "grad_norm": 2.504280406828499, "learning_rate": 3.942479193252671e-06, "loss": 1.032, "step": 16025 }, { "epoch": 0.5808415787756878, "grad_norm": 2.071860569581599, "learning_rate": 3.941905550219077e-06, "loss": 0.943, "step": 16026 }, { "epoch": 0.5808778224783444, "grad_norm": 2.4706272866008887, "learning_rate": 3.941331921765383e-06, "loss": 1.1023, "step": 16027 }, { "epoch": 0.580914066181001, "grad_norm": 2.25667016741947, "learning_rate": 3.940758307899496e-06, "loss": 0.7483, "step": 16028 }, { "epoch": 0.5809503098836577, "grad_norm": 2.4552607206570713, "learning_rate": 3.940184708629318e-06, "loss": 0.9169, "step": 16029 }, { "epoch": 0.5809865535863143, "grad_norm": 2.3606701697407475, "learning_rate": 3.939611123962755e-06, "loss": 0.9283, "step": 16030 }, { "epoch": 0.581022797288971, "grad_norm": 2.390080345125376, "learning_rate": 3.9390375539077085e-06, "loss": 0.9457, "step": 16031 }, { "epoch": 0.5810590409916278, "grad_norm": 2.3299446949957416, "learning_rate": 3.938463998472081e-06, "loss": 0.8973, "step": 16032 }, { "epoch": 0.5810952846942844, "grad_norm": 2.0853264611894677, "learning_rate": 3.93789045766378e-06, "loss": 0.8604, "step": 16033 }, { "epoch": 0.581131528396941, "grad_norm": 2.333897444004668, "learning_rate": 3.937316931490706e-06, "loss": 0.9488, "step": 16034 }, { "epoch": 0.5811677720995977, "grad_norm": 2.2878281236109435, "learning_rate": 3.9367434199607615e-06, "loss": 0.8575, "step": 16035 }, { "epoch": 0.5812040158022543, "grad_norm": 2.533001200334838, "learning_rate": 3.93616992308185e-06, "loss": 0.8764, "step": 16036 }, { "epoch": 0.581240259504911, "grad_norm": 2.173878083613911, "learning_rate": 3.935596440861873e-06, "loss": 0.8726, "step": 16037 }, { "epoch": 0.5812765032075676, "grad_norm": 2.3065107149585025, "learning_rate": 3.935022973308734e-06, "loss": 1.0338, "step": 16038 }, { "epoch": 0.5813127469102244, "grad_norm": 2.292853369764879, "learning_rate": 3.934449520430334e-06, "loss": 1.0465, "step": 16039 }, { "epoch": 0.581348990612881, "grad_norm": 2.2476398206045913, "learning_rate": 3.933876082234573e-06, "loss": 0.8606, "step": 16040 }, { "epoch": 0.5813852343155377, "grad_norm": 2.197820027569829, "learning_rate": 3.933302658729357e-06, "loss": 0.7073, "step": 16041 }, { "epoch": 0.5814214780181943, "grad_norm": 2.3399851295688254, "learning_rate": 3.932729249922587e-06, "loss": 1.043, "step": 16042 }, { "epoch": 0.581457721720851, "grad_norm": 2.358913683805442, "learning_rate": 3.932155855822158e-06, "loss": 0.9858, "step": 16043 }, { "epoch": 0.5814939654235076, "grad_norm": 2.362705887491791, "learning_rate": 3.931582476435979e-06, "loss": 0.9434, "step": 16044 }, { "epoch": 0.5815302091261644, "grad_norm": 2.3139196619704974, "learning_rate": 3.931009111771947e-06, "loss": 0.9028, "step": 16045 }, { "epoch": 0.581566452828821, "grad_norm": 2.0757014850273445, "learning_rate": 3.930435761837963e-06, "loss": 0.8947, "step": 16046 }, { "epoch": 0.5816026965314777, "grad_norm": 2.4244758258688393, "learning_rate": 3.929862426641926e-06, "loss": 0.7472, "step": 16047 }, { "epoch": 0.5816389402341343, "grad_norm": 2.2393083854717166, "learning_rate": 3.9292891061917385e-06, "loss": 1.0172, "step": 16048 }, { "epoch": 0.581675183936791, "grad_norm": 2.4502628460924405, "learning_rate": 3.9287158004953e-06, "loss": 0.8695, "step": 16049 }, { "epoch": 0.5817114276394476, "grad_norm": 2.604683953824198, "learning_rate": 3.928142509560509e-06, "loss": 0.9172, "step": 16050 }, { "epoch": 0.5817476713421043, "grad_norm": 2.310114550939406, "learning_rate": 3.927569233395265e-06, "loss": 0.8482, "step": 16051 }, { "epoch": 0.581783915044761, "grad_norm": 2.6328603124958496, "learning_rate": 3.92699597200747e-06, "loss": 1.0018, "step": 16052 }, { "epoch": 0.5818201587474177, "grad_norm": 2.758440251914439, "learning_rate": 3.926422725405021e-06, "loss": 0.9532, "step": 16053 }, { "epoch": 0.5818564024500743, "grad_norm": 2.2760095725446785, "learning_rate": 3.925849493595819e-06, "loss": 0.9843, "step": 16054 }, { "epoch": 0.581892646152731, "grad_norm": 2.2570727629620144, "learning_rate": 3.92527627658776e-06, "loss": 1.0185, "step": 16055 }, { "epoch": 0.5819288898553876, "grad_norm": 2.569434168159718, "learning_rate": 3.924703074388744e-06, "loss": 1.0502, "step": 16056 }, { "epoch": 0.5819651335580442, "grad_norm": 2.510169960660146, "learning_rate": 3.9241298870066695e-06, "loss": 0.845, "step": 16057 }, { "epoch": 0.582001377260701, "grad_norm": 2.454151638504807, "learning_rate": 3.923556714449434e-06, "loss": 0.87, "step": 16058 }, { "epoch": 0.5820376209633576, "grad_norm": 2.308218360596144, "learning_rate": 3.922983556724935e-06, "loss": 0.7777, "step": 16059 }, { "epoch": 0.5820738646660143, "grad_norm": 2.4370601794285682, "learning_rate": 3.9224104138410735e-06, "loss": 0.8605, "step": 16060 }, { "epoch": 0.5821101083686709, "grad_norm": 2.175449719229332, "learning_rate": 3.921837285805744e-06, "loss": 0.7652, "step": 16061 }, { "epoch": 0.5821463520713276, "grad_norm": 2.2250132530410403, "learning_rate": 3.921264172626844e-06, "loss": 0.8017, "step": 16062 }, { "epoch": 0.5821825957739842, "grad_norm": 2.4000457866983003, "learning_rate": 3.920691074312271e-06, "loss": 0.8074, "step": 16063 }, { "epoch": 0.5822188394766409, "grad_norm": 2.28608232648889, "learning_rate": 3.920117990869923e-06, "loss": 0.8674, "step": 16064 }, { "epoch": 0.5822550831792976, "grad_norm": 2.3301844737082864, "learning_rate": 3.919544922307697e-06, "loss": 1.046, "step": 16065 }, { "epoch": 0.5822913268819543, "grad_norm": 2.2014675427168666, "learning_rate": 3.9189718686334876e-06, "loss": 0.9753, "step": 16066 }, { "epoch": 0.5823275705846109, "grad_norm": 2.2902519649403854, "learning_rate": 3.918398829855192e-06, "loss": 0.9917, "step": 16067 }, { "epoch": 0.5823638142872676, "grad_norm": 2.2305389099366284, "learning_rate": 3.917825805980706e-06, "loss": 1.0376, "step": 16068 }, { "epoch": 0.5824000579899242, "grad_norm": 2.4561891004564473, "learning_rate": 3.917252797017927e-06, "loss": 0.8472, "step": 16069 }, { "epoch": 0.5824363016925809, "grad_norm": 1.9952869568786824, "learning_rate": 3.91667980297475e-06, "loss": 0.7941, "step": 16070 }, { "epoch": 0.5824725453952376, "grad_norm": 2.4234626303183786, "learning_rate": 3.916106823859067e-06, "loss": 0.9779, "step": 16071 }, { "epoch": 0.5825087890978943, "grad_norm": 2.303060845213837, "learning_rate": 3.915533859678779e-06, "loss": 0.954, "step": 16072 }, { "epoch": 0.5825450328005509, "grad_norm": 2.3867813225474, "learning_rate": 3.914960910441778e-06, "loss": 1.0269, "step": 16073 }, { "epoch": 0.5825812765032076, "grad_norm": 2.285110987462504, "learning_rate": 3.9143879761559605e-06, "loss": 0.7589, "step": 16074 }, { "epoch": 0.5826175202058642, "grad_norm": 2.215821075528525, "learning_rate": 3.913815056829218e-06, "loss": 0.8651, "step": 16075 }, { "epoch": 0.5826537639085209, "grad_norm": 2.616757685671009, "learning_rate": 3.913242152469449e-06, "loss": 0.8825, "step": 16076 }, { "epoch": 0.5826900076111775, "grad_norm": 2.3674189098890035, "learning_rate": 3.9126692630845456e-06, "loss": 0.9149, "step": 16077 }, { "epoch": 0.5827262513138343, "grad_norm": 2.345854415810954, "learning_rate": 3.912096388682403e-06, "loss": 0.7025, "step": 16078 }, { "epoch": 0.5827624950164909, "grad_norm": 2.1630259560124414, "learning_rate": 3.911523529270911e-06, "loss": 0.8965, "step": 16079 }, { "epoch": 0.5827987387191476, "grad_norm": 2.3442342258111397, "learning_rate": 3.910950684857968e-06, "loss": 1.1808, "step": 16080 }, { "epoch": 0.5828349824218042, "grad_norm": 2.5731168226594106, "learning_rate": 3.910377855451467e-06, "loss": 0.9624, "step": 16081 }, { "epoch": 0.5828712261244609, "grad_norm": 2.2810938393785856, "learning_rate": 3.9098050410593e-06, "loss": 0.7496, "step": 16082 }, { "epoch": 0.5829074698271175, "grad_norm": 4.562195343882765, "learning_rate": 3.909232241689358e-06, "loss": 0.9532, "step": 16083 }, { "epoch": 0.5829437135297743, "grad_norm": 2.342459130567687, "learning_rate": 3.9086594573495385e-06, "loss": 0.9077, "step": 16084 }, { "epoch": 0.5829799572324309, "grad_norm": 2.3635440652748905, "learning_rate": 3.908086688047731e-06, "loss": 0.9838, "step": 16085 }, { "epoch": 0.5830162009350875, "grad_norm": 2.412490571016938, "learning_rate": 3.907513933791829e-06, "loss": 0.6869, "step": 16086 }, { "epoch": 0.5830524446377442, "grad_norm": 2.153650343809596, "learning_rate": 3.906941194589723e-06, "loss": 0.9427, "step": 16087 }, { "epoch": 0.5830886883404008, "grad_norm": 2.457722838169119, "learning_rate": 3.9063684704493075e-06, "loss": 0.8822, "step": 16088 }, { "epoch": 0.5831249320430575, "grad_norm": 2.471492594813456, "learning_rate": 3.905795761378473e-06, "loss": 1.0229, "step": 16089 }, { "epoch": 0.5831611757457141, "grad_norm": 2.4112755259826053, "learning_rate": 3.905223067385111e-06, "loss": 0.9067, "step": 16090 }, { "epoch": 0.5831974194483709, "grad_norm": 2.47098620127607, "learning_rate": 3.904650388477112e-06, "loss": 0.8938, "step": 16091 }, { "epoch": 0.5832336631510275, "grad_norm": 2.209067379867102, "learning_rate": 3.904077724662369e-06, "loss": 0.742, "step": 16092 }, { "epoch": 0.5832699068536842, "grad_norm": 2.539146587411375, "learning_rate": 3.9035050759487725e-06, "loss": 0.9653, "step": 16093 }, { "epoch": 0.5833061505563408, "grad_norm": 2.185870169994688, "learning_rate": 3.902932442344213e-06, "loss": 0.8575, "step": 16094 }, { "epoch": 0.5833423942589975, "grad_norm": 2.0368152806415285, "learning_rate": 3.90235982385658e-06, "loss": 0.7421, "step": 16095 }, { "epoch": 0.5833786379616541, "grad_norm": 2.585229935193362, "learning_rate": 3.9017872204937654e-06, "loss": 0.8969, "step": 16096 }, { "epoch": 0.5834148816643109, "grad_norm": 2.5093407386433784, "learning_rate": 3.90121463226366e-06, "loss": 0.9106, "step": 16097 }, { "epoch": 0.5834511253669675, "grad_norm": 2.4063916573869952, "learning_rate": 3.9006420591741514e-06, "loss": 0.9057, "step": 16098 }, { "epoch": 0.5834873690696242, "grad_norm": 2.279593327588257, "learning_rate": 3.900069501233128e-06, "loss": 0.7816, "step": 16099 }, { "epoch": 0.5835236127722808, "grad_norm": 2.146847090906202, "learning_rate": 3.899496958448484e-06, "loss": 0.6648, "step": 16100 }, { "epoch": 0.5835598564749375, "grad_norm": 2.4717627925709498, "learning_rate": 3.898924430828107e-06, "loss": 0.9018, "step": 16101 }, { "epoch": 0.5835961001775941, "grad_norm": 2.2701676976204728, "learning_rate": 3.898351918379884e-06, "loss": 0.7868, "step": 16102 }, { "epoch": 0.5836323438802508, "grad_norm": 2.385623897053563, "learning_rate": 3.897779421111706e-06, "loss": 0.8102, "step": 16103 }, { "epoch": 0.5836685875829075, "grad_norm": 2.2443314889654142, "learning_rate": 3.897206939031461e-06, "loss": 0.9959, "step": 16104 }, { "epoch": 0.5837048312855642, "grad_norm": 2.365423448887224, "learning_rate": 3.896634472147037e-06, "loss": 0.9392, "step": 16105 }, { "epoch": 0.5837410749882208, "grad_norm": 2.308709090249723, "learning_rate": 3.8960620204663235e-06, "loss": 0.8748, "step": 16106 }, { "epoch": 0.5837773186908775, "grad_norm": 2.3742647602052536, "learning_rate": 3.895489583997205e-06, "loss": 0.8817, "step": 16107 }, { "epoch": 0.5838135623935341, "grad_norm": 2.3282885470451267, "learning_rate": 3.894917162747575e-06, "loss": 0.8868, "step": 16108 }, { "epoch": 0.5838498060961907, "grad_norm": 2.468614022025024, "learning_rate": 3.894344756725318e-06, "loss": 0.8097, "step": 16109 }, { "epoch": 0.5838860497988474, "grad_norm": 1.9737882359263235, "learning_rate": 3.893772365938321e-06, "loss": 0.775, "step": 16110 }, { "epoch": 0.5839222935015042, "grad_norm": 2.426293589430574, "learning_rate": 3.8931999903944705e-06, "loss": 1.0606, "step": 16111 }, { "epoch": 0.5839585372041608, "grad_norm": 2.6504166722786073, "learning_rate": 3.892627630101656e-06, "loss": 0.9313, "step": 16112 }, { "epoch": 0.5839947809068174, "grad_norm": 2.136035722278368, "learning_rate": 3.892055285067763e-06, "loss": 1.0465, "step": 16113 }, { "epoch": 0.5840310246094741, "grad_norm": 2.0835840960554584, "learning_rate": 3.891482955300679e-06, "loss": 0.8711, "step": 16114 }, { "epoch": 0.5840672683121307, "grad_norm": 2.46152129713558, "learning_rate": 3.890910640808288e-06, "loss": 0.9347, "step": 16115 }, { "epoch": 0.5841035120147874, "grad_norm": 2.369934447039625, "learning_rate": 3.890338341598479e-06, "loss": 0.9489, "step": 16116 }, { "epoch": 0.5841397557174441, "grad_norm": 2.457087852424318, "learning_rate": 3.889766057679135e-06, "loss": 1.064, "step": 16117 }, { "epoch": 0.5841759994201008, "grad_norm": 2.5684580129497543, "learning_rate": 3.889193789058145e-06, "loss": 0.8101, "step": 16118 }, { "epoch": 0.5842122431227574, "grad_norm": 2.781362349666201, "learning_rate": 3.88862153574339e-06, "loss": 0.8909, "step": 16119 }, { "epoch": 0.5842484868254141, "grad_norm": 2.5725911771624883, "learning_rate": 3.88804929774276e-06, "loss": 0.9691, "step": 16120 }, { "epoch": 0.5842847305280707, "grad_norm": 2.317979622324653, "learning_rate": 3.887477075064139e-06, "loss": 1.0585, "step": 16121 }, { "epoch": 0.5843209742307274, "grad_norm": 2.3763904618646547, "learning_rate": 3.88690486771541e-06, "loss": 1.0664, "step": 16122 }, { "epoch": 0.584357217933384, "grad_norm": 2.430803156037648, "learning_rate": 3.886332675704459e-06, "loss": 0.8031, "step": 16123 }, { "epoch": 0.5843934616360408, "grad_norm": 2.320570448292596, "learning_rate": 3.88576049903917e-06, "loss": 0.9391, "step": 16124 }, { "epoch": 0.5844297053386974, "grad_norm": 2.3538565375180904, "learning_rate": 3.885188337727428e-06, "loss": 1.0524, "step": 16125 }, { "epoch": 0.5844659490413541, "grad_norm": 2.7949073674521236, "learning_rate": 3.884616191777115e-06, "loss": 1.0023, "step": 16126 }, { "epoch": 0.5845021927440107, "grad_norm": 1.9924230358933093, "learning_rate": 3.884044061196117e-06, "loss": 0.8604, "step": 16127 }, { "epoch": 0.5845384364466674, "grad_norm": 2.1952216881008284, "learning_rate": 3.883471945992318e-06, "loss": 0.9155, "step": 16128 }, { "epoch": 0.584574680149324, "grad_norm": 2.327188030614637, "learning_rate": 3.8828998461736005e-06, "loss": 0.8078, "step": 16129 }, { "epoch": 0.5846109238519808, "grad_norm": 2.263039257819172, "learning_rate": 3.882327761747845e-06, "loss": 0.9289, "step": 16130 }, { "epoch": 0.5846471675546374, "grad_norm": 2.6226550876455152, "learning_rate": 3.881755692722938e-06, "loss": 0.7149, "step": 16131 }, { "epoch": 0.5846834112572941, "grad_norm": 2.2027288405820227, "learning_rate": 3.881183639106762e-06, "loss": 0.6703, "step": 16132 }, { "epoch": 0.5847196549599507, "grad_norm": 2.300180560418673, "learning_rate": 3.8806116009072e-06, "loss": 0.8989, "step": 16133 }, { "epoch": 0.5847558986626074, "grad_norm": 2.5140035997433006, "learning_rate": 3.880039578132131e-06, "loss": 0.7959, "step": 16134 }, { "epoch": 0.584792142365264, "grad_norm": 2.157889587381456, "learning_rate": 3.87946757078944e-06, "loss": 0.7618, "step": 16135 }, { "epoch": 0.5848283860679206, "grad_norm": 2.5105505214693706, "learning_rate": 3.87889557888701e-06, "loss": 0.8017, "step": 16136 }, { "epoch": 0.5848646297705774, "grad_norm": 2.6897973540440683, "learning_rate": 3.878323602432719e-06, "loss": 0.9474, "step": 16137 }, { "epoch": 0.584900873473234, "grad_norm": 2.486766286636613, "learning_rate": 3.877751641434449e-06, "loss": 0.9914, "step": 16138 }, { "epoch": 0.5849371171758907, "grad_norm": 2.2036255669978955, "learning_rate": 3.877179695900085e-06, "loss": 0.9274, "step": 16139 }, { "epoch": 0.5849733608785473, "grad_norm": 2.441225825870213, "learning_rate": 3.876607765837505e-06, "loss": 0.9552, "step": 16140 }, { "epoch": 0.585009604581204, "grad_norm": 2.395310643109638, "learning_rate": 3.876035851254591e-06, "loss": 0.7681, "step": 16141 }, { "epoch": 0.5850458482838606, "grad_norm": 2.4767154134292464, "learning_rate": 3.875463952159223e-06, "loss": 0.939, "step": 16142 }, { "epoch": 0.5850820919865174, "grad_norm": 2.580430438157569, "learning_rate": 3.874892068559281e-06, "loss": 1.2479, "step": 16143 }, { "epoch": 0.585118335689174, "grad_norm": 2.213634784973914, "learning_rate": 3.874320200462647e-06, "loss": 0.8966, "step": 16144 }, { "epoch": 0.5851545793918307, "grad_norm": 2.559709677872794, "learning_rate": 3.8737483478772e-06, "loss": 0.9316, "step": 16145 }, { "epoch": 0.5851908230944873, "grad_norm": 2.480190401098372, "learning_rate": 3.8731765108108165e-06, "loss": 0.9866, "step": 16146 }, { "epoch": 0.585227066797144, "grad_norm": 2.0758346194584174, "learning_rate": 3.872604689271382e-06, "loss": 0.7224, "step": 16147 }, { "epoch": 0.5852633104998006, "grad_norm": 2.064807605277112, "learning_rate": 3.872032883266773e-06, "loss": 0.8454, "step": 16148 }, { "epoch": 0.5852995542024573, "grad_norm": 2.4993227456426896, "learning_rate": 3.871461092804869e-06, "loss": 1.05, "step": 16149 }, { "epoch": 0.585335797905114, "grad_norm": 2.3366856556032705, "learning_rate": 3.870889317893546e-06, "loss": 0.9653, "step": 16150 }, { "epoch": 0.5853720416077707, "grad_norm": 2.5699461216460078, "learning_rate": 3.870317558540688e-06, "loss": 0.9306, "step": 16151 }, { "epoch": 0.5854082853104273, "grad_norm": 2.3225713699481014, "learning_rate": 3.86974581475417e-06, "loss": 0.9388, "step": 16152 }, { "epoch": 0.585444529013084, "grad_norm": 2.2447176052171174, "learning_rate": 3.869174086541871e-06, "loss": 1.0008, "step": 16153 }, { "epoch": 0.5854807727157406, "grad_norm": 2.328395567672007, "learning_rate": 3.86860237391167e-06, "loss": 0.9473, "step": 16154 }, { "epoch": 0.5855170164183973, "grad_norm": 2.352609361180252, "learning_rate": 3.8680306768714435e-06, "loss": 1.1165, "step": 16155 }, { "epoch": 0.585553260121054, "grad_norm": 2.120802059075553, "learning_rate": 3.86745899542907e-06, "loss": 0.7939, "step": 16156 }, { "epoch": 0.5855895038237107, "grad_norm": 2.401342910851097, "learning_rate": 3.866887329592426e-06, "loss": 1.0812, "step": 16157 }, { "epoch": 0.5856257475263673, "grad_norm": 2.590484716653145, "learning_rate": 3.866315679369388e-06, "loss": 1.0093, "step": 16158 }, { "epoch": 0.585661991229024, "grad_norm": 2.252759988039807, "learning_rate": 3.865744044767836e-06, "loss": 0.9811, "step": 16159 }, { "epoch": 0.5856982349316806, "grad_norm": 2.3182223735214365, "learning_rate": 3.865172425795646e-06, "loss": 0.8131, "step": 16160 }, { "epoch": 0.5857344786343373, "grad_norm": 2.4758101824615855, "learning_rate": 3.864600822460692e-06, "loss": 0.8622, "step": 16161 }, { "epoch": 0.5857707223369939, "grad_norm": 2.2858751346284856, "learning_rate": 3.864029234770852e-06, "loss": 0.8327, "step": 16162 }, { "epoch": 0.5858069660396507, "grad_norm": 2.5517540807329415, "learning_rate": 3.8634576627340035e-06, "loss": 0.8559, "step": 16163 }, { "epoch": 0.5858432097423073, "grad_norm": 2.5814665188513612, "learning_rate": 3.862886106358021e-06, "loss": 0.8889, "step": 16164 }, { "epoch": 0.585879453444964, "grad_norm": 2.2687242669190892, "learning_rate": 3.862314565650779e-06, "loss": 1.069, "step": 16165 }, { "epoch": 0.5859156971476206, "grad_norm": 2.212472035270047, "learning_rate": 3.8617430406201536e-06, "loss": 0.9112, "step": 16166 }, { "epoch": 0.5859519408502772, "grad_norm": 2.282963248932226, "learning_rate": 3.861171531274022e-06, "loss": 0.9169, "step": 16167 }, { "epoch": 0.5859881845529339, "grad_norm": 2.5743947313556297, "learning_rate": 3.860600037620258e-06, "loss": 1.0334, "step": 16168 }, { "epoch": 0.5860244282555906, "grad_norm": 2.7788671481097245, "learning_rate": 3.860028559666736e-06, "loss": 0.9166, "step": 16169 }, { "epoch": 0.5860606719582473, "grad_norm": 1.9229657150220092, "learning_rate": 3.85945709742133e-06, "loss": 0.8443, "step": 16170 }, { "epoch": 0.5860969156609039, "grad_norm": 2.242119548692306, "learning_rate": 3.858885650891917e-06, "loss": 0.9232, "step": 16171 }, { "epoch": 0.5861331593635606, "grad_norm": 2.4151472598314854, "learning_rate": 3.85831422008637e-06, "loss": 0.9631, "step": 16172 }, { "epoch": 0.5861694030662172, "grad_norm": 2.391629749072109, "learning_rate": 3.857742805012562e-06, "loss": 0.9155, "step": 16173 }, { "epoch": 0.5862056467688739, "grad_norm": 2.15037612285768, "learning_rate": 3.857171405678366e-06, "loss": 0.987, "step": 16174 }, { "epoch": 0.5862418904715305, "grad_norm": 2.524425102726325, "learning_rate": 3.856600022091659e-06, "loss": 0.9952, "step": 16175 }, { "epoch": 0.5862781341741873, "grad_norm": 2.411728601980911, "learning_rate": 3.856028654260312e-06, "loss": 0.9166, "step": 16176 }, { "epoch": 0.5863143778768439, "grad_norm": 2.424277867457411, "learning_rate": 3.855457302192197e-06, "loss": 0.9249, "step": 16177 }, { "epoch": 0.5863506215795006, "grad_norm": 2.1772484780431602, "learning_rate": 3.854885965895187e-06, "loss": 0.9443, "step": 16178 }, { "epoch": 0.5863868652821572, "grad_norm": 2.506067045870704, "learning_rate": 3.854314645377158e-06, "loss": 0.9758, "step": 16179 }, { "epoch": 0.5864231089848139, "grad_norm": 2.2324514669756996, "learning_rate": 3.8537433406459805e-06, "loss": 0.8237, "step": 16180 }, { "epoch": 0.5864593526874705, "grad_norm": 2.4009402571249927, "learning_rate": 3.853172051709526e-06, "loss": 0.9274, "step": 16181 }, { "epoch": 0.5864955963901272, "grad_norm": 2.1729043663670917, "learning_rate": 3.852600778575666e-06, "loss": 0.8458, "step": 16182 }, { "epoch": 0.5865318400927839, "grad_norm": 2.2182514048249202, "learning_rate": 3.852029521252275e-06, "loss": 0.8673, "step": 16183 }, { "epoch": 0.5865680837954406, "grad_norm": 2.3969670018064546, "learning_rate": 3.851458279747222e-06, "loss": 0.9782, "step": 16184 }, { "epoch": 0.5866043274980972, "grad_norm": 2.4180468181486354, "learning_rate": 3.85088705406838e-06, "loss": 0.9711, "step": 16185 }, { "epoch": 0.5866405712007539, "grad_norm": 2.364398257024093, "learning_rate": 3.850315844223617e-06, "loss": 0.7981, "step": 16186 }, { "epoch": 0.5866768149034105, "grad_norm": 2.431366555263671, "learning_rate": 3.849744650220809e-06, "loss": 0.8938, "step": 16187 }, { "epoch": 0.5867130586060672, "grad_norm": 2.3808233690083296, "learning_rate": 3.8491734720678235e-06, "loss": 0.8207, "step": 16188 }, { "epoch": 0.5867493023087239, "grad_norm": 2.423284209929563, "learning_rate": 3.848602309772532e-06, "loss": 1.0499, "step": 16189 }, { "epoch": 0.5867855460113806, "grad_norm": 2.2442207479204983, "learning_rate": 3.848031163342803e-06, "loss": 0.8595, "step": 16190 }, { "epoch": 0.5868217897140372, "grad_norm": 2.4447914038187037, "learning_rate": 3.847460032786509e-06, "loss": 1.0434, "step": 16191 }, { "epoch": 0.5868580334166938, "grad_norm": 2.553094743699328, "learning_rate": 3.846888918111519e-06, "loss": 0.9079, "step": 16192 }, { "epoch": 0.5868942771193505, "grad_norm": 2.198396894423472, "learning_rate": 3.846317819325701e-06, "loss": 0.8969, "step": 16193 }, { "epoch": 0.5869305208220071, "grad_norm": 2.078242671001903, "learning_rate": 3.845746736436925e-06, "loss": 0.6702, "step": 16194 }, { "epoch": 0.5869667645246638, "grad_norm": 2.4619556412367256, "learning_rate": 3.8451756694530624e-06, "loss": 0.9215, "step": 16195 }, { "epoch": 0.5870030082273205, "grad_norm": 2.5594139377839986, "learning_rate": 3.844604618381981e-06, "loss": 0.9555, "step": 16196 }, { "epoch": 0.5870392519299772, "grad_norm": 2.7649277892148167, "learning_rate": 3.8440335832315495e-06, "loss": 1.0676, "step": 16197 }, { "epoch": 0.5870754956326338, "grad_norm": 2.1617486304523177, "learning_rate": 3.843462564009633e-06, "loss": 0.8888, "step": 16198 }, { "epoch": 0.5871117393352905, "grad_norm": 2.372297663589328, "learning_rate": 3.8428915607241056e-06, "loss": 0.7311, "step": 16199 }, { "epoch": 0.5871479830379471, "grad_norm": 2.4292951848045052, "learning_rate": 3.842320573382833e-06, "loss": 0.9363, "step": 16200 }, { "epoch": 0.5871842267406038, "grad_norm": 2.2674593917989747, "learning_rate": 3.841749601993683e-06, "loss": 1.0343, "step": 16201 }, { "epoch": 0.5872204704432605, "grad_norm": 2.7651910954520083, "learning_rate": 3.841178646564522e-06, "loss": 0.9801, "step": 16202 }, { "epoch": 0.5872567141459172, "grad_norm": 2.435844982053927, "learning_rate": 3.840607707103218e-06, "loss": 0.692, "step": 16203 }, { "epoch": 0.5872929578485738, "grad_norm": 2.3736095653825764, "learning_rate": 3.840036783617639e-06, "loss": 0.9339, "step": 16204 }, { "epoch": 0.5873292015512305, "grad_norm": 2.415562576471245, "learning_rate": 3.83946587611565e-06, "loss": 0.9328, "step": 16205 }, { "epoch": 0.5873654452538871, "grad_norm": 2.1068049091490053, "learning_rate": 3.838894984605122e-06, "loss": 0.7766, "step": 16206 }, { "epoch": 0.5874016889565438, "grad_norm": 2.2323572302944577, "learning_rate": 3.8383241090939184e-06, "loss": 0.7778, "step": 16207 }, { "epoch": 0.5874379326592004, "grad_norm": 2.192053672971053, "learning_rate": 3.837753249589906e-06, "loss": 0.9264, "step": 16208 }, { "epoch": 0.5874741763618572, "grad_norm": 2.1323933324745203, "learning_rate": 3.83718240610095e-06, "loss": 0.8503, "step": 16209 }, { "epoch": 0.5875104200645138, "grad_norm": 2.165618333171151, "learning_rate": 3.836611578634918e-06, "loss": 0.7304, "step": 16210 }, { "epoch": 0.5875466637671705, "grad_norm": 2.308645445081038, "learning_rate": 3.836040767199675e-06, "loss": 1.0786, "step": 16211 }, { "epoch": 0.5875829074698271, "grad_norm": 2.6227486876410024, "learning_rate": 3.835469971803087e-06, "loss": 1.0163, "step": 16212 }, { "epoch": 0.5876191511724838, "grad_norm": 2.6846299734685877, "learning_rate": 3.834899192453014e-06, "loss": 0.9467, "step": 16213 }, { "epoch": 0.5876553948751404, "grad_norm": 2.3625755236186996, "learning_rate": 3.83432842915733e-06, "loss": 0.8807, "step": 16214 }, { "epoch": 0.5876916385777972, "grad_norm": 2.145020121960645, "learning_rate": 3.833757681923894e-06, "loss": 0.6552, "step": 16215 }, { "epoch": 0.5877278822804538, "grad_norm": 2.1998811445005817, "learning_rate": 3.833186950760573e-06, "loss": 0.9967, "step": 16216 }, { "epoch": 0.5877641259831105, "grad_norm": 2.1423104953150722, "learning_rate": 3.832616235675228e-06, "loss": 0.8845, "step": 16217 }, { "epoch": 0.5878003696857671, "grad_norm": 2.341362026825392, "learning_rate": 3.8320455366757255e-06, "loss": 0.8078, "step": 16218 }, { "epoch": 0.5878366133884237, "grad_norm": 2.256073968402155, "learning_rate": 3.831474853769931e-06, "loss": 0.8153, "step": 16219 }, { "epoch": 0.5878728570910804, "grad_norm": 2.103131091043286, "learning_rate": 3.830904186965705e-06, "loss": 0.9291, "step": 16220 }, { "epoch": 0.587909100793737, "grad_norm": 2.145981101617035, "learning_rate": 3.830333536270912e-06, "loss": 0.8085, "step": 16221 }, { "epoch": 0.5879453444963938, "grad_norm": 2.405784774937652, "learning_rate": 3.829762901693417e-06, "loss": 0.9535, "step": 16222 }, { "epoch": 0.5879815881990504, "grad_norm": 2.333264165019532, "learning_rate": 3.829192283241081e-06, "loss": 0.8939, "step": 16223 }, { "epoch": 0.5880178319017071, "grad_norm": 2.589657686018865, "learning_rate": 3.828621680921767e-06, "loss": 1.043, "step": 16224 }, { "epoch": 0.5880540756043637, "grad_norm": 2.563410952459803, "learning_rate": 3.8280510947433354e-06, "loss": 0.9101, "step": 16225 }, { "epoch": 0.5880903193070204, "grad_norm": 2.7367773655031926, "learning_rate": 3.8274805247136535e-06, "loss": 0.9213, "step": 16226 }, { "epoch": 0.588126563009677, "grad_norm": 2.291754240126281, "learning_rate": 3.826909970840581e-06, "loss": 0.8732, "step": 16227 }, { "epoch": 0.5881628067123338, "grad_norm": 2.796274821108642, "learning_rate": 3.82633943313198e-06, "loss": 0.7629, "step": 16228 }, { "epoch": 0.5881990504149904, "grad_norm": 2.5290084883002093, "learning_rate": 3.82576891159571e-06, "loss": 0.9197, "step": 16229 }, { "epoch": 0.5882352941176471, "grad_norm": 2.6732907693794745, "learning_rate": 3.8251984062396355e-06, "loss": 0.928, "step": 16230 }, { "epoch": 0.5882715378203037, "grad_norm": 2.3462403315411864, "learning_rate": 3.824627917071617e-06, "loss": 1.0598, "step": 16231 }, { "epoch": 0.5883077815229604, "grad_norm": 2.2417331949963213, "learning_rate": 3.824057444099513e-06, "loss": 0.9254, "step": 16232 }, { "epoch": 0.588344025225617, "grad_norm": 2.17867670614087, "learning_rate": 3.823486987331187e-06, "loss": 0.9484, "step": 16233 }, { "epoch": 0.5883802689282737, "grad_norm": 2.4579077570983343, "learning_rate": 3.822916546774499e-06, "loss": 0.8609, "step": 16234 }, { "epoch": 0.5884165126309304, "grad_norm": 2.280449623651464, "learning_rate": 3.82234612243731e-06, "loss": 0.9122, "step": 16235 }, { "epoch": 0.5884527563335871, "grad_norm": 2.428344968443118, "learning_rate": 3.821775714327478e-06, "loss": 1.0756, "step": 16236 }, { "epoch": 0.5884890000362437, "grad_norm": 2.4508589343480116, "learning_rate": 3.821205322452863e-06, "loss": 0.8486, "step": 16237 }, { "epoch": 0.5885252437389004, "grad_norm": 2.2955062342178767, "learning_rate": 3.820634946821328e-06, "loss": 0.8107, "step": 16238 }, { "epoch": 0.588561487441557, "grad_norm": 2.485584388846744, "learning_rate": 3.82006458744073e-06, "loss": 0.9078, "step": 16239 }, { "epoch": 0.5885977311442137, "grad_norm": 2.3515127339682422, "learning_rate": 3.819494244318928e-06, "loss": 0.8054, "step": 16240 }, { "epoch": 0.5886339748468704, "grad_norm": 2.4573806805406693, "learning_rate": 3.81892391746378e-06, "loss": 0.8133, "step": 16241 }, { "epoch": 0.5886702185495271, "grad_norm": 2.3627747145229896, "learning_rate": 3.818353606883148e-06, "loss": 0.9092, "step": 16242 }, { "epoch": 0.5887064622521837, "grad_norm": 2.2461949189842882, "learning_rate": 3.817783312584889e-06, "loss": 0.8514, "step": 16243 }, { "epoch": 0.5887427059548404, "grad_norm": 2.306222344447518, "learning_rate": 3.81721303457686e-06, "loss": 0.8507, "step": 16244 }, { "epoch": 0.588778949657497, "grad_norm": 2.404936729730592, "learning_rate": 3.8166427728669185e-06, "loss": 0.9129, "step": 16245 }, { "epoch": 0.5888151933601536, "grad_norm": 2.2134811140290704, "learning_rate": 3.816072527462925e-06, "loss": 1.011, "step": 16246 }, { "epoch": 0.5888514370628103, "grad_norm": 2.2032211703701394, "learning_rate": 3.8155022983727375e-06, "loss": 0.9573, "step": 16247 }, { "epoch": 0.588887680765467, "grad_norm": 2.4974159094652895, "learning_rate": 3.814932085604211e-06, "loss": 0.8469, "step": 16248 }, { "epoch": 0.5889239244681237, "grad_norm": 2.3571378738113973, "learning_rate": 3.8143618891652036e-06, "loss": 0.8239, "step": 16249 }, { "epoch": 0.5889601681707803, "grad_norm": 2.409465580780909, "learning_rate": 3.8137917090635725e-06, "loss": 0.8546, "step": 16250 }, { "epoch": 0.588996411873437, "grad_norm": 2.2576967579289917, "learning_rate": 3.8132215453071743e-06, "loss": 0.9193, "step": 16251 }, { "epoch": 0.5890326555760936, "grad_norm": 2.307503071029956, "learning_rate": 3.812651397903866e-06, "loss": 0.9388, "step": 16252 }, { "epoch": 0.5890688992787503, "grad_norm": 2.300566807810315, "learning_rate": 3.8120812668615e-06, "loss": 0.9546, "step": 16253 }, { "epoch": 0.5891051429814069, "grad_norm": 2.3728100242445564, "learning_rate": 3.8115111521879393e-06, "loss": 0.8263, "step": 16254 }, { "epoch": 0.5891413866840637, "grad_norm": 2.2701169504150127, "learning_rate": 3.810941053891035e-06, "loss": 0.9215, "step": 16255 }, { "epoch": 0.5891776303867203, "grad_norm": 2.400137340563503, "learning_rate": 3.8103709719786447e-06, "loss": 0.9482, "step": 16256 }, { "epoch": 0.589213874089377, "grad_norm": 2.451117700305423, "learning_rate": 3.8098009064586205e-06, "loss": 0.848, "step": 16257 }, { "epoch": 0.5892501177920336, "grad_norm": 2.491636584402657, "learning_rate": 3.8092308573388224e-06, "loss": 0.9921, "step": 16258 }, { "epoch": 0.5892863614946903, "grad_norm": 2.329925301173706, "learning_rate": 3.8086608246271028e-06, "loss": 0.9398, "step": 16259 }, { "epoch": 0.5893226051973469, "grad_norm": 2.1524671054203193, "learning_rate": 3.8080908083313162e-06, "loss": 0.8661, "step": 16260 }, { "epoch": 0.5893588489000037, "grad_norm": 2.503640260349265, "learning_rate": 3.8075208084593164e-06, "loss": 1.0495, "step": 16261 }, { "epoch": 0.5893950926026603, "grad_norm": 2.8206810171006227, "learning_rate": 3.8069508250189604e-06, "loss": 1.0968, "step": 16262 }, { "epoch": 0.589431336305317, "grad_norm": 2.356170092575891, "learning_rate": 3.8063808580180993e-06, "loss": 0.8025, "step": 16263 }, { "epoch": 0.5894675800079736, "grad_norm": 2.6131037408928295, "learning_rate": 3.8058109074645894e-06, "loss": 0.8488, "step": 16264 }, { "epoch": 0.5895038237106303, "grad_norm": 2.520082472863143, "learning_rate": 3.80524097336628e-06, "loss": 1.0304, "step": 16265 }, { "epoch": 0.5895400674132869, "grad_norm": 2.2686578481299646, "learning_rate": 3.8046710557310296e-06, "loss": 0.9942, "step": 16266 }, { "epoch": 0.5895763111159436, "grad_norm": 2.3612393461414505, "learning_rate": 3.8041011545666895e-06, "loss": 0.9221, "step": 16267 }, { "epoch": 0.5896125548186003, "grad_norm": 2.5523835402946036, "learning_rate": 3.8035312698811124e-06, "loss": 0.9387, "step": 16268 }, { "epoch": 0.589648798521257, "grad_norm": 2.1910888683724763, "learning_rate": 3.8029614016821494e-06, "loss": 0.9885, "step": 16269 }, { "epoch": 0.5896850422239136, "grad_norm": 2.5317513848339477, "learning_rate": 3.8023915499776555e-06, "loss": 0.7967, "step": 16270 }, { "epoch": 0.5897212859265702, "grad_norm": 2.389058614486054, "learning_rate": 3.801821714775481e-06, "loss": 0.6774, "step": 16271 }, { "epoch": 0.5897575296292269, "grad_norm": 2.1578166591971693, "learning_rate": 3.8012518960834792e-06, "loss": 0.7884, "step": 16272 }, { "epoch": 0.5897937733318835, "grad_norm": 2.4132878163556337, "learning_rate": 3.8006820939094995e-06, "loss": 0.9191, "step": 16273 }, { "epoch": 0.5898300170345403, "grad_norm": 2.3404820875987045, "learning_rate": 3.8001123082613973e-06, "loss": 0.7753, "step": 16274 }, { "epoch": 0.589866260737197, "grad_norm": 2.466906515196085, "learning_rate": 3.7995425391470212e-06, "loss": 0.878, "step": 16275 }, { "epoch": 0.5899025044398536, "grad_norm": 2.4810624001882293, "learning_rate": 3.7989727865742237e-06, "loss": 0.9649, "step": 16276 }, { "epoch": 0.5899387481425102, "grad_norm": 2.331477923730038, "learning_rate": 3.7984030505508537e-06, "loss": 0.9334, "step": 16277 }, { "epoch": 0.5899749918451669, "grad_norm": 2.4003834634536316, "learning_rate": 3.797833331084763e-06, "loss": 0.8408, "step": 16278 }, { "epoch": 0.5900112355478235, "grad_norm": 2.133848191222399, "learning_rate": 3.7972636281838026e-06, "loss": 0.6863, "step": 16279 }, { "epoch": 0.5900474792504802, "grad_norm": 2.2726550758092996, "learning_rate": 3.7966939418558225e-06, "loss": 1.0055, "step": 16280 }, { "epoch": 0.5900837229531369, "grad_norm": 2.2785014374190418, "learning_rate": 3.7961242721086696e-06, "loss": 1.0142, "step": 16281 }, { "epoch": 0.5901199666557936, "grad_norm": 2.1892613736332516, "learning_rate": 3.795554618950198e-06, "loss": 1.0185, "step": 16282 }, { "epoch": 0.5901562103584502, "grad_norm": 2.1397314066255273, "learning_rate": 3.794984982388256e-06, "loss": 0.8528, "step": 16283 }, { "epoch": 0.5901924540611069, "grad_norm": 2.3514453057028586, "learning_rate": 3.7944153624306896e-06, "loss": 0.6195, "step": 16284 }, { "epoch": 0.5902286977637635, "grad_norm": 2.343829659119737, "learning_rate": 3.7938457590853526e-06, "loss": 0.9571, "step": 16285 }, { "epoch": 0.5902649414664202, "grad_norm": 2.1442047904315107, "learning_rate": 3.793276172360091e-06, "loss": 0.7258, "step": 16286 }, { "epoch": 0.5903011851690769, "grad_norm": 2.4702651249017085, "learning_rate": 3.7927066022627553e-06, "loss": 0.8453, "step": 16287 }, { "epoch": 0.5903374288717336, "grad_norm": 2.139075109561956, "learning_rate": 3.79213704880119e-06, "loss": 1.0235, "step": 16288 }, { "epoch": 0.5903736725743902, "grad_norm": 1.987502220703671, "learning_rate": 3.7915675119832475e-06, "loss": 0.8955, "step": 16289 }, { "epoch": 0.5904099162770469, "grad_norm": 2.5392875841215727, "learning_rate": 3.7909979918167738e-06, "loss": 0.9301, "step": 16290 }, { "epoch": 0.5904461599797035, "grad_norm": 2.530902486612802, "learning_rate": 3.7904284883096165e-06, "loss": 1.0077, "step": 16291 }, { "epoch": 0.5904824036823602, "grad_norm": 2.0726832236380104, "learning_rate": 3.789859001469621e-06, "loss": 1.0238, "step": 16292 }, { "epoch": 0.5905186473850168, "grad_norm": 2.5771001972861165, "learning_rate": 3.7892895313046385e-06, "loss": 0.908, "step": 16293 }, { "epoch": 0.5905548910876736, "grad_norm": 2.7141720636623328, "learning_rate": 3.788720077822514e-06, "loss": 0.7926, "step": 16294 }, { "epoch": 0.5905911347903302, "grad_norm": 2.292540038094845, "learning_rate": 3.7881506410310946e-06, "loss": 0.8634, "step": 16295 }, { "epoch": 0.5906273784929869, "grad_norm": 2.4591030539798893, "learning_rate": 3.787581220938226e-06, "loss": 1.0483, "step": 16296 }, { "epoch": 0.5906636221956435, "grad_norm": 2.4775847431085105, "learning_rate": 3.7870118175517546e-06, "loss": 1.0054, "step": 16297 }, { "epoch": 0.5906998658983001, "grad_norm": 2.21325853765255, "learning_rate": 3.786442430879528e-06, "loss": 0.6538, "step": 16298 }, { "epoch": 0.5907361096009568, "grad_norm": 2.1303524360237884, "learning_rate": 3.7858730609293897e-06, "loss": 0.9169, "step": 16299 }, { "epoch": 0.5907723533036136, "grad_norm": 2.5477145780103916, "learning_rate": 3.7853037077091847e-06, "loss": 0.8429, "step": 16300 }, { "epoch": 0.5908085970062702, "grad_norm": 2.4854673935826024, "learning_rate": 3.7847343712267624e-06, "loss": 0.8771, "step": 16301 }, { "epoch": 0.5908448407089268, "grad_norm": 2.3173670970944578, "learning_rate": 3.7841650514899654e-06, "loss": 0.8993, "step": 16302 }, { "epoch": 0.5908810844115835, "grad_norm": 2.147781478873866, "learning_rate": 3.7835957485066383e-06, "loss": 1.0313, "step": 16303 }, { "epoch": 0.5909173281142401, "grad_norm": 2.3603206540743775, "learning_rate": 3.783026462284624e-06, "loss": 0.9659, "step": 16304 }, { "epoch": 0.5909535718168968, "grad_norm": 2.648923742382477, "learning_rate": 3.7824571928317715e-06, "loss": 0.9042, "step": 16305 }, { "epoch": 0.5909898155195534, "grad_norm": 2.521058989184394, "learning_rate": 3.7818879401559227e-06, "loss": 0.9009, "step": 16306 }, { "epoch": 0.5910260592222102, "grad_norm": 2.4754673737954107, "learning_rate": 3.7813187042649208e-06, "loss": 0.8739, "step": 16307 }, { "epoch": 0.5910623029248668, "grad_norm": 2.305425345172264, "learning_rate": 3.780749485166609e-06, "loss": 0.9614, "step": 16308 }, { "epoch": 0.5910985466275235, "grad_norm": 2.2579847356746257, "learning_rate": 3.780180282868834e-06, "loss": 0.9178, "step": 16309 }, { "epoch": 0.5911347903301801, "grad_norm": 2.345875778336915, "learning_rate": 3.7796110973794366e-06, "loss": 0.8257, "step": 16310 }, { "epoch": 0.5911710340328368, "grad_norm": 2.4260639834127264, "learning_rate": 3.7790419287062597e-06, "loss": 0.9296, "step": 16311 }, { "epoch": 0.5912072777354934, "grad_norm": 2.5293495715562595, "learning_rate": 3.7784727768571442e-06, "loss": 0.7493, "step": 16312 }, { "epoch": 0.5912435214381502, "grad_norm": 2.462922679017877, "learning_rate": 3.777903641839938e-06, "loss": 0.9064, "step": 16313 }, { "epoch": 0.5912797651408068, "grad_norm": 2.341521794577548, "learning_rate": 3.7773345236624804e-06, "loss": 0.8588, "step": 16314 }, { "epoch": 0.5913160088434635, "grad_norm": 2.2024098064358255, "learning_rate": 3.7767654223326135e-06, "loss": 0.9724, "step": 16315 }, { "epoch": 0.5913522525461201, "grad_norm": 2.436809599216667, "learning_rate": 3.776196337858178e-06, "loss": 0.7916, "step": 16316 }, { "epoch": 0.5913884962487768, "grad_norm": 2.1989263229130334, "learning_rate": 3.775627270247018e-06, "loss": 0.8511, "step": 16317 }, { "epoch": 0.5914247399514334, "grad_norm": 2.384825448481318, "learning_rate": 3.775058219506974e-06, "loss": 0.7614, "step": 16318 }, { "epoch": 0.5914609836540901, "grad_norm": 2.342460833026226, "learning_rate": 3.7744891856458864e-06, "loss": 0.8287, "step": 16319 }, { "epoch": 0.5914972273567468, "grad_norm": 2.1900897148050142, "learning_rate": 3.7739201686715954e-06, "loss": 0.8911, "step": 16320 }, { "epoch": 0.5915334710594035, "grad_norm": 2.2470891038944933, "learning_rate": 3.7733511685919444e-06, "loss": 0.7669, "step": 16321 }, { "epoch": 0.5915697147620601, "grad_norm": 2.5363666134933616, "learning_rate": 3.7727821854147727e-06, "loss": 0.9493, "step": 16322 }, { "epoch": 0.5916059584647168, "grad_norm": 2.2570893267087606, "learning_rate": 3.7722132191479198e-06, "loss": 0.7744, "step": 16323 }, { "epoch": 0.5916422021673734, "grad_norm": 2.6297062835232845, "learning_rate": 3.7716442697992243e-06, "loss": 0.8691, "step": 16324 }, { "epoch": 0.59167844587003, "grad_norm": 2.6056508235734515, "learning_rate": 3.7710753373765307e-06, "loss": 0.8966, "step": 16325 }, { "epoch": 0.5917146895726867, "grad_norm": 2.3210379894379, "learning_rate": 3.7705064218876753e-06, "loss": 0.9587, "step": 16326 }, { "epoch": 0.5917509332753434, "grad_norm": 2.0705014006046634, "learning_rate": 3.769937523340498e-06, "loss": 0.9149, "step": 16327 }, { "epoch": 0.5917871769780001, "grad_norm": 2.6280261124347093, "learning_rate": 3.769368641742836e-06, "loss": 0.9151, "step": 16328 }, { "epoch": 0.5918234206806567, "grad_norm": 2.6056808225161423, "learning_rate": 3.768799777102531e-06, "loss": 0.7684, "step": 16329 }, { "epoch": 0.5918596643833134, "grad_norm": 2.5565029348503083, "learning_rate": 3.768230929427421e-06, "loss": 0.8949, "step": 16330 }, { "epoch": 0.59189590808597, "grad_norm": 2.4160350344395107, "learning_rate": 3.767662098725343e-06, "loss": 0.8493, "step": 16331 }, { "epoch": 0.5919321517886267, "grad_norm": 2.512595074395974, "learning_rate": 3.7670932850041348e-06, "loss": 1.2569, "step": 16332 }, { "epoch": 0.5919683954912834, "grad_norm": 2.1966927941228707, "learning_rate": 3.7665244882716373e-06, "loss": 0.8119, "step": 16333 }, { "epoch": 0.5920046391939401, "grad_norm": 2.4931213739170577, "learning_rate": 3.7659557085356864e-06, "loss": 0.9333, "step": 16334 }, { "epoch": 0.5920408828965967, "grad_norm": 2.3203737467884316, "learning_rate": 3.7653869458041192e-06, "loss": 0.89, "step": 16335 }, { "epoch": 0.5920771265992534, "grad_norm": 2.7086844182879886, "learning_rate": 3.7648182000847726e-06, "loss": 0.9899, "step": 16336 }, { "epoch": 0.59211337030191, "grad_norm": 2.066991239199492, "learning_rate": 3.764249471385485e-06, "loss": 0.8685, "step": 16337 }, { "epoch": 0.5921496140045667, "grad_norm": 2.1167966600505013, "learning_rate": 3.7636807597140922e-06, "loss": 0.806, "step": 16338 }, { "epoch": 0.5921858577072233, "grad_norm": 2.5875606106398212, "learning_rate": 3.7631120650784315e-06, "loss": 0.9703, "step": 16339 }, { "epoch": 0.5922221014098801, "grad_norm": 2.374253898339299, "learning_rate": 3.762543387486336e-06, "loss": 0.9085, "step": 16340 }, { "epoch": 0.5922583451125367, "grad_norm": 2.3870260820730467, "learning_rate": 3.7619747269456464e-06, "loss": 0.9539, "step": 16341 }, { "epoch": 0.5922945888151934, "grad_norm": 2.476465345335295, "learning_rate": 3.761406083464196e-06, "loss": 1.0176, "step": 16342 }, { "epoch": 0.59233083251785, "grad_norm": 2.1397514238906186, "learning_rate": 3.7608374570498207e-06, "loss": 1.0321, "step": 16343 }, { "epoch": 0.5923670762205067, "grad_norm": 2.1673608914794364, "learning_rate": 3.760268847710354e-06, "loss": 0.9049, "step": 16344 }, { "epoch": 0.5924033199231633, "grad_norm": 2.0212641313129023, "learning_rate": 3.7597002554536344e-06, "loss": 0.6642, "step": 16345 }, { "epoch": 0.5924395636258201, "grad_norm": 2.4017287846682476, "learning_rate": 3.7591316802874956e-06, "loss": 0.9571, "step": 16346 }, { "epoch": 0.5924758073284767, "grad_norm": 2.1081400419197767, "learning_rate": 3.758563122219772e-06, "loss": 0.9462, "step": 16347 }, { "epoch": 0.5925120510311334, "grad_norm": 2.4735624841010733, "learning_rate": 3.7579945812582963e-06, "loss": 0.8464, "step": 16348 }, { "epoch": 0.59254829473379, "grad_norm": 2.3004560939661025, "learning_rate": 3.7574260574109056e-06, "loss": 0.8645, "step": 16349 }, { "epoch": 0.5925845384364467, "grad_norm": 2.45593168828778, "learning_rate": 3.756857550685432e-06, "loss": 0.9794, "step": 16350 }, { "epoch": 0.5926207821391033, "grad_norm": 2.076976020670805, "learning_rate": 3.7562890610897096e-06, "loss": 0.7508, "step": 16351 }, { "epoch": 0.5926570258417599, "grad_norm": 2.138346874172463, "learning_rate": 3.7557205886315694e-06, "loss": 0.8238, "step": 16352 }, { "epoch": 0.5926932695444167, "grad_norm": 2.38298831759195, "learning_rate": 3.7551521333188494e-06, "loss": 0.8739, "step": 16353 }, { "epoch": 0.5927295132470733, "grad_norm": 2.7745756256504923, "learning_rate": 3.7545836951593805e-06, "loss": 1.0064, "step": 16354 }, { "epoch": 0.59276575694973, "grad_norm": 2.1087801942968647, "learning_rate": 3.754015274160995e-06, "loss": 0.8653, "step": 16355 }, { "epoch": 0.5928020006523866, "grad_norm": 2.330308838241234, "learning_rate": 3.7534468703315235e-06, "loss": 0.8808, "step": 16356 }, { "epoch": 0.5928382443550433, "grad_norm": 2.329591465611876, "learning_rate": 3.7528784836788025e-06, "loss": 0.8407, "step": 16357 }, { "epoch": 0.5928744880576999, "grad_norm": 2.2310948146113927, "learning_rate": 3.752310114210661e-06, "loss": 0.8536, "step": 16358 }, { "epoch": 0.5929107317603567, "grad_norm": 2.08562187444374, "learning_rate": 3.751741761934932e-06, "loss": 0.9762, "step": 16359 }, { "epoch": 0.5929469754630133, "grad_norm": 2.54452339638536, "learning_rate": 3.7511734268594434e-06, "loss": 1.0953, "step": 16360 }, { "epoch": 0.59298321916567, "grad_norm": 2.3636905622584234, "learning_rate": 3.750605108992033e-06, "loss": 1.0233, "step": 16361 }, { "epoch": 0.5930194628683266, "grad_norm": 2.256236131030827, "learning_rate": 3.750036808340528e-06, "loss": 0.8846, "step": 16362 }, { "epoch": 0.5930557065709833, "grad_norm": 2.2801464248742556, "learning_rate": 3.7494685249127606e-06, "loss": 1.0023, "step": 16363 }, { "epoch": 0.5930919502736399, "grad_norm": 2.2592137291347787, "learning_rate": 3.7489002587165586e-06, "loss": 0.7869, "step": 16364 }, { "epoch": 0.5931281939762966, "grad_norm": 2.6111586733688714, "learning_rate": 3.7483320097597557e-06, "loss": 0.9053, "step": 16365 }, { "epoch": 0.5931644376789533, "grad_norm": 2.3579385052726005, "learning_rate": 3.747763778050181e-06, "loss": 0.8503, "step": 16366 }, { "epoch": 0.59320068138161, "grad_norm": 2.3640741730615105, "learning_rate": 3.747195563595662e-06, "loss": 0.9586, "step": 16367 }, { "epoch": 0.5932369250842666, "grad_norm": 2.350355256598453, "learning_rate": 3.746627366404032e-06, "loss": 1.0343, "step": 16368 }, { "epoch": 0.5932731687869233, "grad_norm": 2.4775469214624892, "learning_rate": 3.7460591864831196e-06, "loss": 0.9008, "step": 16369 }, { "epoch": 0.5933094124895799, "grad_norm": 2.211350167687975, "learning_rate": 3.7454910238407527e-06, "loss": 0.8319, "step": 16370 }, { "epoch": 0.5933456561922366, "grad_norm": 2.420541644891884, "learning_rate": 3.744922878484759e-06, "loss": 0.8238, "step": 16371 }, { "epoch": 0.5933818998948933, "grad_norm": 2.334631953421211, "learning_rate": 3.7443547504229715e-06, "loss": 0.9752, "step": 16372 }, { "epoch": 0.59341814359755, "grad_norm": 2.4870423873576093, "learning_rate": 3.7437866396632153e-06, "loss": 0.8791, "step": 16373 }, { "epoch": 0.5934543873002066, "grad_norm": 2.6123888966313347, "learning_rate": 3.7432185462133203e-06, "loss": 0.9598, "step": 16374 }, { "epoch": 0.5934906310028633, "grad_norm": 2.1745451683541357, "learning_rate": 3.7426504700811117e-06, "loss": 0.7556, "step": 16375 }, { "epoch": 0.5935268747055199, "grad_norm": 2.4921647517050887, "learning_rate": 3.7420824112744213e-06, "loss": 0.8092, "step": 16376 }, { "epoch": 0.5935631184081765, "grad_norm": 2.2535910551909244, "learning_rate": 3.741514369801074e-06, "loss": 0.9227, "step": 16377 }, { "epoch": 0.5935993621108332, "grad_norm": 2.041933127118537, "learning_rate": 3.740946345668897e-06, "loss": 0.9044, "step": 16378 }, { "epoch": 0.59363560581349, "grad_norm": 2.378390489817745, "learning_rate": 3.7403783388857163e-06, "loss": 0.9199, "step": 16379 }, { "epoch": 0.5936718495161466, "grad_norm": 2.2552480911488795, "learning_rate": 3.739810349459363e-06, "loss": 0.8092, "step": 16380 }, { "epoch": 0.5937080932188032, "grad_norm": 2.3478272261864284, "learning_rate": 3.73924237739766e-06, "loss": 0.9124, "step": 16381 }, { "epoch": 0.5937443369214599, "grad_norm": 2.163805107676664, "learning_rate": 3.7386744227084347e-06, "loss": 0.8611, "step": 16382 }, { "epoch": 0.5937805806241165, "grad_norm": 2.0616785157659834, "learning_rate": 3.7381064853995124e-06, "loss": 0.9141, "step": 16383 }, { "epoch": 0.5938168243267732, "grad_norm": 2.3346186506747677, "learning_rate": 3.7375385654787204e-06, "loss": 0.854, "step": 16384 }, { "epoch": 0.5938530680294298, "grad_norm": 2.404228405281487, "learning_rate": 3.7369706629538837e-06, "loss": 0.8588, "step": 16385 }, { "epoch": 0.5938893117320866, "grad_norm": 2.280787155789567, "learning_rate": 3.736402777832827e-06, "loss": 0.9893, "step": 16386 }, { "epoch": 0.5939255554347432, "grad_norm": 2.268710366679395, "learning_rate": 3.735834910123374e-06, "loss": 0.8988, "step": 16387 }, { "epoch": 0.5939617991373999, "grad_norm": 2.111729506734001, "learning_rate": 3.7352670598333535e-06, "loss": 0.8734, "step": 16388 }, { "epoch": 0.5939980428400565, "grad_norm": 2.332290515671559, "learning_rate": 3.734699226970588e-06, "loss": 0.9091, "step": 16389 }, { "epoch": 0.5940342865427132, "grad_norm": 2.3435033615954834, "learning_rate": 3.7341314115429016e-06, "loss": 0.9439, "step": 16390 }, { "epoch": 0.5940705302453698, "grad_norm": 2.4905628708459147, "learning_rate": 3.7335636135581165e-06, "loss": 0.8328, "step": 16391 }, { "epoch": 0.5941067739480266, "grad_norm": 2.1635579848265145, "learning_rate": 3.7329958330240618e-06, "loss": 1.0192, "step": 16392 }, { "epoch": 0.5941430176506832, "grad_norm": 2.3180476894132283, "learning_rate": 3.7324280699485584e-06, "loss": 0.8393, "step": 16393 }, { "epoch": 0.5941792613533399, "grad_norm": 2.3568045583125716, "learning_rate": 3.7318603243394284e-06, "loss": 0.9845, "step": 16394 }, { "epoch": 0.5942155050559965, "grad_norm": 2.3266284489948235, "learning_rate": 3.731292596204496e-06, "loss": 0.9684, "step": 16395 }, { "epoch": 0.5942517487586532, "grad_norm": 2.466442987761698, "learning_rate": 3.7307248855515854e-06, "loss": 0.9735, "step": 16396 }, { "epoch": 0.5942879924613098, "grad_norm": 2.67298647725566, "learning_rate": 3.7301571923885184e-06, "loss": 0.805, "step": 16397 }, { "epoch": 0.5943242361639665, "grad_norm": 2.4576951923128427, "learning_rate": 3.7295895167231166e-06, "loss": 0.9515, "step": 16398 }, { "epoch": 0.5943604798666232, "grad_norm": 2.5467190194692018, "learning_rate": 3.7290218585632008e-06, "loss": 0.9539, "step": 16399 }, { "epoch": 0.5943967235692799, "grad_norm": 2.5457106705064465, "learning_rate": 3.728454217916598e-06, "loss": 1.0678, "step": 16400 }, { "epoch": 0.5944329672719365, "grad_norm": 2.235561991247597, "learning_rate": 3.7278865947911264e-06, "loss": 0.9678, "step": 16401 }, { "epoch": 0.5944692109745932, "grad_norm": 2.357733114216284, "learning_rate": 3.7273189891946084e-06, "loss": 0.9783, "step": 16402 }, { "epoch": 0.5945054546772498, "grad_norm": 2.4359116199449002, "learning_rate": 3.7267514011348636e-06, "loss": 1.0622, "step": 16403 }, { "epoch": 0.5945416983799064, "grad_norm": 2.731253778671659, "learning_rate": 3.7261838306197164e-06, "loss": 0.9466, "step": 16404 }, { "epoch": 0.5945779420825632, "grad_norm": 2.445822836296757, "learning_rate": 3.7256162776569855e-06, "loss": 0.9458, "step": 16405 }, { "epoch": 0.5946141857852199, "grad_norm": 2.60068671105962, "learning_rate": 3.7250487422544914e-06, "loss": 0.986, "step": 16406 }, { "epoch": 0.5946504294878765, "grad_norm": 2.307468898425456, "learning_rate": 3.7244812244200523e-06, "loss": 0.9235, "step": 16407 }, { "epoch": 0.5946866731905331, "grad_norm": 2.573171699998662, "learning_rate": 3.723913724161492e-06, "loss": 0.8354, "step": 16408 }, { "epoch": 0.5947229168931898, "grad_norm": 2.4115357171662923, "learning_rate": 3.7233462414866297e-06, "loss": 0.9015, "step": 16409 }, { "epoch": 0.5947591605958464, "grad_norm": 2.5057232114034997, "learning_rate": 3.722778776403284e-06, "loss": 0.8814, "step": 16410 }, { "epoch": 0.5947954042985031, "grad_norm": 2.461444655805463, "learning_rate": 3.722211328919272e-06, "loss": 1.0805, "step": 16411 }, { "epoch": 0.5948316480011598, "grad_norm": 2.1099877425799796, "learning_rate": 3.7216438990424175e-06, "loss": 0.7074, "step": 16412 }, { "epoch": 0.5948678917038165, "grad_norm": 2.458411828525056, "learning_rate": 3.7210764867805365e-06, "loss": 0.8059, "step": 16413 }, { "epoch": 0.5949041354064731, "grad_norm": 2.3590013527541034, "learning_rate": 3.7205090921414483e-06, "loss": 0.9576, "step": 16414 }, { "epoch": 0.5949403791091298, "grad_norm": 2.2818518397810656, "learning_rate": 3.7199417151329697e-06, "loss": 0.9203, "step": 16415 }, { "epoch": 0.5949766228117864, "grad_norm": 2.080856004025969, "learning_rate": 3.7193743557629215e-06, "loss": 0.7983, "step": 16416 }, { "epoch": 0.5950128665144431, "grad_norm": 2.1629713161968134, "learning_rate": 3.7188070140391203e-06, "loss": 0.8723, "step": 16417 }, { "epoch": 0.5950491102170998, "grad_norm": 2.358994802618034, "learning_rate": 3.7182396899693836e-06, "loss": 0.9783, "step": 16418 }, { "epoch": 0.5950853539197565, "grad_norm": 2.011825075415598, "learning_rate": 3.717672383561526e-06, "loss": 0.834, "step": 16419 }, { "epoch": 0.5951215976224131, "grad_norm": 2.344679714717328, "learning_rate": 3.71710509482337e-06, "loss": 1.0033, "step": 16420 }, { "epoch": 0.5951578413250698, "grad_norm": 2.267903188294004, "learning_rate": 3.7165378237627303e-06, "loss": 0.9034, "step": 16421 }, { "epoch": 0.5951940850277264, "grad_norm": 2.312236213242705, "learning_rate": 3.7159705703874225e-06, "loss": 0.8502, "step": 16422 }, { "epoch": 0.5952303287303831, "grad_norm": 2.4059747590037945, "learning_rate": 3.715403334705263e-06, "loss": 0.9769, "step": 16423 }, { "epoch": 0.5952665724330397, "grad_norm": 2.3086211240133614, "learning_rate": 3.7148361167240697e-06, "loss": 0.9702, "step": 16424 }, { "epoch": 0.5953028161356965, "grad_norm": 2.2910682441681787, "learning_rate": 3.7142689164516575e-06, "loss": 0.9222, "step": 16425 }, { "epoch": 0.5953390598383531, "grad_norm": 2.307236462637715, "learning_rate": 3.7137017338958415e-06, "loss": 0.867, "step": 16426 }, { "epoch": 0.5953753035410098, "grad_norm": 2.309623530131064, "learning_rate": 3.713134569064436e-06, "loss": 0.8392, "step": 16427 }, { "epoch": 0.5954115472436664, "grad_norm": 2.4376290440559716, "learning_rate": 3.7125674219652602e-06, "loss": 1.029, "step": 16428 }, { "epoch": 0.595447790946323, "grad_norm": 2.2250648062005296, "learning_rate": 3.7120002926061266e-06, "loss": 0.8008, "step": 16429 }, { "epoch": 0.5954840346489797, "grad_norm": 2.1543872920267217, "learning_rate": 3.71143318099485e-06, "loss": 0.7897, "step": 16430 }, { "epoch": 0.5955202783516365, "grad_norm": 2.333102897089548, "learning_rate": 3.7108660871392427e-06, "loss": 0.7055, "step": 16431 }, { "epoch": 0.5955565220542931, "grad_norm": 2.3413287575788546, "learning_rate": 3.7102990110471228e-06, "loss": 1.0544, "step": 16432 }, { "epoch": 0.5955927657569497, "grad_norm": 2.446410068021134, "learning_rate": 3.7097319527263033e-06, "loss": 0.8967, "step": 16433 }, { "epoch": 0.5956290094596064, "grad_norm": 2.4470269665397604, "learning_rate": 3.709164912184596e-06, "loss": 0.8374, "step": 16434 }, { "epoch": 0.595665253162263, "grad_norm": 2.3777130033735387, "learning_rate": 3.7085978894298157e-06, "loss": 1.1113, "step": 16435 }, { "epoch": 0.5957014968649197, "grad_norm": 2.3763125405555647, "learning_rate": 3.7080308844697765e-06, "loss": 0.9825, "step": 16436 }, { "epoch": 0.5957377405675763, "grad_norm": 2.0512841789406386, "learning_rate": 3.7074638973122895e-06, "loss": 0.8922, "step": 16437 }, { "epoch": 0.5957739842702331, "grad_norm": 2.2853142210021375, "learning_rate": 3.706896927965169e-06, "loss": 0.7929, "step": 16438 }, { "epoch": 0.5958102279728897, "grad_norm": 2.5879347035813782, "learning_rate": 3.7063299764362244e-06, "loss": 0.8581, "step": 16439 }, { "epoch": 0.5958464716755464, "grad_norm": 2.410632669553677, "learning_rate": 3.7057630427332723e-06, "loss": 0.8775, "step": 16440 }, { "epoch": 0.595882715378203, "grad_norm": 2.373929348260601, "learning_rate": 3.705196126864123e-06, "loss": 0.8942, "step": 16441 }, { "epoch": 0.5959189590808597, "grad_norm": 2.090309070479988, "learning_rate": 3.7046292288365876e-06, "loss": 0.7526, "step": 16442 }, { "epoch": 0.5959552027835163, "grad_norm": 2.1599975687205504, "learning_rate": 3.704062348658477e-06, "loss": 0.6771, "step": 16443 }, { "epoch": 0.5959914464861731, "grad_norm": 2.0621147372572675, "learning_rate": 3.703495486337605e-06, "loss": 0.6931, "step": 16444 }, { "epoch": 0.5960276901888297, "grad_norm": 2.470588737586129, "learning_rate": 3.702928641881781e-06, "loss": 0.8804, "step": 16445 }, { "epoch": 0.5960639338914864, "grad_norm": 2.1338723153476646, "learning_rate": 3.7023618152988163e-06, "loss": 0.8854, "step": 16446 }, { "epoch": 0.596100177594143, "grad_norm": 2.415555165634902, "learning_rate": 3.701795006596518e-06, "loss": 0.9586, "step": 16447 }, { "epoch": 0.5961364212967997, "grad_norm": 2.211356602527563, "learning_rate": 3.701228215782702e-06, "loss": 0.8196, "step": 16448 }, { "epoch": 0.5961726649994563, "grad_norm": 2.0176272582062045, "learning_rate": 3.7006614428651756e-06, "loss": 0.8017, "step": 16449 }, { "epoch": 0.596208908702113, "grad_norm": 2.1436173509511005, "learning_rate": 3.7000946878517474e-06, "loss": 1.0677, "step": 16450 }, { "epoch": 0.5962451524047697, "grad_norm": 2.4505841221374265, "learning_rate": 3.6995279507502292e-06, "loss": 0.9782, "step": 16451 }, { "epoch": 0.5962813961074264, "grad_norm": 2.4454604318400106, "learning_rate": 3.69896123156843e-06, "loss": 0.9787, "step": 16452 }, { "epoch": 0.596317639810083, "grad_norm": 2.36890749156421, "learning_rate": 3.6983945303141576e-06, "loss": 0.8838, "step": 16453 }, { "epoch": 0.5963538835127397, "grad_norm": 2.498569048586894, "learning_rate": 3.697827846995219e-06, "loss": 0.9167, "step": 16454 }, { "epoch": 0.5963901272153963, "grad_norm": 2.43001491333378, "learning_rate": 3.6972611816194283e-06, "loss": 0.8517, "step": 16455 }, { "epoch": 0.596426370918053, "grad_norm": 2.417519655512678, "learning_rate": 3.6966945341945897e-06, "loss": 0.8865, "step": 16456 }, { "epoch": 0.5964626146207096, "grad_norm": 2.501321575851829, "learning_rate": 3.6961279047285128e-06, "loss": 0.9619, "step": 16457 }, { "epoch": 0.5964988583233664, "grad_norm": 2.279131570069842, "learning_rate": 3.695561293229002e-06, "loss": 1.0605, "step": 16458 }, { "epoch": 0.596535102026023, "grad_norm": 2.0851829553097505, "learning_rate": 3.6949946997038705e-06, "loss": 0.9803, "step": 16459 }, { "epoch": 0.5965713457286796, "grad_norm": 2.1367886940744047, "learning_rate": 3.6944281241609224e-06, "loss": 0.8626, "step": 16460 }, { "epoch": 0.5966075894313363, "grad_norm": 2.3441251187000813, "learning_rate": 3.693861566607966e-06, "loss": 0.8844, "step": 16461 }, { "epoch": 0.5966438331339929, "grad_norm": 2.5042564215746754, "learning_rate": 3.693295027052805e-06, "loss": 0.8428, "step": 16462 }, { "epoch": 0.5966800768366496, "grad_norm": 2.615454177254367, "learning_rate": 3.6927285055032504e-06, "loss": 0.9864, "step": 16463 }, { "epoch": 0.5967163205393063, "grad_norm": 2.3434136544563113, "learning_rate": 3.6921620019671055e-06, "loss": 0.9828, "step": 16464 }, { "epoch": 0.596752564241963, "grad_norm": 2.5941253617938864, "learning_rate": 3.6915955164521777e-06, "loss": 0.8212, "step": 16465 }, { "epoch": 0.5967888079446196, "grad_norm": 2.046146799624401, "learning_rate": 3.6910290489662704e-06, "loss": 0.885, "step": 16466 }, { "epoch": 0.5968250516472763, "grad_norm": 2.34037063851251, "learning_rate": 3.690462599517193e-06, "loss": 0.9854, "step": 16467 }, { "epoch": 0.5968612953499329, "grad_norm": 2.16480986190724, "learning_rate": 3.6898961681127487e-06, "loss": 0.8668, "step": 16468 }, { "epoch": 0.5968975390525896, "grad_norm": 2.390290108999075, "learning_rate": 3.6893297547607433e-06, "loss": 0.9678, "step": 16469 }, { "epoch": 0.5969337827552462, "grad_norm": 2.0098514606775084, "learning_rate": 3.6887633594689796e-06, "loss": 0.8629, "step": 16470 }, { "epoch": 0.596970026457903, "grad_norm": 2.229494656808323, "learning_rate": 3.6881969822452656e-06, "loss": 0.9656, "step": 16471 }, { "epoch": 0.5970062701605596, "grad_norm": 2.6114828281026026, "learning_rate": 3.687630623097404e-06, "loss": 1.1162, "step": 16472 }, { "epoch": 0.5970425138632163, "grad_norm": 2.425665554231474, "learning_rate": 3.6870642820331982e-06, "loss": 0.8745, "step": 16473 }, { "epoch": 0.5970787575658729, "grad_norm": 2.2693584908032927, "learning_rate": 3.6864979590604503e-06, "loss": 0.9516, "step": 16474 }, { "epoch": 0.5971150012685296, "grad_norm": 2.3425856254130495, "learning_rate": 3.685931654186969e-06, "loss": 0.9319, "step": 16475 }, { "epoch": 0.5971512449711862, "grad_norm": 2.052809768103145, "learning_rate": 3.685365367420554e-06, "loss": 0.8264, "step": 16476 }, { "epoch": 0.597187488673843, "grad_norm": 2.3151867480406345, "learning_rate": 3.6847990987690095e-06, "loss": 0.8969, "step": 16477 }, { "epoch": 0.5972237323764996, "grad_norm": 2.6404818346530403, "learning_rate": 3.684232848240136e-06, "loss": 0.9949, "step": 16478 }, { "epoch": 0.5972599760791563, "grad_norm": 2.4790324746801473, "learning_rate": 3.68366661584174e-06, "loss": 0.9346, "step": 16479 }, { "epoch": 0.5972962197818129, "grad_norm": 2.729664328359299, "learning_rate": 3.683100401581622e-06, "loss": 0.8494, "step": 16480 }, { "epoch": 0.5973324634844696, "grad_norm": 2.444381198119114, "learning_rate": 3.682534205467584e-06, "loss": 0.97, "step": 16481 }, { "epoch": 0.5973687071871262, "grad_norm": 2.3780573394657627, "learning_rate": 3.6819680275074263e-06, "loss": 1.0078, "step": 16482 }, { "epoch": 0.5974049508897828, "grad_norm": 2.5675605946276883, "learning_rate": 3.681401867708954e-06, "loss": 0.9458, "step": 16483 }, { "epoch": 0.5974411945924396, "grad_norm": 1.893151400688619, "learning_rate": 3.6808357260799665e-06, "loss": 0.929, "step": 16484 }, { "epoch": 0.5974774382950963, "grad_norm": 2.676040302964317, "learning_rate": 3.680269602628265e-06, "loss": 0.8003, "step": 16485 }, { "epoch": 0.5975136819977529, "grad_norm": 2.3796834283797033, "learning_rate": 3.6797034973616475e-06, "loss": 1.0039, "step": 16486 }, { "epoch": 0.5975499257004095, "grad_norm": 2.3868633927184515, "learning_rate": 3.6791374102879192e-06, "loss": 0.8541, "step": 16487 }, { "epoch": 0.5975861694030662, "grad_norm": 2.426946926036239, "learning_rate": 3.6785713414148788e-06, "loss": 0.912, "step": 16488 }, { "epoch": 0.5976224131057228, "grad_norm": 2.428565578371694, "learning_rate": 3.678005290750326e-06, "loss": 0.9616, "step": 16489 }, { "epoch": 0.5976586568083796, "grad_norm": 2.358478084983306, "learning_rate": 3.67743925830206e-06, "loss": 0.9757, "step": 16490 }, { "epoch": 0.5976949005110362, "grad_norm": 2.228344711406755, "learning_rate": 3.6768732440778824e-06, "loss": 0.8812, "step": 16491 }, { "epoch": 0.5977311442136929, "grad_norm": 2.442981063622731, "learning_rate": 3.676307248085591e-06, "loss": 0.8325, "step": 16492 }, { "epoch": 0.5977673879163495, "grad_norm": 2.0653554500523725, "learning_rate": 3.6757412703329854e-06, "loss": 0.8516, "step": 16493 }, { "epoch": 0.5978036316190062, "grad_norm": 1.9415157533573282, "learning_rate": 3.675175310827862e-06, "loss": 0.7213, "step": 16494 }, { "epoch": 0.5978398753216628, "grad_norm": 2.538314527426728, "learning_rate": 3.6746093695780237e-06, "loss": 1.0834, "step": 16495 }, { "epoch": 0.5978761190243195, "grad_norm": 2.3097389929229943, "learning_rate": 3.674043446591267e-06, "loss": 1.0263, "step": 16496 }, { "epoch": 0.5979123627269762, "grad_norm": 2.4620071493045144, "learning_rate": 3.6734775418753897e-06, "loss": 0.8073, "step": 16497 }, { "epoch": 0.5979486064296329, "grad_norm": 2.180638860573175, "learning_rate": 3.672911655438187e-06, "loss": 0.6749, "step": 16498 }, { "epoch": 0.5979848501322895, "grad_norm": 2.353099251431908, "learning_rate": 3.672345787287461e-06, "loss": 1.0584, "step": 16499 }, { "epoch": 0.5980210938349462, "grad_norm": 2.1502727543223044, "learning_rate": 3.671779937431008e-06, "loss": 0.7837, "step": 16500 }, { "epoch": 0.5980573375376028, "grad_norm": 2.420452335095613, "learning_rate": 3.6712141058766235e-06, "loss": 0.8719, "step": 16501 }, { "epoch": 0.5980935812402595, "grad_norm": 2.55039186888557, "learning_rate": 3.670648292632104e-06, "loss": 0.886, "step": 16502 }, { "epoch": 0.5981298249429162, "grad_norm": 2.4055405706401336, "learning_rate": 3.6700824977052487e-06, "loss": 0.7736, "step": 16503 }, { "epoch": 0.5981660686455729, "grad_norm": 2.333867751851726, "learning_rate": 3.669516721103852e-06, "loss": 0.8348, "step": 16504 }, { "epoch": 0.5982023123482295, "grad_norm": 2.393797850585273, "learning_rate": 3.66895096283571e-06, "loss": 0.9349, "step": 16505 }, { "epoch": 0.5982385560508862, "grad_norm": 2.694698502492363, "learning_rate": 3.6683852229086163e-06, "loss": 1.0219, "step": 16506 }, { "epoch": 0.5982747997535428, "grad_norm": 2.45571049709802, "learning_rate": 3.667819501330372e-06, "loss": 0.8955, "step": 16507 }, { "epoch": 0.5983110434561995, "grad_norm": 2.495091361376314, "learning_rate": 3.6672537981087687e-06, "loss": 0.8669, "step": 16508 }, { "epoch": 0.5983472871588561, "grad_norm": 2.316677765619462, "learning_rate": 3.6666881132516018e-06, "loss": 0.9863, "step": 16509 }, { "epoch": 0.5983835308615129, "grad_norm": 2.5767990373707237, "learning_rate": 3.6661224467666654e-06, "loss": 0.8572, "step": 16510 }, { "epoch": 0.5984197745641695, "grad_norm": 2.1904331699766364, "learning_rate": 3.6655567986617562e-06, "loss": 0.961, "step": 16511 }, { "epoch": 0.5984560182668262, "grad_norm": 2.3356725758137027, "learning_rate": 3.6649911689446673e-06, "loss": 1.0561, "step": 16512 }, { "epoch": 0.5984922619694828, "grad_norm": 2.2924729551113447, "learning_rate": 3.6644255576231925e-06, "loss": 0.9128, "step": 16513 }, { "epoch": 0.5985285056721394, "grad_norm": 2.136309195623593, "learning_rate": 3.6638599647051242e-06, "loss": 0.8238, "step": 16514 }, { "epoch": 0.5985647493747961, "grad_norm": 2.4955471192778433, "learning_rate": 3.6632943901982586e-06, "loss": 0.9628, "step": 16515 }, { "epoch": 0.5986009930774528, "grad_norm": 2.457671217343177, "learning_rate": 3.662728834110389e-06, "loss": 1.1778, "step": 16516 }, { "epoch": 0.5986372367801095, "grad_norm": 2.3453393127147133, "learning_rate": 3.662163296449307e-06, "loss": 1.0205, "step": 16517 }, { "epoch": 0.5986734804827661, "grad_norm": 2.0781048670477364, "learning_rate": 3.661597777222803e-06, "loss": 0.605, "step": 16518 }, { "epoch": 0.5987097241854228, "grad_norm": 2.475611504958357, "learning_rate": 3.6610322764386754e-06, "loss": 1.0638, "step": 16519 }, { "epoch": 0.5987459678880794, "grad_norm": 2.371997346391995, "learning_rate": 3.6604667941047123e-06, "loss": 0.964, "step": 16520 }, { "epoch": 0.5987822115907361, "grad_norm": 2.449908957953484, "learning_rate": 3.6599013302287068e-06, "loss": 0.8091, "step": 16521 }, { "epoch": 0.5988184552933927, "grad_norm": 2.323702846222519, "learning_rate": 3.659335884818449e-06, "loss": 0.9273, "step": 16522 }, { "epoch": 0.5988546989960495, "grad_norm": 2.363337997405238, "learning_rate": 3.658770457881734e-06, "loss": 0.8593, "step": 16523 }, { "epoch": 0.5988909426987061, "grad_norm": 3.0809604423271435, "learning_rate": 3.6582050494263506e-06, "loss": 0.8074, "step": 16524 }, { "epoch": 0.5989271864013628, "grad_norm": 2.284195879128614, "learning_rate": 3.6576396594600896e-06, "loss": 0.7938, "step": 16525 }, { "epoch": 0.5989634301040194, "grad_norm": 2.287876151881658, "learning_rate": 3.6570742879907407e-06, "loss": 1.0368, "step": 16526 }, { "epoch": 0.5989996738066761, "grad_norm": 2.456418665730914, "learning_rate": 3.656508935026098e-06, "loss": 0.9178, "step": 16527 }, { "epoch": 0.5990359175093327, "grad_norm": 2.43511115839425, "learning_rate": 3.655943600573949e-06, "loss": 1.0067, "step": 16528 }, { "epoch": 0.5990721612119894, "grad_norm": 2.4748275017516805, "learning_rate": 3.655378284642084e-06, "loss": 0.8819, "step": 16529 }, { "epoch": 0.5991084049146461, "grad_norm": 2.3846838222848485, "learning_rate": 3.6548129872382944e-06, "loss": 0.8332, "step": 16530 }, { "epoch": 0.5991446486173028, "grad_norm": 2.031032309823768, "learning_rate": 3.6542477083703676e-06, "loss": 0.9105, "step": 16531 }, { "epoch": 0.5991808923199594, "grad_norm": 2.2909833065522225, "learning_rate": 3.6536824480460935e-06, "loss": 0.9139, "step": 16532 }, { "epoch": 0.5992171360226161, "grad_norm": 2.2052749857277405, "learning_rate": 3.653117206273259e-06, "loss": 0.8764, "step": 16533 }, { "epoch": 0.5992533797252727, "grad_norm": 4.042852050003163, "learning_rate": 3.6525519830596573e-06, "loss": 0.9399, "step": 16534 }, { "epoch": 0.5992896234279294, "grad_norm": 2.1696771068755947, "learning_rate": 3.6519867784130743e-06, "loss": 0.9046, "step": 16535 }, { "epoch": 0.5993258671305861, "grad_norm": 2.2559703409870586, "learning_rate": 3.651421592341298e-06, "loss": 0.8161, "step": 16536 }, { "epoch": 0.5993621108332428, "grad_norm": 2.3959273739573086, "learning_rate": 3.650856424852115e-06, "loss": 0.7921, "step": 16537 }, { "epoch": 0.5993983545358994, "grad_norm": 2.4243403855575174, "learning_rate": 3.6502912759533168e-06, "loss": 0.8341, "step": 16538 }, { "epoch": 0.599434598238556, "grad_norm": 2.1981199214710965, "learning_rate": 3.649726145652688e-06, "loss": 0.8819, "step": 16539 }, { "epoch": 0.5994708419412127, "grad_norm": 2.336695320652692, "learning_rate": 3.6491610339580147e-06, "loss": 1.065, "step": 16540 }, { "epoch": 0.5995070856438693, "grad_norm": 2.6122019345928065, "learning_rate": 3.6485959408770854e-06, "loss": 0.9566, "step": 16541 }, { "epoch": 0.599543329346526, "grad_norm": 2.202314379299017, "learning_rate": 3.648030866417689e-06, "loss": 0.8254, "step": 16542 }, { "epoch": 0.5995795730491827, "grad_norm": 2.1737226671569356, "learning_rate": 3.647465810587608e-06, "loss": 0.8189, "step": 16543 }, { "epoch": 0.5996158167518394, "grad_norm": 2.208732334613201, "learning_rate": 3.6469007733946317e-06, "loss": 0.9748, "step": 16544 }, { "epoch": 0.599652060454496, "grad_norm": 2.3127049186116655, "learning_rate": 3.6463357548465417e-06, "loss": 0.9796, "step": 16545 }, { "epoch": 0.5996883041571527, "grad_norm": 2.315106103004805, "learning_rate": 3.6457707549511277e-06, "loss": 0.8328, "step": 16546 }, { "epoch": 0.5997245478598093, "grad_norm": 2.274443253510562, "learning_rate": 3.6452057737161743e-06, "loss": 0.8757, "step": 16547 }, { "epoch": 0.599760791562466, "grad_norm": 2.2139941528227918, "learning_rate": 3.6446408111494663e-06, "loss": 0.8975, "step": 16548 }, { "epoch": 0.5997970352651227, "grad_norm": 2.541110940456364, "learning_rate": 3.6440758672587866e-06, "loss": 0.9417, "step": 16549 }, { "epoch": 0.5998332789677794, "grad_norm": 2.2660263219538157, "learning_rate": 3.643510942051923e-06, "loss": 0.8629, "step": 16550 }, { "epoch": 0.599869522670436, "grad_norm": 2.314884348000072, "learning_rate": 3.6429460355366573e-06, "loss": 0.7235, "step": 16551 }, { "epoch": 0.5999057663730927, "grad_norm": 2.3986877966916333, "learning_rate": 3.642381147720776e-06, "loss": 0.9404, "step": 16552 }, { "epoch": 0.5999420100757493, "grad_norm": 2.3260930478205237, "learning_rate": 3.641816278612058e-06, "loss": 0.8466, "step": 16553 }, { "epoch": 0.599978253778406, "grad_norm": 2.2799334800329945, "learning_rate": 3.6412514282182933e-06, "loss": 0.7896, "step": 16554 }, { "epoch": 0.6000144974810626, "grad_norm": 2.3522349584842837, "learning_rate": 3.6406865965472614e-06, "loss": 0.8523, "step": 16555 }, { "epoch": 0.6000507411837194, "grad_norm": 2.1541727620801407, "learning_rate": 3.6401217836067467e-06, "loss": 0.8252, "step": 16556 }, { "epoch": 0.600086984886376, "grad_norm": 2.472427923649334, "learning_rate": 3.6395569894045302e-06, "loss": 0.9368, "step": 16557 }, { "epoch": 0.6001232285890327, "grad_norm": 2.556838820287291, "learning_rate": 3.6389922139483964e-06, "loss": 0.9334, "step": 16558 }, { "epoch": 0.6001594722916893, "grad_norm": 2.192188210089506, "learning_rate": 3.638427457246127e-06, "loss": 0.8799, "step": 16559 }, { "epoch": 0.600195715994346, "grad_norm": 2.193896028576741, "learning_rate": 3.637862719305504e-06, "loss": 0.6861, "step": 16560 }, { "epoch": 0.6002319596970026, "grad_norm": 2.45899381867897, "learning_rate": 3.6372980001343062e-06, "loss": 0.9661, "step": 16561 }, { "epoch": 0.6002682033996594, "grad_norm": 2.6264281031593097, "learning_rate": 3.6367332997403204e-06, "loss": 0.9461, "step": 16562 }, { "epoch": 0.600304447102316, "grad_norm": 2.4504183840707356, "learning_rate": 3.6361686181313247e-06, "loss": 0.7311, "step": 16563 }, { "epoch": 0.6003406908049727, "grad_norm": 2.545636326932141, "learning_rate": 3.6356039553151014e-06, "loss": 0.8463, "step": 16564 }, { "epoch": 0.6003769345076293, "grad_norm": 2.017330955975801, "learning_rate": 3.635039311299428e-06, "loss": 0.7208, "step": 16565 }, { "epoch": 0.600413178210286, "grad_norm": 2.3760483084630803, "learning_rate": 3.63447468609209e-06, "loss": 0.9646, "step": 16566 }, { "epoch": 0.6004494219129426, "grad_norm": 2.6938268773969085, "learning_rate": 3.633910079700864e-06, "loss": 0.9152, "step": 16567 }, { "epoch": 0.6004856656155992, "grad_norm": 2.583436661392241, "learning_rate": 3.633345492133531e-06, "loss": 0.8541, "step": 16568 }, { "epoch": 0.600521909318256, "grad_norm": 2.565610392823213, "learning_rate": 3.6327809233978706e-06, "loss": 0.8703, "step": 16569 }, { "epoch": 0.6005581530209126, "grad_norm": 2.0776057824298118, "learning_rate": 3.6322163735016626e-06, "loss": 0.8262, "step": 16570 }, { "epoch": 0.6005943967235693, "grad_norm": 2.5395187623916584, "learning_rate": 3.6316518424526852e-06, "loss": 1.057, "step": 16571 }, { "epoch": 0.6006306404262259, "grad_norm": 2.0545363142720503, "learning_rate": 3.6310873302587184e-06, "loss": 0.9169, "step": 16572 }, { "epoch": 0.6006668841288826, "grad_norm": 2.4998746375390284, "learning_rate": 3.630522836927538e-06, "loss": 1.0394, "step": 16573 }, { "epoch": 0.6007031278315392, "grad_norm": 2.0201151519432963, "learning_rate": 3.6299583624669277e-06, "loss": 0.8269, "step": 16574 }, { "epoch": 0.600739371534196, "grad_norm": 2.309319497773518, "learning_rate": 3.6293939068846616e-06, "loss": 1.1394, "step": 16575 }, { "epoch": 0.6007756152368526, "grad_norm": 2.4084201677523027, "learning_rate": 3.6288294701885185e-06, "loss": 0.8341, "step": 16576 }, { "epoch": 0.6008118589395093, "grad_norm": 2.659746399103421, "learning_rate": 3.6282650523862757e-06, "loss": 1.0095, "step": 16577 }, { "epoch": 0.6008481026421659, "grad_norm": 2.2760321279725333, "learning_rate": 3.6277006534857117e-06, "loss": 0.9293, "step": 16578 }, { "epoch": 0.6008843463448226, "grad_norm": 2.278351828548522, "learning_rate": 3.6271362734946026e-06, "loss": 0.9953, "step": 16579 }, { "epoch": 0.6009205900474792, "grad_norm": 2.434230778707216, "learning_rate": 3.6265719124207254e-06, "loss": 0.7465, "step": 16580 }, { "epoch": 0.6009568337501359, "grad_norm": 2.365160669350347, "learning_rate": 3.6260075702718546e-06, "loss": 0.8174, "step": 16581 }, { "epoch": 0.6009930774527926, "grad_norm": 2.4086360170605015, "learning_rate": 3.62544324705577e-06, "loss": 0.936, "step": 16582 }, { "epoch": 0.6010293211554493, "grad_norm": 2.0571666100237516, "learning_rate": 3.624878942780247e-06, "loss": 0.7076, "step": 16583 }, { "epoch": 0.6010655648581059, "grad_norm": 2.4398376091914247, "learning_rate": 3.6243146574530596e-06, "loss": 0.9635, "step": 16584 }, { "epoch": 0.6011018085607626, "grad_norm": 2.2670840388363205, "learning_rate": 3.6237503910819827e-06, "loss": 0.8807, "step": 16585 }, { "epoch": 0.6011380522634192, "grad_norm": 2.128185354984298, "learning_rate": 3.6231861436747957e-06, "loss": 0.8255, "step": 16586 }, { "epoch": 0.6011742959660759, "grad_norm": 2.571687390590392, "learning_rate": 3.6226219152392705e-06, "loss": 0.9056, "step": 16587 }, { "epoch": 0.6012105396687326, "grad_norm": 2.39139258876436, "learning_rate": 3.6220577057831817e-06, "loss": 0.9319, "step": 16588 }, { "epoch": 0.6012467833713893, "grad_norm": 2.3703265214101457, "learning_rate": 3.6214935153143045e-06, "loss": 0.8176, "step": 16589 }, { "epoch": 0.6012830270740459, "grad_norm": 2.0397901466481985, "learning_rate": 3.6209293438404136e-06, "loss": 0.8588, "step": 16590 }, { "epoch": 0.6013192707767026, "grad_norm": 2.408457011623207, "learning_rate": 3.6203651913692815e-06, "loss": 0.7715, "step": 16591 }, { "epoch": 0.6013555144793592, "grad_norm": 2.5763045841304346, "learning_rate": 3.6198010579086834e-06, "loss": 0.8449, "step": 16592 }, { "epoch": 0.6013917581820158, "grad_norm": 2.2244736631974895, "learning_rate": 3.6192369434663898e-06, "loss": 0.7856, "step": 16593 }, { "epoch": 0.6014280018846725, "grad_norm": 2.6269365774456412, "learning_rate": 3.618672848050178e-06, "loss": 0.8493, "step": 16594 }, { "epoch": 0.6014642455873292, "grad_norm": 2.295958300247621, "learning_rate": 3.6181087716678193e-06, "loss": 0.9138, "step": 16595 }, { "epoch": 0.6015004892899859, "grad_norm": 2.1724324340617676, "learning_rate": 3.617544714327086e-06, "loss": 0.8917, "step": 16596 }, { "epoch": 0.6015367329926425, "grad_norm": 2.0268491083736895, "learning_rate": 3.616980676035749e-06, "loss": 0.6978, "step": 16597 }, { "epoch": 0.6015729766952992, "grad_norm": 2.2070470060588754, "learning_rate": 3.6164166568015834e-06, "loss": 0.7851, "step": 16598 }, { "epoch": 0.6016092203979558, "grad_norm": 2.644159330451097, "learning_rate": 3.6158526566323594e-06, "loss": 0.9972, "step": 16599 }, { "epoch": 0.6016454641006125, "grad_norm": 2.2185446337989942, "learning_rate": 3.615288675535848e-06, "loss": 0.8521, "step": 16600 }, { "epoch": 0.6016817078032691, "grad_norm": 2.3558677178376928, "learning_rate": 3.6147247135198195e-06, "loss": 0.807, "step": 16601 }, { "epoch": 0.6017179515059259, "grad_norm": 2.399878200497681, "learning_rate": 3.6141607705920488e-06, "loss": 1.1083, "step": 16602 }, { "epoch": 0.6017541952085825, "grad_norm": 2.280744717846227, "learning_rate": 3.6135968467603046e-06, "loss": 0.9445, "step": 16603 }, { "epoch": 0.6017904389112392, "grad_norm": 2.222762875844054, "learning_rate": 3.6130329420323573e-06, "loss": 0.842, "step": 16604 }, { "epoch": 0.6018266826138958, "grad_norm": 2.1205892691439674, "learning_rate": 3.612469056415976e-06, "loss": 0.991, "step": 16605 }, { "epoch": 0.6018629263165525, "grad_norm": 2.4618871216637146, "learning_rate": 3.6119051899189327e-06, "loss": 1.0189, "step": 16606 }, { "epoch": 0.6018991700192091, "grad_norm": 2.1442748707289154, "learning_rate": 3.6113413425489975e-06, "loss": 0.7792, "step": 16607 }, { "epoch": 0.6019354137218659, "grad_norm": 2.4213172169215853, "learning_rate": 3.6107775143139377e-06, "loss": 0.856, "step": 16608 }, { "epoch": 0.6019716574245225, "grad_norm": 2.7043257961228746, "learning_rate": 3.6102137052215235e-06, "loss": 0.9407, "step": 16609 }, { "epoch": 0.6020079011271792, "grad_norm": 2.241556372521241, "learning_rate": 3.609649915279524e-06, "loss": 0.9265, "step": 16610 }, { "epoch": 0.6020441448298358, "grad_norm": 2.3572996070850754, "learning_rate": 3.6090861444957087e-06, "loss": 0.9407, "step": 16611 }, { "epoch": 0.6020803885324925, "grad_norm": 2.514794326774038, "learning_rate": 3.6085223928778423e-06, "loss": 0.9476, "step": 16612 }, { "epoch": 0.6021166322351491, "grad_norm": 2.2306920031508572, "learning_rate": 3.6079586604336986e-06, "loss": 1.0068, "step": 16613 }, { "epoch": 0.6021528759378058, "grad_norm": 2.7803523990609795, "learning_rate": 3.607394947171042e-06, "loss": 1.0392, "step": 16614 }, { "epoch": 0.6021891196404625, "grad_norm": 2.5296643989139405, "learning_rate": 3.6068312530976414e-06, "loss": 0.8729, "step": 16615 }, { "epoch": 0.6022253633431192, "grad_norm": 2.3995260970546117, "learning_rate": 3.6062675782212624e-06, "loss": 1.0223, "step": 16616 }, { "epoch": 0.6022616070457758, "grad_norm": 2.310130162654188, "learning_rate": 3.605703922549674e-06, "loss": 0.9063, "step": 16617 }, { "epoch": 0.6022978507484325, "grad_norm": 2.109476996847662, "learning_rate": 3.6051402860906425e-06, "loss": 0.7623, "step": 16618 }, { "epoch": 0.6023340944510891, "grad_norm": 2.1663478085039296, "learning_rate": 3.6045766688519346e-06, "loss": 0.9927, "step": 16619 }, { "epoch": 0.6023703381537457, "grad_norm": 2.3393945467914916, "learning_rate": 3.6040130708413133e-06, "loss": 1.0136, "step": 16620 }, { "epoch": 0.6024065818564025, "grad_norm": 2.720959379818164, "learning_rate": 3.603449492066551e-06, "loss": 0.9327, "step": 16621 }, { "epoch": 0.6024428255590591, "grad_norm": 2.616447380331386, "learning_rate": 3.602885932535409e-06, "loss": 1.0332, "step": 16622 }, { "epoch": 0.6024790692617158, "grad_norm": 2.424756802717832, "learning_rate": 3.6023223922556546e-06, "loss": 0.939, "step": 16623 }, { "epoch": 0.6025153129643724, "grad_norm": 2.3546712955421043, "learning_rate": 3.6017588712350503e-06, "loss": 0.8951, "step": 16624 }, { "epoch": 0.6025515566670291, "grad_norm": 2.64782695378564, "learning_rate": 3.601195369481365e-06, "loss": 0.8765, "step": 16625 }, { "epoch": 0.6025878003696857, "grad_norm": 2.604230858359136, "learning_rate": 3.6006318870023603e-06, "loss": 0.8833, "step": 16626 }, { "epoch": 0.6026240440723424, "grad_norm": 2.5882714738560133, "learning_rate": 3.6000684238058036e-06, "loss": 0.8355, "step": 16627 }, { "epoch": 0.6026602877749991, "grad_norm": 2.367072386979653, "learning_rate": 3.5995049798994554e-06, "loss": 0.9066, "step": 16628 }, { "epoch": 0.6026965314776558, "grad_norm": 2.391344305571302, "learning_rate": 3.5989415552910833e-06, "loss": 0.8423, "step": 16629 }, { "epoch": 0.6027327751803124, "grad_norm": 2.1282079831467287, "learning_rate": 3.5983781499884485e-06, "loss": 0.7927, "step": 16630 }, { "epoch": 0.6027690188829691, "grad_norm": 2.194092911446087, "learning_rate": 3.5978147639993155e-06, "loss": 0.7591, "step": 16631 }, { "epoch": 0.6028052625856257, "grad_norm": 2.4712353080079934, "learning_rate": 3.597251397331445e-06, "loss": 0.9816, "step": 16632 }, { "epoch": 0.6028415062882824, "grad_norm": 2.328260460584635, "learning_rate": 3.596688049992604e-06, "loss": 0.7625, "step": 16633 }, { "epoch": 0.6028777499909391, "grad_norm": 2.2198951494862382, "learning_rate": 3.5961247219905525e-06, "loss": 0.9683, "step": 16634 }, { "epoch": 0.6029139936935958, "grad_norm": 2.6352655269655267, "learning_rate": 3.5955614133330542e-06, "loss": 0.9341, "step": 16635 }, { "epoch": 0.6029502373962524, "grad_norm": 2.541654372945945, "learning_rate": 3.5949981240278685e-06, "loss": 1.0258, "step": 16636 }, { "epoch": 0.6029864810989091, "grad_norm": 2.3996554751177226, "learning_rate": 3.59443485408276e-06, "loss": 0.7877, "step": 16637 }, { "epoch": 0.6030227248015657, "grad_norm": 2.6328375663403616, "learning_rate": 3.5938716035054894e-06, "loss": 0.8061, "step": 16638 }, { "epoch": 0.6030589685042224, "grad_norm": 2.4256997722654194, "learning_rate": 3.5933083723038174e-06, "loss": 0.9836, "step": 16639 }, { "epoch": 0.603095212206879, "grad_norm": 2.5854799353443907, "learning_rate": 3.5927451604855035e-06, "loss": 0.8214, "step": 16640 }, { "epoch": 0.6031314559095358, "grad_norm": 2.616951018813306, "learning_rate": 3.5921819680583126e-06, "loss": 0.7498, "step": 16641 }, { "epoch": 0.6031676996121924, "grad_norm": 2.261735036728947, "learning_rate": 3.591618795030002e-06, "loss": 1.0261, "step": 16642 }, { "epoch": 0.6032039433148491, "grad_norm": 2.2111285481486544, "learning_rate": 3.591055641408333e-06, "loss": 0.7548, "step": 16643 }, { "epoch": 0.6032401870175057, "grad_norm": 2.1696693752595886, "learning_rate": 3.5904925072010642e-06, "loss": 0.8935, "step": 16644 }, { "epoch": 0.6032764307201623, "grad_norm": 2.334794982216586, "learning_rate": 3.589929392415957e-06, "loss": 0.8793, "step": 16645 }, { "epoch": 0.603312674422819, "grad_norm": 2.138995060563428, "learning_rate": 3.5893662970607703e-06, "loss": 0.8863, "step": 16646 }, { "epoch": 0.6033489181254758, "grad_norm": 1.9797920351175036, "learning_rate": 3.588803221143263e-06, "loss": 0.9163, "step": 16647 }, { "epoch": 0.6033851618281324, "grad_norm": 2.3828412447525813, "learning_rate": 3.5882401646711913e-06, "loss": 0.7164, "step": 16648 }, { "epoch": 0.603421405530789, "grad_norm": 2.260236885705814, "learning_rate": 3.587677127652319e-06, "loss": 0.7749, "step": 16649 }, { "epoch": 0.6034576492334457, "grad_norm": 1.985319715901182, "learning_rate": 3.5871141100944006e-06, "loss": 0.8156, "step": 16650 }, { "epoch": 0.6034938929361023, "grad_norm": 2.547089540260391, "learning_rate": 3.5865511120051968e-06, "loss": 0.9439, "step": 16651 }, { "epoch": 0.603530136638759, "grad_norm": 2.7254590661243556, "learning_rate": 3.5859881333924605e-06, "loss": 0.9476, "step": 16652 }, { "epoch": 0.6035663803414156, "grad_norm": 2.0014864334437132, "learning_rate": 3.5854251742639554e-06, "loss": 0.7185, "step": 16653 }, { "epoch": 0.6036026240440724, "grad_norm": 2.5002562073745116, "learning_rate": 3.584862234627435e-06, "loss": 0.9017, "step": 16654 }, { "epoch": 0.603638867746729, "grad_norm": 2.352318544524509, "learning_rate": 3.5842993144906586e-06, "loss": 0.9513, "step": 16655 }, { "epoch": 0.6036751114493857, "grad_norm": 2.3455533884744972, "learning_rate": 3.5837364138613795e-06, "loss": 0.9136, "step": 16656 }, { "epoch": 0.6037113551520423, "grad_norm": 2.5032924375928656, "learning_rate": 3.583173532747357e-06, "loss": 1.0119, "step": 16657 }, { "epoch": 0.603747598854699, "grad_norm": 2.437260717388185, "learning_rate": 3.5826106711563467e-06, "loss": 0.9993, "step": 16658 }, { "epoch": 0.6037838425573556, "grad_norm": 2.149453721140243, "learning_rate": 3.5820478290961043e-06, "loss": 0.6973, "step": 16659 }, { "epoch": 0.6038200862600123, "grad_norm": 2.3071309764464103, "learning_rate": 3.5814850065743827e-06, "loss": 0.9471, "step": 16660 }, { "epoch": 0.603856329962669, "grad_norm": 2.1863953850460147, "learning_rate": 3.580922203598942e-06, "loss": 0.9724, "step": 16661 }, { "epoch": 0.6038925736653257, "grad_norm": 2.0169582020354957, "learning_rate": 3.5803594201775354e-06, "loss": 0.7344, "step": 16662 }, { "epoch": 0.6039288173679823, "grad_norm": 2.7211684895556267, "learning_rate": 3.5797966563179175e-06, "loss": 0.9805, "step": 16663 }, { "epoch": 0.603965061070639, "grad_norm": 2.1370491024396645, "learning_rate": 3.5792339120278413e-06, "loss": 0.8336, "step": 16664 }, { "epoch": 0.6040013047732956, "grad_norm": 2.126373846679291, "learning_rate": 3.5786711873150636e-06, "loss": 0.818, "step": 16665 }, { "epoch": 0.6040375484759523, "grad_norm": 2.343490839114058, "learning_rate": 3.578108482187338e-06, "loss": 0.9987, "step": 16666 }, { "epoch": 0.604073792178609, "grad_norm": 2.5456119050382537, "learning_rate": 3.5775457966524167e-06, "loss": 0.9549, "step": 16667 }, { "epoch": 0.6041100358812657, "grad_norm": 2.2695152503543663, "learning_rate": 3.5769831307180525e-06, "loss": 0.7874, "step": 16668 }, { "epoch": 0.6041462795839223, "grad_norm": 2.227555849376748, "learning_rate": 3.576420484392002e-06, "loss": 0.7921, "step": 16669 }, { "epoch": 0.604182523286579, "grad_norm": 2.3457826919982723, "learning_rate": 3.5758578576820164e-06, "loss": 0.8768, "step": 16670 }, { "epoch": 0.6042187669892356, "grad_norm": 2.327865067602617, "learning_rate": 3.575295250595848e-06, "loss": 1.0834, "step": 16671 }, { "epoch": 0.6042550106918922, "grad_norm": 2.3518355113714238, "learning_rate": 3.5747326631412477e-06, "loss": 0.923, "step": 16672 }, { "epoch": 0.6042912543945489, "grad_norm": 2.5142741497426813, "learning_rate": 3.574170095325971e-06, "loss": 0.8767, "step": 16673 }, { "epoch": 0.6043274980972057, "grad_norm": 2.2277185551596217, "learning_rate": 3.5736075471577687e-06, "loss": 0.7526, "step": 16674 }, { "epoch": 0.6043637417998623, "grad_norm": 2.352954900930189, "learning_rate": 3.573045018644391e-06, "loss": 0.9362, "step": 16675 }, { "epoch": 0.6043999855025189, "grad_norm": 2.07568667207541, "learning_rate": 3.5724825097935895e-06, "loss": 0.9626, "step": 16676 }, { "epoch": 0.6044362292051756, "grad_norm": 1.8769631367638917, "learning_rate": 3.5719200206131166e-06, "loss": 0.6643, "step": 16677 }, { "epoch": 0.6044724729078322, "grad_norm": 2.452043699403659, "learning_rate": 3.5713575511107217e-06, "loss": 0.8624, "step": 16678 }, { "epoch": 0.6045087166104889, "grad_norm": 2.490590741209993, "learning_rate": 3.570795101294156e-06, "loss": 1.0758, "step": 16679 }, { "epoch": 0.6045449603131456, "grad_norm": 2.1206792977934215, "learning_rate": 3.5702326711711677e-06, "loss": 0.8209, "step": 16680 }, { "epoch": 0.6045812040158023, "grad_norm": 2.502208777802196, "learning_rate": 3.5696702607495102e-06, "loss": 0.9135, "step": 16681 }, { "epoch": 0.6046174477184589, "grad_norm": 2.3452094838049553, "learning_rate": 3.5691078700369313e-06, "loss": 0.904, "step": 16682 }, { "epoch": 0.6046536914211156, "grad_norm": 2.025741674765939, "learning_rate": 3.5685454990411813e-06, "loss": 0.8822, "step": 16683 }, { "epoch": 0.6046899351237722, "grad_norm": 2.4163586357693148, "learning_rate": 3.5679831477700067e-06, "loss": 0.8854, "step": 16684 }, { "epoch": 0.6047261788264289, "grad_norm": 2.5459199454460846, "learning_rate": 3.5674208162311596e-06, "loss": 1.0176, "step": 16685 }, { "epoch": 0.6047624225290855, "grad_norm": 2.3257761616810497, "learning_rate": 3.566858504432388e-06, "loss": 0.9586, "step": 16686 }, { "epoch": 0.6047986662317423, "grad_norm": 2.5404383061103037, "learning_rate": 3.5662962123814383e-06, "loss": 0.6291, "step": 16687 }, { "epoch": 0.6048349099343989, "grad_norm": 2.655613154620585, "learning_rate": 3.565733940086058e-06, "loss": 0.9236, "step": 16688 }, { "epoch": 0.6048711536370556, "grad_norm": 2.1235634358786597, "learning_rate": 3.5651716875539986e-06, "loss": 0.8735, "step": 16689 }, { "epoch": 0.6049073973397122, "grad_norm": 2.1785089203104393, "learning_rate": 3.5646094547930066e-06, "loss": 0.6841, "step": 16690 }, { "epoch": 0.6049436410423689, "grad_norm": 2.4894011499211968, "learning_rate": 3.564047241810825e-06, "loss": 0.7465, "step": 16691 }, { "epoch": 0.6049798847450255, "grad_norm": 2.821008273802251, "learning_rate": 3.563485048615206e-06, "loss": 0.9113, "step": 16692 }, { "epoch": 0.6050161284476823, "grad_norm": 2.484569571318896, "learning_rate": 3.5629228752138956e-06, "loss": 0.9274, "step": 16693 }, { "epoch": 0.6050523721503389, "grad_norm": 2.3068202420790125, "learning_rate": 3.5623607216146373e-06, "loss": 0.8915, "step": 16694 }, { "epoch": 0.6050886158529956, "grad_norm": 2.5134600767888764, "learning_rate": 3.5617985878251786e-06, "loss": 0.7398, "step": 16695 }, { "epoch": 0.6051248595556522, "grad_norm": 2.3288507573785284, "learning_rate": 3.5612364738532666e-06, "loss": 0.9974, "step": 16696 }, { "epoch": 0.6051611032583089, "grad_norm": 2.627128663737045, "learning_rate": 3.5606743797066457e-06, "loss": 0.8658, "step": 16697 }, { "epoch": 0.6051973469609655, "grad_norm": 2.5681576361869802, "learning_rate": 3.560112305393061e-06, "loss": 1.0111, "step": 16698 }, { "epoch": 0.6052335906636221, "grad_norm": 2.6990967637060668, "learning_rate": 3.5595502509202563e-06, "loss": 1.0016, "step": 16699 }, { "epoch": 0.6052698343662789, "grad_norm": 2.6224936190757804, "learning_rate": 3.5589882162959797e-06, "loss": 1.1559, "step": 16700 }, { "epoch": 0.6053060780689355, "grad_norm": 2.538516080492521, "learning_rate": 3.558426201527975e-06, "loss": 0.8287, "step": 16701 }, { "epoch": 0.6053423217715922, "grad_norm": 2.150587472021117, "learning_rate": 3.5578642066239844e-06, "loss": 0.8083, "step": 16702 }, { "epoch": 0.6053785654742488, "grad_norm": 2.4953688473343325, "learning_rate": 3.5573022315917516e-06, "loss": 1.0364, "step": 16703 }, { "epoch": 0.6054148091769055, "grad_norm": 2.2697388733370234, "learning_rate": 3.556740276439023e-06, "loss": 1.0257, "step": 16704 }, { "epoch": 0.6054510528795621, "grad_norm": 2.56648300531632, "learning_rate": 3.5561783411735406e-06, "loss": 0.7622, "step": 16705 }, { "epoch": 0.6054872965822189, "grad_norm": 2.1845769596975853, "learning_rate": 3.5556164258030474e-06, "loss": 0.8132, "step": 16706 }, { "epoch": 0.6055235402848755, "grad_norm": 2.183688069445064, "learning_rate": 3.5550545303352845e-06, "loss": 0.7242, "step": 16707 }, { "epoch": 0.6055597839875322, "grad_norm": 2.5672399344794266, "learning_rate": 3.554492654777998e-06, "loss": 1.0496, "step": 16708 }, { "epoch": 0.6055960276901888, "grad_norm": 2.4220598225629457, "learning_rate": 3.5539307991389282e-06, "loss": 0.6966, "step": 16709 }, { "epoch": 0.6056322713928455, "grad_norm": 2.239144956132668, "learning_rate": 3.553368963425818e-06, "loss": 0.7307, "step": 16710 }, { "epoch": 0.6056685150955021, "grad_norm": 2.548661095065574, "learning_rate": 3.5528071476464076e-06, "loss": 0.7983, "step": 16711 }, { "epoch": 0.6057047587981588, "grad_norm": 2.2806932339046613, "learning_rate": 3.5522453518084395e-06, "loss": 0.892, "step": 16712 }, { "epoch": 0.6057410025008155, "grad_norm": 2.336265963438443, "learning_rate": 3.5516835759196555e-06, "loss": 1.0545, "step": 16713 }, { "epoch": 0.6057772462034722, "grad_norm": 2.375518707961745, "learning_rate": 3.551121819987796e-06, "loss": 0.8231, "step": 16714 }, { "epoch": 0.6058134899061288, "grad_norm": 2.0916310118253554, "learning_rate": 3.5505600840206007e-06, "loss": 0.8501, "step": 16715 }, { "epoch": 0.6058497336087855, "grad_norm": 2.430273781462221, "learning_rate": 3.549998368025812e-06, "loss": 0.918, "step": 16716 }, { "epoch": 0.6058859773114421, "grad_norm": 2.3654274527422365, "learning_rate": 3.5494366720111684e-06, "loss": 0.9096, "step": 16717 }, { "epoch": 0.6059222210140988, "grad_norm": 2.4554333915282225, "learning_rate": 3.5488749959844103e-06, "loss": 0.9552, "step": 16718 }, { "epoch": 0.6059584647167555, "grad_norm": 2.1638326266131074, "learning_rate": 3.5483133399532756e-06, "loss": 0.6541, "step": 16719 }, { "epoch": 0.6059947084194122, "grad_norm": 2.363190882520858, "learning_rate": 3.5477517039255067e-06, "loss": 0.9488, "step": 16720 }, { "epoch": 0.6060309521220688, "grad_norm": 2.3488053221581753, "learning_rate": 3.547190087908841e-06, "loss": 1.044, "step": 16721 }, { "epoch": 0.6060671958247255, "grad_norm": 2.311418999480635, "learning_rate": 3.546628491911017e-06, "loss": 0.8744, "step": 16722 }, { "epoch": 0.6061034395273821, "grad_norm": 2.3815128453228294, "learning_rate": 3.546066915939773e-06, "loss": 0.95, "step": 16723 }, { "epoch": 0.6061396832300388, "grad_norm": 2.0839547703842998, "learning_rate": 3.5455053600028487e-06, "loss": 0.8826, "step": 16724 }, { "epoch": 0.6061759269326954, "grad_norm": 1.9936921075480802, "learning_rate": 3.5449438241079805e-06, "loss": 0.7052, "step": 16725 }, { "epoch": 0.6062121706353522, "grad_norm": 2.5487342969521385, "learning_rate": 3.544382308262907e-06, "loss": 0.9609, "step": 16726 }, { "epoch": 0.6062484143380088, "grad_norm": 2.5131162978180024, "learning_rate": 3.543820812475362e-06, "loss": 0.8513, "step": 16727 }, { "epoch": 0.6062846580406654, "grad_norm": 2.0539106558475364, "learning_rate": 3.543259336753088e-06, "loss": 0.9377, "step": 16728 }, { "epoch": 0.6063209017433221, "grad_norm": 2.596965292960223, "learning_rate": 3.5426978811038194e-06, "loss": 0.9508, "step": 16729 }, { "epoch": 0.6063571454459787, "grad_norm": 2.284238036097961, "learning_rate": 3.5421364455352925e-06, "loss": 0.7647, "step": 16730 }, { "epoch": 0.6063933891486354, "grad_norm": 2.2658332477900256, "learning_rate": 3.541575030055243e-06, "loss": 1.0653, "step": 16731 }, { "epoch": 0.606429632851292, "grad_norm": 2.217688429198061, "learning_rate": 3.5410136346714085e-06, "loss": 0.8456, "step": 16732 }, { "epoch": 0.6064658765539488, "grad_norm": 2.3909572507924386, "learning_rate": 3.5404522593915236e-06, "loss": 0.9368, "step": 16733 }, { "epoch": 0.6065021202566054, "grad_norm": 2.1954634909012807, "learning_rate": 3.539890904223324e-06, "loss": 1.0204, "step": 16734 }, { "epoch": 0.6065383639592621, "grad_norm": 2.32699185151953, "learning_rate": 3.5393295691745423e-06, "loss": 0.7368, "step": 16735 }, { "epoch": 0.6065746076619187, "grad_norm": 2.2893914921994956, "learning_rate": 3.538768254252918e-06, "loss": 0.8264, "step": 16736 }, { "epoch": 0.6066108513645754, "grad_norm": 2.5697999707141275, "learning_rate": 3.5382069594661834e-06, "loss": 0.8388, "step": 16737 }, { "epoch": 0.606647095067232, "grad_norm": 2.6079879663389067, "learning_rate": 3.537645684822073e-06, "loss": 0.9387, "step": 16738 }, { "epoch": 0.6066833387698888, "grad_norm": 2.343001217025479, "learning_rate": 3.537084430328318e-06, "loss": 0.8438, "step": 16739 }, { "epoch": 0.6067195824725454, "grad_norm": 2.375790489875774, "learning_rate": 3.5365231959926573e-06, "loss": 0.8748, "step": 16740 }, { "epoch": 0.6067558261752021, "grad_norm": 2.0153221643862853, "learning_rate": 3.535961981822822e-06, "loss": 0.7703, "step": 16741 }, { "epoch": 0.6067920698778587, "grad_norm": 2.4574723371138014, "learning_rate": 3.535400787826545e-06, "loss": 0.8926, "step": 16742 }, { "epoch": 0.6068283135805154, "grad_norm": 2.3028371469432045, "learning_rate": 3.534839614011558e-06, "loss": 0.7672, "step": 16743 }, { "epoch": 0.606864557283172, "grad_norm": 2.359683690930577, "learning_rate": 3.5342784603855973e-06, "loss": 0.8724, "step": 16744 }, { "epoch": 0.6069008009858287, "grad_norm": 2.447463889977317, "learning_rate": 3.5337173269563918e-06, "loss": 0.8046, "step": 16745 }, { "epoch": 0.6069370446884854, "grad_norm": 2.0194756211202516, "learning_rate": 3.5331562137316753e-06, "loss": 0.7968, "step": 16746 }, { "epoch": 0.6069732883911421, "grad_norm": 2.4024071136030574, "learning_rate": 3.5325951207191767e-06, "loss": 0.9326, "step": 16747 }, { "epoch": 0.6070095320937987, "grad_norm": 2.456122988889198, "learning_rate": 3.532034047926632e-06, "loss": 0.9791, "step": 16748 }, { "epoch": 0.6070457757964554, "grad_norm": 2.3655642571813424, "learning_rate": 3.5314729953617703e-06, "loss": 0.8104, "step": 16749 }, { "epoch": 0.607082019499112, "grad_norm": 2.5666620525848476, "learning_rate": 3.530911963032323e-06, "loss": 0.7706, "step": 16750 }, { "epoch": 0.6071182632017686, "grad_norm": 2.5308289651988347, "learning_rate": 3.5303509509460188e-06, "loss": 0.9961, "step": 16751 }, { "epoch": 0.6071545069044254, "grad_norm": 2.6918904519853704, "learning_rate": 3.529789959110591e-06, "loss": 0.9766, "step": 16752 }, { "epoch": 0.607190750607082, "grad_norm": 2.420523082444797, "learning_rate": 3.5292289875337682e-06, "loss": 0.808, "step": 16753 }, { "epoch": 0.6072269943097387, "grad_norm": 1.9212563598955388, "learning_rate": 3.5286680362232807e-06, "loss": 0.8363, "step": 16754 }, { "epoch": 0.6072632380123953, "grad_norm": 2.4822406845162366, "learning_rate": 3.5281071051868555e-06, "loss": 0.8762, "step": 16755 }, { "epoch": 0.607299481715052, "grad_norm": 2.5451918824028743, "learning_rate": 3.5275461944322263e-06, "loss": 0.9325, "step": 16756 }, { "epoch": 0.6073357254177086, "grad_norm": 2.003241815691277, "learning_rate": 3.52698530396712e-06, "loss": 0.9259, "step": 16757 }, { "epoch": 0.6073719691203653, "grad_norm": 2.268228168185085, "learning_rate": 3.526424433799266e-06, "loss": 0.7205, "step": 16758 }, { "epoch": 0.607408212823022, "grad_norm": 2.3902362124536913, "learning_rate": 3.5258635839363895e-06, "loss": 0.9898, "step": 16759 }, { "epoch": 0.6074444565256787, "grad_norm": 2.084334176603041, "learning_rate": 3.525302754386223e-06, "loss": 0.889, "step": 16760 }, { "epoch": 0.6074807002283353, "grad_norm": 2.165765713397081, "learning_rate": 3.5247419451564935e-06, "loss": 0.8114, "step": 16761 }, { "epoch": 0.607516943930992, "grad_norm": 2.3083375981578023, "learning_rate": 3.524181156254927e-06, "loss": 0.9207, "step": 16762 }, { "epoch": 0.6075531876336486, "grad_norm": 2.144507695864811, "learning_rate": 3.523620387689251e-06, "loss": 0.8353, "step": 16763 }, { "epoch": 0.6075894313363053, "grad_norm": 2.304665301781853, "learning_rate": 3.5230596394671946e-06, "loss": 0.7501, "step": 16764 }, { "epoch": 0.607625675038962, "grad_norm": 2.318566606579964, "learning_rate": 3.5224989115964824e-06, "loss": 1.0983, "step": 16765 }, { "epoch": 0.6076619187416187, "grad_norm": 2.147496378249967, "learning_rate": 3.521938204084842e-06, "loss": 0.941, "step": 16766 }, { "epoch": 0.6076981624442753, "grad_norm": 2.264150525099754, "learning_rate": 3.5213775169399974e-06, "loss": 0.8406, "step": 16767 }, { "epoch": 0.607734406146932, "grad_norm": 2.295936902232161, "learning_rate": 3.5208168501696782e-06, "loss": 0.833, "step": 16768 }, { "epoch": 0.6077706498495886, "grad_norm": 2.373262104625477, "learning_rate": 3.5202562037816077e-06, "loss": 0.913, "step": 16769 }, { "epoch": 0.6078068935522453, "grad_norm": 2.194990958185518, "learning_rate": 3.519695577783512e-06, "loss": 0.9183, "step": 16770 }, { "epoch": 0.6078431372549019, "grad_norm": 2.626843229081676, "learning_rate": 3.519134972183116e-06, "loss": 0.8457, "step": 16771 }, { "epoch": 0.6078793809575587, "grad_norm": 2.302345212834644, "learning_rate": 3.5185743869881445e-06, "loss": 0.7997, "step": 16772 }, { "epoch": 0.6079156246602153, "grad_norm": 2.1579056965781525, "learning_rate": 3.518013822206322e-06, "loss": 0.7265, "step": 16773 }, { "epoch": 0.607951868362872, "grad_norm": 2.1090856861949066, "learning_rate": 3.5174532778453707e-06, "loss": 0.9004, "step": 16774 }, { "epoch": 0.6079881120655286, "grad_norm": 2.1551989366117668, "learning_rate": 3.5168927539130194e-06, "loss": 0.7666, "step": 16775 }, { "epoch": 0.6080243557681853, "grad_norm": 2.513212746459752, "learning_rate": 3.5163322504169884e-06, "loss": 0.8737, "step": 16776 }, { "epoch": 0.6080605994708419, "grad_norm": 2.247943903708335, "learning_rate": 3.515771767365002e-06, "loss": 0.7846, "step": 16777 }, { "epoch": 0.6080968431734987, "grad_norm": 2.169026474684998, "learning_rate": 3.5152113047647807e-06, "loss": 1.0356, "step": 16778 }, { "epoch": 0.6081330868761553, "grad_norm": 2.3614969547021643, "learning_rate": 3.5146508626240518e-06, "loss": 0.7492, "step": 16779 }, { "epoch": 0.608169330578812, "grad_norm": 2.7291925072508962, "learning_rate": 3.5140904409505357e-06, "loss": 0.7637, "step": 16780 }, { "epoch": 0.6082055742814686, "grad_norm": 2.66175104762482, "learning_rate": 3.5135300397519545e-06, "loss": 0.9937, "step": 16781 }, { "epoch": 0.6082418179841252, "grad_norm": 2.5170416881799538, "learning_rate": 3.5129696590360296e-06, "loss": 0.8837, "step": 16782 }, { "epoch": 0.6082780616867819, "grad_norm": 2.3158954408221217, "learning_rate": 3.512409298810484e-06, "loss": 0.9125, "step": 16783 }, { "epoch": 0.6083143053894385, "grad_norm": 2.46313145015877, "learning_rate": 3.511848959083039e-06, "loss": 0.8192, "step": 16784 }, { "epoch": 0.6083505490920953, "grad_norm": 2.4877951701665797, "learning_rate": 3.5112886398614153e-06, "loss": 0.9869, "step": 16785 }, { "epoch": 0.6083867927947519, "grad_norm": 2.3941889565590873, "learning_rate": 3.5107283411533323e-06, "loss": 0.9938, "step": 16786 }, { "epoch": 0.6084230364974086, "grad_norm": 2.0585082856260084, "learning_rate": 3.5101680629665136e-06, "loss": 0.7805, "step": 16787 }, { "epoch": 0.6084592802000652, "grad_norm": 2.5916274067540064, "learning_rate": 3.509607805308678e-06, "loss": 0.9273, "step": 16788 }, { "epoch": 0.6084955239027219, "grad_norm": 2.5594802034658533, "learning_rate": 3.5090475681875445e-06, "loss": 0.7822, "step": 16789 }, { "epoch": 0.6085317676053785, "grad_norm": 2.487403044903802, "learning_rate": 3.5084873516108343e-06, "loss": 0.8223, "step": 16790 }, { "epoch": 0.6085680113080353, "grad_norm": 2.413158073400295, "learning_rate": 3.507927155586267e-06, "loss": 0.946, "step": 16791 }, { "epoch": 0.6086042550106919, "grad_norm": 2.090360338620682, "learning_rate": 3.5073669801215604e-06, "loss": 0.733, "step": 16792 }, { "epoch": 0.6086404987133486, "grad_norm": 2.2816533987606276, "learning_rate": 3.5068068252244346e-06, "loss": 0.8566, "step": 16793 }, { "epoch": 0.6086767424160052, "grad_norm": 2.2565186467625367, "learning_rate": 3.506246690902606e-06, "loss": 0.9348, "step": 16794 }, { "epoch": 0.6087129861186619, "grad_norm": 2.197882482831537, "learning_rate": 3.505686577163796e-06, "loss": 0.9032, "step": 16795 }, { "epoch": 0.6087492298213185, "grad_norm": 2.2555713267497097, "learning_rate": 3.505126484015722e-06, "loss": 1.0311, "step": 16796 }, { "epoch": 0.6087854735239752, "grad_norm": 2.322426195425789, "learning_rate": 3.5045664114661003e-06, "loss": 0.8871, "step": 16797 }, { "epoch": 0.6088217172266319, "grad_norm": 2.3775704989499302, "learning_rate": 3.504006359522647e-06, "loss": 0.9316, "step": 16798 }, { "epoch": 0.6088579609292886, "grad_norm": 2.4894677747839986, "learning_rate": 3.5034463281930842e-06, "loss": 0.8208, "step": 16799 }, { "epoch": 0.6088942046319452, "grad_norm": 2.2467982010355856, "learning_rate": 3.5028863174851248e-06, "loss": 0.7869, "step": 16800 }, { "epoch": 0.6089304483346019, "grad_norm": 2.727633958566632, "learning_rate": 3.502326327406487e-06, "loss": 0.801, "step": 16801 }, { "epoch": 0.6089666920372585, "grad_norm": 2.5927152447296065, "learning_rate": 3.5017663579648856e-06, "loss": 1.0947, "step": 16802 }, { "epoch": 0.6090029357399152, "grad_norm": 2.208106308835209, "learning_rate": 3.5012064091680394e-06, "loss": 1.0107, "step": 16803 }, { "epoch": 0.6090391794425718, "grad_norm": 2.3500160166942927, "learning_rate": 3.500646481023662e-06, "loss": 0.7718, "step": 16804 }, { "epoch": 0.6090754231452286, "grad_norm": 1.8693460180049526, "learning_rate": 3.5000865735394686e-06, "loss": 0.8853, "step": 16805 }, { "epoch": 0.6091116668478852, "grad_norm": 2.120341724079557, "learning_rate": 3.4995266867231736e-06, "loss": 0.8705, "step": 16806 }, { "epoch": 0.6091479105505418, "grad_norm": 2.536397105355629, "learning_rate": 3.4989668205824955e-06, "loss": 1.1353, "step": 16807 }, { "epoch": 0.6091841542531985, "grad_norm": 2.36835493373388, "learning_rate": 3.4984069751251464e-06, "loss": 0.8954, "step": 16808 }, { "epoch": 0.6092203979558551, "grad_norm": 2.3202560734902913, "learning_rate": 3.4978471503588416e-06, "loss": 0.8907, "step": 16809 }, { "epoch": 0.6092566416585118, "grad_norm": 1.8933108817358888, "learning_rate": 3.497287346291294e-06, "loss": 0.7407, "step": 16810 }, { "epoch": 0.6092928853611685, "grad_norm": 2.8755460266106185, "learning_rate": 3.4967275629302177e-06, "loss": 1.0114, "step": 16811 }, { "epoch": 0.6093291290638252, "grad_norm": 2.2525907255222664, "learning_rate": 3.496167800283327e-06, "loss": 0.8959, "step": 16812 }, { "epoch": 0.6093653727664818, "grad_norm": 2.419775473400336, "learning_rate": 3.4956080583583346e-06, "loss": 0.8888, "step": 16813 }, { "epoch": 0.6094016164691385, "grad_norm": 2.6521647731819886, "learning_rate": 3.4950483371629512e-06, "loss": 0.9324, "step": 16814 }, { "epoch": 0.6094378601717951, "grad_norm": 2.560928884275473, "learning_rate": 3.4944886367048936e-06, "loss": 0.8758, "step": 16815 }, { "epoch": 0.6094741038744518, "grad_norm": 2.5341248109922576, "learning_rate": 3.4939289569918723e-06, "loss": 0.8874, "step": 16816 }, { "epoch": 0.6095103475771084, "grad_norm": 2.1570331098763846, "learning_rate": 3.493369298031599e-06, "loss": 0.7799, "step": 16817 }, { "epoch": 0.6095465912797652, "grad_norm": 2.4101621249144043, "learning_rate": 3.4928096598317845e-06, "loss": 0.8751, "step": 16818 }, { "epoch": 0.6095828349824218, "grad_norm": 2.2654474201248602, "learning_rate": 3.492250042400143e-06, "loss": 0.8527, "step": 16819 }, { "epoch": 0.6096190786850785, "grad_norm": 2.425323054884585, "learning_rate": 3.491690445744383e-06, "loss": 0.9299, "step": 16820 }, { "epoch": 0.6096553223877351, "grad_norm": 2.3537789771531656, "learning_rate": 3.491130869872217e-06, "loss": 0.9316, "step": 16821 }, { "epoch": 0.6096915660903918, "grad_norm": 2.233501297451639, "learning_rate": 3.490571314791353e-06, "loss": 0.7762, "step": 16822 }, { "epoch": 0.6097278097930484, "grad_norm": 2.004186547079909, "learning_rate": 3.490011780509505e-06, "loss": 0.7422, "step": 16823 }, { "epoch": 0.6097640534957052, "grad_norm": 2.596051457710791, "learning_rate": 3.4894522670343823e-06, "loss": 1.0264, "step": 16824 }, { "epoch": 0.6098002971983618, "grad_norm": 2.228738127096149, "learning_rate": 3.488892774373693e-06, "loss": 0.7956, "step": 16825 }, { "epoch": 0.6098365409010185, "grad_norm": 2.311838569174845, "learning_rate": 3.488333302535146e-06, "loss": 0.7812, "step": 16826 }, { "epoch": 0.6098727846036751, "grad_norm": 1.9228256045891805, "learning_rate": 3.487773851526454e-06, "loss": 0.8355, "step": 16827 }, { "epoch": 0.6099090283063318, "grad_norm": 2.5290619445826183, "learning_rate": 3.4872144213553226e-06, "loss": 0.9717, "step": 16828 }, { "epoch": 0.6099452720089884, "grad_norm": 2.4422105716649183, "learning_rate": 3.486655012029463e-06, "loss": 0.9144, "step": 16829 }, { "epoch": 0.609981515711645, "grad_norm": 2.1369535967468987, "learning_rate": 3.48609562355658e-06, "loss": 0.9509, "step": 16830 }, { "epoch": 0.6100177594143018, "grad_norm": 2.3232739000870226, "learning_rate": 3.4855362559443857e-06, "loss": 0.9917, "step": 16831 }, { "epoch": 0.6100540031169585, "grad_norm": 2.1414467679185694, "learning_rate": 3.484976909200585e-06, "loss": 0.9274, "step": 16832 }, { "epoch": 0.6100902468196151, "grad_norm": 2.3693441476323214, "learning_rate": 3.484417583332887e-06, "loss": 0.8963, "step": 16833 }, { "epoch": 0.6101264905222717, "grad_norm": 2.2754306083735134, "learning_rate": 3.4838582783489956e-06, "loss": 0.9287, "step": 16834 }, { "epoch": 0.6101627342249284, "grad_norm": 2.2714109115397982, "learning_rate": 3.483298994256622e-06, "loss": 0.8582, "step": 16835 }, { "epoch": 0.610198977927585, "grad_norm": 2.1551794551076875, "learning_rate": 3.4827397310634715e-06, "loss": 0.8817, "step": 16836 }, { "epoch": 0.6102352216302418, "grad_norm": 2.6851178094236916, "learning_rate": 3.48218048877725e-06, "loss": 0.9098, "step": 16837 }, { "epoch": 0.6102714653328984, "grad_norm": 2.267117295889706, "learning_rate": 3.4816212674056615e-06, "loss": 0.9101, "step": 16838 }, { "epoch": 0.6103077090355551, "grad_norm": 2.3579033113179597, "learning_rate": 3.4810620669564157e-06, "loss": 0.8382, "step": 16839 }, { "epoch": 0.6103439527382117, "grad_norm": 2.455868240984202, "learning_rate": 3.4805028874372155e-06, "loss": 0.8856, "step": 16840 }, { "epoch": 0.6103801964408684, "grad_norm": 2.316983818672663, "learning_rate": 3.4799437288557664e-06, "loss": 0.681, "step": 16841 }, { "epoch": 0.610416440143525, "grad_norm": 2.975964336530416, "learning_rate": 3.4793845912197714e-06, "loss": 1.0823, "step": 16842 }, { "epoch": 0.6104526838461817, "grad_norm": 2.247844025822631, "learning_rate": 3.47882547453694e-06, "loss": 0.9938, "step": 16843 }, { "epoch": 0.6104889275488384, "grad_norm": 2.505489610344065, "learning_rate": 3.4782663788149723e-06, "loss": 1.0305, "step": 16844 }, { "epoch": 0.6105251712514951, "grad_norm": 2.4426569898808497, "learning_rate": 3.4777073040615746e-06, "loss": 1.0146, "step": 16845 }, { "epoch": 0.6105614149541517, "grad_norm": 2.235312413374081, "learning_rate": 3.477148250284447e-06, "loss": 0.946, "step": 16846 }, { "epoch": 0.6105976586568084, "grad_norm": 2.5986408994083536, "learning_rate": 3.4765892174912974e-06, "loss": 0.9611, "step": 16847 }, { "epoch": 0.610633902359465, "grad_norm": 2.411872978363254, "learning_rate": 3.4760302056898275e-06, "loss": 0.774, "step": 16848 }, { "epoch": 0.6106701460621217, "grad_norm": 2.3503401016094445, "learning_rate": 3.4754712148877396e-06, "loss": 0.8494, "step": 16849 }, { "epoch": 0.6107063897647784, "grad_norm": 2.2873765423913555, "learning_rate": 3.474912245092735e-06, "loss": 0.9117, "step": 16850 }, { "epoch": 0.6107426334674351, "grad_norm": 2.531867958323893, "learning_rate": 3.474353296312519e-06, "loss": 0.8922, "step": 16851 }, { "epoch": 0.6107788771700917, "grad_norm": 2.277202577808251, "learning_rate": 3.4737943685547913e-06, "loss": 0.8294, "step": 16852 }, { "epoch": 0.6108151208727484, "grad_norm": 2.218973103786984, "learning_rate": 3.4732354618272546e-06, "loss": 1.078, "step": 16853 }, { "epoch": 0.610851364575405, "grad_norm": 2.3888530199316955, "learning_rate": 3.4726765761376072e-06, "loss": 0.7927, "step": 16854 }, { "epoch": 0.6108876082780617, "grad_norm": 2.3144778541762614, "learning_rate": 3.4721177114935557e-06, "loss": 0.9664, "step": 16855 }, { "epoch": 0.6109238519807183, "grad_norm": 2.3086830583980404, "learning_rate": 3.4715588679027977e-06, "loss": 0.9257, "step": 16856 }, { "epoch": 0.6109600956833751, "grad_norm": 2.3208982200989774, "learning_rate": 3.471000045373033e-06, "loss": 0.9879, "step": 16857 }, { "epoch": 0.6109963393860317, "grad_norm": 2.4244567076562578, "learning_rate": 3.470441243911964e-06, "loss": 0.8356, "step": 16858 }, { "epoch": 0.6110325830886884, "grad_norm": 2.244927671013982, "learning_rate": 3.46988246352729e-06, "loss": 0.8184, "step": 16859 }, { "epoch": 0.611068826791345, "grad_norm": 2.489893092029947, "learning_rate": 3.4693237042267105e-06, "loss": 0.7484, "step": 16860 }, { "epoch": 0.6111050704940016, "grad_norm": 2.423777598732141, "learning_rate": 3.4687649660179214e-06, "loss": 0.9416, "step": 16861 }, { "epoch": 0.6111413141966583, "grad_norm": 2.721233905345928, "learning_rate": 3.468206248908628e-06, "loss": 0.8267, "step": 16862 }, { "epoch": 0.611177557899315, "grad_norm": 2.565836358412338, "learning_rate": 3.4676475529065258e-06, "loss": 0.8465, "step": 16863 }, { "epoch": 0.6112138016019717, "grad_norm": 2.445528363830631, "learning_rate": 3.4670888780193135e-06, "loss": 0.9622, "step": 16864 }, { "epoch": 0.6112500453046283, "grad_norm": 2.4128627947648806, "learning_rate": 3.4665302242546877e-06, "loss": 0.9893, "step": 16865 }, { "epoch": 0.611286289007285, "grad_norm": 2.1396473884011433, "learning_rate": 3.4659715916203495e-06, "loss": 1.0307, "step": 16866 }, { "epoch": 0.6113225327099416, "grad_norm": 2.4903668724606076, "learning_rate": 3.4654129801239954e-06, "loss": 1.0918, "step": 16867 }, { "epoch": 0.6113587764125983, "grad_norm": 2.682367868248135, "learning_rate": 3.464854389773322e-06, "loss": 0.9398, "step": 16868 }, { "epoch": 0.6113950201152549, "grad_norm": 2.2756908748466373, "learning_rate": 3.464295820576027e-06, "loss": 0.97, "step": 16869 }, { "epoch": 0.6114312638179117, "grad_norm": 2.5094185098140014, "learning_rate": 3.4637372725398065e-06, "loss": 0.9792, "step": 16870 }, { "epoch": 0.6114675075205683, "grad_norm": 2.2860658337607735, "learning_rate": 3.4631787456723574e-06, "loss": 0.9906, "step": 16871 }, { "epoch": 0.611503751223225, "grad_norm": 2.610751671220979, "learning_rate": 3.4626202399813757e-06, "loss": 0.9475, "step": 16872 }, { "epoch": 0.6115399949258816, "grad_norm": 2.3993392745814424, "learning_rate": 3.4620617554745563e-06, "loss": 0.8854, "step": 16873 }, { "epoch": 0.6115762386285383, "grad_norm": 2.514957431707592, "learning_rate": 3.4615032921595966e-06, "loss": 1.2272, "step": 16874 }, { "epoch": 0.6116124823311949, "grad_norm": 2.0511596687751226, "learning_rate": 3.460944850044191e-06, "loss": 0.9221, "step": 16875 }, { "epoch": 0.6116487260338516, "grad_norm": 2.6934189007471105, "learning_rate": 3.4603864291360358e-06, "loss": 0.9642, "step": 16876 }, { "epoch": 0.6116849697365083, "grad_norm": 2.127684512413294, "learning_rate": 3.4598280294428223e-06, "loss": 0.9579, "step": 16877 }, { "epoch": 0.611721213439165, "grad_norm": 2.061146044692427, "learning_rate": 3.459269650972249e-06, "loss": 0.7845, "step": 16878 }, { "epoch": 0.6117574571418216, "grad_norm": 2.420237879109829, "learning_rate": 3.458711293732008e-06, "loss": 1.0294, "step": 16879 }, { "epoch": 0.6117937008444783, "grad_norm": 2.148662498477254, "learning_rate": 3.4581529577297923e-06, "loss": 0.8622, "step": 16880 }, { "epoch": 0.6118299445471349, "grad_norm": 1.9532373584791645, "learning_rate": 3.4575946429732946e-06, "loss": 0.7573, "step": 16881 }, { "epoch": 0.6118661882497916, "grad_norm": 2.706077810415041, "learning_rate": 3.4570363494702127e-06, "loss": 0.7441, "step": 16882 }, { "epoch": 0.6119024319524483, "grad_norm": 2.200364055401069, "learning_rate": 3.456478077228236e-06, "loss": 0.7862, "step": 16883 }, { "epoch": 0.611938675655105, "grad_norm": 2.3381487669177035, "learning_rate": 3.4559198262550585e-06, "loss": 1.0336, "step": 16884 }, { "epoch": 0.6119749193577616, "grad_norm": 2.5930035118294463, "learning_rate": 3.4553615965583696e-06, "loss": 0.9451, "step": 16885 }, { "epoch": 0.6120111630604183, "grad_norm": 2.3846423572329907, "learning_rate": 3.4548033881458654e-06, "loss": 0.703, "step": 16886 }, { "epoch": 0.6120474067630749, "grad_norm": 2.7424701835243908, "learning_rate": 3.4542452010252368e-06, "loss": 0.7312, "step": 16887 }, { "epoch": 0.6120836504657315, "grad_norm": 2.1214595127321463, "learning_rate": 3.453687035204174e-06, "loss": 0.8703, "step": 16888 }, { "epoch": 0.6121198941683882, "grad_norm": 2.5247311176716734, "learning_rate": 3.4531288906903683e-06, "loss": 0.9639, "step": 16889 }, { "epoch": 0.612156137871045, "grad_norm": 2.777861727740526, "learning_rate": 3.452570767491511e-06, "loss": 1.008, "step": 16890 }, { "epoch": 0.6121923815737016, "grad_norm": 2.5407915185486813, "learning_rate": 3.452012665615293e-06, "loss": 0.827, "step": 16891 }, { "epoch": 0.6122286252763582, "grad_norm": 2.205117711882551, "learning_rate": 3.451454585069405e-06, "loss": 0.9684, "step": 16892 }, { "epoch": 0.6122648689790149, "grad_norm": 2.2650168379302236, "learning_rate": 3.4508965258615336e-06, "loss": 1.0442, "step": 16893 }, { "epoch": 0.6123011126816715, "grad_norm": 2.2919441904443887, "learning_rate": 3.450338487999373e-06, "loss": 0.8469, "step": 16894 }, { "epoch": 0.6123373563843282, "grad_norm": 2.150087829408212, "learning_rate": 3.449780471490611e-06, "loss": 1.1025, "step": 16895 }, { "epoch": 0.6123736000869849, "grad_norm": 2.5565427182762357, "learning_rate": 3.4492224763429367e-06, "loss": 0.9283, "step": 16896 }, { "epoch": 0.6124098437896416, "grad_norm": 2.1618827462122443, "learning_rate": 3.4486645025640374e-06, "loss": 0.7871, "step": 16897 }, { "epoch": 0.6124460874922982, "grad_norm": 2.3121850583083337, "learning_rate": 3.4481065501616053e-06, "loss": 0.825, "step": 16898 }, { "epoch": 0.6124823311949549, "grad_norm": 2.482198288674242, "learning_rate": 3.447548619143325e-06, "loss": 0.8993, "step": 16899 }, { "epoch": 0.6125185748976115, "grad_norm": 2.1088330069389474, "learning_rate": 3.4469907095168865e-06, "loss": 0.7318, "step": 16900 }, { "epoch": 0.6125548186002682, "grad_norm": 2.2846502955806622, "learning_rate": 3.4464328212899746e-06, "loss": 0.7861, "step": 16901 }, { "epoch": 0.6125910623029248, "grad_norm": 2.746910020413552, "learning_rate": 3.4458749544702806e-06, "loss": 0.9411, "step": 16902 }, { "epoch": 0.6126273060055816, "grad_norm": 2.3069742445026282, "learning_rate": 3.44531710906549e-06, "loss": 1.0635, "step": 16903 }, { "epoch": 0.6126635497082382, "grad_norm": 2.177582656008422, "learning_rate": 3.44475928508329e-06, "loss": 0.8211, "step": 16904 }, { "epoch": 0.6126997934108949, "grad_norm": 2.114695377146217, "learning_rate": 3.4442014825313646e-06, "loss": 0.8444, "step": 16905 }, { "epoch": 0.6127360371135515, "grad_norm": 2.3794019756326463, "learning_rate": 3.443643701417403e-06, "loss": 0.9271, "step": 16906 }, { "epoch": 0.6127722808162082, "grad_norm": 2.2416350791508264, "learning_rate": 3.4430859417490904e-06, "loss": 0.9794, "step": 16907 }, { "epoch": 0.6128085245188648, "grad_norm": 2.4263781519495145, "learning_rate": 3.442528203534112e-06, "loss": 0.8193, "step": 16908 }, { "epoch": 0.6128447682215216, "grad_norm": 2.3431652867932873, "learning_rate": 3.4419704867801505e-06, "loss": 0.9702, "step": 16909 }, { "epoch": 0.6128810119241782, "grad_norm": 2.4773898121334432, "learning_rate": 3.4414127914948956e-06, "loss": 0.85, "step": 16910 }, { "epoch": 0.6129172556268349, "grad_norm": 2.029330966978938, "learning_rate": 3.4408551176860294e-06, "loss": 0.6985, "step": 16911 }, { "epoch": 0.6129534993294915, "grad_norm": 2.505487063965451, "learning_rate": 3.4402974653612375e-06, "loss": 0.9586, "step": 16912 }, { "epoch": 0.6129897430321481, "grad_norm": 2.1326872875747185, "learning_rate": 3.4397398345282006e-06, "loss": 0.773, "step": 16913 }, { "epoch": 0.6130259867348048, "grad_norm": 2.193717214334269, "learning_rate": 3.439182225194608e-06, "loss": 0.8543, "step": 16914 }, { "epoch": 0.6130622304374614, "grad_norm": 2.2546143418588374, "learning_rate": 3.43862463736814e-06, "loss": 0.8998, "step": 16915 }, { "epoch": 0.6130984741401182, "grad_norm": 2.3621214507118045, "learning_rate": 3.43806707105648e-06, "loss": 0.9257, "step": 16916 }, { "epoch": 0.6131347178427748, "grad_norm": 2.5458958193070287, "learning_rate": 3.43750952626731e-06, "loss": 0.9806, "step": 16917 }, { "epoch": 0.6131709615454315, "grad_norm": 1.9892069741634864, "learning_rate": 3.4369520030083153e-06, "loss": 0.6946, "step": 16918 }, { "epoch": 0.6132072052480881, "grad_norm": 2.452299448964467, "learning_rate": 3.4363945012871765e-06, "loss": 0.8663, "step": 16919 }, { "epoch": 0.6132434489507448, "grad_norm": 2.3414651341789536, "learning_rate": 3.4358370211115756e-06, "loss": 0.8778, "step": 16920 }, { "epoch": 0.6132796926534014, "grad_norm": 2.2450625631499306, "learning_rate": 3.4352795624891926e-06, "loss": 0.863, "step": 16921 }, { "epoch": 0.6133159363560582, "grad_norm": 2.8825051380432734, "learning_rate": 3.434722125427713e-06, "loss": 0.9338, "step": 16922 }, { "epoch": 0.6133521800587148, "grad_norm": 2.314834285790868, "learning_rate": 3.4341647099348163e-06, "loss": 0.8988, "step": 16923 }, { "epoch": 0.6133884237613715, "grad_norm": 2.268452443458365, "learning_rate": 3.4336073160181827e-06, "loss": 0.8489, "step": 16924 }, { "epoch": 0.6134246674640281, "grad_norm": 2.2242146206756366, "learning_rate": 3.4330499436854915e-06, "loss": 0.899, "step": 16925 }, { "epoch": 0.6134609111666848, "grad_norm": 2.476376037196897, "learning_rate": 3.432492592944425e-06, "loss": 1.164, "step": 16926 }, { "epoch": 0.6134971548693414, "grad_norm": 2.5708308056884737, "learning_rate": 3.431935263802664e-06, "loss": 0.8824, "step": 16927 }, { "epoch": 0.6135333985719981, "grad_norm": 2.3408503316487965, "learning_rate": 3.431377956267886e-06, "loss": 0.6958, "step": 16928 }, { "epoch": 0.6135696422746548, "grad_norm": 2.2936542932824824, "learning_rate": 3.4308206703477685e-06, "loss": 0.9132, "step": 16929 }, { "epoch": 0.6136058859773115, "grad_norm": 2.3591713414698465, "learning_rate": 3.430263406049996e-06, "loss": 0.8413, "step": 16930 }, { "epoch": 0.6136421296799681, "grad_norm": 2.354744062293956, "learning_rate": 3.4297061633822443e-06, "loss": 1.0476, "step": 16931 }, { "epoch": 0.6136783733826248, "grad_norm": 2.0380027228845443, "learning_rate": 3.429148942352192e-06, "loss": 0.6762, "step": 16932 }, { "epoch": 0.6137146170852814, "grad_norm": 2.1681132470969966, "learning_rate": 3.428591742967515e-06, "loss": 0.9439, "step": 16933 }, { "epoch": 0.6137508607879381, "grad_norm": 2.321046930044171, "learning_rate": 3.428034565235895e-06, "loss": 0.8819, "step": 16934 }, { "epoch": 0.6137871044905947, "grad_norm": 2.6677606183440608, "learning_rate": 3.4274774091650087e-06, "loss": 0.9803, "step": 16935 }, { "epoch": 0.6138233481932515, "grad_norm": 2.2877600994254434, "learning_rate": 3.426920274762532e-06, "loss": 0.8101, "step": 16936 }, { "epoch": 0.6138595918959081, "grad_norm": 2.565157224566799, "learning_rate": 3.426363162036143e-06, "loss": 0.9296, "step": 16937 }, { "epoch": 0.6138958355985648, "grad_norm": 2.2089291380443523, "learning_rate": 3.4258060709935176e-06, "loss": 0.7192, "step": 16938 }, { "epoch": 0.6139320793012214, "grad_norm": 2.6272548013209525, "learning_rate": 3.425249001642333e-06, "loss": 0.8665, "step": 16939 }, { "epoch": 0.613968323003878, "grad_norm": 2.5106847065380964, "learning_rate": 3.4246919539902623e-06, "loss": 0.8197, "step": 16940 }, { "epoch": 0.6140045667065347, "grad_norm": 2.496484767683765, "learning_rate": 3.4241349280449864e-06, "loss": 0.9315, "step": 16941 }, { "epoch": 0.6140408104091915, "grad_norm": 2.4526802367664318, "learning_rate": 3.4235779238141776e-06, "loss": 0.8402, "step": 16942 }, { "epoch": 0.6140770541118481, "grad_norm": 2.387740412762903, "learning_rate": 3.4230209413055114e-06, "loss": 0.8147, "step": 16943 }, { "epoch": 0.6141132978145047, "grad_norm": 2.3442724545140217, "learning_rate": 3.4224639805266623e-06, "loss": 0.9472, "step": 16944 }, { "epoch": 0.6141495415171614, "grad_norm": 2.373002263356682, "learning_rate": 3.4219070414853073e-06, "loss": 0.7251, "step": 16945 }, { "epoch": 0.614185785219818, "grad_norm": 2.603841794951008, "learning_rate": 3.4213501241891178e-06, "loss": 0.8297, "step": 16946 }, { "epoch": 0.6142220289224747, "grad_norm": 2.237381404991335, "learning_rate": 3.420793228645769e-06, "loss": 0.7258, "step": 16947 }, { "epoch": 0.6142582726251313, "grad_norm": 2.3507971875422093, "learning_rate": 3.4202363548629324e-06, "loss": 0.8847, "step": 16948 }, { "epoch": 0.6142945163277881, "grad_norm": 2.651576797736189, "learning_rate": 3.419679502848286e-06, "loss": 1.1058, "step": 16949 }, { "epoch": 0.6143307600304447, "grad_norm": 2.3719915963154206, "learning_rate": 3.4191226726094996e-06, "loss": 0.9241, "step": 16950 }, { "epoch": 0.6143670037331014, "grad_norm": 2.4636605523361843, "learning_rate": 3.4185658641542473e-06, "loss": 0.978, "step": 16951 }, { "epoch": 0.614403247435758, "grad_norm": 2.507662459839764, "learning_rate": 3.4180090774901986e-06, "loss": 0.9133, "step": 16952 }, { "epoch": 0.6144394911384147, "grad_norm": 2.367018477792697, "learning_rate": 3.4174523126250304e-06, "loss": 0.7577, "step": 16953 }, { "epoch": 0.6144757348410713, "grad_norm": 2.0020958313034347, "learning_rate": 3.416895569566412e-06, "loss": 0.8675, "step": 16954 }, { "epoch": 0.6145119785437281, "grad_norm": 2.5454944873923386, "learning_rate": 3.416338848322015e-06, "loss": 0.8733, "step": 16955 }, { "epoch": 0.6145482222463847, "grad_norm": 2.430422985037432, "learning_rate": 3.41578214889951e-06, "loss": 0.8811, "step": 16956 }, { "epoch": 0.6145844659490414, "grad_norm": 1.9461504149640807, "learning_rate": 3.41522547130657e-06, "loss": 0.6815, "step": 16957 }, { "epoch": 0.614620709651698, "grad_norm": 2.3749591712689133, "learning_rate": 3.4146688155508647e-06, "loss": 0.8812, "step": 16958 }, { "epoch": 0.6146569533543547, "grad_norm": 2.553986039651751, "learning_rate": 3.4141121816400635e-06, "loss": 0.9715, "step": 16959 }, { "epoch": 0.6146931970570113, "grad_norm": 2.203145157789802, "learning_rate": 3.413555569581836e-06, "loss": 0.8706, "step": 16960 }, { "epoch": 0.614729440759668, "grad_norm": 2.7951450803702866, "learning_rate": 3.412998979383856e-06, "loss": 0.9164, "step": 16961 }, { "epoch": 0.6147656844623247, "grad_norm": 2.351044591859183, "learning_rate": 3.412442411053789e-06, "loss": 0.7858, "step": 16962 }, { "epoch": 0.6148019281649814, "grad_norm": 2.4260855213609878, "learning_rate": 3.411885864599306e-06, "loss": 0.9549, "step": 16963 }, { "epoch": 0.614838171867638, "grad_norm": 2.429186791743204, "learning_rate": 3.4113293400280745e-06, "loss": 0.9555, "step": 16964 }, { "epoch": 0.6148744155702947, "grad_norm": 2.2558731557849607, "learning_rate": 3.4107728373477643e-06, "loss": 0.9742, "step": 16965 }, { "epoch": 0.6149106592729513, "grad_norm": 1.9862749584315877, "learning_rate": 3.410216356566044e-06, "loss": 0.7682, "step": 16966 }, { "epoch": 0.614946902975608, "grad_norm": 2.1948677281827305, "learning_rate": 3.4096598976905807e-06, "loss": 0.9362, "step": 16967 }, { "epoch": 0.6149831466782647, "grad_norm": 2.0343810547802077, "learning_rate": 3.4091034607290404e-06, "loss": 0.7334, "step": 16968 }, { "epoch": 0.6150193903809213, "grad_norm": 2.1287107148111444, "learning_rate": 3.4085470456890934e-06, "loss": 0.8112, "step": 16969 }, { "epoch": 0.615055634083578, "grad_norm": 2.249965755844865, "learning_rate": 3.407990652578407e-06, "loss": 1.0098, "step": 16970 }, { "epoch": 0.6150918777862346, "grad_norm": 2.215758919874063, "learning_rate": 3.4074342814046454e-06, "loss": 0.781, "step": 16971 }, { "epoch": 0.6151281214888913, "grad_norm": 2.219576499346115, "learning_rate": 3.4068779321754742e-06, "loss": 0.874, "step": 16972 }, { "epoch": 0.6151643651915479, "grad_norm": 2.754160523098716, "learning_rate": 3.4063216048985642e-06, "loss": 0.9895, "step": 16973 }, { "epoch": 0.6152006088942046, "grad_norm": 2.3972893345276125, "learning_rate": 3.4057652995815784e-06, "loss": 0.8314, "step": 16974 }, { "epoch": 0.6152368525968613, "grad_norm": 2.310238950321861, "learning_rate": 3.405209016232183e-06, "loss": 0.9579, "step": 16975 }, { "epoch": 0.615273096299518, "grad_norm": 2.4055199493697357, "learning_rate": 3.404652754858041e-06, "loss": 1.0033, "step": 16976 }, { "epoch": 0.6153093400021746, "grad_norm": 2.230970349164953, "learning_rate": 3.404096515466821e-06, "loss": 0.6493, "step": 16977 }, { "epoch": 0.6153455837048313, "grad_norm": 2.170862946729361, "learning_rate": 3.403540298066186e-06, "loss": 0.7799, "step": 16978 }, { "epoch": 0.6153818274074879, "grad_norm": 2.4899851206251857, "learning_rate": 3.402984102663799e-06, "loss": 0.9022, "step": 16979 }, { "epoch": 0.6154180711101446, "grad_norm": 2.2616009164845035, "learning_rate": 3.402427929267324e-06, "loss": 0.87, "step": 16980 }, { "epoch": 0.6154543148128013, "grad_norm": 2.233083273143513, "learning_rate": 3.4018717778844278e-06, "loss": 0.7831, "step": 16981 }, { "epoch": 0.615490558515458, "grad_norm": 2.659293701041123, "learning_rate": 3.4013156485227726e-06, "loss": 1.0442, "step": 16982 }, { "epoch": 0.6155268022181146, "grad_norm": 2.4957220921173917, "learning_rate": 3.40075954119002e-06, "loss": 0.8064, "step": 16983 }, { "epoch": 0.6155630459207713, "grad_norm": 2.5286285090633527, "learning_rate": 3.4002034558938336e-06, "loss": 0.9932, "step": 16984 }, { "epoch": 0.6155992896234279, "grad_norm": 2.23736587077818, "learning_rate": 3.399647392641876e-06, "loss": 0.8872, "step": 16985 }, { "epoch": 0.6156355333260846, "grad_norm": 2.2685641373782772, "learning_rate": 3.3990913514418105e-06, "loss": 0.7817, "step": 16986 }, { "epoch": 0.6156717770287412, "grad_norm": 2.5289784525549823, "learning_rate": 3.3985353323012977e-06, "loss": 1.0174, "step": 16987 }, { "epoch": 0.615708020731398, "grad_norm": 2.199312666110788, "learning_rate": 3.397979335227997e-06, "loss": 0.9946, "step": 16988 }, { "epoch": 0.6157442644340546, "grad_norm": 2.263901994106702, "learning_rate": 3.3974233602295746e-06, "loss": 0.8141, "step": 16989 }, { "epoch": 0.6157805081367113, "grad_norm": 2.338206139736835, "learning_rate": 3.3968674073136894e-06, "loss": 0.9066, "step": 16990 }, { "epoch": 0.6158167518393679, "grad_norm": 2.2438722769647437, "learning_rate": 3.396311476488001e-06, "loss": 0.7768, "step": 16991 }, { "epoch": 0.6158529955420246, "grad_norm": 2.440306228131613, "learning_rate": 3.39575556776017e-06, "loss": 0.9873, "step": 16992 }, { "epoch": 0.6158892392446812, "grad_norm": 2.4829200638154822, "learning_rate": 3.3951996811378586e-06, "loss": 1.007, "step": 16993 }, { "epoch": 0.615925482947338, "grad_norm": 2.3583070716075194, "learning_rate": 3.3946438166287244e-06, "loss": 0.9704, "step": 16994 }, { "epoch": 0.6159617266499946, "grad_norm": 2.270498159391406, "learning_rate": 3.3940879742404274e-06, "loss": 1.16, "step": 16995 }, { "epoch": 0.6159979703526512, "grad_norm": 2.2700277621085916, "learning_rate": 3.3935321539806247e-06, "loss": 0.9284, "step": 16996 }, { "epoch": 0.6160342140553079, "grad_norm": 2.29725839744046, "learning_rate": 3.39297635585698e-06, "loss": 0.9347, "step": 16997 }, { "epoch": 0.6160704577579645, "grad_norm": 2.1889119086398625, "learning_rate": 3.3924205798771493e-06, "loss": 0.8419, "step": 16998 }, { "epoch": 0.6161067014606212, "grad_norm": 2.345451351531401, "learning_rate": 3.39186482604879e-06, "loss": 0.8056, "step": 16999 }, { "epoch": 0.6161429451632778, "grad_norm": 2.07061757979484, "learning_rate": 3.391309094379559e-06, "loss": 0.8072, "step": 17000 }, { "epoch": 0.6161791888659346, "grad_norm": 2.0856557056831226, "learning_rate": 3.3907533848771184e-06, "loss": 0.8982, "step": 17001 }, { "epoch": 0.6162154325685912, "grad_norm": 2.3486589203222215, "learning_rate": 3.3901976975491223e-06, "loss": 0.9921, "step": 17002 }, { "epoch": 0.6162516762712479, "grad_norm": 2.414263114105131, "learning_rate": 3.3896420324032285e-06, "loss": 1.0573, "step": 17003 }, { "epoch": 0.6162879199739045, "grad_norm": 2.295419037901509, "learning_rate": 3.3890863894470927e-06, "loss": 0.9243, "step": 17004 }, { "epoch": 0.6163241636765612, "grad_norm": 2.6216444588078316, "learning_rate": 3.388530768688374e-06, "loss": 0.9376, "step": 17005 }, { "epoch": 0.6163604073792178, "grad_norm": 2.0799520909212856, "learning_rate": 3.387975170134725e-06, "loss": 0.7557, "step": 17006 }, { "epoch": 0.6163966510818745, "grad_norm": 2.3868704102559133, "learning_rate": 3.3874195937938047e-06, "loss": 0.8239, "step": 17007 }, { "epoch": 0.6164328947845312, "grad_norm": 2.418053171766413, "learning_rate": 3.3868640396732644e-06, "loss": 0.9245, "step": 17008 }, { "epoch": 0.6164691384871879, "grad_norm": 2.298447501107745, "learning_rate": 3.3863085077807646e-06, "loss": 0.8106, "step": 17009 }, { "epoch": 0.6165053821898445, "grad_norm": 2.2683879679426746, "learning_rate": 3.3857529981239567e-06, "loss": 0.7863, "step": 17010 }, { "epoch": 0.6165416258925012, "grad_norm": 2.1432998098133544, "learning_rate": 3.3851975107104975e-06, "loss": 0.8713, "step": 17011 }, { "epoch": 0.6165778695951578, "grad_norm": 2.164403724203484, "learning_rate": 3.3846420455480376e-06, "loss": 0.9005, "step": 17012 }, { "epoch": 0.6166141132978145, "grad_norm": 2.3789887847266065, "learning_rate": 3.3840866026442353e-06, "loss": 0.9565, "step": 17013 }, { "epoch": 0.6166503570004712, "grad_norm": 2.3781391146451547, "learning_rate": 3.3835311820067417e-06, "loss": 0.9856, "step": 17014 }, { "epoch": 0.6166866007031279, "grad_norm": 2.3771323043733354, "learning_rate": 3.382975783643212e-06, "loss": 0.7898, "step": 17015 }, { "epoch": 0.6167228444057845, "grad_norm": 2.227143282954694, "learning_rate": 3.382420407561295e-06, "loss": 0.9345, "step": 17016 }, { "epoch": 0.6167590881084412, "grad_norm": 2.423516864262887, "learning_rate": 3.381865053768649e-06, "loss": 1.0264, "step": 17017 }, { "epoch": 0.6167953318110978, "grad_norm": 2.07126604100457, "learning_rate": 3.381309722272923e-06, "loss": 0.8232, "step": 17018 }, { "epoch": 0.6168315755137544, "grad_norm": 2.5400331470753796, "learning_rate": 3.380754413081769e-06, "loss": 0.8796, "step": 17019 }, { "epoch": 0.6168678192164111, "grad_norm": 2.329829300811757, "learning_rate": 3.380199126202842e-06, "loss": 1.1038, "step": 17020 }, { "epoch": 0.6169040629190679, "grad_norm": 2.4849190944326955, "learning_rate": 3.379643861643791e-06, "loss": 0.8351, "step": 17021 }, { "epoch": 0.6169403066217245, "grad_norm": 2.399174220758032, "learning_rate": 3.3790886194122673e-06, "loss": 0.9119, "step": 17022 }, { "epoch": 0.6169765503243811, "grad_norm": 2.28945704132252, "learning_rate": 3.378533399515922e-06, "loss": 0.7501, "step": 17023 }, { "epoch": 0.6170127940270378, "grad_norm": 2.356278536184586, "learning_rate": 3.377978201962406e-06, "loss": 0.9625, "step": 17024 }, { "epoch": 0.6170490377296944, "grad_norm": 2.2329980681653723, "learning_rate": 3.37742302675937e-06, "loss": 0.9172, "step": 17025 }, { "epoch": 0.6170852814323511, "grad_norm": 2.242248060008987, "learning_rate": 3.376867873914463e-06, "loss": 0.9773, "step": 17026 }, { "epoch": 0.6171215251350078, "grad_norm": 2.191817109030417, "learning_rate": 3.3763127434353338e-06, "loss": 0.7502, "step": 17027 }, { "epoch": 0.6171577688376645, "grad_norm": 2.520137606986154, "learning_rate": 3.375757635329634e-06, "loss": 0.949, "step": 17028 }, { "epoch": 0.6171940125403211, "grad_norm": 2.3011648257408632, "learning_rate": 3.375202549605012e-06, "loss": 1.0786, "step": 17029 }, { "epoch": 0.6172302562429778, "grad_norm": 2.286320769761547, "learning_rate": 3.374647486269117e-06, "loss": 0.8084, "step": 17030 }, { "epoch": 0.6172664999456344, "grad_norm": 2.3248922427762713, "learning_rate": 3.374092445329594e-06, "loss": 0.7097, "step": 17031 }, { "epoch": 0.6173027436482911, "grad_norm": 2.446640517423707, "learning_rate": 3.3735374267940964e-06, "loss": 0.9456, "step": 17032 }, { "epoch": 0.6173389873509477, "grad_norm": 2.410706686812464, "learning_rate": 3.3729824306702683e-06, "loss": 0.7526, "step": 17033 }, { "epoch": 0.6173752310536045, "grad_norm": 2.230705957547679, "learning_rate": 3.3724274569657596e-06, "loss": 0.8546, "step": 17034 }, { "epoch": 0.6174114747562611, "grad_norm": 2.1482909141280153, "learning_rate": 3.371872505688213e-06, "loss": 0.8, "step": 17035 }, { "epoch": 0.6174477184589178, "grad_norm": 2.2788636391818686, "learning_rate": 3.371317576845281e-06, "loss": 0.8901, "step": 17036 }, { "epoch": 0.6174839621615744, "grad_norm": 2.6715998453252054, "learning_rate": 3.3707626704446083e-06, "loss": 1.0293, "step": 17037 }, { "epoch": 0.6175202058642311, "grad_norm": 2.385909035777957, "learning_rate": 3.3702077864938397e-06, "loss": 0.8085, "step": 17038 }, { "epoch": 0.6175564495668877, "grad_norm": 2.378664864035323, "learning_rate": 3.3696529250006204e-06, "loss": 0.9486, "step": 17039 }, { "epoch": 0.6175926932695445, "grad_norm": 2.653048193452208, "learning_rate": 3.3690980859726007e-06, "loss": 0.8838, "step": 17040 }, { "epoch": 0.6176289369722011, "grad_norm": 2.2374886966020435, "learning_rate": 3.3685432694174227e-06, "loss": 0.9956, "step": 17041 }, { "epoch": 0.6176651806748578, "grad_norm": 2.185805382208557, "learning_rate": 3.3679884753427307e-06, "loss": 0.9378, "step": 17042 }, { "epoch": 0.6177014243775144, "grad_norm": 2.2209259376181985, "learning_rate": 3.3674337037561706e-06, "loss": 0.9236, "step": 17043 }, { "epoch": 0.617737668080171, "grad_norm": 2.6326075562360582, "learning_rate": 3.3668789546653867e-06, "loss": 1.0606, "step": 17044 }, { "epoch": 0.6177739117828277, "grad_norm": 2.5354725055291025, "learning_rate": 3.366324228078024e-06, "loss": 0.7046, "step": 17045 }, { "epoch": 0.6178101554854843, "grad_norm": 2.240011277351278, "learning_rate": 3.365769524001725e-06, "loss": 0.975, "step": 17046 }, { "epoch": 0.6178463991881411, "grad_norm": 2.2195188815148055, "learning_rate": 3.3652148424441322e-06, "loss": 0.9509, "step": 17047 }, { "epoch": 0.6178826428907978, "grad_norm": 2.2394386650141067, "learning_rate": 3.3646601834128924e-06, "loss": 1.0936, "step": 17048 }, { "epoch": 0.6179188865934544, "grad_norm": 2.3106712012031667, "learning_rate": 3.3641055469156453e-06, "loss": 0.9046, "step": 17049 }, { "epoch": 0.617955130296111, "grad_norm": 2.269080131857792, "learning_rate": 3.3635509329600345e-06, "loss": 0.8369, "step": 17050 }, { "epoch": 0.6179913739987677, "grad_norm": 2.4328324410790083, "learning_rate": 3.3629963415537016e-06, "loss": 0.9603, "step": 17051 }, { "epoch": 0.6180276177014243, "grad_norm": 2.251630311296193, "learning_rate": 3.3624417727042902e-06, "loss": 0.9097, "step": 17052 }, { "epoch": 0.6180638614040811, "grad_norm": 2.4252038837252865, "learning_rate": 3.36188722641944e-06, "loss": 0.9763, "step": 17053 }, { "epoch": 0.6181001051067377, "grad_norm": 2.3165063716422214, "learning_rate": 3.3613327027067937e-06, "loss": 0.8993, "step": 17054 }, { "epoch": 0.6181363488093944, "grad_norm": 2.383833917422857, "learning_rate": 3.36077820157399e-06, "loss": 0.9726, "step": 17055 }, { "epoch": 0.618172592512051, "grad_norm": 2.3944926484231757, "learning_rate": 3.3602237230286727e-06, "loss": 0.9372, "step": 17056 }, { "epoch": 0.6182088362147077, "grad_norm": 2.4080736717675086, "learning_rate": 3.359669267078481e-06, "loss": 0.8081, "step": 17057 }, { "epoch": 0.6182450799173643, "grad_norm": 2.3441968618618625, "learning_rate": 3.359114833731055e-06, "loss": 0.8546, "step": 17058 }, { "epoch": 0.618281323620021, "grad_norm": 2.3500109796032076, "learning_rate": 3.358560422994032e-06, "loss": 0.7623, "step": 17059 }, { "epoch": 0.6183175673226777, "grad_norm": 2.4411174106681437, "learning_rate": 3.3580060348750553e-06, "loss": 1.0434, "step": 17060 }, { "epoch": 0.6183538110253344, "grad_norm": 2.3521909196043267, "learning_rate": 3.3574516693817628e-06, "loss": 0.8174, "step": 17061 }, { "epoch": 0.618390054727991, "grad_norm": 2.243272193231771, "learning_rate": 3.356897326521793e-06, "loss": 1.0412, "step": 17062 }, { "epoch": 0.6184262984306477, "grad_norm": 2.2966482807760293, "learning_rate": 3.3563430063027837e-06, "loss": 0.9476, "step": 17063 }, { "epoch": 0.6184625421333043, "grad_norm": 2.2900915973556235, "learning_rate": 3.3557887087323737e-06, "loss": 0.8179, "step": 17064 }, { "epoch": 0.618498785835961, "grad_norm": 2.1658770512681293, "learning_rate": 3.355234433818202e-06, "loss": 0.8565, "step": 17065 }, { "epoch": 0.6185350295386177, "grad_norm": 2.5408308873214547, "learning_rate": 3.354680181567904e-06, "loss": 0.8633, "step": 17066 }, { "epoch": 0.6185712732412744, "grad_norm": 2.382788039726727, "learning_rate": 3.354125951989117e-06, "loss": 1.013, "step": 17067 }, { "epoch": 0.618607516943931, "grad_norm": 2.1769415294383987, "learning_rate": 3.35357174508948e-06, "loss": 1.051, "step": 17068 }, { "epoch": 0.6186437606465877, "grad_norm": 2.1818141558464803, "learning_rate": 3.3530175608766296e-06, "loss": 0.8886, "step": 17069 }, { "epoch": 0.6186800043492443, "grad_norm": 2.4468482574988344, "learning_rate": 3.3524633993582005e-06, "loss": 0.9805, "step": 17070 }, { "epoch": 0.618716248051901, "grad_norm": 2.5161909841124506, "learning_rate": 3.351909260541829e-06, "loss": 0.9453, "step": 17071 }, { "epoch": 0.6187524917545576, "grad_norm": 2.3057718034485464, "learning_rate": 3.3513551444351515e-06, "loss": 0.85, "step": 17072 }, { "epoch": 0.6187887354572144, "grad_norm": 2.0088003912041836, "learning_rate": 3.350801051045803e-06, "loss": 0.775, "step": 17073 }, { "epoch": 0.618824979159871, "grad_norm": 2.5583145532997436, "learning_rate": 3.3502469803814197e-06, "loss": 0.7622, "step": 17074 }, { "epoch": 0.6188612228625276, "grad_norm": 2.5069020357052856, "learning_rate": 3.349692932449632e-06, "loss": 0.8714, "step": 17075 }, { "epoch": 0.6188974665651843, "grad_norm": 2.543164951992781, "learning_rate": 3.3491389072580804e-06, "loss": 0.9474, "step": 17076 }, { "epoch": 0.6189337102678409, "grad_norm": 2.3539628711824347, "learning_rate": 3.348584904814396e-06, "loss": 0.8148, "step": 17077 }, { "epoch": 0.6189699539704976, "grad_norm": 2.129082560906937, "learning_rate": 3.3480309251262124e-06, "loss": 0.8356, "step": 17078 }, { "epoch": 0.6190061976731542, "grad_norm": 2.257984454789684, "learning_rate": 3.347476968201163e-06, "loss": 1.0235, "step": 17079 }, { "epoch": 0.619042441375811, "grad_norm": 2.6671559129720186, "learning_rate": 3.346923034046883e-06, "loss": 0.9597, "step": 17080 }, { "epoch": 0.6190786850784676, "grad_norm": 2.579528658871065, "learning_rate": 3.346369122671003e-06, "loss": 0.8733, "step": 17081 }, { "epoch": 0.6191149287811243, "grad_norm": 2.3054566076704317, "learning_rate": 3.3458152340811566e-06, "loss": 0.9157, "step": 17082 }, { "epoch": 0.6191511724837809, "grad_norm": 2.358999748239071, "learning_rate": 3.3452613682849745e-06, "loss": 1.0279, "step": 17083 }, { "epoch": 0.6191874161864376, "grad_norm": 2.4044890158809875, "learning_rate": 3.3447075252900914e-06, "loss": 0.8527, "step": 17084 }, { "epoch": 0.6192236598890942, "grad_norm": 2.366548825919114, "learning_rate": 3.3441537051041374e-06, "loss": 0.9721, "step": 17085 }, { "epoch": 0.619259903591751, "grad_norm": 2.650279006755296, "learning_rate": 3.343599907734744e-06, "loss": 0.9872, "step": 17086 }, { "epoch": 0.6192961472944076, "grad_norm": 2.2420282344081266, "learning_rate": 3.34304613318954e-06, "loss": 0.9476, "step": 17087 }, { "epoch": 0.6193323909970643, "grad_norm": 2.0102621015268753, "learning_rate": 3.3424923814761612e-06, "loss": 0.6947, "step": 17088 }, { "epoch": 0.6193686346997209, "grad_norm": 2.2482435360266044, "learning_rate": 3.3419386526022342e-06, "loss": 0.7198, "step": 17089 }, { "epoch": 0.6194048784023776, "grad_norm": 2.825664682102849, "learning_rate": 3.34138494657539e-06, "loss": 1.0447, "step": 17090 }, { "epoch": 0.6194411221050342, "grad_norm": 2.3657097513483176, "learning_rate": 3.340831263403257e-06, "loss": 0.9337, "step": 17091 }, { "epoch": 0.6194773658076909, "grad_norm": 2.1561730492688977, "learning_rate": 3.3402776030934665e-06, "loss": 0.8477, "step": 17092 }, { "epoch": 0.6195136095103476, "grad_norm": 2.446091256102863, "learning_rate": 3.3397239656536473e-06, "loss": 0.9319, "step": 17093 }, { "epoch": 0.6195498532130043, "grad_norm": 2.6835977972938685, "learning_rate": 3.3391703510914273e-06, "loss": 0.9355, "step": 17094 }, { "epoch": 0.6195860969156609, "grad_norm": 2.194695627252635, "learning_rate": 3.3386167594144338e-06, "loss": 0.8628, "step": 17095 }, { "epoch": 0.6196223406183176, "grad_norm": 2.256690324981989, "learning_rate": 3.3380631906302983e-06, "loss": 0.9881, "step": 17096 }, { "epoch": 0.6196585843209742, "grad_norm": 2.397450358109679, "learning_rate": 3.337509644746647e-06, "loss": 0.9429, "step": 17097 }, { "epoch": 0.6196948280236309, "grad_norm": 2.0431015586289907, "learning_rate": 3.3369561217711066e-06, "loss": 0.9694, "step": 17098 }, { "epoch": 0.6197310717262876, "grad_norm": 2.1149023086553878, "learning_rate": 3.3364026217113053e-06, "loss": 0.7542, "step": 17099 }, { "epoch": 0.6197673154289443, "grad_norm": 2.375121726399192, "learning_rate": 3.335849144574871e-06, "loss": 0.9068, "step": 17100 }, { "epoch": 0.6198035591316009, "grad_norm": 2.475821060092377, "learning_rate": 3.3352956903694282e-06, "loss": 0.923, "step": 17101 }, { "epoch": 0.6198398028342575, "grad_norm": 2.2814579997189592, "learning_rate": 3.3347422591026025e-06, "loss": 0.8545, "step": 17102 }, { "epoch": 0.6198760465369142, "grad_norm": 2.1904485460450887, "learning_rate": 3.334188850782023e-06, "loss": 0.8753, "step": 17103 }, { "epoch": 0.6199122902395708, "grad_norm": 2.5725551954697394, "learning_rate": 3.3336354654153138e-06, "loss": 1.2237, "step": 17104 }, { "epoch": 0.6199485339422275, "grad_norm": 2.4828761730547857, "learning_rate": 3.3330821030101003e-06, "loss": 0.7867, "step": 17105 }, { "epoch": 0.6199847776448842, "grad_norm": 2.2418641092487523, "learning_rate": 3.3325287635740043e-06, "loss": 1.0074, "step": 17106 }, { "epoch": 0.6200210213475409, "grad_norm": 2.4339909700752558, "learning_rate": 3.3319754471146565e-06, "loss": 0.7157, "step": 17107 }, { "epoch": 0.6200572650501975, "grad_norm": 2.1169971977865445, "learning_rate": 3.3314221536396778e-06, "loss": 0.9157, "step": 17108 }, { "epoch": 0.6200935087528542, "grad_norm": 2.1315446951897425, "learning_rate": 3.3308688831566927e-06, "loss": 0.9674, "step": 17109 }, { "epoch": 0.6201297524555108, "grad_norm": 2.3460020461526385, "learning_rate": 3.3303156356733245e-06, "loss": 0.7122, "step": 17110 }, { "epoch": 0.6201659961581675, "grad_norm": 2.3886183082251433, "learning_rate": 3.3297624111971973e-06, "loss": 0.7955, "step": 17111 }, { "epoch": 0.6202022398608242, "grad_norm": 2.4871767001678573, "learning_rate": 3.3292092097359345e-06, "loss": 1.0463, "step": 17112 }, { "epoch": 0.6202384835634809, "grad_norm": 2.2062884017920794, "learning_rate": 3.3286560312971585e-06, "loss": 0.7645, "step": 17113 }, { "epoch": 0.6202747272661375, "grad_norm": 2.434645665935876, "learning_rate": 3.328102875888489e-06, "loss": 0.9561, "step": 17114 }, { "epoch": 0.6203109709687942, "grad_norm": 2.4935233487589237, "learning_rate": 3.327549743517553e-06, "loss": 0.9337, "step": 17115 }, { "epoch": 0.6203472146714508, "grad_norm": 2.44813629633864, "learning_rate": 3.3269966341919702e-06, "loss": 1.013, "step": 17116 }, { "epoch": 0.6203834583741075, "grad_norm": 2.0587839870566986, "learning_rate": 3.3264435479193612e-06, "loss": 0.6875, "step": 17117 }, { "epoch": 0.6204197020767641, "grad_norm": 2.1482762968279707, "learning_rate": 3.325890484707348e-06, "loss": 0.7851, "step": 17118 }, { "epoch": 0.6204559457794209, "grad_norm": 2.260874528870783, "learning_rate": 3.3253374445635522e-06, "loss": 0.939, "step": 17119 }, { "epoch": 0.6204921894820775, "grad_norm": 2.5882505521679326, "learning_rate": 3.3247844274955933e-06, "loss": 0.9526, "step": 17120 }, { "epoch": 0.6205284331847342, "grad_norm": 2.506532068139261, "learning_rate": 3.324231433511092e-06, "loss": 0.9149, "step": 17121 }, { "epoch": 0.6205646768873908, "grad_norm": 2.336770496381874, "learning_rate": 3.3236784626176662e-06, "loss": 0.8484, "step": 17122 }, { "epoch": 0.6206009205900475, "grad_norm": 2.3800406900713256, "learning_rate": 3.323125514822939e-06, "loss": 0.8314, "step": 17123 }, { "epoch": 0.6206371642927041, "grad_norm": 2.2261135019469105, "learning_rate": 3.3225725901345284e-06, "loss": 0.9064, "step": 17124 }, { "epoch": 0.6206734079953609, "grad_norm": 2.56186600162604, "learning_rate": 3.322019688560053e-06, "loss": 1.0554, "step": 17125 }, { "epoch": 0.6207096516980175, "grad_norm": 2.4722294500804294, "learning_rate": 3.321466810107129e-06, "loss": 0.8478, "step": 17126 }, { "epoch": 0.6207458954006742, "grad_norm": 2.48786441023734, "learning_rate": 3.3209139547833804e-06, "loss": 0.6941, "step": 17127 }, { "epoch": 0.6207821391033308, "grad_norm": 2.3494885286897604, "learning_rate": 3.3203611225964207e-06, "loss": 0.7689, "step": 17128 }, { "epoch": 0.6208183828059874, "grad_norm": 2.40474738939344, "learning_rate": 3.3198083135538693e-06, "loss": 1.0758, "step": 17129 }, { "epoch": 0.6208546265086441, "grad_norm": 2.684118513995743, "learning_rate": 3.3192555276633427e-06, "loss": 0.9399, "step": 17130 }, { "epoch": 0.6208908702113007, "grad_norm": 2.3675901776508343, "learning_rate": 3.3187027649324587e-06, "loss": 0.7453, "step": 17131 }, { "epoch": 0.6209271139139575, "grad_norm": 2.979776885279515, "learning_rate": 3.3181500253688347e-06, "loss": 0.9374, "step": 17132 }, { "epoch": 0.6209633576166141, "grad_norm": 2.5101571454848255, "learning_rate": 3.3175973089800855e-06, "loss": 0.9398, "step": 17133 }, { "epoch": 0.6209996013192708, "grad_norm": 2.55529450475402, "learning_rate": 3.3170446157738263e-06, "loss": 0.9815, "step": 17134 }, { "epoch": 0.6210358450219274, "grad_norm": 2.6681640390364927, "learning_rate": 3.316491945757676e-06, "loss": 1.0952, "step": 17135 }, { "epoch": 0.6210720887245841, "grad_norm": 2.4692578629592283, "learning_rate": 3.3159392989392493e-06, "loss": 0.7847, "step": 17136 }, { "epoch": 0.6211083324272407, "grad_norm": 2.302144160913794, "learning_rate": 3.31538667532616e-06, "loss": 0.7929, "step": 17137 }, { "epoch": 0.6211445761298975, "grad_norm": 2.0718446225136233, "learning_rate": 3.3148340749260223e-06, "loss": 0.8651, "step": 17138 }, { "epoch": 0.6211808198325541, "grad_norm": 2.4490681241281864, "learning_rate": 3.314281497746453e-06, "loss": 0.8261, "step": 17139 }, { "epoch": 0.6212170635352108, "grad_norm": 2.342344868172896, "learning_rate": 3.313728943795066e-06, "loss": 0.8372, "step": 17140 }, { "epoch": 0.6212533072378674, "grad_norm": 2.154630560004717, "learning_rate": 3.313176413079473e-06, "loss": 0.8579, "step": 17141 }, { "epoch": 0.6212895509405241, "grad_norm": 2.351060367717346, "learning_rate": 3.312623905607288e-06, "loss": 0.9435, "step": 17142 }, { "epoch": 0.6213257946431807, "grad_norm": 2.4642499194228593, "learning_rate": 3.312071421386126e-06, "loss": 0.8287, "step": 17143 }, { "epoch": 0.6213620383458374, "grad_norm": 2.0641325397923986, "learning_rate": 3.3115189604235996e-06, "loss": 1.0581, "step": 17144 }, { "epoch": 0.6213982820484941, "grad_norm": 2.5256131663504733, "learning_rate": 3.3109665227273214e-06, "loss": 1.0109, "step": 17145 }, { "epoch": 0.6214345257511508, "grad_norm": 2.33561000244954, "learning_rate": 3.3104141083049e-06, "loss": 0.9737, "step": 17146 }, { "epoch": 0.6214707694538074, "grad_norm": 2.350741433638079, "learning_rate": 3.309861717163953e-06, "loss": 1.0555, "step": 17147 }, { "epoch": 0.6215070131564641, "grad_norm": 3.008988014828686, "learning_rate": 3.3093093493120894e-06, "loss": 0.9207, "step": 17148 }, { "epoch": 0.6215432568591207, "grad_norm": 2.135673852866328, "learning_rate": 3.30875700475692e-06, "loss": 0.8965, "step": 17149 }, { "epoch": 0.6215795005617774, "grad_norm": 2.14341245924846, "learning_rate": 3.3082046835060557e-06, "loss": 0.7638, "step": 17150 }, { "epoch": 0.621615744264434, "grad_norm": 2.64122596735398, "learning_rate": 3.307652385567108e-06, "loss": 0.904, "step": 17151 }, { "epoch": 0.6216519879670908, "grad_norm": 2.2657898922903326, "learning_rate": 3.3071001109476874e-06, "loss": 0.8362, "step": 17152 }, { "epoch": 0.6216882316697474, "grad_norm": 2.1166161863738258, "learning_rate": 3.3065478596554033e-06, "loss": 0.818, "step": 17153 }, { "epoch": 0.621724475372404, "grad_norm": 2.5004665463473956, "learning_rate": 3.305995631697864e-06, "loss": 0.9802, "step": 17154 }, { "epoch": 0.6217607190750607, "grad_norm": 2.074596471625639, "learning_rate": 3.3054434270826817e-06, "loss": 0.8648, "step": 17155 }, { "epoch": 0.6217969627777173, "grad_norm": 2.373763297786492, "learning_rate": 3.304891245817465e-06, "loss": 0.835, "step": 17156 }, { "epoch": 0.621833206480374, "grad_norm": 2.509114244313587, "learning_rate": 3.304339087909821e-06, "loss": 0.8671, "step": 17157 }, { "epoch": 0.6218694501830307, "grad_norm": 2.1317376857339543, "learning_rate": 3.3037869533673582e-06, "loss": 1.0599, "step": 17158 }, { "epoch": 0.6219056938856874, "grad_norm": 2.565960879226873, "learning_rate": 3.303234842197686e-06, "loss": 0.9272, "step": 17159 }, { "epoch": 0.621941937588344, "grad_norm": 2.21200788991318, "learning_rate": 3.302682754408412e-06, "loss": 0.9322, "step": 17160 }, { "epoch": 0.6219781812910007, "grad_norm": 2.665034012597721, "learning_rate": 3.3021306900071425e-06, "loss": 0.8605, "step": 17161 }, { "epoch": 0.6220144249936573, "grad_norm": 2.3601334871409754, "learning_rate": 3.301578649001484e-06, "loss": 0.8561, "step": 17162 }, { "epoch": 0.622050668696314, "grad_norm": 2.356019522696697, "learning_rate": 3.3010266313990457e-06, "loss": 0.8055, "step": 17163 }, { "epoch": 0.6220869123989706, "grad_norm": 2.37781641113427, "learning_rate": 3.3004746372074335e-06, "loss": 1.0497, "step": 17164 }, { "epoch": 0.6221231561016274, "grad_norm": 2.2715415950039475, "learning_rate": 3.2999226664342533e-06, "loss": 0.8501, "step": 17165 }, { "epoch": 0.622159399804284, "grad_norm": 2.326421537184341, "learning_rate": 3.299370719087109e-06, "loss": 1.0236, "step": 17166 }, { "epoch": 0.6221956435069407, "grad_norm": 2.5204639220279366, "learning_rate": 3.298818795173609e-06, "loss": 0.8428, "step": 17167 }, { "epoch": 0.6222318872095973, "grad_norm": 2.2545161563435934, "learning_rate": 3.298266894701357e-06, "loss": 1.0266, "step": 17168 }, { "epoch": 0.622268130912254, "grad_norm": 2.3542133267697953, "learning_rate": 3.297715017677957e-06, "loss": 0.8842, "step": 17169 }, { "epoch": 0.6223043746149106, "grad_norm": 2.2513965577696897, "learning_rate": 3.297163164111015e-06, "loss": 0.9382, "step": 17170 }, { "epoch": 0.6223406183175674, "grad_norm": 2.4449965084784493, "learning_rate": 3.2966113340081346e-06, "loss": 0.864, "step": 17171 }, { "epoch": 0.622376862020224, "grad_norm": 2.35629373997938, "learning_rate": 3.2960595273769207e-06, "loss": 0.8039, "step": 17172 }, { "epoch": 0.6224131057228807, "grad_norm": 2.0585787551301364, "learning_rate": 3.295507744224976e-06, "loss": 0.7428, "step": 17173 }, { "epoch": 0.6224493494255373, "grad_norm": 1.9192863071858597, "learning_rate": 3.2949559845599013e-06, "loss": 0.9338, "step": 17174 }, { "epoch": 0.622485593128194, "grad_norm": 2.240719003933485, "learning_rate": 3.2944042483893047e-06, "loss": 0.9228, "step": 17175 }, { "epoch": 0.6225218368308506, "grad_norm": 2.6448914101260925, "learning_rate": 3.293852535720785e-06, "loss": 1.0616, "step": 17176 }, { "epoch": 0.6225580805335073, "grad_norm": 2.1407128717538075, "learning_rate": 3.2933008465619464e-06, "loss": 0.7714, "step": 17177 }, { "epoch": 0.622594324236164, "grad_norm": 2.3664479295949374, "learning_rate": 3.292749180920389e-06, "loss": 0.8933, "step": 17178 }, { "epoch": 0.6226305679388207, "grad_norm": 2.2661943534936237, "learning_rate": 3.2921975388037154e-06, "loss": 0.8949, "step": 17179 }, { "epoch": 0.6226668116414773, "grad_norm": 2.261119654773536, "learning_rate": 3.2916459202195277e-06, "loss": 0.8254, "step": 17180 }, { "epoch": 0.622703055344134, "grad_norm": 2.259096256052466, "learning_rate": 3.2910943251754237e-06, "loss": 0.9051, "step": 17181 }, { "epoch": 0.6227392990467906, "grad_norm": 2.261405285641079, "learning_rate": 3.290542753679009e-06, "loss": 0.792, "step": 17182 }, { "epoch": 0.6227755427494472, "grad_norm": 2.6401059466381795, "learning_rate": 3.28999120573788e-06, "loss": 0.9657, "step": 17183 }, { "epoch": 0.622811786452104, "grad_norm": 2.3877316945627185, "learning_rate": 3.2894396813596397e-06, "loss": 0.9974, "step": 17184 }, { "epoch": 0.6228480301547606, "grad_norm": 2.0601679778677378, "learning_rate": 3.288888180551884e-06, "loss": 0.7763, "step": 17185 }, { "epoch": 0.6228842738574173, "grad_norm": 2.1118704220266356, "learning_rate": 3.2883367033222158e-06, "loss": 0.8468, "step": 17186 }, { "epoch": 0.6229205175600739, "grad_norm": 2.4780737196165292, "learning_rate": 3.2877852496782327e-06, "loss": 0.7582, "step": 17187 }, { "epoch": 0.6229567612627306, "grad_norm": 2.452977241860487, "learning_rate": 3.287233819627532e-06, "loss": 0.7862, "step": 17188 }, { "epoch": 0.6229930049653872, "grad_norm": 2.358058978869124, "learning_rate": 3.2866824131777133e-06, "loss": 1.0239, "step": 17189 }, { "epoch": 0.6230292486680439, "grad_norm": 2.3406829387392816, "learning_rate": 3.2861310303363757e-06, "loss": 0.8655, "step": 17190 }, { "epoch": 0.6230654923707006, "grad_norm": 2.416480822934838, "learning_rate": 3.285579671111116e-06, "loss": 1.0546, "step": 17191 }, { "epoch": 0.6231017360733573, "grad_norm": 2.754634531207439, "learning_rate": 3.285028335509532e-06, "loss": 0.8466, "step": 17192 }, { "epoch": 0.6231379797760139, "grad_norm": 2.2806813481551886, "learning_rate": 3.2844770235392176e-06, "loss": 0.977, "step": 17193 }, { "epoch": 0.6231742234786706, "grad_norm": 2.1784726376027574, "learning_rate": 3.2839257352077754e-06, "loss": 0.8944, "step": 17194 }, { "epoch": 0.6232104671813272, "grad_norm": 2.655800058234615, "learning_rate": 3.2833744705227977e-06, "loss": 0.9284, "step": 17195 }, { "epoch": 0.6232467108839839, "grad_norm": 2.1219612916188493, "learning_rate": 3.2828232294918826e-06, "loss": 0.7731, "step": 17196 }, { "epoch": 0.6232829545866406, "grad_norm": 2.638052062098909, "learning_rate": 3.2822720121226233e-06, "loss": 0.8528, "step": 17197 }, { "epoch": 0.6233191982892973, "grad_norm": 2.429308462817148, "learning_rate": 3.281720818422618e-06, "loss": 0.8567, "step": 17198 }, { "epoch": 0.6233554419919539, "grad_norm": 2.2158854259994265, "learning_rate": 3.2811696483994604e-06, "loss": 0.9066, "step": 17199 }, { "epoch": 0.6233916856946106, "grad_norm": 2.5820605675935426, "learning_rate": 3.2806185020607458e-06, "loss": 1.0189, "step": 17200 }, { "epoch": 0.6234279293972672, "grad_norm": 2.3721100622567435, "learning_rate": 3.280067379414066e-06, "loss": 0.8949, "step": 17201 }, { "epoch": 0.6234641730999239, "grad_norm": 2.34296378926096, "learning_rate": 3.27951628046702e-06, "loss": 0.8029, "step": 17202 }, { "epoch": 0.6235004168025805, "grad_norm": 2.0844425243345572, "learning_rate": 3.2789652052271985e-06, "loss": 0.8161, "step": 17203 }, { "epoch": 0.6235366605052373, "grad_norm": 2.425822019537851, "learning_rate": 3.2784141537021965e-06, "loss": 0.8881, "step": 17204 }, { "epoch": 0.6235729042078939, "grad_norm": 2.3197397251733776, "learning_rate": 3.277863125899605e-06, "loss": 0.7179, "step": 17205 }, { "epoch": 0.6236091479105506, "grad_norm": 2.2921789003075492, "learning_rate": 3.2773121218270186e-06, "loss": 1.0236, "step": 17206 }, { "epoch": 0.6236453916132072, "grad_norm": 2.1884147970270535, "learning_rate": 3.2767611414920297e-06, "loss": 0.7495, "step": 17207 }, { "epoch": 0.6236816353158638, "grad_norm": 2.0541342519423083, "learning_rate": 3.27621018490223e-06, "loss": 0.5944, "step": 17208 }, { "epoch": 0.6237178790185205, "grad_norm": 2.4705668070643134, "learning_rate": 3.27565925206521e-06, "loss": 0.8124, "step": 17209 }, { "epoch": 0.6237541227211771, "grad_norm": 2.253018991271698, "learning_rate": 3.2751083429885634e-06, "loss": 0.8696, "step": 17210 }, { "epoch": 0.6237903664238339, "grad_norm": 2.4824313880910798, "learning_rate": 3.2745574576798817e-06, "loss": 0.8549, "step": 17211 }, { "epoch": 0.6238266101264905, "grad_norm": 2.304947498915079, "learning_rate": 3.2740065961467545e-06, "loss": 0.8465, "step": 17212 }, { "epoch": 0.6238628538291472, "grad_norm": 2.3894984715877094, "learning_rate": 3.27345575839677e-06, "loss": 1.0254, "step": 17213 }, { "epoch": 0.6238990975318038, "grad_norm": 2.443539123972075, "learning_rate": 3.2729049444375236e-06, "loss": 0.9068, "step": 17214 }, { "epoch": 0.6239353412344605, "grad_norm": 2.231032025308378, "learning_rate": 3.2723541542766026e-06, "loss": 0.7225, "step": 17215 }, { "epoch": 0.6239715849371171, "grad_norm": 2.600350413431251, "learning_rate": 3.271803387921596e-06, "loss": 0.9283, "step": 17216 }, { "epoch": 0.6240078286397739, "grad_norm": 2.3814381470226027, "learning_rate": 3.271252645380093e-06, "loss": 0.9371, "step": 17217 }, { "epoch": 0.6240440723424305, "grad_norm": 2.1612604334911967, "learning_rate": 3.270701926659684e-06, "loss": 0.7698, "step": 17218 }, { "epoch": 0.6240803160450872, "grad_norm": 2.0049355136449587, "learning_rate": 3.2701512317679563e-06, "loss": 0.8255, "step": 17219 }, { "epoch": 0.6241165597477438, "grad_norm": 2.4823194296620845, "learning_rate": 3.2696005607124986e-06, "loss": 0.7983, "step": 17220 }, { "epoch": 0.6241528034504005, "grad_norm": 2.309617432210762, "learning_rate": 3.269049913500897e-06, "loss": 0.7138, "step": 17221 }, { "epoch": 0.6241890471530571, "grad_norm": 2.3411489950488504, "learning_rate": 3.268499290140742e-06, "loss": 0.7847, "step": 17222 }, { "epoch": 0.6242252908557138, "grad_norm": 2.40560724959477, "learning_rate": 3.2679486906396203e-06, "loss": 0.9754, "step": 17223 }, { "epoch": 0.6242615345583705, "grad_norm": 2.048111429486846, "learning_rate": 3.2673981150051177e-06, "loss": 0.7372, "step": 17224 }, { "epoch": 0.6242977782610272, "grad_norm": 2.3525889288684043, "learning_rate": 3.2668475632448205e-06, "loss": 0.886, "step": 17225 }, { "epoch": 0.6243340219636838, "grad_norm": 2.440775602338377, "learning_rate": 3.2662970353663167e-06, "loss": 0.9525, "step": 17226 }, { "epoch": 0.6243702656663405, "grad_norm": 2.440887252063939, "learning_rate": 3.265746531377191e-06, "loss": 0.9985, "step": 17227 }, { "epoch": 0.6244065093689971, "grad_norm": 2.4029827969872213, "learning_rate": 3.265196051285029e-06, "loss": 1.0657, "step": 17228 }, { "epoch": 0.6244427530716538, "grad_norm": 2.226142562040202, "learning_rate": 3.2646455950974147e-06, "loss": 0.8837, "step": 17229 }, { "epoch": 0.6244789967743105, "grad_norm": 2.086822928821488, "learning_rate": 3.2640951628219365e-06, "loss": 0.7745, "step": 17230 }, { "epoch": 0.6245152404769672, "grad_norm": 2.244454737174037, "learning_rate": 3.2635447544661774e-06, "loss": 1.1472, "step": 17231 }, { "epoch": 0.6245514841796238, "grad_norm": 2.2854713149441777, "learning_rate": 3.262994370037721e-06, "loss": 1.0011, "step": 17232 }, { "epoch": 0.6245877278822805, "grad_norm": 2.256115104261189, "learning_rate": 3.2624440095441502e-06, "loss": 0.6908, "step": 17233 }, { "epoch": 0.6246239715849371, "grad_norm": 2.109941747351337, "learning_rate": 3.2618936729930517e-06, "loss": 0.731, "step": 17234 }, { "epoch": 0.6246602152875937, "grad_norm": 2.1838471231047833, "learning_rate": 3.261343360392007e-06, "loss": 0.9627, "step": 17235 }, { "epoch": 0.6246964589902504, "grad_norm": 2.086435130460794, "learning_rate": 3.260793071748599e-06, "loss": 0.8765, "step": 17236 }, { "epoch": 0.6247327026929071, "grad_norm": 2.5813430420863845, "learning_rate": 3.2602428070704106e-06, "loss": 0.8045, "step": 17237 }, { "epoch": 0.6247689463955638, "grad_norm": 2.269727382192793, "learning_rate": 3.259692566365025e-06, "loss": 0.959, "step": 17238 }, { "epoch": 0.6248051900982204, "grad_norm": 2.21829577350276, "learning_rate": 3.2591423496400232e-06, "loss": 0.7798, "step": 17239 }, { "epoch": 0.6248414338008771, "grad_norm": 2.220002548371888, "learning_rate": 3.2585921569029866e-06, "loss": 0.8481, "step": 17240 }, { "epoch": 0.6248776775035337, "grad_norm": 2.2599527202624525, "learning_rate": 3.258041988161495e-06, "loss": 0.71, "step": 17241 }, { "epoch": 0.6249139212061904, "grad_norm": 2.264107669395256, "learning_rate": 3.2574918434231335e-06, "loss": 0.7548, "step": 17242 }, { "epoch": 0.6249501649088471, "grad_norm": 2.3579223675676566, "learning_rate": 3.25694172269548e-06, "loss": 0.8506, "step": 17243 }, { "epoch": 0.6249864086115038, "grad_norm": 2.1971484384366264, "learning_rate": 3.256391625986116e-06, "loss": 0.9571, "step": 17244 }, { "epoch": 0.6250226523141604, "grad_norm": 2.289869367739048, "learning_rate": 3.25584155330262e-06, "loss": 0.9557, "step": 17245 }, { "epoch": 0.6250588960168171, "grad_norm": 2.635260418033272, "learning_rate": 3.2552915046525733e-06, "loss": 0.9411, "step": 17246 }, { "epoch": 0.6250951397194737, "grad_norm": 2.5487923981957694, "learning_rate": 3.2547414800435545e-06, "loss": 0.8912, "step": 17247 }, { "epoch": 0.6251313834221304, "grad_norm": 2.1700378014963717, "learning_rate": 3.254191479483143e-06, "loss": 0.8015, "step": 17248 }, { "epoch": 0.625167627124787, "grad_norm": 2.530619378563973, "learning_rate": 3.2536415029789148e-06, "loss": 0.9972, "step": 17249 }, { "epoch": 0.6252038708274438, "grad_norm": 2.2304379444726696, "learning_rate": 3.253091550538452e-06, "loss": 0.9117, "step": 17250 }, { "epoch": 0.6252401145301004, "grad_norm": 2.053215346848975, "learning_rate": 3.252541622169332e-06, "loss": 0.8411, "step": 17251 }, { "epoch": 0.6252763582327571, "grad_norm": 2.0486879672843186, "learning_rate": 3.251991717879131e-06, "loss": 0.8872, "step": 17252 }, { "epoch": 0.6253126019354137, "grad_norm": 2.151536080354808, "learning_rate": 3.2514418376754266e-06, "loss": 0.5734, "step": 17253 }, { "epoch": 0.6253488456380704, "grad_norm": 2.3086272209673875, "learning_rate": 3.2508919815657965e-06, "loss": 0.9603, "step": 17254 }, { "epoch": 0.625385089340727, "grad_norm": 2.1411177463547744, "learning_rate": 3.2503421495578156e-06, "loss": 0.9524, "step": 17255 }, { "epoch": 0.6254213330433838, "grad_norm": 5.711745654034966, "learning_rate": 3.249792341659064e-06, "loss": 0.897, "step": 17256 }, { "epoch": 0.6254575767460404, "grad_norm": 2.3732098832473496, "learning_rate": 3.2492425578771135e-06, "loss": 0.9729, "step": 17257 }, { "epoch": 0.6254938204486971, "grad_norm": 2.178837277469745, "learning_rate": 3.2486927982195436e-06, "loss": 0.8891, "step": 17258 }, { "epoch": 0.6255300641513537, "grad_norm": 2.439916601328393, "learning_rate": 3.248143062693927e-06, "loss": 0.8023, "step": 17259 }, { "epoch": 0.6255663078540104, "grad_norm": 1.9894336712453102, "learning_rate": 3.2475933513078378e-06, "loss": 0.8919, "step": 17260 }, { "epoch": 0.625602551556667, "grad_norm": 2.5624687753316446, "learning_rate": 3.247043664068854e-06, "loss": 0.8541, "step": 17261 }, { "epoch": 0.6256387952593236, "grad_norm": 2.1668206378846353, "learning_rate": 3.246494000984549e-06, "loss": 0.8412, "step": 17262 }, { "epoch": 0.6256750389619804, "grad_norm": 2.4428113799423046, "learning_rate": 3.2459443620624965e-06, "loss": 0.914, "step": 17263 }, { "epoch": 0.625711282664637, "grad_norm": 1.994332090144774, "learning_rate": 3.2453947473102677e-06, "loss": 0.6312, "step": 17264 }, { "epoch": 0.6257475263672937, "grad_norm": 2.6102279640799964, "learning_rate": 3.24484515673544e-06, "loss": 0.9441, "step": 17265 }, { "epoch": 0.6257837700699503, "grad_norm": 2.333760877926985, "learning_rate": 3.2442955903455843e-06, "loss": 0.7621, "step": 17266 }, { "epoch": 0.625820013772607, "grad_norm": 2.3372898102533135, "learning_rate": 3.2437460481482737e-06, "loss": 1.0154, "step": 17267 }, { "epoch": 0.6258562574752636, "grad_norm": 2.133750877250612, "learning_rate": 3.243196530151078e-06, "loss": 0.8038, "step": 17268 }, { "epoch": 0.6258925011779204, "grad_norm": 2.164676009856352, "learning_rate": 3.242647036361574e-06, "loss": 0.8265, "step": 17269 }, { "epoch": 0.625928744880577, "grad_norm": 2.2403078432892887, "learning_rate": 3.2420975667873312e-06, "loss": 0.6838, "step": 17270 }, { "epoch": 0.6259649885832337, "grad_norm": 2.124461758046989, "learning_rate": 3.24154812143592e-06, "loss": 0.7224, "step": 17271 }, { "epoch": 0.6260012322858903, "grad_norm": 2.141547343843817, "learning_rate": 3.240998700314912e-06, "loss": 0.7371, "step": 17272 }, { "epoch": 0.626037475988547, "grad_norm": 2.216424558121796, "learning_rate": 3.2404493034318785e-06, "loss": 0.7935, "step": 17273 }, { "epoch": 0.6260737196912036, "grad_norm": 2.051843861661044, "learning_rate": 3.2398999307943903e-06, "loss": 0.9135, "step": 17274 }, { "epoch": 0.6261099633938603, "grad_norm": 2.4248706364099917, "learning_rate": 3.2393505824100156e-06, "loss": 1.0064, "step": 17275 }, { "epoch": 0.626146207096517, "grad_norm": 2.395955514379702, "learning_rate": 3.2388012582863237e-06, "loss": 0.7602, "step": 17276 }, { "epoch": 0.6261824507991737, "grad_norm": 2.4145203078532904, "learning_rate": 3.238251958430887e-06, "loss": 0.9859, "step": 17277 }, { "epoch": 0.6262186945018303, "grad_norm": 2.013226414607799, "learning_rate": 3.237702682851273e-06, "loss": 0.6525, "step": 17278 }, { "epoch": 0.626254938204487, "grad_norm": 2.5141058526836546, "learning_rate": 3.23715343155505e-06, "loss": 0.9694, "step": 17279 }, { "epoch": 0.6262911819071436, "grad_norm": 2.196247067226887, "learning_rate": 3.2366042045497846e-06, "loss": 0.9516, "step": 17280 }, { "epoch": 0.6263274256098003, "grad_norm": 2.166526711645174, "learning_rate": 3.2360550018430493e-06, "loss": 0.7347, "step": 17281 }, { "epoch": 0.6263636693124569, "grad_norm": 2.1451865808913433, "learning_rate": 3.2355058234424094e-06, "loss": 0.884, "step": 17282 }, { "epoch": 0.6263999130151137, "grad_norm": 2.9002373501198617, "learning_rate": 3.2349566693554313e-06, "loss": 0.8568, "step": 17283 }, { "epoch": 0.6264361567177703, "grad_norm": 2.53897803260765, "learning_rate": 3.234407539589682e-06, "loss": 1.051, "step": 17284 }, { "epoch": 0.626472400420427, "grad_norm": 2.3823548304086315, "learning_rate": 3.23385843415273e-06, "loss": 0.8048, "step": 17285 }, { "epoch": 0.6265086441230836, "grad_norm": 2.942864635766076, "learning_rate": 3.2333093530521408e-06, "loss": 0.917, "step": 17286 }, { "epoch": 0.6265448878257402, "grad_norm": 2.3739433278594344, "learning_rate": 3.232760296295481e-06, "loss": 1.0238, "step": 17287 }, { "epoch": 0.6265811315283969, "grad_norm": 2.334794833538265, "learning_rate": 3.2322112638903126e-06, "loss": 0.8941, "step": 17288 }, { "epoch": 0.6266173752310537, "grad_norm": 2.565042513549733, "learning_rate": 3.231662255844206e-06, "loss": 1.0321, "step": 17289 }, { "epoch": 0.6266536189337103, "grad_norm": 2.306222678978412, "learning_rate": 3.2311132721647243e-06, "loss": 0.9211, "step": 17290 }, { "epoch": 0.626689862636367, "grad_norm": 2.4707637889654657, "learning_rate": 3.2305643128594316e-06, "loss": 0.9444, "step": 17291 }, { "epoch": 0.6267261063390236, "grad_norm": 2.5807943504939357, "learning_rate": 3.2300153779358915e-06, "loss": 0.9293, "step": 17292 }, { "epoch": 0.6267623500416802, "grad_norm": 2.1619375732149813, "learning_rate": 3.229466467401671e-06, "loss": 0.8076, "step": 17293 }, { "epoch": 0.6267985937443369, "grad_norm": 2.2572393824386086, "learning_rate": 3.2289175812643304e-06, "loss": 0.761, "step": 17294 }, { "epoch": 0.6268348374469935, "grad_norm": 2.453803217780278, "learning_rate": 3.2283687195314353e-06, "loss": 0.8046, "step": 17295 }, { "epoch": 0.6268710811496503, "grad_norm": 2.2418660800490517, "learning_rate": 3.2278198822105454e-06, "loss": 0.9555, "step": 17296 }, { "epoch": 0.6269073248523069, "grad_norm": 2.5816517401106314, "learning_rate": 3.227271069309228e-06, "loss": 0.9287, "step": 17297 }, { "epoch": 0.6269435685549636, "grad_norm": 2.508084518635653, "learning_rate": 3.226722280835043e-06, "loss": 0.8882, "step": 17298 }, { "epoch": 0.6269798122576202, "grad_norm": 2.5376527445710773, "learning_rate": 3.2261735167955526e-06, "loss": 0.908, "step": 17299 }, { "epoch": 0.6270160559602769, "grad_norm": 2.2873738746282735, "learning_rate": 3.2256247771983163e-06, "loss": 0.9227, "step": 17300 }, { "epoch": 0.6270522996629335, "grad_norm": 2.279383548031356, "learning_rate": 3.2250760620509e-06, "loss": 0.669, "step": 17301 }, { "epoch": 0.6270885433655903, "grad_norm": 2.6870139916312787, "learning_rate": 3.2245273713608615e-06, "loss": 0.8818, "step": 17302 }, { "epoch": 0.6271247870682469, "grad_norm": 2.3186297948890493, "learning_rate": 3.2239787051357617e-06, "loss": 0.7693, "step": 17303 }, { "epoch": 0.6271610307709036, "grad_norm": 2.239826710708121, "learning_rate": 3.223430063383161e-06, "loss": 0.7243, "step": 17304 }, { "epoch": 0.6271972744735602, "grad_norm": 2.057420274536045, "learning_rate": 3.2228814461106207e-06, "loss": 0.8393, "step": 17305 }, { "epoch": 0.6272335181762169, "grad_norm": 2.308295161457319, "learning_rate": 3.2223328533256987e-06, "loss": 0.7737, "step": 17306 }, { "epoch": 0.6272697618788735, "grad_norm": 2.0559333504992376, "learning_rate": 3.2217842850359547e-06, "loss": 0.7207, "step": 17307 }, { "epoch": 0.6273060055815302, "grad_norm": 2.1406535234981154, "learning_rate": 3.2212357412489465e-06, "loss": 0.9719, "step": 17308 }, { "epoch": 0.6273422492841869, "grad_norm": 2.405769373843801, "learning_rate": 3.220687221972236e-06, "loss": 1.0088, "step": 17309 }, { "epoch": 0.6273784929868436, "grad_norm": 2.3353307908844223, "learning_rate": 3.220138727213379e-06, "loss": 0.9275, "step": 17310 }, { "epoch": 0.6274147366895002, "grad_norm": 2.442072417160933, "learning_rate": 3.2195902569799344e-06, "loss": 0.831, "step": 17311 }, { "epoch": 0.6274509803921569, "grad_norm": 2.146291758919483, "learning_rate": 3.2190418112794575e-06, "loss": 0.8671, "step": 17312 }, { "epoch": 0.6274872240948135, "grad_norm": 2.0375021810170457, "learning_rate": 3.2184933901195093e-06, "loss": 0.7126, "step": 17313 }, { "epoch": 0.6275234677974701, "grad_norm": 2.1711504051750254, "learning_rate": 3.2179449935076444e-06, "loss": 0.6761, "step": 17314 }, { "epoch": 0.6275597115001269, "grad_norm": 2.4954915477028696, "learning_rate": 3.2173966214514197e-06, "loss": 0.9931, "step": 17315 }, { "epoch": 0.6275959552027836, "grad_norm": 2.502870456323934, "learning_rate": 3.2168482739583893e-06, "loss": 0.8374, "step": 17316 }, { "epoch": 0.6276321989054402, "grad_norm": 2.17541934290911, "learning_rate": 3.2162999510361136e-06, "loss": 0.9165, "step": 17317 }, { "epoch": 0.6276684426080968, "grad_norm": 2.480330193879153, "learning_rate": 3.2157516526921455e-06, "loss": 0.9777, "step": 17318 }, { "epoch": 0.6277046863107535, "grad_norm": 2.34053668428069, "learning_rate": 3.215203378934041e-06, "loss": 0.8182, "step": 17319 }, { "epoch": 0.6277409300134101, "grad_norm": 2.399647409001669, "learning_rate": 3.2146551297693516e-06, "loss": 1.1557, "step": 17320 }, { "epoch": 0.6277771737160668, "grad_norm": 2.215118027845, "learning_rate": 3.214106905205638e-06, "loss": 0.8209, "step": 17321 }, { "epoch": 0.6278134174187235, "grad_norm": 2.538944716756991, "learning_rate": 3.2135587052504503e-06, "loss": 1.0405, "step": 17322 }, { "epoch": 0.6278496611213802, "grad_norm": 2.820767292515601, "learning_rate": 3.2130105299113435e-06, "loss": 0.9741, "step": 17323 }, { "epoch": 0.6278859048240368, "grad_norm": 2.2754322683332195, "learning_rate": 3.21246237919587e-06, "loss": 0.976, "step": 17324 }, { "epoch": 0.6279221485266935, "grad_norm": 2.3474377367222505, "learning_rate": 3.2119142531115848e-06, "loss": 1.1209, "step": 17325 }, { "epoch": 0.6279583922293501, "grad_norm": 2.1990624656615414, "learning_rate": 3.21136615166604e-06, "loss": 0.8842, "step": 17326 }, { "epoch": 0.6279946359320068, "grad_norm": 2.28207530768911, "learning_rate": 3.2108180748667874e-06, "loss": 0.8341, "step": 17327 }, { "epoch": 0.6280308796346635, "grad_norm": 2.5815234451032487, "learning_rate": 3.210270022721378e-06, "loss": 0.9489, "step": 17328 }, { "epoch": 0.6280671233373202, "grad_norm": 2.2235871399709475, "learning_rate": 3.2097219952373672e-06, "loss": 0.798, "step": 17329 }, { "epoch": 0.6281033670399768, "grad_norm": 2.6380957459321275, "learning_rate": 3.209173992422304e-06, "loss": 0.9964, "step": 17330 }, { "epoch": 0.6281396107426335, "grad_norm": 2.5034275683482616, "learning_rate": 3.208626014283741e-06, "loss": 0.8829, "step": 17331 }, { "epoch": 0.6281758544452901, "grad_norm": 2.035077331472578, "learning_rate": 3.2080780608292273e-06, "loss": 0.7836, "step": 17332 }, { "epoch": 0.6282120981479468, "grad_norm": 2.374334079892819, "learning_rate": 3.2075301320663145e-06, "loss": 0.7835, "step": 17333 }, { "epoch": 0.6282483418506034, "grad_norm": 2.3932938427611066, "learning_rate": 3.2069822280025532e-06, "loss": 0.8294, "step": 17334 }, { "epoch": 0.6282845855532602, "grad_norm": 2.186895064138594, "learning_rate": 3.206434348645491e-06, "loss": 0.813, "step": 17335 }, { "epoch": 0.6283208292559168, "grad_norm": 2.297933652328723, "learning_rate": 3.205886494002678e-06, "loss": 0.9206, "step": 17336 }, { "epoch": 0.6283570729585735, "grad_norm": 2.520225845906213, "learning_rate": 3.205338664081666e-06, "loss": 1.0532, "step": 17337 }, { "epoch": 0.6283933166612301, "grad_norm": 2.3666596462807563, "learning_rate": 3.204790858890001e-06, "loss": 0.937, "step": 17338 }, { "epoch": 0.6284295603638868, "grad_norm": 2.2311360688744375, "learning_rate": 3.204243078435233e-06, "loss": 0.9442, "step": 17339 }, { "epoch": 0.6284658040665434, "grad_norm": 2.158662062955187, "learning_rate": 3.203695322724908e-06, "loss": 0.8518, "step": 17340 }, { "epoch": 0.6285020477692002, "grad_norm": 2.5214122784666975, "learning_rate": 3.2031475917665757e-06, "loss": 0.8907, "step": 17341 }, { "epoch": 0.6285382914718568, "grad_norm": 2.4964492073369082, "learning_rate": 3.202599885567783e-06, "loss": 0.8494, "step": 17342 }, { "epoch": 0.6285745351745134, "grad_norm": 2.463939348303971, "learning_rate": 3.2020522041360763e-06, "loss": 0.9579, "step": 17343 }, { "epoch": 0.6286107788771701, "grad_norm": 2.146956833352436, "learning_rate": 3.201504547479004e-06, "loss": 0.7201, "step": 17344 }, { "epoch": 0.6286470225798267, "grad_norm": 2.2076035005487538, "learning_rate": 3.200956915604111e-06, "loss": 0.8507, "step": 17345 }, { "epoch": 0.6286832662824834, "grad_norm": 2.3751421924089997, "learning_rate": 3.2004093085189434e-06, "loss": 0.8578, "step": 17346 }, { "epoch": 0.62871950998514, "grad_norm": 2.4900717769247684, "learning_rate": 3.199861726231046e-06, "loss": 1.0446, "step": 17347 }, { "epoch": 0.6287557536877968, "grad_norm": 2.3345786679562335, "learning_rate": 3.1993141687479667e-06, "loss": 0.8172, "step": 17348 }, { "epoch": 0.6287919973904534, "grad_norm": 2.5449457270008966, "learning_rate": 3.1987666360772497e-06, "loss": 0.9177, "step": 17349 }, { "epoch": 0.6288282410931101, "grad_norm": 2.3397094581731364, "learning_rate": 3.1982191282264387e-06, "loss": 0.9029, "step": 17350 }, { "epoch": 0.6288644847957667, "grad_norm": 2.326727949656188, "learning_rate": 3.1976716452030777e-06, "loss": 0.9473, "step": 17351 }, { "epoch": 0.6289007284984234, "grad_norm": 2.4683902234052617, "learning_rate": 3.1971241870147117e-06, "loss": 0.9142, "step": 17352 }, { "epoch": 0.62893697220108, "grad_norm": 2.292477721476125, "learning_rate": 3.1965767536688853e-06, "loss": 0.8588, "step": 17353 }, { "epoch": 0.6289732159037367, "grad_norm": 2.432921145723465, "learning_rate": 3.1960293451731395e-06, "loss": 1.0763, "step": 17354 }, { "epoch": 0.6290094596063934, "grad_norm": 2.5251351495531087, "learning_rate": 3.1954819615350163e-06, "loss": 0.8144, "step": 17355 }, { "epoch": 0.6290457033090501, "grad_norm": 2.632826322412592, "learning_rate": 3.1949346027620633e-06, "loss": 0.938, "step": 17356 }, { "epoch": 0.6290819470117067, "grad_norm": 2.4063599958919744, "learning_rate": 3.194387268861819e-06, "loss": 0.9308, "step": 17357 }, { "epoch": 0.6291181907143634, "grad_norm": 2.285265488912371, "learning_rate": 3.1938399598418263e-06, "loss": 1.0292, "step": 17358 }, { "epoch": 0.62915443441702, "grad_norm": 2.6021688589449434, "learning_rate": 3.1932926757096253e-06, "loss": 0.916, "step": 17359 }, { "epoch": 0.6291906781196767, "grad_norm": 2.357370999253483, "learning_rate": 3.19274541647276e-06, "loss": 0.8998, "step": 17360 }, { "epoch": 0.6292269218223334, "grad_norm": 2.3648366879340266, "learning_rate": 3.19219818213877e-06, "loss": 1.0849, "step": 17361 }, { "epoch": 0.6292631655249901, "grad_norm": 2.538119504339872, "learning_rate": 3.1916509727151956e-06, "loss": 0.8658, "step": 17362 }, { "epoch": 0.6292994092276467, "grad_norm": 2.387223181660203, "learning_rate": 3.191103788209575e-06, "loss": 1.0115, "step": 17363 }, { "epoch": 0.6293356529303034, "grad_norm": 2.4370474109582383, "learning_rate": 3.190556628629452e-06, "loss": 1.0288, "step": 17364 }, { "epoch": 0.62937189663296, "grad_norm": 2.3761010526582855, "learning_rate": 3.1900094939823646e-06, "loss": 1.0231, "step": 17365 }, { "epoch": 0.6294081403356167, "grad_norm": 2.545732972599533, "learning_rate": 3.189462384275852e-06, "loss": 1.0441, "step": 17366 }, { "epoch": 0.6294443840382733, "grad_norm": 2.427608785498383, "learning_rate": 3.18891529951745e-06, "loss": 1.0121, "step": 17367 }, { "epoch": 0.62948062774093, "grad_norm": 2.212400350240335, "learning_rate": 3.1883682397147026e-06, "loss": 0.9143, "step": 17368 }, { "epoch": 0.6295168714435867, "grad_norm": 2.3663006772754196, "learning_rate": 3.187821204875145e-06, "loss": 0.838, "step": 17369 }, { "epoch": 0.6295531151462433, "grad_norm": 2.2088778092389814, "learning_rate": 3.187274195006315e-06, "loss": 0.7376, "step": 17370 }, { "epoch": 0.6295893588489, "grad_norm": 2.6714217183742006, "learning_rate": 3.1867272101157497e-06, "loss": 0.8811, "step": 17371 }, { "epoch": 0.6296256025515566, "grad_norm": 2.3892464248844423, "learning_rate": 3.1861802502109872e-06, "loss": 0.9023, "step": 17372 }, { "epoch": 0.6296618462542133, "grad_norm": 2.359094618635325, "learning_rate": 3.1856333152995645e-06, "loss": 0.7746, "step": 17373 }, { "epoch": 0.62969808995687, "grad_norm": 2.3873833191757345, "learning_rate": 3.185086405389017e-06, "loss": 1.0598, "step": 17374 }, { "epoch": 0.6297343336595267, "grad_norm": 2.453753710217214, "learning_rate": 3.18453952048688e-06, "loss": 1.0195, "step": 17375 }, { "epoch": 0.6297705773621833, "grad_norm": 2.457267146830859, "learning_rate": 3.183992660600692e-06, "loss": 0.9536, "step": 17376 }, { "epoch": 0.62980682106484, "grad_norm": 2.952341546449207, "learning_rate": 3.1834458257379863e-06, "loss": 1.027, "step": 17377 }, { "epoch": 0.6298430647674966, "grad_norm": 2.5327309201399517, "learning_rate": 3.182899015906299e-06, "loss": 0.8645, "step": 17378 }, { "epoch": 0.6298793084701533, "grad_norm": 2.4042985793616327, "learning_rate": 3.1823522311131634e-06, "loss": 0.8941, "step": 17379 }, { "epoch": 0.6299155521728099, "grad_norm": 2.302395543018667, "learning_rate": 3.181805471366116e-06, "loss": 0.7602, "step": 17380 }, { "epoch": 0.6299517958754667, "grad_norm": 2.192906742657607, "learning_rate": 3.1812587366726888e-06, "loss": 0.8603, "step": 17381 }, { "epoch": 0.6299880395781233, "grad_norm": 2.3033890067748604, "learning_rate": 3.180712027040417e-06, "loss": 0.9428, "step": 17382 }, { "epoch": 0.63002428328078, "grad_norm": 2.10496090263709, "learning_rate": 3.1801653424768308e-06, "loss": 0.8561, "step": 17383 }, { "epoch": 0.6300605269834366, "grad_norm": 2.1619306126317257, "learning_rate": 3.1796186829894673e-06, "loss": 0.967, "step": 17384 }, { "epoch": 0.6300967706860933, "grad_norm": 2.0120490471463914, "learning_rate": 3.179072048585858e-06, "loss": 0.9292, "step": 17385 }, { "epoch": 0.6301330143887499, "grad_norm": 2.36942932569383, "learning_rate": 3.1785254392735342e-06, "loss": 0.5776, "step": 17386 }, { "epoch": 0.6301692580914067, "grad_norm": 2.3338421349500766, "learning_rate": 3.1779788550600266e-06, "loss": 1.0138, "step": 17387 }, { "epoch": 0.6302055017940633, "grad_norm": 2.4634626776801674, "learning_rate": 3.1774322959528704e-06, "loss": 0.8097, "step": 17388 }, { "epoch": 0.63024174549672, "grad_norm": 2.3687780690256734, "learning_rate": 3.176885761959595e-06, "loss": 0.8977, "step": 17389 }, { "epoch": 0.6302779891993766, "grad_norm": 2.5265735944761096, "learning_rate": 3.1763392530877306e-06, "loss": 1.1139, "step": 17390 }, { "epoch": 0.6303142329020333, "grad_norm": 2.55915463145406, "learning_rate": 3.1757927693448077e-06, "loss": 1.0867, "step": 17391 }, { "epoch": 0.6303504766046899, "grad_norm": 1.9234956764474975, "learning_rate": 3.175246310738358e-06, "loss": 0.5484, "step": 17392 }, { "epoch": 0.6303867203073465, "grad_norm": 2.378843839921744, "learning_rate": 3.1746998772759114e-06, "loss": 0.8342, "step": 17393 }, { "epoch": 0.6304229640100033, "grad_norm": 2.607006516489585, "learning_rate": 3.1741534689649956e-06, "loss": 0.9976, "step": 17394 }, { "epoch": 0.63045920771266, "grad_norm": 2.4349339741338465, "learning_rate": 3.1736070858131395e-06, "loss": 0.8683, "step": 17395 }, { "epoch": 0.6304954514153166, "grad_norm": 2.297991252054657, "learning_rate": 3.173060727827875e-06, "loss": 0.8818, "step": 17396 }, { "epoch": 0.6305316951179732, "grad_norm": 2.47772627576712, "learning_rate": 3.172514395016729e-06, "loss": 0.818, "step": 17397 }, { "epoch": 0.6305679388206299, "grad_norm": 2.2076570286118833, "learning_rate": 3.171968087387228e-06, "loss": 0.8072, "step": 17398 }, { "epoch": 0.6306041825232865, "grad_norm": 2.509601383388813, "learning_rate": 3.171421804946902e-06, "loss": 0.9879, "step": 17399 }, { "epoch": 0.6306404262259433, "grad_norm": 2.1372931756120424, "learning_rate": 3.1708755477032782e-06, "loss": 0.868, "step": 17400 }, { "epoch": 0.6306766699285999, "grad_norm": 2.1600911631610864, "learning_rate": 3.1703293156638825e-06, "loss": 0.6965, "step": 17401 }, { "epoch": 0.6307129136312566, "grad_norm": 2.4343348391390873, "learning_rate": 3.169783108836243e-06, "loss": 0.9268, "step": 17402 }, { "epoch": 0.6307491573339132, "grad_norm": 2.246484903043595, "learning_rate": 3.1692369272278822e-06, "loss": 0.8727, "step": 17403 }, { "epoch": 0.6307854010365699, "grad_norm": 2.334280715592426, "learning_rate": 3.168690770846332e-06, "loss": 0.8843, "step": 17404 }, { "epoch": 0.6308216447392265, "grad_norm": 2.247101955973538, "learning_rate": 3.168144639699116e-06, "loss": 0.7928, "step": 17405 }, { "epoch": 0.6308578884418832, "grad_norm": 2.2029110581586466, "learning_rate": 3.167598533793759e-06, "loss": 0.9694, "step": 17406 }, { "epoch": 0.6308941321445399, "grad_norm": 2.3134645795128956, "learning_rate": 3.1670524531377834e-06, "loss": 1.0827, "step": 17407 }, { "epoch": 0.6309303758471966, "grad_norm": 2.18849126609329, "learning_rate": 3.1665063977387185e-06, "loss": 0.9154, "step": 17408 }, { "epoch": 0.6309666195498532, "grad_norm": 2.310591441220373, "learning_rate": 3.1659603676040867e-06, "loss": 0.8216, "step": 17409 }, { "epoch": 0.6310028632525099, "grad_norm": 2.8065085653866357, "learning_rate": 3.1654143627414118e-06, "loss": 0.9992, "step": 17410 }, { "epoch": 0.6310391069551665, "grad_norm": 2.4209746163272787, "learning_rate": 3.1648683831582166e-06, "loss": 0.911, "step": 17411 }, { "epoch": 0.6310753506578232, "grad_norm": 2.5622935917185705, "learning_rate": 3.1643224288620262e-06, "loss": 0.8903, "step": 17412 }, { "epoch": 0.6311115943604799, "grad_norm": 2.5286128969601815, "learning_rate": 3.1637764998603628e-06, "loss": 0.7715, "step": 17413 }, { "epoch": 0.6311478380631366, "grad_norm": 2.4163978876663483, "learning_rate": 3.163230596160748e-06, "loss": 0.9527, "step": 17414 }, { "epoch": 0.6311840817657932, "grad_norm": 2.4208729128229325, "learning_rate": 3.162684717770703e-06, "loss": 0.7368, "step": 17415 }, { "epoch": 0.6312203254684499, "grad_norm": 2.3499051923048824, "learning_rate": 3.1621388646977537e-06, "loss": 0.8158, "step": 17416 }, { "epoch": 0.6312565691711065, "grad_norm": 2.293618582716423, "learning_rate": 3.161593036949419e-06, "loss": 0.8469, "step": 17417 }, { "epoch": 0.6312928128737632, "grad_norm": 2.619022702740781, "learning_rate": 3.161047234533221e-06, "loss": 1.0126, "step": 17418 }, { "epoch": 0.6313290565764198, "grad_norm": 2.582217906309397, "learning_rate": 3.1605014574566784e-06, "loss": 0.877, "step": 17419 }, { "epoch": 0.6313653002790766, "grad_norm": 2.4292429031562226, "learning_rate": 3.1599557057273145e-06, "loss": 0.8649, "step": 17420 }, { "epoch": 0.6314015439817332, "grad_norm": 2.314562651513889, "learning_rate": 3.159409979352648e-06, "loss": 1.041, "step": 17421 }, { "epoch": 0.6314377876843899, "grad_norm": 2.197631452725969, "learning_rate": 3.158864278340199e-06, "loss": 0.7096, "step": 17422 }, { "epoch": 0.6314740313870465, "grad_norm": 2.3691730883409274, "learning_rate": 3.1583186026974844e-06, "loss": 0.8669, "step": 17423 }, { "epoch": 0.6315102750897031, "grad_norm": 2.3007213565219233, "learning_rate": 3.1577729524320277e-06, "loss": 0.9371, "step": 17424 }, { "epoch": 0.6315465187923598, "grad_norm": 2.368254587251332, "learning_rate": 3.157227327551346e-06, "loss": 0.7544, "step": 17425 }, { "epoch": 0.6315827624950164, "grad_norm": 2.419629785109431, "learning_rate": 3.1566817280629557e-06, "loss": 0.9663, "step": 17426 }, { "epoch": 0.6316190061976732, "grad_norm": 2.0921988525440485, "learning_rate": 3.1561361539743775e-06, "loss": 0.9272, "step": 17427 }, { "epoch": 0.6316552499003298, "grad_norm": 2.281214943147692, "learning_rate": 3.1555906052931272e-06, "loss": 0.8512, "step": 17428 }, { "epoch": 0.6316914936029865, "grad_norm": 2.664102277353576, "learning_rate": 3.155045082026724e-06, "loss": 1.0162, "step": 17429 }, { "epoch": 0.6317277373056431, "grad_norm": 2.416581715951077, "learning_rate": 3.154499584182683e-06, "loss": 0.7766, "step": 17430 }, { "epoch": 0.6317639810082998, "grad_norm": 2.5419538374430957, "learning_rate": 3.1539541117685226e-06, "loss": 0.9247, "step": 17431 }, { "epoch": 0.6318002247109564, "grad_norm": 2.3986810227652033, "learning_rate": 3.153408664791757e-06, "loss": 1.0073, "step": 17432 }, { "epoch": 0.6318364684136132, "grad_norm": 2.2318972157684382, "learning_rate": 3.1528632432599045e-06, "loss": 0.7549, "step": 17433 }, { "epoch": 0.6318727121162698, "grad_norm": 2.4271714368511716, "learning_rate": 3.152317847180477e-06, "loss": 0.7391, "step": 17434 }, { "epoch": 0.6319089558189265, "grad_norm": 2.1408757780239163, "learning_rate": 3.1517724765609935e-06, "loss": 0.6738, "step": 17435 }, { "epoch": 0.6319451995215831, "grad_norm": 2.1514072109406333, "learning_rate": 3.151227131408968e-06, "loss": 0.9183, "step": 17436 }, { "epoch": 0.6319814432242398, "grad_norm": 2.4330017047624595, "learning_rate": 3.1506818117319148e-06, "loss": 0.8597, "step": 17437 }, { "epoch": 0.6320176869268964, "grad_norm": 2.343198866296308, "learning_rate": 3.150136517537346e-06, "loss": 0.9414, "step": 17438 }, { "epoch": 0.6320539306295531, "grad_norm": 2.4032682385991593, "learning_rate": 3.149591248832779e-06, "loss": 1.1246, "step": 17439 }, { "epoch": 0.6320901743322098, "grad_norm": 2.4405760546781896, "learning_rate": 3.1490460056257254e-06, "loss": 0.8945, "step": 17440 }, { "epoch": 0.6321264180348665, "grad_norm": 2.468046892897144, "learning_rate": 3.1485007879236983e-06, "loss": 0.7731, "step": 17441 }, { "epoch": 0.6321626617375231, "grad_norm": 2.4551905839068873, "learning_rate": 3.147955595734208e-06, "loss": 0.9805, "step": 17442 }, { "epoch": 0.6321989054401798, "grad_norm": 2.3238841917896416, "learning_rate": 3.1474104290647724e-06, "loss": 0.833, "step": 17443 }, { "epoch": 0.6322351491428364, "grad_norm": 2.3843149676718047, "learning_rate": 3.1468652879229e-06, "loss": 0.6979, "step": 17444 }, { "epoch": 0.632271392845493, "grad_norm": 2.076630729081815, "learning_rate": 3.146320172316104e-06, "loss": 0.8062, "step": 17445 }, { "epoch": 0.6323076365481498, "grad_norm": 2.3195676995438186, "learning_rate": 3.145775082251893e-06, "loss": 0.797, "step": 17446 }, { "epoch": 0.6323438802508065, "grad_norm": 2.3759063637332316, "learning_rate": 3.1452300177377813e-06, "loss": 0.8471, "step": 17447 }, { "epoch": 0.6323801239534631, "grad_norm": 2.2079695787668756, "learning_rate": 3.1446849787812783e-06, "loss": 0.7685, "step": 17448 }, { "epoch": 0.6324163676561197, "grad_norm": 2.669624227561977, "learning_rate": 3.1441399653898945e-06, "loss": 1.112, "step": 17449 }, { "epoch": 0.6324526113587764, "grad_norm": 2.3990019803639515, "learning_rate": 3.143594977571137e-06, "loss": 0.8623, "step": 17450 }, { "epoch": 0.632488855061433, "grad_norm": 2.4844550234181813, "learning_rate": 3.1430500153325206e-06, "loss": 0.8828, "step": 17451 }, { "epoch": 0.6325250987640897, "grad_norm": 2.4610633697175874, "learning_rate": 3.142505078681552e-06, "loss": 1.0015, "step": 17452 }, { "epoch": 0.6325613424667464, "grad_norm": 2.338374066632966, "learning_rate": 3.1419601676257394e-06, "loss": 0.985, "step": 17453 }, { "epoch": 0.6325975861694031, "grad_norm": 2.318551996451699, "learning_rate": 3.141415282172591e-06, "loss": 0.8919, "step": 17454 }, { "epoch": 0.6326338298720597, "grad_norm": 2.1676537397043356, "learning_rate": 3.1408704223296173e-06, "loss": 0.9062, "step": 17455 }, { "epoch": 0.6326700735747164, "grad_norm": 2.7192608358676265, "learning_rate": 3.1403255881043247e-06, "loss": 0.9625, "step": 17456 }, { "epoch": 0.632706317277373, "grad_norm": 2.248373518143693, "learning_rate": 3.1397807795042208e-06, "loss": 1.0155, "step": 17457 }, { "epoch": 0.6327425609800297, "grad_norm": 2.4350030341388975, "learning_rate": 3.139235996536812e-06, "loss": 0.9975, "step": 17458 }, { "epoch": 0.6327788046826864, "grad_norm": 2.1672144568717706, "learning_rate": 3.1386912392096066e-06, "loss": 0.9399, "step": 17459 }, { "epoch": 0.6328150483853431, "grad_norm": 2.4461548963173994, "learning_rate": 3.13814650753011e-06, "loss": 0.8276, "step": 17460 }, { "epoch": 0.6328512920879997, "grad_norm": 2.3261481669918176, "learning_rate": 3.137601801505829e-06, "loss": 1.0215, "step": 17461 }, { "epoch": 0.6328875357906564, "grad_norm": 2.348945845363415, "learning_rate": 3.137057121144267e-06, "loss": 0.8203, "step": 17462 }, { "epoch": 0.632923779493313, "grad_norm": 2.219381443903407, "learning_rate": 3.1365124664529323e-06, "loss": 0.99, "step": 17463 }, { "epoch": 0.6329600231959697, "grad_norm": 2.4968853264535107, "learning_rate": 3.13596783743933e-06, "loss": 0.8623, "step": 17464 }, { "epoch": 0.6329962668986263, "grad_norm": 2.5850546133006427, "learning_rate": 3.1354232341109625e-06, "loss": 0.7985, "step": 17465 }, { "epoch": 0.6330325106012831, "grad_norm": 2.0736239946059287, "learning_rate": 3.134878656475335e-06, "loss": 0.878, "step": 17466 }, { "epoch": 0.6330687543039397, "grad_norm": 2.2126268280420485, "learning_rate": 3.1343341045399524e-06, "loss": 0.9352, "step": 17467 }, { "epoch": 0.6331049980065964, "grad_norm": 2.4576450120921582, "learning_rate": 3.133789578312317e-06, "loss": 0.8714, "step": 17468 }, { "epoch": 0.633141241709253, "grad_norm": 2.255254445398946, "learning_rate": 3.133245077799933e-06, "loss": 0.9221, "step": 17469 }, { "epoch": 0.6331774854119097, "grad_norm": 2.1945355464580576, "learning_rate": 3.1327006030103e-06, "loss": 0.7982, "step": 17470 }, { "epoch": 0.6332137291145663, "grad_norm": 2.4022703283391733, "learning_rate": 3.132156153950927e-06, "loss": 1.0141, "step": 17471 }, { "epoch": 0.6332499728172231, "grad_norm": 2.383527337081738, "learning_rate": 3.1316117306293114e-06, "loss": 0.7953, "step": 17472 }, { "epoch": 0.6332862165198797, "grad_norm": 2.394732121334507, "learning_rate": 3.131067333052957e-06, "loss": 0.8157, "step": 17473 }, { "epoch": 0.6333224602225364, "grad_norm": 2.8748551799312727, "learning_rate": 3.130522961229362e-06, "loss": 0.9509, "step": 17474 }, { "epoch": 0.633358703925193, "grad_norm": 2.528631054780088, "learning_rate": 3.1299786151660317e-06, "loss": 0.8097, "step": 17475 }, { "epoch": 0.6333949476278496, "grad_norm": 2.404637799193907, "learning_rate": 3.129434294870465e-06, "loss": 0.772, "step": 17476 }, { "epoch": 0.6334311913305063, "grad_norm": 2.1844567680774865, "learning_rate": 3.1288900003501634e-06, "loss": 0.9744, "step": 17477 }, { "epoch": 0.6334674350331629, "grad_norm": 2.143179386661041, "learning_rate": 3.1283457316126244e-06, "loss": 0.8182, "step": 17478 }, { "epoch": 0.6335036787358197, "grad_norm": 2.4733886857077123, "learning_rate": 3.1278014886653497e-06, "loss": 0.9778, "step": 17479 }, { "epoch": 0.6335399224384763, "grad_norm": 2.8411174211405745, "learning_rate": 3.127257271515839e-06, "loss": 0.949, "step": 17480 }, { "epoch": 0.633576166141133, "grad_norm": 2.2441617732670123, "learning_rate": 3.1267130801715907e-06, "loss": 0.8488, "step": 17481 }, { "epoch": 0.6336124098437896, "grad_norm": 2.528991377440056, "learning_rate": 3.1261689146401e-06, "loss": 0.7851, "step": 17482 }, { "epoch": 0.6336486535464463, "grad_norm": 1.7909922744829476, "learning_rate": 3.1256247749288716e-06, "loss": 0.6346, "step": 17483 }, { "epoch": 0.6336848972491029, "grad_norm": 2.3871974507582205, "learning_rate": 3.1250806610453997e-06, "loss": 0.8598, "step": 17484 }, { "epoch": 0.6337211409517597, "grad_norm": 2.5861988833544776, "learning_rate": 3.1245365729971822e-06, "loss": 0.7613, "step": 17485 }, { "epoch": 0.6337573846544163, "grad_norm": 2.5817392784474307, "learning_rate": 3.1239925107917156e-06, "loss": 0.9558, "step": 17486 }, { "epoch": 0.633793628357073, "grad_norm": 2.557458149590868, "learning_rate": 3.123448474436499e-06, "loss": 0.9556, "step": 17487 }, { "epoch": 0.6338298720597296, "grad_norm": 2.12873158807881, "learning_rate": 3.1229044639390275e-06, "loss": 0.8114, "step": 17488 }, { "epoch": 0.6338661157623863, "grad_norm": 2.392247451686537, "learning_rate": 3.1223604793067973e-06, "loss": 0.7581, "step": 17489 }, { "epoch": 0.6339023594650429, "grad_norm": 2.5502081797448866, "learning_rate": 3.1218165205473016e-06, "loss": 0.9375, "step": 17490 }, { "epoch": 0.6339386031676996, "grad_norm": 2.512857469833471, "learning_rate": 3.1212725876680403e-06, "loss": 0.746, "step": 17491 }, { "epoch": 0.6339748468703563, "grad_norm": 2.420168638819986, "learning_rate": 3.1207286806765073e-06, "loss": 0.9395, "step": 17492 }, { "epoch": 0.634011090573013, "grad_norm": 2.292852074290195, "learning_rate": 3.1201847995801966e-06, "loss": 0.7415, "step": 17493 }, { "epoch": 0.6340473342756696, "grad_norm": 2.208662563059007, "learning_rate": 3.1196409443865994e-06, "loss": 0.8563, "step": 17494 }, { "epoch": 0.6340835779783263, "grad_norm": 2.5793079128305445, "learning_rate": 3.1190971151032158e-06, "loss": 1.0066, "step": 17495 }, { "epoch": 0.6341198216809829, "grad_norm": 2.2644598617324307, "learning_rate": 3.118553311737536e-06, "loss": 0.9349, "step": 17496 }, { "epoch": 0.6341560653836396, "grad_norm": 2.53304966998814, "learning_rate": 3.118009534297054e-06, "loss": 0.7734, "step": 17497 }, { "epoch": 0.6341923090862962, "grad_norm": 2.4146857795681207, "learning_rate": 3.1174657827892606e-06, "loss": 1.2029, "step": 17498 }, { "epoch": 0.634228552788953, "grad_norm": 2.37453060477941, "learning_rate": 3.116922057221652e-06, "loss": 1.0498, "step": 17499 }, { "epoch": 0.6342647964916096, "grad_norm": 2.0837816890193643, "learning_rate": 3.1163783576017183e-06, "loss": 0.8338, "step": 17500 }, { "epoch": 0.6343010401942663, "grad_norm": 2.2895089809722204, "learning_rate": 3.115834683936952e-06, "loss": 0.7894, "step": 17501 }, { "epoch": 0.6343372838969229, "grad_norm": 2.579371785008842, "learning_rate": 3.115291036234842e-06, "loss": 0.9297, "step": 17502 }, { "epoch": 0.6343735275995795, "grad_norm": 2.4743790899053577, "learning_rate": 3.1147474145028833e-06, "loss": 0.9138, "step": 17503 }, { "epoch": 0.6344097713022362, "grad_norm": 2.4393945052570802, "learning_rate": 3.1142038187485656e-06, "loss": 0.8948, "step": 17504 }, { "epoch": 0.634446015004893, "grad_norm": 2.4038074088411423, "learning_rate": 3.1136602489793778e-06, "loss": 0.7915, "step": 17505 }, { "epoch": 0.6344822587075496, "grad_norm": 2.155424321877227, "learning_rate": 3.1131167052028113e-06, "loss": 0.7921, "step": 17506 }, { "epoch": 0.6345185024102062, "grad_norm": 2.376360346981631, "learning_rate": 3.112573187426356e-06, "loss": 1.0005, "step": 17507 }, { "epoch": 0.6345547461128629, "grad_norm": 2.283515499774398, "learning_rate": 3.1120296956575003e-06, "loss": 0.887, "step": 17508 }, { "epoch": 0.6345909898155195, "grad_norm": 2.071144734823679, "learning_rate": 3.1114862299037317e-06, "loss": 0.8646, "step": 17509 }, { "epoch": 0.6346272335181762, "grad_norm": 2.353833789610437, "learning_rate": 3.110942790172543e-06, "loss": 0.7352, "step": 17510 }, { "epoch": 0.6346634772208328, "grad_norm": 2.421443085900548, "learning_rate": 3.11039937647142e-06, "loss": 1.0408, "step": 17511 }, { "epoch": 0.6346997209234896, "grad_norm": 2.4601752528171157, "learning_rate": 3.109855988807851e-06, "loss": 0.9386, "step": 17512 }, { "epoch": 0.6347359646261462, "grad_norm": 2.261162444244825, "learning_rate": 3.1093126271893214e-06, "loss": 0.9141, "step": 17513 }, { "epoch": 0.6347722083288029, "grad_norm": 2.427663810731038, "learning_rate": 3.1087692916233225e-06, "loss": 1.0634, "step": 17514 }, { "epoch": 0.6348084520314595, "grad_norm": 2.421916025370226, "learning_rate": 3.1082259821173384e-06, "loss": 0.8686, "step": 17515 }, { "epoch": 0.6348446957341162, "grad_norm": 2.4088527129707886, "learning_rate": 3.1076826986788567e-06, "loss": 0.8501, "step": 17516 }, { "epoch": 0.6348809394367728, "grad_norm": 2.585656820730714, "learning_rate": 3.107139441315361e-06, "loss": 0.8209, "step": 17517 }, { "epoch": 0.6349171831394296, "grad_norm": 2.136079854866829, "learning_rate": 3.106596210034341e-06, "loss": 0.8006, "step": 17518 }, { "epoch": 0.6349534268420862, "grad_norm": 2.564853494936633, "learning_rate": 3.10605300484328e-06, "loss": 1.0026, "step": 17519 }, { "epoch": 0.6349896705447429, "grad_norm": 2.2433886956847893, "learning_rate": 3.105509825749663e-06, "loss": 0.8734, "step": 17520 }, { "epoch": 0.6350259142473995, "grad_norm": 2.4011514779225878, "learning_rate": 3.104966672760973e-06, "loss": 0.9236, "step": 17521 }, { "epoch": 0.6350621579500562, "grad_norm": 2.386060584608849, "learning_rate": 3.1044235458846976e-06, "loss": 0.9383, "step": 17522 }, { "epoch": 0.6350984016527128, "grad_norm": 2.3537913360482694, "learning_rate": 3.10388044512832e-06, "loss": 0.7713, "step": 17523 }, { "epoch": 0.6351346453553695, "grad_norm": 2.524966902739137, "learning_rate": 3.103337370499323e-06, "loss": 0.9074, "step": 17524 }, { "epoch": 0.6351708890580262, "grad_norm": 2.2837338332695207, "learning_rate": 3.1027943220051886e-06, "loss": 0.9068, "step": 17525 }, { "epoch": 0.6352071327606829, "grad_norm": 2.3986250845920023, "learning_rate": 3.1022512996534025e-06, "loss": 0.8716, "step": 17526 }, { "epoch": 0.6352433764633395, "grad_norm": 2.574578814214866, "learning_rate": 3.1017083034514457e-06, "loss": 1.0084, "step": 17527 }, { "epoch": 0.6352796201659962, "grad_norm": 2.4593590383396413, "learning_rate": 3.1011653334068003e-06, "loss": 0.8088, "step": 17528 }, { "epoch": 0.6353158638686528, "grad_norm": 2.6800351015931083, "learning_rate": 3.100622389526946e-06, "loss": 0.9011, "step": 17529 }, { "epoch": 0.6353521075713094, "grad_norm": 2.3570502137384843, "learning_rate": 3.100079471819369e-06, "loss": 1.1083, "step": 17530 }, { "epoch": 0.6353883512739662, "grad_norm": 2.3605874285835613, "learning_rate": 3.0995365802915473e-06, "loss": 0.9348, "step": 17531 }, { "epoch": 0.6354245949766228, "grad_norm": 2.510859104579546, "learning_rate": 3.0989937149509624e-06, "loss": 1.0119, "step": 17532 }, { "epoch": 0.6354608386792795, "grad_norm": 2.6558191350752582, "learning_rate": 3.0984508758050937e-06, "loss": 0.9482, "step": 17533 }, { "epoch": 0.6354970823819361, "grad_norm": 2.4935361637792566, "learning_rate": 3.097908062861422e-06, "loss": 0.8287, "step": 17534 }, { "epoch": 0.6355333260845928, "grad_norm": 2.526556770766257, "learning_rate": 3.0973652761274277e-06, "loss": 1.0334, "step": 17535 }, { "epoch": 0.6355695697872494, "grad_norm": 2.2204190216940285, "learning_rate": 3.096822515610589e-06, "loss": 1.0029, "step": 17536 }, { "epoch": 0.6356058134899061, "grad_norm": 2.293735139511824, "learning_rate": 3.0962797813183833e-06, "loss": 0.8279, "step": 17537 }, { "epoch": 0.6356420571925628, "grad_norm": 2.165240585239892, "learning_rate": 3.0957370732582925e-06, "loss": 0.8677, "step": 17538 }, { "epoch": 0.6356783008952195, "grad_norm": 2.4177435461639174, "learning_rate": 3.0951943914377938e-06, "loss": 1.0802, "step": 17539 }, { "epoch": 0.6357145445978761, "grad_norm": 2.307313296449477, "learning_rate": 3.0946517358643634e-06, "loss": 0.9324, "step": 17540 }, { "epoch": 0.6357507883005328, "grad_norm": 2.4573776885742196, "learning_rate": 3.094109106545478e-06, "loss": 0.955, "step": 17541 }, { "epoch": 0.6357870320031894, "grad_norm": 2.4275021596231534, "learning_rate": 3.0935665034886186e-06, "loss": 0.9264, "step": 17542 }, { "epoch": 0.6358232757058461, "grad_norm": 2.371428565366979, "learning_rate": 3.0930239267012603e-06, "loss": 0.7667, "step": 17543 }, { "epoch": 0.6358595194085028, "grad_norm": 2.321145258964369, "learning_rate": 3.0924813761908783e-06, "loss": 0.8562, "step": 17544 }, { "epoch": 0.6358957631111595, "grad_norm": 2.3987672201882466, "learning_rate": 3.0919388519649484e-06, "loss": 0.8974, "step": 17545 }, { "epoch": 0.6359320068138161, "grad_norm": 2.138555539160929, "learning_rate": 3.0913963540309487e-06, "loss": 0.8622, "step": 17546 }, { "epoch": 0.6359682505164728, "grad_norm": 7.485381850403782, "learning_rate": 3.090853882396353e-06, "loss": 0.8641, "step": 17547 }, { "epoch": 0.6360044942191294, "grad_norm": 2.094086267954302, "learning_rate": 3.0903114370686353e-06, "loss": 0.8576, "step": 17548 }, { "epoch": 0.6360407379217861, "grad_norm": 2.5108997477704222, "learning_rate": 3.08976901805527e-06, "loss": 1.1584, "step": 17549 }, { "epoch": 0.6360769816244427, "grad_norm": 2.4896160717527924, "learning_rate": 3.0892266253637337e-06, "loss": 0.8583, "step": 17550 }, { "epoch": 0.6361132253270995, "grad_norm": 2.2998756064977446, "learning_rate": 3.0886842590014988e-06, "loss": 0.8398, "step": 17551 }, { "epoch": 0.6361494690297561, "grad_norm": 2.5408211278493344, "learning_rate": 3.08814191897604e-06, "loss": 0.8808, "step": 17552 }, { "epoch": 0.6361857127324128, "grad_norm": 2.1556036842239825, "learning_rate": 3.0875996052948272e-06, "loss": 0.8315, "step": 17553 }, { "epoch": 0.6362219564350694, "grad_norm": 2.3634540108636073, "learning_rate": 3.087057317965336e-06, "loss": 0.9858, "step": 17554 }, { "epoch": 0.636258200137726, "grad_norm": 2.218126184360877, "learning_rate": 3.086515056995039e-06, "loss": 0.8493, "step": 17555 }, { "epoch": 0.6362944438403827, "grad_norm": 2.2193500787362983, "learning_rate": 3.0859728223914064e-06, "loss": 0.71, "step": 17556 }, { "epoch": 0.6363306875430393, "grad_norm": 2.0276502220955446, "learning_rate": 3.0854306141619095e-06, "loss": 0.8955, "step": 17557 }, { "epoch": 0.6363669312456961, "grad_norm": 2.3423731695721393, "learning_rate": 3.0848884323140225e-06, "loss": 0.7791, "step": 17558 }, { "epoch": 0.6364031749483527, "grad_norm": 2.268803986724153, "learning_rate": 3.0843462768552153e-06, "loss": 0.7221, "step": 17559 }, { "epoch": 0.6364394186510094, "grad_norm": 2.3224477288644305, "learning_rate": 3.083804147792957e-06, "loss": 0.8971, "step": 17560 }, { "epoch": 0.636475662353666, "grad_norm": 2.207831232494438, "learning_rate": 3.0832620451347173e-06, "loss": 0.9427, "step": 17561 }, { "epoch": 0.6365119060563227, "grad_norm": 2.4040032752783604, "learning_rate": 3.082719968887969e-06, "loss": 0.8751, "step": 17562 }, { "epoch": 0.6365481497589793, "grad_norm": 2.2526705850080955, "learning_rate": 3.0821779190601795e-06, "loss": 0.9461, "step": 17563 }, { "epoch": 0.6365843934616361, "grad_norm": 2.0704144204495822, "learning_rate": 3.0816358956588194e-06, "loss": 0.8409, "step": 17564 }, { "epoch": 0.6366206371642927, "grad_norm": 2.44673264542015, "learning_rate": 3.081093898691355e-06, "loss": 0.8422, "step": 17565 }, { "epoch": 0.6366568808669494, "grad_norm": 2.124602070201079, "learning_rate": 3.0805519281652573e-06, "loss": 0.8057, "step": 17566 }, { "epoch": 0.636693124569606, "grad_norm": 2.6090505285195467, "learning_rate": 3.0800099840879927e-06, "loss": 0.8691, "step": 17567 }, { "epoch": 0.6367293682722627, "grad_norm": 2.741527490784143, "learning_rate": 3.07946806646703e-06, "loss": 1.0072, "step": 17568 }, { "epoch": 0.6367656119749193, "grad_norm": 2.2862869412234748, "learning_rate": 3.0789261753098337e-06, "loss": 0.9502, "step": 17569 }, { "epoch": 0.636801855677576, "grad_norm": 2.1117075179516354, "learning_rate": 3.0783843106238746e-06, "loss": 0.8948, "step": 17570 }, { "epoch": 0.6368380993802327, "grad_norm": 2.6399780358894285, "learning_rate": 3.0778424724166177e-06, "loss": 0.8859, "step": 17571 }, { "epoch": 0.6368743430828894, "grad_norm": 2.283190629818862, "learning_rate": 3.0773006606955293e-06, "loss": 0.9093, "step": 17572 }, { "epoch": 0.636910586785546, "grad_norm": 2.3342518541384623, "learning_rate": 3.0767588754680737e-06, "loss": 0.8895, "step": 17573 }, { "epoch": 0.6369468304882027, "grad_norm": 2.196515657652807, "learning_rate": 3.076217116741719e-06, "loss": 0.8983, "step": 17574 }, { "epoch": 0.6369830741908593, "grad_norm": 2.3216758604802825, "learning_rate": 3.0756753845239285e-06, "loss": 0.867, "step": 17575 }, { "epoch": 0.637019317893516, "grad_norm": 2.3972496301400446, "learning_rate": 3.0751336788221674e-06, "loss": 0.8574, "step": 17576 }, { "epoch": 0.6370555615961727, "grad_norm": 2.6766308105152605, "learning_rate": 3.0745919996438984e-06, "loss": 0.9447, "step": 17577 }, { "epoch": 0.6370918052988294, "grad_norm": 2.2443228719594237, "learning_rate": 3.074050346996589e-06, "loss": 0.6663, "step": 17578 }, { "epoch": 0.637128049001486, "grad_norm": 2.4151264057193953, "learning_rate": 3.0735087208877013e-06, "loss": 1.114, "step": 17579 }, { "epoch": 0.6371642927041427, "grad_norm": 2.2636494263998386, "learning_rate": 3.072967121324698e-06, "loss": 0.7808, "step": 17580 }, { "epoch": 0.6372005364067993, "grad_norm": 2.2714823369694352, "learning_rate": 3.07242554831504e-06, "loss": 1.0113, "step": 17581 }, { "epoch": 0.637236780109456, "grad_norm": 2.493183926325248, "learning_rate": 3.071884001866195e-06, "loss": 0.9509, "step": 17582 }, { "epoch": 0.6372730238121126, "grad_norm": 2.3420568260424672, "learning_rate": 3.071342481985622e-06, "loss": 1.0609, "step": 17583 }, { "epoch": 0.6373092675147694, "grad_norm": 2.8026813337129344, "learning_rate": 3.0708009886807833e-06, "loss": 0.8537, "step": 17584 }, { "epoch": 0.637345511217426, "grad_norm": 2.413507660502492, "learning_rate": 3.070259521959139e-06, "loss": 0.9221, "step": 17585 }, { "epoch": 0.6373817549200826, "grad_norm": 2.5815018592077674, "learning_rate": 3.0697180818281524e-06, "loss": 0.8976, "step": 17586 }, { "epoch": 0.6374179986227393, "grad_norm": 2.3092592404755803, "learning_rate": 3.069176668295284e-06, "loss": 0.8893, "step": 17587 }, { "epoch": 0.6374542423253959, "grad_norm": 2.193458718329275, "learning_rate": 3.06863528136799e-06, "loss": 0.8598, "step": 17588 }, { "epoch": 0.6374904860280526, "grad_norm": 2.2604635401742623, "learning_rate": 3.068093921053737e-06, "loss": 0.9169, "step": 17589 }, { "epoch": 0.6375267297307093, "grad_norm": 2.3043748181745474, "learning_rate": 3.067552587359981e-06, "loss": 0.9587, "step": 17590 }, { "epoch": 0.637562973433366, "grad_norm": 2.440461884920374, "learning_rate": 3.0670112802941808e-06, "loss": 0.995, "step": 17591 }, { "epoch": 0.6375992171360226, "grad_norm": 2.2340561857472783, "learning_rate": 3.0664699998637965e-06, "loss": 0.8983, "step": 17592 }, { "epoch": 0.6376354608386793, "grad_norm": 2.662601260330615, "learning_rate": 3.0659287460762864e-06, "loss": 0.9207, "step": 17593 }, { "epoch": 0.6376717045413359, "grad_norm": 2.1451157927041336, "learning_rate": 3.065387518939109e-06, "loss": 0.7222, "step": 17594 }, { "epoch": 0.6377079482439926, "grad_norm": 2.412818572033766, "learning_rate": 3.064846318459721e-06, "loss": 0.9456, "step": 17595 }, { "epoch": 0.6377441919466492, "grad_norm": 2.7400053506296853, "learning_rate": 3.0643051446455785e-06, "loss": 0.8664, "step": 17596 }, { "epoch": 0.637780435649306, "grad_norm": 2.2751531616689844, "learning_rate": 3.0637639975041434e-06, "loss": 1.0483, "step": 17597 }, { "epoch": 0.6378166793519626, "grad_norm": 2.3436240312592944, "learning_rate": 3.0632228770428684e-06, "loss": 0.9152, "step": 17598 }, { "epoch": 0.6378529230546193, "grad_norm": 2.383143544051187, "learning_rate": 3.062681783269211e-06, "loss": 0.8903, "step": 17599 }, { "epoch": 0.6378891667572759, "grad_norm": 2.6433767856563826, "learning_rate": 3.0621407161906245e-06, "loss": 0.8995, "step": 17600 }, { "epoch": 0.6379254104599326, "grad_norm": 2.496846626809261, "learning_rate": 3.0615996758145694e-06, "loss": 1.0491, "step": 17601 }, { "epoch": 0.6379616541625892, "grad_norm": 2.342525669837337, "learning_rate": 3.061058662148499e-06, "loss": 1.0044, "step": 17602 }, { "epoch": 0.637997897865246, "grad_norm": 1.8906046390062843, "learning_rate": 3.060517675199867e-06, "loss": 0.7193, "step": 17603 }, { "epoch": 0.6380341415679026, "grad_norm": 2.50131068424808, "learning_rate": 3.059976714976128e-06, "loss": 0.936, "step": 17604 }, { "epoch": 0.6380703852705593, "grad_norm": 1.9866924885123212, "learning_rate": 3.059435781484737e-06, "loss": 0.8778, "step": 17605 }, { "epoch": 0.6381066289732159, "grad_norm": 2.0689637573498265, "learning_rate": 3.058894874733147e-06, "loss": 0.9115, "step": 17606 }, { "epoch": 0.6381428726758726, "grad_norm": 2.310340669815461, "learning_rate": 3.0583539947288126e-06, "loss": 0.9141, "step": 17607 }, { "epoch": 0.6381791163785292, "grad_norm": 2.4060637831409246, "learning_rate": 3.0578131414791834e-06, "loss": 0.7769, "step": 17608 }, { "epoch": 0.6382153600811858, "grad_norm": 2.457067948977549, "learning_rate": 3.0572723149917164e-06, "loss": 1.0937, "step": 17609 }, { "epoch": 0.6382516037838426, "grad_norm": 2.4491893794591255, "learning_rate": 3.0567315152738627e-06, "loss": 0.9461, "step": 17610 }, { "epoch": 0.6382878474864992, "grad_norm": 2.397756316593211, "learning_rate": 3.056190742333073e-06, "loss": 0.949, "step": 17611 }, { "epoch": 0.6383240911891559, "grad_norm": 2.5386336252630666, "learning_rate": 3.0556499961767983e-06, "loss": 0.9412, "step": 17612 }, { "epoch": 0.6383603348918125, "grad_norm": 2.3272442896402814, "learning_rate": 3.0551092768124913e-06, "loss": 1.0308, "step": 17613 }, { "epoch": 0.6383965785944692, "grad_norm": 2.6241808963701656, "learning_rate": 3.0545685842476025e-06, "loss": 1.0205, "step": 17614 }, { "epoch": 0.6384328222971258, "grad_norm": 2.3874806586959583, "learning_rate": 3.0540279184895827e-06, "loss": 0.9088, "step": 17615 }, { "epoch": 0.6384690659997826, "grad_norm": 2.4255854691521206, "learning_rate": 3.053487279545878e-06, "loss": 0.9016, "step": 17616 }, { "epoch": 0.6385053097024392, "grad_norm": 2.4231825712847135, "learning_rate": 3.0529466674239442e-06, "loss": 0.9498, "step": 17617 }, { "epoch": 0.6385415534050959, "grad_norm": 2.2050827483703723, "learning_rate": 3.052406082131228e-06, "loss": 0.9217, "step": 17618 }, { "epoch": 0.6385777971077525, "grad_norm": 2.033434674980059, "learning_rate": 3.0518655236751784e-06, "loss": 0.9074, "step": 17619 }, { "epoch": 0.6386140408104092, "grad_norm": 2.3412291496955997, "learning_rate": 3.0513249920632415e-06, "loss": 1.0075, "step": 17620 }, { "epoch": 0.6386502845130658, "grad_norm": 2.2600901531462547, "learning_rate": 3.050784487302869e-06, "loss": 0.9033, "step": 17621 }, { "epoch": 0.6386865282157225, "grad_norm": 2.571854164667587, "learning_rate": 3.0502440094015083e-06, "loss": 0.9617, "step": 17622 }, { "epoch": 0.6387227719183792, "grad_norm": 2.2278945002104593, "learning_rate": 3.049703558366605e-06, "loss": 0.8819, "step": 17623 }, { "epoch": 0.6387590156210359, "grad_norm": 2.3599900163082728, "learning_rate": 3.049163134205606e-06, "loss": 0.7856, "step": 17624 }, { "epoch": 0.6387952593236925, "grad_norm": 2.1414080413069483, "learning_rate": 3.04862273692596e-06, "loss": 0.7531, "step": 17625 }, { "epoch": 0.6388315030263492, "grad_norm": 2.28869268890686, "learning_rate": 3.0480823665351138e-06, "loss": 0.6558, "step": 17626 }, { "epoch": 0.6388677467290058, "grad_norm": 2.057512485554745, "learning_rate": 3.0475420230405116e-06, "loss": 0.8906, "step": 17627 }, { "epoch": 0.6389039904316625, "grad_norm": 2.5421536275455745, "learning_rate": 3.0470017064495973e-06, "loss": 0.7844, "step": 17628 }, { "epoch": 0.6389402341343191, "grad_norm": 2.4879393142925204, "learning_rate": 3.046461416769821e-06, "loss": 0.9305, "step": 17629 }, { "epoch": 0.6389764778369759, "grad_norm": 2.4203055596836207, "learning_rate": 3.0459211540086243e-06, "loss": 0.8177, "step": 17630 }, { "epoch": 0.6390127215396325, "grad_norm": 2.456359073554618, "learning_rate": 3.045380918173453e-06, "loss": 1.1109, "step": 17631 }, { "epoch": 0.6390489652422892, "grad_norm": 2.7231630073855357, "learning_rate": 3.0448407092717487e-06, "loss": 0.9137, "step": 17632 }, { "epoch": 0.6390852089449458, "grad_norm": 2.2531962972041923, "learning_rate": 3.044300527310958e-06, "loss": 0.8268, "step": 17633 }, { "epoch": 0.6391214526476025, "grad_norm": 2.454061319949998, "learning_rate": 3.043760372298524e-06, "loss": 0.9112, "step": 17634 }, { "epoch": 0.6391576963502591, "grad_norm": 2.526520213246786, "learning_rate": 3.0432202442418877e-06, "loss": 1.0303, "step": 17635 }, { "epoch": 0.6391939400529159, "grad_norm": 2.3494803045952715, "learning_rate": 3.042680143148492e-06, "loss": 0.8189, "step": 17636 }, { "epoch": 0.6392301837555725, "grad_norm": 2.1890209134350727, "learning_rate": 3.0421400690257825e-06, "loss": 0.973, "step": 17637 }, { "epoch": 0.6392664274582291, "grad_norm": 2.3367905159736737, "learning_rate": 3.0416000218811982e-06, "loss": 1.0562, "step": 17638 }, { "epoch": 0.6393026711608858, "grad_norm": 2.4493105461088733, "learning_rate": 3.0410600017221816e-06, "loss": 0.9136, "step": 17639 }, { "epoch": 0.6393389148635424, "grad_norm": 2.561505411950095, "learning_rate": 3.0405200085561717e-06, "loss": 0.9518, "step": 17640 }, { "epoch": 0.6393751585661991, "grad_norm": 2.1466085789540448, "learning_rate": 3.039980042390612e-06, "loss": 0.7384, "step": 17641 }, { "epoch": 0.6394114022688557, "grad_norm": 2.4539940660945296, "learning_rate": 3.039440103232943e-06, "loss": 0.915, "step": 17642 }, { "epoch": 0.6394476459715125, "grad_norm": 2.396311836245512, "learning_rate": 3.038900191090603e-06, "loss": 1.0376, "step": 17643 }, { "epoch": 0.6394838896741691, "grad_norm": 2.3050785083476666, "learning_rate": 3.0383603059710304e-06, "loss": 0.9824, "step": 17644 }, { "epoch": 0.6395201333768258, "grad_norm": 2.680385267222949, "learning_rate": 3.0378204478816687e-06, "loss": 1.09, "step": 17645 }, { "epoch": 0.6395563770794824, "grad_norm": 2.0687089242980807, "learning_rate": 3.037280616829954e-06, "loss": 0.8281, "step": 17646 }, { "epoch": 0.6395926207821391, "grad_norm": 2.4628723684525813, "learning_rate": 3.0367408128233257e-06, "loss": 0.9134, "step": 17647 }, { "epoch": 0.6396288644847957, "grad_norm": 2.208376338816292, "learning_rate": 3.0362010358692196e-06, "loss": 0.8695, "step": 17648 }, { "epoch": 0.6396651081874525, "grad_norm": 2.270460800401942, "learning_rate": 3.0356612859750777e-06, "loss": 0.8822, "step": 17649 }, { "epoch": 0.6397013518901091, "grad_norm": 2.602878438871744, "learning_rate": 3.035121563148335e-06, "loss": 0.8177, "step": 17650 }, { "epoch": 0.6397375955927658, "grad_norm": 2.0235829282099647, "learning_rate": 3.034581867396429e-06, "loss": 0.8135, "step": 17651 }, { "epoch": 0.6397738392954224, "grad_norm": 2.1131680426090726, "learning_rate": 3.0340421987267954e-06, "loss": 0.9006, "step": 17652 }, { "epoch": 0.6398100829980791, "grad_norm": 2.2379993811319614, "learning_rate": 3.033502557146872e-06, "loss": 0.9284, "step": 17653 }, { "epoch": 0.6398463267007357, "grad_norm": 2.4477167532898996, "learning_rate": 3.032962942664094e-06, "loss": 0.8422, "step": 17654 }, { "epoch": 0.6398825704033924, "grad_norm": 2.1903079594926633, "learning_rate": 3.0324233552858974e-06, "loss": 0.814, "step": 17655 }, { "epoch": 0.6399188141060491, "grad_norm": 1.9216982667846962, "learning_rate": 3.0318837950197146e-06, "loss": 0.7843, "step": 17656 }, { "epoch": 0.6399550578087058, "grad_norm": 2.339116858531964, "learning_rate": 3.031344261872985e-06, "loss": 1.0592, "step": 17657 }, { "epoch": 0.6399913015113624, "grad_norm": 2.3753143909663113, "learning_rate": 3.030804755853141e-06, "loss": 0.7789, "step": 17658 }, { "epoch": 0.6400275452140191, "grad_norm": 2.5120483169699077, "learning_rate": 3.030265276967617e-06, "loss": 0.8215, "step": 17659 }, { "epoch": 0.6400637889166757, "grad_norm": 2.279324663164375, "learning_rate": 3.029725825223844e-06, "loss": 1.0085, "step": 17660 }, { "epoch": 0.6401000326193323, "grad_norm": 2.471214074080667, "learning_rate": 3.0291864006292595e-06, "loss": 0.8052, "step": 17661 }, { "epoch": 0.6401362763219891, "grad_norm": 2.4385467837531767, "learning_rate": 3.028647003191294e-06, "loss": 0.8896, "step": 17662 }, { "epoch": 0.6401725200246458, "grad_norm": 2.767836314687235, "learning_rate": 3.0281076329173807e-06, "loss": 0.9438, "step": 17663 }, { "epoch": 0.6402087637273024, "grad_norm": 2.1690834144869493, "learning_rate": 3.02756828981495e-06, "loss": 0.8701, "step": 17664 }, { "epoch": 0.640245007429959, "grad_norm": 2.6090545689380766, "learning_rate": 3.0270289738914373e-06, "loss": 1.0575, "step": 17665 }, { "epoch": 0.6402812511326157, "grad_norm": 2.4781132536373165, "learning_rate": 3.0264896851542717e-06, "loss": 1.1134, "step": 17666 }, { "epoch": 0.6403174948352723, "grad_norm": 2.5197105482763855, "learning_rate": 3.025950423610883e-06, "loss": 1.0378, "step": 17667 }, { "epoch": 0.640353738537929, "grad_norm": 2.2502412096739244, "learning_rate": 3.0254111892687056e-06, "loss": 0.8465, "step": 17668 }, { "epoch": 0.6403899822405857, "grad_norm": 2.1800204932128353, "learning_rate": 3.024871982135168e-06, "loss": 0.8338, "step": 17669 }, { "epoch": 0.6404262259432424, "grad_norm": 2.4008191711968503, "learning_rate": 3.0243328022176995e-06, "loss": 1.0059, "step": 17670 }, { "epoch": 0.640462469645899, "grad_norm": 2.4583864287336974, "learning_rate": 3.0237936495237297e-06, "loss": 0.8604, "step": 17671 }, { "epoch": 0.6404987133485557, "grad_norm": 2.2095442071125273, "learning_rate": 3.023254524060688e-06, "loss": 0.9514, "step": 17672 }, { "epoch": 0.6405349570512123, "grad_norm": 2.442278583314704, "learning_rate": 3.022715425836005e-06, "loss": 0.851, "step": 17673 }, { "epoch": 0.640571200753869, "grad_norm": 2.702031010270421, "learning_rate": 3.022176354857107e-06, "loss": 0.8842, "step": 17674 }, { "epoch": 0.6406074444565257, "grad_norm": 2.351704237180804, "learning_rate": 3.02163731113142e-06, "loss": 0.8839, "step": 17675 }, { "epoch": 0.6406436881591824, "grad_norm": 2.4510029836067417, "learning_rate": 3.0210982946663775e-06, "loss": 0.8923, "step": 17676 }, { "epoch": 0.640679931861839, "grad_norm": 2.142296549133886, "learning_rate": 3.0205593054694032e-06, "loss": 0.8016, "step": 17677 }, { "epoch": 0.6407161755644957, "grad_norm": 2.277138501465026, "learning_rate": 3.0200203435479245e-06, "loss": 0.8644, "step": 17678 }, { "epoch": 0.6407524192671523, "grad_norm": 2.2518864392551023, "learning_rate": 3.0194814089093673e-06, "loss": 0.8109, "step": 17679 }, { "epoch": 0.640788662969809, "grad_norm": 2.7225714156784595, "learning_rate": 3.0189425015611597e-06, "loss": 0.9237, "step": 17680 }, { "epoch": 0.6408249066724656, "grad_norm": 1.9252358439368993, "learning_rate": 3.0184036215107254e-06, "loss": 0.6529, "step": 17681 }, { "epoch": 0.6408611503751224, "grad_norm": 2.8301788052572996, "learning_rate": 3.0178647687654917e-06, "loss": 0.8207, "step": 17682 }, { "epoch": 0.640897394077779, "grad_norm": 2.164686608898204, "learning_rate": 3.0173259433328794e-06, "loss": 0.7853, "step": 17683 }, { "epoch": 0.6409336377804357, "grad_norm": 2.2899763234363912, "learning_rate": 3.01678714522032e-06, "loss": 0.7939, "step": 17684 }, { "epoch": 0.6409698814830923, "grad_norm": 2.3440011045433122, "learning_rate": 3.0162483744352334e-06, "loss": 0.9932, "step": 17685 }, { "epoch": 0.641006125185749, "grad_norm": 2.326025629148403, "learning_rate": 3.0157096309850453e-06, "loss": 0.917, "step": 17686 }, { "epoch": 0.6410423688884056, "grad_norm": 2.7639839658999765, "learning_rate": 3.0151709148771756e-06, "loss": 0.9108, "step": 17687 }, { "epoch": 0.6410786125910624, "grad_norm": 2.3030777307655503, "learning_rate": 3.014632226119053e-06, "loss": 0.7829, "step": 17688 }, { "epoch": 0.641114856293719, "grad_norm": 2.3552432940251418, "learning_rate": 3.0140935647180968e-06, "loss": 0.9544, "step": 17689 }, { "epoch": 0.6411510999963757, "grad_norm": 2.4826961409657224, "learning_rate": 3.0135549306817307e-06, "loss": 0.9236, "step": 17690 }, { "epoch": 0.6411873436990323, "grad_norm": 4.055829662019492, "learning_rate": 3.0130163240173753e-06, "loss": 0.8017, "step": 17691 }, { "epoch": 0.6412235874016889, "grad_norm": 2.4068492909887764, "learning_rate": 3.0124777447324545e-06, "loss": 0.8796, "step": 17692 }, { "epoch": 0.6412598311043456, "grad_norm": 2.267195948472183, "learning_rate": 3.011939192834388e-06, "loss": 0.9624, "step": 17693 }, { "epoch": 0.6412960748070022, "grad_norm": 2.5806008217453287, "learning_rate": 3.0114006683305974e-06, "loss": 0.9357, "step": 17694 }, { "epoch": 0.641332318509659, "grad_norm": 2.2803916446729633, "learning_rate": 3.0108621712285e-06, "loss": 0.8597, "step": 17695 }, { "epoch": 0.6413685622123156, "grad_norm": 2.1474837073836968, "learning_rate": 3.0103237015355223e-06, "loss": 0.8639, "step": 17696 }, { "epoch": 0.6414048059149723, "grad_norm": 2.65834570805112, "learning_rate": 3.00978525925908e-06, "loss": 0.758, "step": 17697 }, { "epoch": 0.6414410496176289, "grad_norm": 2.3809641696245514, "learning_rate": 3.009246844406593e-06, "loss": 0.6917, "step": 17698 }, { "epoch": 0.6414772933202856, "grad_norm": 2.575241856313427, "learning_rate": 3.0087084569854795e-06, "loss": 1.011, "step": 17699 }, { "epoch": 0.6415135370229422, "grad_norm": 2.3189728982725213, "learning_rate": 3.008170097003161e-06, "loss": 0.7602, "step": 17700 }, { "epoch": 0.6415497807255989, "grad_norm": 2.2771074299581904, "learning_rate": 3.007631764467053e-06, "loss": 0.9087, "step": 17701 }, { "epoch": 0.6415860244282556, "grad_norm": 2.485577518080054, "learning_rate": 3.0070934593845746e-06, "loss": 0.859, "step": 17702 }, { "epoch": 0.6416222681309123, "grad_norm": 2.2447375333190664, "learning_rate": 3.006555181763141e-06, "loss": 1.0231, "step": 17703 }, { "epoch": 0.6416585118335689, "grad_norm": 2.441747639875709, "learning_rate": 3.006016931610174e-06, "loss": 0.7923, "step": 17704 }, { "epoch": 0.6416947555362256, "grad_norm": 2.291114502473376, "learning_rate": 3.0054787089330874e-06, "loss": 0.8767, "step": 17705 }, { "epoch": 0.6417309992388822, "grad_norm": 2.4594364461255784, "learning_rate": 3.0049405137392973e-06, "loss": 1.0068, "step": 17706 }, { "epoch": 0.6417672429415389, "grad_norm": 2.126492640538611, "learning_rate": 3.0044023460362198e-06, "loss": 0.7811, "step": 17707 }, { "epoch": 0.6418034866441956, "grad_norm": 2.2693378645084756, "learning_rate": 3.0038642058312724e-06, "loss": 0.9751, "step": 17708 }, { "epoch": 0.6418397303468523, "grad_norm": 2.423256456162548, "learning_rate": 3.0033260931318684e-06, "loss": 1.0453, "step": 17709 }, { "epoch": 0.6418759740495089, "grad_norm": 2.0760014014091563, "learning_rate": 3.0027880079454235e-06, "loss": 0.9694, "step": 17710 }, { "epoch": 0.6419122177521656, "grad_norm": 2.251267295350213, "learning_rate": 3.00224995027935e-06, "loss": 1.071, "step": 17711 }, { "epoch": 0.6419484614548222, "grad_norm": 2.122173753037882, "learning_rate": 3.001711920141066e-06, "loss": 0.9015, "step": 17712 }, { "epoch": 0.6419847051574789, "grad_norm": 2.481524999505068, "learning_rate": 3.001173917537984e-06, "loss": 0.8324, "step": 17713 }, { "epoch": 0.6420209488601355, "grad_norm": 2.397646609764263, "learning_rate": 3.0006359424775155e-06, "loss": 0.7839, "step": 17714 }, { "epoch": 0.6420571925627923, "grad_norm": 2.2156152241393903, "learning_rate": 3.0000979949670728e-06, "loss": 1.0754, "step": 17715 }, { "epoch": 0.6420934362654489, "grad_norm": 2.0943286872623665, "learning_rate": 2.9995600750140723e-06, "loss": 0.8155, "step": 17716 }, { "epoch": 0.6421296799681055, "grad_norm": 2.461474365507442, "learning_rate": 2.9990221826259245e-06, "loss": 0.8852, "step": 17717 }, { "epoch": 0.6421659236707622, "grad_norm": 2.4812721698182343, "learning_rate": 2.9984843178100404e-06, "loss": 0.9383, "step": 17718 }, { "epoch": 0.6422021673734188, "grad_norm": 2.3227987444058265, "learning_rate": 2.997946480573831e-06, "loss": 0.8899, "step": 17719 }, { "epoch": 0.6422384110760755, "grad_norm": 2.5243294671584238, "learning_rate": 2.9974086709247095e-06, "loss": 0.8865, "step": 17720 }, { "epoch": 0.6422746547787322, "grad_norm": 2.650444559304005, "learning_rate": 2.996870888870085e-06, "loss": 0.8378, "step": 17721 }, { "epoch": 0.6423108984813889, "grad_norm": 1.990042447526962, "learning_rate": 2.9963331344173684e-06, "loss": 0.8368, "step": 17722 }, { "epoch": 0.6423471421840455, "grad_norm": 2.414156783974252, "learning_rate": 2.9957954075739675e-06, "loss": 0.926, "step": 17723 }, { "epoch": 0.6423833858867022, "grad_norm": 2.650236380726556, "learning_rate": 2.9952577083472956e-06, "loss": 0.8044, "step": 17724 }, { "epoch": 0.6424196295893588, "grad_norm": 2.1494374297478225, "learning_rate": 2.9947200367447602e-06, "loss": 0.8091, "step": 17725 }, { "epoch": 0.6424558732920155, "grad_norm": 2.4776307698433455, "learning_rate": 2.9941823927737702e-06, "loss": 0.9248, "step": 17726 }, { "epoch": 0.6424921169946721, "grad_norm": 2.5880903494139123, "learning_rate": 2.9936447764417326e-06, "loss": 0.9743, "step": 17727 }, { "epoch": 0.6425283606973289, "grad_norm": 2.505962923639217, "learning_rate": 2.9931071877560575e-06, "loss": 0.9439, "step": 17728 }, { "epoch": 0.6425646043999855, "grad_norm": 2.4604149208886628, "learning_rate": 2.9925696267241514e-06, "loss": 0.8649, "step": 17729 }, { "epoch": 0.6426008481026422, "grad_norm": 2.0818261370886777, "learning_rate": 2.992032093353422e-06, "loss": 0.9585, "step": 17730 }, { "epoch": 0.6426370918052988, "grad_norm": 2.379146912040723, "learning_rate": 2.991494587651274e-06, "loss": 0.921, "step": 17731 }, { "epoch": 0.6426733355079555, "grad_norm": 2.596268312365615, "learning_rate": 2.990957109625118e-06, "loss": 0.8778, "step": 17732 }, { "epoch": 0.6427095792106121, "grad_norm": 2.2505282537747378, "learning_rate": 2.9904196592823576e-06, "loss": 0.846, "step": 17733 }, { "epoch": 0.6427458229132689, "grad_norm": 2.5020193961997768, "learning_rate": 2.989882236630399e-06, "loss": 0.8237, "step": 17734 }, { "epoch": 0.6427820666159255, "grad_norm": 2.336972320369212, "learning_rate": 2.9893448416766457e-06, "loss": 1.0384, "step": 17735 }, { "epoch": 0.6428183103185822, "grad_norm": 2.458714675935041, "learning_rate": 2.9888074744285066e-06, "loss": 0.878, "step": 17736 }, { "epoch": 0.6428545540212388, "grad_norm": 2.380434737459647, "learning_rate": 2.988270134893384e-06, "loss": 0.8376, "step": 17737 }, { "epoch": 0.6428907977238955, "grad_norm": 2.188774608698968, "learning_rate": 2.987732823078682e-06, "loss": 0.8551, "step": 17738 }, { "epoch": 0.6429270414265521, "grad_norm": 2.2509727294442086, "learning_rate": 2.9871955389918045e-06, "loss": 0.8781, "step": 17739 }, { "epoch": 0.6429632851292088, "grad_norm": 2.225093548991448, "learning_rate": 2.986658282640156e-06, "loss": 0.8618, "step": 17740 }, { "epoch": 0.6429995288318655, "grad_norm": 2.347928027884136, "learning_rate": 2.9861210540311382e-06, "loss": 1.0409, "step": 17741 }, { "epoch": 0.6430357725345222, "grad_norm": 2.590421700169694, "learning_rate": 2.9855838531721544e-06, "loss": 0.8267, "step": 17742 }, { "epoch": 0.6430720162371788, "grad_norm": 2.4072662542351, "learning_rate": 2.9850466800706046e-06, "loss": 0.7774, "step": 17743 }, { "epoch": 0.6431082599398354, "grad_norm": 2.3882743502793096, "learning_rate": 2.984509534733896e-06, "loss": 0.9883, "step": 17744 }, { "epoch": 0.6431445036424921, "grad_norm": 2.278905608095085, "learning_rate": 2.983972417169426e-06, "loss": 0.8848, "step": 17745 }, { "epoch": 0.6431807473451487, "grad_norm": 2.376970001439061, "learning_rate": 2.9834353273845975e-06, "loss": 0.9707, "step": 17746 }, { "epoch": 0.6432169910478055, "grad_norm": 2.2045930979945525, "learning_rate": 2.982898265386809e-06, "loss": 1.0158, "step": 17747 }, { "epoch": 0.6432532347504621, "grad_norm": 2.361052610133263, "learning_rate": 2.9823612311834636e-06, "loss": 0.7926, "step": 17748 }, { "epoch": 0.6432894784531188, "grad_norm": 2.4040581885120833, "learning_rate": 2.9818242247819607e-06, "loss": 0.9709, "step": 17749 }, { "epoch": 0.6433257221557754, "grad_norm": 2.226938415125913, "learning_rate": 2.981287246189697e-06, "loss": 0.7451, "step": 17750 }, { "epoch": 0.6433619658584321, "grad_norm": 2.54060237814448, "learning_rate": 2.9807502954140764e-06, "loss": 0.7807, "step": 17751 }, { "epoch": 0.6433982095610887, "grad_norm": 2.399190583564344, "learning_rate": 2.9802133724624953e-06, "loss": 1.1033, "step": 17752 }, { "epoch": 0.6434344532637454, "grad_norm": 2.304759313354681, "learning_rate": 2.9796764773423527e-06, "loss": 0.8414, "step": 17753 }, { "epoch": 0.6434706969664021, "grad_norm": 2.2904777886048566, "learning_rate": 2.9791396100610443e-06, "loss": 1.0134, "step": 17754 }, { "epoch": 0.6435069406690588, "grad_norm": 2.390151097816224, "learning_rate": 2.978602770625972e-06, "loss": 0.9652, "step": 17755 }, { "epoch": 0.6435431843717154, "grad_norm": 2.2270274358761073, "learning_rate": 2.9780659590445304e-06, "loss": 0.8832, "step": 17756 }, { "epoch": 0.6435794280743721, "grad_norm": 2.1171736279595765, "learning_rate": 2.9775291753241175e-06, "loss": 0.8394, "step": 17757 }, { "epoch": 0.6436156717770287, "grad_norm": 2.3350223358418343, "learning_rate": 2.9769924194721287e-06, "loss": 0.912, "step": 17758 }, { "epoch": 0.6436519154796854, "grad_norm": 2.1002655473004572, "learning_rate": 2.976455691495962e-06, "loss": 0.8938, "step": 17759 }, { "epoch": 0.6436881591823421, "grad_norm": 2.1821878617156445, "learning_rate": 2.9759189914030116e-06, "loss": 0.6751, "step": 17760 }, { "epoch": 0.6437244028849988, "grad_norm": 2.4697585474849078, "learning_rate": 2.975382319200674e-06, "loss": 0.869, "step": 17761 }, { "epoch": 0.6437606465876554, "grad_norm": 2.8065773171430326, "learning_rate": 2.9748456748963406e-06, "loss": 0.8844, "step": 17762 }, { "epoch": 0.6437968902903121, "grad_norm": 2.5207060482893344, "learning_rate": 2.974309058497411e-06, "loss": 0.9949, "step": 17763 }, { "epoch": 0.6438331339929687, "grad_norm": 2.396036923517405, "learning_rate": 2.973772470011278e-06, "loss": 0.9086, "step": 17764 }, { "epoch": 0.6438693776956254, "grad_norm": 2.1873537638805285, "learning_rate": 2.973235909445335e-06, "loss": 0.8398, "step": 17765 }, { "epoch": 0.643905621398282, "grad_norm": 2.4300525846229535, "learning_rate": 2.9726993768069744e-06, "loss": 0.7601, "step": 17766 }, { "epoch": 0.6439418651009388, "grad_norm": 2.309946535453891, "learning_rate": 2.972162872103591e-06, "loss": 0.8267, "step": 17767 }, { "epoch": 0.6439781088035954, "grad_norm": 2.6775739638781832, "learning_rate": 2.971626395342577e-06, "loss": 0.8585, "step": 17768 }, { "epoch": 0.644014352506252, "grad_norm": 2.0824882668226143, "learning_rate": 2.9710899465313247e-06, "loss": 0.9085, "step": 17769 }, { "epoch": 0.6440505962089087, "grad_norm": 2.2231165380270115, "learning_rate": 2.9705535256772243e-06, "loss": 0.8068, "step": 17770 }, { "epoch": 0.6440868399115653, "grad_norm": 2.5444661938874384, "learning_rate": 2.97001713278767e-06, "loss": 0.7225, "step": 17771 }, { "epoch": 0.644123083614222, "grad_norm": 2.3357423237646078, "learning_rate": 2.9694807678700527e-06, "loss": 0.9873, "step": 17772 }, { "epoch": 0.6441593273168786, "grad_norm": 2.1722866055000254, "learning_rate": 2.9689444309317617e-06, "loss": 0.5703, "step": 17773 }, { "epoch": 0.6441955710195354, "grad_norm": 2.338509845435582, "learning_rate": 2.9684081219801865e-06, "loss": 0.9836, "step": 17774 }, { "epoch": 0.644231814722192, "grad_norm": 2.4800833735808907, "learning_rate": 2.967871841022721e-06, "loss": 0.8032, "step": 17775 }, { "epoch": 0.6442680584248487, "grad_norm": 2.64790998281402, "learning_rate": 2.967335588066752e-06, "loss": 0.8579, "step": 17776 }, { "epoch": 0.6443043021275053, "grad_norm": 2.3487832542593976, "learning_rate": 2.9667993631196697e-06, "loss": 1.0277, "step": 17777 }, { "epoch": 0.644340545830162, "grad_norm": 2.255595062079616, "learning_rate": 2.9662631661888607e-06, "loss": 1.0013, "step": 17778 }, { "epoch": 0.6443767895328186, "grad_norm": 2.3507483635941773, "learning_rate": 2.965726997281717e-06, "loss": 0.7427, "step": 17779 }, { "epoch": 0.6444130332354754, "grad_norm": 2.4330298759278803, "learning_rate": 2.9651908564056253e-06, "loss": 0.933, "step": 17780 }, { "epoch": 0.644449276938132, "grad_norm": 2.271030360521303, "learning_rate": 2.964654743567973e-06, "loss": 0.9271, "step": 17781 }, { "epoch": 0.6444855206407887, "grad_norm": 2.5177496927005913, "learning_rate": 2.964118658776145e-06, "loss": 0.9707, "step": 17782 }, { "epoch": 0.6445217643434453, "grad_norm": 2.5075185124737787, "learning_rate": 2.9635826020375325e-06, "loss": 0.7517, "step": 17783 }, { "epoch": 0.644558008046102, "grad_norm": 2.4740763049813626, "learning_rate": 2.9630465733595203e-06, "loss": 0.9323, "step": 17784 }, { "epoch": 0.6445942517487586, "grad_norm": 2.522814064733762, "learning_rate": 2.962510572749494e-06, "loss": 0.9164, "step": 17785 }, { "epoch": 0.6446304954514153, "grad_norm": 2.2090460977544866, "learning_rate": 2.9619746002148397e-06, "loss": 1.0089, "step": 17786 }, { "epoch": 0.644666739154072, "grad_norm": 2.5759653656052497, "learning_rate": 2.9614386557629428e-06, "loss": 0.8254, "step": 17787 }, { "epoch": 0.6447029828567287, "grad_norm": 2.542603822091738, "learning_rate": 2.9609027394011893e-06, "loss": 1.0642, "step": 17788 }, { "epoch": 0.6447392265593853, "grad_norm": 2.547987350641105, "learning_rate": 2.960366851136962e-06, "loss": 0.9863, "step": 17789 }, { "epoch": 0.644775470262042, "grad_norm": 2.2164306180878426, "learning_rate": 2.9598309909776446e-06, "loss": 0.6366, "step": 17790 }, { "epoch": 0.6448117139646986, "grad_norm": 2.444191885535023, "learning_rate": 2.959295158930624e-06, "loss": 1.0619, "step": 17791 }, { "epoch": 0.6448479576673553, "grad_norm": 2.270603210633918, "learning_rate": 2.9587593550032822e-06, "loss": 0.9407, "step": 17792 }, { "epoch": 0.644884201370012, "grad_norm": 2.2012818625441217, "learning_rate": 2.958223579203001e-06, "loss": 0.8372, "step": 17793 }, { "epoch": 0.6449204450726687, "grad_norm": 2.4324991591718454, "learning_rate": 2.9576878315371642e-06, "loss": 0.984, "step": 17794 }, { "epoch": 0.6449566887753253, "grad_norm": 2.1323286881028976, "learning_rate": 2.9571521120131546e-06, "loss": 0.9166, "step": 17795 }, { "epoch": 0.644992932477982, "grad_norm": 2.3482612420350635, "learning_rate": 2.9566164206383537e-06, "loss": 0.9475, "step": 17796 }, { "epoch": 0.6450291761806386, "grad_norm": 2.4827309488920775, "learning_rate": 2.9560807574201422e-06, "loss": 0.8368, "step": 17797 }, { "epoch": 0.6450654198832952, "grad_norm": 2.3309107909903433, "learning_rate": 2.9555451223659e-06, "loss": 0.7943, "step": 17798 }, { "epoch": 0.6451016635859519, "grad_norm": 2.480380392047601, "learning_rate": 2.9550095154830116e-06, "loss": 0.8759, "step": 17799 }, { "epoch": 0.6451379072886086, "grad_norm": 2.288245265507353, "learning_rate": 2.954473936778855e-06, "loss": 0.8324, "step": 17800 }, { "epoch": 0.6451741509912653, "grad_norm": 2.378889748019184, "learning_rate": 2.9539383862608107e-06, "loss": 0.7787, "step": 17801 }, { "epoch": 0.6452103946939219, "grad_norm": 2.0448916466306746, "learning_rate": 2.953402863936256e-06, "loss": 0.7798, "step": 17802 }, { "epoch": 0.6452466383965786, "grad_norm": 2.298348657038042, "learning_rate": 2.952867369812574e-06, "loss": 0.99, "step": 17803 }, { "epoch": 0.6452828820992352, "grad_norm": 2.2653187190660167, "learning_rate": 2.952331903897142e-06, "loss": 0.8203, "step": 17804 }, { "epoch": 0.6453191258018919, "grad_norm": 2.097123590664027, "learning_rate": 2.9517964661973374e-06, "loss": 1.0753, "step": 17805 }, { "epoch": 0.6453553695045486, "grad_norm": 2.083211122431055, "learning_rate": 2.951261056720538e-06, "loss": 0.9387, "step": 17806 }, { "epoch": 0.6453916132072053, "grad_norm": 2.298308510007183, "learning_rate": 2.9507256754741233e-06, "loss": 0.8723, "step": 17807 }, { "epoch": 0.6454278569098619, "grad_norm": 2.493820034934536, "learning_rate": 2.950190322465469e-06, "loss": 0.9385, "step": 17808 }, { "epoch": 0.6454641006125186, "grad_norm": 2.335640612277894, "learning_rate": 2.9496549977019527e-06, "loss": 0.824, "step": 17809 }, { "epoch": 0.6455003443151752, "grad_norm": 2.26772824433841, "learning_rate": 2.949119701190949e-06, "loss": 1.035, "step": 17810 }, { "epoch": 0.6455365880178319, "grad_norm": 2.8280661615576617, "learning_rate": 2.948584432939837e-06, "loss": 0.8712, "step": 17811 }, { "epoch": 0.6455728317204885, "grad_norm": 2.254211896668291, "learning_rate": 2.9480491929559907e-06, "loss": 1.1203, "step": 17812 }, { "epoch": 0.6456090754231453, "grad_norm": 2.007389324931113, "learning_rate": 2.9475139812467855e-06, "loss": 0.8512, "step": 17813 }, { "epoch": 0.6456453191258019, "grad_norm": 2.8314635272575317, "learning_rate": 2.946978797819596e-06, "loss": 0.8986, "step": 17814 }, { "epoch": 0.6456815628284586, "grad_norm": 2.159293218048549, "learning_rate": 2.9464436426817976e-06, "loss": 0.8332, "step": 17815 }, { "epoch": 0.6457178065311152, "grad_norm": 2.1983782465678905, "learning_rate": 2.9459085158407637e-06, "loss": 0.8014, "step": 17816 }, { "epoch": 0.6457540502337719, "grad_norm": 2.33189847471048, "learning_rate": 2.9453734173038682e-06, "loss": 0.7751, "step": 17817 }, { "epoch": 0.6457902939364285, "grad_norm": 2.348656350128371, "learning_rate": 2.9448383470784823e-06, "loss": 0.941, "step": 17818 }, { "epoch": 0.6458265376390853, "grad_norm": 2.603829635288377, "learning_rate": 2.9443033051719836e-06, "loss": 0.8334, "step": 17819 }, { "epoch": 0.6458627813417419, "grad_norm": 2.3783687901204322, "learning_rate": 2.9437682915917422e-06, "loss": 0.8358, "step": 17820 }, { "epoch": 0.6458990250443986, "grad_norm": 2.1423148834320047, "learning_rate": 2.943233306345129e-06, "loss": 0.8641, "step": 17821 }, { "epoch": 0.6459352687470552, "grad_norm": 2.3357267491924967, "learning_rate": 2.942698349439516e-06, "loss": 0.9681, "step": 17822 }, { "epoch": 0.6459715124497118, "grad_norm": 2.6668921300413713, "learning_rate": 2.9421634208822774e-06, "loss": 0.9961, "step": 17823 }, { "epoch": 0.6460077561523685, "grad_norm": 2.573571385167942, "learning_rate": 2.941628520680782e-06, "loss": 0.9496, "step": 17824 }, { "epoch": 0.6460439998550251, "grad_norm": 2.2342116020941614, "learning_rate": 2.9410936488424013e-06, "loss": 0.9368, "step": 17825 }, { "epoch": 0.6460802435576819, "grad_norm": 2.393651754571073, "learning_rate": 2.940558805374503e-06, "loss": 0.8841, "step": 17826 }, { "epoch": 0.6461164872603385, "grad_norm": 2.759369508676603, "learning_rate": 2.9400239902844606e-06, "loss": 0.9515, "step": 17827 }, { "epoch": 0.6461527309629952, "grad_norm": 2.1676655009798567, "learning_rate": 2.939489203579642e-06, "loss": 0.8461, "step": 17828 }, { "epoch": 0.6461889746656518, "grad_norm": 2.6147093564742536, "learning_rate": 2.938954445267415e-06, "loss": 0.9561, "step": 17829 }, { "epoch": 0.6462252183683085, "grad_norm": 2.3131621427637983, "learning_rate": 2.938419715355148e-06, "loss": 0.8738, "step": 17830 }, { "epoch": 0.6462614620709651, "grad_norm": 2.4581827331936292, "learning_rate": 2.9378850138502123e-06, "loss": 0.9636, "step": 17831 }, { "epoch": 0.6462977057736218, "grad_norm": 2.52501269963927, "learning_rate": 2.937350340759974e-06, "loss": 1.0014, "step": 17832 }, { "epoch": 0.6463339494762785, "grad_norm": 2.2767385213155618, "learning_rate": 2.9368156960917995e-06, "loss": 0.9885, "step": 17833 }, { "epoch": 0.6463701931789352, "grad_norm": 2.840197109692988, "learning_rate": 2.936281079853058e-06, "loss": 0.9234, "step": 17834 }, { "epoch": 0.6464064368815918, "grad_norm": 2.218946563845747, "learning_rate": 2.9357464920511153e-06, "loss": 0.9506, "step": 17835 }, { "epoch": 0.6464426805842485, "grad_norm": 2.57026641275365, "learning_rate": 2.935211932693337e-06, "loss": 0.9882, "step": 17836 }, { "epoch": 0.6464789242869051, "grad_norm": 2.564569338898336, "learning_rate": 2.934677401787088e-06, "loss": 0.7758, "step": 17837 }, { "epoch": 0.6465151679895618, "grad_norm": 2.4995120283043835, "learning_rate": 2.9341428993397376e-06, "loss": 0.8214, "step": 17838 }, { "epoch": 0.6465514116922185, "grad_norm": 2.2411645033999643, "learning_rate": 2.9336084253586483e-06, "loss": 0.962, "step": 17839 }, { "epoch": 0.6465876553948752, "grad_norm": 1.9789298117074394, "learning_rate": 2.933073979851185e-06, "loss": 0.9666, "step": 17840 }, { "epoch": 0.6466238990975318, "grad_norm": 2.2745446468435273, "learning_rate": 2.9325395628247107e-06, "loss": 0.7823, "step": 17841 }, { "epoch": 0.6466601428001885, "grad_norm": 2.0189294859774174, "learning_rate": 2.9320051742865923e-06, "loss": 0.7819, "step": 17842 }, { "epoch": 0.6466963865028451, "grad_norm": 2.413982824894462, "learning_rate": 2.9314708142441924e-06, "loss": 0.9598, "step": 17843 }, { "epoch": 0.6467326302055018, "grad_norm": 2.522897708879856, "learning_rate": 2.9309364827048735e-06, "loss": 0.9422, "step": 17844 }, { "epoch": 0.6467688739081584, "grad_norm": 2.0539258408754453, "learning_rate": 2.9304021796759974e-06, "loss": 0.8581, "step": 17845 }, { "epoch": 0.6468051176108152, "grad_norm": 2.2252666487121258, "learning_rate": 2.929867905164928e-06, "loss": 0.8626, "step": 17846 }, { "epoch": 0.6468413613134718, "grad_norm": 2.3812193123557908, "learning_rate": 2.929333659179028e-06, "loss": 0.7121, "step": 17847 }, { "epoch": 0.6468776050161285, "grad_norm": 2.211616725044073, "learning_rate": 2.9287994417256576e-06, "loss": 0.9455, "step": 17848 }, { "epoch": 0.6469138487187851, "grad_norm": 2.3526120555662122, "learning_rate": 2.9282652528121756e-06, "loss": 1.0238, "step": 17849 }, { "epoch": 0.6469500924214417, "grad_norm": 2.1114285788497136, "learning_rate": 2.9277310924459485e-06, "loss": 0.8256, "step": 17850 }, { "epoch": 0.6469863361240984, "grad_norm": 2.2575892284620216, "learning_rate": 2.9271969606343333e-06, "loss": 0.7618, "step": 17851 }, { "epoch": 0.6470225798267552, "grad_norm": 2.4896451870839127, "learning_rate": 2.92666285738469e-06, "loss": 0.8474, "step": 17852 }, { "epoch": 0.6470588235294118, "grad_norm": 2.190887770669232, "learning_rate": 2.9261287827043785e-06, "loss": 0.8913, "step": 17853 }, { "epoch": 0.6470950672320684, "grad_norm": 2.505911386618953, "learning_rate": 2.925594736600758e-06, "loss": 0.7629, "step": 17854 }, { "epoch": 0.6471313109347251, "grad_norm": 2.27742552423562, "learning_rate": 2.925060719081188e-06, "loss": 0.9551, "step": 17855 }, { "epoch": 0.6471675546373817, "grad_norm": 2.0598569511567044, "learning_rate": 2.9245267301530266e-06, "loss": 0.876, "step": 17856 }, { "epoch": 0.6472037983400384, "grad_norm": 2.0877636472767844, "learning_rate": 2.9239927698236292e-06, "loss": 0.9678, "step": 17857 }, { "epoch": 0.647240042042695, "grad_norm": 2.4919015044659223, "learning_rate": 2.923458838100358e-06, "loss": 0.9727, "step": 17858 }, { "epoch": 0.6472762857453518, "grad_norm": 2.158224623729598, "learning_rate": 2.9229249349905686e-06, "loss": 0.997, "step": 17859 }, { "epoch": 0.6473125294480084, "grad_norm": 2.7056568682071136, "learning_rate": 2.922391060501617e-06, "loss": 1.1152, "step": 17860 }, { "epoch": 0.6473487731506651, "grad_norm": 2.166219250318929, "learning_rate": 2.9218572146408587e-06, "loss": 0.9956, "step": 17861 }, { "epoch": 0.6473850168533217, "grad_norm": 2.325730293740249, "learning_rate": 2.9213233974156526e-06, "loss": 0.9474, "step": 17862 }, { "epoch": 0.6474212605559784, "grad_norm": 2.4347806185221565, "learning_rate": 2.9207896088333526e-06, "loss": 0.8978, "step": 17863 }, { "epoch": 0.647457504258635, "grad_norm": 2.4166378086981557, "learning_rate": 2.9202558489013148e-06, "loss": 0.9004, "step": 17864 }, { "epoch": 0.6474937479612918, "grad_norm": 2.444884354510309, "learning_rate": 2.9197221176268923e-06, "loss": 0.9571, "step": 17865 }, { "epoch": 0.6475299916639484, "grad_norm": 2.3064782432217417, "learning_rate": 2.9191884150174416e-06, "loss": 0.986, "step": 17866 }, { "epoch": 0.6475662353666051, "grad_norm": 2.5625804925461786, "learning_rate": 2.918654741080318e-06, "loss": 0.9083, "step": 17867 }, { "epoch": 0.6476024790692617, "grad_norm": 2.6528424389313114, "learning_rate": 2.9181210958228705e-06, "loss": 0.9723, "step": 17868 }, { "epoch": 0.6476387227719184, "grad_norm": 2.3303613953703928, "learning_rate": 2.9175874792524563e-06, "loss": 0.6622, "step": 17869 }, { "epoch": 0.647674966474575, "grad_norm": 2.2275178916353218, "learning_rate": 2.917053891376428e-06, "loss": 0.8953, "step": 17870 }, { "epoch": 0.6477112101772317, "grad_norm": 2.3082384248596584, "learning_rate": 2.9165203322021364e-06, "loss": 0.9456, "step": 17871 }, { "epoch": 0.6477474538798884, "grad_norm": 2.5564546287649192, "learning_rate": 2.915986801736936e-06, "loss": 0.8836, "step": 17872 }, { "epoch": 0.6477836975825451, "grad_norm": 2.2820623504712403, "learning_rate": 2.9154532999881736e-06, "loss": 0.7757, "step": 17873 }, { "epoch": 0.6478199412852017, "grad_norm": 2.249838031382187, "learning_rate": 2.9149198269632073e-06, "loss": 0.8879, "step": 17874 }, { "epoch": 0.6478561849878584, "grad_norm": 2.323336581622845, "learning_rate": 2.914386382669383e-06, "loss": 0.8587, "step": 17875 }, { "epoch": 0.647892428690515, "grad_norm": 2.3064011458348155, "learning_rate": 2.9138529671140554e-06, "loss": 0.9718, "step": 17876 }, { "epoch": 0.6479286723931716, "grad_norm": 2.3169407046420036, "learning_rate": 2.9133195803045698e-06, "loss": 0.816, "step": 17877 }, { "epoch": 0.6479649160958284, "grad_norm": 2.7827298886847243, "learning_rate": 2.912786222248278e-06, "loss": 1.0973, "step": 17878 }, { "epoch": 0.648001159798485, "grad_norm": 2.1944955058480713, "learning_rate": 2.912252892952533e-06, "loss": 0.7075, "step": 17879 }, { "epoch": 0.6480374035011417, "grad_norm": 2.3536121678860233, "learning_rate": 2.9117195924246777e-06, "loss": 1.0035, "step": 17880 }, { "epoch": 0.6480736472037983, "grad_norm": 2.313711830668813, "learning_rate": 2.9111863206720636e-06, "loss": 0.8069, "step": 17881 }, { "epoch": 0.648109890906455, "grad_norm": 2.262239814627785, "learning_rate": 2.9106530777020394e-06, "loss": 0.7523, "step": 17882 }, { "epoch": 0.6481461346091116, "grad_norm": 2.4833729037144803, "learning_rate": 2.9101198635219528e-06, "loss": 0.9664, "step": 17883 }, { "epoch": 0.6481823783117683, "grad_norm": 2.51944962225739, "learning_rate": 2.9095866781391504e-06, "loss": 0.9511, "step": 17884 }, { "epoch": 0.648218622014425, "grad_norm": 2.495263733028347, "learning_rate": 2.9090535215609777e-06, "loss": 0.9007, "step": 17885 }, { "epoch": 0.6482548657170817, "grad_norm": 2.4454856282673623, "learning_rate": 2.9085203937947837e-06, "loss": 0.9983, "step": 17886 }, { "epoch": 0.6482911094197383, "grad_norm": 2.2322107863854472, "learning_rate": 2.9079872948479154e-06, "loss": 0.8292, "step": 17887 }, { "epoch": 0.648327353122395, "grad_norm": 2.612889588752398, "learning_rate": 2.907454224727715e-06, "loss": 0.8699, "step": 17888 }, { "epoch": 0.6483635968250516, "grad_norm": 2.3217011841065056, "learning_rate": 2.9069211834415303e-06, "loss": 1.0232, "step": 17889 }, { "epoch": 0.6483998405277083, "grad_norm": 2.191316431653268, "learning_rate": 2.9063881709967074e-06, "loss": 0.9923, "step": 17890 }, { "epoch": 0.648436084230365, "grad_norm": 2.179115250004031, "learning_rate": 2.905855187400587e-06, "loss": 0.852, "step": 17891 }, { "epoch": 0.6484723279330217, "grad_norm": 2.490852032832229, "learning_rate": 2.9053222326605185e-06, "loss": 0.7445, "step": 17892 }, { "epoch": 0.6485085716356783, "grad_norm": 2.387606160228, "learning_rate": 2.9047893067838384e-06, "loss": 0.9599, "step": 17893 }, { "epoch": 0.648544815338335, "grad_norm": 2.408844038870874, "learning_rate": 2.904256409777898e-06, "loss": 0.8635, "step": 17894 }, { "epoch": 0.6485810590409916, "grad_norm": 2.2561984421955517, "learning_rate": 2.9037235416500355e-06, "loss": 0.8569, "step": 17895 }, { "epoch": 0.6486173027436483, "grad_norm": 2.562619012904613, "learning_rate": 2.903190702407597e-06, "loss": 0.965, "step": 17896 }, { "epoch": 0.6486535464463049, "grad_norm": 2.5083333092324933, "learning_rate": 2.9026578920579196e-06, "loss": 0.9364, "step": 17897 }, { "epoch": 0.6486897901489617, "grad_norm": 2.450499110733964, "learning_rate": 2.9021251106083488e-06, "loss": 0.8291, "step": 17898 }, { "epoch": 0.6487260338516183, "grad_norm": 2.574471829901058, "learning_rate": 2.901592358066227e-06, "loss": 0.9486, "step": 17899 }, { "epoch": 0.648762277554275, "grad_norm": 2.4366099439380013, "learning_rate": 2.9010596344388913e-06, "loss": 1.0036, "step": 17900 }, { "epoch": 0.6487985212569316, "grad_norm": 2.402402950878885, "learning_rate": 2.9005269397336843e-06, "loss": 1.013, "step": 17901 }, { "epoch": 0.6488347649595883, "grad_norm": 2.2647371859930563, "learning_rate": 2.899994273957947e-06, "loss": 1.0371, "step": 17902 }, { "epoch": 0.6488710086622449, "grad_norm": 2.8114876966515094, "learning_rate": 2.89946163711902e-06, "loss": 0.9008, "step": 17903 }, { "epoch": 0.6489072523649015, "grad_norm": 2.310402576299019, "learning_rate": 2.8989290292242393e-06, "loss": 0.8023, "step": 17904 }, { "epoch": 0.6489434960675583, "grad_norm": 2.3174834141806118, "learning_rate": 2.8983964502809457e-06, "loss": 0.9427, "step": 17905 }, { "epoch": 0.648979739770215, "grad_norm": 2.5079154981549636, "learning_rate": 2.8978639002964786e-06, "loss": 0.8198, "step": 17906 }, { "epoch": 0.6490159834728716, "grad_norm": 2.2234073642990957, "learning_rate": 2.8973313792781777e-06, "loss": 0.8829, "step": 17907 }, { "epoch": 0.6490522271755282, "grad_norm": 2.270728984318469, "learning_rate": 2.8967988872333765e-06, "loss": 0.7306, "step": 17908 }, { "epoch": 0.6490884708781849, "grad_norm": 2.385289291840124, "learning_rate": 2.8962664241694137e-06, "loss": 0.8701, "step": 17909 }, { "epoch": 0.6491247145808415, "grad_norm": 2.233055978644076, "learning_rate": 2.8957339900936305e-06, "loss": 0.8294, "step": 17910 }, { "epoch": 0.6491609582834983, "grad_norm": 2.744794187611375, "learning_rate": 2.895201585013358e-06, "loss": 0.8129, "step": 17911 }, { "epoch": 0.6491972019861549, "grad_norm": 2.353058869741665, "learning_rate": 2.8946692089359347e-06, "loss": 0.7423, "step": 17912 }, { "epoch": 0.6492334456888116, "grad_norm": 3.104784414377025, "learning_rate": 2.894136861868696e-06, "loss": 0.9407, "step": 17913 }, { "epoch": 0.6492696893914682, "grad_norm": 2.4733727817824227, "learning_rate": 2.8936045438189803e-06, "loss": 0.8851, "step": 17914 }, { "epoch": 0.6493059330941249, "grad_norm": 2.155288857113467, "learning_rate": 2.893072254794118e-06, "loss": 0.8592, "step": 17915 }, { "epoch": 0.6493421767967815, "grad_norm": 2.361068324256446, "learning_rate": 2.8925399948014455e-06, "loss": 0.7341, "step": 17916 }, { "epoch": 0.6493784204994382, "grad_norm": 2.291103375110661, "learning_rate": 2.8920077638482996e-06, "loss": 0.8568, "step": 17917 }, { "epoch": 0.6494146642020949, "grad_norm": 2.528836597162472, "learning_rate": 2.89147556194201e-06, "loss": 0.7606, "step": 17918 }, { "epoch": 0.6494509079047516, "grad_norm": 2.5114680816894843, "learning_rate": 2.8909433890899137e-06, "loss": 0.9226, "step": 17919 }, { "epoch": 0.6494871516074082, "grad_norm": 2.266302108152279, "learning_rate": 2.8904112452993375e-06, "loss": 1.0366, "step": 17920 }, { "epoch": 0.6495233953100649, "grad_norm": 2.4356862519830766, "learning_rate": 2.889879130577623e-06, "loss": 0.8291, "step": 17921 }, { "epoch": 0.6495596390127215, "grad_norm": 2.523186504139705, "learning_rate": 2.889347044932096e-06, "loss": 0.995, "step": 17922 }, { "epoch": 0.6495958827153782, "grad_norm": 2.1314602066178012, "learning_rate": 2.8888149883700923e-06, "loss": 0.8067, "step": 17923 }, { "epoch": 0.6496321264180349, "grad_norm": 2.257281418510868, "learning_rate": 2.8882829608989395e-06, "loss": 0.7582, "step": 17924 }, { "epoch": 0.6496683701206916, "grad_norm": 2.5107646005506625, "learning_rate": 2.8877509625259697e-06, "loss": 0.8437, "step": 17925 }, { "epoch": 0.6497046138233482, "grad_norm": 2.1981038102449095, "learning_rate": 2.8872189932585164e-06, "loss": 0.8269, "step": 17926 }, { "epoch": 0.6497408575260049, "grad_norm": 2.6686579836696627, "learning_rate": 2.886687053103906e-06, "loss": 0.8432, "step": 17927 }, { "epoch": 0.6497771012286615, "grad_norm": 2.303849327126569, "learning_rate": 2.8861551420694695e-06, "loss": 0.7323, "step": 17928 }, { "epoch": 0.6498133449313181, "grad_norm": 2.425685675741822, "learning_rate": 2.8856232601625368e-06, "loss": 0.8124, "step": 17929 }, { "epoch": 0.6498495886339748, "grad_norm": 2.3721361854470704, "learning_rate": 2.885091407390439e-06, "loss": 0.9651, "step": 17930 }, { "epoch": 0.6498858323366316, "grad_norm": 2.124698608710564, "learning_rate": 2.8845595837605e-06, "loss": 0.8521, "step": 17931 }, { "epoch": 0.6499220760392882, "grad_norm": 2.3831075671351707, "learning_rate": 2.8840277892800513e-06, "loss": 0.8091, "step": 17932 }, { "epoch": 0.6499583197419448, "grad_norm": 2.2231267552610103, "learning_rate": 2.8834960239564193e-06, "loss": 0.8366, "step": 17933 }, { "epoch": 0.6499945634446015, "grad_norm": 2.5589297145797354, "learning_rate": 2.8829642877969342e-06, "loss": 0.9805, "step": 17934 }, { "epoch": 0.6500308071472581, "grad_norm": 2.4324536156887016, "learning_rate": 2.882432580808919e-06, "loss": 0.8445, "step": 17935 }, { "epoch": 0.6500670508499148, "grad_norm": 2.5949749386601484, "learning_rate": 2.8819009029997026e-06, "loss": 0.8577, "step": 17936 }, { "epoch": 0.6501032945525715, "grad_norm": 2.808017675700586, "learning_rate": 2.8813692543766126e-06, "loss": 0.949, "step": 17937 }, { "epoch": 0.6501395382552282, "grad_norm": 2.3252891810783782, "learning_rate": 2.880837634946971e-06, "loss": 0.8648, "step": 17938 }, { "epoch": 0.6501757819578848, "grad_norm": 2.45073487511118, "learning_rate": 2.880306044718107e-06, "loss": 0.8828, "step": 17939 }, { "epoch": 0.6502120256605415, "grad_norm": 2.4523634961120497, "learning_rate": 2.8797744836973396e-06, "loss": 0.8375, "step": 17940 }, { "epoch": 0.6502482693631981, "grad_norm": 2.482114037854136, "learning_rate": 2.879242951892002e-06, "loss": 0.9155, "step": 17941 }, { "epoch": 0.6502845130658548, "grad_norm": 2.4158512611285925, "learning_rate": 2.8787114493094114e-06, "loss": 0.8948, "step": 17942 }, { "epoch": 0.6503207567685114, "grad_norm": 1.9517020100420521, "learning_rate": 2.8781799759568962e-06, "loss": 0.7736, "step": 17943 }, { "epoch": 0.6503570004711682, "grad_norm": 2.0324422410862875, "learning_rate": 2.8776485318417756e-06, "loss": 0.6773, "step": 17944 }, { "epoch": 0.6503932441738248, "grad_norm": 2.2498910061358246, "learning_rate": 2.877117116971375e-06, "loss": 0.7746, "step": 17945 }, { "epoch": 0.6504294878764815, "grad_norm": 2.587425498680743, "learning_rate": 2.876585731353019e-06, "loss": 0.9353, "step": 17946 }, { "epoch": 0.6504657315791381, "grad_norm": 2.4203630395056854, "learning_rate": 2.8760543749940247e-06, "loss": 0.9422, "step": 17947 }, { "epoch": 0.6505019752817948, "grad_norm": 2.1421059710995283, "learning_rate": 2.8755230479017167e-06, "loss": 0.9638, "step": 17948 }, { "epoch": 0.6505382189844514, "grad_norm": 2.320525926804022, "learning_rate": 2.874991750083416e-06, "loss": 0.8486, "step": 17949 }, { "epoch": 0.6505744626871082, "grad_norm": 2.3895001222442565, "learning_rate": 2.8744604815464462e-06, "loss": 1.0533, "step": 17950 }, { "epoch": 0.6506107063897648, "grad_norm": 2.556664933485592, "learning_rate": 2.8739292422981235e-06, "loss": 1.0584, "step": 17951 }, { "epoch": 0.6506469500924215, "grad_norm": 2.356884649005958, "learning_rate": 2.87339803234577e-06, "loss": 0.8998, "step": 17952 }, { "epoch": 0.6506831937950781, "grad_norm": 2.50993622518397, "learning_rate": 2.8728668516967055e-06, "loss": 1.0805, "step": 17953 }, { "epoch": 0.6507194374977348, "grad_norm": 2.575836185053888, "learning_rate": 2.872335700358252e-06, "loss": 0.8968, "step": 17954 }, { "epoch": 0.6507556812003914, "grad_norm": 2.6738943315558585, "learning_rate": 2.8718045783377223e-06, "loss": 0.7857, "step": 17955 }, { "epoch": 0.650791924903048, "grad_norm": 2.5158161133949632, "learning_rate": 2.8712734856424395e-06, "loss": 1.1187, "step": 17956 }, { "epoch": 0.6508281686057048, "grad_norm": 2.756486157267885, "learning_rate": 2.8707424222797227e-06, "loss": 0.8522, "step": 17957 }, { "epoch": 0.6508644123083615, "grad_norm": 2.194395599493629, "learning_rate": 2.870211388256885e-06, "loss": 0.8646, "step": 17958 }, { "epoch": 0.6509006560110181, "grad_norm": 2.3488483069575112, "learning_rate": 2.8696803835812487e-06, "loss": 0.8828, "step": 17959 }, { "epoch": 0.6509368997136747, "grad_norm": 1.891824513986921, "learning_rate": 2.8691494082601245e-06, "loss": 0.713, "step": 17960 }, { "epoch": 0.6509731434163314, "grad_norm": 2.184686065372644, "learning_rate": 2.8686184623008362e-06, "loss": 0.76, "step": 17961 }, { "epoch": 0.651009387118988, "grad_norm": 2.423613249378002, "learning_rate": 2.868087545710695e-06, "loss": 0.8832, "step": 17962 }, { "epoch": 0.6510456308216448, "grad_norm": 2.2312899409182845, "learning_rate": 2.8675566584970194e-06, "loss": 0.9627, "step": 17963 }, { "epoch": 0.6510818745243014, "grad_norm": 2.5177953909395243, "learning_rate": 2.8670258006671225e-06, "loss": 0.9053, "step": 17964 }, { "epoch": 0.6511181182269581, "grad_norm": 2.514637620105626, "learning_rate": 2.866494972228319e-06, "loss": 0.9671, "step": 17965 }, { "epoch": 0.6511543619296147, "grad_norm": 2.3809591848613465, "learning_rate": 2.865964173187927e-06, "loss": 0.906, "step": 17966 }, { "epoch": 0.6511906056322714, "grad_norm": 2.1727638406010574, "learning_rate": 2.865433403553256e-06, "loss": 0.9604, "step": 17967 }, { "epoch": 0.651226849334928, "grad_norm": 2.1123025922683443, "learning_rate": 2.8649026633316218e-06, "loss": 0.769, "step": 17968 }, { "epoch": 0.6512630930375847, "grad_norm": 2.3026290362498743, "learning_rate": 2.8643719525303374e-06, "loss": 0.9387, "step": 17969 }, { "epoch": 0.6512993367402414, "grad_norm": 2.3417957793919877, "learning_rate": 2.8638412711567184e-06, "loss": 0.963, "step": 17970 }, { "epoch": 0.6513355804428981, "grad_norm": 2.417548053567854, "learning_rate": 2.863310619218072e-06, "loss": 1.0291, "step": 17971 }, { "epoch": 0.6513718241455547, "grad_norm": 2.2584659153628714, "learning_rate": 2.8627799967217142e-06, "loss": 0.9042, "step": 17972 }, { "epoch": 0.6514080678482114, "grad_norm": 2.3840425359002952, "learning_rate": 2.8622494036749544e-06, "loss": 0.8435, "step": 17973 }, { "epoch": 0.651444311550868, "grad_norm": 2.393398972489291, "learning_rate": 2.861718840085107e-06, "loss": 0.7981, "step": 17974 }, { "epoch": 0.6514805552535247, "grad_norm": 2.6282976862668366, "learning_rate": 2.861188305959479e-06, "loss": 0.8546, "step": 17975 }, { "epoch": 0.6515167989561813, "grad_norm": 2.09802989806613, "learning_rate": 2.860657801305383e-06, "loss": 0.897, "step": 17976 }, { "epoch": 0.6515530426588381, "grad_norm": 2.3456342324403576, "learning_rate": 2.8601273261301303e-06, "loss": 0.9324, "step": 17977 }, { "epoch": 0.6515892863614947, "grad_norm": 2.299007082690915, "learning_rate": 2.859596880441027e-06, "loss": 0.9929, "step": 17978 }, { "epoch": 0.6516255300641514, "grad_norm": 2.603606381821632, "learning_rate": 2.8590664642453865e-06, "loss": 0.9325, "step": 17979 }, { "epoch": 0.651661773766808, "grad_norm": 2.3066385267642278, "learning_rate": 2.858536077550511e-06, "loss": 0.9875, "step": 17980 }, { "epoch": 0.6516980174694647, "grad_norm": 2.468955435618266, "learning_rate": 2.858005720363717e-06, "loss": 0.8474, "step": 17981 }, { "epoch": 0.6517342611721213, "grad_norm": 2.0479690263032126, "learning_rate": 2.857475392692307e-06, "loss": 0.6956, "step": 17982 }, { "epoch": 0.6517705048747781, "grad_norm": 2.771403148305987, "learning_rate": 2.8569450945435927e-06, "loss": 0.9149, "step": 17983 }, { "epoch": 0.6518067485774347, "grad_norm": 2.3450177812056148, "learning_rate": 2.8564148259248763e-06, "loss": 0.7555, "step": 17984 }, { "epoch": 0.6518429922800913, "grad_norm": 2.5523553495995808, "learning_rate": 2.8558845868434676e-06, "loss": 0.8516, "step": 17985 }, { "epoch": 0.651879235982748, "grad_norm": 2.4840496831873033, "learning_rate": 2.8553543773066744e-06, "loss": 0.8298, "step": 17986 }, { "epoch": 0.6519154796854046, "grad_norm": 2.4393767683032133, "learning_rate": 2.8548241973217994e-06, "loss": 0.8879, "step": 17987 }, { "epoch": 0.6519517233880613, "grad_norm": 2.3921101240308027, "learning_rate": 2.8542940468961487e-06, "loss": 0.962, "step": 17988 }, { "epoch": 0.6519879670907179, "grad_norm": 2.42344476974288, "learning_rate": 2.853763926037029e-06, "loss": 0.7989, "step": 17989 }, { "epoch": 0.6520242107933747, "grad_norm": 2.6759228777115327, "learning_rate": 2.8532338347517457e-06, "loss": 0.9224, "step": 17990 }, { "epoch": 0.6520604544960313, "grad_norm": 2.2487440213618486, "learning_rate": 2.8527037730476005e-06, "loss": 0.7506, "step": 17991 }, { "epoch": 0.652096698198688, "grad_norm": 2.625927547650054, "learning_rate": 2.8521737409318985e-06, "loss": 0.8465, "step": 17992 }, { "epoch": 0.6521329419013446, "grad_norm": 2.318273263714264, "learning_rate": 2.8516437384119435e-06, "loss": 0.8839, "step": 17993 }, { "epoch": 0.6521691856040013, "grad_norm": 2.3218148344471587, "learning_rate": 2.85111376549504e-06, "loss": 0.901, "step": 17994 }, { "epoch": 0.6522054293066579, "grad_norm": 2.2385339561827466, "learning_rate": 2.8505838221884875e-06, "loss": 0.7821, "step": 17995 }, { "epoch": 0.6522416730093147, "grad_norm": 2.548846773092483, "learning_rate": 2.8500539084995903e-06, "loss": 0.903, "step": 17996 }, { "epoch": 0.6522779167119713, "grad_norm": 2.2197378161548413, "learning_rate": 2.8495240244356514e-06, "loss": 1.2162, "step": 17997 }, { "epoch": 0.652314160414628, "grad_norm": 2.436597079778172, "learning_rate": 2.848994170003969e-06, "loss": 1.0229, "step": 17998 }, { "epoch": 0.6523504041172846, "grad_norm": 2.360619020906988, "learning_rate": 2.8484643452118464e-06, "loss": 1.015, "step": 17999 }, { "epoch": 0.6523866478199413, "grad_norm": 2.509519949116453, "learning_rate": 2.847934550066584e-06, "loss": 1.0887, "step": 18000 }, { "epoch": 0.6524228915225979, "grad_norm": 2.0361756481620277, "learning_rate": 2.8474047845754835e-06, "loss": 0.7584, "step": 18001 }, { "epoch": 0.6524591352252546, "grad_norm": 2.337431601738856, "learning_rate": 2.846875048745842e-06, "loss": 0.7647, "step": 18002 }, { "epoch": 0.6524953789279113, "grad_norm": 2.4595315517481797, "learning_rate": 2.84634534258496e-06, "loss": 0.8618, "step": 18003 }, { "epoch": 0.652531622630568, "grad_norm": 2.479718591817222, "learning_rate": 2.8458156661001377e-06, "loss": 0.6631, "step": 18004 }, { "epoch": 0.6525678663332246, "grad_norm": 2.4698934230274063, "learning_rate": 2.845286019298671e-06, "loss": 0.8812, "step": 18005 }, { "epoch": 0.6526041100358813, "grad_norm": 2.0487749955365047, "learning_rate": 2.8447564021878625e-06, "loss": 0.7297, "step": 18006 }, { "epoch": 0.6526403537385379, "grad_norm": 2.550086995190115, "learning_rate": 2.844226814775003e-06, "loss": 0.8027, "step": 18007 }, { "epoch": 0.6526765974411946, "grad_norm": 2.250563300905976, "learning_rate": 2.843697257067398e-06, "loss": 0.9031, "step": 18008 }, { "epoch": 0.6527128411438513, "grad_norm": 2.1652013440600086, "learning_rate": 2.843167729072339e-06, "loss": 0.7699, "step": 18009 }, { "epoch": 0.652749084846508, "grad_norm": 2.3718250936185226, "learning_rate": 2.8426382307971263e-06, "loss": 0.927, "step": 18010 }, { "epoch": 0.6527853285491646, "grad_norm": 2.499610686678515, "learning_rate": 2.842108762249052e-06, "loss": 0.9825, "step": 18011 }, { "epoch": 0.6528215722518212, "grad_norm": 2.1059640984554893, "learning_rate": 2.841579323435414e-06, "loss": 1.007, "step": 18012 }, { "epoch": 0.6528578159544779, "grad_norm": 2.3134036586373297, "learning_rate": 2.841049914363507e-06, "loss": 0.8693, "step": 18013 }, { "epoch": 0.6528940596571345, "grad_norm": 2.044414801022765, "learning_rate": 2.840520535040629e-06, "loss": 0.7699, "step": 18014 }, { "epoch": 0.6529303033597912, "grad_norm": 2.165969296862984, "learning_rate": 2.8399911854740704e-06, "loss": 0.8667, "step": 18015 }, { "epoch": 0.6529665470624479, "grad_norm": 2.2504311613888714, "learning_rate": 2.839461865671126e-06, "loss": 0.9323, "step": 18016 }, { "epoch": 0.6530027907651046, "grad_norm": 2.3153329020829045, "learning_rate": 2.838932575639093e-06, "loss": 1.0231, "step": 18017 }, { "epoch": 0.6530390344677612, "grad_norm": 2.470021205257319, "learning_rate": 2.83840331538526e-06, "loss": 0.8462, "step": 18018 }, { "epoch": 0.6530752781704179, "grad_norm": 2.2815860551116853, "learning_rate": 2.8378740849169216e-06, "loss": 0.7848, "step": 18019 }, { "epoch": 0.6531115218730745, "grad_norm": 2.3506451836562037, "learning_rate": 2.8373448842413715e-06, "loss": 0.9325, "step": 18020 }, { "epoch": 0.6531477655757312, "grad_norm": 2.736774334744918, "learning_rate": 2.8368157133659024e-06, "loss": 0.8778, "step": 18021 }, { "epoch": 0.6531840092783879, "grad_norm": 2.172712678890342, "learning_rate": 2.8362865722978027e-06, "loss": 0.6309, "step": 18022 }, { "epoch": 0.6532202529810446, "grad_norm": 2.347170786764004, "learning_rate": 2.8357574610443657e-06, "loss": 0.8396, "step": 18023 }, { "epoch": 0.6532564966837012, "grad_norm": 2.289460247107804, "learning_rate": 2.835228379612883e-06, "loss": 1.0779, "step": 18024 }, { "epoch": 0.6532927403863579, "grad_norm": 2.330946642427099, "learning_rate": 2.8346993280106432e-06, "loss": 0.7613, "step": 18025 }, { "epoch": 0.6533289840890145, "grad_norm": 2.3787725769817785, "learning_rate": 2.834170306244939e-06, "loss": 1.0252, "step": 18026 }, { "epoch": 0.6533652277916712, "grad_norm": 2.360043410208681, "learning_rate": 2.8336413143230533e-06, "loss": 0.8036, "step": 18027 }, { "epoch": 0.6534014714943278, "grad_norm": 2.2564423081734435, "learning_rate": 2.8331123522522853e-06, "loss": 0.9084, "step": 18028 }, { "epoch": 0.6534377151969846, "grad_norm": 2.1299874398330787, "learning_rate": 2.832583420039916e-06, "loss": 0.7587, "step": 18029 }, { "epoch": 0.6534739588996412, "grad_norm": 2.525634320644269, "learning_rate": 2.832054517693238e-06, "loss": 0.9173, "step": 18030 }, { "epoch": 0.6535102026022979, "grad_norm": 2.49100861390261, "learning_rate": 2.831525645219536e-06, "loss": 0.9505, "step": 18031 }, { "epoch": 0.6535464463049545, "grad_norm": 2.114864921108431, "learning_rate": 2.8309968026260994e-06, "loss": 0.8955, "step": 18032 }, { "epoch": 0.6535826900076112, "grad_norm": 2.3345844137274523, "learning_rate": 2.8304679899202162e-06, "loss": 0.8964, "step": 18033 }, { "epoch": 0.6536189337102678, "grad_norm": 2.1774892203349356, "learning_rate": 2.829939207109171e-06, "loss": 0.6582, "step": 18034 }, { "epoch": 0.6536551774129246, "grad_norm": 2.5365244208300033, "learning_rate": 2.829410454200251e-06, "loss": 0.9254, "step": 18035 }, { "epoch": 0.6536914211155812, "grad_norm": 2.4941875505236504, "learning_rate": 2.8288817312007418e-06, "loss": 0.8468, "step": 18036 }, { "epoch": 0.6537276648182379, "grad_norm": 2.246266185068077, "learning_rate": 2.8283530381179316e-06, "loss": 0.7795, "step": 18037 }, { "epoch": 0.6537639085208945, "grad_norm": 2.7570951534370023, "learning_rate": 2.8278243749591004e-06, "loss": 0.9246, "step": 18038 }, { "epoch": 0.6538001522235511, "grad_norm": 2.5972842356109562, "learning_rate": 2.827295741731536e-06, "loss": 0.9401, "step": 18039 }, { "epoch": 0.6538363959262078, "grad_norm": 2.775179987204264, "learning_rate": 2.826767138442523e-06, "loss": 0.8409, "step": 18040 }, { "epoch": 0.6538726396288644, "grad_norm": 2.305694370288984, "learning_rate": 2.8262385650993457e-06, "loss": 0.8656, "step": 18041 }, { "epoch": 0.6539088833315212, "grad_norm": 2.476911934399003, "learning_rate": 2.8257100217092848e-06, "loss": 1.0038, "step": 18042 }, { "epoch": 0.6539451270341778, "grad_norm": 2.3811658695606157, "learning_rate": 2.825181508279624e-06, "loss": 0.9709, "step": 18043 }, { "epoch": 0.6539813707368345, "grad_norm": 2.4240716449574387, "learning_rate": 2.8246530248176495e-06, "loss": 0.7602, "step": 18044 }, { "epoch": 0.6540176144394911, "grad_norm": 2.285692904201286, "learning_rate": 2.8241245713306385e-06, "loss": 0.9024, "step": 18045 }, { "epoch": 0.6540538581421478, "grad_norm": 2.265721965713782, "learning_rate": 2.823596147825878e-06, "loss": 0.7586, "step": 18046 }, { "epoch": 0.6540901018448044, "grad_norm": 2.3855719374685913, "learning_rate": 2.823067754310641e-06, "loss": 0.8061, "step": 18047 }, { "epoch": 0.6541263455474611, "grad_norm": 2.162196625741571, "learning_rate": 2.822539390792218e-06, "loss": 0.8669, "step": 18048 }, { "epoch": 0.6541625892501178, "grad_norm": 2.3863305899896887, "learning_rate": 2.8220110572778836e-06, "loss": 0.8654, "step": 18049 }, { "epoch": 0.6541988329527745, "grad_norm": 2.474675288377833, "learning_rate": 2.8214827537749212e-06, "loss": 0.9973, "step": 18050 }, { "epoch": 0.6542350766554311, "grad_norm": 2.511380620285909, "learning_rate": 2.8209544802906075e-06, "loss": 0.7613, "step": 18051 }, { "epoch": 0.6542713203580878, "grad_norm": 2.544117323832052, "learning_rate": 2.8204262368322226e-06, "loss": 0.8827, "step": 18052 }, { "epoch": 0.6543075640607444, "grad_norm": 2.3679278983561356, "learning_rate": 2.8198980234070485e-06, "loss": 0.7769, "step": 18053 }, { "epoch": 0.6543438077634011, "grad_norm": 2.011925778337735, "learning_rate": 2.819369840022359e-06, "loss": 0.8239, "step": 18054 }, { "epoch": 0.6543800514660578, "grad_norm": 2.284159178148137, "learning_rate": 2.818841686685434e-06, "loss": 0.933, "step": 18055 }, { "epoch": 0.6544162951687145, "grad_norm": 1.883808920505358, "learning_rate": 2.818313563403552e-06, "loss": 0.5338, "step": 18056 }, { "epoch": 0.6544525388713711, "grad_norm": 2.2475760822306463, "learning_rate": 2.817785470183991e-06, "loss": 0.7478, "step": 18057 }, { "epoch": 0.6544887825740278, "grad_norm": 2.7484246498112044, "learning_rate": 2.817257407034025e-06, "loss": 0.9844, "step": 18058 }, { "epoch": 0.6545250262766844, "grad_norm": 2.11156744121175, "learning_rate": 2.816729373960932e-06, "loss": 0.7861, "step": 18059 }, { "epoch": 0.654561269979341, "grad_norm": 2.341676768734576, "learning_rate": 2.816201370971987e-06, "loss": 0.985, "step": 18060 }, { "epoch": 0.6545975136819977, "grad_norm": 2.202467676484715, "learning_rate": 2.815673398074469e-06, "loss": 1.0036, "step": 18061 }, { "epoch": 0.6546337573846545, "grad_norm": 2.0570640474407504, "learning_rate": 2.8151454552756484e-06, "loss": 0.8363, "step": 18062 }, { "epoch": 0.6546700010873111, "grad_norm": 2.507467292596539, "learning_rate": 2.8146175425828027e-06, "loss": 0.9307, "step": 18063 }, { "epoch": 0.6547062447899678, "grad_norm": 2.173330835055859, "learning_rate": 2.8140896600032065e-06, "loss": 0.8513, "step": 18064 }, { "epoch": 0.6547424884926244, "grad_norm": 2.370820291004998, "learning_rate": 2.813561807544132e-06, "loss": 1.0374, "step": 18065 }, { "epoch": 0.654778732195281, "grad_norm": 2.4646739678974368, "learning_rate": 2.8130339852128542e-06, "loss": 0.9022, "step": 18066 }, { "epoch": 0.6548149758979377, "grad_norm": 2.6053347013648906, "learning_rate": 2.812506193016642e-06, "loss": 0.9671, "step": 18067 }, { "epoch": 0.6548512196005944, "grad_norm": 2.339714030574003, "learning_rate": 2.811978430962775e-06, "loss": 0.6768, "step": 18068 }, { "epoch": 0.6548874633032511, "grad_norm": 2.330972601262243, "learning_rate": 2.811450699058521e-06, "loss": 0.897, "step": 18069 }, { "epoch": 0.6549237070059077, "grad_norm": 1.9916720204334295, "learning_rate": 2.810922997311154e-06, "loss": 0.6935, "step": 18070 }, { "epoch": 0.6549599507085644, "grad_norm": 2.4114080682409647, "learning_rate": 2.8103953257279428e-06, "loss": 0.8537, "step": 18071 }, { "epoch": 0.654996194411221, "grad_norm": 2.4020799346911597, "learning_rate": 2.8098676843161594e-06, "loss": 0.9184, "step": 18072 }, { "epoch": 0.6550324381138777, "grad_norm": 2.3820731032723277, "learning_rate": 2.8093400730830765e-06, "loss": 1.1435, "step": 18073 }, { "epoch": 0.6550686818165343, "grad_norm": 2.3131750056523983, "learning_rate": 2.8088124920359588e-06, "loss": 0.8242, "step": 18074 }, { "epoch": 0.6551049255191911, "grad_norm": 2.2116943204238666, "learning_rate": 2.808284941182084e-06, "loss": 0.8777, "step": 18075 }, { "epoch": 0.6551411692218477, "grad_norm": 2.3026212669416752, "learning_rate": 2.807757420528715e-06, "loss": 0.9867, "step": 18076 }, { "epoch": 0.6551774129245044, "grad_norm": 2.4286619183143707, "learning_rate": 2.8072299300831248e-06, "loss": 0.9135, "step": 18077 }, { "epoch": 0.655213656627161, "grad_norm": 2.420440376176687, "learning_rate": 2.8067024698525756e-06, "loss": 0.9288, "step": 18078 }, { "epoch": 0.6552499003298177, "grad_norm": 2.618925889521013, "learning_rate": 2.8061750398443446e-06, "loss": 0.9944, "step": 18079 }, { "epoch": 0.6552861440324743, "grad_norm": 2.2384088835198472, "learning_rate": 2.805647640065693e-06, "loss": 0.9049, "step": 18080 }, { "epoch": 0.6553223877351311, "grad_norm": 2.2569763844570074, "learning_rate": 2.8051202705238912e-06, "loss": 0.8252, "step": 18081 }, { "epoch": 0.6553586314377877, "grad_norm": 2.2902771656799112, "learning_rate": 2.8045929312262028e-06, "loss": 0.8871, "step": 18082 }, { "epoch": 0.6553948751404444, "grad_norm": 2.321689132901426, "learning_rate": 2.804065622179896e-06, "loss": 0.8818, "step": 18083 }, { "epoch": 0.655431118843101, "grad_norm": 2.5780876570633398, "learning_rate": 2.803538343392239e-06, "loss": 1.0675, "step": 18084 }, { "epoch": 0.6554673625457577, "grad_norm": 2.407094755774966, "learning_rate": 2.8030110948704935e-06, "loss": 1.1347, "step": 18085 }, { "epoch": 0.6555036062484143, "grad_norm": 2.54801537520598, "learning_rate": 2.8024838766219257e-06, "loss": 0.6512, "step": 18086 }, { "epoch": 0.655539849951071, "grad_norm": 2.2416420165099864, "learning_rate": 2.801956688653802e-06, "loss": 0.7324, "step": 18087 }, { "epoch": 0.6555760936537277, "grad_norm": 2.5155581425714213, "learning_rate": 2.8014295309733867e-06, "loss": 0.9792, "step": 18088 }, { "epoch": 0.6556123373563844, "grad_norm": 2.68742060309692, "learning_rate": 2.8009024035879417e-06, "loss": 0.9544, "step": 18089 }, { "epoch": 0.655648581059041, "grad_norm": 2.3645786468013523, "learning_rate": 2.800375306504731e-06, "loss": 0.8067, "step": 18090 }, { "epoch": 0.6556848247616976, "grad_norm": 2.3442227490714767, "learning_rate": 2.7998482397310212e-06, "loss": 0.7613, "step": 18091 }, { "epoch": 0.6557210684643543, "grad_norm": 2.3272754471607064, "learning_rate": 2.799321203274069e-06, "loss": 0.8759, "step": 18092 }, { "epoch": 0.6557573121670109, "grad_norm": 2.4262872673105713, "learning_rate": 2.7987941971411426e-06, "loss": 0.8847, "step": 18093 }, { "epoch": 0.6557935558696677, "grad_norm": 2.3766357613920475, "learning_rate": 2.7982672213394963e-06, "loss": 1.1172, "step": 18094 }, { "epoch": 0.6558297995723243, "grad_norm": 2.359885658501627, "learning_rate": 2.7977402758764004e-06, "loss": 0.8382, "step": 18095 }, { "epoch": 0.655866043274981, "grad_norm": 2.22227412040327, "learning_rate": 2.797213360759109e-06, "loss": 0.835, "step": 18096 }, { "epoch": 0.6559022869776376, "grad_norm": 2.1710273854610533, "learning_rate": 2.796686475994888e-06, "loss": 0.8592, "step": 18097 }, { "epoch": 0.6559385306802943, "grad_norm": 2.1578372543269166, "learning_rate": 2.7961596215909926e-06, "loss": 0.7645, "step": 18098 }, { "epoch": 0.6559747743829509, "grad_norm": 2.275522526625753, "learning_rate": 2.795632797554685e-06, "loss": 0.9285, "step": 18099 }, { "epoch": 0.6560110180856076, "grad_norm": 2.411935312041686, "learning_rate": 2.795106003893224e-06, "loss": 1.0158, "step": 18100 }, { "epoch": 0.6560472617882643, "grad_norm": 2.258876501971082, "learning_rate": 2.7945792406138706e-06, "loss": 0.8039, "step": 18101 }, { "epoch": 0.656083505490921, "grad_norm": 2.1628529574928046, "learning_rate": 2.79405250772388e-06, "loss": 0.9722, "step": 18102 }, { "epoch": 0.6561197491935776, "grad_norm": 2.3550686000675576, "learning_rate": 2.7935258052305115e-06, "loss": 0.7035, "step": 18103 }, { "epoch": 0.6561559928962343, "grad_norm": 2.193848424047163, "learning_rate": 2.792999133141025e-06, "loss": 0.9306, "step": 18104 }, { "epoch": 0.6561922365988909, "grad_norm": 2.392146125742511, "learning_rate": 2.7924724914626733e-06, "loss": 0.9997, "step": 18105 }, { "epoch": 0.6562284803015476, "grad_norm": 2.409130765770873, "learning_rate": 2.7919458802027165e-06, "loss": 0.9046, "step": 18106 }, { "epoch": 0.6562647240042042, "grad_norm": 2.4710716711281684, "learning_rate": 2.7914192993684092e-06, "loss": 0.9208, "step": 18107 }, { "epoch": 0.656300967706861, "grad_norm": 2.2971711516616526, "learning_rate": 2.7908927489670107e-06, "loss": 0.8826, "step": 18108 }, { "epoch": 0.6563372114095176, "grad_norm": 2.292151622902507, "learning_rate": 2.790366229005772e-06, "loss": 0.9364, "step": 18109 }, { "epoch": 0.6563734551121743, "grad_norm": 2.525375973069948, "learning_rate": 2.78983973949195e-06, "loss": 0.8634, "step": 18110 }, { "epoch": 0.6564096988148309, "grad_norm": 2.452517314281546, "learning_rate": 2.789313280432802e-06, "loss": 0.9463, "step": 18111 }, { "epoch": 0.6564459425174876, "grad_norm": 2.5306609595969882, "learning_rate": 2.7887868518355782e-06, "loss": 0.7647, "step": 18112 }, { "epoch": 0.6564821862201442, "grad_norm": 2.357435985049955, "learning_rate": 2.7882604537075364e-06, "loss": 1.1059, "step": 18113 }, { "epoch": 0.656518429922801, "grad_norm": 2.5092288570714616, "learning_rate": 2.787734086055923e-06, "loss": 1.1104, "step": 18114 }, { "epoch": 0.6565546736254576, "grad_norm": 2.1328358503010207, "learning_rate": 2.7872077488880013e-06, "loss": 0.8325, "step": 18115 }, { "epoch": 0.6565909173281143, "grad_norm": 2.38871205645729, "learning_rate": 2.7866814422110156e-06, "loss": 0.9153, "step": 18116 }, { "epoch": 0.6566271610307709, "grad_norm": 2.4434559887416567, "learning_rate": 2.7861551660322236e-06, "loss": 0.9513, "step": 18117 }, { "epoch": 0.6566634047334275, "grad_norm": 2.6111402211725325, "learning_rate": 2.7856289203588717e-06, "loss": 0.9454, "step": 18118 }, { "epoch": 0.6566996484360842, "grad_norm": 2.375282891522687, "learning_rate": 2.785102705198215e-06, "loss": 0.8822, "step": 18119 }, { "epoch": 0.6567358921387408, "grad_norm": 2.20691634761793, "learning_rate": 2.7845765205575047e-06, "loss": 0.952, "step": 18120 }, { "epoch": 0.6567721358413976, "grad_norm": 2.581018435512528, "learning_rate": 2.7840503664439887e-06, "loss": 0.9925, "step": 18121 }, { "epoch": 0.6568083795440542, "grad_norm": 2.6380764676150563, "learning_rate": 2.7835242428649182e-06, "loss": 0.752, "step": 18122 }, { "epoch": 0.6568446232467109, "grad_norm": 2.5528487537181834, "learning_rate": 2.7829981498275434e-06, "loss": 0.995, "step": 18123 }, { "epoch": 0.6568808669493675, "grad_norm": 2.106315218490287, "learning_rate": 2.7824720873391153e-06, "loss": 0.7744, "step": 18124 }, { "epoch": 0.6569171106520242, "grad_norm": 2.082517399725309, "learning_rate": 2.7819460554068785e-06, "loss": 0.9408, "step": 18125 }, { "epoch": 0.6569533543546808, "grad_norm": 2.2296795663972793, "learning_rate": 2.7814200540380837e-06, "loss": 0.9165, "step": 18126 }, { "epoch": 0.6569895980573376, "grad_norm": 2.3538936468672045, "learning_rate": 2.780894083239979e-06, "loss": 0.8284, "step": 18127 }, { "epoch": 0.6570258417599942, "grad_norm": 2.346193064310918, "learning_rate": 2.780368143019813e-06, "loss": 0.8734, "step": 18128 }, { "epoch": 0.6570620854626509, "grad_norm": 2.3209672837590256, "learning_rate": 2.7798422333848306e-06, "loss": 0.8485, "step": 18129 }, { "epoch": 0.6570983291653075, "grad_norm": 2.0473591264838764, "learning_rate": 2.7793163543422784e-06, "loss": 0.7645, "step": 18130 }, { "epoch": 0.6571345728679642, "grad_norm": 2.334592051060027, "learning_rate": 2.778790505899407e-06, "loss": 0.7707, "step": 18131 }, { "epoch": 0.6571708165706208, "grad_norm": 2.5711970522044294, "learning_rate": 2.7782646880634567e-06, "loss": 0.891, "step": 18132 }, { "epoch": 0.6572070602732775, "grad_norm": 2.018885262702845, "learning_rate": 2.7777389008416765e-06, "loss": 0.7292, "step": 18133 }, { "epoch": 0.6572433039759342, "grad_norm": 2.3717874965193313, "learning_rate": 2.7772131442413075e-06, "loss": 0.7628, "step": 18134 }, { "epoch": 0.6572795476785909, "grad_norm": 2.1959693078976987, "learning_rate": 2.7766874182696003e-06, "loss": 1.0853, "step": 18135 }, { "epoch": 0.6573157913812475, "grad_norm": 2.1693124231628484, "learning_rate": 2.7761617229337946e-06, "loss": 0.8737, "step": 18136 }, { "epoch": 0.6573520350839042, "grad_norm": 2.5604140383766927, "learning_rate": 2.7756360582411366e-06, "loss": 0.8797, "step": 18137 }, { "epoch": 0.6573882787865608, "grad_norm": 2.748181466044939, "learning_rate": 2.775110424198867e-06, "loss": 0.9864, "step": 18138 }, { "epoch": 0.6574245224892175, "grad_norm": 2.282421171053923, "learning_rate": 2.7745848208142296e-06, "loss": 0.871, "step": 18139 }, { "epoch": 0.6574607661918742, "grad_norm": 2.7005377739420164, "learning_rate": 2.7740592480944694e-06, "loss": 1.0598, "step": 18140 }, { "epoch": 0.6574970098945309, "grad_norm": 2.1887158740907844, "learning_rate": 2.7735337060468255e-06, "loss": 1.0227, "step": 18141 }, { "epoch": 0.6575332535971875, "grad_norm": 2.3456468179588237, "learning_rate": 2.77300819467854e-06, "loss": 0.9841, "step": 18142 }, { "epoch": 0.6575694972998442, "grad_norm": 2.5260502870924375, "learning_rate": 2.7724827139968544e-06, "loss": 0.9148, "step": 18143 }, { "epoch": 0.6576057410025008, "grad_norm": 2.524950394722631, "learning_rate": 2.771957264009013e-06, "loss": 0.8691, "step": 18144 }, { "epoch": 0.6576419847051574, "grad_norm": 2.391304615613128, "learning_rate": 2.77143184472225e-06, "loss": 0.7734, "step": 18145 }, { "epoch": 0.6576782284078141, "grad_norm": 2.389659992239743, "learning_rate": 2.7709064561438085e-06, "loss": 0.9639, "step": 18146 }, { "epoch": 0.6577144721104708, "grad_norm": 2.6317347705216627, "learning_rate": 2.7703810982809287e-06, "loss": 0.9038, "step": 18147 }, { "epoch": 0.6577507158131275, "grad_norm": 2.5205652527389044, "learning_rate": 2.7698557711408503e-06, "loss": 1.027, "step": 18148 }, { "epoch": 0.6577869595157841, "grad_norm": 2.4517164771706366, "learning_rate": 2.7693304747308083e-06, "loss": 0.8459, "step": 18149 }, { "epoch": 0.6578232032184408, "grad_norm": 1.9993573486627376, "learning_rate": 2.768805209058044e-06, "loss": 0.6525, "step": 18150 }, { "epoch": 0.6578594469210974, "grad_norm": 2.561310435810337, "learning_rate": 2.768279974129796e-06, "loss": 0.822, "step": 18151 }, { "epoch": 0.6578956906237541, "grad_norm": 2.287937118108138, "learning_rate": 2.767754769953298e-06, "loss": 0.8791, "step": 18152 }, { "epoch": 0.6579319343264108, "grad_norm": 2.434967599654096, "learning_rate": 2.767229596535792e-06, "loss": 0.8839, "step": 18153 }, { "epoch": 0.6579681780290675, "grad_norm": 2.3774171055421127, "learning_rate": 2.766704453884508e-06, "loss": 1.037, "step": 18154 }, { "epoch": 0.6580044217317241, "grad_norm": 1.9874114143205324, "learning_rate": 2.7661793420066896e-06, "loss": 0.7848, "step": 18155 }, { "epoch": 0.6580406654343808, "grad_norm": 2.2295391116408982, "learning_rate": 2.765654260909566e-06, "loss": 0.8521, "step": 18156 }, { "epoch": 0.6580769091370374, "grad_norm": 2.3373949562386303, "learning_rate": 2.765129210600377e-06, "loss": 0.983, "step": 18157 }, { "epoch": 0.6581131528396941, "grad_norm": 2.176157771795135, "learning_rate": 2.7646041910863565e-06, "loss": 0.7767, "step": 18158 }, { "epoch": 0.6581493965423507, "grad_norm": 2.1685823250689866, "learning_rate": 2.764079202374737e-06, "loss": 0.8278, "step": 18159 }, { "epoch": 0.6581856402450075, "grad_norm": 2.078537541910369, "learning_rate": 2.763554244472756e-06, "loss": 0.7761, "step": 18160 }, { "epoch": 0.6582218839476641, "grad_norm": 2.1713944213769762, "learning_rate": 2.7630293173876404e-06, "loss": 0.9628, "step": 18161 }, { "epoch": 0.6582581276503208, "grad_norm": 2.290001090611771, "learning_rate": 2.7625044211266323e-06, "loss": 1.0401, "step": 18162 }, { "epoch": 0.6582943713529774, "grad_norm": 2.250834341915784, "learning_rate": 2.7619795556969582e-06, "loss": 1.0555, "step": 18163 }, { "epoch": 0.6583306150556341, "grad_norm": 2.0095872761358216, "learning_rate": 2.761454721105854e-06, "loss": 0.7258, "step": 18164 }, { "epoch": 0.6583668587582907, "grad_norm": 2.6146666420277063, "learning_rate": 2.7609299173605465e-06, "loss": 0.8709, "step": 18165 }, { "epoch": 0.6584031024609475, "grad_norm": 2.336878272400364, "learning_rate": 2.760405144468273e-06, "loss": 0.8742, "step": 18166 }, { "epoch": 0.6584393461636041, "grad_norm": 2.6571780403772234, "learning_rate": 2.7598804024362617e-06, "loss": 0.9328, "step": 18167 }, { "epoch": 0.6584755898662608, "grad_norm": 2.308566665722893, "learning_rate": 2.7593556912717456e-06, "loss": 0.8844, "step": 18168 }, { "epoch": 0.6585118335689174, "grad_norm": 2.1968385140128346, "learning_rate": 2.7588310109819507e-06, "loss": 0.7321, "step": 18169 }, { "epoch": 0.658548077271574, "grad_norm": 2.361882560897315, "learning_rate": 2.758306361574109e-06, "loss": 0.8341, "step": 18170 }, { "epoch": 0.6585843209742307, "grad_norm": 2.4047630866719127, "learning_rate": 2.7577817430554515e-06, "loss": 0.9035, "step": 18171 }, { "epoch": 0.6586205646768873, "grad_norm": 2.2560947880662923, "learning_rate": 2.757257155433204e-06, "loss": 0.9279, "step": 18172 }, { "epoch": 0.6586568083795441, "grad_norm": 2.252125464273271, "learning_rate": 2.7567325987145965e-06, "loss": 0.8552, "step": 18173 }, { "epoch": 0.6586930520822007, "grad_norm": 2.3701850403761147, "learning_rate": 2.756208072906857e-06, "loss": 0.9519, "step": 18174 }, { "epoch": 0.6587292957848574, "grad_norm": 2.5833793381893555, "learning_rate": 2.755683578017215e-06, "loss": 0.8649, "step": 18175 }, { "epoch": 0.658765539487514, "grad_norm": 2.4139478028642727, "learning_rate": 2.7551591140528943e-06, "loss": 0.9537, "step": 18176 }, { "epoch": 0.6588017831901707, "grad_norm": 2.3475558100151734, "learning_rate": 2.7546346810211235e-06, "loss": 0.767, "step": 18177 }, { "epoch": 0.6588380268928273, "grad_norm": 2.213615100343981, "learning_rate": 2.7541102789291306e-06, "loss": 0.846, "step": 18178 }, { "epoch": 0.658874270595484, "grad_norm": 2.3203827371355326, "learning_rate": 2.7535859077841382e-06, "loss": 1.1741, "step": 18179 }, { "epoch": 0.6589105142981407, "grad_norm": 2.678642411320525, "learning_rate": 2.753061567593375e-06, "loss": 0.9984, "step": 18180 }, { "epoch": 0.6589467580007974, "grad_norm": 2.4160075817725364, "learning_rate": 2.752537258364061e-06, "loss": 0.8634, "step": 18181 }, { "epoch": 0.658983001703454, "grad_norm": 2.3312501317848016, "learning_rate": 2.752012980103427e-06, "loss": 0.9817, "step": 18182 }, { "epoch": 0.6590192454061107, "grad_norm": 2.4116084382842735, "learning_rate": 2.751488732818694e-06, "loss": 0.9564, "step": 18183 }, { "epoch": 0.6590554891087673, "grad_norm": 2.527588549218705, "learning_rate": 2.750964516517088e-06, "loss": 1.03, "step": 18184 }, { "epoch": 0.659091732811424, "grad_norm": 2.4665032082055793, "learning_rate": 2.750440331205827e-06, "loss": 0.9594, "step": 18185 }, { "epoch": 0.6591279765140807, "grad_norm": 2.3807641316078123, "learning_rate": 2.749916176892141e-06, "loss": 0.7017, "step": 18186 }, { "epoch": 0.6591642202167374, "grad_norm": 2.327691027854656, "learning_rate": 2.7493920535832476e-06, "loss": 0.9662, "step": 18187 }, { "epoch": 0.659200463919394, "grad_norm": 2.1596643844275634, "learning_rate": 2.7488679612863727e-06, "loss": 0.5962, "step": 18188 }, { "epoch": 0.6592367076220507, "grad_norm": 2.368755794146885, "learning_rate": 2.748343900008734e-06, "loss": 0.8826, "step": 18189 }, { "epoch": 0.6592729513247073, "grad_norm": 2.5965122279641215, "learning_rate": 2.7478198697575542e-06, "loss": 0.8851, "step": 18190 }, { "epoch": 0.659309195027364, "grad_norm": 2.416926693964487, "learning_rate": 2.747295870540057e-06, "loss": 0.8869, "step": 18191 }, { "epoch": 0.6593454387300206, "grad_norm": 2.3399952661025987, "learning_rate": 2.746771902363458e-06, "loss": 0.9479, "step": 18192 }, { "epoch": 0.6593816824326774, "grad_norm": 2.3714615621566324, "learning_rate": 2.74624796523498e-06, "loss": 1.0277, "step": 18193 }, { "epoch": 0.659417926135334, "grad_norm": 2.334173088249931, "learning_rate": 2.7457240591618417e-06, "loss": 0.7697, "step": 18194 }, { "epoch": 0.6594541698379907, "grad_norm": 2.3594232939402593, "learning_rate": 2.7452001841512642e-06, "loss": 0.9741, "step": 18195 }, { "epoch": 0.6594904135406473, "grad_norm": 2.3141513583053723, "learning_rate": 2.7446763402104626e-06, "loss": 0.8699, "step": 18196 }, { "epoch": 0.659526657243304, "grad_norm": 2.6586080236769556, "learning_rate": 2.7441525273466573e-06, "loss": 0.945, "step": 18197 }, { "epoch": 0.6595629009459606, "grad_norm": 2.3016640192968256, "learning_rate": 2.7436287455670672e-06, "loss": 0.8673, "step": 18198 }, { "epoch": 0.6595991446486174, "grad_norm": 2.281293299593708, "learning_rate": 2.7431049948789074e-06, "loss": 0.8886, "step": 18199 }, { "epoch": 0.659635388351274, "grad_norm": 2.424959579032237, "learning_rate": 2.742581275289398e-06, "loss": 0.7567, "step": 18200 }, { "epoch": 0.6596716320539306, "grad_norm": 2.234320426072643, "learning_rate": 2.742057586805748e-06, "loss": 0.7195, "step": 18201 }, { "epoch": 0.6597078757565873, "grad_norm": 2.5905232331462362, "learning_rate": 2.741533929435184e-06, "loss": 0.9234, "step": 18202 }, { "epoch": 0.6597441194592439, "grad_norm": 2.4449634146517614, "learning_rate": 2.741010303184914e-06, "loss": 0.8043, "step": 18203 }, { "epoch": 0.6597803631619006, "grad_norm": 2.282576550435689, "learning_rate": 2.7404867080621577e-06, "loss": 0.818, "step": 18204 }, { "epoch": 0.6598166068645572, "grad_norm": 2.1829630532300124, "learning_rate": 2.739963144074127e-06, "loss": 0.7502, "step": 18205 }, { "epoch": 0.659852850567214, "grad_norm": 2.1900614578264244, "learning_rate": 2.7394396112280363e-06, "loss": 0.7647, "step": 18206 }, { "epoch": 0.6598890942698706, "grad_norm": 2.3370003157162293, "learning_rate": 2.7389161095311032e-06, "loss": 0.7651, "step": 18207 }, { "epoch": 0.6599253379725273, "grad_norm": 1.9699995025583332, "learning_rate": 2.738392638990537e-06, "loss": 0.7878, "step": 18208 }, { "epoch": 0.6599615816751839, "grad_norm": 2.389507868528076, "learning_rate": 2.7378691996135515e-06, "loss": 0.9256, "step": 18209 }, { "epoch": 0.6599978253778406, "grad_norm": 2.0108649172270887, "learning_rate": 2.737345791407361e-06, "loss": 0.772, "step": 18210 }, { "epoch": 0.6600340690804972, "grad_norm": 2.65589346634156, "learning_rate": 2.736822414379179e-06, "loss": 0.8397, "step": 18211 }, { "epoch": 0.660070312783154, "grad_norm": 2.0765940744070055, "learning_rate": 2.7362990685362133e-06, "loss": 0.9914, "step": 18212 }, { "epoch": 0.6601065564858106, "grad_norm": 2.377873323185485, "learning_rate": 2.7357757538856773e-06, "loss": 0.8322, "step": 18213 }, { "epoch": 0.6601428001884673, "grad_norm": 2.2637761483954746, "learning_rate": 2.735252470434782e-06, "loss": 0.7983, "step": 18214 }, { "epoch": 0.6601790438911239, "grad_norm": 2.4067232855313425, "learning_rate": 2.73472921819074e-06, "loss": 0.8607, "step": 18215 }, { "epoch": 0.6602152875937806, "grad_norm": 2.6256060109261505, "learning_rate": 2.7342059971607576e-06, "loss": 0.9698, "step": 18216 }, { "epoch": 0.6602515312964372, "grad_norm": 2.427463711679838, "learning_rate": 2.733682807352046e-06, "loss": 0.8564, "step": 18217 }, { "epoch": 0.6602877749990939, "grad_norm": 2.3165449740411517, "learning_rate": 2.733159648771817e-06, "loss": 1.0656, "step": 18218 }, { "epoch": 0.6603240187017506, "grad_norm": 2.0681428365394923, "learning_rate": 2.7326365214272753e-06, "loss": 0.8983, "step": 18219 }, { "epoch": 0.6603602624044073, "grad_norm": 2.481238147092909, "learning_rate": 2.7321134253256325e-06, "loss": 1.0548, "step": 18220 }, { "epoch": 0.6603965061070639, "grad_norm": 2.1235325453622163, "learning_rate": 2.731590360474091e-06, "loss": 0.8789, "step": 18221 }, { "epoch": 0.6604327498097206, "grad_norm": 2.482120404697021, "learning_rate": 2.731067326879867e-06, "loss": 0.7702, "step": 18222 }, { "epoch": 0.6604689935123772, "grad_norm": 2.3533129383734006, "learning_rate": 2.7305443245501604e-06, "loss": 1.1619, "step": 18223 }, { "epoch": 0.6605052372150338, "grad_norm": 2.6264162212459268, "learning_rate": 2.7300213534921837e-06, "loss": 1.0141, "step": 18224 }, { "epoch": 0.6605414809176906, "grad_norm": 2.8949747168441418, "learning_rate": 2.729498413713137e-06, "loss": 0.7637, "step": 18225 }, { "epoch": 0.6605777246203473, "grad_norm": 2.589451547314767, "learning_rate": 2.7289755052202294e-06, "loss": 0.8607, "step": 18226 }, { "epoch": 0.6606139683230039, "grad_norm": 2.3938083462748647, "learning_rate": 2.728452628020667e-06, "loss": 0.8907, "step": 18227 }, { "epoch": 0.6606502120256605, "grad_norm": 2.1760500287379285, "learning_rate": 2.727929782121652e-06, "loss": 0.9418, "step": 18228 }, { "epoch": 0.6606864557283172, "grad_norm": 2.2165503806334232, "learning_rate": 2.727406967530391e-06, "loss": 0.925, "step": 18229 }, { "epoch": 0.6607226994309738, "grad_norm": 2.3932863332716834, "learning_rate": 2.7268841842540873e-06, "loss": 0.9261, "step": 18230 }, { "epoch": 0.6607589431336305, "grad_norm": 2.14364035698546, "learning_rate": 2.7263614322999464e-06, "loss": 0.8418, "step": 18231 }, { "epoch": 0.6607951868362872, "grad_norm": 1.9604701945357896, "learning_rate": 2.7258387116751684e-06, "loss": 0.7725, "step": 18232 }, { "epoch": 0.6608314305389439, "grad_norm": 2.1714706194056506, "learning_rate": 2.725316022386958e-06, "loss": 0.8743, "step": 18233 }, { "epoch": 0.6608676742416005, "grad_norm": 2.425268692297214, "learning_rate": 2.724793364442516e-06, "loss": 0.9992, "step": 18234 }, { "epoch": 0.6609039179442572, "grad_norm": 2.419120137919728, "learning_rate": 2.7242707378490484e-06, "loss": 0.9011, "step": 18235 }, { "epoch": 0.6609401616469138, "grad_norm": 2.4668667823746886, "learning_rate": 2.7237481426137514e-06, "loss": 0.954, "step": 18236 }, { "epoch": 0.6609764053495705, "grad_norm": 2.5897024427580435, "learning_rate": 2.723225578743828e-06, "loss": 1.0373, "step": 18237 }, { "epoch": 0.6610126490522272, "grad_norm": 2.0776324609539127, "learning_rate": 2.722703046246482e-06, "loss": 0.7354, "step": 18238 }, { "epoch": 0.6610488927548839, "grad_norm": 2.4061092372158988, "learning_rate": 2.722180545128908e-06, "loss": 0.8975, "step": 18239 }, { "epoch": 0.6610851364575405, "grad_norm": 2.392957729633999, "learning_rate": 2.7216580753983085e-06, "loss": 0.9437, "step": 18240 }, { "epoch": 0.6611213801601972, "grad_norm": 4.970673946443445, "learning_rate": 2.7211356370618835e-06, "loss": 0.8822, "step": 18241 }, { "epoch": 0.6611576238628538, "grad_norm": 2.276736274649398, "learning_rate": 2.7206132301268328e-06, "loss": 0.7348, "step": 18242 }, { "epoch": 0.6611938675655105, "grad_norm": 2.1518361712777216, "learning_rate": 2.720090854600351e-06, "loss": 0.7985, "step": 18243 }, { "epoch": 0.6612301112681671, "grad_norm": 2.3782016420946146, "learning_rate": 2.7195685104896385e-06, "loss": 1.0154, "step": 18244 }, { "epoch": 0.6612663549708239, "grad_norm": 2.5164651864730403, "learning_rate": 2.7190461978018945e-06, "loss": 1.0578, "step": 18245 }, { "epoch": 0.6613025986734805, "grad_norm": 2.3743657316317797, "learning_rate": 2.7185239165443134e-06, "loss": 0.9613, "step": 18246 }, { "epoch": 0.6613388423761372, "grad_norm": 2.4752977479484, "learning_rate": 2.7180016667240937e-06, "loss": 0.8539, "step": 18247 }, { "epoch": 0.6613750860787938, "grad_norm": 2.2312610425026356, "learning_rate": 2.717479448348428e-06, "loss": 0.8398, "step": 18248 }, { "epoch": 0.6614113297814505, "grad_norm": 2.1911408586470444, "learning_rate": 2.716957261424519e-06, "loss": 0.7789, "step": 18249 }, { "epoch": 0.6614475734841071, "grad_norm": 2.0887209090242145, "learning_rate": 2.716435105959555e-06, "loss": 0.7903, "step": 18250 }, { "epoch": 0.6614838171867637, "grad_norm": 2.0695914735997563, "learning_rate": 2.715912981960738e-06, "loss": 0.8343, "step": 18251 }, { "epoch": 0.6615200608894205, "grad_norm": 2.401564288614845, "learning_rate": 2.715390889435253e-06, "loss": 0.8542, "step": 18252 }, { "epoch": 0.6615563045920771, "grad_norm": 2.5268413702796186, "learning_rate": 2.7148688283903048e-06, "loss": 0.7818, "step": 18253 }, { "epoch": 0.6615925482947338, "grad_norm": 2.5563564304150503, "learning_rate": 2.7143467988330795e-06, "loss": 0.7217, "step": 18254 }, { "epoch": 0.6616287919973904, "grad_norm": 2.6296365185034505, "learning_rate": 2.7138248007707758e-06, "loss": 0.9301, "step": 18255 }, { "epoch": 0.6616650357000471, "grad_norm": 2.0626129967470503, "learning_rate": 2.7133028342105815e-06, "loss": 0.8137, "step": 18256 }, { "epoch": 0.6617012794027037, "grad_norm": 2.434644243690093, "learning_rate": 2.712780899159691e-06, "loss": 0.8614, "step": 18257 }, { "epoch": 0.6617375231053605, "grad_norm": 2.503174569284805, "learning_rate": 2.7122589956252986e-06, "loss": 0.8307, "step": 18258 }, { "epoch": 0.6617737668080171, "grad_norm": 2.5531996800738423, "learning_rate": 2.7117371236145918e-06, "loss": 0.9657, "step": 18259 }, { "epoch": 0.6618100105106738, "grad_norm": 2.4858492324828303, "learning_rate": 2.7112152831347626e-06, "loss": 0.8916, "step": 18260 }, { "epoch": 0.6618462542133304, "grad_norm": 2.618915461214378, "learning_rate": 2.7106934741930036e-06, "loss": 0.9656, "step": 18261 }, { "epoch": 0.6618824979159871, "grad_norm": 2.2892340691571125, "learning_rate": 2.7101716967965055e-06, "loss": 0.9397, "step": 18262 }, { "epoch": 0.6619187416186437, "grad_norm": 2.2645575949388244, "learning_rate": 2.7096499509524547e-06, "loss": 0.7549, "step": 18263 }, { "epoch": 0.6619549853213004, "grad_norm": 1.8297713863719909, "learning_rate": 2.7091282366680426e-06, "loss": 0.6388, "step": 18264 }, { "epoch": 0.6619912290239571, "grad_norm": 2.3596768589671413, "learning_rate": 2.7086065539504596e-06, "loss": 0.924, "step": 18265 }, { "epoch": 0.6620274727266138, "grad_norm": 2.3726976782089944, "learning_rate": 2.7080849028068912e-06, "loss": 0.9049, "step": 18266 }, { "epoch": 0.6620637164292704, "grad_norm": 2.5093619310528394, "learning_rate": 2.7075632832445274e-06, "loss": 0.9055, "step": 18267 }, { "epoch": 0.6620999601319271, "grad_norm": 2.287399312815524, "learning_rate": 2.7070416952705523e-06, "loss": 0.8148, "step": 18268 }, { "epoch": 0.6621362038345837, "grad_norm": 2.5717871718309144, "learning_rate": 2.7065201388921597e-06, "loss": 0.8294, "step": 18269 }, { "epoch": 0.6621724475372404, "grad_norm": 2.365691636811908, "learning_rate": 2.705998614116531e-06, "loss": 0.7782, "step": 18270 }, { "epoch": 0.6622086912398971, "grad_norm": 2.335459090944893, "learning_rate": 2.7054771209508544e-06, "loss": 0.8587, "step": 18271 }, { "epoch": 0.6622449349425538, "grad_norm": 2.2678993358667103, "learning_rate": 2.704955659402313e-06, "loss": 0.9169, "step": 18272 }, { "epoch": 0.6622811786452104, "grad_norm": 2.5282237113208654, "learning_rate": 2.704434229478098e-06, "loss": 0.976, "step": 18273 }, { "epoch": 0.6623174223478671, "grad_norm": 2.3745566806799934, "learning_rate": 2.703912831185389e-06, "loss": 0.8982, "step": 18274 }, { "epoch": 0.6623536660505237, "grad_norm": 2.509704772222121, "learning_rate": 2.703391464531374e-06, "loss": 1.0558, "step": 18275 }, { "epoch": 0.6623899097531804, "grad_norm": 2.1983001323848685, "learning_rate": 2.7028701295232346e-06, "loss": 0.9268, "step": 18276 }, { "epoch": 0.662426153455837, "grad_norm": 2.2173018667446187, "learning_rate": 2.7023488261681553e-06, "loss": 0.8474, "step": 18277 }, { "epoch": 0.6624623971584938, "grad_norm": 2.2737892252431253, "learning_rate": 2.701827554473322e-06, "loss": 0.903, "step": 18278 }, { "epoch": 0.6624986408611504, "grad_norm": 2.3509666946054573, "learning_rate": 2.7013063144459117e-06, "loss": 0.7224, "step": 18279 }, { "epoch": 0.662534884563807, "grad_norm": 2.612779790131744, "learning_rate": 2.7007851060931114e-06, "loss": 0.8911, "step": 18280 }, { "epoch": 0.6625711282664637, "grad_norm": 2.2890720177309722, "learning_rate": 2.7002639294221016e-06, "loss": 0.7393, "step": 18281 }, { "epoch": 0.6626073719691203, "grad_norm": 2.2546239119696287, "learning_rate": 2.6997427844400657e-06, "loss": 0.7085, "step": 18282 }, { "epoch": 0.662643615671777, "grad_norm": 2.297440429941306, "learning_rate": 2.699221671154182e-06, "loss": 1.0158, "step": 18283 }, { "epoch": 0.6626798593744337, "grad_norm": 2.4539772811220644, "learning_rate": 2.698700589571631e-06, "loss": 0.9756, "step": 18284 }, { "epoch": 0.6627161030770904, "grad_norm": 2.448754612851046, "learning_rate": 2.698179539699597e-06, "loss": 0.6861, "step": 18285 }, { "epoch": 0.662752346779747, "grad_norm": 2.280215062478063, "learning_rate": 2.6976585215452544e-06, "loss": 0.7671, "step": 18286 }, { "epoch": 0.6627885904824037, "grad_norm": 2.5702172589492647, "learning_rate": 2.6971375351157867e-06, "loss": 0.894, "step": 18287 }, { "epoch": 0.6628248341850603, "grad_norm": 2.73854713355745, "learning_rate": 2.6966165804183675e-06, "loss": 0.9622, "step": 18288 }, { "epoch": 0.662861077887717, "grad_norm": 2.341534813639422, "learning_rate": 2.6960956574601825e-06, "loss": 1.1001, "step": 18289 }, { "epoch": 0.6628973215903736, "grad_norm": 2.51400277531236, "learning_rate": 2.6955747662484045e-06, "loss": 0.728, "step": 18290 }, { "epoch": 0.6629335652930304, "grad_norm": 2.147452103981043, "learning_rate": 2.6950539067902137e-06, "loss": 0.7224, "step": 18291 }, { "epoch": 0.662969808995687, "grad_norm": 2.0722593906691777, "learning_rate": 2.694533079092785e-06, "loss": 0.6859, "step": 18292 }, { "epoch": 0.6630060526983437, "grad_norm": 2.183084514872881, "learning_rate": 2.6940122831632954e-06, "loss": 0.8233, "step": 18293 }, { "epoch": 0.6630422964010003, "grad_norm": 2.1741982014032777, "learning_rate": 2.6934915190089243e-06, "loss": 0.8852, "step": 18294 }, { "epoch": 0.663078540103657, "grad_norm": 2.4308632570199964, "learning_rate": 2.692970786636843e-06, "loss": 0.8843, "step": 18295 }, { "epoch": 0.6631147838063136, "grad_norm": 2.1488728064340994, "learning_rate": 2.6924500860542292e-06, "loss": 0.9392, "step": 18296 }, { "epoch": 0.6631510275089704, "grad_norm": 2.377375519455217, "learning_rate": 2.6919294172682573e-06, "loss": 0.8457, "step": 18297 }, { "epoch": 0.663187271211627, "grad_norm": 2.0116299215169557, "learning_rate": 2.6914087802861043e-06, "loss": 0.9588, "step": 18298 }, { "epoch": 0.6632235149142837, "grad_norm": 2.2817684583512525, "learning_rate": 2.6908881751149406e-06, "loss": 0.876, "step": 18299 }, { "epoch": 0.6632597586169403, "grad_norm": 2.4693625152656664, "learning_rate": 2.6903676017619407e-06, "loss": 0.9117, "step": 18300 }, { "epoch": 0.663296002319597, "grad_norm": 2.4375671215980166, "learning_rate": 2.6898470602342792e-06, "loss": 1.2325, "step": 18301 }, { "epoch": 0.6633322460222536, "grad_norm": 2.5328464826826687, "learning_rate": 2.68932655053913e-06, "loss": 0.7805, "step": 18302 }, { "epoch": 0.6633684897249102, "grad_norm": 2.3417652046324084, "learning_rate": 2.6888060726836606e-06, "loss": 0.891, "step": 18303 }, { "epoch": 0.663404733427567, "grad_norm": 2.2360344273620054, "learning_rate": 2.6882856266750458e-06, "loss": 0.825, "step": 18304 }, { "epoch": 0.6634409771302237, "grad_norm": 2.1489738016599227, "learning_rate": 2.6877652125204593e-06, "loss": 0.881, "step": 18305 }, { "epoch": 0.6634772208328803, "grad_norm": 2.1776283651828585, "learning_rate": 2.687244830227068e-06, "loss": 0.9054, "step": 18306 }, { "epoch": 0.663513464535537, "grad_norm": 1.863093325515556, "learning_rate": 2.6867244798020466e-06, "loss": 0.7043, "step": 18307 }, { "epoch": 0.6635497082381936, "grad_norm": 2.410354064012875, "learning_rate": 2.686204161252558e-06, "loss": 0.8824, "step": 18308 }, { "epoch": 0.6635859519408502, "grad_norm": 2.424182082501369, "learning_rate": 2.685683874585781e-06, "loss": 0.862, "step": 18309 }, { "epoch": 0.663622195643507, "grad_norm": 2.42128623820873, "learning_rate": 2.6851636198088783e-06, "loss": 0.9022, "step": 18310 }, { "epoch": 0.6636584393461636, "grad_norm": 2.182842298651867, "learning_rate": 2.6846433969290236e-06, "loss": 0.9507, "step": 18311 }, { "epoch": 0.6636946830488203, "grad_norm": 2.05509332882089, "learning_rate": 2.68412320595338e-06, "loss": 0.7757, "step": 18312 }, { "epoch": 0.6637309267514769, "grad_norm": 2.3457935964075736, "learning_rate": 2.6836030468891185e-06, "loss": 0.7215, "step": 18313 }, { "epoch": 0.6637671704541336, "grad_norm": 2.298885436949263, "learning_rate": 2.6830829197434075e-06, "loss": 0.8383, "step": 18314 }, { "epoch": 0.6638034141567902, "grad_norm": 2.3759697434062494, "learning_rate": 2.6825628245234115e-06, "loss": 0.7011, "step": 18315 }, { "epoch": 0.6638396578594469, "grad_norm": 2.4328147346964535, "learning_rate": 2.6820427612362975e-06, "loss": 0.8972, "step": 18316 }, { "epoch": 0.6638759015621036, "grad_norm": 2.3194833971097952, "learning_rate": 2.6815227298892332e-06, "loss": 0.7822, "step": 18317 }, { "epoch": 0.6639121452647603, "grad_norm": 2.5654919172309967, "learning_rate": 2.681002730489385e-06, "loss": 0.9541, "step": 18318 }, { "epoch": 0.6639483889674169, "grad_norm": 2.2606582137921647, "learning_rate": 2.680482763043912e-06, "loss": 0.7947, "step": 18319 }, { "epoch": 0.6639846326700736, "grad_norm": 2.4065810941970525, "learning_rate": 2.679962827559989e-06, "loss": 0.9298, "step": 18320 }, { "epoch": 0.6640208763727302, "grad_norm": 2.571613239066455, "learning_rate": 2.6794429240447725e-06, "loss": 0.8168, "step": 18321 }, { "epoch": 0.6640571200753869, "grad_norm": 2.4845806873878824, "learning_rate": 2.678923052505431e-06, "loss": 1.0453, "step": 18322 }, { "epoch": 0.6640933637780435, "grad_norm": 2.3502998344102113, "learning_rate": 2.678403212949124e-06, "loss": 0.9469, "step": 18323 }, { "epoch": 0.6641296074807003, "grad_norm": 2.050895901386055, "learning_rate": 2.6778834053830174e-06, "loss": 0.8158, "step": 18324 }, { "epoch": 0.6641658511833569, "grad_norm": 2.622142414905574, "learning_rate": 2.6773636298142736e-06, "loss": 0.8445, "step": 18325 }, { "epoch": 0.6642020948860136, "grad_norm": 2.3181073513020647, "learning_rate": 2.6768438862500535e-06, "loss": 0.9726, "step": 18326 }, { "epoch": 0.6642383385886702, "grad_norm": 2.3977779912599946, "learning_rate": 2.6763241746975193e-06, "loss": 0.9379, "step": 18327 }, { "epoch": 0.6642745822913269, "grad_norm": 2.469134195411535, "learning_rate": 2.675804495163833e-06, "loss": 0.9242, "step": 18328 }, { "epoch": 0.6643108259939835, "grad_norm": 2.420752897885546, "learning_rate": 2.6752848476561564e-06, "loss": 0.9348, "step": 18329 }, { "epoch": 0.6643470696966403, "grad_norm": 2.1843632559498416, "learning_rate": 2.6747652321816474e-06, "loss": 0.8098, "step": 18330 }, { "epoch": 0.6643833133992969, "grad_norm": 2.364548756080581, "learning_rate": 2.6742456487474673e-06, "loss": 1.0199, "step": 18331 }, { "epoch": 0.6644195571019536, "grad_norm": 2.3872289580796857, "learning_rate": 2.673726097360777e-06, "loss": 0.883, "step": 18332 }, { "epoch": 0.6644558008046102, "grad_norm": 2.589018810452743, "learning_rate": 2.6732065780287326e-06, "loss": 0.84, "step": 18333 }, { "epoch": 0.6644920445072668, "grad_norm": 2.366812771099705, "learning_rate": 2.672687090758496e-06, "loss": 0.9057, "step": 18334 }, { "epoch": 0.6645282882099235, "grad_norm": 2.345226180491213, "learning_rate": 2.6721676355572202e-06, "loss": 0.849, "step": 18335 }, { "epoch": 0.6645645319125801, "grad_norm": 2.42906678305625, "learning_rate": 2.67164821243207e-06, "loss": 0.873, "step": 18336 }, { "epoch": 0.6646007756152369, "grad_norm": 2.2902437985727775, "learning_rate": 2.671128821390198e-06, "loss": 0.846, "step": 18337 }, { "epoch": 0.6646370193178935, "grad_norm": 2.360809697163384, "learning_rate": 2.670609462438764e-06, "loss": 0.7135, "step": 18338 }, { "epoch": 0.6646732630205502, "grad_norm": 2.3842762190500655, "learning_rate": 2.670090135584919e-06, "loss": 0.8649, "step": 18339 }, { "epoch": 0.6647095067232068, "grad_norm": 2.352290856285558, "learning_rate": 2.6695708408358266e-06, "loss": 1.0122, "step": 18340 }, { "epoch": 0.6647457504258635, "grad_norm": 2.5790959886559826, "learning_rate": 2.669051578198637e-06, "loss": 0.7902, "step": 18341 }, { "epoch": 0.6647819941285201, "grad_norm": 2.188284768405469, "learning_rate": 2.668532347680508e-06, "loss": 0.9377, "step": 18342 }, { "epoch": 0.6648182378311769, "grad_norm": 2.4854865999090228, "learning_rate": 2.668013149288592e-06, "loss": 1.0404, "step": 18343 }, { "epoch": 0.6648544815338335, "grad_norm": 2.596102407168534, "learning_rate": 2.667493983030045e-06, "loss": 0.9532, "step": 18344 }, { "epoch": 0.6648907252364902, "grad_norm": 2.5507698848653018, "learning_rate": 2.666974848912021e-06, "loss": 0.9042, "step": 18345 }, { "epoch": 0.6649269689391468, "grad_norm": 2.7031871807222347, "learning_rate": 2.666455746941672e-06, "loss": 0.7737, "step": 18346 }, { "epoch": 0.6649632126418035, "grad_norm": 2.43362906755542, "learning_rate": 2.6659366771261504e-06, "loss": 0.9303, "step": 18347 }, { "epoch": 0.6649994563444601, "grad_norm": 2.2949544985440435, "learning_rate": 2.66541763947261e-06, "loss": 0.9252, "step": 18348 }, { "epoch": 0.6650357000471168, "grad_norm": 2.614178000265311, "learning_rate": 2.6648986339882042e-06, "loss": 0.8623, "step": 18349 }, { "epoch": 0.6650719437497735, "grad_norm": 2.4688305968968676, "learning_rate": 2.664379660680082e-06, "loss": 0.9131, "step": 18350 }, { "epoch": 0.6651081874524302, "grad_norm": 2.1642329435526437, "learning_rate": 2.6638607195553956e-06, "loss": 0.8295, "step": 18351 }, { "epoch": 0.6651444311550868, "grad_norm": 3.168387455799986, "learning_rate": 2.6633418106212968e-06, "loss": 0.9627, "step": 18352 }, { "epoch": 0.6651806748577435, "grad_norm": 2.431776848259267, "learning_rate": 2.6628229338849332e-06, "loss": 0.9083, "step": 18353 }, { "epoch": 0.6652169185604001, "grad_norm": 2.4852312855261336, "learning_rate": 2.6623040893534578e-06, "loss": 0.9269, "step": 18354 }, { "epoch": 0.6652531622630568, "grad_norm": 2.6344876164097855, "learning_rate": 2.6617852770340142e-06, "loss": 1.1595, "step": 18355 }, { "epoch": 0.6652894059657135, "grad_norm": 2.3915308016296954, "learning_rate": 2.6612664969337596e-06, "loss": 0.7455, "step": 18356 }, { "epoch": 0.6653256496683702, "grad_norm": 2.3423850261424377, "learning_rate": 2.660747749059836e-06, "loss": 0.8643, "step": 18357 }, { "epoch": 0.6653618933710268, "grad_norm": 2.48417910873845, "learning_rate": 2.660229033419396e-06, "loss": 0.7691, "step": 18358 }, { "epoch": 0.6653981370736834, "grad_norm": 2.162714941215696, "learning_rate": 2.659710350019581e-06, "loss": 0.8055, "step": 18359 }, { "epoch": 0.6654343807763401, "grad_norm": 2.326946556278294, "learning_rate": 2.659191698867546e-06, "loss": 0.8112, "step": 18360 }, { "epoch": 0.6654706244789967, "grad_norm": 2.0591442241392395, "learning_rate": 2.6586730799704307e-06, "loss": 0.812, "step": 18361 }, { "epoch": 0.6655068681816534, "grad_norm": 2.1237155738857036, "learning_rate": 2.6581544933353876e-06, "loss": 0.7864, "step": 18362 }, { "epoch": 0.6655431118843101, "grad_norm": 2.3793010773701697, "learning_rate": 2.6576359389695566e-06, "loss": 0.8452, "step": 18363 }, { "epoch": 0.6655793555869668, "grad_norm": 2.7022574754325905, "learning_rate": 2.6571174168800863e-06, "loss": 0.8926, "step": 18364 }, { "epoch": 0.6656155992896234, "grad_norm": 2.126998936472544, "learning_rate": 2.6565989270741223e-06, "loss": 0.7438, "step": 18365 }, { "epoch": 0.6656518429922801, "grad_norm": 2.4783690486972745, "learning_rate": 2.656080469558806e-06, "loss": 0.8579, "step": 18366 }, { "epoch": 0.6656880866949367, "grad_norm": 2.495406697204842, "learning_rate": 2.655562044341284e-06, "loss": 0.8702, "step": 18367 }, { "epoch": 0.6657243303975934, "grad_norm": 2.2857689914599106, "learning_rate": 2.6550436514286987e-06, "loss": 1.022, "step": 18368 }, { "epoch": 0.6657605741002501, "grad_norm": 2.371812089155636, "learning_rate": 2.654525290828196e-06, "loss": 0.9766, "step": 18369 }, { "epoch": 0.6657968178029068, "grad_norm": 2.535236486922748, "learning_rate": 2.6540069625469135e-06, "loss": 0.8778, "step": 18370 }, { "epoch": 0.6658330615055634, "grad_norm": 2.1782323123073635, "learning_rate": 2.6534886665919967e-06, "loss": 0.8279, "step": 18371 }, { "epoch": 0.6658693052082201, "grad_norm": 2.7307761310163934, "learning_rate": 2.6529704029705895e-06, "loss": 1.0111, "step": 18372 }, { "epoch": 0.6659055489108767, "grad_norm": 2.4199313418819886, "learning_rate": 2.6524521716898277e-06, "loss": 0.9579, "step": 18373 }, { "epoch": 0.6659417926135334, "grad_norm": 2.2908006063685518, "learning_rate": 2.651933972756858e-06, "loss": 0.9822, "step": 18374 }, { "epoch": 0.66597803631619, "grad_norm": 2.504825054860839, "learning_rate": 2.651415806178814e-06, "loss": 0.9534, "step": 18375 }, { "epoch": 0.6660142800188468, "grad_norm": 2.6603627973594386, "learning_rate": 2.650897671962843e-06, "loss": 0.9687, "step": 18376 }, { "epoch": 0.6660505237215034, "grad_norm": 2.587028356022402, "learning_rate": 2.6503795701160806e-06, "loss": 0.7936, "step": 18377 }, { "epoch": 0.6660867674241601, "grad_norm": 2.2498090045442347, "learning_rate": 2.649861500645668e-06, "loss": 0.9441, "step": 18378 }, { "epoch": 0.6661230111268167, "grad_norm": 2.553180914285034, "learning_rate": 2.6493434635587406e-06, "loss": 0.8482, "step": 18379 }, { "epoch": 0.6661592548294734, "grad_norm": 2.6528235220594008, "learning_rate": 2.6488254588624385e-06, "loss": 0.9253, "step": 18380 }, { "epoch": 0.66619549853213, "grad_norm": 2.3540625067812218, "learning_rate": 2.6483074865639024e-06, "loss": 0.8931, "step": 18381 }, { "epoch": 0.6662317422347868, "grad_norm": 2.2990304829199766, "learning_rate": 2.6477895466702642e-06, "loss": 0.8929, "step": 18382 }, { "epoch": 0.6662679859374434, "grad_norm": 2.227872532660892, "learning_rate": 2.6472716391886632e-06, "loss": 0.915, "step": 18383 }, { "epoch": 0.6663042296401, "grad_norm": 2.137363699594121, "learning_rate": 2.646753764126237e-06, "loss": 0.8666, "step": 18384 }, { "epoch": 0.6663404733427567, "grad_norm": 2.3658548093387783, "learning_rate": 2.6462359214901224e-06, "loss": 0.8173, "step": 18385 }, { "epoch": 0.6663767170454133, "grad_norm": 2.2904922507994203, "learning_rate": 2.645718111287451e-06, "loss": 0.8454, "step": 18386 }, { "epoch": 0.66641296074807, "grad_norm": 2.2278737505347004, "learning_rate": 2.6452003335253607e-06, "loss": 1.0838, "step": 18387 }, { "epoch": 0.6664492044507266, "grad_norm": 2.2710574554415746, "learning_rate": 2.644682588210985e-06, "loss": 0.8798, "step": 18388 }, { "epoch": 0.6664854481533834, "grad_norm": 2.5600548697213648, "learning_rate": 2.6441648753514617e-06, "loss": 0.9672, "step": 18389 }, { "epoch": 0.66652169185604, "grad_norm": 2.4483455388402624, "learning_rate": 2.643647194953919e-06, "loss": 1.001, "step": 18390 }, { "epoch": 0.6665579355586967, "grad_norm": 2.3357822264306263, "learning_rate": 2.6431295470254926e-06, "loss": 0.8286, "step": 18391 }, { "epoch": 0.6665941792613533, "grad_norm": 2.3896975298242356, "learning_rate": 2.6426119315733186e-06, "loss": 0.9999, "step": 18392 }, { "epoch": 0.66663042296401, "grad_norm": 2.1954300591699525, "learning_rate": 2.6420943486045235e-06, "loss": 0.9035, "step": 18393 }, { "epoch": 0.6666666666666666, "grad_norm": 2.522522245990072, "learning_rate": 2.6415767981262453e-06, "loss": 1.0461, "step": 18394 }, { "epoch": 0.6667029103693233, "grad_norm": 2.394369660945545, "learning_rate": 2.6410592801456076e-06, "loss": 0.9303, "step": 18395 }, { "epoch": 0.66673915407198, "grad_norm": 2.2951389400014306, "learning_rate": 2.6405417946697504e-06, "loss": 0.868, "step": 18396 }, { "epoch": 0.6667753977746367, "grad_norm": 2.3427777971000032, "learning_rate": 2.6400243417057985e-06, "loss": 0.9878, "step": 18397 }, { "epoch": 0.6668116414772933, "grad_norm": 2.3778822655916514, "learning_rate": 2.639506921260886e-06, "loss": 0.9689, "step": 18398 }, { "epoch": 0.66684788517995, "grad_norm": 2.0696330570647574, "learning_rate": 2.6389895333421387e-06, "loss": 0.8678, "step": 18399 }, { "epoch": 0.6668841288826066, "grad_norm": 2.4936810065329893, "learning_rate": 2.6384721779566873e-06, "loss": 0.8417, "step": 18400 }, { "epoch": 0.6669203725852633, "grad_norm": 2.312699828527511, "learning_rate": 2.6379548551116633e-06, "loss": 0.8971, "step": 18401 }, { "epoch": 0.66695661628792, "grad_norm": 2.658938841828941, "learning_rate": 2.637437564814189e-06, "loss": 0.8671, "step": 18402 }, { "epoch": 0.6669928599905767, "grad_norm": 2.655748339857912, "learning_rate": 2.6369203070714e-06, "loss": 0.8263, "step": 18403 }, { "epoch": 0.6670291036932333, "grad_norm": 2.267811973139556, "learning_rate": 2.6364030818904183e-06, "loss": 0.9925, "step": 18404 }, { "epoch": 0.66706534739589, "grad_norm": 2.2411426264646237, "learning_rate": 2.6358858892783744e-06, "loss": 0.7493, "step": 18405 }, { "epoch": 0.6671015910985466, "grad_norm": 2.5855284998930945, "learning_rate": 2.6353687292423897e-06, "loss": 0.8263, "step": 18406 }, { "epoch": 0.6671378348012033, "grad_norm": 2.372262265825574, "learning_rate": 2.6348516017895974e-06, "loss": 0.7928, "step": 18407 }, { "epoch": 0.6671740785038599, "grad_norm": 2.4291500925236384, "learning_rate": 2.6343345069271177e-06, "loss": 0.968, "step": 18408 }, { "epoch": 0.6672103222065167, "grad_norm": 2.3063195989481757, "learning_rate": 2.63381744466208e-06, "loss": 0.8667, "step": 18409 }, { "epoch": 0.6672465659091733, "grad_norm": 2.411095123746359, "learning_rate": 2.6333004150016045e-06, "loss": 1.0367, "step": 18410 }, { "epoch": 0.66728280961183, "grad_norm": 2.2830552024423807, "learning_rate": 2.632783417952818e-06, "loss": 0.7638, "step": 18411 }, { "epoch": 0.6673190533144866, "grad_norm": 2.361849943769973, "learning_rate": 2.6322664535228463e-06, "loss": 0.8264, "step": 18412 }, { "epoch": 0.6673552970171432, "grad_norm": 2.7449182221495234, "learning_rate": 2.631749521718809e-06, "loss": 0.8697, "step": 18413 }, { "epoch": 0.6673915407197999, "grad_norm": 2.2609007746742678, "learning_rate": 2.631232622547831e-06, "loss": 0.8354, "step": 18414 }, { "epoch": 0.6674277844224566, "grad_norm": 2.3732002592328896, "learning_rate": 2.6307157560170356e-06, "loss": 0.7135, "step": 18415 }, { "epoch": 0.6674640281251133, "grad_norm": 2.4849200899951565, "learning_rate": 2.6301989221335457e-06, "loss": 1.0617, "step": 18416 }, { "epoch": 0.6675002718277699, "grad_norm": 3.7401597703498033, "learning_rate": 2.6296821209044787e-06, "loss": 0.8901, "step": 18417 }, { "epoch": 0.6675365155304266, "grad_norm": 2.3339276244205855, "learning_rate": 2.6291653523369593e-06, "loss": 0.8286, "step": 18418 }, { "epoch": 0.6675727592330832, "grad_norm": 2.5782065951015363, "learning_rate": 2.628648616438109e-06, "loss": 0.8844, "step": 18419 }, { "epoch": 0.6676090029357399, "grad_norm": 2.1328340733362894, "learning_rate": 2.628131913215045e-06, "loss": 0.8177, "step": 18420 }, { "epoch": 0.6676452466383965, "grad_norm": 2.307997292122031, "learning_rate": 2.6276152426748914e-06, "loss": 0.8893, "step": 18421 }, { "epoch": 0.6676814903410533, "grad_norm": 1.958419295394391, "learning_rate": 2.6270986048247605e-06, "loss": 0.9264, "step": 18422 }, { "epoch": 0.6677177340437099, "grad_norm": 2.4800377482188716, "learning_rate": 2.626581999671779e-06, "loss": 0.9286, "step": 18423 }, { "epoch": 0.6677539777463666, "grad_norm": 2.947577589487218, "learning_rate": 2.626065427223061e-06, "loss": 0.9944, "step": 18424 }, { "epoch": 0.6677902214490232, "grad_norm": 2.3447433728694986, "learning_rate": 2.6255488874857273e-06, "loss": 0.7523, "step": 18425 }, { "epoch": 0.6678264651516799, "grad_norm": 2.3145290013356403, "learning_rate": 2.6250323804668896e-06, "loss": 0.8923, "step": 18426 }, { "epoch": 0.6678627088543365, "grad_norm": 2.084668722263642, "learning_rate": 2.6245159061736743e-06, "loss": 0.995, "step": 18427 }, { "epoch": 0.6678989525569933, "grad_norm": 2.319727345911574, "learning_rate": 2.6239994646131904e-06, "loss": 0.9164, "step": 18428 }, { "epoch": 0.6679351962596499, "grad_norm": 2.134526094644613, "learning_rate": 2.6234830557925594e-06, "loss": 0.7719, "step": 18429 }, { "epoch": 0.6679714399623066, "grad_norm": 2.2546201655219864, "learning_rate": 2.6229666797188926e-06, "loss": 0.8353, "step": 18430 }, { "epoch": 0.6680076836649632, "grad_norm": 2.4939387525925496, "learning_rate": 2.622450336399307e-06, "loss": 0.8991, "step": 18431 }, { "epoch": 0.6680439273676199, "grad_norm": 2.5251750114109, "learning_rate": 2.62193402584092e-06, "loss": 0.7667, "step": 18432 }, { "epoch": 0.6680801710702765, "grad_norm": 2.1442161704352922, "learning_rate": 2.621417748050842e-06, "loss": 0.8527, "step": 18433 }, { "epoch": 0.6681164147729332, "grad_norm": 2.2882414280788805, "learning_rate": 2.6209015030361884e-06, "loss": 0.8197, "step": 18434 }, { "epoch": 0.6681526584755899, "grad_norm": 2.366354589075695, "learning_rate": 2.6203852908040737e-06, "loss": 0.829, "step": 18435 }, { "epoch": 0.6681889021782466, "grad_norm": 2.302107469402324, "learning_rate": 2.6198691113616127e-06, "loss": 0.8132, "step": 18436 }, { "epoch": 0.6682251458809032, "grad_norm": 2.3972391266424764, "learning_rate": 2.619352964715914e-06, "loss": 0.7957, "step": 18437 }, { "epoch": 0.6682613895835599, "grad_norm": 2.444160795871159, "learning_rate": 2.6188368508740913e-06, "loss": 0.8679, "step": 18438 }, { "epoch": 0.6682976332862165, "grad_norm": 2.120013715733227, "learning_rate": 2.618320769843259e-06, "loss": 0.7845, "step": 18439 }, { "epoch": 0.6683338769888731, "grad_norm": 2.3797519991673983, "learning_rate": 2.6178047216305235e-06, "loss": 1.0652, "step": 18440 }, { "epoch": 0.6683701206915299, "grad_norm": 2.257179551093403, "learning_rate": 2.617288706243001e-06, "loss": 0.9944, "step": 18441 }, { "epoch": 0.6684063643941865, "grad_norm": 2.4516127856820282, "learning_rate": 2.616772723687795e-06, "loss": 0.8237, "step": 18442 }, { "epoch": 0.6684426080968432, "grad_norm": 2.171551959318749, "learning_rate": 2.6162567739720233e-06, "loss": 1.0901, "step": 18443 }, { "epoch": 0.6684788517994998, "grad_norm": 2.5372933187903124, "learning_rate": 2.6157408571027897e-06, "loss": 0.8515, "step": 18444 }, { "epoch": 0.6685150955021565, "grad_norm": 3.138945899639542, "learning_rate": 2.615224973087207e-06, "loss": 0.9828, "step": 18445 }, { "epoch": 0.6685513392048131, "grad_norm": 2.541097801242181, "learning_rate": 2.614709121932378e-06, "loss": 0.7707, "step": 18446 }, { "epoch": 0.6685875829074698, "grad_norm": 2.36903763593767, "learning_rate": 2.614193303645418e-06, "loss": 0.6374, "step": 18447 }, { "epoch": 0.6686238266101265, "grad_norm": 2.0672270120833267, "learning_rate": 2.6136775182334305e-06, "loss": 0.8686, "step": 18448 }, { "epoch": 0.6686600703127832, "grad_norm": 2.3867225252163555, "learning_rate": 2.613161765703525e-06, "loss": 1.0569, "step": 18449 }, { "epoch": 0.6686963140154398, "grad_norm": 2.6462027270556256, "learning_rate": 2.6126460460628055e-06, "loss": 0.9775, "step": 18450 }, { "epoch": 0.6687325577180965, "grad_norm": 2.320115335655684, "learning_rate": 2.612130359318379e-06, "loss": 0.8502, "step": 18451 }, { "epoch": 0.6687688014207531, "grad_norm": 2.2993976914333256, "learning_rate": 2.6116147054773543e-06, "loss": 0.9313, "step": 18452 }, { "epoch": 0.6688050451234098, "grad_norm": 2.29391405891314, "learning_rate": 2.611099084546832e-06, "loss": 0.9167, "step": 18453 }, { "epoch": 0.6688412888260664, "grad_norm": 2.2742176365309863, "learning_rate": 2.6105834965339203e-06, "loss": 0.7066, "step": 18454 }, { "epoch": 0.6688775325287232, "grad_norm": 2.433465188189486, "learning_rate": 2.6100679414457226e-06, "loss": 1.0656, "step": 18455 }, { "epoch": 0.6689137762313798, "grad_norm": 2.252442858226421, "learning_rate": 2.609552419289345e-06, "loss": 0.9564, "step": 18456 }, { "epoch": 0.6689500199340365, "grad_norm": 2.29174746643758, "learning_rate": 2.6090369300718875e-06, "loss": 0.8832, "step": 18457 }, { "epoch": 0.6689862636366931, "grad_norm": 2.1262983843863745, "learning_rate": 2.6085214738004563e-06, "loss": 0.8774, "step": 18458 }, { "epoch": 0.6690225073393498, "grad_norm": 2.7976587715426406, "learning_rate": 2.6080060504821536e-06, "loss": 0.8489, "step": 18459 }, { "epoch": 0.6690587510420064, "grad_norm": 2.2496712068308278, "learning_rate": 2.607490660124079e-06, "loss": 0.8387, "step": 18460 }, { "epoch": 0.6690949947446632, "grad_norm": 2.210174905176978, "learning_rate": 2.6069753027333376e-06, "loss": 0.6918, "step": 18461 }, { "epoch": 0.6691312384473198, "grad_norm": 1.9376849766166686, "learning_rate": 2.6064599783170265e-06, "loss": 0.8752, "step": 18462 }, { "epoch": 0.6691674821499765, "grad_norm": 2.3351389574944204, "learning_rate": 2.6059446868822526e-06, "loss": 0.9066, "step": 18463 }, { "epoch": 0.6692037258526331, "grad_norm": 2.5297807635967717, "learning_rate": 2.6054294284361115e-06, "loss": 0.8361, "step": 18464 }, { "epoch": 0.6692399695552897, "grad_norm": 2.5025068174962444, "learning_rate": 2.604914202985706e-06, "loss": 1.0474, "step": 18465 }, { "epoch": 0.6692762132579464, "grad_norm": 2.515888257310909, "learning_rate": 2.6043990105381327e-06, "loss": 1.0199, "step": 18466 }, { "epoch": 0.669312456960603, "grad_norm": 2.3625936017713016, "learning_rate": 2.6038838511004926e-06, "loss": 0.9575, "step": 18467 }, { "epoch": 0.6693487006632598, "grad_norm": 2.567951282199775, "learning_rate": 2.6033687246798854e-06, "loss": 0.8391, "step": 18468 }, { "epoch": 0.6693849443659164, "grad_norm": 2.22759160724153, "learning_rate": 2.602853631283406e-06, "loss": 0.8007, "step": 18469 }, { "epoch": 0.6694211880685731, "grad_norm": 2.2999781832412807, "learning_rate": 2.6023385709181536e-06, "loss": 0.7959, "step": 18470 }, { "epoch": 0.6694574317712297, "grad_norm": 2.342673741442566, "learning_rate": 2.6018235435912254e-06, "loss": 0.9246, "step": 18471 }, { "epoch": 0.6694936754738864, "grad_norm": 2.611495057025649, "learning_rate": 2.6013085493097214e-06, "loss": 1.0164, "step": 18472 }, { "epoch": 0.669529919176543, "grad_norm": 2.6802601710971787, "learning_rate": 2.6007935880807317e-06, "loss": 0.9807, "step": 18473 }, { "epoch": 0.6695661628791998, "grad_norm": 2.197911862981801, "learning_rate": 2.600278659911356e-06, "loss": 1.0093, "step": 18474 }, { "epoch": 0.6696024065818564, "grad_norm": 2.486446145696743, "learning_rate": 2.599763764808689e-06, "loss": 0.9516, "step": 18475 }, { "epoch": 0.6696386502845131, "grad_norm": 2.1107399336323756, "learning_rate": 2.5992489027798274e-06, "loss": 0.7744, "step": 18476 }, { "epoch": 0.6696748939871697, "grad_norm": 2.4905655358810095, "learning_rate": 2.598734073831862e-06, "loss": 1.036, "step": 18477 }, { "epoch": 0.6697111376898264, "grad_norm": 2.672470421918002, "learning_rate": 2.5982192779718897e-06, "loss": 0.8557, "step": 18478 }, { "epoch": 0.669747381392483, "grad_norm": 2.3644630903390818, "learning_rate": 2.597704515207004e-06, "loss": 0.837, "step": 18479 }, { "epoch": 0.6697836250951397, "grad_norm": 2.3681590921960756, "learning_rate": 2.597189785544296e-06, "loss": 0.9575, "step": 18480 }, { "epoch": 0.6698198687977964, "grad_norm": 2.5561319837922025, "learning_rate": 2.5966750889908595e-06, "loss": 0.906, "step": 18481 }, { "epoch": 0.6698561125004531, "grad_norm": 2.4935551500577087, "learning_rate": 2.5961604255537865e-06, "loss": 1.0113, "step": 18482 }, { "epoch": 0.6698923562031097, "grad_norm": 2.0108428801708964, "learning_rate": 2.5956457952401715e-06, "loss": 0.9428, "step": 18483 }, { "epoch": 0.6699285999057664, "grad_norm": 2.488635690952611, "learning_rate": 2.5951311980571004e-06, "loss": 0.8331, "step": 18484 }, { "epoch": 0.669964843608423, "grad_norm": 2.1954224671301956, "learning_rate": 2.594616634011668e-06, "loss": 0.9152, "step": 18485 }, { "epoch": 0.6700010873110797, "grad_norm": 2.529765490980155, "learning_rate": 2.5941021031109647e-06, "loss": 0.9398, "step": 18486 }, { "epoch": 0.6700373310137364, "grad_norm": 2.3625843491642553, "learning_rate": 2.5935876053620785e-06, "loss": 0.8799, "step": 18487 }, { "epoch": 0.6700735747163931, "grad_norm": 2.5629887673174676, "learning_rate": 2.593073140772101e-06, "loss": 1.0479, "step": 18488 }, { "epoch": 0.6701098184190497, "grad_norm": 2.3115773136017412, "learning_rate": 2.5925587093481166e-06, "loss": 0.8363, "step": 18489 }, { "epoch": 0.6701460621217064, "grad_norm": 2.5098880433156507, "learning_rate": 2.5920443110972206e-06, "loss": 0.9006, "step": 18490 }, { "epoch": 0.670182305824363, "grad_norm": 2.5760493533625843, "learning_rate": 2.5915299460264953e-06, "loss": 0.901, "step": 18491 }, { "epoch": 0.6702185495270196, "grad_norm": 2.382890796687176, "learning_rate": 2.5910156141430333e-06, "loss": 0.8476, "step": 18492 }, { "epoch": 0.6702547932296763, "grad_norm": 2.021763745775367, "learning_rate": 2.5905013154539154e-06, "loss": 0.716, "step": 18493 }, { "epoch": 0.670291036932333, "grad_norm": 2.1754857134724563, "learning_rate": 2.589987049966236e-06, "loss": 0.7895, "step": 18494 }, { "epoch": 0.6703272806349897, "grad_norm": 2.3253482511757153, "learning_rate": 2.5894728176870753e-06, "loss": 0.7124, "step": 18495 }, { "epoch": 0.6703635243376463, "grad_norm": 2.328460422964217, "learning_rate": 2.588958618623524e-06, "loss": 0.9571, "step": 18496 }, { "epoch": 0.670399768040303, "grad_norm": 2.260608449412071, "learning_rate": 2.5884444527826626e-06, "loss": 0.8769, "step": 18497 }, { "epoch": 0.6704360117429596, "grad_norm": 2.4506515700272042, "learning_rate": 2.5879303201715777e-06, "loss": 0.8752, "step": 18498 }, { "epoch": 0.6704722554456163, "grad_norm": 2.735883678091056, "learning_rate": 2.587416220797356e-06, "loss": 0.9922, "step": 18499 }, { "epoch": 0.670508499148273, "grad_norm": 2.317407597947472, "learning_rate": 2.586902154667078e-06, "loss": 0.811, "step": 18500 }, { "epoch": 0.6705447428509297, "grad_norm": 2.4795774170420537, "learning_rate": 2.5863881217878295e-06, "loss": 0.9397, "step": 18501 }, { "epoch": 0.6705809865535863, "grad_norm": 2.338717265704109, "learning_rate": 2.5858741221666923e-06, "loss": 0.7246, "step": 18502 }, { "epoch": 0.670617230256243, "grad_norm": 3.0645112516856376, "learning_rate": 2.5853601558107522e-06, "loss": 0.8643, "step": 18503 }, { "epoch": 0.6706534739588996, "grad_norm": 2.225194726131013, "learning_rate": 2.5848462227270867e-06, "loss": 0.8719, "step": 18504 }, { "epoch": 0.6706897176615563, "grad_norm": 2.1393130827967797, "learning_rate": 2.584332322922779e-06, "loss": 0.7366, "step": 18505 }, { "epoch": 0.6707259613642129, "grad_norm": 2.3909768951009758, "learning_rate": 2.583818456404913e-06, "loss": 0.8943, "step": 18506 }, { "epoch": 0.6707622050668697, "grad_norm": 2.330391502064321, "learning_rate": 2.5833046231805657e-06, "loss": 0.8293, "step": 18507 }, { "epoch": 0.6707984487695263, "grad_norm": 2.530445342104367, "learning_rate": 2.582790823256821e-06, "loss": 0.9182, "step": 18508 }, { "epoch": 0.670834692472183, "grad_norm": 2.1805341493360384, "learning_rate": 2.5822770566407533e-06, "loss": 0.719, "step": 18509 }, { "epoch": 0.6708709361748396, "grad_norm": 2.15396771028198, "learning_rate": 2.5817633233394498e-06, "loss": 0.9325, "step": 18510 }, { "epoch": 0.6709071798774963, "grad_norm": 2.1544194937273735, "learning_rate": 2.5812496233599827e-06, "loss": 0.831, "step": 18511 }, { "epoch": 0.6709434235801529, "grad_norm": 2.1599355291576043, "learning_rate": 2.5807359567094347e-06, "loss": 0.7365, "step": 18512 }, { "epoch": 0.6709796672828097, "grad_norm": 2.350135773247218, "learning_rate": 2.5802223233948788e-06, "loss": 0.9394, "step": 18513 }, { "epoch": 0.6710159109854663, "grad_norm": 2.54607424490491, "learning_rate": 2.579708723423399e-06, "loss": 0.7659, "step": 18514 }, { "epoch": 0.671052154688123, "grad_norm": 2.111745333554755, "learning_rate": 2.579195156802068e-06, "loss": 0.9797, "step": 18515 }, { "epoch": 0.6710883983907796, "grad_norm": 2.1384400591858594, "learning_rate": 2.5786816235379654e-06, "loss": 1.017, "step": 18516 }, { "epoch": 0.6711246420934363, "grad_norm": 2.5485420689302374, "learning_rate": 2.5781681236381633e-06, "loss": 1.0109, "step": 18517 }, { "epoch": 0.6711608857960929, "grad_norm": 2.4787590640403065, "learning_rate": 2.5776546571097404e-06, "loss": 0.8706, "step": 18518 }, { "epoch": 0.6711971294987495, "grad_norm": 2.526660843053965, "learning_rate": 2.5771412239597726e-06, "loss": 0.7878, "step": 18519 }, { "epoch": 0.6712333732014063, "grad_norm": 2.238745679641359, "learning_rate": 2.5766278241953323e-06, "loss": 0.9959, "step": 18520 }, { "epoch": 0.671269616904063, "grad_norm": 2.6336258890307658, "learning_rate": 2.5761144578234944e-06, "loss": 0.8429, "step": 18521 }, { "epoch": 0.6713058606067196, "grad_norm": 2.4682278908386923, "learning_rate": 2.5756011248513336e-06, "loss": 1.087, "step": 18522 }, { "epoch": 0.6713421043093762, "grad_norm": 2.528678892693339, "learning_rate": 2.575087825285925e-06, "loss": 0.9069, "step": 18523 }, { "epoch": 0.6713783480120329, "grad_norm": 2.373620489148706, "learning_rate": 2.5745745591343373e-06, "loss": 0.9572, "step": 18524 }, { "epoch": 0.6714145917146895, "grad_norm": 2.6394654589381905, "learning_rate": 2.574061326403646e-06, "loss": 0.7898, "step": 18525 }, { "epoch": 0.6714508354173462, "grad_norm": 2.154445985647865, "learning_rate": 2.573548127100924e-06, "loss": 0.8243, "step": 18526 }, { "epoch": 0.6714870791200029, "grad_norm": 2.2358148121210015, "learning_rate": 2.5730349612332397e-06, "loss": 0.9724, "step": 18527 }, { "epoch": 0.6715233228226596, "grad_norm": 2.201799782407211, "learning_rate": 2.5725218288076674e-06, "loss": 0.9181, "step": 18528 }, { "epoch": 0.6715595665253162, "grad_norm": 2.513864259296894, "learning_rate": 2.5720087298312725e-06, "loss": 0.8119, "step": 18529 }, { "epoch": 0.6715958102279729, "grad_norm": 2.48843117157742, "learning_rate": 2.571495664311133e-06, "loss": 0.9259, "step": 18530 }, { "epoch": 0.6716320539306295, "grad_norm": 2.478612893144861, "learning_rate": 2.570982632254312e-06, "loss": 0.8732, "step": 18531 }, { "epoch": 0.6716682976332862, "grad_norm": 2.205322990204646, "learning_rate": 2.5704696336678834e-06, "loss": 0.7429, "step": 18532 }, { "epoch": 0.6717045413359429, "grad_norm": 2.1606723716858354, "learning_rate": 2.5699566685589108e-06, "loss": 0.9246, "step": 18533 }, { "epoch": 0.6717407850385996, "grad_norm": 2.241836747421699, "learning_rate": 2.569443736934469e-06, "loss": 0.803, "step": 18534 }, { "epoch": 0.6717770287412562, "grad_norm": 2.549406720686024, "learning_rate": 2.5689308388016205e-06, "loss": 0.9942, "step": 18535 }, { "epoch": 0.6718132724439129, "grad_norm": 2.7216152465434735, "learning_rate": 2.5684179741674364e-06, "loss": 1.0015, "step": 18536 }, { "epoch": 0.6718495161465695, "grad_norm": 2.033590678480946, "learning_rate": 2.5679051430389806e-06, "loss": 0.9126, "step": 18537 }, { "epoch": 0.6718857598492262, "grad_norm": 2.5354676807217005, "learning_rate": 2.5673923454233206e-06, "loss": 0.9087, "step": 18538 }, { "epoch": 0.6719220035518828, "grad_norm": 2.5348227550031024, "learning_rate": 2.566879581327526e-06, "loss": 0.8736, "step": 18539 }, { "epoch": 0.6719582472545396, "grad_norm": 2.484866661588205, "learning_rate": 2.566366850758656e-06, "loss": 0.8907, "step": 18540 }, { "epoch": 0.6719944909571962, "grad_norm": 2.3667780607350264, "learning_rate": 2.56585415372378e-06, "loss": 1.0597, "step": 18541 }, { "epoch": 0.6720307346598529, "grad_norm": 2.2517342144011687, "learning_rate": 2.565341490229961e-06, "loss": 0.9156, "step": 18542 }, { "epoch": 0.6720669783625095, "grad_norm": 2.3822080633395144, "learning_rate": 2.5648288602842656e-06, "loss": 0.8476, "step": 18543 }, { "epoch": 0.6721032220651662, "grad_norm": 2.2557224700663823, "learning_rate": 2.564316263893754e-06, "loss": 0.9161, "step": 18544 }, { "epoch": 0.6721394657678228, "grad_norm": 2.156037364829858, "learning_rate": 2.5638037010654914e-06, "loss": 0.8036, "step": 18545 }, { "epoch": 0.6721757094704796, "grad_norm": 2.3774042745269948, "learning_rate": 2.563291171806542e-06, "loss": 0.802, "step": 18546 }, { "epoch": 0.6722119531731362, "grad_norm": 2.156710247225028, "learning_rate": 2.5627786761239654e-06, "loss": 0.7373, "step": 18547 }, { "epoch": 0.6722481968757928, "grad_norm": 2.3657725742383495, "learning_rate": 2.562266214024826e-06, "loss": 0.9536, "step": 18548 }, { "epoch": 0.6722844405784495, "grad_norm": 2.5000555539608955, "learning_rate": 2.56175378551618e-06, "loss": 0.9204, "step": 18549 }, { "epoch": 0.6723206842811061, "grad_norm": 2.400448736420363, "learning_rate": 2.561241390605097e-06, "loss": 0.9236, "step": 18550 }, { "epoch": 0.6723569279837628, "grad_norm": 2.482315011076658, "learning_rate": 2.5607290292986296e-06, "loss": 0.8967, "step": 18551 }, { "epoch": 0.6723931716864194, "grad_norm": 2.30674087428842, "learning_rate": 2.560216701603844e-06, "loss": 0.9317, "step": 18552 }, { "epoch": 0.6724294153890762, "grad_norm": 2.323032028561185, "learning_rate": 2.5597044075277932e-06, "loss": 1.0275, "step": 18553 }, { "epoch": 0.6724656590917328, "grad_norm": 2.134129223448578, "learning_rate": 2.559192147077541e-06, "loss": 0.9328, "step": 18554 }, { "epoch": 0.6725019027943895, "grad_norm": 2.609540527778835, "learning_rate": 2.5586799202601464e-06, "loss": 0.8988, "step": 18555 }, { "epoch": 0.6725381464970461, "grad_norm": 2.3974513305773884, "learning_rate": 2.5581677270826643e-06, "loss": 0.922, "step": 18556 }, { "epoch": 0.6725743901997028, "grad_norm": 2.188485710378586, "learning_rate": 2.557655567552154e-06, "loss": 0.8374, "step": 18557 }, { "epoch": 0.6726106339023594, "grad_norm": 2.4731090376261142, "learning_rate": 2.5571434416756735e-06, "loss": 0.8316, "step": 18558 }, { "epoch": 0.6726468776050162, "grad_norm": 2.2253024923861005, "learning_rate": 2.55663134946028e-06, "loss": 0.9015, "step": 18559 }, { "epoch": 0.6726831213076728, "grad_norm": 2.118170687004951, "learning_rate": 2.5561192909130273e-06, "loss": 0.8219, "step": 18560 }, { "epoch": 0.6727193650103295, "grad_norm": 2.5224493340532854, "learning_rate": 2.5556072660409727e-06, "loss": 0.7963, "step": 18561 }, { "epoch": 0.6727556087129861, "grad_norm": 2.2446580736481, "learning_rate": 2.5550952748511715e-06, "loss": 0.8925, "step": 18562 }, { "epoch": 0.6727918524156428, "grad_norm": 2.4068595912425854, "learning_rate": 2.5545833173506814e-06, "loss": 0.7924, "step": 18563 }, { "epoch": 0.6728280961182994, "grad_norm": 2.305518677584261, "learning_rate": 2.5540713935465515e-06, "loss": 0.7937, "step": 18564 }, { "epoch": 0.6728643398209561, "grad_norm": 2.543683347026588, "learning_rate": 2.5535595034458393e-06, "loss": 1.0407, "step": 18565 }, { "epoch": 0.6729005835236128, "grad_norm": 2.3550215508940737, "learning_rate": 2.553047647055599e-06, "loss": 0.8284, "step": 18566 }, { "epoch": 0.6729368272262695, "grad_norm": 2.3445892685660357, "learning_rate": 2.55253582438288e-06, "loss": 0.7709, "step": 18567 }, { "epoch": 0.6729730709289261, "grad_norm": 2.379155970682367, "learning_rate": 2.552024035434738e-06, "loss": 0.7918, "step": 18568 }, { "epoch": 0.6730093146315828, "grad_norm": 2.2172773355674154, "learning_rate": 2.5515122802182233e-06, "loss": 0.9144, "step": 18569 }, { "epoch": 0.6730455583342394, "grad_norm": 2.3768990473374942, "learning_rate": 2.5510005587403914e-06, "loss": 0.8721, "step": 18570 }, { "epoch": 0.673081802036896, "grad_norm": 2.1478469042083153, "learning_rate": 2.5504888710082886e-06, "loss": 0.6996, "step": 18571 }, { "epoch": 0.6731180457395528, "grad_norm": 2.4006558928897985, "learning_rate": 2.5499772170289672e-06, "loss": 0.9264, "step": 18572 }, { "epoch": 0.6731542894422095, "grad_norm": 3.0587869653704267, "learning_rate": 2.5494655968094806e-06, "loss": 1.0288, "step": 18573 }, { "epoch": 0.6731905331448661, "grad_norm": 2.434914954053714, "learning_rate": 2.548954010356874e-06, "loss": 0.789, "step": 18574 }, { "epoch": 0.6732267768475227, "grad_norm": 2.462550695347283, "learning_rate": 2.5484424576782e-06, "loss": 0.8396, "step": 18575 }, { "epoch": 0.6732630205501794, "grad_norm": 2.34051084610657, "learning_rate": 2.5479309387805027e-06, "loss": 1.0061, "step": 18576 }, { "epoch": 0.673299264252836, "grad_norm": 2.441325804901601, "learning_rate": 2.5474194536708383e-06, "loss": 0.8061, "step": 18577 }, { "epoch": 0.6733355079554927, "grad_norm": 2.4243707720496346, "learning_rate": 2.5469080023562486e-06, "loss": 0.8569, "step": 18578 }, { "epoch": 0.6733717516581494, "grad_norm": 2.36281974086928, "learning_rate": 2.5463965848437856e-06, "loss": 0.7615, "step": 18579 }, { "epoch": 0.6734079953608061, "grad_norm": 2.1229350002043326, "learning_rate": 2.545885201140489e-06, "loss": 0.839, "step": 18580 }, { "epoch": 0.6734442390634627, "grad_norm": 2.2867718835436843, "learning_rate": 2.5453738512534144e-06, "loss": 0.8659, "step": 18581 }, { "epoch": 0.6734804827661194, "grad_norm": 2.066613997440637, "learning_rate": 2.5448625351896017e-06, "loss": 0.7453, "step": 18582 }, { "epoch": 0.673516726468776, "grad_norm": 2.215419186897804, "learning_rate": 2.5443512529561e-06, "loss": 1.0406, "step": 18583 }, { "epoch": 0.6735529701714327, "grad_norm": 2.1556113972028945, "learning_rate": 2.543840004559951e-06, "loss": 0.9501, "step": 18584 }, { "epoch": 0.6735892138740894, "grad_norm": 2.2211394782884892, "learning_rate": 2.5433287900082017e-06, "loss": 0.8659, "step": 18585 }, { "epoch": 0.6736254575767461, "grad_norm": 2.3741521443300138, "learning_rate": 2.542817609307897e-06, "loss": 1.0242, "step": 18586 }, { "epoch": 0.6736617012794027, "grad_norm": 2.4341613094047276, "learning_rate": 2.5423064624660788e-06, "loss": 0.9156, "step": 18587 }, { "epoch": 0.6736979449820594, "grad_norm": 2.1512181630625813, "learning_rate": 2.541795349489791e-06, "loss": 0.7614, "step": 18588 }, { "epoch": 0.673734188684716, "grad_norm": 2.452094932457271, "learning_rate": 2.5412842703860764e-06, "loss": 0.9027, "step": 18589 }, { "epoch": 0.6737704323873727, "grad_norm": 2.1453622723936996, "learning_rate": 2.5407732251619786e-06, "loss": 0.8831, "step": 18590 }, { "epoch": 0.6738066760900293, "grad_norm": 2.167656083348911, "learning_rate": 2.5402622138245375e-06, "loss": 0.8636, "step": 18591 }, { "epoch": 0.6738429197926861, "grad_norm": 2.1912780686587556, "learning_rate": 2.539751236380795e-06, "loss": 0.8286, "step": 18592 }, { "epoch": 0.6738791634953427, "grad_norm": 2.46170431123434, "learning_rate": 2.539240292837795e-06, "loss": 1.0846, "step": 18593 }, { "epoch": 0.6739154071979994, "grad_norm": 2.2869220167618827, "learning_rate": 2.5387293832025738e-06, "loss": 0.8241, "step": 18594 }, { "epoch": 0.673951650900656, "grad_norm": 2.0630892317075045, "learning_rate": 2.538218507482174e-06, "loss": 0.8042, "step": 18595 }, { "epoch": 0.6739878946033127, "grad_norm": 2.292125860809709, "learning_rate": 2.537707665683632e-06, "loss": 0.8794, "step": 18596 }, { "epoch": 0.6740241383059693, "grad_norm": 2.6131561105029055, "learning_rate": 2.5371968578139914e-06, "loss": 0.8701, "step": 18597 }, { "epoch": 0.674060382008626, "grad_norm": 2.5772522836189804, "learning_rate": 2.536686083880288e-06, "loss": 0.9409, "step": 18598 }, { "epoch": 0.6740966257112827, "grad_norm": 2.3503693534888, "learning_rate": 2.536175343889562e-06, "loss": 0.9569, "step": 18599 }, { "epoch": 0.6741328694139394, "grad_norm": 2.2338462937309824, "learning_rate": 2.5356646378488457e-06, "loss": 0.7185, "step": 18600 }, { "epoch": 0.674169113116596, "grad_norm": 2.495414621416032, "learning_rate": 2.5351539657651847e-06, "loss": 0.9412, "step": 18601 }, { "epoch": 0.6742053568192526, "grad_norm": 2.1645779710733404, "learning_rate": 2.5346433276456093e-06, "loss": 0.7815, "step": 18602 }, { "epoch": 0.6742416005219093, "grad_norm": 2.6171282388586032, "learning_rate": 2.534132723497159e-06, "loss": 0.8228, "step": 18603 }, { "epoch": 0.6742778442245659, "grad_norm": 2.095323686322525, "learning_rate": 2.533622153326868e-06, "loss": 0.9343, "step": 18604 }, { "epoch": 0.6743140879272227, "grad_norm": 2.402909878690048, "learning_rate": 2.533111617141771e-06, "loss": 0.854, "step": 18605 }, { "epoch": 0.6743503316298793, "grad_norm": 2.3110881255033062, "learning_rate": 2.532601114948906e-06, "loss": 0.8602, "step": 18606 }, { "epoch": 0.674386575332536, "grad_norm": 2.510417291584733, "learning_rate": 2.5320906467553035e-06, "loss": 0.9744, "step": 18607 }, { "epoch": 0.6744228190351926, "grad_norm": 2.110646472536295, "learning_rate": 2.5315802125679994e-06, "loss": 0.8893, "step": 18608 }, { "epoch": 0.6744590627378493, "grad_norm": 2.181633035694078, "learning_rate": 2.5310698123940274e-06, "loss": 0.9255, "step": 18609 }, { "epoch": 0.6744953064405059, "grad_norm": 2.251380372255625, "learning_rate": 2.530559446240422e-06, "loss": 0.9269, "step": 18610 }, { "epoch": 0.6745315501431626, "grad_norm": 2.4832107034897786, "learning_rate": 2.530049114114212e-06, "loss": 0.9883, "step": 18611 }, { "epoch": 0.6745677938458193, "grad_norm": 2.384225165507654, "learning_rate": 2.529538816022431e-06, "loss": 0.7598, "step": 18612 }, { "epoch": 0.674604037548476, "grad_norm": 2.588547564935055, "learning_rate": 2.5290285519721136e-06, "loss": 1.0044, "step": 18613 }, { "epoch": 0.6746402812511326, "grad_norm": 2.3741585689226077, "learning_rate": 2.5285183219702863e-06, "loss": 0.8485, "step": 18614 }, { "epoch": 0.6746765249537893, "grad_norm": 2.2516047258737597, "learning_rate": 2.5280081260239835e-06, "loss": 0.7876, "step": 18615 }, { "epoch": 0.6747127686564459, "grad_norm": 2.5947997924298187, "learning_rate": 2.52749796414023e-06, "loss": 0.8625, "step": 18616 }, { "epoch": 0.6747490123591026, "grad_norm": 2.404841994752052, "learning_rate": 2.526987836326063e-06, "loss": 1.0093, "step": 18617 }, { "epoch": 0.6747852560617593, "grad_norm": 2.407572466777228, "learning_rate": 2.526477742588506e-06, "loss": 0.9349, "step": 18618 }, { "epoch": 0.674821499764416, "grad_norm": 2.485976940770283, "learning_rate": 2.5259676829345923e-06, "loss": 0.8705, "step": 18619 }, { "epoch": 0.6748577434670726, "grad_norm": 2.251572436403676, "learning_rate": 2.525457657371343e-06, "loss": 0.8421, "step": 18620 }, { "epoch": 0.6748939871697293, "grad_norm": 2.0328440258168095, "learning_rate": 2.5249476659057948e-06, "loss": 0.6816, "step": 18621 }, { "epoch": 0.6749302308723859, "grad_norm": 2.2513324503884746, "learning_rate": 2.524437708544969e-06, "loss": 1.0206, "step": 18622 }, { "epoch": 0.6749664745750426, "grad_norm": 2.257148499144191, "learning_rate": 2.5239277852958965e-06, "loss": 0.9659, "step": 18623 }, { "epoch": 0.6750027182776992, "grad_norm": 2.211947710590791, "learning_rate": 2.5234178961655996e-06, "loss": 0.8621, "step": 18624 }, { "epoch": 0.675038961980356, "grad_norm": 2.1553941698097985, "learning_rate": 2.5229080411611064e-06, "loss": 0.7446, "step": 18625 }, { "epoch": 0.6750752056830126, "grad_norm": 2.244879110087204, "learning_rate": 2.5223982202894435e-06, "loss": 0.9365, "step": 18626 }, { "epoch": 0.6751114493856692, "grad_norm": 2.425175718880656, "learning_rate": 2.5218884335576338e-06, "loss": 0.9019, "step": 18627 }, { "epoch": 0.6751476930883259, "grad_norm": 2.2973658957152456, "learning_rate": 2.5213786809727024e-06, "loss": 0.9842, "step": 18628 }, { "epoch": 0.6751839367909825, "grad_norm": 2.594275083484186, "learning_rate": 2.520868962541674e-06, "loss": 0.8638, "step": 18629 }, { "epoch": 0.6752201804936392, "grad_norm": 2.218171084310297, "learning_rate": 2.5203592782715735e-06, "loss": 0.8251, "step": 18630 }, { "epoch": 0.675256424196296, "grad_norm": 2.34488899365653, "learning_rate": 2.5198496281694206e-06, "loss": 0.7593, "step": 18631 }, { "epoch": 0.6752926678989526, "grad_norm": 2.3186369865159078, "learning_rate": 2.5193400122422396e-06, "loss": 0.787, "step": 18632 }, { "epoch": 0.6753289116016092, "grad_norm": 2.084292192935356, "learning_rate": 2.518830430497056e-06, "loss": 0.816, "step": 18633 }, { "epoch": 0.6753651553042659, "grad_norm": 2.5070470761121846, "learning_rate": 2.5183208829408857e-06, "loss": 0.9138, "step": 18634 }, { "epoch": 0.6754013990069225, "grad_norm": 2.2545929008999503, "learning_rate": 2.5178113695807545e-06, "loss": 1.0218, "step": 18635 }, { "epoch": 0.6754376427095792, "grad_norm": 2.59970077433244, "learning_rate": 2.5173018904236777e-06, "loss": 0.8567, "step": 18636 }, { "epoch": 0.6754738864122358, "grad_norm": 2.212373852217888, "learning_rate": 2.5167924454766833e-06, "loss": 0.978, "step": 18637 }, { "epoch": 0.6755101301148926, "grad_norm": 2.674165463310669, "learning_rate": 2.5162830347467858e-06, "loss": 0.9326, "step": 18638 }, { "epoch": 0.6755463738175492, "grad_norm": 2.3037132859913383, "learning_rate": 2.5157736582410076e-06, "loss": 0.8157, "step": 18639 }, { "epoch": 0.6755826175202059, "grad_norm": 2.346060471267077, "learning_rate": 2.5152643159663635e-06, "loss": 0.9042, "step": 18640 }, { "epoch": 0.6756188612228625, "grad_norm": 2.3522134942632493, "learning_rate": 2.5147550079298753e-06, "loss": 1.0936, "step": 18641 }, { "epoch": 0.6756551049255192, "grad_norm": 2.4016928097386465, "learning_rate": 2.5142457341385594e-06, "loss": 0.8737, "step": 18642 }, { "epoch": 0.6756913486281758, "grad_norm": 2.0718868312234693, "learning_rate": 2.513736494599434e-06, "loss": 0.8254, "step": 18643 }, { "epoch": 0.6757275923308326, "grad_norm": 2.2263082353640837, "learning_rate": 2.5132272893195187e-06, "loss": 0.853, "step": 18644 }, { "epoch": 0.6757638360334892, "grad_norm": 2.325962908254261, "learning_rate": 2.5127181183058246e-06, "loss": 0.9491, "step": 18645 }, { "epoch": 0.6758000797361459, "grad_norm": 2.5573187957744605, "learning_rate": 2.5122089815653726e-06, "loss": 0.7598, "step": 18646 }, { "epoch": 0.6758363234388025, "grad_norm": 2.2490565763319843, "learning_rate": 2.511699879105173e-06, "loss": 0.8732, "step": 18647 }, { "epoch": 0.6758725671414592, "grad_norm": 2.566904361846316, "learning_rate": 2.511190810932248e-06, "loss": 0.8395, "step": 18648 }, { "epoch": 0.6759088108441158, "grad_norm": 2.0408922020601215, "learning_rate": 2.5106817770536064e-06, "loss": 0.7407, "step": 18649 }, { "epoch": 0.6759450545467725, "grad_norm": 2.142238859346969, "learning_rate": 2.5101727774762654e-06, "loss": 0.8717, "step": 18650 }, { "epoch": 0.6759812982494292, "grad_norm": 2.4821785459051138, "learning_rate": 2.509663812207237e-06, "loss": 0.8941, "step": 18651 }, { "epoch": 0.6760175419520859, "grad_norm": 2.1346902982782496, "learning_rate": 2.509154881253535e-06, "loss": 0.8119, "step": 18652 }, { "epoch": 0.6760537856547425, "grad_norm": 2.1057625481793125, "learning_rate": 2.508645984622173e-06, "loss": 1.0609, "step": 18653 }, { "epoch": 0.6760900293573991, "grad_norm": 2.482470014639223, "learning_rate": 2.5081371223201623e-06, "loss": 0.9557, "step": 18654 }, { "epoch": 0.6761262730600558, "grad_norm": 2.1995347409020045, "learning_rate": 2.5076282943545136e-06, "loss": 0.7944, "step": 18655 }, { "epoch": 0.6761625167627124, "grad_norm": 2.3151565301558366, "learning_rate": 2.5071195007322415e-06, "loss": 0.9214, "step": 18656 }, { "epoch": 0.6761987604653692, "grad_norm": 2.4154639283299697, "learning_rate": 2.506610741460356e-06, "loss": 1.0266, "step": 18657 }, { "epoch": 0.6762350041680258, "grad_norm": 2.199234038372097, "learning_rate": 2.5061020165458637e-06, "loss": 0.8679, "step": 18658 }, { "epoch": 0.6762712478706825, "grad_norm": 2.4543095186972113, "learning_rate": 2.5055933259957787e-06, "loss": 0.7516, "step": 18659 }, { "epoch": 0.6763074915733391, "grad_norm": 2.4005786244469927, "learning_rate": 2.5050846698171104e-06, "loss": 0.9536, "step": 18660 }, { "epoch": 0.6763437352759958, "grad_norm": 2.430738114506155, "learning_rate": 2.5045760480168645e-06, "loss": 0.9217, "step": 18661 }, { "epoch": 0.6763799789786524, "grad_norm": 2.528843036641141, "learning_rate": 2.5040674606020532e-06, "loss": 0.9135, "step": 18662 }, { "epoch": 0.6764162226813091, "grad_norm": 2.581209788123543, "learning_rate": 2.5035589075796794e-06, "loss": 0.7479, "step": 18663 }, { "epoch": 0.6764524663839658, "grad_norm": 2.536228708191426, "learning_rate": 2.5030503889567583e-06, "loss": 0.7942, "step": 18664 }, { "epoch": 0.6764887100866225, "grad_norm": 2.4251619706154166, "learning_rate": 2.50254190474029e-06, "loss": 0.8348, "step": 18665 }, { "epoch": 0.6765249537892791, "grad_norm": 2.25404068746564, "learning_rate": 2.502033454937286e-06, "loss": 0.872, "step": 18666 }, { "epoch": 0.6765611974919358, "grad_norm": 2.821069122401452, "learning_rate": 2.5015250395547464e-06, "loss": 0.8412, "step": 18667 }, { "epoch": 0.6765974411945924, "grad_norm": 2.361765503531815, "learning_rate": 2.5010166585996844e-06, "loss": 0.8341, "step": 18668 }, { "epoch": 0.6766336848972491, "grad_norm": 2.692447430369801, "learning_rate": 2.5005083120791003e-06, "loss": 0.9361, "step": 18669 }, { "epoch": 0.6766699285999057, "grad_norm": 2.5243321526517417, "learning_rate": 2.5000000000000015e-06, "loss": 1.0454, "step": 18670 }, { "epoch": 0.6767061723025625, "grad_norm": 2.401404977130736, "learning_rate": 2.4994917223693887e-06, "loss": 0.9821, "step": 18671 }, { "epoch": 0.6767424160052191, "grad_norm": 2.2538154524233813, "learning_rate": 2.4989834791942675e-06, "loss": 1.0519, "step": 18672 }, { "epoch": 0.6767786597078758, "grad_norm": 2.099127189155838, "learning_rate": 2.498475270481644e-06, "loss": 0.7637, "step": 18673 }, { "epoch": 0.6768149034105324, "grad_norm": 2.447747797764275, "learning_rate": 2.4979670962385157e-06, "loss": 0.7933, "step": 18674 }, { "epoch": 0.6768511471131891, "grad_norm": 2.1623732319537305, "learning_rate": 2.497458956471888e-06, "loss": 0.7965, "step": 18675 }, { "epoch": 0.6768873908158457, "grad_norm": 2.2853168322507353, "learning_rate": 2.4969508511887627e-06, "loss": 1.0103, "step": 18676 }, { "epoch": 0.6769236345185025, "grad_norm": 1.9054685279087915, "learning_rate": 2.496442780396142e-06, "loss": 0.7374, "step": 18677 }, { "epoch": 0.6769598782211591, "grad_norm": 2.3935778446127403, "learning_rate": 2.4959347441010247e-06, "loss": 0.8258, "step": 18678 }, { "epoch": 0.6769961219238158, "grad_norm": 2.314461142073889, "learning_rate": 2.495426742310412e-06, "loss": 0.7019, "step": 18679 }, { "epoch": 0.6770323656264724, "grad_norm": 2.8544806708600934, "learning_rate": 2.494918775031305e-06, "loss": 0.8595, "step": 18680 }, { "epoch": 0.677068609329129, "grad_norm": 2.3789218028748778, "learning_rate": 2.4944108422707015e-06, "loss": 0.8724, "step": 18681 }, { "epoch": 0.6771048530317857, "grad_norm": 2.4829641148404233, "learning_rate": 2.4939029440356025e-06, "loss": 0.8534, "step": 18682 }, { "epoch": 0.6771410967344423, "grad_norm": 2.4285711833107624, "learning_rate": 2.493395080333002e-06, "loss": 0.8535, "step": 18683 }, { "epoch": 0.6771773404370991, "grad_norm": 2.2556612016850073, "learning_rate": 2.4928872511699054e-06, "loss": 0.8366, "step": 18684 }, { "epoch": 0.6772135841397557, "grad_norm": 2.361112655660237, "learning_rate": 2.492379456553303e-06, "loss": 0.8666, "step": 18685 }, { "epoch": 0.6772498278424124, "grad_norm": 2.284763231246401, "learning_rate": 2.4918716964901983e-06, "loss": 0.8294, "step": 18686 }, { "epoch": 0.677286071545069, "grad_norm": 2.333965213483644, "learning_rate": 2.491363970987581e-06, "loss": 0.8987, "step": 18687 }, { "epoch": 0.6773223152477257, "grad_norm": 2.3880595965118183, "learning_rate": 2.4908562800524537e-06, "loss": 1.0943, "step": 18688 }, { "epoch": 0.6773585589503823, "grad_norm": 2.1350498778912304, "learning_rate": 2.4903486236918082e-06, "loss": 0.8499, "step": 18689 }, { "epoch": 0.6773948026530391, "grad_norm": 2.398779527815546, "learning_rate": 2.4898410019126427e-06, "loss": 0.947, "step": 18690 }, { "epoch": 0.6774310463556957, "grad_norm": 2.9341176050325264, "learning_rate": 2.4893334147219485e-06, "loss": 0.848, "step": 18691 }, { "epoch": 0.6774672900583524, "grad_norm": 2.839568033456765, "learning_rate": 2.488825862126721e-06, "loss": 0.8704, "step": 18692 }, { "epoch": 0.677503533761009, "grad_norm": 2.472032952380842, "learning_rate": 2.4883183441339566e-06, "loss": 0.977, "step": 18693 }, { "epoch": 0.6775397774636657, "grad_norm": 2.293510078794116, "learning_rate": 2.487810860750644e-06, "loss": 0.8863, "step": 18694 }, { "epoch": 0.6775760211663223, "grad_norm": 2.771865858592301, "learning_rate": 2.4873034119837793e-06, "loss": 0.7581, "step": 18695 }, { "epoch": 0.677612264868979, "grad_norm": 2.783869820037348, "learning_rate": 2.4867959978403535e-06, "loss": 0.922, "step": 18696 }, { "epoch": 0.6776485085716357, "grad_norm": 2.1966193947938417, "learning_rate": 2.4862886183273603e-06, "loss": 0.6191, "step": 18697 }, { "epoch": 0.6776847522742924, "grad_norm": 2.415511735080405, "learning_rate": 2.4857812734517884e-06, "loss": 0.9018, "step": 18698 }, { "epoch": 0.677720995976949, "grad_norm": 2.284225123947412, "learning_rate": 2.4852739632206295e-06, "loss": 0.7988, "step": 18699 }, { "epoch": 0.6777572396796057, "grad_norm": 2.5915053385136977, "learning_rate": 2.4847666876408773e-06, "loss": 1.0843, "step": 18700 }, { "epoch": 0.6777934833822623, "grad_norm": 2.2556702727199833, "learning_rate": 2.4842594467195165e-06, "loss": 0.8838, "step": 18701 }, { "epoch": 0.677829727084919, "grad_norm": 2.460828172489036, "learning_rate": 2.4837522404635407e-06, "loss": 0.9814, "step": 18702 }, { "epoch": 0.6778659707875757, "grad_norm": 2.6119601391678464, "learning_rate": 2.4832450688799336e-06, "loss": 0.8784, "step": 18703 }, { "epoch": 0.6779022144902324, "grad_norm": 2.5659904420355457, "learning_rate": 2.4827379319756918e-06, "loss": 0.8641, "step": 18704 }, { "epoch": 0.677938458192889, "grad_norm": 2.6477371593121193, "learning_rate": 2.4822308297577964e-06, "loss": 0.865, "step": 18705 }, { "epoch": 0.6779747018955457, "grad_norm": 2.5343780041300334, "learning_rate": 2.481723762233239e-06, "loss": 0.7627, "step": 18706 }, { "epoch": 0.6780109455982023, "grad_norm": 2.485708555311298, "learning_rate": 2.4812167294090022e-06, "loss": 0.8719, "step": 18707 }, { "epoch": 0.6780471893008589, "grad_norm": 2.1521527039886466, "learning_rate": 2.4807097312920786e-06, "loss": 0.8386, "step": 18708 }, { "epoch": 0.6780834330035156, "grad_norm": 2.2013277619903078, "learning_rate": 2.48020276788945e-06, "loss": 0.8007, "step": 18709 }, { "epoch": 0.6781196767061723, "grad_norm": 2.2039710770482603, "learning_rate": 2.479695839208105e-06, "loss": 0.8903, "step": 18710 }, { "epoch": 0.678155920408829, "grad_norm": 2.2853913577520037, "learning_rate": 2.479188945255025e-06, "loss": 0.8906, "step": 18711 }, { "epoch": 0.6781921641114856, "grad_norm": 2.348511047851681, "learning_rate": 2.4786820860371976e-06, "loss": 0.8352, "step": 18712 }, { "epoch": 0.6782284078141423, "grad_norm": 2.3852391803507214, "learning_rate": 2.478175261561607e-06, "loss": 0.9359, "step": 18713 }, { "epoch": 0.6782646515167989, "grad_norm": 2.357679948654922, "learning_rate": 2.477668471835235e-06, "loss": 0.7589, "step": 18714 }, { "epoch": 0.6783008952194556, "grad_norm": 2.430915955827427, "learning_rate": 2.4771617168650657e-06, "loss": 0.9158, "step": 18715 }, { "epoch": 0.6783371389221123, "grad_norm": 2.358471050761515, "learning_rate": 2.476654996658082e-06, "loss": 0.9047, "step": 18716 }, { "epoch": 0.678373382624769, "grad_norm": 2.3948368793637766, "learning_rate": 2.4761483112212686e-06, "loss": 0.8617, "step": 18717 }, { "epoch": 0.6784096263274256, "grad_norm": 2.40006192680642, "learning_rate": 2.4756416605616036e-06, "loss": 0.8604, "step": 18718 }, { "epoch": 0.6784458700300823, "grad_norm": 2.2096210298427827, "learning_rate": 2.475135044686069e-06, "loss": 0.8721, "step": 18719 }, { "epoch": 0.6784821137327389, "grad_norm": 2.2546378539867495, "learning_rate": 2.4746284636016488e-06, "loss": 0.9227, "step": 18720 }, { "epoch": 0.6785183574353956, "grad_norm": 2.1194470520840674, "learning_rate": 2.474121917315319e-06, "loss": 0.7839, "step": 18721 }, { "epoch": 0.6785546011380522, "grad_norm": 2.394462591895941, "learning_rate": 2.473615405834064e-06, "loss": 1.0648, "step": 18722 }, { "epoch": 0.678590844840709, "grad_norm": 2.2551658974353233, "learning_rate": 2.4731089291648565e-06, "loss": 0.8594, "step": 18723 }, { "epoch": 0.6786270885433656, "grad_norm": 2.3677288881784717, "learning_rate": 2.472602487314683e-06, "loss": 0.7422, "step": 18724 }, { "epoch": 0.6786633322460223, "grad_norm": 2.0277298751431387, "learning_rate": 2.4720960802905177e-06, "loss": 0.8382, "step": 18725 }, { "epoch": 0.6786995759486789, "grad_norm": 2.4112474013038203, "learning_rate": 2.4715897080993397e-06, "loss": 0.8354, "step": 18726 }, { "epoch": 0.6787358196513356, "grad_norm": 2.4165472652777136, "learning_rate": 2.4710833707481274e-06, "loss": 0.8703, "step": 18727 }, { "epoch": 0.6787720633539922, "grad_norm": 2.3886252563886403, "learning_rate": 2.4705770682438556e-06, "loss": 0.7512, "step": 18728 }, { "epoch": 0.6788083070566489, "grad_norm": 2.387108976705945, "learning_rate": 2.470070800593502e-06, "loss": 0.7356, "step": 18729 }, { "epoch": 0.6788445507593056, "grad_norm": 2.341424180410161, "learning_rate": 2.4695645678040426e-06, "loss": 1.0311, "step": 18730 }, { "epoch": 0.6788807944619623, "grad_norm": 2.2307009706745538, "learning_rate": 2.469058369882455e-06, "loss": 0.9473, "step": 18731 }, { "epoch": 0.6789170381646189, "grad_norm": 2.4861153568966925, "learning_rate": 2.4685522068357103e-06, "loss": 1.0438, "step": 18732 }, { "epoch": 0.6789532818672755, "grad_norm": 2.2820702378945668, "learning_rate": 2.468046078670787e-06, "loss": 0.9139, "step": 18733 }, { "epoch": 0.6789895255699322, "grad_norm": 2.2792505867703907, "learning_rate": 2.467539985394654e-06, "loss": 0.9248, "step": 18734 }, { "epoch": 0.6790257692725888, "grad_norm": 2.0809947898790546, "learning_rate": 2.467033927014292e-06, "loss": 0.8769, "step": 18735 }, { "epoch": 0.6790620129752456, "grad_norm": 2.369113526746836, "learning_rate": 2.4665279035366696e-06, "loss": 0.9773, "step": 18736 }, { "epoch": 0.6790982566779022, "grad_norm": 2.140662557645022, "learning_rate": 2.4660219149687616e-06, "loss": 0.8117, "step": 18737 }, { "epoch": 0.6791345003805589, "grad_norm": 2.8617754744862527, "learning_rate": 2.465515961317537e-06, "loss": 1.0262, "step": 18738 }, { "epoch": 0.6791707440832155, "grad_norm": 2.4849439368601427, "learning_rate": 2.4650100425899702e-06, "loss": 0.7645, "step": 18739 }, { "epoch": 0.6792069877858722, "grad_norm": 2.379515138867208, "learning_rate": 2.4645041587930337e-06, "loss": 0.9062, "step": 18740 }, { "epoch": 0.6792432314885288, "grad_norm": 2.3774707201639824, "learning_rate": 2.463998309933694e-06, "loss": 0.9685, "step": 18741 }, { "epoch": 0.6792794751911855, "grad_norm": 2.416936843150282, "learning_rate": 2.4634924960189245e-06, "loss": 0.9559, "step": 18742 }, { "epoch": 0.6793157188938422, "grad_norm": 2.3722135788769863, "learning_rate": 2.462986717055694e-06, "loss": 0.8092, "step": 18743 }, { "epoch": 0.6793519625964989, "grad_norm": 2.3840364362971704, "learning_rate": 2.462480973050974e-06, "loss": 0.7803, "step": 18744 }, { "epoch": 0.6793882062991555, "grad_norm": 2.371444097435349, "learning_rate": 2.46197526401173e-06, "loss": 0.8087, "step": 18745 }, { "epoch": 0.6794244500018122, "grad_norm": 2.507388291400825, "learning_rate": 2.461469589944932e-06, "loss": 0.7415, "step": 18746 }, { "epoch": 0.6794606937044688, "grad_norm": 2.1713888413188753, "learning_rate": 2.4609639508575494e-06, "loss": 0.8069, "step": 18747 }, { "epoch": 0.6794969374071255, "grad_norm": 2.531335556113466, "learning_rate": 2.4604583467565464e-06, "loss": 0.9393, "step": 18748 }, { "epoch": 0.6795331811097822, "grad_norm": 2.632693186954924, "learning_rate": 2.459952777648893e-06, "loss": 0.8859, "step": 18749 }, { "epoch": 0.6795694248124389, "grad_norm": 2.6672658865259966, "learning_rate": 2.4594472435415506e-06, "loss": 0.7486, "step": 18750 }, { "epoch": 0.6796056685150955, "grad_norm": 2.347104320485409, "learning_rate": 2.4589417444414927e-06, "loss": 0.9747, "step": 18751 }, { "epoch": 0.6796419122177522, "grad_norm": 2.5965095340932756, "learning_rate": 2.458436280355679e-06, "loss": 0.9158, "step": 18752 }, { "epoch": 0.6796781559204088, "grad_norm": 2.1552446735839554, "learning_rate": 2.457930851291078e-06, "loss": 0.7352, "step": 18753 }, { "epoch": 0.6797143996230655, "grad_norm": 2.277928210264378, "learning_rate": 2.457425457254648e-06, "loss": 0.9031, "step": 18754 }, { "epoch": 0.6797506433257221, "grad_norm": 2.347371124416568, "learning_rate": 2.456920098253362e-06, "loss": 0.7622, "step": 18755 }, { "epoch": 0.6797868870283789, "grad_norm": 2.4805457674688203, "learning_rate": 2.456414774294177e-06, "loss": 0.951, "step": 18756 }, { "epoch": 0.6798231307310355, "grad_norm": 2.676396557463933, "learning_rate": 2.4559094853840596e-06, "loss": 0.9301, "step": 18757 }, { "epoch": 0.6798593744336922, "grad_norm": 2.4773168101671366, "learning_rate": 2.455404231529969e-06, "loss": 0.9432, "step": 18758 }, { "epoch": 0.6798956181363488, "grad_norm": 2.0502699616826385, "learning_rate": 2.45489901273887e-06, "loss": 0.8521, "step": 18759 }, { "epoch": 0.6799318618390054, "grad_norm": 2.332554195500068, "learning_rate": 2.4543938290177243e-06, "loss": 0.8355, "step": 18760 }, { "epoch": 0.6799681055416621, "grad_norm": 2.393000912216337, "learning_rate": 2.4538886803734906e-06, "loss": 0.7948, "step": 18761 }, { "epoch": 0.6800043492443189, "grad_norm": 2.4403291742397473, "learning_rate": 2.4533835668131317e-06, "loss": 0.8522, "step": 18762 }, { "epoch": 0.6800405929469755, "grad_norm": 2.315332579246729, "learning_rate": 2.4528784883436063e-06, "loss": 0.891, "step": 18763 }, { "epoch": 0.6800768366496321, "grad_norm": 2.1803357327463693, "learning_rate": 2.4523734449718774e-06, "loss": 0.7778, "step": 18764 }, { "epoch": 0.6801130803522888, "grad_norm": 2.5524030880662716, "learning_rate": 2.4518684367048987e-06, "loss": 0.9861, "step": 18765 }, { "epoch": 0.6801493240549454, "grad_norm": 2.1677125895576297, "learning_rate": 2.4513634635496325e-06, "loss": 0.9875, "step": 18766 }, { "epoch": 0.6801855677576021, "grad_norm": 2.6373106216061006, "learning_rate": 2.4508585255130384e-06, "loss": 1.0589, "step": 18767 }, { "epoch": 0.6802218114602587, "grad_norm": 2.38148583240762, "learning_rate": 2.45035362260207e-06, "loss": 0.9884, "step": 18768 }, { "epoch": 0.6802580551629155, "grad_norm": 2.4339888106280445, "learning_rate": 2.4498487548236894e-06, "loss": 0.6683, "step": 18769 }, { "epoch": 0.6802942988655721, "grad_norm": 2.465916602164577, "learning_rate": 2.4493439221848458e-06, "loss": 0.9569, "step": 18770 }, { "epoch": 0.6803305425682288, "grad_norm": 2.210218910959903, "learning_rate": 2.4488391246925047e-06, "loss": 0.8325, "step": 18771 }, { "epoch": 0.6803667862708854, "grad_norm": 2.1942346833763393, "learning_rate": 2.448334362353616e-06, "loss": 0.6868, "step": 18772 }, { "epoch": 0.6804030299735421, "grad_norm": 2.3579269734722024, "learning_rate": 2.4478296351751377e-06, "loss": 0.7836, "step": 18773 }, { "epoch": 0.6804392736761987, "grad_norm": 2.214704599335464, "learning_rate": 2.4473249431640205e-06, "loss": 0.8629, "step": 18774 }, { "epoch": 0.6804755173788555, "grad_norm": 2.3637449050566226, "learning_rate": 2.446820286327225e-06, "loss": 1.0013, "step": 18775 }, { "epoch": 0.6805117610815121, "grad_norm": 2.5134933800695713, "learning_rate": 2.4463156646717e-06, "loss": 0.8287, "step": 18776 }, { "epoch": 0.6805480047841688, "grad_norm": 2.828419487854737, "learning_rate": 2.445811078204403e-06, "loss": 0.8944, "step": 18777 }, { "epoch": 0.6805842484868254, "grad_norm": 2.3247770606996663, "learning_rate": 2.4453065269322824e-06, "loss": 0.9001, "step": 18778 }, { "epoch": 0.6806204921894821, "grad_norm": 2.466654790331649, "learning_rate": 2.4448020108622933e-06, "loss": 1.1618, "step": 18779 }, { "epoch": 0.6806567358921387, "grad_norm": 2.6560397157651403, "learning_rate": 2.444297530001388e-06, "loss": 0.9335, "step": 18780 }, { "epoch": 0.6806929795947954, "grad_norm": 2.283620518806094, "learning_rate": 2.4437930843565163e-06, "loss": 0.9842, "step": 18781 }, { "epoch": 0.6807292232974521, "grad_norm": 2.2732059750402125, "learning_rate": 2.4432886739346296e-06, "loss": 0.8261, "step": 18782 }, { "epoch": 0.6807654670001088, "grad_norm": 2.018628663133227, "learning_rate": 2.4427842987426786e-06, "loss": 0.8296, "step": 18783 }, { "epoch": 0.6808017107027654, "grad_norm": 2.344391980124319, "learning_rate": 2.4422799587876155e-06, "loss": 0.8667, "step": 18784 }, { "epoch": 0.680837954405422, "grad_norm": 2.3861063941490332, "learning_rate": 2.4417756540763855e-06, "loss": 1.0624, "step": 18785 }, { "epoch": 0.6808741981080787, "grad_norm": 2.22750119752547, "learning_rate": 2.4412713846159397e-06, "loss": 0.9101, "step": 18786 }, { "epoch": 0.6809104418107353, "grad_norm": 2.5490462547097508, "learning_rate": 2.440767150413229e-06, "loss": 0.8901, "step": 18787 }, { "epoch": 0.6809466855133921, "grad_norm": 2.417682211851495, "learning_rate": 2.4402629514751975e-06, "loss": 0.7947, "step": 18788 }, { "epoch": 0.6809829292160487, "grad_norm": 2.582100263527163, "learning_rate": 2.4397587878087958e-06, "loss": 1.0388, "step": 18789 }, { "epoch": 0.6810191729187054, "grad_norm": 2.02318325300307, "learning_rate": 2.4392546594209665e-06, "loss": 0.8097, "step": 18790 }, { "epoch": 0.681055416621362, "grad_norm": 2.422071267625848, "learning_rate": 2.438750566318662e-06, "loss": 0.8559, "step": 18791 }, { "epoch": 0.6810916603240187, "grad_norm": 2.690505085221556, "learning_rate": 2.4382465085088253e-06, "loss": 1.0437, "step": 18792 }, { "epoch": 0.6811279040266753, "grad_norm": 2.3499494107363668, "learning_rate": 2.4377424859984035e-06, "loss": 0.8575, "step": 18793 }, { "epoch": 0.681164147729332, "grad_norm": 2.537297685785271, "learning_rate": 2.4372384987943363e-06, "loss": 0.8918, "step": 18794 }, { "epoch": 0.6812003914319887, "grad_norm": 2.5071534971580047, "learning_rate": 2.4367345469035775e-06, "loss": 0.8323, "step": 18795 }, { "epoch": 0.6812366351346454, "grad_norm": 2.432672335269188, "learning_rate": 2.436230630333064e-06, "loss": 0.8363, "step": 18796 }, { "epoch": 0.681272878837302, "grad_norm": 1.9998215135217703, "learning_rate": 2.4357267490897433e-06, "loss": 0.8905, "step": 18797 }, { "epoch": 0.6813091225399587, "grad_norm": 2.2141784139076863, "learning_rate": 2.4352229031805563e-06, "loss": 1.1396, "step": 18798 }, { "epoch": 0.6813453662426153, "grad_norm": 2.4906836322691053, "learning_rate": 2.434719092612446e-06, "loss": 0.8038, "step": 18799 }, { "epoch": 0.681381609945272, "grad_norm": 2.5121450354266965, "learning_rate": 2.434215317392357e-06, "loss": 0.8699, "step": 18800 }, { "epoch": 0.6814178536479286, "grad_norm": 2.40756463929463, "learning_rate": 2.433711577527228e-06, "loss": 1.1052, "step": 18801 }, { "epoch": 0.6814540973505854, "grad_norm": 2.190601715482406, "learning_rate": 2.4332078730240006e-06, "loss": 0.7078, "step": 18802 }, { "epoch": 0.681490341053242, "grad_norm": 2.415445458478087, "learning_rate": 2.4327042038896164e-06, "loss": 1.0077, "step": 18803 }, { "epoch": 0.6815265847558987, "grad_norm": 2.49131525039687, "learning_rate": 2.432200570131018e-06, "loss": 0.9572, "step": 18804 }, { "epoch": 0.6815628284585553, "grad_norm": 2.3774327503247776, "learning_rate": 2.431696971755141e-06, "loss": 0.8146, "step": 18805 }, { "epoch": 0.681599072161212, "grad_norm": 2.1774987747163514, "learning_rate": 2.431193408768926e-06, "loss": 0.7051, "step": 18806 }, { "epoch": 0.6816353158638686, "grad_norm": 2.3724626517942347, "learning_rate": 2.430689881179314e-06, "loss": 1.0111, "step": 18807 }, { "epoch": 0.6816715595665254, "grad_norm": 2.267237129328381, "learning_rate": 2.4301863889932404e-06, "loss": 0.8644, "step": 18808 }, { "epoch": 0.681707803269182, "grad_norm": 2.5460565391522616, "learning_rate": 2.4296829322176436e-06, "loss": 0.8641, "step": 18809 }, { "epoch": 0.6817440469718387, "grad_norm": 2.4210571430859162, "learning_rate": 2.4291795108594614e-06, "loss": 0.8471, "step": 18810 }, { "epoch": 0.6817802906744953, "grad_norm": 2.4954235633071424, "learning_rate": 2.428676124925633e-06, "loss": 0.8474, "step": 18811 }, { "epoch": 0.681816534377152, "grad_norm": 2.7662324882896803, "learning_rate": 2.4281727744230904e-06, "loss": 0.8784, "step": 18812 }, { "epoch": 0.6818527780798086, "grad_norm": 2.6740176772478397, "learning_rate": 2.4276694593587717e-06, "loss": 0.9574, "step": 18813 }, { "epoch": 0.6818890217824652, "grad_norm": 2.2495447279154335, "learning_rate": 2.4271661797396123e-06, "loss": 0.7719, "step": 18814 }, { "epoch": 0.681925265485122, "grad_norm": 2.550123945642549, "learning_rate": 2.4266629355725484e-06, "loss": 0.9112, "step": 18815 }, { "epoch": 0.6819615091877786, "grad_norm": 2.510720015782017, "learning_rate": 2.4261597268645114e-06, "loss": 0.866, "step": 18816 }, { "epoch": 0.6819977528904353, "grad_norm": 2.1960900286804526, "learning_rate": 2.425656553622437e-06, "loss": 0.7662, "step": 18817 }, { "epoch": 0.6820339965930919, "grad_norm": 2.687299629365269, "learning_rate": 2.4251534158532597e-06, "loss": 0.9063, "step": 18818 }, { "epoch": 0.6820702402957486, "grad_norm": 2.3847510626578234, "learning_rate": 2.42465031356391e-06, "loss": 0.831, "step": 18819 }, { "epoch": 0.6821064839984052, "grad_norm": 2.367983982724154, "learning_rate": 2.4241472467613224e-06, "loss": 0.8476, "step": 18820 }, { "epoch": 0.682142727701062, "grad_norm": 2.21910342393057, "learning_rate": 2.423644215452425e-06, "loss": 0.8954, "step": 18821 }, { "epoch": 0.6821789714037186, "grad_norm": 2.701670672462254, "learning_rate": 2.423141219644155e-06, "loss": 0.9951, "step": 18822 }, { "epoch": 0.6822152151063753, "grad_norm": 2.300730460977718, "learning_rate": 2.4226382593434395e-06, "loss": 0.7582, "step": 18823 }, { "epoch": 0.6822514588090319, "grad_norm": 2.220719008218646, "learning_rate": 2.422135334557212e-06, "loss": 0.8779, "step": 18824 }, { "epoch": 0.6822877025116886, "grad_norm": 2.4731444406133023, "learning_rate": 2.421632445292398e-06, "loss": 1.1625, "step": 18825 }, { "epoch": 0.6823239462143452, "grad_norm": 2.338014625772352, "learning_rate": 2.4211295915559296e-06, "loss": 0.9001, "step": 18826 }, { "epoch": 0.6823601899170019, "grad_norm": 2.374919743909175, "learning_rate": 2.420626773354738e-06, "loss": 0.8778, "step": 18827 }, { "epoch": 0.6823964336196586, "grad_norm": 2.2218771150147383, "learning_rate": 2.4201239906957473e-06, "loss": 0.6518, "step": 18828 }, { "epoch": 0.6824326773223153, "grad_norm": 2.3956827871939517, "learning_rate": 2.4196212435858886e-06, "loss": 0.8937, "step": 18829 }, { "epoch": 0.6824689210249719, "grad_norm": 2.131774349443799, "learning_rate": 2.4191185320320875e-06, "loss": 0.8231, "step": 18830 }, { "epoch": 0.6825051647276286, "grad_norm": 2.5783274873585764, "learning_rate": 2.418615856041275e-06, "loss": 1.0826, "step": 18831 }, { "epoch": 0.6825414084302852, "grad_norm": 2.4696644676544963, "learning_rate": 2.4181132156203724e-06, "loss": 0.8472, "step": 18832 }, { "epoch": 0.6825776521329419, "grad_norm": 2.452289254585575, "learning_rate": 2.417610610776308e-06, "loss": 0.8174, "step": 18833 }, { "epoch": 0.6826138958355986, "grad_norm": 2.462520331070678, "learning_rate": 2.41710804151601e-06, "loss": 0.8968, "step": 18834 }, { "epoch": 0.6826501395382553, "grad_norm": 2.5009427196201166, "learning_rate": 2.416605507846399e-06, "loss": 0.8343, "step": 18835 }, { "epoch": 0.6826863832409119, "grad_norm": 2.1954943793547192, "learning_rate": 2.4161030097744035e-06, "loss": 0.798, "step": 18836 }, { "epoch": 0.6827226269435686, "grad_norm": 2.5270165137640648, "learning_rate": 2.415600547306942e-06, "loss": 0.962, "step": 18837 }, { "epoch": 0.6827588706462252, "grad_norm": 2.099843313609019, "learning_rate": 2.415098120450946e-06, "loss": 0.8742, "step": 18838 }, { "epoch": 0.6827951143488818, "grad_norm": 2.2495547637685864, "learning_rate": 2.4145957292133327e-06, "loss": 0.9681, "step": 18839 }, { "epoch": 0.6828313580515385, "grad_norm": 2.2766471494999276, "learning_rate": 2.4140933736010286e-06, "loss": 0.8895, "step": 18840 }, { "epoch": 0.6828676017541953, "grad_norm": 2.478167114775836, "learning_rate": 2.4135910536209502e-06, "loss": 0.9824, "step": 18841 }, { "epoch": 0.6829038454568519, "grad_norm": 2.3219945353596962, "learning_rate": 2.4130887692800266e-06, "loss": 0.8527, "step": 18842 }, { "epoch": 0.6829400891595085, "grad_norm": 2.361524298228734, "learning_rate": 2.412586520585173e-06, "loss": 0.7921, "step": 18843 }, { "epoch": 0.6829763328621652, "grad_norm": 2.3394504638644156, "learning_rate": 2.412084307543314e-06, "loss": 0.9608, "step": 18844 }, { "epoch": 0.6830125765648218, "grad_norm": 2.074960186272404, "learning_rate": 2.4115821301613667e-06, "loss": 1.042, "step": 18845 }, { "epoch": 0.6830488202674785, "grad_norm": 2.3428102160242457, "learning_rate": 2.411079988446252e-06, "loss": 0.9349, "step": 18846 }, { "epoch": 0.6830850639701352, "grad_norm": 2.376149343296511, "learning_rate": 2.4105778824048914e-06, "loss": 0.8604, "step": 18847 }, { "epoch": 0.6831213076727919, "grad_norm": 2.339495983404158, "learning_rate": 2.4100758120442e-06, "loss": 0.8408, "step": 18848 }, { "epoch": 0.6831575513754485, "grad_norm": 2.2312827481860755, "learning_rate": 2.409573777371097e-06, "loss": 0.776, "step": 18849 }, { "epoch": 0.6831937950781052, "grad_norm": 2.4069838993824764, "learning_rate": 2.409071778392501e-06, "loss": 0.9539, "step": 18850 }, { "epoch": 0.6832300387807618, "grad_norm": 2.347678448168419, "learning_rate": 2.4085698151153307e-06, "loss": 0.8712, "step": 18851 }, { "epoch": 0.6832662824834185, "grad_norm": 2.457531310155495, "learning_rate": 2.4080678875464997e-06, "loss": 0.9164, "step": 18852 }, { "epoch": 0.6833025261860751, "grad_norm": 2.2315466002200313, "learning_rate": 2.4075659956929247e-06, "loss": 0.9818, "step": 18853 }, { "epoch": 0.6833387698887319, "grad_norm": 2.414795105319747, "learning_rate": 2.407064139561525e-06, "loss": 0.9827, "step": 18854 }, { "epoch": 0.6833750135913885, "grad_norm": 2.305009489585379, "learning_rate": 2.4065623191592115e-06, "loss": 0.8604, "step": 18855 }, { "epoch": 0.6834112572940452, "grad_norm": 2.3786829294113585, "learning_rate": 2.4060605344929024e-06, "loss": 0.8394, "step": 18856 }, { "epoch": 0.6834475009967018, "grad_norm": 2.396631787658482, "learning_rate": 2.4055587855695066e-06, "loss": 0.9347, "step": 18857 }, { "epoch": 0.6834837446993585, "grad_norm": 2.3675556685314336, "learning_rate": 2.4050570723959453e-06, "loss": 0.8881, "step": 18858 }, { "epoch": 0.6835199884020151, "grad_norm": 2.583116691055669, "learning_rate": 2.404555394979127e-06, "loss": 0.9036, "step": 18859 }, { "epoch": 0.6835562321046719, "grad_norm": 2.3525988430584026, "learning_rate": 2.4040537533259665e-06, "loss": 0.8738, "step": 18860 }, { "epoch": 0.6835924758073285, "grad_norm": 2.4570575584991654, "learning_rate": 2.403552147443372e-06, "loss": 0.9422, "step": 18861 }, { "epoch": 0.6836287195099852, "grad_norm": 2.6034482135684747, "learning_rate": 2.4030505773382627e-06, "loss": 0.8885, "step": 18862 }, { "epoch": 0.6836649632126418, "grad_norm": 2.0439158398108463, "learning_rate": 2.4025490430175437e-06, "loss": 0.7005, "step": 18863 }, { "epoch": 0.6837012069152985, "grad_norm": 2.326915817205422, "learning_rate": 2.4020475444881296e-06, "loss": 0.9791, "step": 18864 }, { "epoch": 0.6837374506179551, "grad_norm": 2.6361842186763242, "learning_rate": 2.401546081756928e-06, "loss": 0.878, "step": 18865 }, { "epoch": 0.6837736943206117, "grad_norm": 1.9266791464604485, "learning_rate": 2.401044654830849e-06, "loss": 1.0231, "step": 18866 }, { "epoch": 0.6838099380232685, "grad_norm": 2.0720524626639656, "learning_rate": 2.400543263716805e-06, "loss": 0.8435, "step": 18867 }, { "epoch": 0.6838461817259252, "grad_norm": 2.7634025109115954, "learning_rate": 2.4000419084217016e-06, "loss": 1.1295, "step": 18868 }, { "epoch": 0.6838824254285818, "grad_norm": 2.4691424265823754, "learning_rate": 2.3995405889524474e-06, "loss": 0.8323, "step": 18869 }, { "epoch": 0.6839186691312384, "grad_norm": 2.3898265988104535, "learning_rate": 2.3990393053159517e-06, "loss": 0.9852, "step": 18870 }, { "epoch": 0.6839549128338951, "grad_norm": 2.4007860600360877, "learning_rate": 2.398538057519123e-06, "loss": 1.0877, "step": 18871 }, { "epoch": 0.6839911565365517, "grad_norm": 2.4551485958235038, "learning_rate": 2.398036845568865e-06, "loss": 1.0136, "step": 18872 }, { "epoch": 0.6840274002392084, "grad_norm": 2.228743729750977, "learning_rate": 2.3975356694720854e-06, "loss": 0.983, "step": 18873 }, { "epoch": 0.6840636439418651, "grad_norm": 2.4562833078736284, "learning_rate": 2.3970345292356923e-06, "loss": 1.0024, "step": 18874 }, { "epoch": 0.6840998876445218, "grad_norm": 2.3019004533335896, "learning_rate": 2.396533424866587e-06, "loss": 0.7641, "step": 18875 }, { "epoch": 0.6841361313471784, "grad_norm": 2.915680565178515, "learning_rate": 2.3960323563716786e-06, "loss": 0.9612, "step": 18876 }, { "epoch": 0.6841723750498351, "grad_norm": 2.515917186866342, "learning_rate": 2.3955313237578658e-06, "loss": 0.7612, "step": 18877 }, { "epoch": 0.6842086187524917, "grad_norm": 2.5621646367710382, "learning_rate": 2.39503032703206e-06, "loss": 0.9603, "step": 18878 }, { "epoch": 0.6842448624551484, "grad_norm": 2.3539338805431167, "learning_rate": 2.3945293662011594e-06, "loss": 0.8893, "step": 18879 }, { "epoch": 0.6842811061578051, "grad_norm": 2.686759060812486, "learning_rate": 2.3940284412720695e-06, "loss": 0.9512, "step": 18880 }, { "epoch": 0.6843173498604618, "grad_norm": 2.230597418391145, "learning_rate": 2.3935275522516883e-06, "loss": 0.8218, "step": 18881 }, { "epoch": 0.6843535935631184, "grad_norm": 2.1708523250138043, "learning_rate": 2.393026699146925e-06, "loss": 0.8707, "step": 18882 }, { "epoch": 0.6843898372657751, "grad_norm": 2.4886554921782484, "learning_rate": 2.3925258819646745e-06, "loss": 0.9009, "step": 18883 }, { "epoch": 0.6844260809684317, "grad_norm": 2.5448891830232863, "learning_rate": 2.3920251007118432e-06, "loss": 0.9606, "step": 18884 }, { "epoch": 0.6844623246710884, "grad_norm": 2.338122342094721, "learning_rate": 2.3915243553953266e-06, "loss": 0.9734, "step": 18885 }, { "epoch": 0.684498568373745, "grad_norm": 2.578028335597178, "learning_rate": 2.3910236460220263e-06, "loss": 0.8686, "step": 18886 }, { "epoch": 0.6845348120764018, "grad_norm": 2.821335921554417, "learning_rate": 2.390522972598845e-06, "loss": 0.9154, "step": 18887 }, { "epoch": 0.6845710557790584, "grad_norm": 2.2305876637761672, "learning_rate": 2.3900223351326744e-06, "loss": 0.998, "step": 18888 }, { "epoch": 0.6846072994817151, "grad_norm": 2.3843475982335325, "learning_rate": 2.389521733630422e-06, "loss": 1.0191, "step": 18889 }, { "epoch": 0.6846435431843717, "grad_norm": 2.525880943153264, "learning_rate": 2.3890211680989792e-06, "loss": 1.0088, "step": 18890 }, { "epoch": 0.6846797868870284, "grad_norm": 2.4196994605330935, "learning_rate": 2.3885206385452476e-06, "loss": 0.8269, "step": 18891 }, { "epoch": 0.684716030589685, "grad_norm": 2.1469994277124007, "learning_rate": 2.3880201449761204e-06, "loss": 0.749, "step": 18892 }, { "epoch": 0.6847522742923418, "grad_norm": 2.4660257954326146, "learning_rate": 2.3875196873984955e-06, "loss": 0.8429, "step": 18893 }, { "epoch": 0.6847885179949984, "grad_norm": 2.3614466992669043, "learning_rate": 2.3870192658192715e-06, "loss": 0.9738, "step": 18894 }, { "epoch": 0.684824761697655, "grad_norm": 2.2881223653202096, "learning_rate": 2.38651888024534e-06, "loss": 0.726, "step": 18895 }, { "epoch": 0.6848610054003117, "grad_norm": 2.114815786442518, "learning_rate": 2.3860185306835977e-06, "loss": 0.8359, "step": 18896 }, { "epoch": 0.6848972491029683, "grad_norm": 2.0242607275098514, "learning_rate": 2.3855182171409393e-06, "loss": 0.7924, "step": 18897 }, { "epoch": 0.684933492805625, "grad_norm": 2.216710278420952, "learning_rate": 2.38501793962426e-06, "loss": 0.797, "step": 18898 }, { "epoch": 0.6849697365082816, "grad_norm": 2.4540076155887074, "learning_rate": 2.3845176981404506e-06, "loss": 0.7744, "step": 18899 }, { "epoch": 0.6850059802109384, "grad_norm": 2.2709484117032765, "learning_rate": 2.384017492696405e-06, "loss": 0.7052, "step": 18900 }, { "epoch": 0.685042223913595, "grad_norm": 2.5217461961709224, "learning_rate": 2.3835173232990166e-06, "loss": 0.9017, "step": 18901 }, { "epoch": 0.6850784676162517, "grad_norm": 2.6840110652882156, "learning_rate": 2.383017189955179e-06, "loss": 0.8285, "step": 18902 }, { "epoch": 0.6851147113189083, "grad_norm": 2.4048854392495116, "learning_rate": 2.38251709267178e-06, "loss": 0.8709, "step": 18903 }, { "epoch": 0.685150955021565, "grad_norm": 2.000855580575002, "learning_rate": 2.3820170314557125e-06, "loss": 0.8445, "step": 18904 }, { "epoch": 0.6851871987242216, "grad_norm": 2.3363458306864615, "learning_rate": 2.3815170063138685e-06, "loss": 0.9261, "step": 18905 }, { "epoch": 0.6852234424268784, "grad_norm": 2.4324586594699777, "learning_rate": 2.381017017253135e-06, "loss": 0.934, "step": 18906 }, { "epoch": 0.685259686129535, "grad_norm": 2.410563986548209, "learning_rate": 2.3805170642804047e-06, "loss": 1.0215, "step": 18907 }, { "epoch": 0.6852959298321917, "grad_norm": 2.574204329163406, "learning_rate": 2.380017147402562e-06, "loss": 0.6727, "step": 18908 }, { "epoch": 0.6853321735348483, "grad_norm": 2.3355020388116072, "learning_rate": 2.3795172666265016e-06, "loss": 0.961, "step": 18909 }, { "epoch": 0.685368417237505, "grad_norm": 2.7095552479990537, "learning_rate": 2.379017421959107e-06, "loss": 1.0266, "step": 18910 }, { "epoch": 0.6854046609401616, "grad_norm": 2.5255129229956657, "learning_rate": 2.3785176134072687e-06, "loss": 0.9989, "step": 18911 }, { "epoch": 0.6854409046428183, "grad_norm": 2.501336298287781, "learning_rate": 2.37801784097787e-06, "loss": 1.025, "step": 18912 }, { "epoch": 0.685477148345475, "grad_norm": 2.136004924158379, "learning_rate": 2.3775181046778006e-06, "loss": 0.8093, "step": 18913 }, { "epoch": 0.6855133920481317, "grad_norm": 2.345516516809616, "learning_rate": 2.377018404513947e-06, "loss": 0.9152, "step": 18914 }, { "epoch": 0.6855496357507883, "grad_norm": 2.277815043008663, "learning_rate": 2.3765187404931924e-06, "loss": 0.8383, "step": 18915 }, { "epoch": 0.685585879453445, "grad_norm": 2.7132822616917416, "learning_rate": 2.376019112622422e-06, "loss": 0.8641, "step": 18916 }, { "epoch": 0.6856221231561016, "grad_norm": 2.0635770167354495, "learning_rate": 2.3755195209085212e-06, "loss": 0.814, "step": 18917 }, { "epoch": 0.6856583668587583, "grad_norm": 2.2062211799836438, "learning_rate": 2.3750199653583767e-06, "loss": 1.0022, "step": 18918 }, { "epoch": 0.685694610561415, "grad_norm": 2.2817516796951147, "learning_rate": 2.3745204459788675e-06, "loss": 0.7994, "step": 18919 }, { "epoch": 0.6857308542640717, "grad_norm": 1.835711253408639, "learning_rate": 2.3740209627768784e-06, "loss": 0.743, "step": 18920 }, { "epoch": 0.6857670979667283, "grad_norm": 2.395081772644339, "learning_rate": 2.3735215157592943e-06, "loss": 0.822, "step": 18921 }, { "epoch": 0.685803341669385, "grad_norm": 2.2911086282394697, "learning_rate": 2.3730221049329938e-06, "loss": 0.8338, "step": 18922 }, { "epoch": 0.6858395853720416, "grad_norm": 2.47438826719232, "learning_rate": 2.3725227303048614e-06, "loss": 0.8668, "step": 18923 }, { "epoch": 0.6858758290746982, "grad_norm": 2.5420960580454075, "learning_rate": 2.3720233918817733e-06, "loss": 0.805, "step": 18924 }, { "epoch": 0.6859120727773549, "grad_norm": 2.6576329918607215, "learning_rate": 2.3715240896706164e-06, "loss": 0.8103, "step": 18925 }, { "epoch": 0.6859483164800116, "grad_norm": 2.2049644590895614, "learning_rate": 2.371024823678267e-06, "loss": 0.8937, "step": 18926 }, { "epoch": 0.6859845601826683, "grad_norm": 2.4705924654764724, "learning_rate": 2.3705255939116073e-06, "loss": 0.9868, "step": 18927 }, { "epoch": 0.6860208038853249, "grad_norm": 2.425699103213361, "learning_rate": 2.37002640037751e-06, "loss": 0.7899, "step": 18928 }, { "epoch": 0.6860570475879816, "grad_norm": 2.632510735510113, "learning_rate": 2.369527243082863e-06, "loss": 0.9126, "step": 18929 }, { "epoch": 0.6860932912906382, "grad_norm": 2.1157765382319225, "learning_rate": 2.369028122034538e-06, "loss": 0.7858, "step": 18930 }, { "epoch": 0.6861295349932949, "grad_norm": 2.467120355198381, "learning_rate": 2.368529037239416e-06, "loss": 0.9235, "step": 18931 }, { "epoch": 0.6861657786959516, "grad_norm": 2.184019024207029, "learning_rate": 2.36802998870437e-06, "loss": 0.978, "step": 18932 }, { "epoch": 0.6862020223986083, "grad_norm": 2.3402333435594254, "learning_rate": 2.36753097643628e-06, "loss": 0.8923, "step": 18933 }, { "epoch": 0.6862382661012649, "grad_norm": 2.550456000151315, "learning_rate": 2.367032000442022e-06, "loss": 1.055, "step": 18934 }, { "epoch": 0.6862745098039216, "grad_norm": 2.2947137416119503, "learning_rate": 2.3665330607284693e-06, "loss": 0.9405, "step": 18935 }, { "epoch": 0.6863107535065782, "grad_norm": 2.1231651327202825, "learning_rate": 2.366034157302498e-06, "loss": 0.8651, "step": 18936 }, { "epoch": 0.6863469972092349, "grad_norm": 2.4437608892542775, "learning_rate": 2.3655352901709833e-06, "loss": 0.8459, "step": 18937 }, { "epoch": 0.6863832409118915, "grad_norm": 2.261399431387499, "learning_rate": 2.3650364593408003e-06, "loss": 0.7828, "step": 18938 }, { "epoch": 0.6864194846145483, "grad_norm": 2.646635480405653, "learning_rate": 2.36453766481882e-06, "loss": 0.9548, "step": 18939 }, { "epoch": 0.6864557283172049, "grad_norm": 2.201055915802178, "learning_rate": 2.364038906611917e-06, "loss": 0.7996, "step": 18940 }, { "epoch": 0.6864919720198616, "grad_norm": 2.1757810768467034, "learning_rate": 2.3635401847269656e-06, "loss": 0.8081, "step": 18941 }, { "epoch": 0.6865282157225182, "grad_norm": 2.12743707542892, "learning_rate": 2.3630414991708338e-06, "loss": 0.7611, "step": 18942 }, { "epoch": 0.6865644594251749, "grad_norm": 2.1798457186149496, "learning_rate": 2.3625428499503974e-06, "loss": 0.819, "step": 18943 }, { "epoch": 0.6866007031278315, "grad_norm": 2.275078682173258, "learning_rate": 2.362044237072521e-06, "loss": 0.8729, "step": 18944 }, { "epoch": 0.6866369468304881, "grad_norm": 2.3473144507137187, "learning_rate": 2.3615456605440838e-06, "loss": 1.0319, "step": 18945 }, { "epoch": 0.6866731905331449, "grad_norm": 2.346008433639937, "learning_rate": 2.3610471203719506e-06, "loss": 0.8214, "step": 18946 }, { "epoch": 0.6867094342358016, "grad_norm": 2.578442572742991, "learning_rate": 2.360548616562993e-06, "loss": 0.9544, "step": 18947 }, { "epoch": 0.6867456779384582, "grad_norm": 2.44757558758132, "learning_rate": 2.360050149124076e-06, "loss": 0.7875, "step": 18948 }, { "epoch": 0.6867819216411148, "grad_norm": 2.1364077482865023, "learning_rate": 2.3595517180620746e-06, "loss": 0.9041, "step": 18949 }, { "epoch": 0.6868181653437715, "grad_norm": 2.7413580231730874, "learning_rate": 2.359053323383852e-06, "loss": 0.9776, "step": 18950 }, { "epoch": 0.6868544090464281, "grad_norm": 2.4398039174306683, "learning_rate": 2.35855496509628e-06, "loss": 0.9181, "step": 18951 }, { "epoch": 0.6868906527490849, "grad_norm": 2.2800477103024397, "learning_rate": 2.35805664320622e-06, "loss": 0.6244, "step": 18952 }, { "epoch": 0.6869268964517415, "grad_norm": 2.2875859003570085, "learning_rate": 2.3575583577205423e-06, "loss": 0.9059, "step": 18953 }, { "epoch": 0.6869631401543982, "grad_norm": 2.3942356846748334, "learning_rate": 2.357060108646114e-06, "loss": 0.9784, "step": 18954 }, { "epoch": 0.6869993838570548, "grad_norm": 2.4437948415787645, "learning_rate": 2.3565618959897974e-06, "loss": 0.8972, "step": 18955 }, { "epoch": 0.6870356275597115, "grad_norm": 2.5381659333863875, "learning_rate": 2.356063719758459e-06, "loss": 1.0528, "step": 18956 }, { "epoch": 0.6870718712623681, "grad_norm": 2.5498476716329863, "learning_rate": 2.3555655799589634e-06, "loss": 1.0, "step": 18957 }, { "epoch": 0.6871081149650248, "grad_norm": 2.528759356619784, "learning_rate": 2.3550674765981767e-06, "loss": 0.8347, "step": 18958 }, { "epoch": 0.6871443586676815, "grad_norm": 2.210082992614803, "learning_rate": 2.354569409682959e-06, "loss": 1.0256, "step": 18959 }, { "epoch": 0.6871806023703382, "grad_norm": 2.3067901148925922, "learning_rate": 2.354071379220175e-06, "loss": 0.8477, "step": 18960 }, { "epoch": 0.6872168460729948, "grad_norm": 2.3237478563222447, "learning_rate": 2.3535733852166885e-06, "loss": 0.7574, "step": 18961 }, { "epoch": 0.6872530897756515, "grad_norm": 2.544728437116315, "learning_rate": 2.353075427679359e-06, "loss": 0.9135, "step": 18962 }, { "epoch": 0.6872893334783081, "grad_norm": 2.347955575232493, "learning_rate": 2.35257750661505e-06, "loss": 0.7672, "step": 18963 }, { "epoch": 0.6873255771809648, "grad_norm": 2.3488210112837105, "learning_rate": 2.3520796220306193e-06, "loss": 0.85, "step": 18964 }, { "epoch": 0.6873618208836215, "grad_norm": 2.7375543607890642, "learning_rate": 2.3515817739329334e-06, "loss": 1.046, "step": 18965 }, { "epoch": 0.6873980645862782, "grad_norm": 2.355012591013098, "learning_rate": 2.3510839623288473e-06, "loss": 0.9609, "step": 18966 }, { "epoch": 0.6874343082889348, "grad_norm": 2.5291224862087622, "learning_rate": 2.350586187225224e-06, "loss": 0.869, "step": 18967 }, { "epoch": 0.6874705519915915, "grad_norm": 2.314546668793031, "learning_rate": 2.3500884486289166e-06, "loss": 1.0308, "step": 18968 }, { "epoch": 0.6875067956942481, "grad_norm": 2.3658900697915812, "learning_rate": 2.3495907465467915e-06, "loss": 0.9763, "step": 18969 }, { "epoch": 0.6875430393969048, "grad_norm": 2.5583246280208147, "learning_rate": 2.3490930809857015e-06, "loss": 0.9535, "step": 18970 }, { "epoch": 0.6875792830995614, "grad_norm": 2.5819273683530284, "learning_rate": 2.3485954519525055e-06, "loss": 0.8771, "step": 18971 }, { "epoch": 0.6876155268022182, "grad_norm": 1.9813712926251072, "learning_rate": 2.348097859454062e-06, "loss": 0.684, "step": 18972 }, { "epoch": 0.6876517705048748, "grad_norm": 2.653909517413492, "learning_rate": 2.347600303497225e-06, "loss": 0.9307, "step": 18973 }, { "epoch": 0.6876880142075315, "grad_norm": 2.399353705068193, "learning_rate": 2.347102784088853e-06, "loss": 0.9319, "step": 18974 }, { "epoch": 0.6877242579101881, "grad_norm": 2.0305058294941225, "learning_rate": 2.3466053012357976e-06, "loss": 0.7963, "step": 18975 }, { "epoch": 0.6877605016128447, "grad_norm": 2.1098285994106254, "learning_rate": 2.346107854944919e-06, "loss": 0.9101, "step": 18976 }, { "epoch": 0.6877967453155014, "grad_norm": 2.231430773591015, "learning_rate": 2.3456104452230675e-06, "loss": 1.1903, "step": 18977 }, { "epoch": 0.6878329890181581, "grad_norm": 2.1458719691823007, "learning_rate": 2.345113072077101e-06, "loss": 0.7896, "step": 18978 }, { "epoch": 0.6878692327208148, "grad_norm": 2.2379744973071105, "learning_rate": 2.3446157355138686e-06, "loss": 0.8584, "step": 18979 }, { "epoch": 0.6879054764234714, "grad_norm": 2.441381447334756, "learning_rate": 2.344118435540226e-06, "loss": 0.9427, "step": 18980 }, { "epoch": 0.6879417201261281, "grad_norm": 2.152297316665095, "learning_rate": 2.343621172163026e-06, "loss": 0.9967, "step": 18981 }, { "epoch": 0.6879779638287847, "grad_norm": 2.3458051095477845, "learning_rate": 2.3431239453891185e-06, "loss": 1.0485, "step": 18982 }, { "epoch": 0.6880142075314414, "grad_norm": 2.101586538958413, "learning_rate": 2.342626755225356e-06, "loss": 0.7808, "step": 18983 }, { "epoch": 0.688050451234098, "grad_norm": 2.465144985746195, "learning_rate": 2.34212960167859e-06, "loss": 0.9779, "step": 18984 }, { "epoch": 0.6880866949367548, "grad_norm": 2.5427390415335536, "learning_rate": 2.3416324847556722e-06, "loss": 0.8901, "step": 18985 }, { "epoch": 0.6881229386394114, "grad_norm": 2.5870097877467377, "learning_rate": 2.3411354044634486e-06, "loss": 0.8968, "step": 18986 }, { "epoch": 0.6881591823420681, "grad_norm": 2.4606625316497426, "learning_rate": 2.3406383608087724e-06, "loss": 0.9682, "step": 18987 }, { "epoch": 0.6881954260447247, "grad_norm": 2.382546084191057, "learning_rate": 2.34014135379849e-06, "loss": 0.9321, "step": 18988 }, { "epoch": 0.6882316697473814, "grad_norm": 3.2919059085251585, "learning_rate": 2.339644383439454e-06, "loss": 0.927, "step": 18989 }, { "epoch": 0.688267913450038, "grad_norm": 2.6751768679000594, "learning_rate": 2.339147449738507e-06, "loss": 0.8026, "step": 18990 }, { "epoch": 0.6883041571526948, "grad_norm": 1.9652677674252952, "learning_rate": 2.338650552702499e-06, "loss": 0.8872, "step": 18991 }, { "epoch": 0.6883404008553514, "grad_norm": 2.53164223114317, "learning_rate": 2.338153692338279e-06, "loss": 0.9498, "step": 18992 }, { "epoch": 0.6883766445580081, "grad_norm": 2.277680952650813, "learning_rate": 2.337656868652689e-06, "loss": 0.9644, "step": 18993 }, { "epoch": 0.6884128882606647, "grad_norm": 2.309212999994373, "learning_rate": 2.3371600816525798e-06, "loss": 0.8497, "step": 18994 }, { "epoch": 0.6884491319633214, "grad_norm": 1.9550553792957488, "learning_rate": 2.3366633313447895e-06, "loss": 0.7048, "step": 18995 }, { "epoch": 0.688485375665978, "grad_norm": 2.413670247621377, "learning_rate": 2.336166617736172e-06, "loss": 1.0199, "step": 18996 }, { "epoch": 0.6885216193686347, "grad_norm": 2.2788656765799007, "learning_rate": 2.3356699408335657e-06, "loss": 0.7235, "step": 18997 }, { "epoch": 0.6885578630712914, "grad_norm": 2.3924016367069574, "learning_rate": 2.335173300643818e-06, "loss": 0.881, "step": 18998 }, { "epoch": 0.6885941067739481, "grad_norm": 2.489337965945184, "learning_rate": 2.3346766971737687e-06, "loss": 0.8002, "step": 18999 }, { "epoch": 0.6886303504766047, "grad_norm": 2.101225653043065, "learning_rate": 2.3341801304302624e-06, "loss": 0.8254, "step": 19000 }, { "epoch": 0.6886665941792613, "grad_norm": 2.0891908886532358, "learning_rate": 2.333683600420144e-06, "loss": 0.9405, "step": 19001 }, { "epoch": 0.688702837881918, "grad_norm": 2.1511916543281524, "learning_rate": 2.333187107150251e-06, "loss": 0.6203, "step": 19002 }, { "epoch": 0.6887390815845746, "grad_norm": 2.3356719304738807, "learning_rate": 2.3326906506274264e-06, "loss": 0.9268, "step": 19003 }, { "epoch": 0.6887753252872313, "grad_norm": 2.3727712176607785, "learning_rate": 2.3321942308585123e-06, "loss": 0.8873, "step": 19004 }, { "epoch": 0.688811568989888, "grad_norm": 2.480071715692824, "learning_rate": 2.33169784785035e-06, "loss": 0.9588, "step": 19005 }, { "epoch": 0.6888478126925447, "grad_norm": 2.558545205979766, "learning_rate": 2.331201501609775e-06, "loss": 0.8935, "step": 19006 }, { "epoch": 0.6888840563952013, "grad_norm": 2.0082328426404623, "learning_rate": 2.33070519214363e-06, "loss": 0.9026, "step": 19007 }, { "epoch": 0.688920300097858, "grad_norm": 2.572817613416702, "learning_rate": 2.3302089194587547e-06, "loss": 0.9495, "step": 19008 }, { "epoch": 0.6889565438005146, "grad_norm": 2.3187907515994852, "learning_rate": 2.3297126835619838e-06, "loss": 0.665, "step": 19009 }, { "epoch": 0.6889927875031713, "grad_norm": 2.11241339407964, "learning_rate": 2.3292164844601593e-06, "loss": 0.7983, "step": 19010 }, { "epoch": 0.689029031205828, "grad_norm": 2.2042103024393227, "learning_rate": 2.328720322160113e-06, "loss": 0.9116, "step": 19011 }, { "epoch": 0.6890652749084847, "grad_norm": 1.9037114155968125, "learning_rate": 2.328224196668688e-06, "loss": 0.8883, "step": 19012 }, { "epoch": 0.6891015186111413, "grad_norm": 2.5346020965533236, "learning_rate": 2.3277281079927164e-06, "loss": 0.8922, "step": 19013 }, { "epoch": 0.689137762313798, "grad_norm": 2.32976611002635, "learning_rate": 2.3272320561390372e-06, "loss": 0.8407, "step": 19014 }, { "epoch": 0.6891740060164546, "grad_norm": 2.436129328094205, "learning_rate": 2.32673604111448e-06, "loss": 0.8126, "step": 19015 }, { "epoch": 0.6892102497191113, "grad_norm": 2.524894143856141, "learning_rate": 2.3262400629258876e-06, "loss": 0.9629, "step": 19016 }, { "epoch": 0.6892464934217679, "grad_norm": 2.168446349667519, "learning_rate": 2.3257441215800874e-06, "loss": 0.7942, "step": 19017 }, { "epoch": 0.6892827371244247, "grad_norm": 2.303682378275914, "learning_rate": 2.325248217083918e-06, "loss": 0.8904, "step": 19018 }, { "epoch": 0.6893189808270813, "grad_norm": 2.155006821020659, "learning_rate": 2.3247523494442086e-06, "loss": 0.8374, "step": 19019 }, { "epoch": 0.689355224529738, "grad_norm": 2.6046951582319307, "learning_rate": 2.3242565186677944e-06, "loss": 0.8713, "step": 19020 }, { "epoch": 0.6893914682323946, "grad_norm": 2.2513767624012906, "learning_rate": 2.3237607247615084e-06, "loss": 0.9196, "step": 19021 }, { "epoch": 0.6894277119350513, "grad_norm": 2.8074415727088904, "learning_rate": 2.32326496773218e-06, "loss": 1.0133, "step": 19022 }, { "epoch": 0.6894639556377079, "grad_norm": 2.628509223530598, "learning_rate": 2.322769247586641e-06, "loss": 1.0284, "step": 19023 }, { "epoch": 0.6895001993403647, "grad_norm": 2.197987723840917, "learning_rate": 2.3222735643317223e-06, "loss": 0.9702, "step": 19024 }, { "epoch": 0.6895364430430213, "grad_norm": 2.335130422881837, "learning_rate": 2.3217779179742567e-06, "loss": 0.7677, "step": 19025 }, { "epoch": 0.689572686745678, "grad_norm": 2.2737947717506066, "learning_rate": 2.32128230852107e-06, "loss": 0.6998, "step": 19026 }, { "epoch": 0.6896089304483346, "grad_norm": 2.1873178472983947, "learning_rate": 2.3207867359789928e-06, "loss": 0.9279, "step": 19027 }, { "epoch": 0.6896451741509912, "grad_norm": 2.475284977918604, "learning_rate": 2.3202912003548554e-06, "loss": 0.8736, "step": 19028 }, { "epoch": 0.6896814178536479, "grad_norm": 2.5189664867200734, "learning_rate": 2.3197957016554834e-06, "loss": 0.9006, "step": 19029 }, { "epoch": 0.6897176615563045, "grad_norm": 2.321879079846189, "learning_rate": 2.319300239887708e-06, "loss": 0.7214, "step": 19030 }, { "epoch": 0.6897539052589613, "grad_norm": 2.456269511663382, "learning_rate": 2.318804815058349e-06, "loss": 0.7877, "step": 19031 }, { "epoch": 0.6897901489616179, "grad_norm": 2.2920441020395126, "learning_rate": 2.318309427174243e-06, "loss": 0.7691, "step": 19032 }, { "epoch": 0.6898263926642746, "grad_norm": 2.314578920397737, "learning_rate": 2.317814076242209e-06, "loss": 0.8497, "step": 19033 }, { "epoch": 0.6898626363669312, "grad_norm": 2.389453279058076, "learning_rate": 2.317318762269077e-06, "loss": 0.9153, "step": 19034 }, { "epoch": 0.6898988800695879, "grad_norm": 2.5340586393329922, "learning_rate": 2.3168234852616654e-06, "loss": 0.8962, "step": 19035 }, { "epoch": 0.6899351237722445, "grad_norm": 2.4416762399929706, "learning_rate": 2.3163282452268083e-06, "loss": 0.7863, "step": 19036 }, { "epoch": 0.6899713674749013, "grad_norm": 2.4743491487043117, "learning_rate": 2.3158330421713227e-06, "loss": 0.9994, "step": 19037 }, { "epoch": 0.6900076111775579, "grad_norm": 2.1557500090773143, "learning_rate": 2.3153378761020364e-06, "loss": 0.8827, "step": 19038 }, { "epoch": 0.6900438548802146, "grad_norm": 2.256922811654493, "learning_rate": 2.314842747025768e-06, "loss": 0.8919, "step": 19039 }, { "epoch": 0.6900800985828712, "grad_norm": 2.452009995764392, "learning_rate": 2.3143476549493434e-06, "loss": 0.8969, "step": 19040 }, { "epoch": 0.6901163422855279, "grad_norm": 2.3288551535900823, "learning_rate": 2.3138525998795854e-06, "loss": 0.8936, "step": 19041 }, { "epoch": 0.6901525859881845, "grad_norm": 2.9056047368824047, "learning_rate": 2.3133575818233117e-06, "loss": 0.9201, "step": 19042 }, { "epoch": 0.6901888296908412, "grad_norm": 2.524355830642888, "learning_rate": 2.3128626007873465e-06, "loss": 0.8336, "step": 19043 }, { "epoch": 0.6902250733934979, "grad_norm": 2.6148594097683473, "learning_rate": 2.3123676567785093e-06, "loss": 0.9428, "step": 19044 }, { "epoch": 0.6902613170961546, "grad_norm": 2.20603299481903, "learning_rate": 2.311872749803622e-06, "loss": 0.8127, "step": 19045 }, { "epoch": 0.6902975607988112, "grad_norm": 2.1825208846370585, "learning_rate": 2.311377879869501e-06, "loss": 0.8702, "step": 19046 }, { "epoch": 0.6903338045014679, "grad_norm": 2.406243707057864, "learning_rate": 2.3108830469829664e-06, "loss": 0.8877, "step": 19047 }, { "epoch": 0.6903700482041245, "grad_norm": 2.4554032061921856, "learning_rate": 2.310388251150839e-06, "loss": 1.1236, "step": 19048 }, { "epoch": 0.6904062919067812, "grad_norm": 2.635176006855424, "learning_rate": 2.309893492379933e-06, "loss": 0.8407, "step": 19049 }, { "epoch": 0.6904425356094379, "grad_norm": 2.418960709067204, "learning_rate": 2.309398770677068e-06, "loss": 1.0796, "step": 19050 }, { "epoch": 0.6904787793120946, "grad_norm": 2.2176487997514496, "learning_rate": 2.308904086049061e-06, "loss": 0.6994, "step": 19051 }, { "epoch": 0.6905150230147512, "grad_norm": 2.2781071987622408, "learning_rate": 2.3084094385027297e-06, "loss": 0.8728, "step": 19052 }, { "epoch": 0.6905512667174079, "grad_norm": 2.4550835337468957, "learning_rate": 2.3079148280448864e-06, "loss": 0.7656, "step": 19053 }, { "epoch": 0.6905875104200645, "grad_norm": 2.2663770678330644, "learning_rate": 2.3074202546823487e-06, "loss": 0.887, "step": 19054 }, { "epoch": 0.6906237541227211, "grad_norm": 2.1967135106254165, "learning_rate": 2.306925718421932e-06, "loss": 0.9204, "step": 19055 }, { "epoch": 0.6906599978253778, "grad_norm": 2.6262033591683407, "learning_rate": 2.306431219270451e-06, "loss": 0.9355, "step": 19056 }, { "epoch": 0.6906962415280345, "grad_norm": 2.5257378293045187, "learning_rate": 2.305936757234718e-06, "loss": 1.0744, "step": 19057 }, { "epoch": 0.6907324852306912, "grad_norm": 2.106514025696714, "learning_rate": 2.3054423323215462e-06, "loss": 0.7742, "step": 19058 }, { "epoch": 0.6907687289333478, "grad_norm": 2.433416407355665, "learning_rate": 2.304947944537752e-06, "loss": 0.9343, "step": 19059 }, { "epoch": 0.6908049726360045, "grad_norm": 2.089067773876208, "learning_rate": 2.3044535938901434e-06, "loss": 0.9579, "step": 19060 }, { "epoch": 0.6908412163386611, "grad_norm": 2.6148362230164888, "learning_rate": 2.303959280385536e-06, "loss": 0.74, "step": 19061 }, { "epoch": 0.6908774600413178, "grad_norm": 2.265597982527954, "learning_rate": 2.3034650040307348e-06, "loss": 0.792, "step": 19062 }, { "epoch": 0.6909137037439745, "grad_norm": 2.673818667721865, "learning_rate": 2.3029707648325593e-06, "loss": 0.9175, "step": 19063 }, { "epoch": 0.6909499474466312, "grad_norm": 2.221301191650976, "learning_rate": 2.3024765627978134e-06, "loss": 0.7852, "step": 19064 }, { "epoch": 0.6909861911492878, "grad_norm": 2.1596273237808017, "learning_rate": 2.301982397933311e-06, "loss": 0.8808, "step": 19065 }, { "epoch": 0.6910224348519445, "grad_norm": 2.820300029721619, "learning_rate": 2.301488270245858e-06, "loss": 0.8504, "step": 19066 }, { "epoch": 0.6910586785546011, "grad_norm": 2.4380537305005614, "learning_rate": 2.300994179742264e-06, "loss": 0.9891, "step": 19067 }, { "epoch": 0.6910949222572578, "grad_norm": 2.4268977444760074, "learning_rate": 2.3005001264293403e-06, "loss": 0.9332, "step": 19068 }, { "epoch": 0.6911311659599144, "grad_norm": 2.326949945565549, "learning_rate": 2.30000611031389e-06, "loss": 0.916, "step": 19069 }, { "epoch": 0.6911674096625712, "grad_norm": 2.3185627966879867, "learning_rate": 2.2995121314027225e-06, "loss": 0.8186, "step": 19070 }, { "epoch": 0.6912036533652278, "grad_norm": 2.419629521212126, "learning_rate": 2.299018189702645e-06, "loss": 0.9904, "step": 19071 }, { "epoch": 0.6912398970678845, "grad_norm": 2.2571033747755087, "learning_rate": 2.298524285220465e-06, "loss": 0.9761, "step": 19072 }, { "epoch": 0.6912761407705411, "grad_norm": 2.69612783404388, "learning_rate": 2.298030417962985e-06, "loss": 0.7894, "step": 19073 }, { "epoch": 0.6913123844731978, "grad_norm": 2.350292595632749, "learning_rate": 2.2975365879370114e-06, "loss": 0.7974, "step": 19074 }, { "epoch": 0.6913486281758544, "grad_norm": 2.2394637374287822, "learning_rate": 2.297042795149349e-06, "loss": 0.7944, "step": 19075 }, { "epoch": 0.691384871878511, "grad_norm": 2.25468167083221, "learning_rate": 2.2965490396068047e-06, "loss": 0.9672, "step": 19076 }, { "epoch": 0.6914211155811678, "grad_norm": 2.278397861345728, "learning_rate": 2.2960553213161774e-06, "loss": 0.7582, "step": 19077 }, { "epoch": 0.6914573592838245, "grad_norm": 2.403752665425741, "learning_rate": 2.295561640284272e-06, "loss": 1.0526, "step": 19078 }, { "epoch": 0.6914936029864811, "grad_norm": 2.521758249393534, "learning_rate": 2.295067996517894e-06, "loss": 0.895, "step": 19079 }, { "epoch": 0.6915298466891378, "grad_norm": 2.2347168513271174, "learning_rate": 2.2945743900238415e-06, "loss": 0.8793, "step": 19080 }, { "epoch": 0.6915660903917944, "grad_norm": 2.420741101090311, "learning_rate": 2.2940808208089195e-06, "loss": 0.8398, "step": 19081 }, { "epoch": 0.691602334094451, "grad_norm": 2.4094803408455743, "learning_rate": 2.2935872888799232e-06, "loss": 0.7309, "step": 19082 }, { "epoch": 0.6916385777971078, "grad_norm": 2.4810914053586735, "learning_rate": 2.293093794243661e-06, "loss": 0.9361, "step": 19083 }, { "epoch": 0.6916748214997644, "grad_norm": 2.239742719193741, "learning_rate": 2.2926003369069277e-06, "loss": 0.8547, "step": 19084 }, { "epoch": 0.6917110652024211, "grad_norm": 2.40531442102881, "learning_rate": 2.292106916876526e-06, "loss": 0.9241, "step": 19085 }, { "epoch": 0.6917473089050777, "grad_norm": 2.465772266347779, "learning_rate": 2.2916135341592515e-06, "loss": 0.8949, "step": 19086 }, { "epoch": 0.6917835526077344, "grad_norm": 2.2180013036418007, "learning_rate": 2.2911201887619044e-06, "loss": 0.8061, "step": 19087 }, { "epoch": 0.691819796310391, "grad_norm": 2.745746110573493, "learning_rate": 2.290626880691285e-06, "loss": 0.8216, "step": 19088 }, { "epoch": 0.6918560400130477, "grad_norm": 2.4509382366834873, "learning_rate": 2.2901336099541866e-06, "loss": 0.9583, "step": 19089 }, { "epoch": 0.6918922837157044, "grad_norm": 2.6024819862792072, "learning_rate": 2.2896403765574082e-06, "loss": 0.8693, "step": 19090 }, { "epoch": 0.6919285274183611, "grad_norm": 2.307888863876436, "learning_rate": 2.2891471805077466e-06, "loss": 1.003, "step": 19091 }, { "epoch": 0.6919647711210177, "grad_norm": 2.2750356134047793, "learning_rate": 2.2886540218119986e-06, "loss": 0.8458, "step": 19092 }, { "epoch": 0.6920010148236744, "grad_norm": 2.278648071686655, "learning_rate": 2.2881609004769566e-06, "loss": 0.9285, "step": 19093 }, { "epoch": 0.692037258526331, "grad_norm": 2.2062601191233124, "learning_rate": 2.2876678165094173e-06, "loss": 0.7863, "step": 19094 }, { "epoch": 0.6920735022289877, "grad_norm": 2.6641053803939116, "learning_rate": 2.2871747699161773e-06, "loss": 0.8711, "step": 19095 }, { "epoch": 0.6921097459316444, "grad_norm": 2.2495782463437473, "learning_rate": 2.2866817607040266e-06, "loss": 0.78, "step": 19096 }, { "epoch": 0.6921459896343011, "grad_norm": 2.3870540042232533, "learning_rate": 2.2861887888797617e-06, "loss": 0.7103, "step": 19097 }, { "epoch": 0.6921822333369577, "grad_norm": 2.5279941671295254, "learning_rate": 2.2856958544501705e-06, "loss": 0.973, "step": 19098 }, { "epoch": 0.6922184770396144, "grad_norm": 1.8327973765410377, "learning_rate": 2.2852029574220526e-06, "loss": 0.8305, "step": 19099 }, { "epoch": 0.692254720742271, "grad_norm": 2.188143099155971, "learning_rate": 2.284710097802194e-06, "loss": 0.8959, "step": 19100 }, { "epoch": 0.6922909644449277, "grad_norm": 2.1803706463490182, "learning_rate": 2.28421727559739e-06, "loss": 0.9047, "step": 19101 }, { "epoch": 0.6923272081475843, "grad_norm": 2.5332628079279833, "learning_rate": 2.2837244908144264e-06, "loss": 0.9065, "step": 19102 }, { "epoch": 0.6923634518502411, "grad_norm": 2.484341413938609, "learning_rate": 2.2832317434601e-06, "loss": 0.9602, "step": 19103 }, { "epoch": 0.6923996955528977, "grad_norm": 2.548857653970244, "learning_rate": 2.2827390335411946e-06, "loss": 0.9414, "step": 19104 }, { "epoch": 0.6924359392555544, "grad_norm": 2.207752811034743, "learning_rate": 2.2822463610645044e-06, "loss": 0.7076, "step": 19105 }, { "epoch": 0.692472182958211, "grad_norm": 2.262375822470169, "learning_rate": 2.2817537260368137e-06, "loss": 0.9712, "step": 19106 }, { "epoch": 0.6925084266608676, "grad_norm": 2.739094550981817, "learning_rate": 2.2812611284649128e-06, "loss": 0.9568, "step": 19107 }, { "epoch": 0.6925446703635243, "grad_norm": 2.645523026562537, "learning_rate": 2.2807685683555914e-06, "loss": 0.8609, "step": 19108 }, { "epoch": 0.692580914066181, "grad_norm": 2.4243174121167743, "learning_rate": 2.280276045715632e-06, "loss": 0.8139, "step": 19109 }, { "epoch": 0.6926171577688377, "grad_norm": 2.220260835946905, "learning_rate": 2.2797835605518244e-06, "loss": 0.8679, "step": 19110 }, { "epoch": 0.6926534014714943, "grad_norm": 2.414057579096453, "learning_rate": 2.2792911128709543e-06, "loss": 0.8047, "step": 19111 }, { "epoch": 0.692689645174151, "grad_norm": 2.4748766929864643, "learning_rate": 2.2787987026798086e-06, "loss": 0.8843, "step": 19112 }, { "epoch": 0.6927258888768076, "grad_norm": 2.402821899242929, "learning_rate": 2.27830632998517e-06, "loss": 0.7282, "step": 19113 }, { "epoch": 0.6927621325794643, "grad_norm": 2.3673891028740814, "learning_rate": 2.2778139947938234e-06, "loss": 0.6697, "step": 19114 }, { "epoch": 0.6927983762821209, "grad_norm": 2.304318874211191, "learning_rate": 2.2773216971125555e-06, "loss": 0.8828, "step": 19115 }, { "epoch": 0.6928346199847777, "grad_norm": 2.0574111953797334, "learning_rate": 2.2768294369481462e-06, "loss": 1.0988, "step": 19116 }, { "epoch": 0.6928708636874343, "grad_norm": 2.043188048380178, "learning_rate": 2.2763372143073826e-06, "loss": 0.8169, "step": 19117 }, { "epoch": 0.692907107390091, "grad_norm": 2.338926562077664, "learning_rate": 2.275845029197041e-06, "loss": 0.85, "step": 19118 }, { "epoch": 0.6929433510927476, "grad_norm": 2.3258839537409637, "learning_rate": 2.2753528816239113e-06, "loss": 0.708, "step": 19119 }, { "epoch": 0.6929795947954043, "grad_norm": 2.416118771812148, "learning_rate": 2.274860771594769e-06, "loss": 0.862, "step": 19120 }, { "epoch": 0.6930158384980609, "grad_norm": 2.3697057474881413, "learning_rate": 2.2743686991163987e-06, "loss": 0.8324, "step": 19121 }, { "epoch": 0.6930520822007177, "grad_norm": 2.4204030214502517, "learning_rate": 2.273876664195576e-06, "loss": 0.9875, "step": 19122 }, { "epoch": 0.6930883259033743, "grad_norm": 2.177648661808418, "learning_rate": 2.273384666839088e-06, "loss": 0.9021, "step": 19123 }, { "epoch": 0.693124569606031, "grad_norm": 2.814822372951821, "learning_rate": 2.2728927070537084e-06, "loss": 1.3197, "step": 19124 }, { "epoch": 0.6931608133086876, "grad_norm": 1.9950696985614191, "learning_rate": 2.2724007848462193e-06, "loss": 0.6688, "step": 19125 }, { "epoch": 0.6931970570113443, "grad_norm": 2.5591260052232947, "learning_rate": 2.2719089002233968e-06, "loss": 1.1184, "step": 19126 }, { "epoch": 0.6932333007140009, "grad_norm": 2.404701487380168, "learning_rate": 2.271417053192019e-06, "loss": 0.9797, "step": 19127 }, { "epoch": 0.6932695444166576, "grad_norm": 2.2460084438257337, "learning_rate": 2.2709252437588665e-06, "loss": 0.8357, "step": 19128 }, { "epoch": 0.6933057881193143, "grad_norm": 2.507656292782516, "learning_rate": 2.270433471930711e-06, "loss": 0.8893, "step": 19129 }, { "epoch": 0.693342031821971, "grad_norm": 2.3003526109112706, "learning_rate": 2.269941737714332e-06, "loss": 0.7949, "step": 19130 }, { "epoch": 0.6933782755246276, "grad_norm": 2.297777083819595, "learning_rate": 2.269450041116505e-06, "loss": 0.6633, "step": 19131 }, { "epoch": 0.6934145192272843, "grad_norm": 2.043308365471332, "learning_rate": 2.2689583821440064e-06, "loss": 0.654, "step": 19132 }, { "epoch": 0.6934507629299409, "grad_norm": 2.774459466741671, "learning_rate": 2.2684667608036078e-06, "loss": 1.0132, "step": 19133 }, { "epoch": 0.6934870066325975, "grad_norm": 2.406289623145714, "learning_rate": 2.2679751771020855e-06, "loss": 0.8536, "step": 19134 }, { "epoch": 0.6935232503352543, "grad_norm": 2.470055346143575, "learning_rate": 2.2674836310462145e-06, "loss": 0.8303, "step": 19135 }, { "epoch": 0.693559494037911, "grad_norm": 2.3081940357261472, "learning_rate": 2.266992122642765e-06, "loss": 0.8768, "step": 19136 }, { "epoch": 0.6935957377405676, "grad_norm": 2.3837228244112043, "learning_rate": 2.266500651898511e-06, "loss": 0.8369, "step": 19137 }, { "epoch": 0.6936319814432242, "grad_norm": 2.176294982159088, "learning_rate": 2.266009218820225e-06, "loss": 0.779, "step": 19138 }, { "epoch": 0.6936682251458809, "grad_norm": 2.305806056646242, "learning_rate": 2.2655178234146803e-06, "loss": 0.7921, "step": 19139 }, { "epoch": 0.6937044688485375, "grad_norm": 2.31847417262881, "learning_rate": 2.2650264656886447e-06, "loss": 1.0906, "step": 19140 }, { "epoch": 0.6937407125511942, "grad_norm": 2.1752547392323494, "learning_rate": 2.2645351456488895e-06, "loss": 0.7383, "step": 19141 }, { "epoch": 0.6937769562538509, "grad_norm": 2.420369233419636, "learning_rate": 2.264043863302186e-06, "loss": 1.0001, "step": 19142 }, { "epoch": 0.6938131999565076, "grad_norm": 2.587810742621428, "learning_rate": 2.263552618655306e-06, "loss": 0.7337, "step": 19143 }, { "epoch": 0.6938494436591642, "grad_norm": 2.1499013073472617, "learning_rate": 2.263061411715013e-06, "loss": 0.8858, "step": 19144 }, { "epoch": 0.6938856873618209, "grad_norm": 2.371460878849203, "learning_rate": 2.262570242488079e-06, "loss": 0.773, "step": 19145 }, { "epoch": 0.6939219310644775, "grad_norm": 2.6145972678445077, "learning_rate": 2.2620791109812734e-06, "loss": 1.0228, "step": 19146 }, { "epoch": 0.6939581747671342, "grad_norm": 2.316177887555801, "learning_rate": 2.2615880172013598e-06, "loss": 1.0166, "step": 19147 }, { "epoch": 0.6939944184697908, "grad_norm": 2.2384453423234736, "learning_rate": 2.2610969611551092e-06, "loss": 0.9671, "step": 19148 }, { "epoch": 0.6940306621724476, "grad_norm": 2.280139308836438, "learning_rate": 2.260605942849282e-06, "loss": 0.8942, "step": 19149 }, { "epoch": 0.6940669058751042, "grad_norm": 2.186700963355725, "learning_rate": 2.2601149622906526e-06, "loss": 0.8565, "step": 19150 }, { "epoch": 0.6941031495777609, "grad_norm": 2.167538920971902, "learning_rate": 2.2596240194859796e-06, "loss": 0.9114, "step": 19151 }, { "epoch": 0.6941393932804175, "grad_norm": 2.576863696294589, "learning_rate": 2.2591331144420323e-06, "loss": 0.7447, "step": 19152 }, { "epoch": 0.6941756369830742, "grad_norm": 2.2136360944812186, "learning_rate": 2.258642247165571e-06, "loss": 0.8912, "step": 19153 }, { "epoch": 0.6942118806857308, "grad_norm": 2.1589440516852, "learning_rate": 2.2581514176633616e-06, "loss": 0.7584, "step": 19154 }, { "epoch": 0.6942481243883876, "grad_norm": 2.3008816051264267, "learning_rate": 2.2576606259421698e-06, "loss": 0.9354, "step": 19155 }, { "epoch": 0.6942843680910442, "grad_norm": 2.4341946474276783, "learning_rate": 2.257169872008754e-06, "loss": 0.962, "step": 19156 }, { "epoch": 0.6943206117937009, "grad_norm": 2.3734750794904844, "learning_rate": 2.256679155869878e-06, "loss": 0.8944, "step": 19157 }, { "epoch": 0.6943568554963575, "grad_norm": 2.352577030962232, "learning_rate": 2.2561884775323053e-06, "loss": 0.8198, "step": 19158 }, { "epoch": 0.6943930991990142, "grad_norm": 2.9106210884874057, "learning_rate": 2.2556978370027964e-06, "loss": 0.9756, "step": 19159 }, { "epoch": 0.6944293429016708, "grad_norm": 2.452984514610516, "learning_rate": 2.2552072342881104e-06, "loss": 0.8834, "step": 19160 }, { "epoch": 0.6944655866043274, "grad_norm": 2.251619784776056, "learning_rate": 2.254716669395009e-06, "loss": 0.8869, "step": 19161 }, { "epoch": 0.6945018303069842, "grad_norm": 2.3251692020342456, "learning_rate": 2.2542261423302515e-06, "loss": 1.053, "step": 19162 }, { "epoch": 0.6945380740096408, "grad_norm": 2.132715834208464, "learning_rate": 2.253735653100599e-06, "loss": 1.051, "step": 19163 }, { "epoch": 0.6945743177122975, "grad_norm": 2.522646244654253, "learning_rate": 2.253245201712806e-06, "loss": 0.8589, "step": 19164 }, { "epoch": 0.6946105614149541, "grad_norm": 2.0683772435481464, "learning_rate": 2.2527547881736334e-06, "loss": 0.9553, "step": 19165 }, { "epoch": 0.6946468051176108, "grad_norm": 2.4505749756439323, "learning_rate": 2.25226441248984e-06, "loss": 0.9118, "step": 19166 }, { "epoch": 0.6946830488202674, "grad_norm": 2.583986782931461, "learning_rate": 2.2517740746681794e-06, "loss": 0.9337, "step": 19167 }, { "epoch": 0.6947192925229242, "grad_norm": 2.539026634501874, "learning_rate": 2.2512837747154122e-06, "loss": 0.9484, "step": 19168 }, { "epoch": 0.6947555362255808, "grad_norm": 2.2655883173669804, "learning_rate": 2.250793512638288e-06, "loss": 0.8673, "step": 19169 }, { "epoch": 0.6947917799282375, "grad_norm": 2.3142394805475406, "learning_rate": 2.25030328844357e-06, "loss": 0.8786, "step": 19170 }, { "epoch": 0.6948280236308941, "grad_norm": 2.4180036037325072, "learning_rate": 2.2498131021380083e-06, "loss": 0.847, "step": 19171 }, { "epoch": 0.6948642673335508, "grad_norm": 2.5508900926943725, "learning_rate": 2.2493229537283606e-06, "loss": 0.845, "step": 19172 }, { "epoch": 0.6949005110362074, "grad_norm": 2.4228749568337524, "learning_rate": 2.2488328432213773e-06, "loss": 0.8098, "step": 19173 }, { "epoch": 0.6949367547388641, "grad_norm": 2.377882796910434, "learning_rate": 2.2483427706238136e-06, "loss": 0.8826, "step": 19174 }, { "epoch": 0.6949729984415208, "grad_norm": 2.512896830847511, "learning_rate": 2.247852735942424e-06, "loss": 0.9486, "step": 19175 }, { "epoch": 0.6950092421441775, "grad_norm": 2.5197326461535883, "learning_rate": 2.2473627391839577e-06, "loss": 0.8716, "step": 19176 }, { "epoch": 0.6950454858468341, "grad_norm": 2.5423215601184497, "learning_rate": 2.2468727803551677e-06, "loss": 0.8826, "step": 19177 }, { "epoch": 0.6950817295494908, "grad_norm": 2.498888266783573, "learning_rate": 2.2463828594628056e-06, "loss": 0.7862, "step": 19178 }, { "epoch": 0.6951179732521474, "grad_norm": 2.2893506409999866, "learning_rate": 2.245892976513625e-06, "loss": 0.9073, "step": 19179 }, { "epoch": 0.6951542169548041, "grad_norm": 2.6060784031749713, "learning_rate": 2.245403131514371e-06, "loss": 0.8423, "step": 19180 }, { "epoch": 0.6951904606574608, "grad_norm": 2.098378632816565, "learning_rate": 2.2449133244717964e-06, "loss": 0.9089, "step": 19181 }, { "epoch": 0.6952267043601175, "grad_norm": 2.634879840226356, "learning_rate": 2.2444235553926515e-06, "loss": 0.8411, "step": 19182 }, { "epoch": 0.6952629480627741, "grad_norm": 2.4257793108425303, "learning_rate": 2.2439338242836816e-06, "loss": 0.7646, "step": 19183 }, { "epoch": 0.6952991917654308, "grad_norm": 2.5513399668125825, "learning_rate": 2.2434441311516393e-06, "loss": 0.8865, "step": 19184 }, { "epoch": 0.6953354354680874, "grad_norm": 2.3939363895244345, "learning_rate": 2.2429544760032653e-06, "loss": 0.9537, "step": 19185 }, { "epoch": 0.695371679170744, "grad_norm": 2.4169100871690707, "learning_rate": 2.242464858845315e-06, "loss": 0.8263, "step": 19186 }, { "epoch": 0.6954079228734007, "grad_norm": 2.4655280137023174, "learning_rate": 2.2419752796845293e-06, "loss": 1.1283, "step": 19187 }, { "epoch": 0.6954441665760575, "grad_norm": 2.3691997075583267, "learning_rate": 2.2414857385276585e-06, "loss": 1.0228, "step": 19188 }, { "epoch": 0.6954804102787141, "grad_norm": 2.3669745153015924, "learning_rate": 2.2409962353814425e-06, "loss": 0.8671, "step": 19189 }, { "epoch": 0.6955166539813707, "grad_norm": 2.362135076017476, "learning_rate": 2.240506770252633e-06, "loss": 0.7383, "step": 19190 }, { "epoch": 0.6955528976840274, "grad_norm": 2.096689469591511, "learning_rate": 2.2400173431479695e-06, "loss": 0.7012, "step": 19191 }, { "epoch": 0.695589141386684, "grad_norm": 2.355607525554943, "learning_rate": 2.239527954074199e-06, "loss": 0.9499, "step": 19192 }, { "epoch": 0.6956253850893407, "grad_norm": 2.349964604434479, "learning_rate": 2.2390386030380627e-06, "loss": 0.7533, "step": 19193 }, { "epoch": 0.6956616287919974, "grad_norm": 2.5365144735908975, "learning_rate": 2.2385492900463053e-06, "loss": 0.7354, "step": 19194 }, { "epoch": 0.6956978724946541, "grad_norm": 2.499927479467741, "learning_rate": 2.2380600151056696e-06, "loss": 0.8134, "step": 19195 }, { "epoch": 0.6957341161973107, "grad_norm": 2.524393593813515, "learning_rate": 2.237570778222895e-06, "loss": 0.8257, "step": 19196 }, { "epoch": 0.6957703598999674, "grad_norm": 2.255908536620251, "learning_rate": 2.237081579404724e-06, "loss": 0.8559, "step": 19197 }, { "epoch": 0.695806603602624, "grad_norm": 2.228968353932068, "learning_rate": 2.236592418657898e-06, "loss": 0.7496, "step": 19198 }, { "epoch": 0.6958428473052807, "grad_norm": 2.3646893353006253, "learning_rate": 2.2361032959891594e-06, "loss": 0.8321, "step": 19199 }, { "epoch": 0.6958790910079373, "grad_norm": 2.4419399716076113, "learning_rate": 2.2356142114052437e-06, "loss": 0.9377, "step": 19200 }, { "epoch": 0.6959153347105941, "grad_norm": 2.6121816501616726, "learning_rate": 2.2351251649128925e-06, "loss": 1.0723, "step": 19201 }, { "epoch": 0.6959515784132507, "grad_norm": 2.3865184554329666, "learning_rate": 2.2346361565188463e-06, "loss": 0.8768, "step": 19202 }, { "epoch": 0.6959878221159074, "grad_norm": 2.9031630746171584, "learning_rate": 2.234147186229839e-06, "loss": 1.1608, "step": 19203 }, { "epoch": 0.696024065818564, "grad_norm": 2.261784476905527, "learning_rate": 2.2336582540526137e-06, "loss": 0.8698, "step": 19204 }, { "epoch": 0.6960603095212207, "grad_norm": 2.2640190656863264, "learning_rate": 2.2331693599938995e-06, "loss": 0.9531, "step": 19205 }, { "epoch": 0.6960965532238773, "grad_norm": 2.3684759560271074, "learning_rate": 2.2326805040604432e-06, "loss": 0.8514, "step": 19206 }, { "epoch": 0.6961327969265341, "grad_norm": 2.6376797557595517, "learning_rate": 2.2321916862589732e-06, "loss": 0.8949, "step": 19207 }, { "epoch": 0.6961690406291907, "grad_norm": 2.457742910986724, "learning_rate": 2.23170290659623e-06, "loss": 1.1596, "step": 19208 }, { "epoch": 0.6962052843318474, "grad_norm": 2.163610980970733, "learning_rate": 2.231214165078943e-06, "loss": 0.7382, "step": 19209 }, { "epoch": 0.696241528034504, "grad_norm": 2.825400813349178, "learning_rate": 2.230725461713854e-06, "loss": 0.8081, "step": 19210 }, { "epoch": 0.6962777717371607, "grad_norm": 2.467004587021297, "learning_rate": 2.2302367965076916e-06, "loss": 0.8229, "step": 19211 }, { "epoch": 0.6963140154398173, "grad_norm": 2.5489080253303213, "learning_rate": 2.2297481694671926e-06, "loss": 0.9987, "step": 19212 }, { "epoch": 0.696350259142474, "grad_norm": 2.483697416885664, "learning_rate": 2.2292595805990864e-06, "loss": 0.8998, "step": 19213 }, { "epoch": 0.6963865028451307, "grad_norm": 2.531441412210728, "learning_rate": 2.2287710299101077e-06, "loss": 0.8172, "step": 19214 }, { "epoch": 0.6964227465477874, "grad_norm": 2.5007667944152283, "learning_rate": 2.22828251740699e-06, "loss": 0.8702, "step": 19215 }, { "epoch": 0.696458990250444, "grad_norm": 1.9437947132930644, "learning_rate": 2.2277940430964594e-06, "loss": 0.7554, "step": 19216 }, { "epoch": 0.6964952339531006, "grad_norm": 2.4508211157292603, "learning_rate": 2.227305606985254e-06, "loss": 1.0137, "step": 19217 }, { "epoch": 0.6965314776557573, "grad_norm": 2.171807006157604, "learning_rate": 2.226817209080098e-06, "loss": 0.8145, "step": 19218 }, { "epoch": 0.6965677213584139, "grad_norm": 2.2366510403740896, "learning_rate": 2.2263288493877265e-06, "loss": 0.8862, "step": 19219 }, { "epoch": 0.6966039650610706, "grad_norm": 2.444001612594276, "learning_rate": 2.225840527914864e-06, "loss": 0.7163, "step": 19220 }, { "epoch": 0.6966402087637273, "grad_norm": 2.3808087365265767, "learning_rate": 2.2253522446682407e-06, "loss": 1.0483, "step": 19221 }, { "epoch": 0.696676452466384, "grad_norm": 2.3310105932716203, "learning_rate": 2.2248639996545875e-06, "loss": 0.937, "step": 19222 }, { "epoch": 0.6967126961690406, "grad_norm": 2.282495897161907, "learning_rate": 2.224375792880628e-06, "loss": 0.95, "step": 19223 }, { "epoch": 0.6967489398716973, "grad_norm": 2.6419975706610006, "learning_rate": 2.2238876243530916e-06, "loss": 1.0147, "step": 19224 }, { "epoch": 0.6967851835743539, "grad_norm": 2.2765354080666755, "learning_rate": 2.2233994940787053e-06, "loss": 0.8713, "step": 19225 }, { "epoch": 0.6968214272770106, "grad_norm": 2.3662912206591713, "learning_rate": 2.222911402064196e-06, "loss": 0.9665, "step": 19226 }, { "epoch": 0.6968576709796673, "grad_norm": 2.229885816882333, "learning_rate": 2.222423348316286e-06, "loss": 1.0944, "step": 19227 }, { "epoch": 0.696893914682324, "grad_norm": 2.268689675412994, "learning_rate": 2.221935332841703e-06, "loss": 0.9872, "step": 19228 }, { "epoch": 0.6969301583849806, "grad_norm": 2.5882034321753746, "learning_rate": 2.2214473556471713e-06, "loss": 0.9266, "step": 19229 }, { "epoch": 0.6969664020876373, "grad_norm": 2.379161508072118, "learning_rate": 2.2209594167394155e-06, "loss": 0.7699, "step": 19230 }, { "epoch": 0.6970026457902939, "grad_norm": 2.1778808223963053, "learning_rate": 2.2204715161251565e-06, "loss": 0.7231, "step": 19231 }, { "epoch": 0.6970388894929506, "grad_norm": 2.4606509344440752, "learning_rate": 2.21998365381112e-06, "loss": 0.9212, "step": 19232 }, { "epoch": 0.6970751331956072, "grad_norm": 2.354223770877756, "learning_rate": 2.219495829804028e-06, "loss": 0.8686, "step": 19233 }, { "epoch": 0.697111376898264, "grad_norm": 1.9786741302149633, "learning_rate": 2.219008044110601e-06, "loss": 0.701, "step": 19234 }, { "epoch": 0.6971476206009206, "grad_norm": 2.5432396793087744, "learning_rate": 2.2185202967375628e-06, "loss": 0.8809, "step": 19235 }, { "epoch": 0.6971838643035773, "grad_norm": 2.165544347010967, "learning_rate": 2.2180325876916286e-06, "loss": 0.9194, "step": 19236 }, { "epoch": 0.6972201080062339, "grad_norm": 2.919391573571589, "learning_rate": 2.217544916979527e-06, "loss": 0.9347, "step": 19237 }, { "epoch": 0.6972563517088906, "grad_norm": 2.6958721485941526, "learning_rate": 2.217057284607972e-06, "loss": 0.9617, "step": 19238 }, { "epoch": 0.6972925954115472, "grad_norm": 2.019512584144927, "learning_rate": 2.2165696905836866e-06, "loss": 0.6616, "step": 19239 }, { "epoch": 0.697328839114204, "grad_norm": 2.3207456874203443, "learning_rate": 2.216082134913385e-06, "loss": 0.8706, "step": 19240 }, { "epoch": 0.6973650828168606, "grad_norm": 2.4257550065539566, "learning_rate": 2.215594617603788e-06, "loss": 0.692, "step": 19241 }, { "epoch": 0.6974013265195173, "grad_norm": 2.186377030580432, "learning_rate": 2.2151071386616154e-06, "loss": 0.8178, "step": 19242 }, { "epoch": 0.6974375702221739, "grad_norm": 2.3426838441528774, "learning_rate": 2.21461969809358e-06, "loss": 0.8506, "step": 19243 }, { "epoch": 0.6974738139248305, "grad_norm": 2.342108239587008, "learning_rate": 2.2141322959064003e-06, "loss": 0.9637, "step": 19244 }, { "epoch": 0.6975100576274872, "grad_norm": 2.377170445555594, "learning_rate": 2.2136449321067937e-06, "loss": 0.8348, "step": 19245 }, { "epoch": 0.6975463013301438, "grad_norm": 2.1095539109519867, "learning_rate": 2.213157606701476e-06, "loss": 0.8621, "step": 19246 }, { "epoch": 0.6975825450328006, "grad_norm": 2.3845203121527647, "learning_rate": 2.2126703196971594e-06, "loss": 0.7415, "step": 19247 }, { "epoch": 0.6976187887354572, "grad_norm": 2.3077996229769973, "learning_rate": 2.21218307110056e-06, "loss": 0.901, "step": 19248 }, { "epoch": 0.6976550324381139, "grad_norm": 2.6622369274804205, "learning_rate": 2.211695860918392e-06, "loss": 1.0589, "step": 19249 }, { "epoch": 0.6976912761407705, "grad_norm": 2.0267674882797926, "learning_rate": 2.2112086891573704e-06, "loss": 0.6126, "step": 19250 }, { "epoch": 0.6977275198434272, "grad_norm": 2.3881685656343774, "learning_rate": 2.2107215558242044e-06, "loss": 1.0036, "step": 19251 }, { "epoch": 0.6977637635460838, "grad_norm": 2.226422323338546, "learning_rate": 2.210234460925609e-06, "loss": 0.9173, "step": 19252 }, { "epoch": 0.6978000072487406, "grad_norm": 2.700684302148996, "learning_rate": 2.209747404468297e-06, "loss": 0.8994, "step": 19253 }, { "epoch": 0.6978362509513972, "grad_norm": 2.4716355514267483, "learning_rate": 2.209260386458976e-06, "loss": 1.0288, "step": 19254 }, { "epoch": 0.6978724946540539, "grad_norm": 2.3375469787302094, "learning_rate": 2.2087734069043616e-06, "loss": 0.8653, "step": 19255 }, { "epoch": 0.6979087383567105, "grad_norm": 2.492846632424844, "learning_rate": 2.208286465811158e-06, "loss": 1.0125, "step": 19256 }, { "epoch": 0.6979449820593672, "grad_norm": 2.3577971604134595, "learning_rate": 2.2077995631860818e-06, "loss": 1.1076, "step": 19257 }, { "epoch": 0.6979812257620238, "grad_norm": 2.4354530537968997, "learning_rate": 2.207312699035837e-06, "loss": 0.8629, "step": 19258 }, { "epoch": 0.6980174694646805, "grad_norm": 2.3417869421340503, "learning_rate": 2.2068258733671366e-06, "loss": 0.9182, "step": 19259 }, { "epoch": 0.6980537131673372, "grad_norm": 2.36979121746951, "learning_rate": 2.2063390861866844e-06, "loss": 1.0374, "step": 19260 }, { "epoch": 0.6980899568699939, "grad_norm": 2.5079360651384652, "learning_rate": 2.20585233750119e-06, "loss": 0.9798, "step": 19261 }, { "epoch": 0.6981262005726505, "grad_norm": 2.0120986062253916, "learning_rate": 2.2053656273173625e-06, "loss": 0.8947, "step": 19262 }, { "epoch": 0.6981624442753072, "grad_norm": 2.500079088004239, "learning_rate": 2.2048789556419043e-06, "loss": 0.9532, "step": 19263 }, { "epoch": 0.6981986879779638, "grad_norm": 2.438042667637226, "learning_rate": 2.2043923224815237e-06, "loss": 0.8684, "step": 19264 }, { "epoch": 0.6982349316806205, "grad_norm": 2.2944129164960203, "learning_rate": 2.2039057278429264e-06, "loss": 0.8941, "step": 19265 }, { "epoch": 0.6982711753832772, "grad_norm": 2.5630895351765712, "learning_rate": 2.203419171732819e-06, "loss": 0.93, "step": 19266 }, { "epoch": 0.6983074190859339, "grad_norm": 2.054821869660946, "learning_rate": 2.202932654157902e-06, "loss": 0.9681, "step": 19267 }, { "epoch": 0.6983436627885905, "grad_norm": 2.5170361990711685, "learning_rate": 2.2024461751248816e-06, "loss": 0.6889, "step": 19268 }, { "epoch": 0.6983799064912471, "grad_norm": 2.484880125207455, "learning_rate": 2.201959734640463e-06, "loss": 0.9165, "step": 19269 }, { "epoch": 0.6984161501939038, "grad_norm": 2.473967036578904, "learning_rate": 2.2014733327113445e-06, "loss": 0.9175, "step": 19270 }, { "epoch": 0.6984523938965604, "grad_norm": 2.3603259628987665, "learning_rate": 2.200986969344232e-06, "loss": 0.813, "step": 19271 }, { "epoch": 0.6984886375992171, "grad_norm": 2.6952709876523637, "learning_rate": 2.2005006445458256e-06, "loss": 1.009, "step": 19272 }, { "epoch": 0.6985248813018738, "grad_norm": 2.455144069571922, "learning_rate": 2.2000143583228296e-06, "loss": 1.0003, "step": 19273 }, { "epoch": 0.6985611250045305, "grad_norm": 2.487530653865243, "learning_rate": 2.19952811068194e-06, "loss": 0.9686, "step": 19274 }, { "epoch": 0.6985973687071871, "grad_norm": 2.3476860136194655, "learning_rate": 2.1990419016298616e-06, "loss": 0.874, "step": 19275 }, { "epoch": 0.6986336124098438, "grad_norm": 2.416673191457896, "learning_rate": 2.1985557311732873e-06, "loss": 0.8383, "step": 19276 }, { "epoch": 0.6986698561125004, "grad_norm": 2.440426294733674, "learning_rate": 2.1980695993189254e-06, "loss": 1.1019, "step": 19277 }, { "epoch": 0.6987060998151571, "grad_norm": 2.163162164598271, "learning_rate": 2.197583506073468e-06, "loss": 0.7729, "step": 19278 }, { "epoch": 0.6987423435178138, "grad_norm": 2.3533689650889826, "learning_rate": 2.1970974514436164e-06, "loss": 0.8452, "step": 19279 }, { "epoch": 0.6987785872204705, "grad_norm": 2.154688765701907, "learning_rate": 2.196611435436065e-06, "loss": 0.7873, "step": 19280 }, { "epoch": 0.6988148309231271, "grad_norm": 2.361680879526217, "learning_rate": 2.196125458057513e-06, "loss": 0.966, "step": 19281 }, { "epoch": 0.6988510746257838, "grad_norm": 2.5718968411571965, "learning_rate": 2.195639519314658e-06, "loss": 0.8303, "step": 19282 }, { "epoch": 0.6988873183284404, "grad_norm": 2.2073167171523447, "learning_rate": 2.1951536192141924e-06, "loss": 0.706, "step": 19283 }, { "epoch": 0.6989235620310971, "grad_norm": 2.5627047108138346, "learning_rate": 2.194667757762814e-06, "loss": 1.0565, "step": 19284 }, { "epoch": 0.6989598057337537, "grad_norm": 2.4171783991944027, "learning_rate": 2.1941819349672168e-06, "loss": 0.8951, "step": 19285 }, { "epoch": 0.6989960494364105, "grad_norm": 2.310681003741045, "learning_rate": 2.1936961508340976e-06, "loss": 0.8178, "step": 19286 }, { "epoch": 0.6990322931390671, "grad_norm": 2.381278499391437, "learning_rate": 2.193210405370147e-06, "loss": 0.9226, "step": 19287 }, { "epoch": 0.6990685368417238, "grad_norm": 2.4728764471150146, "learning_rate": 2.192724698582059e-06, "loss": 1.002, "step": 19288 }, { "epoch": 0.6991047805443804, "grad_norm": 2.4323206768466004, "learning_rate": 2.1922390304765283e-06, "loss": 0.8979, "step": 19289 }, { "epoch": 0.6991410242470371, "grad_norm": 2.3363802963902964, "learning_rate": 2.191753401060245e-06, "loss": 0.9083, "step": 19290 }, { "epoch": 0.6991772679496937, "grad_norm": 2.247701532332641, "learning_rate": 2.191267810339902e-06, "loss": 1.0518, "step": 19291 }, { "epoch": 0.6992135116523504, "grad_norm": 2.357099247688677, "learning_rate": 2.1907822583221873e-06, "loss": 0.841, "step": 19292 }, { "epoch": 0.6992497553550071, "grad_norm": 2.1978829298117075, "learning_rate": 2.1902967450137973e-06, "loss": 0.8328, "step": 19293 }, { "epoch": 0.6992859990576638, "grad_norm": 2.3385969268558258, "learning_rate": 2.189811270421417e-06, "loss": 0.9295, "step": 19294 }, { "epoch": 0.6993222427603204, "grad_norm": 2.285445649988798, "learning_rate": 2.189325834551738e-06, "loss": 0.9129, "step": 19295 }, { "epoch": 0.699358486462977, "grad_norm": 2.3792021798298197, "learning_rate": 2.188840437411449e-06, "loss": 1.013, "step": 19296 }, { "epoch": 0.6993947301656337, "grad_norm": 2.4715980800204305, "learning_rate": 2.1883550790072407e-06, "loss": 0.8352, "step": 19297 }, { "epoch": 0.6994309738682903, "grad_norm": 2.2051667345954327, "learning_rate": 2.187869759345797e-06, "loss": 0.8238, "step": 19298 }, { "epoch": 0.6994672175709471, "grad_norm": 2.2458433079236104, "learning_rate": 2.1873844784338073e-06, "loss": 0.912, "step": 19299 }, { "epoch": 0.6995034612736037, "grad_norm": 2.436765254406071, "learning_rate": 2.1868992362779606e-06, "loss": 0.8782, "step": 19300 }, { "epoch": 0.6995397049762604, "grad_norm": 2.4297747893368884, "learning_rate": 2.1864140328849385e-06, "loss": 0.7154, "step": 19301 }, { "epoch": 0.699575948678917, "grad_norm": 2.266590637644516, "learning_rate": 2.1859288682614323e-06, "loss": 0.8794, "step": 19302 }, { "epoch": 0.6996121923815737, "grad_norm": 2.0052104439204363, "learning_rate": 2.18544374241412e-06, "loss": 0.9007, "step": 19303 }, { "epoch": 0.6996484360842303, "grad_norm": 2.139668432029418, "learning_rate": 2.184958655349695e-06, "loss": 0.9989, "step": 19304 }, { "epoch": 0.699684679786887, "grad_norm": 2.427389078519496, "learning_rate": 2.1844736070748346e-06, "loss": 0.792, "step": 19305 }, { "epoch": 0.6997209234895437, "grad_norm": 2.413820096015477, "learning_rate": 2.183988597596227e-06, "loss": 0.7904, "step": 19306 }, { "epoch": 0.6997571671922004, "grad_norm": 2.462814778012759, "learning_rate": 2.183503626920552e-06, "loss": 0.9784, "step": 19307 }, { "epoch": 0.699793410894857, "grad_norm": 2.667295582496593, "learning_rate": 2.183018695054494e-06, "loss": 0.7546, "step": 19308 }, { "epoch": 0.6998296545975137, "grad_norm": 2.1492814416414254, "learning_rate": 2.1825338020047365e-06, "loss": 0.6194, "step": 19309 }, { "epoch": 0.6998658983001703, "grad_norm": 2.302955265607805, "learning_rate": 2.1820489477779572e-06, "loss": 0.8629, "step": 19310 }, { "epoch": 0.699902142002827, "grad_norm": 2.1310282393547726, "learning_rate": 2.181564132380839e-06, "loss": 0.8288, "step": 19311 }, { "epoch": 0.6999383857054837, "grad_norm": 2.37023828413001, "learning_rate": 2.1810793558200625e-06, "loss": 0.9215, "step": 19312 }, { "epoch": 0.6999746294081404, "grad_norm": 2.4810803992537727, "learning_rate": 2.18059461810231e-06, "loss": 0.8971, "step": 19313 }, { "epoch": 0.700010873110797, "grad_norm": 2.309364986660595, "learning_rate": 2.180109919234256e-06, "loss": 0.7719, "step": 19314 }, { "epoch": 0.7000471168134537, "grad_norm": 2.708515344733531, "learning_rate": 2.179625259222582e-06, "loss": 1.0261, "step": 19315 }, { "epoch": 0.7000833605161103, "grad_norm": 2.2367152009872453, "learning_rate": 2.1791406380739666e-06, "loss": 0.8644, "step": 19316 }, { "epoch": 0.700119604218767, "grad_norm": 2.3215129950371867, "learning_rate": 2.1786560557950886e-06, "loss": 0.9483, "step": 19317 }, { "epoch": 0.7001558479214236, "grad_norm": 2.3725890632683706, "learning_rate": 2.178171512392623e-06, "loss": 0.8552, "step": 19318 }, { "epoch": 0.7001920916240804, "grad_norm": 2.6443669636793095, "learning_rate": 2.1776870078732464e-06, "loss": 0.9862, "step": 19319 }, { "epoch": 0.700228335326737, "grad_norm": 2.309417720463741, "learning_rate": 2.177202542243638e-06, "loss": 0.9179, "step": 19320 }, { "epoch": 0.7002645790293937, "grad_norm": 2.388006218309734, "learning_rate": 2.1767181155104695e-06, "loss": 0.87, "step": 19321 }, { "epoch": 0.7003008227320503, "grad_norm": 2.3517034593907242, "learning_rate": 2.1762337276804195e-06, "loss": 1.0662, "step": 19322 }, { "epoch": 0.700337066434707, "grad_norm": 2.198726780190799, "learning_rate": 2.175749378760158e-06, "loss": 0.8006, "step": 19323 }, { "epoch": 0.7003733101373636, "grad_norm": 2.5180807424428946, "learning_rate": 2.1752650687563653e-06, "loss": 0.9797, "step": 19324 }, { "epoch": 0.7004095538400203, "grad_norm": 2.1278503298852605, "learning_rate": 2.1747807976757096e-06, "loss": 0.8712, "step": 19325 }, { "epoch": 0.700445797542677, "grad_norm": 2.3309044567458486, "learning_rate": 2.1742965655248684e-06, "loss": 0.8055, "step": 19326 }, { "epoch": 0.7004820412453336, "grad_norm": 2.4422584961638614, "learning_rate": 2.1738123723105094e-06, "loss": 0.9386, "step": 19327 }, { "epoch": 0.7005182849479903, "grad_norm": 3.466062715118186, "learning_rate": 2.1733282180393062e-06, "loss": 0.7497, "step": 19328 }, { "epoch": 0.7005545286506469, "grad_norm": 2.5357257653956045, "learning_rate": 2.1728441027179327e-06, "loss": 0.9669, "step": 19329 }, { "epoch": 0.7005907723533036, "grad_norm": 2.406098773339357, "learning_rate": 2.172360026353056e-06, "loss": 1.0078, "step": 19330 }, { "epoch": 0.7006270160559602, "grad_norm": 2.0961373770211353, "learning_rate": 2.171875988951348e-06, "loss": 0.786, "step": 19331 }, { "epoch": 0.700663259758617, "grad_norm": 2.3729974871949553, "learning_rate": 2.1713919905194786e-06, "loss": 1.0719, "step": 19332 }, { "epoch": 0.7006995034612736, "grad_norm": 2.3362018682397885, "learning_rate": 2.1709080310641185e-06, "loss": 0.8662, "step": 19333 }, { "epoch": 0.7007357471639303, "grad_norm": 2.737290183341792, "learning_rate": 2.1704241105919333e-06, "loss": 0.9577, "step": 19334 }, { "epoch": 0.7007719908665869, "grad_norm": 2.546815923792634, "learning_rate": 2.1699402291095924e-06, "loss": 0.853, "step": 19335 }, { "epoch": 0.7008082345692436, "grad_norm": 2.60524032328367, "learning_rate": 2.169456386623763e-06, "loss": 0.8869, "step": 19336 }, { "epoch": 0.7008444782719002, "grad_norm": 2.2958909516936106, "learning_rate": 2.1689725831411153e-06, "loss": 0.7926, "step": 19337 }, { "epoch": 0.700880721974557, "grad_norm": 2.03685307946221, "learning_rate": 2.168488818668311e-06, "loss": 0.9147, "step": 19338 }, { "epoch": 0.7009169656772136, "grad_norm": 1.9840888766037008, "learning_rate": 2.1680050932120183e-06, "loss": 0.7485, "step": 19339 }, { "epoch": 0.7009532093798703, "grad_norm": 2.2366610212475715, "learning_rate": 2.1675214067789048e-06, "loss": 0.8469, "step": 19340 }, { "epoch": 0.7009894530825269, "grad_norm": 2.296395025363566, "learning_rate": 2.1670377593756316e-06, "loss": 0.8598, "step": 19341 }, { "epoch": 0.7010256967851836, "grad_norm": 2.3862063017052098, "learning_rate": 2.1665541510088663e-06, "loss": 0.9487, "step": 19342 }, { "epoch": 0.7010619404878402, "grad_norm": 2.485164135436032, "learning_rate": 2.1660705816852672e-06, "loss": 0.9943, "step": 19343 }, { "epoch": 0.7010981841904969, "grad_norm": 1.9831404813346802, "learning_rate": 2.1655870514115058e-06, "loss": 0.9225, "step": 19344 }, { "epoch": 0.7011344278931536, "grad_norm": 2.38935288502101, "learning_rate": 2.1651035601942387e-06, "loss": 0.8105, "step": 19345 }, { "epoch": 0.7011706715958103, "grad_norm": 2.39781034323978, "learning_rate": 2.164620108040132e-06, "loss": 0.9582, "step": 19346 }, { "epoch": 0.7012069152984669, "grad_norm": 2.628157966361055, "learning_rate": 2.164136694955843e-06, "loss": 0.9635, "step": 19347 }, { "epoch": 0.7012431590011236, "grad_norm": 2.2546958267240034, "learning_rate": 2.163653320948036e-06, "loss": 0.7508, "step": 19348 }, { "epoch": 0.7012794027037802, "grad_norm": 2.77901493323783, "learning_rate": 2.163169986023372e-06, "loss": 0.9609, "step": 19349 }, { "epoch": 0.7013156464064368, "grad_norm": 2.498655096549305, "learning_rate": 2.1626866901885087e-06, "loss": 0.9541, "step": 19350 }, { "epoch": 0.7013518901090935, "grad_norm": 2.136416510876632, "learning_rate": 2.1622034334501064e-06, "loss": 0.9461, "step": 19351 }, { "epoch": 0.7013881338117502, "grad_norm": 1.9918098057822942, "learning_rate": 2.1617202158148247e-06, "loss": 0.8168, "step": 19352 }, { "epoch": 0.7014243775144069, "grad_norm": 2.6001433810032664, "learning_rate": 2.1612370372893237e-06, "loss": 0.9346, "step": 19353 }, { "epoch": 0.7014606212170635, "grad_norm": 2.3748952052214625, "learning_rate": 2.160753897880258e-06, "loss": 0.8837, "step": 19354 }, { "epoch": 0.7014968649197202, "grad_norm": 2.269681107200833, "learning_rate": 2.160270797594286e-06, "loss": 0.9664, "step": 19355 }, { "epoch": 0.7015331086223768, "grad_norm": 2.2818999857707136, "learning_rate": 2.159787736438067e-06, "loss": 0.8056, "step": 19356 }, { "epoch": 0.7015693523250335, "grad_norm": 2.447958321421328, "learning_rate": 2.159304714418254e-06, "loss": 0.8881, "step": 19357 }, { "epoch": 0.7016055960276902, "grad_norm": 2.1136703899056055, "learning_rate": 2.1588217315415032e-06, "loss": 0.7962, "step": 19358 }, { "epoch": 0.7016418397303469, "grad_norm": 2.03987332738323, "learning_rate": 2.158338787814471e-06, "loss": 0.7482, "step": 19359 }, { "epoch": 0.7016780834330035, "grad_norm": 2.648448414497426, "learning_rate": 2.1578558832438136e-06, "loss": 0.9339, "step": 19360 }, { "epoch": 0.7017143271356602, "grad_norm": 2.651615263407578, "learning_rate": 2.1573730178361813e-06, "loss": 0.9695, "step": 19361 }, { "epoch": 0.7017505708383168, "grad_norm": 2.442862691204894, "learning_rate": 2.1568901915982316e-06, "loss": 0.9525, "step": 19362 }, { "epoch": 0.7017868145409735, "grad_norm": 2.5983043576273914, "learning_rate": 2.156407404536612e-06, "loss": 0.9779, "step": 19363 }, { "epoch": 0.7018230582436301, "grad_norm": 2.3808779835486575, "learning_rate": 2.1559246566579816e-06, "loss": 0.8242, "step": 19364 }, { "epoch": 0.7018593019462869, "grad_norm": 2.31598648098779, "learning_rate": 2.1554419479689887e-06, "loss": 0.9469, "step": 19365 }, { "epoch": 0.7018955456489435, "grad_norm": 2.76870588612355, "learning_rate": 2.154959278476286e-06, "loss": 0.8178, "step": 19366 }, { "epoch": 0.7019317893516002, "grad_norm": 1.8141754461562591, "learning_rate": 2.1544766481865233e-06, "loss": 0.7761, "step": 19367 }, { "epoch": 0.7019680330542568, "grad_norm": 2.333146108532877, "learning_rate": 2.1539940571063505e-06, "loss": 0.9725, "step": 19368 }, { "epoch": 0.7020042767569135, "grad_norm": 2.489679097002163, "learning_rate": 2.1535115052424203e-06, "loss": 0.8045, "step": 19369 }, { "epoch": 0.7020405204595701, "grad_norm": 2.3631739861407244, "learning_rate": 2.1530289926013787e-06, "loss": 0.8031, "step": 19370 }, { "epoch": 0.7020767641622269, "grad_norm": 2.237901782129253, "learning_rate": 2.152546519189876e-06, "loss": 0.6602, "step": 19371 }, { "epoch": 0.7021130078648835, "grad_norm": 2.396033791711508, "learning_rate": 2.152064085014559e-06, "loss": 1.0903, "step": 19372 }, { "epoch": 0.7021492515675402, "grad_norm": 2.282267590310542, "learning_rate": 2.1515816900820798e-06, "loss": 1.011, "step": 19373 }, { "epoch": 0.7021854952701968, "grad_norm": 2.270315151164052, "learning_rate": 2.1510993343990795e-06, "loss": 0.8108, "step": 19374 }, { "epoch": 0.7022217389728534, "grad_norm": 2.13056191431722, "learning_rate": 2.1506170179722075e-06, "loss": 0.6916, "step": 19375 }, { "epoch": 0.7022579826755101, "grad_norm": 2.300534693288545, "learning_rate": 2.150134740808112e-06, "loss": 1.0055, "step": 19376 }, { "epoch": 0.7022942263781667, "grad_norm": 2.265523278804375, "learning_rate": 2.1496525029134333e-06, "loss": 0.8407, "step": 19377 }, { "epoch": 0.7023304700808235, "grad_norm": 2.3432995440676936, "learning_rate": 2.1491703042948204e-06, "loss": 0.8986, "step": 19378 }, { "epoch": 0.7023667137834801, "grad_norm": 2.136380605509507, "learning_rate": 2.1486881449589163e-06, "loss": 0.8606, "step": 19379 }, { "epoch": 0.7024029574861368, "grad_norm": 2.037290226719131, "learning_rate": 2.148206024912367e-06, "loss": 0.6882, "step": 19380 }, { "epoch": 0.7024392011887934, "grad_norm": 2.3138949106005513, "learning_rate": 2.147723944161812e-06, "loss": 0.7621, "step": 19381 }, { "epoch": 0.7024754448914501, "grad_norm": 2.6439620295474415, "learning_rate": 2.1472419027138964e-06, "loss": 0.8009, "step": 19382 }, { "epoch": 0.7025116885941067, "grad_norm": 2.520138133427081, "learning_rate": 2.1467599005752613e-06, "loss": 1.0248, "step": 19383 }, { "epoch": 0.7025479322967635, "grad_norm": 2.4008601973701453, "learning_rate": 2.1462779377525515e-06, "loss": 0.7801, "step": 19384 }, { "epoch": 0.7025841759994201, "grad_norm": 2.6177566388333986, "learning_rate": 2.1457960142524044e-06, "loss": 0.9597, "step": 19385 }, { "epoch": 0.7026204197020768, "grad_norm": 2.4657056405999582, "learning_rate": 2.1453141300814615e-06, "loss": 0.8694, "step": 19386 }, { "epoch": 0.7026566634047334, "grad_norm": 2.4764000669812667, "learning_rate": 2.1448322852463654e-06, "loss": 0.9615, "step": 19387 }, { "epoch": 0.7026929071073901, "grad_norm": 2.213557230788791, "learning_rate": 2.144350479753752e-06, "loss": 0.8816, "step": 19388 }, { "epoch": 0.7027291508100467, "grad_norm": 2.2006575657892693, "learning_rate": 2.1438687136102638e-06, "loss": 0.8203, "step": 19389 }, { "epoch": 0.7027653945127034, "grad_norm": 2.4058016786153673, "learning_rate": 2.143386986822533e-06, "loss": 0.9594, "step": 19390 }, { "epoch": 0.7028016382153601, "grad_norm": 2.651489947157361, "learning_rate": 2.1429052993972065e-06, "loss": 0.8562, "step": 19391 }, { "epoch": 0.7028378819180168, "grad_norm": 2.319802902791772, "learning_rate": 2.1424236513409146e-06, "loss": 0.9672, "step": 19392 }, { "epoch": 0.7028741256206734, "grad_norm": 2.275005626146706, "learning_rate": 2.1419420426602986e-06, "loss": 0.7711, "step": 19393 }, { "epoch": 0.7029103693233301, "grad_norm": 2.224445723821246, "learning_rate": 2.1414604733619916e-06, "loss": 0.8468, "step": 19394 }, { "epoch": 0.7029466130259867, "grad_norm": 2.3462821543812504, "learning_rate": 2.1409789434526297e-06, "loss": 0.8511, "step": 19395 }, { "epoch": 0.7029828567286434, "grad_norm": 2.220826304396509, "learning_rate": 2.1404974529388506e-06, "loss": 0.7821, "step": 19396 }, { "epoch": 0.7030191004313001, "grad_norm": 2.115218827563274, "learning_rate": 2.140016001827286e-06, "loss": 0.9165, "step": 19397 }, { "epoch": 0.7030553441339568, "grad_norm": 2.5342709281000415, "learning_rate": 2.1395345901245705e-06, "loss": 0.8745, "step": 19398 }, { "epoch": 0.7030915878366134, "grad_norm": 2.4725460312921834, "learning_rate": 2.139053217837339e-06, "loss": 0.8946, "step": 19399 }, { "epoch": 0.70312783153927, "grad_norm": 2.313516332771848, "learning_rate": 2.138571884972226e-06, "loss": 0.948, "step": 19400 }, { "epoch": 0.7031640752419267, "grad_norm": 2.4062852713837493, "learning_rate": 2.138090591535859e-06, "loss": 0.9594, "step": 19401 }, { "epoch": 0.7032003189445833, "grad_norm": 2.5425926843839135, "learning_rate": 2.1376093375348737e-06, "loss": 0.8244, "step": 19402 }, { "epoch": 0.70323656264724, "grad_norm": 2.164717404504672, "learning_rate": 2.1371281229759005e-06, "loss": 1.003, "step": 19403 }, { "epoch": 0.7032728063498968, "grad_norm": 2.1818508109957904, "learning_rate": 2.136646947865572e-06, "loss": 0.9705, "step": 19404 }, { "epoch": 0.7033090500525534, "grad_norm": 2.443831851372083, "learning_rate": 2.136165812210515e-06, "loss": 0.8477, "step": 19405 }, { "epoch": 0.70334529375521, "grad_norm": 2.5201304793233708, "learning_rate": 2.135684716017361e-06, "loss": 0.7661, "step": 19406 }, { "epoch": 0.7033815374578667, "grad_norm": 2.177601648854194, "learning_rate": 2.1352036592927415e-06, "loss": 0.9341, "step": 19407 }, { "epoch": 0.7034177811605233, "grad_norm": 2.4654108382565476, "learning_rate": 2.134722642043281e-06, "loss": 0.9418, "step": 19408 }, { "epoch": 0.70345402486318, "grad_norm": 2.1534144762786127, "learning_rate": 2.1342416642756116e-06, "loss": 0.695, "step": 19409 }, { "epoch": 0.7034902685658367, "grad_norm": 2.188730389229267, "learning_rate": 2.133760725996355e-06, "loss": 0.973, "step": 19410 }, { "epoch": 0.7035265122684934, "grad_norm": 2.425242144838659, "learning_rate": 2.133279827212146e-06, "loss": 1.0621, "step": 19411 }, { "epoch": 0.70356275597115, "grad_norm": 2.2526774561524814, "learning_rate": 2.132798967929605e-06, "loss": 0.7556, "step": 19412 }, { "epoch": 0.7035989996738067, "grad_norm": 2.578958405384406, "learning_rate": 2.132318148155363e-06, "loss": 0.9525, "step": 19413 }, { "epoch": 0.7036352433764633, "grad_norm": 2.366298142613373, "learning_rate": 2.1318373678960395e-06, "loss": 0.7579, "step": 19414 }, { "epoch": 0.70367148707912, "grad_norm": 2.5356798260254974, "learning_rate": 2.131356627158263e-06, "loss": 1.0297, "step": 19415 }, { "epoch": 0.7037077307817766, "grad_norm": 2.4281478748946967, "learning_rate": 2.1308759259486596e-06, "loss": 0.9081, "step": 19416 }, { "epoch": 0.7037439744844334, "grad_norm": 2.3083201453526434, "learning_rate": 2.1303952642738484e-06, "loss": 0.9433, "step": 19417 }, { "epoch": 0.70378021818709, "grad_norm": 2.5295555573825417, "learning_rate": 2.1299146421404555e-06, "loss": 1.0489, "step": 19418 }, { "epoch": 0.7038164618897467, "grad_norm": 2.5397280317768876, "learning_rate": 2.129434059555102e-06, "loss": 0.9399, "step": 19419 }, { "epoch": 0.7038527055924033, "grad_norm": 2.4276225999347845, "learning_rate": 2.128953516524414e-06, "loss": 0.7414, "step": 19420 }, { "epoch": 0.70388894929506, "grad_norm": 2.3641928521992237, "learning_rate": 2.128473013055008e-06, "loss": 0.7157, "step": 19421 }, { "epoch": 0.7039251929977166, "grad_norm": 2.2134277339901174, "learning_rate": 2.127992549153507e-06, "loss": 0.9308, "step": 19422 }, { "epoch": 0.7039614367003733, "grad_norm": 2.5092854492162426, "learning_rate": 2.1275121248265317e-06, "loss": 0.8465, "step": 19423 }, { "epoch": 0.70399768040303, "grad_norm": 2.9307519937801843, "learning_rate": 2.1270317400807045e-06, "loss": 0.9549, "step": 19424 }, { "epoch": 0.7040339241056867, "grad_norm": 2.3803622402890734, "learning_rate": 2.12655139492264e-06, "loss": 0.7399, "step": 19425 }, { "epoch": 0.7040701678083433, "grad_norm": 2.2136667111462525, "learning_rate": 2.1260710893589598e-06, "loss": 0.7433, "step": 19426 }, { "epoch": 0.704106411511, "grad_norm": 2.3546627015948385, "learning_rate": 2.1255908233962834e-06, "loss": 0.9476, "step": 19427 }, { "epoch": 0.7041426552136566, "grad_norm": 2.450012533581572, "learning_rate": 2.1251105970412255e-06, "loss": 1.0368, "step": 19428 }, { "epoch": 0.7041788989163132, "grad_norm": 2.5494811412314577, "learning_rate": 2.1246304103004065e-06, "loss": 0.8625, "step": 19429 }, { "epoch": 0.70421514261897, "grad_norm": 2.434511541516852, "learning_rate": 2.1241502631804384e-06, "loss": 0.9691, "step": 19430 }, { "epoch": 0.7042513863216266, "grad_norm": 2.4017932872319387, "learning_rate": 2.1236701556879436e-06, "loss": 0.9144, "step": 19431 }, { "epoch": 0.7042876300242833, "grad_norm": 2.5962986543738444, "learning_rate": 2.1231900878295332e-06, "loss": 0.9391, "step": 19432 }, { "epoch": 0.7043238737269399, "grad_norm": 2.5457427706474998, "learning_rate": 2.1227100596118244e-06, "loss": 1.0249, "step": 19433 }, { "epoch": 0.7043601174295966, "grad_norm": 2.1441653153894293, "learning_rate": 2.1222300710414293e-06, "loss": 0.9017, "step": 19434 }, { "epoch": 0.7043963611322532, "grad_norm": 2.4625555217430826, "learning_rate": 2.1217501221249643e-06, "loss": 1.0669, "step": 19435 }, { "epoch": 0.7044326048349099, "grad_norm": 2.3182628504142504, "learning_rate": 2.1212702128690425e-06, "loss": 0.8606, "step": 19436 }, { "epoch": 0.7044688485375666, "grad_norm": 2.441752861837371, "learning_rate": 2.120790343280275e-06, "loss": 0.9939, "step": 19437 }, { "epoch": 0.7045050922402233, "grad_norm": 2.392546190207032, "learning_rate": 2.1203105133652755e-06, "loss": 0.8357, "step": 19438 }, { "epoch": 0.7045413359428799, "grad_norm": 2.233386783627706, "learning_rate": 2.1198307231306554e-06, "loss": 0.9329, "step": 19439 }, { "epoch": 0.7045775796455366, "grad_norm": 2.3182828867924736, "learning_rate": 2.1193509725830276e-06, "loss": 0.8775, "step": 19440 }, { "epoch": 0.7046138233481932, "grad_norm": 2.1045906888203074, "learning_rate": 2.1188712617289997e-06, "loss": 0.8689, "step": 19441 }, { "epoch": 0.7046500670508499, "grad_norm": 2.0590523392216578, "learning_rate": 2.118391590575183e-06, "loss": 0.7762, "step": 19442 }, { "epoch": 0.7046863107535066, "grad_norm": 2.736177724906227, "learning_rate": 2.117911959128188e-06, "loss": 0.9175, "step": 19443 }, { "epoch": 0.7047225544561633, "grad_norm": 2.2781121610317507, "learning_rate": 2.1174323673946246e-06, "loss": 0.6875, "step": 19444 }, { "epoch": 0.7047587981588199, "grad_norm": 2.2619537795217375, "learning_rate": 2.1169528153810973e-06, "loss": 1.2725, "step": 19445 }, { "epoch": 0.7047950418614766, "grad_norm": 2.270813080696126, "learning_rate": 2.1164733030942182e-06, "loss": 0.8913, "step": 19446 }, { "epoch": 0.7048312855641332, "grad_norm": 2.3066006932794645, "learning_rate": 2.115993830540594e-06, "loss": 1.0972, "step": 19447 }, { "epoch": 0.7048675292667899, "grad_norm": 2.324968169622971, "learning_rate": 2.1155143977268287e-06, "loss": 0.9019, "step": 19448 }, { "epoch": 0.7049037729694465, "grad_norm": 2.5065110749855606, "learning_rate": 2.1150350046595325e-06, "loss": 1.0148, "step": 19449 }, { "epoch": 0.7049400166721033, "grad_norm": 2.3965380023447054, "learning_rate": 2.1145556513453055e-06, "loss": 1.0093, "step": 19450 }, { "epoch": 0.7049762603747599, "grad_norm": 2.2243335630611067, "learning_rate": 2.11407633779076e-06, "loss": 0.8792, "step": 19451 }, { "epoch": 0.7050125040774166, "grad_norm": 2.228147397650663, "learning_rate": 2.113597064002496e-06, "loss": 1.0081, "step": 19452 }, { "epoch": 0.7050487477800732, "grad_norm": 2.361507439507206, "learning_rate": 2.1131178299871203e-06, "loss": 0.8226, "step": 19453 }, { "epoch": 0.7050849914827299, "grad_norm": 2.269594588777261, "learning_rate": 2.112638635751233e-06, "loss": 0.8116, "step": 19454 }, { "epoch": 0.7051212351853865, "grad_norm": 2.248239182354475, "learning_rate": 2.112159481301439e-06, "loss": 0.7929, "step": 19455 }, { "epoch": 0.7051574788880433, "grad_norm": 2.184490536596799, "learning_rate": 2.1116803666443432e-06, "loss": 0.8829, "step": 19456 }, { "epoch": 0.7051937225906999, "grad_norm": 2.425035724662839, "learning_rate": 2.111201291786541e-06, "loss": 0.9226, "step": 19457 }, { "epoch": 0.7052299662933565, "grad_norm": 2.283291098787169, "learning_rate": 2.110722256734642e-06, "loss": 1.0847, "step": 19458 }, { "epoch": 0.7052662099960132, "grad_norm": 2.976072156424299, "learning_rate": 2.1102432614952405e-06, "loss": 1.0603, "step": 19459 }, { "epoch": 0.7053024536986698, "grad_norm": 2.2616566604810697, "learning_rate": 2.1097643060749417e-06, "loss": 0.8665, "step": 19460 }, { "epoch": 0.7053386974013265, "grad_norm": 2.3671867319492246, "learning_rate": 2.1092853904803405e-06, "loss": 0.7097, "step": 19461 }, { "epoch": 0.7053749411039831, "grad_norm": 2.4933703814310575, "learning_rate": 2.1088065147180385e-06, "loss": 0.7993, "step": 19462 }, { "epoch": 0.7054111848066399, "grad_norm": 2.4374315376638074, "learning_rate": 2.108327678794636e-06, "loss": 0.9287, "step": 19463 }, { "epoch": 0.7054474285092965, "grad_norm": 2.086832544140306, "learning_rate": 2.1078488827167276e-06, "loss": 0.8312, "step": 19464 }, { "epoch": 0.7054836722119532, "grad_norm": 2.260215760073215, "learning_rate": 2.1073701264909117e-06, "loss": 0.803, "step": 19465 }, { "epoch": 0.7055199159146098, "grad_norm": 2.237292197563653, "learning_rate": 2.1068914101237867e-06, "loss": 0.9575, "step": 19466 }, { "epoch": 0.7055561596172665, "grad_norm": 2.447331025490369, "learning_rate": 2.1064127336219502e-06, "loss": 0.7059, "step": 19467 }, { "epoch": 0.7055924033199231, "grad_norm": 2.2555129585250246, "learning_rate": 2.1059340969919946e-06, "loss": 0.9596, "step": 19468 }, { "epoch": 0.7056286470225799, "grad_norm": 1.9849898681751066, "learning_rate": 2.105455500240517e-06, "loss": 0.7769, "step": 19469 }, { "epoch": 0.7056648907252365, "grad_norm": 2.5054081456452146, "learning_rate": 2.1049769433741112e-06, "loss": 0.9905, "step": 19470 }, { "epoch": 0.7057011344278932, "grad_norm": 2.025645339840253, "learning_rate": 2.104498426399375e-06, "loss": 0.7463, "step": 19471 }, { "epoch": 0.7057373781305498, "grad_norm": 2.249117645906064, "learning_rate": 2.104019949322897e-06, "loss": 0.8598, "step": 19472 }, { "epoch": 0.7057736218332065, "grad_norm": 2.797942179681066, "learning_rate": 2.1035415121512725e-06, "loss": 1.0343, "step": 19473 }, { "epoch": 0.7058098655358631, "grad_norm": 2.17996553603041, "learning_rate": 2.103063114891097e-06, "loss": 0.847, "step": 19474 }, { "epoch": 0.7058461092385198, "grad_norm": 2.6885087142876465, "learning_rate": 2.1025847575489566e-06, "loss": 0.8659, "step": 19475 }, { "epoch": 0.7058823529411765, "grad_norm": 2.34921329382432, "learning_rate": 2.1021064401314487e-06, "loss": 0.9708, "step": 19476 }, { "epoch": 0.7059185966438332, "grad_norm": 2.3700825505799594, "learning_rate": 2.1016281626451573e-06, "loss": 0.875, "step": 19477 }, { "epoch": 0.7059548403464898, "grad_norm": 2.17626022798677, "learning_rate": 2.101149925096681e-06, "loss": 0.9148, "step": 19478 }, { "epoch": 0.7059910840491465, "grad_norm": 2.872453937855971, "learning_rate": 2.1006717274926035e-06, "loss": 1.0016, "step": 19479 }, { "epoch": 0.7060273277518031, "grad_norm": 2.381093327864776, "learning_rate": 2.100193569839518e-06, "loss": 0.851, "step": 19480 }, { "epoch": 0.7060635714544597, "grad_norm": 2.3940387512366392, "learning_rate": 2.09971545214401e-06, "loss": 0.8176, "step": 19481 }, { "epoch": 0.7060998151571165, "grad_norm": 2.2668353244751454, "learning_rate": 2.0992373744126685e-06, "loss": 0.6902, "step": 19482 }, { "epoch": 0.7061360588597732, "grad_norm": 2.61847646629737, "learning_rate": 2.0987593366520837e-06, "loss": 0.8703, "step": 19483 }, { "epoch": 0.7061723025624298, "grad_norm": 2.53473250101718, "learning_rate": 2.0982813388688383e-06, "loss": 1.0239, "step": 19484 }, { "epoch": 0.7062085462650864, "grad_norm": 2.468306204682337, "learning_rate": 2.097803381069522e-06, "loss": 0.9507, "step": 19485 }, { "epoch": 0.7062447899677431, "grad_norm": 2.1758953169026136, "learning_rate": 2.0973254632607192e-06, "loss": 0.84, "step": 19486 }, { "epoch": 0.7062810336703997, "grad_norm": 2.2275548075909426, "learning_rate": 2.0968475854490182e-06, "loss": 0.8336, "step": 19487 }, { "epoch": 0.7063172773730564, "grad_norm": 1.8367442082408065, "learning_rate": 2.096369747641e-06, "loss": 0.6993, "step": 19488 }, { "epoch": 0.7063535210757131, "grad_norm": 2.3038237044052745, "learning_rate": 2.09589194984325e-06, "loss": 1.0071, "step": 19489 }, { "epoch": 0.7063897647783698, "grad_norm": 2.2268442168386913, "learning_rate": 2.095414192062353e-06, "loss": 0.8606, "step": 19490 }, { "epoch": 0.7064260084810264, "grad_norm": 2.424017650143652, "learning_rate": 2.0949364743048938e-06, "loss": 0.9149, "step": 19491 }, { "epoch": 0.7064622521836831, "grad_norm": 2.4904811504279225, "learning_rate": 2.094458796577451e-06, "loss": 0.9049, "step": 19492 }, { "epoch": 0.7064984958863397, "grad_norm": 2.329535608424463, "learning_rate": 2.0939811588866087e-06, "loss": 0.6984, "step": 19493 }, { "epoch": 0.7065347395889964, "grad_norm": 2.557199537348061, "learning_rate": 2.093503561238951e-06, "loss": 0.8276, "step": 19494 }, { "epoch": 0.706570983291653, "grad_norm": 2.427971028100942, "learning_rate": 2.0930260036410537e-06, "loss": 0.7739, "step": 19495 }, { "epoch": 0.7066072269943098, "grad_norm": 2.370372527861901, "learning_rate": 2.092548486099502e-06, "loss": 0.8377, "step": 19496 }, { "epoch": 0.7066434706969664, "grad_norm": 2.2855765079116304, "learning_rate": 2.0920710086208706e-06, "loss": 0.9507, "step": 19497 }, { "epoch": 0.7066797143996231, "grad_norm": 2.2541759431131485, "learning_rate": 2.091593571211745e-06, "loss": 0.9193, "step": 19498 }, { "epoch": 0.7067159581022797, "grad_norm": 2.153940721510743, "learning_rate": 2.0911161738787e-06, "loss": 0.8861, "step": 19499 }, { "epoch": 0.7067522018049364, "grad_norm": 2.386978626228134, "learning_rate": 2.0906388166283165e-06, "loss": 0.9826, "step": 19500 }, { "epoch": 0.706788445507593, "grad_norm": 2.3371034656821994, "learning_rate": 2.090161499467169e-06, "loss": 1.0225, "step": 19501 }, { "epoch": 0.7068246892102498, "grad_norm": 2.549852735916339, "learning_rate": 2.0896842224018356e-06, "loss": 0.9617, "step": 19502 }, { "epoch": 0.7068609329129064, "grad_norm": 2.0962768557756095, "learning_rate": 2.0892069854388953e-06, "loss": 0.8363, "step": 19503 }, { "epoch": 0.7068971766155631, "grad_norm": 2.35109463595668, "learning_rate": 2.088729788584921e-06, "loss": 0.7668, "step": 19504 }, { "epoch": 0.7069334203182197, "grad_norm": 2.437228968528164, "learning_rate": 2.088252631846489e-06, "loss": 0.8954, "step": 19505 }, { "epoch": 0.7069696640208764, "grad_norm": 2.4950758343422357, "learning_rate": 2.0877755152301752e-06, "loss": 1.1806, "step": 19506 }, { "epoch": 0.707005907723533, "grad_norm": 2.55553618181808, "learning_rate": 2.0872984387425556e-06, "loss": 0.8327, "step": 19507 }, { "epoch": 0.7070421514261896, "grad_norm": 2.3441380724396845, "learning_rate": 2.0868214023901993e-06, "loss": 0.9379, "step": 19508 }, { "epoch": 0.7070783951288464, "grad_norm": 2.127754302625407, "learning_rate": 2.0863444061796834e-06, "loss": 1.0387, "step": 19509 }, { "epoch": 0.707114638831503, "grad_norm": 2.237103927277489, "learning_rate": 2.0858674501175784e-06, "loss": 1.0135, "step": 19510 }, { "epoch": 0.7071508825341597, "grad_norm": 2.412055950578592, "learning_rate": 2.0853905342104595e-06, "loss": 0.9777, "step": 19511 }, { "epoch": 0.7071871262368163, "grad_norm": 2.2744108275003754, "learning_rate": 2.0849136584648957e-06, "loss": 0.8235, "step": 19512 }, { "epoch": 0.707223369939473, "grad_norm": 2.467661864055684, "learning_rate": 2.0844368228874574e-06, "loss": 0.9301, "step": 19513 }, { "epoch": 0.7072596136421296, "grad_norm": 2.543843533460774, "learning_rate": 2.0839600274847182e-06, "loss": 0.8521, "step": 19514 }, { "epoch": 0.7072958573447864, "grad_norm": 2.2541189017552115, "learning_rate": 2.0834832722632455e-06, "loss": 1.0079, "step": 19515 }, { "epoch": 0.707332101047443, "grad_norm": 2.6106953165319595, "learning_rate": 2.0830065572296105e-06, "loss": 1.0096, "step": 19516 }, { "epoch": 0.7073683447500997, "grad_norm": 2.213928264475137, "learning_rate": 2.0825298823903773e-06, "loss": 0.8472, "step": 19517 }, { "epoch": 0.7074045884527563, "grad_norm": 2.295471741127666, "learning_rate": 2.0820532477521215e-06, "loss": 0.8212, "step": 19518 }, { "epoch": 0.707440832155413, "grad_norm": 2.5575097065300088, "learning_rate": 2.081576653321406e-06, "loss": 0.9985, "step": 19519 }, { "epoch": 0.7074770758580696, "grad_norm": 2.4394249966204997, "learning_rate": 2.0811000991048003e-06, "loss": 0.879, "step": 19520 }, { "epoch": 0.7075133195607263, "grad_norm": 2.304103723075624, "learning_rate": 2.0806235851088687e-06, "loss": 1.0123, "step": 19521 }, { "epoch": 0.707549563263383, "grad_norm": 2.185167892850793, "learning_rate": 2.0801471113401783e-06, "loss": 0.9057, "step": 19522 }, { "epoch": 0.7075858069660397, "grad_norm": 2.214204009434966, "learning_rate": 2.0796706778052967e-06, "loss": 0.8676, "step": 19523 }, { "epoch": 0.7076220506686963, "grad_norm": 2.436497866899841, "learning_rate": 2.079194284510785e-06, "loss": 0.8298, "step": 19524 }, { "epoch": 0.707658294371353, "grad_norm": 2.604983532234447, "learning_rate": 2.0787179314632093e-06, "loss": 0.935, "step": 19525 }, { "epoch": 0.7076945380740096, "grad_norm": 2.2835623628284862, "learning_rate": 2.0782416186691347e-06, "loss": 0.9474, "step": 19526 }, { "epoch": 0.7077307817766663, "grad_norm": 2.4536296290632875, "learning_rate": 2.0777653461351247e-06, "loss": 0.8004, "step": 19527 }, { "epoch": 0.707767025479323, "grad_norm": 2.2492387588115434, "learning_rate": 2.0772891138677393e-06, "loss": 1.0865, "step": 19528 }, { "epoch": 0.7078032691819797, "grad_norm": 2.3886424873892156, "learning_rate": 2.076812921873542e-06, "loss": 0.7004, "step": 19529 }, { "epoch": 0.7078395128846363, "grad_norm": 2.266319450394653, "learning_rate": 2.0763367701590953e-06, "loss": 0.8097, "step": 19530 }, { "epoch": 0.707875756587293, "grad_norm": 2.526940873122614, "learning_rate": 2.0758606587309616e-06, "loss": 0.8899, "step": 19531 }, { "epoch": 0.7079120002899496, "grad_norm": 2.3036831167047573, "learning_rate": 2.0753845875956973e-06, "loss": 0.7703, "step": 19532 }, { "epoch": 0.7079482439926063, "grad_norm": 2.357844034430513, "learning_rate": 2.0749085567598647e-06, "loss": 0.9446, "step": 19533 }, { "epoch": 0.7079844876952629, "grad_norm": 2.332803175117625, "learning_rate": 2.074432566230025e-06, "loss": 0.9083, "step": 19534 }, { "epoch": 0.7080207313979197, "grad_norm": 2.136049299102, "learning_rate": 2.073956616012733e-06, "loss": 0.7579, "step": 19535 }, { "epoch": 0.7080569751005763, "grad_norm": 2.477873746797202, "learning_rate": 2.0734807061145517e-06, "loss": 0.7338, "step": 19536 }, { "epoch": 0.708093218803233, "grad_norm": 2.3472762207807563, "learning_rate": 2.073004836542033e-06, "loss": 0.9582, "step": 19537 }, { "epoch": 0.7081294625058896, "grad_norm": 2.8871648157042866, "learning_rate": 2.0725290073017406e-06, "loss": 0.8913, "step": 19538 }, { "epoch": 0.7081657062085462, "grad_norm": 2.4262181278193036, "learning_rate": 2.0720532184002264e-06, "loss": 0.7689, "step": 19539 }, { "epoch": 0.7082019499112029, "grad_norm": 2.1228546192004405, "learning_rate": 2.0715774698440477e-06, "loss": 0.7082, "step": 19540 }, { "epoch": 0.7082381936138596, "grad_norm": 2.392634535828549, "learning_rate": 2.071101761639762e-06, "loss": 0.9986, "step": 19541 }, { "epoch": 0.7082744373165163, "grad_norm": 2.3237371160063676, "learning_rate": 2.0706260937939217e-06, "loss": 0.9475, "step": 19542 }, { "epoch": 0.7083106810191729, "grad_norm": 2.414164902253541, "learning_rate": 2.0701504663130835e-06, "loss": 1.029, "step": 19543 }, { "epoch": 0.7083469247218296, "grad_norm": 2.431756146981318, "learning_rate": 2.0696748792037964e-06, "loss": 1.0367, "step": 19544 }, { "epoch": 0.7083831684244862, "grad_norm": 2.3865221635895284, "learning_rate": 2.069199332472621e-06, "loss": 0.7943, "step": 19545 }, { "epoch": 0.7084194121271429, "grad_norm": 2.4593317997582402, "learning_rate": 2.0687238261261037e-06, "loss": 0.9183, "step": 19546 }, { "epoch": 0.7084556558297995, "grad_norm": 2.2384399419302046, "learning_rate": 2.068248360170802e-06, "loss": 0.9021, "step": 19547 }, { "epoch": 0.7084918995324563, "grad_norm": 2.0696287729300114, "learning_rate": 2.067772934613263e-06, "loss": 0.8931, "step": 19548 }, { "epoch": 0.7085281432351129, "grad_norm": 2.3909187815660955, "learning_rate": 2.0672975494600393e-06, "loss": 0.814, "step": 19549 }, { "epoch": 0.7085643869377696, "grad_norm": 2.6308707302838843, "learning_rate": 2.0668222047176837e-06, "loss": 0.9763, "step": 19550 }, { "epoch": 0.7086006306404262, "grad_norm": 2.2202267000388956, "learning_rate": 2.066346900392743e-06, "loss": 0.8755, "step": 19551 }, { "epoch": 0.7086368743430829, "grad_norm": 2.308820150951536, "learning_rate": 2.065871636491767e-06, "loss": 0.9334, "step": 19552 }, { "epoch": 0.7086731180457395, "grad_norm": 2.379601537744596, "learning_rate": 2.065396413021305e-06, "loss": 0.9223, "step": 19553 }, { "epoch": 0.7087093617483963, "grad_norm": 2.5176461868503774, "learning_rate": 2.064921229987908e-06, "loss": 0.7686, "step": 19554 }, { "epoch": 0.7087456054510529, "grad_norm": 2.565945096532041, "learning_rate": 2.06444608739812e-06, "loss": 0.8983, "step": 19555 }, { "epoch": 0.7087818491537096, "grad_norm": 2.0770392213981785, "learning_rate": 2.0639709852584888e-06, "loss": 0.7403, "step": 19556 }, { "epoch": 0.7088180928563662, "grad_norm": 2.423052094426764, "learning_rate": 2.063495923575562e-06, "loss": 0.953, "step": 19557 }, { "epoch": 0.7088543365590229, "grad_norm": 2.130045692652413, "learning_rate": 2.0630209023558875e-06, "loss": 0.8632, "step": 19558 }, { "epoch": 0.7088905802616795, "grad_norm": 2.577764807761935, "learning_rate": 2.062545921606006e-06, "loss": 0.9432, "step": 19559 }, { "epoch": 0.7089268239643362, "grad_norm": 2.315451404074695, "learning_rate": 2.062070981332466e-06, "loss": 1.1039, "step": 19560 }, { "epoch": 0.7089630676669929, "grad_norm": 2.7154233844886866, "learning_rate": 2.061596081541813e-06, "loss": 1.0207, "step": 19561 }, { "epoch": 0.7089993113696496, "grad_norm": 2.2840907167752382, "learning_rate": 2.0611212222405867e-06, "loss": 0.942, "step": 19562 }, { "epoch": 0.7090355550723062, "grad_norm": 2.1925795626081825, "learning_rate": 2.060646403435335e-06, "loss": 0.7912, "step": 19563 }, { "epoch": 0.7090717987749628, "grad_norm": 2.424073027685776, "learning_rate": 2.0601716251325938e-06, "loss": 0.8047, "step": 19564 }, { "epoch": 0.7091080424776195, "grad_norm": 2.310737124043424, "learning_rate": 2.0596968873389133e-06, "loss": 0.7223, "step": 19565 }, { "epoch": 0.7091442861802761, "grad_norm": 2.5630282251808576, "learning_rate": 2.05922219006083e-06, "loss": 0.8937, "step": 19566 }, { "epoch": 0.7091805298829328, "grad_norm": 2.539122113097415, "learning_rate": 2.0587475333048885e-06, "loss": 1.0177, "step": 19567 }, { "epoch": 0.7092167735855895, "grad_norm": 2.2409901242036465, "learning_rate": 2.058272917077625e-06, "loss": 0.8505, "step": 19568 }, { "epoch": 0.7092530172882462, "grad_norm": 2.347611816175291, "learning_rate": 2.057798341385581e-06, "loss": 0.907, "step": 19569 }, { "epoch": 0.7092892609909028, "grad_norm": 2.5217044690046326, "learning_rate": 2.0573238062352998e-06, "loss": 0.9707, "step": 19570 }, { "epoch": 0.7093255046935595, "grad_norm": 2.3396339968059756, "learning_rate": 2.0568493116333136e-06, "loss": 0.782, "step": 19571 }, { "epoch": 0.7093617483962161, "grad_norm": 2.269692363096086, "learning_rate": 2.0563748575861647e-06, "loss": 0.7891, "step": 19572 }, { "epoch": 0.7093979920988728, "grad_norm": 2.1722627758509105, "learning_rate": 2.05590044410039e-06, "loss": 0.9343, "step": 19573 }, { "epoch": 0.7094342358015295, "grad_norm": 2.3640156376648758, "learning_rate": 2.055426071182528e-06, "loss": 0.7821, "step": 19574 }, { "epoch": 0.7094704795041862, "grad_norm": 2.4641210918371415, "learning_rate": 2.054951738839112e-06, "loss": 0.8258, "step": 19575 }, { "epoch": 0.7095067232068428, "grad_norm": 2.2485944092182737, "learning_rate": 2.0544774470766802e-06, "loss": 0.7911, "step": 19576 }, { "epoch": 0.7095429669094995, "grad_norm": 2.4157293235780024, "learning_rate": 2.054003195901768e-06, "loss": 0.854, "step": 19577 }, { "epoch": 0.7095792106121561, "grad_norm": 2.351191790718827, "learning_rate": 2.0535289853209123e-06, "loss": 0.7656, "step": 19578 }, { "epoch": 0.7096154543148128, "grad_norm": 2.52448826988237, "learning_rate": 2.0530548153406427e-06, "loss": 0.9927, "step": 19579 }, { "epoch": 0.7096516980174694, "grad_norm": 2.723934821228579, "learning_rate": 2.0525806859674956e-06, "loss": 0.9045, "step": 19580 }, { "epoch": 0.7096879417201262, "grad_norm": 2.1860532424094976, "learning_rate": 2.0521065972080062e-06, "loss": 0.863, "step": 19581 }, { "epoch": 0.7097241854227828, "grad_norm": 2.1108759337552407, "learning_rate": 2.0516325490687026e-06, "loss": 0.8363, "step": 19582 }, { "epoch": 0.7097604291254395, "grad_norm": 2.55555519095422, "learning_rate": 2.051158541556122e-06, "loss": 0.9547, "step": 19583 }, { "epoch": 0.7097966728280961, "grad_norm": 2.153111431607596, "learning_rate": 2.050684574676789e-06, "loss": 0.9858, "step": 19584 }, { "epoch": 0.7098329165307528, "grad_norm": 2.346614789260604, "learning_rate": 2.050210648437243e-06, "loss": 0.8928, "step": 19585 }, { "epoch": 0.7098691602334094, "grad_norm": 2.058879336595925, "learning_rate": 2.049736762844008e-06, "loss": 0.8213, "step": 19586 }, { "epoch": 0.7099054039360662, "grad_norm": 2.408781958358112, "learning_rate": 2.049262917903618e-06, "loss": 0.9213, "step": 19587 }, { "epoch": 0.7099416476387228, "grad_norm": 2.2925596199540204, "learning_rate": 2.048789113622599e-06, "loss": 0.8376, "step": 19588 }, { "epoch": 0.7099778913413795, "grad_norm": 2.7491730636574827, "learning_rate": 2.048315350007481e-06, "loss": 0.9676, "step": 19589 }, { "epoch": 0.7100141350440361, "grad_norm": 2.3658497001363488, "learning_rate": 2.0478416270647942e-06, "loss": 0.8642, "step": 19590 }, { "epoch": 0.7100503787466927, "grad_norm": 2.163032784656564, "learning_rate": 2.0473679448010626e-06, "loss": 0.7703, "step": 19591 }, { "epoch": 0.7100866224493494, "grad_norm": 2.226921338389479, "learning_rate": 2.046894303222815e-06, "loss": 0.8319, "step": 19592 }, { "epoch": 0.710122866152006, "grad_norm": 2.3986271639887806, "learning_rate": 2.046420702336578e-06, "loss": 1.0188, "step": 19593 }, { "epoch": 0.7101591098546628, "grad_norm": 2.5327770998169576, "learning_rate": 2.045947142148879e-06, "loss": 0.8884, "step": 19594 }, { "epoch": 0.7101953535573194, "grad_norm": 2.5855072862754716, "learning_rate": 2.04547362266624e-06, "loss": 0.9651, "step": 19595 }, { "epoch": 0.7102315972599761, "grad_norm": 2.1085170195390943, "learning_rate": 2.045000143895188e-06, "loss": 0.7641, "step": 19596 }, { "epoch": 0.7102678409626327, "grad_norm": 2.312227520526137, "learning_rate": 2.044526705842247e-06, "loss": 0.7747, "step": 19597 }, { "epoch": 0.7103040846652894, "grad_norm": 2.5481036014752165, "learning_rate": 2.044053308513942e-06, "loss": 0.9808, "step": 19598 }, { "epoch": 0.710340328367946, "grad_norm": 2.4098989656312177, "learning_rate": 2.0435799519167925e-06, "loss": 0.8905, "step": 19599 }, { "epoch": 0.7103765720706028, "grad_norm": 2.3883238786913514, "learning_rate": 2.043106636057324e-06, "loss": 0.7022, "step": 19600 }, { "epoch": 0.7104128157732594, "grad_norm": 2.6648135779518767, "learning_rate": 2.0426333609420595e-06, "loss": 1.0248, "step": 19601 }, { "epoch": 0.7104490594759161, "grad_norm": 2.481683067123224, "learning_rate": 2.0421601265775165e-06, "loss": 0.9155, "step": 19602 }, { "epoch": 0.7104853031785727, "grad_norm": 2.1670228412043153, "learning_rate": 2.0416869329702207e-06, "loss": 0.8258, "step": 19603 }, { "epoch": 0.7105215468812294, "grad_norm": 2.5492697242809252, "learning_rate": 2.0412137801266857e-06, "loss": 0.9089, "step": 19604 }, { "epoch": 0.710557790583886, "grad_norm": 2.415265575036667, "learning_rate": 2.040740668053439e-06, "loss": 0.7829, "step": 19605 }, { "epoch": 0.7105940342865427, "grad_norm": 2.450537981187464, "learning_rate": 2.040267596756994e-06, "loss": 0.9388, "step": 19606 }, { "epoch": 0.7106302779891994, "grad_norm": 2.3160486343887943, "learning_rate": 2.039794566243874e-06, "loss": 0.8338, "step": 19607 }, { "epoch": 0.7106665216918561, "grad_norm": 2.363729063612615, "learning_rate": 2.0393215765205924e-06, "loss": 0.9506, "step": 19608 }, { "epoch": 0.7107027653945127, "grad_norm": 2.6730257349246536, "learning_rate": 2.038848627593669e-06, "loss": 0.8112, "step": 19609 }, { "epoch": 0.7107390090971694, "grad_norm": 2.480920248653496, "learning_rate": 2.0383757194696225e-06, "loss": 0.9476, "step": 19610 }, { "epoch": 0.710775252799826, "grad_norm": 2.395350937302742, "learning_rate": 2.037902852154966e-06, "loss": 0.846, "step": 19611 }, { "epoch": 0.7108114965024827, "grad_norm": 2.293429370001345, "learning_rate": 2.0374300256562165e-06, "loss": 0.7893, "step": 19612 }, { "epoch": 0.7108477402051394, "grad_norm": 2.533018775952912, "learning_rate": 2.0369572399798894e-06, "loss": 0.908, "step": 19613 }, { "epoch": 0.7108839839077961, "grad_norm": 2.245188249956022, "learning_rate": 2.0364844951325015e-06, "loss": 0.8848, "step": 19614 }, { "epoch": 0.7109202276104527, "grad_norm": 2.5062524195414113, "learning_rate": 2.036011791120563e-06, "loss": 0.9778, "step": 19615 }, { "epoch": 0.7109564713131094, "grad_norm": 2.48450056176942, "learning_rate": 2.0355391279505895e-06, "loss": 0.8344, "step": 19616 }, { "epoch": 0.710992715015766, "grad_norm": 1.9637100654555273, "learning_rate": 2.035066505629094e-06, "loss": 0.8438, "step": 19617 }, { "epoch": 0.7110289587184226, "grad_norm": 2.115126823359519, "learning_rate": 2.0345939241625905e-06, "loss": 1.0218, "step": 19618 }, { "epoch": 0.7110652024210793, "grad_norm": 2.351695120966483, "learning_rate": 2.034121383557588e-06, "loss": 0.8619, "step": 19619 }, { "epoch": 0.711101446123736, "grad_norm": 2.109595717468146, "learning_rate": 2.0336488838205986e-06, "loss": 0.8011, "step": 19620 }, { "epoch": 0.7111376898263927, "grad_norm": 2.5863030681050407, "learning_rate": 2.033176424958136e-06, "loss": 0.9435, "step": 19621 }, { "epoch": 0.7111739335290493, "grad_norm": 2.3089118596163325, "learning_rate": 2.0327040069767056e-06, "loss": 0.7272, "step": 19622 }, { "epoch": 0.711210177231706, "grad_norm": 2.1023150141709133, "learning_rate": 2.0322316298828197e-06, "loss": 0.7074, "step": 19623 }, { "epoch": 0.7112464209343626, "grad_norm": 2.3593025486966894, "learning_rate": 2.031759293682987e-06, "loss": 0.9295, "step": 19624 }, { "epoch": 0.7112826646370193, "grad_norm": 2.230311067847102, "learning_rate": 2.031286998383718e-06, "loss": 0.9469, "step": 19625 }, { "epoch": 0.7113189083396759, "grad_norm": 2.466747335588933, "learning_rate": 2.030814743991517e-06, "loss": 0.8737, "step": 19626 }, { "epoch": 0.7113551520423327, "grad_norm": 2.4398302529887843, "learning_rate": 2.030342530512893e-06, "loss": 0.8718, "step": 19627 }, { "epoch": 0.7113913957449893, "grad_norm": 2.4427424062175493, "learning_rate": 2.0298703579543548e-06, "loss": 0.9145, "step": 19628 }, { "epoch": 0.711427639447646, "grad_norm": 2.5377958825123446, "learning_rate": 2.029398226322405e-06, "loss": 0.9617, "step": 19629 }, { "epoch": 0.7114638831503026, "grad_norm": 2.2857479483713616, "learning_rate": 2.028926135623553e-06, "loss": 0.8273, "step": 19630 }, { "epoch": 0.7115001268529593, "grad_norm": 2.459824522105317, "learning_rate": 2.028454085864298e-06, "loss": 0.8741, "step": 19631 }, { "epoch": 0.7115363705556159, "grad_norm": 2.6895458936523644, "learning_rate": 2.0279820770511527e-06, "loss": 0.9209, "step": 19632 }, { "epoch": 0.7115726142582727, "grad_norm": 2.4121649505216456, "learning_rate": 2.027510109190615e-06, "loss": 0.8968, "step": 19633 }, { "epoch": 0.7116088579609293, "grad_norm": 2.0356261590366893, "learning_rate": 2.0270381822891915e-06, "loss": 0.8087, "step": 19634 }, { "epoch": 0.711645101663586, "grad_norm": 2.2585721035677144, "learning_rate": 2.026566296353383e-06, "loss": 0.9838, "step": 19635 }, { "epoch": 0.7116813453662426, "grad_norm": 2.275761214583466, "learning_rate": 2.0260944513896918e-06, "loss": 0.858, "step": 19636 }, { "epoch": 0.7117175890688993, "grad_norm": 2.2543653622914537, "learning_rate": 2.0256226474046232e-06, "loss": 0.9335, "step": 19637 }, { "epoch": 0.7117538327715559, "grad_norm": 2.273114010972239, "learning_rate": 2.025150884404673e-06, "loss": 0.9409, "step": 19638 }, { "epoch": 0.7117900764742126, "grad_norm": 2.202598571837144, "learning_rate": 2.024679162396345e-06, "loss": 0.9291, "step": 19639 }, { "epoch": 0.7118263201768693, "grad_norm": 2.44555075209783, "learning_rate": 2.0242074813861386e-06, "loss": 0.8581, "step": 19640 }, { "epoch": 0.711862563879526, "grad_norm": 2.4319316386625527, "learning_rate": 2.023735841380555e-06, "loss": 1.0872, "step": 19641 }, { "epoch": 0.7118988075821826, "grad_norm": 2.326559692747429, "learning_rate": 2.0232642423860897e-06, "loss": 0.914, "step": 19642 }, { "epoch": 0.7119350512848392, "grad_norm": 2.155166733378114, "learning_rate": 2.022792684409243e-06, "loss": 0.8458, "step": 19643 }, { "epoch": 0.7119712949874959, "grad_norm": 2.3015705238054265, "learning_rate": 2.022321167456513e-06, "loss": 0.931, "step": 19644 }, { "epoch": 0.7120075386901525, "grad_norm": 2.4451453684843165, "learning_rate": 2.0218496915343975e-06, "loss": 1.0375, "step": 19645 }, { "epoch": 0.7120437823928093, "grad_norm": 2.508660241715829, "learning_rate": 2.021378256649391e-06, "loss": 0.8568, "step": 19646 }, { "epoch": 0.712080026095466, "grad_norm": 2.488634841226407, "learning_rate": 2.0209068628079903e-06, "loss": 0.9506, "step": 19647 }, { "epoch": 0.7121162697981226, "grad_norm": 2.186190143728591, "learning_rate": 2.0204355100166933e-06, "loss": 0.7818, "step": 19648 }, { "epoch": 0.7121525135007792, "grad_norm": 2.4102287296289235, "learning_rate": 2.0199641982819913e-06, "loss": 1.0544, "step": 19649 }, { "epoch": 0.7121887572034359, "grad_norm": 2.2474244956636693, "learning_rate": 2.019492927610382e-06, "loss": 0.8088, "step": 19650 }, { "epoch": 0.7122250009060925, "grad_norm": 2.166014398971402, "learning_rate": 2.0190216980083537e-06, "loss": 0.8139, "step": 19651 }, { "epoch": 0.7122612446087492, "grad_norm": 2.3953006316271725, "learning_rate": 2.0185505094824077e-06, "loss": 0.7227, "step": 19652 }, { "epoch": 0.7122974883114059, "grad_norm": 2.4340142588945395, "learning_rate": 2.0180793620390305e-06, "loss": 1.0008, "step": 19653 }, { "epoch": 0.7123337320140626, "grad_norm": 2.2429719338933056, "learning_rate": 2.017608255684718e-06, "loss": 0.9154, "step": 19654 }, { "epoch": 0.7123699757167192, "grad_norm": 2.5441913975705464, "learning_rate": 2.0171371904259586e-06, "loss": 0.9981, "step": 19655 }, { "epoch": 0.7124062194193759, "grad_norm": 2.2164805230002433, "learning_rate": 2.0166661662692442e-06, "loss": 0.9813, "step": 19656 }, { "epoch": 0.7124424631220325, "grad_norm": 2.559976415499668, "learning_rate": 2.016195183221068e-06, "loss": 0.8063, "step": 19657 }, { "epoch": 0.7124787068246892, "grad_norm": 2.137337316614002, "learning_rate": 2.0157242412879156e-06, "loss": 1.0397, "step": 19658 }, { "epoch": 0.7125149505273459, "grad_norm": 2.060804616783233, "learning_rate": 2.0152533404762787e-06, "loss": 0.8512, "step": 19659 }, { "epoch": 0.7125511942300026, "grad_norm": 2.4048490461610808, "learning_rate": 2.014782480792645e-06, "loss": 0.835, "step": 19660 }, { "epoch": 0.7125874379326592, "grad_norm": 2.318019422881731, "learning_rate": 2.0143116622435055e-06, "loss": 0.806, "step": 19661 }, { "epoch": 0.7126236816353159, "grad_norm": 2.2843360426052057, "learning_rate": 2.013840884835344e-06, "loss": 0.8466, "step": 19662 }, { "epoch": 0.7126599253379725, "grad_norm": 2.4659870608354306, "learning_rate": 2.013370148574649e-06, "loss": 0.7939, "step": 19663 }, { "epoch": 0.7126961690406292, "grad_norm": 2.3842590745709478, "learning_rate": 2.0128994534679068e-06, "loss": 0.8236, "step": 19664 }, { "epoch": 0.7127324127432858, "grad_norm": 2.4923846253550885, "learning_rate": 2.012428799521606e-06, "loss": 1.0178, "step": 19665 }, { "epoch": 0.7127686564459426, "grad_norm": 2.444254408250239, "learning_rate": 2.011958186742227e-06, "loss": 1.0209, "step": 19666 }, { "epoch": 0.7128049001485992, "grad_norm": 2.2277992960778814, "learning_rate": 2.0114876151362573e-06, "loss": 0.9676, "step": 19667 }, { "epoch": 0.7128411438512559, "grad_norm": 2.242092985808196, "learning_rate": 2.011017084710183e-06, "loss": 0.7092, "step": 19668 }, { "epoch": 0.7128773875539125, "grad_norm": 2.33497611186434, "learning_rate": 2.0105465954704834e-06, "loss": 0.8559, "step": 19669 }, { "epoch": 0.7129136312565691, "grad_norm": 4.418625770234741, "learning_rate": 2.0100761474236457e-06, "loss": 0.7993, "step": 19670 }, { "epoch": 0.7129498749592258, "grad_norm": 2.1996445870078642, "learning_rate": 2.009605740576147e-06, "loss": 0.9546, "step": 19671 }, { "epoch": 0.7129861186618826, "grad_norm": 2.3178808351215037, "learning_rate": 2.009135374934476e-06, "loss": 0.8622, "step": 19672 }, { "epoch": 0.7130223623645392, "grad_norm": 2.1941194262955945, "learning_rate": 2.008665050505109e-06, "loss": 0.7754, "step": 19673 }, { "epoch": 0.7130586060671958, "grad_norm": 2.380314733147044, "learning_rate": 2.0081947672945306e-06, "loss": 0.9433, "step": 19674 }, { "epoch": 0.7130948497698525, "grad_norm": 2.467426624380945, "learning_rate": 2.0077245253092163e-06, "loss": 0.8902, "step": 19675 }, { "epoch": 0.7131310934725091, "grad_norm": 2.580525811617352, "learning_rate": 2.007254324555649e-06, "loss": 0.9512, "step": 19676 }, { "epoch": 0.7131673371751658, "grad_norm": 2.3379065037482336, "learning_rate": 2.006784165040309e-06, "loss": 1.0088, "step": 19677 }, { "epoch": 0.7132035808778224, "grad_norm": 2.6036270161505968, "learning_rate": 2.0063140467696714e-06, "loss": 0.9004, "step": 19678 }, { "epoch": 0.7132398245804792, "grad_norm": 2.632228003691103, "learning_rate": 2.005843969750215e-06, "loss": 0.6849, "step": 19679 }, { "epoch": 0.7132760682831358, "grad_norm": 2.5568092268770894, "learning_rate": 2.005373933988419e-06, "loss": 0.7235, "step": 19680 }, { "epoch": 0.7133123119857925, "grad_norm": 2.0135306656481538, "learning_rate": 2.00490393949076e-06, "loss": 0.7425, "step": 19681 }, { "epoch": 0.7133485556884491, "grad_norm": 2.397441484949601, "learning_rate": 2.0044339862637115e-06, "loss": 0.8853, "step": 19682 }, { "epoch": 0.7133847993911058, "grad_norm": 2.18121439230565, "learning_rate": 2.0039640743137513e-06, "loss": 0.8536, "step": 19683 }, { "epoch": 0.7134210430937624, "grad_norm": 2.476176444560188, "learning_rate": 2.0034942036473536e-06, "loss": 1.103, "step": 19684 }, { "epoch": 0.7134572867964192, "grad_norm": 2.4636845683665554, "learning_rate": 2.0030243742709957e-06, "loss": 0.9327, "step": 19685 }, { "epoch": 0.7134935304990758, "grad_norm": 2.268529576903607, "learning_rate": 2.002554586191148e-06, "loss": 0.7399, "step": 19686 }, { "epoch": 0.7135297742017325, "grad_norm": 2.5897151091107324, "learning_rate": 2.0020848394142855e-06, "loss": 1.0409, "step": 19687 }, { "epoch": 0.7135660179043891, "grad_norm": 2.1805382091373886, "learning_rate": 2.001615133946882e-06, "loss": 0.8696, "step": 19688 }, { "epoch": 0.7136022616070458, "grad_norm": 2.2104577962289826, "learning_rate": 2.001145469795407e-06, "loss": 0.8655, "step": 19689 }, { "epoch": 0.7136385053097024, "grad_norm": 2.130604079903919, "learning_rate": 2.000675846966336e-06, "loss": 0.8539, "step": 19690 }, { "epoch": 0.713674749012359, "grad_norm": 2.4818164549949677, "learning_rate": 2.0002062654661338e-06, "loss": 0.9913, "step": 19691 }, { "epoch": 0.7137109927150158, "grad_norm": 2.384624841751806, "learning_rate": 1.9997367253012784e-06, "loss": 0.8954, "step": 19692 }, { "epoch": 0.7137472364176725, "grad_norm": 2.429054019084163, "learning_rate": 1.9992672264782348e-06, "loss": 0.8409, "step": 19693 }, { "epoch": 0.7137834801203291, "grad_norm": 2.4960199658752393, "learning_rate": 1.998797769003476e-06, "loss": 0.8375, "step": 19694 }, { "epoch": 0.7138197238229858, "grad_norm": 2.639282925224445, "learning_rate": 1.9983283528834667e-06, "loss": 1.0773, "step": 19695 }, { "epoch": 0.7138559675256424, "grad_norm": 2.169180182106028, "learning_rate": 1.997858978124678e-06, "loss": 0.8653, "step": 19696 }, { "epoch": 0.713892211228299, "grad_norm": 2.2849722923601297, "learning_rate": 1.9973896447335782e-06, "loss": 0.9822, "step": 19697 }, { "epoch": 0.7139284549309557, "grad_norm": 2.227690958422996, "learning_rate": 1.996920352716632e-06, "loss": 0.7432, "step": 19698 }, { "epoch": 0.7139646986336124, "grad_norm": 2.071541613997203, "learning_rate": 1.996451102080307e-06, "loss": 0.6591, "step": 19699 }, { "epoch": 0.7140009423362691, "grad_norm": 2.167126039448983, "learning_rate": 1.9959818928310685e-06, "loss": 0.7867, "step": 19700 }, { "epoch": 0.7140371860389257, "grad_norm": 2.4066170717826036, "learning_rate": 1.9955127249753853e-06, "loss": 0.8376, "step": 19701 }, { "epoch": 0.7140734297415824, "grad_norm": 2.304909741308701, "learning_rate": 1.995043598519716e-06, "loss": 0.9372, "step": 19702 }, { "epoch": 0.714109673444239, "grad_norm": 2.4260376781287567, "learning_rate": 1.994574513470532e-06, "loss": 0.8419, "step": 19703 }, { "epoch": 0.7141459171468957, "grad_norm": 2.2978279960148784, "learning_rate": 1.9941054698342922e-06, "loss": 1.0026, "step": 19704 }, { "epoch": 0.7141821608495524, "grad_norm": 2.1997118215075666, "learning_rate": 1.9936364676174626e-06, "loss": 0.892, "step": 19705 }, { "epoch": 0.7142184045522091, "grad_norm": 2.2348173996331235, "learning_rate": 1.993167506826502e-06, "loss": 0.9071, "step": 19706 }, { "epoch": 0.7142546482548657, "grad_norm": 2.1658849538205467, "learning_rate": 1.9926985874678752e-06, "loss": 0.8326, "step": 19707 }, { "epoch": 0.7142908919575224, "grad_norm": 2.5301341894187455, "learning_rate": 1.9922297095480446e-06, "loss": 1.0596, "step": 19708 }, { "epoch": 0.714327135660179, "grad_norm": 2.159903333677437, "learning_rate": 1.9917608730734682e-06, "loss": 0.71, "step": 19709 }, { "epoch": 0.7143633793628357, "grad_norm": 2.2888998886233125, "learning_rate": 1.991292078050608e-06, "loss": 0.9183, "step": 19710 }, { "epoch": 0.7143996230654923, "grad_norm": 2.3148594206588817, "learning_rate": 1.9908233244859224e-06, "loss": 0.8118, "step": 19711 }, { "epoch": 0.7144358667681491, "grad_norm": 2.2164186216383386, "learning_rate": 1.9903546123858736e-06, "loss": 0.8663, "step": 19712 }, { "epoch": 0.7144721104708057, "grad_norm": 2.332939031409568, "learning_rate": 1.9898859417569166e-06, "loss": 0.9983, "step": 19713 }, { "epoch": 0.7145083541734624, "grad_norm": 2.22797178069936, "learning_rate": 1.9894173126055105e-06, "loss": 0.8042, "step": 19714 }, { "epoch": 0.714544597876119, "grad_norm": 2.590473742125622, "learning_rate": 1.988948724938115e-06, "loss": 1.1556, "step": 19715 }, { "epoch": 0.7145808415787757, "grad_norm": 2.290733360928553, "learning_rate": 1.9884801787611836e-06, "loss": 0.8121, "step": 19716 }, { "epoch": 0.7146170852814323, "grad_norm": 2.5505374146757314, "learning_rate": 1.988011674081175e-06, "loss": 0.7994, "step": 19717 }, { "epoch": 0.7146533289840891, "grad_norm": 2.2610749728333586, "learning_rate": 1.9875432109045413e-06, "loss": 0.7799, "step": 19718 }, { "epoch": 0.7146895726867457, "grad_norm": 2.333663474570093, "learning_rate": 1.9870747892377434e-06, "loss": 0.9212, "step": 19719 }, { "epoch": 0.7147258163894024, "grad_norm": 2.3404966169736894, "learning_rate": 1.9866064090872304e-06, "loss": 0.8762, "step": 19720 }, { "epoch": 0.714762060092059, "grad_norm": 2.509260520256839, "learning_rate": 1.9861380704594613e-06, "loss": 0.9525, "step": 19721 }, { "epoch": 0.7147983037947157, "grad_norm": 2.2457876393556697, "learning_rate": 1.985669773360885e-06, "loss": 0.7329, "step": 19722 }, { "epoch": 0.7148345474973723, "grad_norm": 2.239856176780154, "learning_rate": 1.9852015177979562e-06, "loss": 0.8296, "step": 19723 }, { "epoch": 0.7148707912000289, "grad_norm": 2.0773853071883615, "learning_rate": 1.984733303777128e-06, "loss": 0.8106, "step": 19724 }, { "epoch": 0.7149070349026857, "grad_norm": 2.578036683403664, "learning_rate": 1.9842651313048506e-06, "loss": 0.921, "step": 19725 }, { "epoch": 0.7149432786053423, "grad_norm": 1.973991097159258, "learning_rate": 1.9837970003875757e-06, "loss": 0.7006, "step": 19726 }, { "epoch": 0.714979522307999, "grad_norm": 2.489568186563, "learning_rate": 1.9833289110317537e-06, "loss": 0.8339, "step": 19727 }, { "epoch": 0.7150157660106556, "grad_norm": 2.2304206812617435, "learning_rate": 1.9828608632438368e-06, "loss": 0.8196, "step": 19728 }, { "epoch": 0.7150520097133123, "grad_norm": 2.2557800969479884, "learning_rate": 1.982392857030271e-06, "loss": 0.8164, "step": 19729 }, { "epoch": 0.7150882534159689, "grad_norm": 2.015920618134395, "learning_rate": 1.981924892397506e-06, "loss": 0.7812, "step": 19730 }, { "epoch": 0.7151244971186257, "grad_norm": 2.251847143196614, "learning_rate": 1.9814569693519914e-06, "loss": 0.748, "step": 19731 }, { "epoch": 0.7151607408212823, "grad_norm": 2.4994821005583767, "learning_rate": 1.9809890879001755e-06, "loss": 1.0707, "step": 19732 }, { "epoch": 0.715196984523939, "grad_norm": 2.3434195861073763, "learning_rate": 1.9805212480485027e-06, "loss": 0.9264, "step": 19733 }, { "epoch": 0.7152332282265956, "grad_norm": 2.4521338017019874, "learning_rate": 1.98005344980342e-06, "loss": 0.732, "step": 19734 }, { "epoch": 0.7152694719292523, "grad_norm": 2.212006171632621, "learning_rate": 1.9795856931713774e-06, "loss": 0.931, "step": 19735 }, { "epoch": 0.7153057156319089, "grad_norm": 2.5912200435309747, "learning_rate": 1.979117978158815e-06, "loss": 0.6973, "step": 19736 }, { "epoch": 0.7153419593345656, "grad_norm": 2.3226390336323637, "learning_rate": 1.978650304772182e-06, "loss": 0.9906, "step": 19737 }, { "epoch": 0.7153782030372223, "grad_norm": 2.2321368720391495, "learning_rate": 1.978182673017916e-06, "loss": 0.9401, "step": 19738 }, { "epoch": 0.715414446739879, "grad_norm": 1.9951718483578553, "learning_rate": 1.9777150829024696e-06, "loss": 0.7128, "step": 19739 }, { "epoch": 0.7154506904425356, "grad_norm": 2.131356095780051, "learning_rate": 1.9772475344322796e-06, "loss": 0.9252, "step": 19740 }, { "epoch": 0.7154869341451923, "grad_norm": 2.4700358524329915, "learning_rate": 1.976780027613792e-06, "loss": 1.0113, "step": 19741 }, { "epoch": 0.7155231778478489, "grad_norm": 2.021240482042363, "learning_rate": 1.9763125624534456e-06, "loss": 0.7752, "step": 19742 }, { "epoch": 0.7155594215505056, "grad_norm": 2.330944081604125, "learning_rate": 1.9758451389576837e-06, "loss": 0.9178, "step": 19743 }, { "epoch": 0.7155956652531623, "grad_norm": 2.1724233525723813, "learning_rate": 1.9753777571329485e-06, "loss": 0.8571, "step": 19744 }, { "epoch": 0.715631908955819, "grad_norm": 2.228298364552898, "learning_rate": 1.9749104169856766e-06, "loss": 0.9359, "step": 19745 }, { "epoch": 0.7156681526584756, "grad_norm": 2.255180306579519, "learning_rate": 1.97444311852231e-06, "loss": 0.8477, "step": 19746 }, { "epoch": 0.7157043963611323, "grad_norm": 2.3881735959776003, "learning_rate": 1.973975861749287e-06, "loss": 0.873, "step": 19747 }, { "epoch": 0.7157406400637889, "grad_norm": 2.3613195836305008, "learning_rate": 1.9735086466730486e-06, "loss": 0.9624, "step": 19748 }, { "epoch": 0.7157768837664455, "grad_norm": 2.1354210400797102, "learning_rate": 1.973041473300029e-06, "loss": 1.0584, "step": 19749 }, { "epoch": 0.7158131274691022, "grad_norm": 2.0966754489871624, "learning_rate": 1.9725743416366668e-06, "loss": 0.7508, "step": 19750 }, { "epoch": 0.715849371171759, "grad_norm": 2.408787764273701, "learning_rate": 1.9721072516893996e-06, "loss": 0.8095, "step": 19751 }, { "epoch": 0.7158856148744156, "grad_norm": 2.306405858636789, "learning_rate": 1.9716402034646646e-06, "loss": 0.7303, "step": 19752 }, { "epoch": 0.7159218585770722, "grad_norm": 2.3003420858338375, "learning_rate": 1.9711731969688948e-06, "loss": 0.986, "step": 19753 }, { "epoch": 0.7159581022797289, "grad_norm": 2.6109195675969104, "learning_rate": 1.970706232208526e-06, "loss": 0.9059, "step": 19754 }, { "epoch": 0.7159943459823855, "grad_norm": 2.298971401423494, "learning_rate": 1.9702393091899962e-06, "loss": 0.955, "step": 19755 }, { "epoch": 0.7160305896850422, "grad_norm": 2.4973150077517765, "learning_rate": 1.969772427919734e-06, "loss": 0.9088, "step": 19756 }, { "epoch": 0.7160668333876989, "grad_norm": 2.5316096440285993, "learning_rate": 1.969305588404177e-06, "loss": 0.9972, "step": 19757 }, { "epoch": 0.7161030770903556, "grad_norm": 2.395896309776732, "learning_rate": 1.9688387906497518e-06, "loss": 1.0014, "step": 19758 }, { "epoch": 0.7161393207930122, "grad_norm": 2.479123027774294, "learning_rate": 1.9683720346628986e-06, "loss": 0.9061, "step": 19759 }, { "epoch": 0.7161755644956689, "grad_norm": 2.2058415315462274, "learning_rate": 1.9679053204500443e-06, "loss": 0.7337, "step": 19760 }, { "epoch": 0.7162118081983255, "grad_norm": 2.751322582670957, "learning_rate": 1.967438648017622e-06, "loss": 0.9234, "step": 19761 }, { "epoch": 0.7162480519009822, "grad_norm": 2.5807598603963235, "learning_rate": 1.9669720173720596e-06, "loss": 1.0215, "step": 19762 }, { "epoch": 0.7162842956036388, "grad_norm": 2.5178121520074805, "learning_rate": 1.966505428519789e-06, "loss": 0.8347, "step": 19763 }, { "epoch": 0.7163205393062956, "grad_norm": 2.118583988636302, "learning_rate": 1.9660388814672397e-06, "loss": 0.8054, "step": 19764 }, { "epoch": 0.7163567830089522, "grad_norm": 2.276424016538149, "learning_rate": 1.9655723762208385e-06, "loss": 0.762, "step": 19765 }, { "epoch": 0.7163930267116089, "grad_norm": 2.226149285557942, "learning_rate": 1.9651059127870146e-06, "loss": 0.8277, "step": 19766 }, { "epoch": 0.7164292704142655, "grad_norm": 2.188865810614057, "learning_rate": 1.9646394911721963e-06, "loss": 0.7725, "step": 19767 }, { "epoch": 0.7164655141169222, "grad_norm": 2.334178620080298, "learning_rate": 1.9641731113828116e-06, "loss": 0.7278, "step": 19768 }, { "epoch": 0.7165017578195788, "grad_norm": 2.6186011678116223, "learning_rate": 1.9637067734252836e-06, "loss": 0.9402, "step": 19769 }, { "epoch": 0.7165380015222355, "grad_norm": 2.2815690246413025, "learning_rate": 1.9632404773060397e-06, "loss": 0.8299, "step": 19770 }, { "epoch": 0.7165742452248922, "grad_norm": 2.0668945781236863, "learning_rate": 1.962774223031506e-06, "loss": 0.7652, "step": 19771 }, { "epoch": 0.7166104889275489, "grad_norm": 2.501973175081207, "learning_rate": 1.9623080106081077e-06, "loss": 1.018, "step": 19772 }, { "epoch": 0.7166467326302055, "grad_norm": 2.232115360276067, "learning_rate": 1.9618418400422667e-06, "loss": 0.8888, "step": 19773 }, { "epoch": 0.7166829763328622, "grad_norm": 2.425465883553185, "learning_rate": 1.9613757113404074e-06, "loss": 1.1412, "step": 19774 }, { "epoch": 0.7167192200355188, "grad_norm": 2.4083811907008363, "learning_rate": 1.9609096245089553e-06, "loss": 0.9737, "step": 19775 }, { "epoch": 0.7167554637381754, "grad_norm": 2.611984219584278, "learning_rate": 1.9604435795543285e-06, "loss": 0.8266, "step": 19776 }, { "epoch": 0.7167917074408322, "grad_norm": 2.523788126938139, "learning_rate": 1.9599775764829526e-06, "loss": 0.9822, "step": 19777 }, { "epoch": 0.7168279511434889, "grad_norm": 2.5580674986836343, "learning_rate": 1.959511615301244e-06, "loss": 1.1258, "step": 19778 }, { "epoch": 0.7168641948461455, "grad_norm": 2.0832941463740005, "learning_rate": 1.9590456960156294e-06, "loss": 0.8947, "step": 19779 }, { "epoch": 0.7169004385488021, "grad_norm": 2.3843782099039896, "learning_rate": 1.958579818632525e-06, "loss": 0.8782, "step": 19780 }, { "epoch": 0.7169366822514588, "grad_norm": 2.6489428467381337, "learning_rate": 1.9581139831583523e-06, "loss": 0.7158, "step": 19781 }, { "epoch": 0.7169729259541154, "grad_norm": 2.4564140333982496, "learning_rate": 1.957648189599528e-06, "loss": 1.0354, "step": 19782 }, { "epoch": 0.7170091696567721, "grad_norm": 2.868589556050103, "learning_rate": 1.9571824379624715e-06, "loss": 0.856, "step": 19783 }, { "epoch": 0.7170454133594288, "grad_norm": 2.402220599381584, "learning_rate": 1.9567167282536025e-06, "loss": 0.8785, "step": 19784 }, { "epoch": 0.7170816570620855, "grad_norm": 2.5479952584605017, "learning_rate": 1.956251060479333e-06, "loss": 0.8505, "step": 19785 }, { "epoch": 0.7171179007647421, "grad_norm": 2.180515921428092, "learning_rate": 1.955785434646087e-06, "loss": 0.7762, "step": 19786 }, { "epoch": 0.7171541444673988, "grad_norm": 2.332634931576095, "learning_rate": 1.9553198507602743e-06, "loss": 0.9417, "step": 19787 }, { "epoch": 0.7171903881700554, "grad_norm": 2.227866069260906, "learning_rate": 1.9548543088283143e-06, "loss": 0.9318, "step": 19788 }, { "epoch": 0.7172266318727121, "grad_norm": 2.1603916941470214, "learning_rate": 1.9543888088566175e-06, "loss": 0.8411, "step": 19789 }, { "epoch": 0.7172628755753688, "grad_norm": 2.7075400337589812, "learning_rate": 1.9539233508516042e-06, "loss": 0.896, "step": 19790 }, { "epoch": 0.7172991192780255, "grad_norm": 2.2914543018903277, "learning_rate": 1.953457934819683e-06, "loss": 0.7682, "step": 19791 }, { "epoch": 0.7173353629806821, "grad_norm": 2.258941854002041, "learning_rate": 1.9529925607672707e-06, "loss": 0.7175, "step": 19792 }, { "epoch": 0.7173716066833388, "grad_norm": 2.363377820387084, "learning_rate": 1.9525272287007762e-06, "loss": 0.8781, "step": 19793 }, { "epoch": 0.7174078503859954, "grad_norm": 2.285252709468303, "learning_rate": 1.952061938626614e-06, "loss": 0.9044, "step": 19794 }, { "epoch": 0.7174440940886521, "grad_norm": 2.1393142044720603, "learning_rate": 1.9515966905511963e-06, "loss": 1.108, "step": 19795 }, { "epoch": 0.7174803377913087, "grad_norm": 2.186739359888351, "learning_rate": 1.951131484480931e-06, "loss": 0.945, "step": 19796 }, { "epoch": 0.7175165814939655, "grad_norm": 2.3957448416826366, "learning_rate": 1.95066632042223e-06, "loss": 0.8613, "step": 19797 }, { "epoch": 0.7175528251966221, "grad_norm": 2.18114272473151, "learning_rate": 1.9502011983815027e-06, "loss": 1.0831, "step": 19798 }, { "epoch": 0.7175890688992788, "grad_norm": 2.0660476008457764, "learning_rate": 1.9497361183651602e-06, "loss": 0.9043, "step": 19799 }, { "epoch": 0.7176253126019354, "grad_norm": 2.3935277568319937, "learning_rate": 1.949271080379608e-06, "loss": 0.8867, "step": 19800 }, { "epoch": 0.717661556304592, "grad_norm": 2.566789386388559, "learning_rate": 1.9488060844312545e-06, "loss": 0.8313, "step": 19801 }, { "epoch": 0.7176978000072487, "grad_norm": 2.141062763787659, "learning_rate": 1.9483411305265106e-06, "loss": 0.766, "step": 19802 }, { "epoch": 0.7177340437099055, "grad_norm": 2.3988975244990534, "learning_rate": 1.9478762186717775e-06, "loss": 0.7075, "step": 19803 }, { "epoch": 0.7177702874125621, "grad_norm": 2.5327491811742275, "learning_rate": 1.9474113488734666e-06, "loss": 0.8436, "step": 19804 }, { "epoch": 0.7178065311152187, "grad_norm": 2.420879308546196, "learning_rate": 1.9469465211379775e-06, "loss": 0.7577, "step": 19805 }, { "epoch": 0.7178427748178754, "grad_norm": 2.407056549928129, "learning_rate": 1.9464817354717226e-06, "loss": 0.975, "step": 19806 }, { "epoch": 0.717879018520532, "grad_norm": 2.555942948306793, "learning_rate": 1.9460169918811006e-06, "loss": 0.8327, "step": 19807 }, { "epoch": 0.7179152622231887, "grad_norm": 2.070099530536433, "learning_rate": 1.945552290372519e-06, "loss": 0.8128, "step": 19808 }, { "epoch": 0.7179515059258453, "grad_norm": 2.1751500096431644, "learning_rate": 1.9450876309523785e-06, "loss": 1.0034, "step": 19809 }, { "epoch": 0.7179877496285021, "grad_norm": 2.234024041946062, "learning_rate": 1.9446230136270826e-06, "loss": 0.9072, "step": 19810 }, { "epoch": 0.7180239933311587, "grad_norm": 2.8617797797467235, "learning_rate": 1.944158438403036e-06, "loss": 0.8461, "step": 19811 }, { "epoch": 0.7180602370338154, "grad_norm": 2.5629698562327907, "learning_rate": 1.9436939052866355e-06, "loss": 0.9141, "step": 19812 }, { "epoch": 0.718096480736472, "grad_norm": 2.5083933872524073, "learning_rate": 1.943229414284285e-06, "loss": 1.1084, "step": 19813 }, { "epoch": 0.7181327244391287, "grad_norm": 2.215364467373909, "learning_rate": 1.942764965402384e-06, "loss": 0.9438, "step": 19814 }, { "epoch": 0.7181689681417853, "grad_norm": 2.4148059216638607, "learning_rate": 1.942300558647335e-06, "loss": 1.0333, "step": 19815 }, { "epoch": 0.7182052118444421, "grad_norm": 2.784394720537772, "learning_rate": 1.941836194025533e-06, "loss": 0.9501, "step": 19816 }, { "epoch": 0.7182414555470987, "grad_norm": 2.3211352019261704, "learning_rate": 1.9413718715433796e-06, "loss": 0.8798, "step": 19817 }, { "epoch": 0.7182776992497554, "grad_norm": 2.3411950288119647, "learning_rate": 1.940907591207271e-06, "loss": 1.0426, "step": 19818 }, { "epoch": 0.718313942952412, "grad_norm": 2.2643684295070012, "learning_rate": 1.9404433530236078e-06, "loss": 0.8573, "step": 19819 }, { "epoch": 0.7183501866550687, "grad_norm": 2.191885707060632, "learning_rate": 1.939979156998783e-06, "loss": 0.8879, "step": 19820 }, { "epoch": 0.7183864303577253, "grad_norm": 2.3297207109983322, "learning_rate": 1.939515003139195e-06, "loss": 0.9436, "step": 19821 }, { "epoch": 0.718422674060382, "grad_norm": 2.3957385865631666, "learning_rate": 1.939050891451241e-06, "loss": 0.9922, "step": 19822 }, { "epoch": 0.7184589177630387, "grad_norm": 2.454298554817905, "learning_rate": 1.9385868219413127e-06, "loss": 0.8698, "step": 19823 }, { "epoch": 0.7184951614656954, "grad_norm": 2.4672478868384498, "learning_rate": 1.9381227946158082e-06, "loss": 0.8657, "step": 19824 }, { "epoch": 0.718531405168352, "grad_norm": 2.3507720024268526, "learning_rate": 1.9376588094811165e-06, "loss": 0.943, "step": 19825 }, { "epoch": 0.7185676488710087, "grad_norm": 2.2506401171533836, "learning_rate": 1.937194866543638e-06, "loss": 0.8226, "step": 19826 }, { "epoch": 0.7186038925736653, "grad_norm": 2.249712815438188, "learning_rate": 1.93673096580976e-06, "loss": 0.9656, "step": 19827 }, { "epoch": 0.718640136276322, "grad_norm": 2.7232668739813235, "learning_rate": 1.936267107285878e-06, "loss": 0.7388, "step": 19828 }, { "epoch": 0.7186763799789787, "grad_norm": 2.3358212527181226, "learning_rate": 1.9358032909783815e-06, "loss": 0.8125, "step": 19829 }, { "epoch": 0.7187126236816354, "grad_norm": 2.4133663344926792, "learning_rate": 1.935339516893662e-06, "loss": 0.9832, "step": 19830 }, { "epoch": 0.718748867384292, "grad_norm": 2.323873389901821, "learning_rate": 1.934875785038112e-06, "loss": 0.8972, "step": 19831 }, { "epoch": 0.7187851110869486, "grad_norm": 2.3341639708094033, "learning_rate": 1.9344120954181183e-06, "loss": 0.7344, "step": 19832 }, { "epoch": 0.7188213547896053, "grad_norm": 2.815104912438321, "learning_rate": 1.933948448040072e-06, "loss": 0.7582, "step": 19833 }, { "epoch": 0.7188575984922619, "grad_norm": 2.0864812478302817, "learning_rate": 1.933484842910362e-06, "loss": 0.5845, "step": 19834 }, { "epoch": 0.7188938421949186, "grad_norm": 2.621673147675874, "learning_rate": 1.933021280035378e-06, "loss": 0.8641, "step": 19835 }, { "epoch": 0.7189300858975753, "grad_norm": 2.0523146504138086, "learning_rate": 1.932557759421504e-06, "loss": 0.7441, "step": 19836 }, { "epoch": 0.718966329600232, "grad_norm": 2.509737118936049, "learning_rate": 1.9320942810751293e-06, "loss": 0.8745, "step": 19837 }, { "epoch": 0.7190025733028886, "grad_norm": 2.553826253966571, "learning_rate": 1.9316308450026396e-06, "loss": 0.9207, "step": 19838 }, { "epoch": 0.7190388170055453, "grad_norm": 2.355520165954651, "learning_rate": 1.931167451210423e-06, "loss": 0.8429, "step": 19839 }, { "epoch": 0.7190750607082019, "grad_norm": 2.2236660712770653, "learning_rate": 1.930704099704861e-06, "loss": 0.9868, "step": 19840 }, { "epoch": 0.7191113044108586, "grad_norm": 2.56021727184206, "learning_rate": 1.9302407904923406e-06, "loss": 1.0375, "step": 19841 }, { "epoch": 0.7191475481135152, "grad_norm": 2.33312406778904, "learning_rate": 1.929777523579247e-06, "loss": 1.0841, "step": 19842 }, { "epoch": 0.719183791816172, "grad_norm": 2.4369330347498317, "learning_rate": 1.929314298971961e-06, "loss": 0.909, "step": 19843 }, { "epoch": 0.7192200355188286, "grad_norm": 2.4649665893272514, "learning_rate": 1.9288511166768683e-06, "loss": 0.9829, "step": 19844 }, { "epoch": 0.7192562792214853, "grad_norm": 2.3302528745804234, "learning_rate": 1.9283879767003468e-06, "loss": 0.9611, "step": 19845 }, { "epoch": 0.7192925229241419, "grad_norm": 2.024717008269651, "learning_rate": 1.9279248790487844e-06, "loss": 0.7665, "step": 19846 }, { "epoch": 0.7193287666267986, "grad_norm": 2.537234028183762, "learning_rate": 1.9274618237285575e-06, "loss": 0.8715, "step": 19847 }, { "epoch": 0.7193650103294552, "grad_norm": 2.3217991311372073, "learning_rate": 1.92699881074605e-06, "loss": 0.7994, "step": 19848 }, { "epoch": 0.719401254032112, "grad_norm": 2.686073022096036, "learning_rate": 1.926535840107639e-06, "loss": 0.8447, "step": 19849 }, { "epoch": 0.7194374977347686, "grad_norm": 2.4821068020133854, "learning_rate": 1.9260729118197053e-06, "loss": 0.942, "step": 19850 }, { "epoch": 0.7194737414374253, "grad_norm": 2.6124428291450266, "learning_rate": 1.9256100258886296e-06, "loss": 0.7905, "step": 19851 }, { "epoch": 0.7195099851400819, "grad_norm": 2.1628733856626403, "learning_rate": 1.925147182320786e-06, "loss": 0.784, "step": 19852 }, { "epoch": 0.7195462288427386, "grad_norm": 2.4812235016415225, "learning_rate": 1.924684381122556e-06, "loss": 0.8713, "step": 19853 }, { "epoch": 0.7195824725453952, "grad_norm": 2.4589575248508626, "learning_rate": 1.924221622300314e-06, "loss": 0.9402, "step": 19854 }, { "epoch": 0.7196187162480518, "grad_norm": 2.3768956736506173, "learning_rate": 1.9237589058604406e-06, "loss": 0.8033, "step": 19855 }, { "epoch": 0.7196549599507086, "grad_norm": 2.2587798569548596, "learning_rate": 1.923296231809307e-06, "loss": 0.8269, "step": 19856 }, { "epoch": 0.7196912036533653, "grad_norm": 2.4035505078229167, "learning_rate": 1.92283360015329e-06, "loss": 0.8341, "step": 19857 }, { "epoch": 0.7197274473560219, "grad_norm": 2.366509222908667, "learning_rate": 1.922371010898765e-06, "loss": 1.1615, "step": 19858 }, { "epoch": 0.7197636910586785, "grad_norm": 2.4562646241104757, "learning_rate": 1.921908464052109e-06, "loss": 0.9265, "step": 19859 }, { "epoch": 0.7197999347613352, "grad_norm": 2.2489476131804005, "learning_rate": 1.9214459596196905e-06, "loss": 0.8633, "step": 19860 }, { "epoch": 0.7198361784639918, "grad_norm": 2.511553343044533, "learning_rate": 1.9209834976078847e-06, "loss": 1.0018, "step": 19861 }, { "epoch": 0.7198724221666486, "grad_norm": 2.2200175226450383, "learning_rate": 1.920521078023066e-06, "loss": 0.9012, "step": 19862 }, { "epoch": 0.7199086658693052, "grad_norm": 1.9837591157425127, "learning_rate": 1.9200587008716026e-06, "loss": 0.8601, "step": 19863 }, { "epoch": 0.7199449095719619, "grad_norm": 2.3598191642518103, "learning_rate": 1.9195963661598677e-06, "loss": 0.9517, "step": 19864 }, { "epoch": 0.7199811532746185, "grad_norm": 2.5559272806755953, "learning_rate": 1.9191340738942315e-06, "loss": 0.8462, "step": 19865 }, { "epoch": 0.7200173969772752, "grad_norm": 2.533576693419883, "learning_rate": 1.9186718240810664e-06, "loss": 0.8573, "step": 19866 }, { "epoch": 0.7200536406799318, "grad_norm": 2.5578440091475203, "learning_rate": 1.918209616726739e-06, "loss": 1.0599, "step": 19867 }, { "epoch": 0.7200898843825885, "grad_norm": 2.3339925495944924, "learning_rate": 1.9177474518376188e-06, "loss": 0.9048, "step": 19868 }, { "epoch": 0.7201261280852452, "grad_norm": 2.5193268592966134, "learning_rate": 1.917285329420076e-06, "loss": 0.981, "step": 19869 }, { "epoch": 0.7201623717879019, "grad_norm": 2.477102821250387, "learning_rate": 1.9168232494804756e-06, "loss": 0.9576, "step": 19870 }, { "epoch": 0.7201986154905585, "grad_norm": 2.3921729774141314, "learning_rate": 1.9163612120251877e-06, "loss": 0.9377, "step": 19871 }, { "epoch": 0.7202348591932152, "grad_norm": 2.2299105909210737, "learning_rate": 1.9158992170605745e-06, "loss": 0.8692, "step": 19872 }, { "epoch": 0.7202711028958718, "grad_norm": 2.308227480116651, "learning_rate": 1.915437264593008e-06, "loss": 0.7766, "step": 19873 }, { "epoch": 0.7203073465985285, "grad_norm": 2.337538910839836, "learning_rate": 1.9149753546288487e-06, "loss": 0.8744, "step": 19874 }, { "epoch": 0.7203435903011852, "grad_norm": 2.554809278194801, "learning_rate": 1.9145134871744646e-06, "loss": 0.8528, "step": 19875 }, { "epoch": 0.7203798340038419, "grad_norm": 2.3514129862492865, "learning_rate": 1.914051662236216e-06, "loss": 0.8688, "step": 19876 }, { "epoch": 0.7204160777064985, "grad_norm": 2.6544479595515624, "learning_rate": 1.913589879820472e-06, "loss": 0.9314, "step": 19877 }, { "epoch": 0.7204523214091552, "grad_norm": 2.4441395309960794, "learning_rate": 1.9131281399335917e-06, "loss": 0.7855, "step": 19878 }, { "epoch": 0.7204885651118118, "grad_norm": 2.392801201307331, "learning_rate": 1.9126664425819395e-06, "loss": 0.8955, "step": 19879 }, { "epoch": 0.7205248088144685, "grad_norm": 2.1799887311483532, "learning_rate": 1.912204787771876e-06, "loss": 1.015, "step": 19880 }, { "epoch": 0.7205610525171251, "grad_norm": 2.2987425886513706, "learning_rate": 1.9117431755097625e-06, "loss": 0.8797, "step": 19881 }, { "epoch": 0.7205972962197819, "grad_norm": 2.256106244967381, "learning_rate": 1.911281605801962e-06, "loss": 0.9011, "step": 19882 }, { "epoch": 0.7206335399224385, "grad_norm": 2.2545553786334773, "learning_rate": 1.9108200786548316e-06, "loss": 0.7899, "step": 19883 }, { "epoch": 0.7206697836250952, "grad_norm": 2.4828558888702688, "learning_rate": 1.9103585940747315e-06, "loss": 0.8307, "step": 19884 }, { "epoch": 0.7207060273277518, "grad_norm": 2.7690445800820758, "learning_rate": 1.9098971520680222e-06, "loss": 1.0096, "step": 19885 }, { "epoch": 0.7207422710304084, "grad_norm": 2.5765224094760173, "learning_rate": 1.909435752641063e-06, "loss": 0.8681, "step": 19886 }, { "epoch": 0.7207785147330651, "grad_norm": 2.436088729853847, "learning_rate": 1.9089743958002075e-06, "loss": 0.8875, "step": 19887 }, { "epoch": 0.7208147584357218, "grad_norm": 2.5189554167939927, "learning_rate": 1.908513081551816e-06, "loss": 0.8655, "step": 19888 }, { "epoch": 0.7208510021383785, "grad_norm": 2.4547244780734503, "learning_rate": 1.908051809902246e-06, "loss": 1.0039, "step": 19889 }, { "epoch": 0.7208872458410351, "grad_norm": 2.3569944819178557, "learning_rate": 1.907590580857851e-06, "loss": 0.7024, "step": 19890 }, { "epoch": 0.7209234895436918, "grad_norm": 2.053345611544046, "learning_rate": 1.9071293944249886e-06, "loss": 0.8737, "step": 19891 }, { "epoch": 0.7209597332463484, "grad_norm": 2.2258407972862853, "learning_rate": 1.9066682506100097e-06, "loss": 0.9027, "step": 19892 }, { "epoch": 0.7209959769490051, "grad_norm": 2.387748311158387, "learning_rate": 1.9062071494192746e-06, "loss": 0.8089, "step": 19893 }, { "epoch": 0.7210322206516617, "grad_norm": 2.284927886153386, "learning_rate": 1.9057460908591325e-06, "loss": 0.8538, "step": 19894 }, { "epoch": 0.7210684643543185, "grad_norm": 2.5132616282964744, "learning_rate": 1.9052850749359397e-06, "loss": 0.7665, "step": 19895 }, { "epoch": 0.7211047080569751, "grad_norm": 2.528519596572783, "learning_rate": 1.9048241016560448e-06, "loss": 0.9677, "step": 19896 }, { "epoch": 0.7211409517596318, "grad_norm": 2.575072525045452, "learning_rate": 1.904363171025802e-06, "loss": 0.9092, "step": 19897 }, { "epoch": 0.7211771954622884, "grad_norm": 2.1687691545400356, "learning_rate": 1.9039022830515646e-06, "loss": 0.8009, "step": 19898 }, { "epoch": 0.7212134391649451, "grad_norm": 2.241417449095315, "learning_rate": 1.9034414377396793e-06, "loss": 0.868, "step": 19899 }, { "epoch": 0.7212496828676017, "grad_norm": 2.502642813440402, "learning_rate": 1.902980635096498e-06, "loss": 0.866, "step": 19900 }, { "epoch": 0.7212859265702584, "grad_norm": 2.4472844686458557, "learning_rate": 1.9025198751283708e-06, "loss": 1.0916, "step": 19901 }, { "epoch": 0.7213221702729151, "grad_norm": 2.554108994927404, "learning_rate": 1.902059157841648e-06, "loss": 0.8198, "step": 19902 }, { "epoch": 0.7213584139755718, "grad_norm": 2.3975194516178084, "learning_rate": 1.9015984832426754e-06, "loss": 0.9442, "step": 19903 }, { "epoch": 0.7213946576782284, "grad_norm": 2.033821194542186, "learning_rate": 1.9011378513378008e-06, "loss": 0.6606, "step": 19904 }, { "epoch": 0.7214309013808851, "grad_norm": 2.256890128852874, "learning_rate": 1.900677262133373e-06, "loss": 0.9139, "step": 19905 }, { "epoch": 0.7214671450835417, "grad_norm": 2.1501757062041293, "learning_rate": 1.90021671563574e-06, "loss": 0.9531, "step": 19906 }, { "epoch": 0.7215033887861984, "grad_norm": 2.3016978699131765, "learning_rate": 1.899756211851244e-06, "loss": 0.9302, "step": 19907 }, { "epoch": 0.7215396324888551, "grad_norm": 2.479086085383225, "learning_rate": 1.8992957507862325e-06, "loss": 0.8218, "step": 19908 }, { "epoch": 0.7215758761915118, "grad_norm": 2.175551270643982, "learning_rate": 1.8988353324470521e-06, "loss": 0.9893, "step": 19909 }, { "epoch": 0.7216121198941684, "grad_norm": 2.531941020582056, "learning_rate": 1.8983749568400433e-06, "loss": 0.907, "step": 19910 }, { "epoch": 0.721648363596825, "grad_norm": 2.4245373124038996, "learning_rate": 1.897914623971554e-06, "loss": 0.7199, "step": 19911 }, { "epoch": 0.7216846072994817, "grad_norm": 2.422881078456506, "learning_rate": 1.8974543338479211e-06, "loss": 1.1694, "step": 19912 }, { "epoch": 0.7217208510021383, "grad_norm": 2.1237733632334375, "learning_rate": 1.8969940864754948e-06, "loss": 0.914, "step": 19913 }, { "epoch": 0.721757094704795, "grad_norm": 2.453930629983701, "learning_rate": 1.8965338818606117e-06, "loss": 0.95, "step": 19914 }, { "epoch": 0.7217933384074517, "grad_norm": 2.2770724764830144, "learning_rate": 1.8960737200096163e-06, "loss": 0.8152, "step": 19915 }, { "epoch": 0.7218295821101084, "grad_norm": 2.4469381371201466, "learning_rate": 1.8956136009288467e-06, "loss": 0.9235, "step": 19916 }, { "epoch": 0.721865825812765, "grad_norm": 2.3245288950420533, "learning_rate": 1.8951535246246439e-06, "loss": 0.8116, "step": 19917 }, { "epoch": 0.7219020695154217, "grad_norm": 2.7498307213215742, "learning_rate": 1.8946934911033493e-06, "loss": 0.9443, "step": 19918 }, { "epoch": 0.7219383132180783, "grad_norm": 2.286435502425014, "learning_rate": 1.8942335003712986e-06, "loss": 0.7185, "step": 19919 }, { "epoch": 0.721974556920735, "grad_norm": 2.346263155317819, "learning_rate": 1.8937735524348322e-06, "loss": 0.7477, "step": 19920 }, { "epoch": 0.7220108006233917, "grad_norm": 2.5169249220816265, "learning_rate": 1.8933136473002872e-06, "loss": 0.9136, "step": 19921 }, { "epoch": 0.7220470443260484, "grad_norm": 2.4774151794570836, "learning_rate": 1.8928537849740036e-06, "loss": 0.9418, "step": 19922 }, { "epoch": 0.722083288028705, "grad_norm": 2.27446034496812, "learning_rate": 1.8923939654623137e-06, "loss": 0.8396, "step": 19923 }, { "epoch": 0.7221195317313617, "grad_norm": 2.2901898423371247, "learning_rate": 1.8919341887715553e-06, "loss": 1.0783, "step": 19924 }, { "epoch": 0.7221557754340183, "grad_norm": 2.438943334993298, "learning_rate": 1.8914744549080643e-06, "loss": 1.0541, "step": 19925 }, { "epoch": 0.722192019136675, "grad_norm": 2.095531417109102, "learning_rate": 1.8910147638781774e-06, "loss": 0.7896, "step": 19926 }, { "epoch": 0.7222282628393316, "grad_norm": 2.1755741349751943, "learning_rate": 1.8905551156882245e-06, "loss": 0.8399, "step": 19927 }, { "epoch": 0.7222645065419884, "grad_norm": 2.29339351102357, "learning_rate": 1.8900955103445422e-06, "loss": 0.7745, "step": 19928 }, { "epoch": 0.722300750244645, "grad_norm": 2.0725824176715237, "learning_rate": 1.8896359478534642e-06, "loss": 0.865, "step": 19929 }, { "epoch": 0.7223369939473017, "grad_norm": 2.357567278665753, "learning_rate": 1.8891764282213205e-06, "loss": 0.9393, "step": 19930 }, { "epoch": 0.7223732376499583, "grad_norm": 2.339765514034565, "learning_rate": 1.888716951454445e-06, "loss": 0.9323, "step": 19931 }, { "epoch": 0.722409481352615, "grad_norm": 2.3318792448724506, "learning_rate": 1.888257517559166e-06, "loss": 0.8836, "step": 19932 }, { "epoch": 0.7224457250552716, "grad_norm": 2.1396635921957503, "learning_rate": 1.8877981265418194e-06, "loss": 0.9444, "step": 19933 }, { "epoch": 0.7224819687579284, "grad_norm": 2.548162003505162, "learning_rate": 1.8873387784087304e-06, "loss": 0.7646, "step": 19934 }, { "epoch": 0.722518212460585, "grad_norm": 2.35274893646474, "learning_rate": 1.8868794731662326e-06, "loss": 1.0308, "step": 19935 }, { "epoch": 0.7225544561632417, "grad_norm": 2.494728373380911, "learning_rate": 1.8864202108206514e-06, "loss": 0.9782, "step": 19936 }, { "epoch": 0.7225906998658983, "grad_norm": 2.336205561425205, "learning_rate": 1.8859609913783161e-06, "loss": 0.7197, "step": 19937 }, { "epoch": 0.722626943568555, "grad_norm": 2.3025622639200822, "learning_rate": 1.8855018148455572e-06, "loss": 0.8075, "step": 19938 }, { "epoch": 0.7226631872712116, "grad_norm": 2.27558502636672, "learning_rate": 1.8850426812286975e-06, "loss": 0.8758, "step": 19939 }, { "epoch": 0.7226994309738682, "grad_norm": 2.3173661133123464, "learning_rate": 1.8845835905340665e-06, "loss": 0.8775, "step": 19940 }, { "epoch": 0.722735674676525, "grad_norm": 2.462617405781451, "learning_rate": 1.8841245427679889e-06, "loss": 0.911, "step": 19941 }, { "epoch": 0.7227719183791816, "grad_norm": 2.404768886728455, "learning_rate": 1.883665537936793e-06, "loss": 0.8458, "step": 19942 }, { "epoch": 0.7228081620818383, "grad_norm": 2.910332699089677, "learning_rate": 1.8832065760467987e-06, "loss": 0.8609, "step": 19943 }, { "epoch": 0.7228444057844949, "grad_norm": 2.164481099595052, "learning_rate": 1.8827476571043335e-06, "loss": 0.797, "step": 19944 }, { "epoch": 0.7228806494871516, "grad_norm": 2.2455095124794213, "learning_rate": 1.8822887811157204e-06, "loss": 0.8791, "step": 19945 }, { "epoch": 0.7229168931898082, "grad_norm": 2.4910538142187697, "learning_rate": 1.8818299480872837e-06, "loss": 0.8252, "step": 19946 }, { "epoch": 0.722953136892465, "grad_norm": 2.381710837520758, "learning_rate": 1.8813711580253429e-06, "loss": 0.9749, "step": 19947 }, { "epoch": 0.7229893805951216, "grad_norm": 2.4875465090087983, "learning_rate": 1.8809124109362215e-06, "loss": 0.8305, "step": 19948 }, { "epoch": 0.7230256242977783, "grad_norm": 2.36685544643141, "learning_rate": 1.8804537068262423e-06, "loss": 0.9445, "step": 19949 }, { "epoch": 0.7230618680004349, "grad_norm": 2.231727408414972, "learning_rate": 1.8799950457017225e-06, "loss": 0.9129, "step": 19950 }, { "epoch": 0.7230981117030916, "grad_norm": 2.280878680246106, "learning_rate": 1.8795364275689843e-06, "loss": 0.7532, "step": 19951 }, { "epoch": 0.7231343554057482, "grad_norm": 2.5848101745714125, "learning_rate": 1.8790778524343473e-06, "loss": 1.0453, "step": 19952 }, { "epoch": 0.7231705991084049, "grad_norm": 2.462305084906698, "learning_rate": 1.8786193203041309e-06, "loss": 0.9699, "step": 19953 }, { "epoch": 0.7232068428110616, "grad_norm": 2.4994160021043865, "learning_rate": 1.8781608311846517e-06, "loss": 0.9024, "step": 19954 }, { "epoch": 0.7232430865137183, "grad_norm": 2.4960100179561846, "learning_rate": 1.8777023850822273e-06, "loss": 0.9302, "step": 19955 }, { "epoch": 0.7232793302163749, "grad_norm": 2.6331161141238155, "learning_rate": 1.877243982003178e-06, "loss": 0.864, "step": 19956 }, { "epoch": 0.7233155739190316, "grad_norm": 2.137605163078723, "learning_rate": 1.8767856219538167e-06, "loss": 0.7357, "step": 19957 }, { "epoch": 0.7233518176216882, "grad_norm": 2.232716377884809, "learning_rate": 1.8763273049404618e-06, "loss": 0.8116, "step": 19958 }, { "epoch": 0.7233880613243449, "grad_norm": 2.4682696138042157, "learning_rate": 1.875869030969424e-06, "loss": 0.9186, "step": 19959 }, { "epoch": 0.7234243050270016, "grad_norm": 2.195619257766538, "learning_rate": 1.875410800047025e-06, "loss": 0.8127, "step": 19960 }, { "epoch": 0.7234605487296583, "grad_norm": 2.626962957425637, "learning_rate": 1.874952612179574e-06, "loss": 0.8724, "step": 19961 }, { "epoch": 0.7234967924323149, "grad_norm": 2.3054072142537696, "learning_rate": 1.8744944673733877e-06, "loss": 0.9197, "step": 19962 }, { "epoch": 0.7235330361349716, "grad_norm": 2.353050764201571, "learning_rate": 1.8740363656347744e-06, "loss": 0.9316, "step": 19963 }, { "epoch": 0.7235692798376282, "grad_norm": 2.464966206774281, "learning_rate": 1.873578306970052e-06, "loss": 0.8937, "step": 19964 }, { "epoch": 0.7236055235402848, "grad_norm": 2.41105950718002, "learning_rate": 1.8731202913855285e-06, "loss": 1.0076, "step": 19965 }, { "epoch": 0.7236417672429415, "grad_norm": 2.3181176678611917, "learning_rate": 1.8726623188875181e-06, "loss": 0.8417, "step": 19966 }, { "epoch": 0.7236780109455982, "grad_norm": 2.2973159699049766, "learning_rate": 1.8722043894823276e-06, "loss": 0.9818, "step": 19967 }, { "epoch": 0.7237142546482549, "grad_norm": 2.1699154625859673, "learning_rate": 1.8717465031762693e-06, "loss": 0.8174, "step": 19968 }, { "epoch": 0.7237504983509115, "grad_norm": 2.593662740909922, "learning_rate": 1.871288659975654e-06, "loss": 0.8866, "step": 19969 }, { "epoch": 0.7237867420535682, "grad_norm": 2.250342758107544, "learning_rate": 1.870830859886787e-06, "loss": 0.8681, "step": 19970 }, { "epoch": 0.7238229857562248, "grad_norm": 2.472473858211969, "learning_rate": 1.8703731029159778e-06, "loss": 0.9437, "step": 19971 }, { "epoch": 0.7238592294588815, "grad_norm": 2.174709566628618, "learning_rate": 1.8699153890695347e-06, "loss": 0.8611, "step": 19972 }, { "epoch": 0.7238954731615381, "grad_norm": 2.501658001136721, "learning_rate": 1.8694577183537665e-06, "loss": 0.9385, "step": 19973 }, { "epoch": 0.7239317168641949, "grad_norm": 2.381584991367833, "learning_rate": 1.8690000907749762e-06, "loss": 0.8048, "step": 19974 }, { "epoch": 0.7239679605668515, "grad_norm": 2.1454054914445244, "learning_rate": 1.8685425063394707e-06, "loss": 0.8357, "step": 19975 }, { "epoch": 0.7240042042695082, "grad_norm": 2.606564551761032, "learning_rate": 1.8680849650535576e-06, "loss": 0.771, "step": 19976 }, { "epoch": 0.7240404479721648, "grad_norm": 2.65398686369629, "learning_rate": 1.8676274669235372e-06, "loss": 0.7604, "step": 19977 }, { "epoch": 0.7240766916748215, "grad_norm": 2.4809739603418755, "learning_rate": 1.8671700119557185e-06, "loss": 0.9115, "step": 19978 }, { "epoch": 0.7241129353774781, "grad_norm": 2.4120334683829934, "learning_rate": 1.8667126001563985e-06, "loss": 0.8571, "step": 19979 }, { "epoch": 0.7241491790801349, "grad_norm": 2.399421097266923, "learning_rate": 1.866255231531887e-06, "loss": 0.902, "step": 19980 }, { "epoch": 0.7241854227827915, "grad_norm": 2.342401262059425, "learning_rate": 1.8657979060884817e-06, "loss": 0.9179, "step": 19981 }, { "epoch": 0.7242216664854482, "grad_norm": 2.043080078153572, "learning_rate": 1.8653406238324877e-06, "loss": 0.7938, "step": 19982 }, { "epoch": 0.7242579101881048, "grad_norm": 2.379301211284097, "learning_rate": 1.8648833847702024e-06, "loss": 0.7225, "step": 19983 }, { "epoch": 0.7242941538907615, "grad_norm": 2.101420718778994, "learning_rate": 1.8644261889079279e-06, "loss": 0.7443, "step": 19984 }, { "epoch": 0.7243303975934181, "grad_norm": 2.11988099765942, "learning_rate": 1.8639690362519658e-06, "loss": 0.9732, "step": 19985 }, { "epoch": 0.7243666412960748, "grad_norm": 2.33254219992386, "learning_rate": 1.863511926808612e-06, "loss": 0.9332, "step": 19986 }, { "epoch": 0.7244028849987315, "grad_norm": 2.36972715193674, "learning_rate": 1.8630548605841674e-06, "loss": 1.0715, "step": 19987 }, { "epoch": 0.7244391287013882, "grad_norm": 2.2620704718469833, "learning_rate": 1.862597837584929e-06, "loss": 0.7492, "step": 19988 }, { "epoch": 0.7244753724040448, "grad_norm": 2.243698750038949, "learning_rate": 1.8621408578171973e-06, "loss": 0.7067, "step": 19989 }, { "epoch": 0.7245116161067015, "grad_norm": 2.3720999917131, "learning_rate": 1.861683921287265e-06, "loss": 1.0286, "step": 19990 }, { "epoch": 0.7245478598093581, "grad_norm": 2.381290250294651, "learning_rate": 1.8612270280014299e-06, "loss": 1.0502, "step": 19991 }, { "epoch": 0.7245841035120147, "grad_norm": 2.3040492254216463, "learning_rate": 1.860770177965988e-06, "loss": 0.9235, "step": 19992 }, { "epoch": 0.7246203472146715, "grad_norm": 2.3158507569345885, "learning_rate": 1.8603133711872368e-06, "loss": 0.9896, "step": 19993 }, { "epoch": 0.7246565909173281, "grad_norm": 2.179731320030169, "learning_rate": 1.8598566076714663e-06, "loss": 0.8574, "step": 19994 }, { "epoch": 0.7246928346199848, "grad_norm": 2.31206125762751, "learning_rate": 1.859399887424973e-06, "loss": 0.8516, "step": 19995 }, { "epoch": 0.7247290783226414, "grad_norm": 2.3419575155265555, "learning_rate": 1.8589432104540517e-06, "loss": 0.7568, "step": 19996 }, { "epoch": 0.7247653220252981, "grad_norm": 2.211153394373986, "learning_rate": 1.8584865767649913e-06, "loss": 0.8441, "step": 19997 }, { "epoch": 0.7248015657279547, "grad_norm": 2.372050830394522, "learning_rate": 1.8580299863640878e-06, "loss": 0.9658, "step": 19998 }, { "epoch": 0.7248378094306114, "grad_norm": 2.383150876053212, "learning_rate": 1.8575734392576273e-06, "loss": 0.8523, "step": 19999 }, { "epoch": 0.7248740531332681, "grad_norm": 2.466258197633229, "learning_rate": 1.857116935451908e-06, "loss": 0.7291, "step": 20000 }, { "epoch": 0.7249102968359248, "grad_norm": 2.2373405141314073, "learning_rate": 1.8566604749532146e-06, "loss": 0.9799, "step": 20001 }, { "epoch": 0.7249465405385814, "grad_norm": 2.1867774512694864, "learning_rate": 1.8562040577678408e-06, "loss": 0.8505, "step": 20002 }, { "epoch": 0.7249827842412381, "grad_norm": 2.44865084851384, "learning_rate": 1.8557476839020722e-06, "loss": 0.9271, "step": 20003 }, { "epoch": 0.7250190279438947, "grad_norm": 2.0885380221953915, "learning_rate": 1.8552913533621986e-06, "loss": 0.8685, "step": 20004 }, { "epoch": 0.7250552716465514, "grad_norm": 2.1389232067522093, "learning_rate": 1.85483506615451e-06, "loss": 0.8409, "step": 20005 }, { "epoch": 0.7250915153492081, "grad_norm": 2.5441732701147632, "learning_rate": 1.8543788222852905e-06, "loss": 0.8387, "step": 20006 }, { "epoch": 0.7251277590518648, "grad_norm": 2.515492490324345, "learning_rate": 1.853922621760828e-06, "loss": 1.0466, "step": 20007 }, { "epoch": 0.7251640027545214, "grad_norm": 2.2417867794031627, "learning_rate": 1.8534664645874095e-06, "loss": 0.8775, "step": 20008 }, { "epoch": 0.7252002464571781, "grad_norm": 2.3036845189912216, "learning_rate": 1.8530103507713215e-06, "loss": 0.8413, "step": 20009 }, { "epoch": 0.7252364901598347, "grad_norm": 2.1299743409129763, "learning_rate": 1.8525542803188463e-06, "loss": 0.8949, "step": 20010 }, { "epoch": 0.7252727338624914, "grad_norm": 2.2008281079718564, "learning_rate": 1.8520982532362696e-06, "loss": 0.8213, "step": 20011 }, { "epoch": 0.725308977565148, "grad_norm": 2.134077971533877, "learning_rate": 1.8516422695298753e-06, "loss": 0.8281, "step": 20012 }, { "epoch": 0.7253452212678048, "grad_norm": 2.343206123718485, "learning_rate": 1.8511863292059478e-06, "loss": 1.0785, "step": 20013 }, { "epoch": 0.7253814649704614, "grad_norm": 2.1417092492273313, "learning_rate": 1.8507304322707663e-06, "loss": 0.9168, "step": 20014 }, { "epoch": 0.7254177086731181, "grad_norm": 2.217634985271853, "learning_rate": 1.8502745787306159e-06, "loss": 0.7193, "step": 20015 }, { "epoch": 0.7254539523757747, "grad_norm": 2.2427721258620794, "learning_rate": 1.8498187685917778e-06, "loss": 0.8198, "step": 20016 }, { "epoch": 0.7254901960784313, "grad_norm": 2.135561644737912, "learning_rate": 1.8493630018605302e-06, "loss": 0.6788, "step": 20017 }, { "epoch": 0.725526439781088, "grad_norm": 2.3833356003952857, "learning_rate": 1.8489072785431573e-06, "loss": 0.9439, "step": 20018 }, { "epoch": 0.7255626834837448, "grad_norm": 2.4641293674704374, "learning_rate": 1.8484515986459322e-06, "loss": 0.8295, "step": 20019 }, { "epoch": 0.7255989271864014, "grad_norm": 2.0264964076363094, "learning_rate": 1.847995962175142e-06, "loss": 0.7799, "step": 20020 }, { "epoch": 0.725635170889058, "grad_norm": 2.7180194346616293, "learning_rate": 1.8475403691370591e-06, "loss": 1.0403, "step": 20021 }, { "epoch": 0.7256714145917147, "grad_norm": 2.2536602523321667, "learning_rate": 1.8470848195379654e-06, "loss": 0.9111, "step": 20022 }, { "epoch": 0.7257076582943713, "grad_norm": 2.31452364431937, "learning_rate": 1.8466293133841345e-06, "loss": 1.0946, "step": 20023 }, { "epoch": 0.725743901997028, "grad_norm": 2.3065800280357305, "learning_rate": 1.8461738506818444e-06, "loss": 0.9501, "step": 20024 }, { "epoch": 0.7257801456996846, "grad_norm": 2.282512719689717, "learning_rate": 1.8457184314373727e-06, "loss": 0.9341, "step": 20025 }, { "epoch": 0.7258163894023414, "grad_norm": 2.3958063229420694, "learning_rate": 1.84526305565699e-06, "loss": 0.9529, "step": 20026 }, { "epoch": 0.725852633104998, "grad_norm": 2.3362267641742482, "learning_rate": 1.844807723346978e-06, "loss": 0.9776, "step": 20027 }, { "epoch": 0.7258888768076547, "grad_norm": 2.547153768224212, "learning_rate": 1.844352434513606e-06, "loss": 0.9059, "step": 20028 }, { "epoch": 0.7259251205103113, "grad_norm": 2.325117678054724, "learning_rate": 1.84389718916315e-06, "loss": 1.139, "step": 20029 }, { "epoch": 0.725961364212968, "grad_norm": 2.3903618231685986, "learning_rate": 1.8434419873018793e-06, "loss": 0.9028, "step": 20030 }, { "epoch": 0.7259976079156246, "grad_norm": 2.5018291016149687, "learning_rate": 1.842986828936072e-06, "loss": 1.0528, "step": 20031 }, { "epoch": 0.7260338516182814, "grad_norm": 2.5143372375853668, "learning_rate": 1.842531714071995e-06, "loss": 0.8255, "step": 20032 }, { "epoch": 0.726070095320938, "grad_norm": 2.5854430174776177, "learning_rate": 1.8420766427159238e-06, "loss": 0.7826, "step": 20033 }, { "epoch": 0.7261063390235947, "grad_norm": 2.466524037853734, "learning_rate": 1.8416216148741246e-06, "loss": 0.6817, "step": 20034 }, { "epoch": 0.7261425827262513, "grad_norm": 2.4219731686180253, "learning_rate": 1.8411666305528691e-06, "loss": 0.6697, "step": 20035 }, { "epoch": 0.726178826428908, "grad_norm": 2.835703896800369, "learning_rate": 1.8407116897584288e-06, "loss": 1.0111, "step": 20036 }, { "epoch": 0.7262150701315646, "grad_norm": 2.8014655114473386, "learning_rate": 1.8402567924970694e-06, "loss": 0.7249, "step": 20037 }, { "epoch": 0.7262513138342213, "grad_norm": 2.4645545859799434, "learning_rate": 1.83980193877506e-06, "loss": 1.0503, "step": 20038 }, { "epoch": 0.726287557536878, "grad_norm": 2.6632397363037974, "learning_rate": 1.8393471285986687e-06, "loss": 0.9557, "step": 20039 }, { "epoch": 0.7263238012395347, "grad_norm": 2.3597671978119505, "learning_rate": 1.8388923619741643e-06, "loss": 1.0297, "step": 20040 }, { "epoch": 0.7263600449421913, "grad_norm": 2.0664840609879973, "learning_rate": 1.8384376389078097e-06, "loss": 0.8485, "step": 20041 }, { "epoch": 0.726396288644848, "grad_norm": 2.545602178532276, "learning_rate": 1.8379829594058723e-06, "loss": 0.9474, "step": 20042 }, { "epoch": 0.7264325323475046, "grad_norm": 2.271055978609619, "learning_rate": 1.8375283234746194e-06, "loss": 0.7763, "step": 20043 }, { "epoch": 0.7264687760501612, "grad_norm": 2.4765817545047897, "learning_rate": 1.8370737311203118e-06, "loss": 0.9546, "step": 20044 }, { "epoch": 0.7265050197528179, "grad_norm": 2.2960879810231822, "learning_rate": 1.836619182349217e-06, "loss": 1.0144, "step": 20045 }, { "epoch": 0.7265412634554747, "grad_norm": 2.4020205767738836, "learning_rate": 1.8361646771675929e-06, "loss": 0.7854, "step": 20046 }, { "epoch": 0.7265775071581313, "grad_norm": 2.375877046752718, "learning_rate": 1.8357102155817098e-06, "loss": 0.9103, "step": 20047 }, { "epoch": 0.7266137508607879, "grad_norm": 2.2116563055562137, "learning_rate": 1.8352557975978247e-06, "loss": 0.762, "step": 20048 }, { "epoch": 0.7266499945634446, "grad_norm": 2.139160397020484, "learning_rate": 1.8348014232222023e-06, "loss": 0.9667, "step": 20049 }, { "epoch": 0.7266862382661012, "grad_norm": 2.5726079526798165, "learning_rate": 1.8343470924610984e-06, "loss": 0.9446, "step": 20050 }, { "epoch": 0.7267224819687579, "grad_norm": 2.2789018300561463, "learning_rate": 1.833892805320781e-06, "loss": 0.9432, "step": 20051 }, { "epoch": 0.7267587256714146, "grad_norm": 2.5380887828188263, "learning_rate": 1.8334385618075035e-06, "loss": 0.8343, "step": 20052 }, { "epoch": 0.7267949693740713, "grad_norm": 2.6683531211011386, "learning_rate": 1.83298436192753e-06, "loss": 0.8059, "step": 20053 }, { "epoch": 0.7268312130767279, "grad_norm": 2.4911056319045635, "learning_rate": 1.8325302056871146e-06, "loss": 0.9035, "step": 20054 }, { "epoch": 0.7268674567793846, "grad_norm": 2.3372368690469107, "learning_rate": 1.8320760930925174e-06, "loss": 1.0235, "step": 20055 }, { "epoch": 0.7269037004820412, "grad_norm": 2.2415459554119055, "learning_rate": 1.831622024149997e-06, "loss": 0.9845, "step": 20056 }, { "epoch": 0.7269399441846979, "grad_norm": 2.269869179129004, "learning_rate": 1.8311679988658077e-06, "loss": 0.91, "step": 20057 }, { "epoch": 0.7269761878873545, "grad_norm": 2.492360579408886, "learning_rate": 1.8307140172462069e-06, "loss": 0.8984, "step": 20058 }, { "epoch": 0.7270124315900113, "grad_norm": 2.79625101598502, "learning_rate": 1.83026007929745e-06, "loss": 0.8477, "step": 20059 }, { "epoch": 0.7270486752926679, "grad_norm": 2.4373180769185363, "learning_rate": 1.8298061850257942e-06, "loss": 0.8816, "step": 20060 }, { "epoch": 0.7270849189953246, "grad_norm": 2.4348515128194554, "learning_rate": 1.8293523344374903e-06, "loss": 0.9061, "step": 20061 }, { "epoch": 0.7271211626979812, "grad_norm": 2.138526688942985, "learning_rate": 1.8288985275387932e-06, "loss": 0.8459, "step": 20062 }, { "epoch": 0.7271574064006379, "grad_norm": 2.415161495167974, "learning_rate": 1.8284447643359588e-06, "loss": 0.9723, "step": 20063 }, { "epoch": 0.7271936501032945, "grad_norm": 2.354293025163301, "learning_rate": 1.827991044835235e-06, "loss": 0.9655, "step": 20064 }, { "epoch": 0.7272298938059513, "grad_norm": 2.322467884312005, "learning_rate": 1.8275373690428787e-06, "loss": 0.8161, "step": 20065 }, { "epoch": 0.7272661375086079, "grad_norm": 2.263318097605314, "learning_rate": 1.827083736965135e-06, "loss": 0.9847, "step": 20066 }, { "epoch": 0.7273023812112646, "grad_norm": 2.4410900048368274, "learning_rate": 1.826630148608262e-06, "loss": 0.9797, "step": 20067 }, { "epoch": 0.7273386249139212, "grad_norm": 2.3686856957829625, "learning_rate": 1.8261766039785044e-06, "loss": 0.8627, "step": 20068 }, { "epoch": 0.7273748686165779, "grad_norm": 2.5311105837277816, "learning_rate": 1.825723103082116e-06, "loss": 0.8146, "step": 20069 }, { "epoch": 0.7274111123192345, "grad_norm": 2.500905131527882, "learning_rate": 1.8252696459253416e-06, "loss": 0.9431, "step": 20070 }, { "epoch": 0.7274473560218911, "grad_norm": 2.157655975334527, "learning_rate": 1.8248162325144314e-06, "loss": 0.8659, "step": 20071 }, { "epoch": 0.7274835997245479, "grad_norm": 2.144185615134213, "learning_rate": 1.824362862855633e-06, "loss": 0.8127, "step": 20072 }, { "epoch": 0.7275198434272045, "grad_norm": 2.4878220322493614, "learning_rate": 1.8239095369551958e-06, "loss": 1.1861, "step": 20073 }, { "epoch": 0.7275560871298612, "grad_norm": 2.372051512801311, "learning_rate": 1.8234562548193624e-06, "loss": 0.7376, "step": 20074 }, { "epoch": 0.7275923308325178, "grad_norm": 2.492226993041929, "learning_rate": 1.823003016454381e-06, "loss": 0.791, "step": 20075 }, { "epoch": 0.7276285745351745, "grad_norm": 2.4564227125844695, "learning_rate": 1.8225498218664977e-06, "loss": 0.9849, "step": 20076 }, { "epoch": 0.7276648182378311, "grad_norm": 2.736455182814447, "learning_rate": 1.8220966710619554e-06, "loss": 1.2295, "step": 20077 }, { "epoch": 0.7277010619404879, "grad_norm": 2.0431697849940194, "learning_rate": 1.8216435640469982e-06, "loss": 0.9193, "step": 20078 }, { "epoch": 0.7277373056431445, "grad_norm": 2.3038153358096056, "learning_rate": 1.8211905008278712e-06, "loss": 0.9539, "step": 20079 }, { "epoch": 0.7277735493458012, "grad_norm": 2.126290393041863, "learning_rate": 1.820737481410818e-06, "loss": 0.7432, "step": 20080 }, { "epoch": 0.7278097930484578, "grad_norm": 2.338942364011187, "learning_rate": 1.8202845058020773e-06, "loss": 0.7333, "step": 20081 }, { "epoch": 0.7278460367511145, "grad_norm": 2.28878260181739, "learning_rate": 1.8198315740078936e-06, "loss": 0.9036, "step": 20082 }, { "epoch": 0.7278822804537711, "grad_norm": 2.309710952588142, "learning_rate": 1.8193786860345097e-06, "loss": 0.8666, "step": 20083 }, { "epoch": 0.7279185241564278, "grad_norm": 2.300563981520173, "learning_rate": 1.8189258418881616e-06, "loss": 0.9011, "step": 20084 }, { "epoch": 0.7279547678590845, "grad_norm": 2.30780186767038, "learning_rate": 1.8184730415750934e-06, "loss": 1.0041, "step": 20085 }, { "epoch": 0.7279910115617412, "grad_norm": 2.1781335177342522, "learning_rate": 1.8180202851015393e-06, "loss": 0.9756, "step": 20086 }, { "epoch": 0.7280272552643978, "grad_norm": 2.33297179837664, "learning_rate": 1.8175675724737445e-06, "loss": 0.8504, "step": 20087 }, { "epoch": 0.7280634989670545, "grad_norm": 2.0073445120264495, "learning_rate": 1.817114903697942e-06, "loss": 0.7731, "step": 20088 }, { "epoch": 0.7280997426697111, "grad_norm": 2.511361130243064, "learning_rate": 1.8166622787803734e-06, "loss": 0.8202, "step": 20089 }, { "epoch": 0.7281359863723678, "grad_norm": 2.4276860453621913, "learning_rate": 1.816209697727271e-06, "loss": 1.0164, "step": 20090 }, { "epoch": 0.7281722300750245, "grad_norm": 2.4468724888971356, "learning_rate": 1.8157571605448737e-06, "loss": 0.9081, "step": 20091 }, { "epoch": 0.7282084737776812, "grad_norm": 2.5391761607409067, "learning_rate": 1.8153046672394193e-06, "loss": 0.9597, "step": 20092 }, { "epoch": 0.7282447174803378, "grad_norm": 2.3166517560124538, "learning_rate": 1.814852217817138e-06, "loss": 0.7116, "step": 20093 }, { "epoch": 0.7282809611829945, "grad_norm": 1.996487756699296, "learning_rate": 1.8143998122842671e-06, "loss": 0.6825, "step": 20094 }, { "epoch": 0.7283172048856511, "grad_norm": 2.368184152245621, "learning_rate": 1.8139474506470405e-06, "loss": 0.9548, "step": 20095 }, { "epoch": 0.7283534485883078, "grad_norm": 2.3680385203890095, "learning_rate": 1.8134951329116923e-06, "loss": 0.9522, "step": 20096 }, { "epoch": 0.7283896922909644, "grad_norm": 2.614229893747547, "learning_rate": 1.813042859084453e-06, "loss": 0.8324, "step": 20097 }, { "epoch": 0.7284259359936212, "grad_norm": 2.143374741397137, "learning_rate": 1.812590629171555e-06, "loss": 0.8845, "step": 20098 }, { "epoch": 0.7284621796962778, "grad_norm": 2.4785448046985508, "learning_rate": 1.812138443179231e-06, "loss": 0.9818, "step": 20099 }, { "epoch": 0.7284984233989344, "grad_norm": 2.3609501690234698, "learning_rate": 1.8116863011137126e-06, "loss": 0.9329, "step": 20100 }, { "epoch": 0.7285346671015911, "grad_norm": 2.3156817632192883, "learning_rate": 1.8112342029812274e-06, "loss": 0.9414, "step": 20101 }, { "epoch": 0.7285709108042477, "grad_norm": 2.1530797772428487, "learning_rate": 1.8107821487880061e-06, "loss": 1.0283, "step": 20102 }, { "epoch": 0.7286071545069044, "grad_norm": 2.317371011483157, "learning_rate": 1.8103301385402799e-06, "loss": 0.9344, "step": 20103 }, { "epoch": 0.7286433982095611, "grad_norm": 2.6499678335172354, "learning_rate": 1.8098781722442738e-06, "loss": 1.0846, "step": 20104 }, { "epoch": 0.7286796419122178, "grad_norm": 2.307099074204023, "learning_rate": 1.8094262499062181e-06, "loss": 0.842, "step": 20105 }, { "epoch": 0.7287158856148744, "grad_norm": 2.2435676980484183, "learning_rate": 1.8089743715323365e-06, "loss": 0.8581, "step": 20106 }, { "epoch": 0.7287521293175311, "grad_norm": 2.3334192050045646, "learning_rate": 1.808522537128861e-06, "loss": 0.8136, "step": 20107 }, { "epoch": 0.7287883730201877, "grad_norm": 2.364212822872886, "learning_rate": 1.8080707467020136e-06, "loss": 0.6936, "step": 20108 }, { "epoch": 0.7288246167228444, "grad_norm": 2.4505164255710237, "learning_rate": 1.8076190002580208e-06, "loss": 0.891, "step": 20109 }, { "epoch": 0.728860860425501, "grad_norm": 2.1116123090370986, "learning_rate": 1.807167297803109e-06, "loss": 0.8552, "step": 20110 }, { "epoch": 0.7288971041281578, "grad_norm": 2.4172163062707157, "learning_rate": 1.8067156393434993e-06, "loss": 0.9809, "step": 20111 }, { "epoch": 0.7289333478308144, "grad_norm": 2.214436892648455, "learning_rate": 1.8062640248854185e-06, "loss": 0.8745, "step": 20112 }, { "epoch": 0.7289695915334711, "grad_norm": 2.3373281803890826, "learning_rate": 1.8058124544350842e-06, "loss": 0.961, "step": 20113 }, { "epoch": 0.7290058352361277, "grad_norm": 2.4286722898261157, "learning_rate": 1.8053609279987267e-06, "loss": 1.1864, "step": 20114 }, { "epoch": 0.7290420789387844, "grad_norm": 2.560443023082301, "learning_rate": 1.804909445582561e-06, "loss": 0.9659, "step": 20115 }, { "epoch": 0.729078322641441, "grad_norm": 2.484436220707773, "learning_rate": 1.8044580071928131e-06, "loss": 0.9526, "step": 20116 }, { "epoch": 0.7291145663440977, "grad_norm": 2.357882529977716, "learning_rate": 1.8040066128356976e-06, "loss": 0.71, "step": 20117 }, { "epoch": 0.7291508100467544, "grad_norm": 2.435362923961643, "learning_rate": 1.8035552625174413e-06, "loss": 0.7917, "step": 20118 }, { "epoch": 0.7291870537494111, "grad_norm": 2.3075528396793534, "learning_rate": 1.8031039562442588e-06, "loss": 0.9496, "step": 20119 }, { "epoch": 0.7292232974520677, "grad_norm": 2.3413910021397304, "learning_rate": 1.802652694022372e-06, "loss": 0.9494, "step": 20120 }, { "epoch": 0.7292595411547244, "grad_norm": 1.9870921426834127, "learning_rate": 1.802201475857996e-06, "loss": 0.9537, "step": 20121 }, { "epoch": 0.729295784857381, "grad_norm": 2.4959413401978137, "learning_rate": 1.8017503017573496e-06, "loss": 0.8785, "step": 20122 }, { "epoch": 0.7293320285600376, "grad_norm": 2.4057204065164317, "learning_rate": 1.8012991717266514e-06, "loss": 0.8561, "step": 20123 }, { "epoch": 0.7293682722626944, "grad_norm": 2.2249973550412365, "learning_rate": 1.8008480857721144e-06, "loss": 0.7915, "step": 20124 }, { "epoch": 0.729404515965351, "grad_norm": 2.2852666255134046, "learning_rate": 1.8003970438999563e-06, "loss": 0.9365, "step": 20125 }, { "epoch": 0.7294407596680077, "grad_norm": 2.594773578811688, "learning_rate": 1.7999460461163915e-06, "loss": 0.7825, "step": 20126 }, { "epoch": 0.7294770033706643, "grad_norm": 2.3648077667464675, "learning_rate": 1.7994950924276367e-06, "loss": 0.9036, "step": 20127 }, { "epoch": 0.729513247073321, "grad_norm": 2.457829410261597, "learning_rate": 1.7990441828399018e-06, "loss": 0.8221, "step": 20128 }, { "epoch": 0.7295494907759776, "grad_norm": 2.3053062009358576, "learning_rate": 1.7985933173594029e-06, "loss": 0.8311, "step": 20129 }, { "epoch": 0.7295857344786343, "grad_norm": 2.43485733443547, "learning_rate": 1.7981424959923532e-06, "loss": 1.0107, "step": 20130 }, { "epoch": 0.729621978181291, "grad_norm": 2.241783024508212, "learning_rate": 1.7976917187449616e-06, "loss": 1.0086, "step": 20131 }, { "epoch": 0.7296582218839477, "grad_norm": 2.2240807434171423, "learning_rate": 1.7972409856234436e-06, "loss": 0.7997, "step": 20132 }, { "epoch": 0.7296944655866043, "grad_norm": 2.116545875627534, "learning_rate": 1.796790296634004e-06, "loss": 0.746, "step": 20133 }, { "epoch": 0.729730709289261, "grad_norm": 2.5449142453361993, "learning_rate": 1.7963396517828607e-06, "loss": 0.8241, "step": 20134 }, { "epoch": 0.7297669529919176, "grad_norm": 2.471423442953293, "learning_rate": 1.7958890510762177e-06, "loss": 1.0094, "step": 20135 }, { "epoch": 0.7298031966945743, "grad_norm": 2.2573149795403853, "learning_rate": 1.7954384945202873e-06, "loss": 0.8417, "step": 20136 }, { "epoch": 0.729839440397231, "grad_norm": 2.3505127468893683, "learning_rate": 1.7949879821212729e-06, "loss": 0.7865, "step": 20137 }, { "epoch": 0.7298756840998877, "grad_norm": 2.2663075522049665, "learning_rate": 1.7945375138853893e-06, "loss": 0.8237, "step": 20138 }, { "epoch": 0.7299119278025443, "grad_norm": 2.1707933461595013, "learning_rate": 1.794087089818839e-06, "loss": 0.9926, "step": 20139 }, { "epoch": 0.729948171505201, "grad_norm": 2.3798961585245113, "learning_rate": 1.7936367099278307e-06, "loss": 0.8407, "step": 20140 }, { "epoch": 0.7299844152078576, "grad_norm": 2.121036073310344, "learning_rate": 1.793186374218568e-06, "loss": 0.923, "step": 20141 }, { "epoch": 0.7300206589105143, "grad_norm": 2.2584995504666066, "learning_rate": 1.7927360826972572e-06, "loss": 0.8433, "step": 20142 }, { "epoch": 0.7300569026131709, "grad_norm": 2.100577592918859, "learning_rate": 1.7922858353701061e-06, "loss": 0.9536, "step": 20143 }, { "epoch": 0.7300931463158277, "grad_norm": 2.379900355620354, "learning_rate": 1.7918356322433138e-06, "loss": 0.9088, "step": 20144 }, { "epoch": 0.7301293900184843, "grad_norm": 2.5730431800930083, "learning_rate": 1.7913854733230862e-06, "loss": 1.0167, "step": 20145 }, { "epoch": 0.730165633721141, "grad_norm": 2.289670773689464, "learning_rate": 1.790935358615627e-06, "loss": 0.644, "step": 20146 }, { "epoch": 0.7302018774237976, "grad_norm": 2.2571357777305194, "learning_rate": 1.790485288127139e-06, "loss": 0.8631, "step": 20147 }, { "epoch": 0.7302381211264543, "grad_norm": 2.4645094776695355, "learning_rate": 1.7900352618638206e-06, "loss": 0.9433, "step": 20148 }, { "epoch": 0.7302743648291109, "grad_norm": 2.5740848046203793, "learning_rate": 1.7895852798318753e-06, "loss": 0.7547, "step": 20149 }, { "epoch": 0.7303106085317677, "grad_norm": 2.2956273979902173, "learning_rate": 1.7891353420375052e-06, "loss": 0.9052, "step": 20150 }, { "epoch": 0.7303468522344243, "grad_norm": 2.4938259003460312, "learning_rate": 1.7886854484869066e-06, "loss": 0.9012, "step": 20151 }, { "epoch": 0.730383095937081, "grad_norm": 2.0567666755842, "learning_rate": 1.7882355991862816e-06, "loss": 0.6494, "step": 20152 }, { "epoch": 0.7304193396397376, "grad_norm": 2.167729844995364, "learning_rate": 1.7877857941418242e-06, "loss": 0.794, "step": 20153 }, { "epoch": 0.7304555833423942, "grad_norm": 2.4623323443381673, "learning_rate": 1.7873360333597395e-06, "loss": 0.9447, "step": 20154 }, { "epoch": 0.7304918270450509, "grad_norm": 2.442142108032111, "learning_rate": 1.7868863168462197e-06, "loss": 0.8339, "step": 20155 }, { "epoch": 0.7305280707477075, "grad_norm": 2.4375212781982527, "learning_rate": 1.7864366446074648e-06, "loss": 0.9848, "step": 20156 }, { "epoch": 0.7305643144503643, "grad_norm": 2.1883820023890777, "learning_rate": 1.7859870166496663e-06, "loss": 0.708, "step": 20157 }, { "epoch": 0.7306005581530209, "grad_norm": 2.351525835199538, "learning_rate": 1.785537432979026e-06, "loss": 0.8032, "step": 20158 }, { "epoch": 0.7306368018556776, "grad_norm": 2.488601863285092, "learning_rate": 1.785087893601734e-06, "loss": 0.9249, "step": 20159 }, { "epoch": 0.7306730455583342, "grad_norm": 2.3813911434062427, "learning_rate": 1.7846383985239885e-06, "loss": 0.8525, "step": 20160 }, { "epoch": 0.7307092892609909, "grad_norm": 2.363354927532364, "learning_rate": 1.7841889477519798e-06, "loss": 0.8455, "step": 20161 }, { "epoch": 0.7307455329636475, "grad_norm": 2.2760271943792967, "learning_rate": 1.7837395412919017e-06, "loss": 0.7922, "step": 20162 }, { "epoch": 0.7307817766663043, "grad_norm": 2.2914648781433753, "learning_rate": 1.7832901791499502e-06, "loss": 0.9054, "step": 20163 }, { "epoch": 0.7308180203689609, "grad_norm": 2.4839401811355617, "learning_rate": 1.7828408613323122e-06, "loss": 0.765, "step": 20164 }, { "epoch": 0.7308542640716176, "grad_norm": 2.349305557354704, "learning_rate": 1.782391587845182e-06, "loss": 0.9193, "step": 20165 }, { "epoch": 0.7308905077742742, "grad_norm": 2.3004856320182787, "learning_rate": 1.78194235869475e-06, "loss": 0.7358, "step": 20166 }, { "epoch": 0.7309267514769309, "grad_norm": 2.354442231926403, "learning_rate": 1.781493173887207e-06, "loss": 0.9831, "step": 20167 }, { "epoch": 0.7309629951795875, "grad_norm": 2.2198637202777336, "learning_rate": 1.7810440334287405e-06, "loss": 0.8639, "step": 20168 }, { "epoch": 0.7309992388822442, "grad_norm": 2.341045059017126, "learning_rate": 1.7805949373255399e-06, "loss": 0.7886, "step": 20169 }, { "epoch": 0.7310354825849009, "grad_norm": 2.2162034063400835, "learning_rate": 1.780145885583796e-06, "loss": 0.9307, "step": 20170 }, { "epoch": 0.7310717262875576, "grad_norm": 2.411213828266547, "learning_rate": 1.779696878209693e-06, "loss": 0.9228, "step": 20171 }, { "epoch": 0.7311079699902142, "grad_norm": 2.3119568190842417, "learning_rate": 1.7792479152094212e-06, "loss": 0.8401, "step": 20172 }, { "epoch": 0.7311442136928709, "grad_norm": 2.3160283394958836, "learning_rate": 1.7787989965891612e-06, "loss": 0.9051, "step": 20173 }, { "epoch": 0.7311804573955275, "grad_norm": 2.3661596739544497, "learning_rate": 1.778350122355107e-06, "loss": 0.9316, "step": 20174 }, { "epoch": 0.7312167010981842, "grad_norm": 2.9138962692235513, "learning_rate": 1.7779012925134375e-06, "loss": 0.9752, "step": 20175 }, { "epoch": 0.7312529448008408, "grad_norm": 2.4381332382116594, "learning_rate": 1.7774525070703413e-06, "loss": 0.8928, "step": 20176 }, { "epoch": 0.7312891885034976, "grad_norm": 2.535855365723449, "learning_rate": 1.7770037660319994e-06, "loss": 0.8358, "step": 20177 }, { "epoch": 0.7313254322061542, "grad_norm": 2.4455089326977357, "learning_rate": 1.7765550694045963e-06, "loss": 0.8699, "step": 20178 }, { "epoch": 0.7313616759088108, "grad_norm": 2.3913884704140305, "learning_rate": 1.776106417194316e-06, "loss": 1.0205, "step": 20179 }, { "epoch": 0.7313979196114675, "grad_norm": 2.4008874398071973, "learning_rate": 1.7756578094073385e-06, "loss": 0.9267, "step": 20180 }, { "epoch": 0.7314341633141241, "grad_norm": 2.2486254818360654, "learning_rate": 1.775209246049846e-06, "loss": 0.9289, "step": 20181 }, { "epoch": 0.7314704070167808, "grad_norm": 2.114441136161314, "learning_rate": 1.7747607271280198e-06, "loss": 0.8349, "step": 20182 }, { "epoch": 0.7315066507194375, "grad_norm": 2.1672540184510685, "learning_rate": 1.7743122526480421e-06, "loss": 0.8831, "step": 20183 }, { "epoch": 0.7315428944220942, "grad_norm": 2.4294909161589904, "learning_rate": 1.7738638226160887e-06, "loss": 0.88, "step": 20184 }, { "epoch": 0.7315791381247508, "grad_norm": 2.1651559647560497, "learning_rate": 1.773415437038341e-06, "loss": 0.7005, "step": 20185 }, { "epoch": 0.7316153818274075, "grad_norm": 2.0982479365864917, "learning_rate": 1.7729670959209772e-06, "loss": 1.0195, "step": 20186 }, { "epoch": 0.7316516255300641, "grad_norm": 2.2471692853057617, "learning_rate": 1.772518799270177e-06, "loss": 0.9498, "step": 20187 }, { "epoch": 0.7316878692327208, "grad_norm": 2.46633726866247, "learning_rate": 1.772070547092114e-06, "loss": 0.862, "step": 20188 }, { "epoch": 0.7317241129353774, "grad_norm": 2.5954568733332146, "learning_rate": 1.7716223393929665e-06, "loss": 0.7992, "step": 20189 }, { "epoch": 0.7317603566380342, "grad_norm": 2.460133528981713, "learning_rate": 1.7711741761789124e-06, "loss": 0.9308, "step": 20190 }, { "epoch": 0.7317966003406908, "grad_norm": 2.7532198949722586, "learning_rate": 1.7707260574561237e-06, "loss": 0.9847, "step": 20191 }, { "epoch": 0.7318328440433475, "grad_norm": 2.2333458682569582, "learning_rate": 1.770277983230777e-06, "loss": 0.7405, "step": 20192 }, { "epoch": 0.7318690877460041, "grad_norm": 2.415738158761236, "learning_rate": 1.7698299535090468e-06, "loss": 0.9703, "step": 20193 }, { "epoch": 0.7319053314486608, "grad_norm": 2.4416183645691882, "learning_rate": 1.7693819682971076e-06, "loss": 0.8025, "step": 20194 }, { "epoch": 0.7319415751513174, "grad_norm": 2.3265902592441097, "learning_rate": 1.7689340276011291e-06, "loss": 1.0358, "step": 20195 }, { "epoch": 0.7319778188539742, "grad_norm": 1.9904880611932225, "learning_rate": 1.768486131427286e-06, "loss": 0.778, "step": 20196 }, { "epoch": 0.7320140625566308, "grad_norm": 2.0299940715387272, "learning_rate": 1.7680382797817514e-06, "loss": 0.6973, "step": 20197 }, { "epoch": 0.7320503062592875, "grad_norm": 2.5622639221543837, "learning_rate": 1.7675904726706927e-06, "loss": 0.9161, "step": 20198 }, { "epoch": 0.7320865499619441, "grad_norm": 2.206883605467038, "learning_rate": 1.7671427101002848e-06, "loss": 0.8463, "step": 20199 }, { "epoch": 0.7321227936646008, "grad_norm": 2.27541323399268, "learning_rate": 1.7666949920766908e-06, "loss": 0.6853, "step": 20200 }, { "epoch": 0.7321590373672574, "grad_norm": 2.229396497306634, "learning_rate": 1.7662473186060885e-06, "loss": 1.0492, "step": 20201 }, { "epoch": 0.732195281069914, "grad_norm": 2.399154866295957, "learning_rate": 1.7657996896946406e-06, "loss": 0.8928, "step": 20202 }, { "epoch": 0.7322315247725708, "grad_norm": 2.364342015104844, "learning_rate": 1.7653521053485184e-06, "loss": 0.8273, "step": 20203 }, { "epoch": 0.7322677684752275, "grad_norm": 2.386864132395939, "learning_rate": 1.7649045655738845e-06, "loss": 0.8269, "step": 20204 }, { "epoch": 0.7323040121778841, "grad_norm": 2.600632878662867, "learning_rate": 1.7644570703769126e-06, "loss": 0.9031, "step": 20205 }, { "epoch": 0.7323402558805407, "grad_norm": 2.3198942273214467, "learning_rate": 1.7640096197637641e-06, "loss": 0.8184, "step": 20206 }, { "epoch": 0.7323764995831974, "grad_norm": 2.124795087600091, "learning_rate": 1.7635622137406077e-06, "loss": 0.8285, "step": 20207 }, { "epoch": 0.732412743285854, "grad_norm": 2.396811533988167, "learning_rate": 1.7631148523136044e-06, "loss": 0.6607, "step": 20208 }, { "epoch": 0.7324489869885108, "grad_norm": 2.1879297828351385, "learning_rate": 1.7626675354889211e-06, "loss": 0.98, "step": 20209 }, { "epoch": 0.7324852306911674, "grad_norm": 2.4125350758419137, "learning_rate": 1.7622202632727232e-06, "loss": 0.9874, "step": 20210 }, { "epoch": 0.7325214743938241, "grad_norm": 2.406868112644667, "learning_rate": 1.7617730356711704e-06, "loss": 0.8394, "step": 20211 }, { "epoch": 0.7325577180964807, "grad_norm": 2.406985691387892, "learning_rate": 1.7613258526904265e-06, "loss": 0.8983, "step": 20212 }, { "epoch": 0.7325939617991374, "grad_norm": 2.1768605853667924, "learning_rate": 1.7608787143366535e-06, "loss": 0.8198, "step": 20213 }, { "epoch": 0.732630205501794, "grad_norm": 2.5730824386919715, "learning_rate": 1.7604316206160143e-06, "loss": 0.9574, "step": 20214 }, { "epoch": 0.7326664492044507, "grad_norm": 2.7744247198083523, "learning_rate": 1.759984571534667e-06, "loss": 1.058, "step": 20215 }, { "epoch": 0.7327026929071074, "grad_norm": 2.3898035043209656, "learning_rate": 1.759537567098773e-06, "loss": 0.9043, "step": 20216 }, { "epoch": 0.7327389366097641, "grad_norm": 2.298098188450119, "learning_rate": 1.7590906073144925e-06, "loss": 0.7908, "step": 20217 }, { "epoch": 0.7327751803124207, "grad_norm": 2.2966524559638777, "learning_rate": 1.7586436921879823e-06, "loss": 0.8202, "step": 20218 }, { "epoch": 0.7328114240150774, "grad_norm": 2.5399932145620436, "learning_rate": 1.758196821725403e-06, "loss": 0.9719, "step": 20219 }, { "epoch": 0.732847667717734, "grad_norm": 2.4892372701940175, "learning_rate": 1.7577499959329076e-06, "loss": 0.7895, "step": 20220 }, { "epoch": 0.7328839114203907, "grad_norm": 2.441005915183369, "learning_rate": 1.7573032148166602e-06, "loss": 0.8401, "step": 20221 }, { "epoch": 0.7329201551230474, "grad_norm": 2.591582873626535, "learning_rate": 1.7568564783828112e-06, "loss": 0.8543, "step": 20222 }, { "epoch": 0.7329563988257041, "grad_norm": 2.4432614732366176, "learning_rate": 1.7564097866375208e-06, "loss": 0.7424, "step": 20223 }, { "epoch": 0.7329926425283607, "grad_norm": 2.6240005310040098, "learning_rate": 1.7559631395869375e-06, "loss": 0.9495, "step": 20224 }, { "epoch": 0.7330288862310174, "grad_norm": 2.797630741924754, "learning_rate": 1.7555165372372246e-06, "loss": 0.858, "step": 20225 }, { "epoch": 0.733065129933674, "grad_norm": 2.5331495416597627, "learning_rate": 1.7550699795945292e-06, "loss": 0.672, "step": 20226 }, { "epoch": 0.7331013736363307, "grad_norm": 2.3269989238926327, "learning_rate": 1.754623466665009e-06, "loss": 0.8129, "step": 20227 }, { "epoch": 0.7331376173389873, "grad_norm": 2.457042042886833, "learning_rate": 1.754176998454813e-06, "loss": 0.9221, "step": 20228 }, { "epoch": 0.7331738610416441, "grad_norm": 2.559884500881348, "learning_rate": 1.753730574970095e-06, "loss": 0.838, "step": 20229 }, { "epoch": 0.7332101047443007, "grad_norm": 2.505367528451045, "learning_rate": 1.7532841962170082e-06, "loss": 1.0997, "step": 20230 }, { "epoch": 0.7332463484469574, "grad_norm": 2.4288849396526806, "learning_rate": 1.7528378622017e-06, "loss": 0.8857, "step": 20231 }, { "epoch": 0.733282592149614, "grad_norm": 2.0078372338104984, "learning_rate": 1.752391572930322e-06, "loss": 0.7649, "step": 20232 }, { "epoch": 0.7333188358522706, "grad_norm": 2.2640720303664654, "learning_rate": 1.7519453284090237e-06, "loss": 0.9073, "step": 20233 }, { "epoch": 0.7333550795549273, "grad_norm": 2.288018039157091, "learning_rate": 1.7514991286439565e-06, "loss": 0.8134, "step": 20234 }, { "epoch": 0.733391323257584, "grad_norm": 2.1189518702544903, "learning_rate": 1.7510529736412652e-06, "loss": 0.8849, "step": 20235 }, { "epoch": 0.7334275669602407, "grad_norm": 2.388954391375517, "learning_rate": 1.7506068634070989e-06, "loss": 0.9154, "step": 20236 }, { "epoch": 0.7334638106628973, "grad_norm": 2.3639507005863973, "learning_rate": 1.7501607979476064e-06, "loss": 0.8592, "step": 20237 }, { "epoch": 0.733500054365554, "grad_norm": 2.5563888781731388, "learning_rate": 1.749714777268931e-06, "loss": 0.9422, "step": 20238 }, { "epoch": 0.7335362980682106, "grad_norm": 2.445534566312087, "learning_rate": 1.7492688013772218e-06, "loss": 0.8953, "step": 20239 }, { "epoch": 0.7335725417708673, "grad_norm": 2.285935612387166, "learning_rate": 1.7488228702786193e-06, "loss": 0.8794, "step": 20240 }, { "epoch": 0.7336087854735239, "grad_norm": 2.4862220272609696, "learning_rate": 1.7483769839792746e-06, "loss": 0.936, "step": 20241 }, { "epoch": 0.7336450291761807, "grad_norm": 2.65985810407546, "learning_rate": 1.7479311424853273e-06, "loss": 0.8076, "step": 20242 }, { "epoch": 0.7336812728788373, "grad_norm": 2.359803441837827, "learning_rate": 1.7474853458029234e-06, "loss": 0.8825, "step": 20243 }, { "epoch": 0.733717516581494, "grad_norm": 2.7084361156493544, "learning_rate": 1.7470395939382011e-06, "loss": 0.928, "step": 20244 }, { "epoch": 0.7337537602841506, "grad_norm": 2.360966571605847, "learning_rate": 1.7465938868973092e-06, "loss": 0.7525, "step": 20245 }, { "epoch": 0.7337900039868073, "grad_norm": 2.1647162474453743, "learning_rate": 1.7461482246863842e-06, "loss": 0.9391, "step": 20246 }, { "epoch": 0.7338262476894639, "grad_norm": 2.1252030644661426, "learning_rate": 1.7457026073115708e-06, "loss": 0.857, "step": 20247 }, { "epoch": 0.7338624913921206, "grad_norm": 2.150479438876882, "learning_rate": 1.7452570347790054e-06, "loss": 0.722, "step": 20248 }, { "epoch": 0.7338987350947773, "grad_norm": 2.309502763006078, "learning_rate": 1.7448115070948295e-06, "loss": 0.9103, "step": 20249 }, { "epoch": 0.733934978797434, "grad_norm": 2.225893099592368, "learning_rate": 1.7443660242651844e-06, "loss": 0.8494, "step": 20250 }, { "epoch": 0.7339712225000906, "grad_norm": 2.1623836333808355, "learning_rate": 1.7439205862962043e-06, "loss": 0.8887, "step": 20251 }, { "epoch": 0.7340074662027473, "grad_norm": 2.491769480348752, "learning_rate": 1.7434751931940303e-06, "loss": 0.7765, "step": 20252 }, { "epoch": 0.7340437099054039, "grad_norm": 2.18598346466243, "learning_rate": 1.7430298449647976e-06, "loss": 1.0045, "step": 20253 }, { "epoch": 0.7340799536080606, "grad_norm": 2.3669765124010573, "learning_rate": 1.7425845416146465e-06, "loss": 0.8331, "step": 20254 }, { "epoch": 0.7341161973107173, "grad_norm": 2.470339321726011, "learning_rate": 1.742139283149708e-06, "loss": 0.8089, "step": 20255 }, { "epoch": 0.734152441013374, "grad_norm": 2.5869196548583506, "learning_rate": 1.7416940695761198e-06, "loss": 0.9078, "step": 20256 }, { "epoch": 0.7341886847160306, "grad_norm": 2.2960777645965678, "learning_rate": 1.7412489009000188e-06, "loss": 0.9257, "step": 20257 }, { "epoch": 0.7342249284186873, "grad_norm": 2.145077492680597, "learning_rate": 1.7408037771275354e-06, "loss": 0.9753, "step": 20258 }, { "epoch": 0.7342611721213439, "grad_norm": 2.208017579828003, "learning_rate": 1.7403586982648068e-06, "loss": 0.8864, "step": 20259 }, { "epoch": 0.7342974158240005, "grad_norm": 2.4100249787300934, "learning_rate": 1.7399136643179604e-06, "loss": 0.857, "step": 20260 }, { "epoch": 0.7343336595266572, "grad_norm": 2.586228225295316, "learning_rate": 1.739468675293135e-06, "loss": 0.8432, "step": 20261 }, { "epoch": 0.734369903229314, "grad_norm": 2.337496551928583, "learning_rate": 1.739023731196458e-06, "loss": 0.9552, "step": 20262 }, { "epoch": 0.7344061469319706, "grad_norm": 2.5436114337104385, "learning_rate": 1.7385788320340635e-06, "loss": 0.9725, "step": 20263 }, { "epoch": 0.7344423906346272, "grad_norm": 2.5624958065759924, "learning_rate": 1.7381339778120787e-06, "loss": 0.8732, "step": 20264 }, { "epoch": 0.7344786343372839, "grad_norm": 2.342358192167675, "learning_rate": 1.7376891685366347e-06, "loss": 0.9066, "step": 20265 }, { "epoch": 0.7345148780399405, "grad_norm": 2.573137635063922, "learning_rate": 1.7372444042138631e-06, "loss": 0.9142, "step": 20266 }, { "epoch": 0.7345511217425972, "grad_norm": 2.1982426079415394, "learning_rate": 1.736799684849888e-06, "loss": 0.6966, "step": 20267 }, { "epoch": 0.7345873654452539, "grad_norm": 2.5833793626458257, "learning_rate": 1.7363550104508403e-06, "loss": 0.8742, "step": 20268 }, { "epoch": 0.7346236091479106, "grad_norm": 2.5352322579664976, "learning_rate": 1.7359103810228466e-06, "loss": 0.8577, "step": 20269 }, { "epoch": 0.7346598528505672, "grad_norm": 2.0185532592757003, "learning_rate": 1.7354657965720356e-06, "loss": 0.7266, "step": 20270 }, { "epoch": 0.7346960965532239, "grad_norm": 2.122460841456101, "learning_rate": 1.7350212571045277e-06, "loss": 0.756, "step": 20271 }, { "epoch": 0.7347323402558805, "grad_norm": 2.1265479785303585, "learning_rate": 1.7345767626264565e-06, "loss": 1.0145, "step": 20272 }, { "epoch": 0.7347685839585372, "grad_norm": 2.6166952629407048, "learning_rate": 1.7341323131439402e-06, "loss": 0.9501, "step": 20273 }, { "epoch": 0.7348048276611938, "grad_norm": 2.6571325801020156, "learning_rate": 1.7336879086631076e-06, "loss": 0.9777, "step": 20274 }, { "epoch": 0.7348410713638506, "grad_norm": 2.542258898768675, "learning_rate": 1.7332435491900785e-06, "loss": 0.8262, "step": 20275 }, { "epoch": 0.7348773150665072, "grad_norm": 2.4699272730940676, "learning_rate": 1.7327992347309775e-06, "loss": 0.8874, "step": 20276 }, { "epoch": 0.7349135587691639, "grad_norm": 2.4159723713988064, "learning_rate": 1.7323549652919286e-06, "loss": 0.9272, "step": 20277 }, { "epoch": 0.7349498024718205, "grad_norm": 2.453914735873416, "learning_rate": 1.7319107408790509e-06, "loss": 0.9254, "step": 20278 }, { "epoch": 0.7349860461744772, "grad_norm": 2.7071364896255727, "learning_rate": 1.731466561498466e-06, "loss": 0.9077, "step": 20279 }, { "epoch": 0.7350222898771338, "grad_norm": 2.1766617946419866, "learning_rate": 1.731022427156296e-06, "loss": 0.7264, "step": 20280 }, { "epoch": 0.7350585335797906, "grad_norm": 2.514717542246945, "learning_rate": 1.730578337858661e-06, "loss": 0.8549, "step": 20281 }, { "epoch": 0.7350947772824472, "grad_norm": 2.24973569415633, "learning_rate": 1.7301342936116778e-06, "loss": 0.9283, "step": 20282 }, { "epoch": 0.7351310209851039, "grad_norm": 2.5366134250150596, "learning_rate": 1.729690294421466e-06, "loss": 0.9561, "step": 20283 }, { "epoch": 0.7351672646877605, "grad_norm": 2.6050294266262046, "learning_rate": 1.7292463402941462e-06, "loss": 0.8271, "step": 20284 }, { "epoch": 0.7352035083904171, "grad_norm": 2.295925793144082, "learning_rate": 1.728802431235832e-06, "loss": 0.8766, "step": 20285 }, { "epoch": 0.7352397520930738, "grad_norm": 2.1713863380112692, "learning_rate": 1.7283585672526437e-06, "loss": 0.853, "step": 20286 }, { "epoch": 0.7352759957957304, "grad_norm": 2.34201335364229, "learning_rate": 1.7279147483506925e-06, "loss": 0.8643, "step": 20287 }, { "epoch": 0.7353122394983872, "grad_norm": 2.3243378875863097, "learning_rate": 1.7274709745361006e-06, "loss": 1.0925, "step": 20288 }, { "epoch": 0.7353484832010438, "grad_norm": 2.458165001722924, "learning_rate": 1.7270272458149772e-06, "loss": 0.9087, "step": 20289 }, { "epoch": 0.7353847269037005, "grad_norm": 2.697098504189277, "learning_rate": 1.7265835621934414e-06, "loss": 0.8643, "step": 20290 }, { "epoch": 0.7354209706063571, "grad_norm": 2.461722708162743, "learning_rate": 1.7261399236776005e-06, "loss": 0.9533, "step": 20291 }, { "epoch": 0.7354572143090138, "grad_norm": 2.3470777350642402, "learning_rate": 1.7256963302735752e-06, "loss": 0.8728, "step": 20292 }, { "epoch": 0.7354934580116704, "grad_norm": 2.46761521226312, "learning_rate": 1.725252781987472e-06, "loss": 0.8799, "step": 20293 }, { "epoch": 0.7355297017143272, "grad_norm": 2.2357756673768088, "learning_rate": 1.7248092788254073e-06, "loss": 0.9436, "step": 20294 }, { "epoch": 0.7355659454169838, "grad_norm": 2.300635862263108, "learning_rate": 1.7243658207934876e-06, "loss": 0.7688, "step": 20295 }, { "epoch": 0.7356021891196405, "grad_norm": 2.202727168390427, "learning_rate": 1.7239224078978268e-06, "loss": 0.8869, "step": 20296 }, { "epoch": 0.7356384328222971, "grad_norm": 2.277378284769127, "learning_rate": 1.7234790401445344e-06, "loss": 1.0231, "step": 20297 }, { "epoch": 0.7356746765249538, "grad_norm": 2.3427223282125413, "learning_rate": 1.7230357175397183e-06, "loss": 1.0325, "step": 20298 }, { "epoch": 0.7357109202276104, "grad_norm": 2.0997913126228793, "learning_rate": 1.7225924400894883e-06, "loss": 1.0264, "step": 20299 }, { "epoch": 0.7357471639302671, "grad_norm": 2.2006216883031606, "learning_rate": 1.7221492077999518e-06, "loss": 1.0044, "step": 20300 }, { "epoch": 0.7357834076329238, "grad_norm": 2.1553705576432782, "learning_rate": 1.7217060206772191e-06, "loss": 0.7136, "step": 20301 }, { "epoch": 0.7358196513355805, "grad_norm": 2.3729730418403725, "learning_rate": 1.7212628787273927e-06, "loss": 0.8656, "step": 20302 }, { "epoch": 0.7358558950382371, "grad_norm": 2.5385552359459056, "learning_rate": 1.7208197819565814e-06, "loss": 0.888, "step": 20303 }, { "epoch": 0.7358921387408938, "grad_norm": 2.306262250588313, "learning_rate": 1.7203767303708913e-06, "loss": 0.7971, "step": 20304 }, { "epoch": 0.7359283824435504, "grad_norm": 2.444980959793209, "learning_rate": 1.7199337239764251e-06, "loss": 0.8347, "step": 20305 }, { "epoch": 0.7359646261462071, "grad_norm": 2.4695037239823585, "learning_rate": 1.71949076277929e-06, "loss": 0.9569, "step": 20306 }, { "epoch": 0.7360008698488638, "grad_norm": 2.1489902221095694, "learning_rate": 1.7190478467855853e-06, "loss": 0.9297, "step": 20307 }, { "epoch": 0.7360371135515205, "grad_norm": 2.4245785942343367, "learning_rate": 1.71860497600142e-06, "loss": 1.0762, "step": 20308 }, { "epoch": 0.7360733572541771, "grad_norm": 2.617368681064589, "learning_rate": 1.7181621504328921e-06, "loss": 0.9155, "step": 20309 }, { "epoch": 0.7361096009568338, "grad_norm": 2.3073689841854486, "learning_rate": 1.717719370086106e-06, "loss": 0.8096, "step": 20310 }, { "epoch": 0.7361458446594904, "grad_norm": 2.2838951637979235, "learning_rate": 1.7172766349671594e-06, "loss": 0.8519, "step": 20311 }, { "epoch": 0.736182088362147, "grad_norm": 2.7098564236170253, "learning_rate": 1.7168339450821586e-06, "loss": 0.8855, "step": 20312 }, { "epoch": 0.7362183320648037, "grad_norm": 2.2617943122127446, "learning_rate": 1.7163913004371986e-06, "loss": 0.8173, "step": 20313 }, { "epoch": 0.7362545757674605, "grad_norm": 2.423707327603947, "learning_rate": 1.7159487010383824e-06, "loss": 0.893, "step": 20314 }, { "epoch": 0.7362908194701171, "grad_norm": 2.2878674513469908, "learning_rate": 1.715506146891805e-06, "loss": 0.9398, "step": 20315 }, { "epoch": 0.7363270631727737, "grad_norm": 2.3146874986789263, "learning_rate": 1.7150636380035668e-06, "loss": 1.0438, "step": 20316 }, { "epoch": 0.7363633068754304, "grad_norm": 2.377421208249288, "learning_rate": 1.7146211743797664e-06, "loss": 0.8972, "step": 20317 }, { "epoch": 0.736399550578087, "grad_norm": 2.257938765677445, "learning_rate": 1.7141787560264982e-06, "loss": 0.7899, "step": 20318 }, { "epoch": 0.7364357942807437, "grad_norm": 2.251360459567889, "learning_rate": 1.7137363829498588e-06, "loss": 0.8208, "step": 20319 }, { "epoch": 0.7364720379834003, "grad_norm": 2.6827814363213283, "learning_rate": 1.7132940551559452e-06, "loss": 0.7883, "step": 20320 }, { "epoch": 0.7365082816860571, "grad_norm": 2.3995143280676703, "learning_rate": 1.712851772650853e-06, "loss": 1.0001, "step": 20321 }, { "epoch": 0.7365445253887137, "grad_norm": 2.210470427461488, "learning_rate": 1.7124095354406738e-06, "loss": 0.7664, "step": 20322 }, { "epoch": 0.7365807690913704, "grad_norm": 2.273336930439491, "learning_rate": 1.7119673435315031e-06, "loss": 0.8705, "step": 20323 }, { "epoch": 0.736617012794027, "grad_norm": 2.5040770135911488, "learning_rate": 1.7115251969294355e-06, "loss": 0.8093, "step": 20324 }, { "epoch": 0.7366532564966837, "grad_norm": 2.1863504906991515, "learning_rate": 1.71108309564056e-06, "loss": 0.7409, "step": 20325 }, { "epoch": 0.7366895001993403, "grad_norm": 2.489419753938237, "learning_rate": 1.7106410396709722e-06, "loss": 0.8397, "step": 20326 }, { "epoch": 0.7367257439019971, "grad_norm": 2.3523136319751727, "learning_rate": 1.7101990290267583e-06, "loss": 0.8112, "step": 20327 }, { "epoch": 0.7367619876046537, "grad_norm": 2.30741837740494, "learning_rate": 1.7097570637140154e-06, "loss": 0.8796, "step": 20328 }, { "epoch": 0.7367982313073104, "grad_norm": 2.0520569700195006, "learning_rate": 1.7093151437388288e-06, "loss": 0.7044, "step": 20329 }, { "epoch": 0.736834475009967, "grad_norm": 2.175017195524556, "learning_rate": 1.7088732691072907e-06, "loss": 0.9482, "step": 20330 }, { "epoch": 0.7368707187126237, "grad_norm": 2.2895437485837724, "learning_rate": 1.7084314398254853e-06, "loss": 0.91, "step": 20331 }, { "epoch": 0.7369069624152803, "grad_norm": 2.265075553692219, "learning_rate": 1.7079896558995073e-06, "loss": 0.814, "step": 20332 }, { "epoch": 0.736943206117937, "grad_norm": 2.3602582151936238, "learning_rate": 1.7075479173354397e-06, "loss": 0.9492, "step": 20333 }, { "epoch": 0.7369794498205937, "grad_norm": 2.4742335080448297, "learning_rate": 1.7071062241393715e-06, "loss": 0.9669, "step": 20334 }, { "epoch": 0.7370156935232504, "grad_norm": 2.205841849888431, "learning_rate": 1.7066645763173872e-06, "loss": 0.793, "step": 20335 }, { "epoch": 0.737051937225907, "grad_norm": 2.2536509931678266, "learning_rate": 1.7062229738755725e-06, "loss": 0.7856, "step": 20336 }, { "epoch": 0.7370881809285637, "grad_norm": 2.262206893336334, "learning_rate": 1.7057814168200153e-06, "loss": 0.7985, "step": 20337 }, { "epoch": 0.7371244246312203, "grad_norm": 2.7718851900981902, "learning_rate": 1.7053399051567964e-06, "loss": 0.9625, "step": 20338 }, { "epoch": 0.737160668333877, "grad_norm": 2.2956666779675143, "learning_rate": 1.7048984388920009e-06, "loss": 0.7353, "step": 20339 }, { "epoch": 0.7371969120365337, "grad_norm": 2.6110554422459957, "learning_rate": 1.7044570180317122e-06, "loss": 0.8546, "step": 20340 }, { "epoch": 0.7372331557391903, "grad_norm": 2.22117513675643, "learning_rate": 1.7040156425820136e-06, "loss": 0.8915, "step": 20341 }, { "epoch": 0.737269399441847, "grad_norm": 2.264456661283612, "learning_rate": 1.703574312548985e-06, "loss": 0.9043, "step": 20342 }, { "epoch": 0.7373056431445036, "grad_norm": 2.2723310319029073, "learning_rate": 1.7031330279387093e-06, "loss": 0.738, "step": 20343 }, { "epoch": 0.7373418868471603, "grad_norm": 2.719250909260926, "learning_rate": 1.7026917887572674e-06, "loss": 0.8731, "step": 20344 }, { "epoch": 0.7373781305498169, "grad_norm": 2.0745983491247157, "learning_rate": 1.702250595010737e-06, "loss": 0.7713, "step": 20345 }, { "epoch": 0.7374143742524736, "grad_norm": 2.5814103797588244, "learning_rate": 1.701809446705201e-06, "loss": 0.7443, "step": 20346 }, { "epoch": 0.7374506179551303, "grad_norm": 2.39973136970109, "learning_rate": 1.7013683438467326e-06, "loss": 0.9025, "step": 20347 }, { "epoch": 0.737486861657787, "grad_norm": 2.1160690126133854, "learning_rate": 1.7009272864414172e-06, "loss": 0.7044, "step": 20348 }, { "epoch": 0.7375231053604436, "grad_norm": 2.3303369685743185, "learning_rate": 1.7004862744953265e-06, "loss": 0.8778, "step": 20349 }, { "epoch": 0.7375593490631003, "grad_norm": 2.298751514011911, "learning_rate": 1.7000453080145413e-06, "loss": 0.7606, "step": 20350 }, { "epoch": 0.7375955927657569, "grad_norm": 2.563152765026942, "learning_rate": 1.6996043870051342e-06, "loss": 0.9619, "step": 20351 }, { "epoch": 0.7376318364684136, "grad_norm": 2.337616894584584, "learning_rate": 1.6991635114731825e-06, "loss": 0.9431, "step": 20352 }, { "epoch": 0.7376680801710703, "grad_norm": 2.266956950069574, "learning_rate": 1.6987226814247632e-06, "loss": 0.9023, "step": 20353 }, { "epoch": 0.737704323873727, "grad_norm": 2.4880401471352025, "learning_rate": 1.6982818968659454e-06, "loss": 0.9153, "step": 20354 }, { "epoch": 0.7377405675763836, "grad_norm": 2.67585023525754, "learning_rate": 1.6978411578028098e-06, "loss": 0.8923, "step": 20355 }, { "epoch": 0.7377768112790403, "grad_norm": 2.312456615186813, "learning_rate": 1.6974004642414237e-06, "loss": 0.9724, "step": 20356 }, { "epoch": 0.7378130549816969, "grad_norm": 2.1619626785943313, "learning_rate": 1.6969598161878642e-06, "loss": 0.9267, "step": 20357 }, { "epoch": 0.7378492986843536, "grad_norm": 2.459504581730602, "learning_rate": 1.6965192136481974e-06, "loss": 0.8415, "step": 20358 }, { "epoch": 0.7378855423870102, "grad_norm": 2.2806739854801266, "learning_rate": 1.6960786566285008e-06, "loss": 0.8909, "step": 20359 }, { "epoch": 0.737921786089667, "grad_norm": 2.227328161663063, "learning_rate": 1.6956381451348408e-06, "loss": 0.8342, "step": 20360 }, { "epoch": 0.7379580297923236, "grad_norm": 2.212161763692498, "learning_rate": 1.6951976791732905e-06, "loss": 0.7683, "step": 20361 }, { "epoch": 0.7379942734949803, "grad_norm": 2.4528173133243323, "learning_rate": 1.6947572587499156e-06, "loss": 0.8945, "step": 20362 }, { "epoch": 0.7380305171976369, "grad_norm": 2.345869120215984, "learning_rate": 1.6943168838707869e-06, "loss": 0.9072, "step": 20363 }, { "epoch": 0.7380667609002936, "grad_norm": 1.98169297273402, "learning_rate": 1.6938765545419739e-06, "loss": 0.7721, "step": 20364 }, { "epoch": 0.7381030046029502, "grad_norm": 2.1342483967667136, "learning_rate": 1.693436270769541e-06, "loss": 0.7835, "step": 20365 }, { "epoch": 0.738139248305607, "grad_norm": 2.12879327116889, "learning_rate": 1.6929960325595562e-06, "loss": 0.7895, "step": 20366 }, { "epoch": 0.7381754920082636, "grad_norm": 2.54752387677323, "learning_rate": 1.6925558399180864e-06, "loss": 0.6903, "step": 20367 }, { "epoch": 0.7382117357109202, "grad_norm": 2.414257919921765, "learning_rate": 1.6921156928511983e-06, "loss": 1.0149, "step": 20368 }, { "epoch": 0.7382479794135769, "grad_norm": 2.3318387713070994, "learning_rate": 1.691675591364954e-06, "loss": 0.9434, "step": 20369 }, { "epoch": 0.7382842231162335, "grad_norm": 2.68519910217107, "learning_rate": 1.6912355354654192e-06, "loss": 0.8696, "step": 20370 }, { "epoch": 0.7383204668188902, "grad_norm": 2.1132313106178975, "learning_rate": 1.6907955251586594e-06, "loss": 0.7802, "step": 20371 }, { "epoch": 0.7383567105215468, "grad_norm": 2.287755865751937, "learning_rate": 1.6903555604507339e-06, "loss": 0.8981, "step": 20372 }, { "epoch": 0.7383929542242036, "grad_norm": 2.2975237871173317, "learning_rate": 1.6899156413477091e-06, "loss": 0.8566, "step": 20373 }, { "epoch": 0.7384291979268602, "grad_norm": 2.341324662019615, "learning_rate": 1.689475767855641e-06, "loss": 0.7997, "step": 20374 }, { "epoch": 0.7384654416295169, "grad_norm": 2.5762492493369202, "learning_rate": 1.689035939980599e-06, "loss": 0.7416, "step": 20375 }, { "epoch": 0.7385016853321735, "grad_norm": 2.4050061128579556, "learning_rate": 1.688596157728637e-06, "loss": 0.8918, "step": 20376 }, { "epoch": 0.7385379290348302, "grad_norm": 2.1483558246892334, "learning_rate": 1.6881564211058188e-06, "loss": 0.7504, "step": 20377 }, { "epoch": 0.7385741727374868, "grad_norm": 2.4665021946428527, "learning_rate": 1.687716730118199e-06, "loss": 0.8925, "step": 20378 }, { "epoch": 0.7386104164401436, "grad_norm": 2.5726738798924216, "learning_rate": 1.6872770847718433e-06, "loss": 0.9697, "step": 20379 }, { "epoch": 0.7386466601428002, "grad_norm": 2.317124575183665, "learning_rate": 1.6868374850728036e-06, "loss": 0.9022, "step": 20380 }, { "epoch": 0.7386829038454569, "grad_norm": 2.5520008370105027, "learning_rate": 1.6863979310271418e-06, "loss": 0.788, "step": 20381 }, { "epoch": 0.7387191475481135, "grad_norm": 2.237471056187667, "learning_rate": 1.6859584226409104e-06, "loss": 0.9945, "step": 20382 }, { "epoch": 0.7387553912507702, "grad_norm": 2.414116054141455, "learning_rate": 1.6855189599201677e-06, "loss": 0.8853, "step": 20383 }, { "epoch": 0.7387916349534268, "grad_norm": 2.4363077489649396, "learning_rate": 1.685079542870971e-06, "loss": 0.9277, "step": 20384 }, { "epoch": 0.7388278786560835, "grad_norm": 2.3018664283377515, "learning_rate": 1.6846401714993715e-06, "loss": 0.8793, "step": 20385 }, { "epoch": 0.7388641223587402, "grad_norm": 2.1398128562673224, "learning_rate": 1.6842008458114257e-06, "loss": 0.8514, "step": 20386 }, { "epoch": 0.7389003660613969, "grad_norm": 2.3325655643925187, "learning_rate": 1.6837615658131867e-06, "loss": 0.8055, "step": 20387 }, { "epoch": 0.7389366097640535, "grad_norm": 2.2869547033025204, "learning_rate": 1.68332233151071e-06, "loss": 0.9536, "step": 20388 }, { "epoch": 0.7389728534667102, "grad_norm": 2.5723416914462196, "learning_rate": 1.6828831429100434e-06, "loss": 0.9885, "step": 20389 }, { "epoch": 0.7390090971693668, "grad_norm": 2.5166215746208134, "learning_rate": 1.6824440000172409e-06, "loss": 0.8616, "step": 20390 }, { "epoch": 0.7390453408720234, "grad_norm": 2.256947652801442, "learning_rate": 1.6820049028383557e-06, "loss": 0.748, "step": 20391 }, { "epoch": 0.7390815845746801, "grad_norm": 2.62212624903098, "learning_rate": 1.6815658513794348e-06, "loss": 0.9404, "step": 20392 }, { "epoch": 0.7391178282773369, "grad_norm": 2.4476448987154953, "learning_rate": 1.6811268456465307e-06, "loss": 0.9392, "step": 20393 }, { "epoch": 0.7391540719799935, "grad_norm": 2.037877673567238, "learning_rate": 1.6806878856456882e-06, "loss": 0.6047, "step": 20394 }, { "epoch": 0.7391903156826501, "grad_norm": 2.4540322330375157, "learning_rate": 1.6802489713829628e-06, "loss": 0.8371, "step": 20395 }, { "epoch": 0.7392265593853068, "grad_norm": 2.10711990453351, "learning_rate": 1.6798101028643971e-06, "loss": 0.7518, "step": 20396 }, { "epoch": 0.7392628030879634, "grad_norm": 2.424344725185478, "learning_rate": 1.6793712800960416e-06, "loss": 0.8513, "step": 20397 }, { "epoch": 0.7392990467906201, "grad_norm": 2.380209729949561, "learning_rate": 1.6789325030839388e-06, "loss": 0.7504, "step": 20398 }, { "epoch": 0.7393352904932768, "grad_norm": 2.426225517558753, "learning_rate": 1.6784937718341403e-06, "loss": 0.9522, "step": 20399 }, { "epoch": 0.7393715341959335, "grad_norm": 1.971992910017637, "learning_rate": 1.6780550863526874e-06, "loss": 0.8279, "step": 20400 }, { "epoch": 0.7394077778985901, "grad_norm": 2.2025834537411306, "learning_rate": 1.677616446645628e-06, "loss": 0.9245, "step": 20401 }, { "epoch": 0.7394440216012468, "grad_norm": 2.346011001414013, "learning_rate": 1.677177852719003e-06, "loss": 0.9608, "step": 20402 }, { "epoch": 0.7394802653039034, "grad_norm": 2.429247073773672, "learning_rate": 1.6767393045788577e-06, "loss": 0.82, "step": 20403 }, { "epoch": 0.7395165090065601, "grad_norm": 2.556357477729579, "learning_rate": 1.6763008022312365e-06, "loss": 1.009, "step": 20404 }, { "epoch": 0.7395527527092167, "grad_norm": 2.49088313292862, "learning_rate": 1.6758623456821787e-06, "loss": 0.8843, "step": 20405 }, { "epoch": 0.7395889964118735, "grad_norm": 2.146528471462298, "learning_rate": 1.6754239349377276e-06, "loss": 0.7131, "step": 20406 }, { "epoch": 0.7396252401145301, "grad_norm": 2.458837038678676, "learning_rate": 1.6749855700039237e-06, "loss": 1.1791, "step": 20407 }, { "epoch": 0.7396614838171868, "grad_norm": 2.335589984581966, "learning_rate": 1.6745472508868093e-06, "loss": 0.7879, "step": 20408 }, { "epoch": 0.7396977275198434, "grad_norm": 2.3935770767096596, "learning_rate": 1.6741089775924212e-06, "loss": 1.1139, "step": 20409 }, { "epoch": 0.7397339712225001, "grad_norm": 2.6953019006457537, "learning_rate": 1.6736707501267995e-06, "loss": 0.98, "step": 20410 }, { "epoch": 0.7397702149251567, "grad_norm": 2.3883954763473723, "learning_rate": 1.6732325684959855e-06, "loss": 0.9944, "step": 20411 }, { "epoch": 0.7398064586278135, "grad_norm": 2.487607861257476, "learning_rate": 1.6727944327060126e-06, "loss": 1.0671, "step": 20412 }, { "epoch": 0.7398427023304701, "grad_norm": 2.195925452106188, "learning_rate": 1.6723563427629218e-06, "loss": 0.9896, "step": 20413 }, { "epoch": 0.7398789460331268, "grad_norm": 2.437306573106286, "learning_rate": 1.671918298672745e-06, "loss": 0.9874, "step": 20414 }, { "epoch": 0.7399151897357834, "grad_norm": 2.355785311334208, "learning_rate": 1.6714803004415247e-06, "loss": 0.8148, "step": 20415 }, { "epoch": 0.73995143343844, "grad_norm": 2.350027095602089, "learning_rate": 1.6710423480752907e-06, "loss": 0.947, "step": 20416 }, { "epoch": 0.7399876771410967, "grad_norm": 2.500781468982004, "learning_rate": 1.670604441580082e-06, "loss": 0.7832, "step": 20417 }, { "epoch": 0.7400239208437533, "grad_norm": 2.474044564803443, "learning_rate": 1.6701665809619273e-06, "loss": 0.8939, "step": 20418 }, { "epoch": 0.7400601645464101, "grad_norm": 2.444727785585848, "learning_rate": 1.6697287662268664e-06, "loss": 0.7021, "step": 20419 }, { "epoch": 0.7400964082490668, "grad_norm": 2.2325328814585697, "learning_rate": 1.6692909973809273e-06, "loss": 0.8646, "step": 20420 }, { "epoch": 0.7401326519517234, "grad_norm": 2.3092777039140286, "learning_rate": 1.6688532744301455e-06, "loss": 0.9297, "step": 20421 }, { "epoch": 0.74016889565438, "grad_norm": 2.1677066406812724, "learning_rate": 1.6684155973805494e-06, "loss": 0.8571, "step": 20422 }, { "epoch": 0.7402051393570367, "grad_norm": 2.4733187254789812, "learning_rate": 1.6679779662381718e-06, "loss": 0.8442, "step": 20423 }, { "epoch": 0.7402413830596933, "grad_norm": 3.031891146152691, "learning_rate": 1.667540381009044e-06, "loss": 0.9182, "step": 20424 }, { "epoch": 0.7402776267623501, "grad_norm": 2.516413919804267, "learning_rate": 1.6671028416991924e-06, "loss": 0.8619, "step": 20425 }, { "epoch": 0.7403138704650067, "grad_norm": 2.1585939071864595, "learning_rate": 1.6666653483146483e-06, "loss": 1.2072, "step": 20426 }, { "epoch": 0.7403501141676634, "grad_norm": 2.0983238578089947, "learning_rate": 1.6662279008614401e-06, "loss": 0.9483, "step": 20427 }, { "epoch": 0.74038635787032, "grad_norm": 2.5924652690160213, "learning_rate": 1.6657904993455964e-06, "loss": 0.9453, "step": 20428 }, { "epoch": 0.7404226015729767, "grad_norm": 2.7046445999185442, "learning_rate": 1.6653531437731417e-06, "loss": 1.0179, "step": 20429 }, { "epoch": 0.7404588452756333, "grad_norm": 1.903754496308885, "learning_rate": 1.6649158341501038e-06, "loss": 0.7983, "step": 20430 }, { "epoch": 0.74049508897829, "grad_norm": 2.6658713044460924, "learning_rate": 1.6644785704825105e-06, "loss": 1.0344, "step": 20431 }, { "epoch": 0.7405313326809467, "grad_norm": 2.4866909592554793, "learning_rate": 1.6640413527763838e-06, "loss": 0.9929, "step": 20432 }, { "epoch": 0.7405675763836034, "grad_norm": 2.7438815578227262, "learning_rate": 1.663604181037749e-06, "loss": 0.8429, "step": 20433 }, { "epoch": 0.74060382008626, "grad_norm": 2.6674760572731393, "learning_rate": 1.6631670552726314e-06, "loss": 1.0747, "step": 20434 }, { "epoch": 0.7406400637889167, "grad_norm": 2.486991904697061, "learning_rate": 1.6627299754870551e-06, "loss": 0.8508, "step": 20435 }, { "epoch": 0.7406763074915733, "grad_norm": 2.4508107952602565, "learning_rate": 1.6622929416870404e-06, "loss": 0.9507, "step": 20436 }, { "epoch": 0.74071255119423, "grad_norm": 2.5473450647539564, "learning_rate": 1.66185595387861e-06, "loss": 1.0543, "step": 20437 }, { "epoch": 0.7407487948968867, "grad_norm": 2.6745967992948683, "learning_rate": 1.661419012067787e-06, "loss": 1.0898, "step": 20438 }, { "epoch": 0.7407850385995434, "grad_norm": 2.6470394673152318, "learning_rate": 1.6609821162605895e-06, "loss": 1.0019, "step": 20439 }, { "epoch": 0.7408212823022, "grad_norm": 2.3770326673826476, "learning_rate": 1.6605452664630406e-06, "loss": 0.7727, "step": 20440 }, { "epoch": 0.7408575260048567, "grad_norm": 2.314620348766379, "learning_rate": 1.6601084626811553e-06, "loss": 0.9681, "step": 20441 }, { "epoch": 0.7408937697075133, "grad_norm": 2.4486020170753555, "learning_rate": 1.6596717049209588e-06, "loss": 0.7525, "step": 20442 }, { "epoch": 0.74093001341017, "grad_norm": 2.4288286964890275, "learning_rate": 1.659234993188464e-06, "loss": 0.9413, "step": 20443 }, { "epoch": 0.7409662571128266, "grad_norm": 2.3894679766901037, "learning_rate": 1.6587983274896925e-06, "loss": 0.8521, "step": 20444 }, { "epoch": 0.7410025008154834, "grad_norm": 2.4784404196809593, "learning_rate": 1.6583617078306558e-06, "loss": 0.8674, "step": 20445 }, { "epoch": 0.74103874451814, "grad_norm": 2.5197858527359625, "learning_rate": 1.6579251342173774e-06, "loss": 0.9652, "step": 20446 }, { "epoch": 0.7410749882207966, "grad_norm": 2.3670128060822253, "learning_rate": 1.6574886066558675e-06, "loss": 0.9244, "step": 20447 }, { "epoch": 0.7411112319234533, "grad_norm": 2.0242161424100833, "learning_rate": 1.657052125152145e-06, "loss": 0.7034, "step": 20448 }, { "epoch": 0.7411474756261099, "grad_norm": 2.6960123408977155, "learning_rate": 1.6566156897122209e-06, "loss": 0.9374, "step": 20449 }, { "epoch": 0.7411837193287666, "grad_norm": 2.4252447627142235, "learning_rate": 1.6561793003421107e-06, "loss": 1.0076, "step": 20450 }, { "epoch": 0.7412199630314233, "grad_norm": 2.5811005578152533, "learning_rate": 1.6557429570478283e-06, "loss": 0.9317, "step": 20451 }, { "epoch": 0.74125620673408, "grad_norm": 2.2489709958032265, "learning_rate": 1.6553066598353845e-06, "loss": 0.81, "step": 20452 }, { "epoch": 0.7412924504367366, "grad_norm": 2.2354812117495464, "learning_rate": 1.654870408710792e-06, "loss": 0.9311, "step": 20453 }, { "epoch": 0.7413286941393933, "grad_norm": 2.609308141562058, "learning_rate": 1.654434203680062e-06, "loss": 0.8686, "step": 20454 }, { "epoch": 0.7413649378420499, "grad_norm": 2.0101511496189355, "learning_rate": 1.653998044749207e-06, "loss": 0.9179, "step": 20455 }, { "epoch": 0.7414011815447066, "grad_norm": 2.4681514401274773, "learning_rate": 1.653561931924234e-06, "loss": 0.9473, "step": 20456 }, { "epoch": 0.7414374252473632, "grad_norm": 2.158361914007754, "learning_rate": 1.6531258652111537e-06, "loss": 0.6549, "step": 20457 }, { "epoch": 0.74147366895002, "grad_norm": 2.146416077195351, "learning_rate": 1.6526898446159762e-06, "loss": 0.7884, "step": 20458 }, { "epoch": 0.7415099126526766, "grad_norm": 2.3602675959906376, "learning_rate": 1.6522538701447067e-06, "loss": 0.8912, "step": 20459 }, { "epoch": 0.7415461563553333, "grad_norm": 2.547806688265584, "learning_rate": 1.6518179418033559e-06, "loss": 0.8968, "step": 20460 }, { "epoch": 0.7415824000579899, "grad_norm": 2.5287996665717385, "learning_rate": 1.6513820595979257e-06, "loss": 0.9216, "step": 20461 }, { "epoch": 0.7416186437606466, "grad_norm": 2.7047556530468224, "learning_rate": 1.6509462235344287e-06, "loss": 0.8361, "step": 20462 }, { "epoch": 0.7416548874633032, "grad_norm": 2.235154880647935, "learning_rate": 1.6505104336188655e-06, "loss": 0.8854, "step": 20463 }, { "epoch": 0.7416911311659599, "grad_norm": 2.3405183605610937, "learning_rate": 1.650074689857245e-06, "loss": 0.9195, "step": 20464 }, { "epoch": 0.7417273748686166, "grad_norm": 2.2064386618037433, "learning_rate": 1.6496389922555656e-06, "loss": 1.0134, "step": 20465 }, { "epoch": 0.7417636185712733, "grad_norm": 1.8927499861583432, "learning_rate": 1.6492033408198382e-06, "loss": 0.6127, "step": 20466 }, { "epoch": 0.7417998622739299, "grad_norm": 2.31777130243626, "learning_rate": 1.6487677355560606e-06, "loss": 0.7058, "step": 20467 }, { "epoch": 0.7418361059765866, "grad_norm": 2.3608733453645514, "learning_rate": 1.6483321764702382e-06, "loss": 1.0482, "step": 20468 }, { "epoch": 0.7418723496792432, "grad_norm": 2.3390914947511177, "learning_rate": 1.6478966635683697e-06, "loss": 0.8942, "step": 20469 }, { "epoch": 0.7419085933818999, "grad_norm": 2.5550151108677417, "learning_rate": 1.647461196856458e-06, "loss": 0.9574, "step": 20470 }, { "epoch": 0.7419448370845566, "grad_norm": 2.2462580769944998, "learning_rate": 1.647025776340505e-06, "loss": 0.8213, "step": 20471 }, { "epoch": 0.7419810807872133, "grad_norm": 2.2134918192306188, "learning_rate": 1.6465904020265078e-06, "loss": 0.762, "step": 20472 }, { "epoch": 0.7420173244898699, "grad_norm": 2.2232367235927954, "learning_rate": 1.6461550739204662e-06, "loss": 0.931, "step": 20473 }, { "epoch": 0.7420535681925265, "grad_norm": 2.3095828085223205, "learning_rate": 1.6457197920283802e-06, "loss": 0.83, "step": 20474 }, { "epoch": 0.7420898118951832, "grad_norm": 2.103323016506574, "learning_rate": 1.6452845563562475e-06, "loss": 0.7535, "step": 20475 }, { "epoch": 0.7421260555978398, "grad_norm": 2.659196944593439, "learning_rate": 1.644849366910064e-06, "loss": 0.9648, "step": 20476 }, { "epoch": 0.7421622993004965, "grad_norm": 2.4500723086810137, "learning_rate": 1.6444142236958267e-06, "loss": 0.7355, "step": 20477 }, { "epoch": 0.7421985430031532, "grad_norm": 2.191523896054813, "learning_rate": 1.6439791267195332e-06, "loss": 0.9431, "step": 20478 }, { "epoch": 0.7422347867058099, "grad_norm": 2.1538156270225426, "learning_rate": 1.6435440759871768e-06, "loss": 0.9205, "step": 20479 }, { "epoch": 0.7422710304084665, "grad_norm": 2.653161302261178, "learning_rate": 1.6431090715047543e-06, "loss": 0.8895, "step": 20480 }, { "epoch": 0.7423072741111232, "grad_norm": 2.3147622199589013, "learning_rate": 1.6426741132782554e-06, "loss": 0.8537, "step": 20481 }, { "epoch": 0.7423435178137798, "grad_norm": 2.6195547148642206, "learning_rate": 1.6422392013136802e-06, "loss": 0.888, "step": 20482 }, { "epoch": 0.7423797615164365, "grad_norm": 2.4990286724300854, "learning_rate": 1.6418043356170161e-06, "loss": 0.891, "step": 20483 }, { "epoch": 0.7424160052190932, "grad_norm": 2.472971745439063, "learning_rate": 1.6413695161942594e-06, "loss": 0.8864, "step": 20484 }, { "epoch": 0.7424522489217499, "grad_norm": 2.4571243203428708, "learning_rate": 1.6409347430513956e-06, "loss": 0.9115, "step": 20485 }, { "epoch": 0.7424884926244065, "grad_norm": 2.4218197082235493, "learning_rate": 1.6405000161944233e-06, "loss": 1.0675, "step": 20486 }, { "epoch": 0.7425247363270632, "grad_norm": 2.3492348344908742, "learning_rate": 1.6400653356293272e-06, "loss": 0.8353, "step": 20487 }, { "epoch": 0.7425609800297198, "grad_norm": 2.4478613100811493, "learning_rate": 1.6396307013621011e-06, "loss": 0.8441, "step": 20488 }, { "epoch": 0.7425972237323765, "grad_norm": 2.610306174264554, "learning_rate": 1.6391961133987288e-06, "loss": 1.0253, "step": 20489 }, { "epoch": 0.7426334674350331, "grad_norm": 2.6588816424702975, "learning_rate": 1.6387615717452021e-06, "loss": 0.8309, "step": 20490 }, { "epoch": 0.7426697111376899, "grad_norm": 2.2307590111665228, "learning_rate": 1.63832707640751e-06, "loss": 0.8236, "step": 20491 }, { "epoch": 0.7427059548403465, "grad_norm": 2.193067001461018, "learning_rate": 1.6378926273916357e-06, "loss": 0.8696, "step": 20492 }, { "epoch": 0.7427421985430032, "grad_norm": 2.2229458431186564, "learning_rate": 1.637458224703568e-06, "loss": 0.7931, "step": 20493 }, { "epoch": 0.7427784422456598, "grad_norm": 2.476926260424481, "learning_rate": 1.6370238683492922e-06, "loss": 0.7964, "step": 20494 }, { "epoch": 0.7428146859483165, "grad_norm": 2.347287432563161, "learning_rate": 1.6365895583347952e-06, "loss": 0.9035, "step": 20495 }, { "epoch": 0.7428509296509731, "grad_norm": 2.456684239541439, "learning_rate": 1.6361552946660586e-06, "loss": 1.0777, "step": 20496 }, { "epoch": 0.7428871733536299, "grad_norm": 2.528240329078209, "learning_rate": 1.6357210773490673e-06, "loss": 0.9279, "step": 20497 }, { "epoch": 0.7429234170562865, "grad_norm": 2.124575785700634, "learning_rate": 1.6352869063898069e-06, "loss": 1.2501, "step": 20498 }, { "epoch": 0.7429596607589432, "grad_norm": 2.1515823910614778, "learning_rate": 1.6348527817942562e-06, "loss": 0.8358, "step": 20499 }, { "epoch": 0.7429959044615998, "grad_norm": 2.187651139260974, "learning_rate": 1.6344187035684001e-06, "loss": 0.9399, "step": 20500 }, { "epoch": 0.7430321481642564, "grad_norm": 2.2645564960252353, "learning_rate": 1.633984671718216e-06, "loss": 0.6417, "step": 20501 }, { "epoch": 0.7430683918669131, "grad_norm": 2.3835030756293922, "learning_rate": 1.6335506862496909e-06, "loss": 0.985, "step": 20502 }, { "epoch": 0.7431046355695697, "grad_norm": 2.125610683594954, "learning_rate": 1.6331167471687999e-06, "loss": 0.8643, "step": 20503 }, { "epoch": 0.7431408792722265, "grad_norm": 2.5868646175230725, "learning_rate": 1.6326828544815248e-06, "loss": 0.8735, "step": 20504 }, { "epoch": 0.7431771229748831, "grad_norm": 2.5109700665560193, "learning_rate": 1.6322490081938409e-06, "loss": 1.0474, "step": 20505 }, { "epoch": 0.7432133666775398, "grad_norm": 2.3864604590264906, "learning_rate": 1.6318152083117317e-06, "loss": 0.854, "step": 20506 }, { "epoch": 0.7432496103801964, "grad_norm": 2.2097660380513595, "learning_rate": 1.6313814548411704e-06, "loss": 0.8541, "step": 20507 }, { "epoch": 0.7432858540828531, "grad_norm": 2.90997403239864, "learning_rate": 1.6309477477881368e-06, "loss": 0.9272, "step": 20508 }, { "epoch": 0.7433220977855097, "grad_norm": 2.127491490957272, "learning_rate": 1.6305140871586044e-06, "loss": 0.8522, "step": 20509 }, { "epoch": 0.7433583414881665, "grad_norm": 2.031085249336766, "learning_rate": 1.63008047295855e-06, "loss": 0.7425, "step": 20510 }, { "epoch": 0.7433945851908231, "grad_norm": 2.3972488375909284, "learning_rate": 1.6296469051939507e-06, "loss": 0.8871, "step": 20511 }, { "epoch": 0.7434308288934798, "grad_norm": 2.392872069756058, "learning_rate": 1.629213383870777e-06, "loss": 0.8604, "step": 20512 }, { "epoch": 0.7434670725961364, "grad_norm": 2.4140960539802534, "learning_rate": 1.6287799089950034e-06, "loss": 0.9531, "step": 20513 }, { "epoch": 0.7435033162987931, "grad_norm": 2.4147964071847174, "learning_rate": 1.6283464805726046e-06, "loss": 1.0079, "step": 20514 }, { "epoch": 0.7435395600014497, "grad_norm": 2.2993676090174597, "learning_rate": 1.6279130986095537e-06, "loss": 0.944, "step": 20515 }, { "epoch": 0.7435758037041064, "grad_norm": 2.4592737631931816, "learning_rate": 1.6274797631118188e-06, "loss": 0.8608, "step": 20516 }, { "epoch": 0.7436120474067631, "grad_norm": 2.161389329099486, "learning_rate": 1.6270464740853736e-06, "loss": 0.8599, "step": 20517 }, { "epoch": 0.7436482911094198, "grad_norm": 2.15172533846954, "learning_rate": 1.62661323153619e-06, "loss": 0.8003, "step": 20518 }, { "epoch": 0.7436845348120764, "grad_norm": 2.3040779413737766, "learning_rate": 1.6261800354702334e-06, "loss": 0.8744, "step": 20519 }, { "epoch": 0.7437207785147331, "grad_norm": 2.536287492433332, "learning_rate": 1.625746885893476e-06, "loss": 1.0417, "step": 20520 }, { "epoch": 0.7437570222173897, "grad_norm": 2.394469722029626, "learning_rate": 1.6253137828118859e-06, "loss": 0.8311, "step": 20521 }, { "epoch": 0.7437932659200464, "grad_norm": 2.4004711393003446, "learning_rate": 1.6248807262314326e-06, "loss": 0.7478, "step": 20522 }, { "epoch": 0.743829509622703, "grad_norm": 2.041486774133608, "learning_rate": 1.6244477161580802e-06, "loss": 0.8082, "step": 20523 }, { "epoch": 0.7438657533253598, "grad_norm": 2.447570174539743, "learning_rate": 1.6240147525977962e-06, "loss": 0.822, "step": 20524 }, { "epoch": 0.7439019970280164, "grad_norm": 2.237082486195798, "learning_rate": 1.6235818355565492e-06, "loss": 0.9221, "step": 20525 }, { "epoch": 0.743938240730673, "grad_norm": 2.0842526082245914, "learning_rate": 1.623148965040301e-06, "loss": 0.772, "step": 20526 }, { "epoch": 0.7439744844333297, "grad_norm": 2.4543037564472967, "learning_rate": 1.622716141055019e-06, "loss": 1.0586, "step": 20527 }, { "epoch": 0.7440107281359863, "grad_norm": 2.444239468820266, "learning_rate": 1.6222833636066632e-06, "loss": 0.9929, "step": 20528 }, { "epoch": 0.744046971838643, "grad_norm": 2.156401871390872, "learning_rate": 1.6218506327012028e-06, "loss": 0.8446, "step": 20529 }, { "epoch": 0.7440832155412997, "grad_norm": 2.523220252673757, "learning_rate": 1.6214179483445962e-06, "loss": 0.9281, "step": 20530 }, { "epoch": 0.7441194592439564, "grad_norm": 2.20459880086499, "learning_rate": 1.620985310542809e-06, "loss": 0.8455, "step": 20531 }, { "epoch": 0.744155702946613, "grad_norm": 2.177247918889201, "learning_rate": 1.6205527193017973e-06, "loss": 0.9016, "step": 20532 }, { "epoch": 0.7441919466492697, "grad_norm": 2.6557872605418646, "learning_rate": 1.620120174627528e-06, "loss": 0.9736, "step": 20533 }, { "epoch": 0.7442281903519263, "grad_norm": 2.6012950024193744, "learning_rate": 1.6196876765259572e-06, "loss": 0.9089, "step": 20534 }, { "epoch": 0.744264434054583, "grad_norm": 2.281215047848783, "learning_rate": 1.6192552250030474e-06, "loss": 0.8959, "step": 20535 }, { "epoch": 0.7443006777572396, "grad_norm": 2.7398065270056446, "learning_rate": 1.6188228200647549e-06, "loss": 0.9198, "step": 20536 }, { "epoch": 0.7443369214598964, "grad_norm": 2.3172050050280224, "learning_rate": 1.6183904617170386e-06, "loss": 0.7977, "step": 20537 }, { "epoch": 0.744373165162553, "grad_norm": 2.4157742540341762, "learning_rate": 1.617958149965858e-06, "loss": 0.8097, "step": 20538 }, { "epoch": 0.7444094088652097, "grad_norm": 2.3179743436853992, "learning_rate": 1.6175258848171677e-06, "loss": 0.9276, "step": 20539 }, { "epoch": 0.7444456525678663, "grad_norm": 2.206943269159972, "learning_rate": 1.617093666276925e-06, "loss": 0.9478, "step": 20540 }, { "epoch": 0.744481896270523, "grad_norm": 2.2483047792865523, "learning_rate": 1.616661494351085e-06, "loss": 1.0274, "step": 20541 }, { "epoch": 0.7445181399731796, "grad_norm": 2.2956260095753747, "learning_rate": 1.6162293690456055e-06, "loss": 0.869, "step": 20542 }, { "epoch": 0.7445543836758364, "grad_norm": 2.419510293975551, "learning_rate": 1.615797290366437e-06, "loss": 0.8833, "step": 20543 }, { "epoch": 0.744590627378493, "grad_norm": 2.7058177905161136, "learning_rate": 1.6153652583195356e-06, "loss": 0.7975, "step": 20544 }, { "epoch": 0.7446268710811497, "grad_norm": 2.472003138246345, "learning_rate": 1.6149332729108552e-06, "loss": 1.0686, "step": 20545 }, { "epoch": 0.7446631147838063, "grad_norm": 2.3041866416655794, "learning_rate": 1.6145013341463456e-06, "loss": 0.9888, "step": 20546 }, { "epoch": 0.744699358486463, "grad_norm": 2.6470292098153756, "learning_rate": 1.614069442031962e-06, "loss": 0.8545, "step": 20547 }, { "epoch": 0.7447356021891196, "grad_norm": 2.310356845134787, "learning_rate": 1.61363759657365e-06, "loss": 0.9232, "step": 20548 }, { "epoch": 0.7447718458917763, "grad_norm": 2.6289607006919327, "learning_rate": 1.6132057977773675e-06, "loss": 0.9593, "step": 20549 }, { "epoch": 0.744808089594433, "grad_norm": 2.275840928679099, "learning_rate": 1.6127740456490593e-06, "loss": 0.7802, "step": 20550 }, { "epoch": 0.7448443332970897, "grad_norm": 2.5248716349163596, "learning_rate": 1.6123423401946775e-06, "loss": 0.9041, "step": 20551 }, { "epoch": 0.7448805769997463, "grad_norm": 2.441660157042655, "learning_rate": 1.6119106814201663e-06, "loss": 0.8022, "step": 20552 }, { "epoch": 0.744916820702403, "grad_norm": 2.2596391289667954, "learning_rate": 1.6114790693314801e-06, "loss": 0.7413, "step": 20553 }, { "epoch": 0.7449530644050596, "grad_norm": 2.2226387189219676, "learning_rate": 1.611047503934562e-06, "loss": 0.8223, "step": 20554 }, { "epoch": 0.7449893081077162, "grad_norm": 2.413930967204545, "learning_rate": 1.6106159852353603e-06, "loss": 0.8553, "step": 20555 }, { "epoch": 0.745025551810373, "grad_norm": 2.7539735311087883, "learning_rate": 1.6101845132398197e-06, "loss": 0.7722, "step": 20556 }, { "epoch": 0.7450617955130296, "grad_norm": 2.188220561300331, "learning_rate": 1.609753087953886e-06, "loss": 0.7966, "step": 20557 }, { "epoch": 0.7450980392156863, "grad_norm": 2.106709267214114, "learning_rate": 1.609321709383506e-06, "loss": 0.8763, "step": 20558 }, { "epoch": 0.7451342829183429, "grad_norm": 2.224277617317393, "learning_rate": 1.608890377534621e-06, "loss": 0.7363, "step": 20559 }, { "epoch": 0.7451705266209996, "grad_norm": 2.244185889467237, "learning_rate": 1.6084590924131755e-06, "loss": 0.9135, "step": 20560 }, { "epoch": 0.7452067703236562, "grad_norm": 2.057483934847352, "learning_rate": 1.6080278540251121e-06, "loss": 0.7371, "step": 20561 }, { "epoch": 0.7452430140263129, "grad_norm": 2.0215007114068198, "learning_rate": 1.6075966623763755e-06, "loss": 0.8733, "step": 20562 }, { "epoch": 0.7452792577289696, "grad_norm": 2.192837217977503, "learning_rate": 1.607165517472904e-06, "loss": 0.8399, "step": 20563 }, { "epoch": 0.7453155014316263, "grad_norm": 2.453946053402847, "learning_rate": 1.606734419320639e-06, "loss": 0.8645, "step": 20564 }, { "epoch": 0.7453517451342829, "grad_norm": 2.2284448562913064, "learning_rate": 1.6063033679255235e-06, "loss": 0.8096, "step": 20565 }, { "epoch": 0.7453879888369396, "grad_norm": 2.195799070917695, "learning_rate": 1.6058723632934937e-06, "loss": 0.7734, "step": 20566 }, { "epoch": 0.7454242325395962, "grad_norm": 2.2792949699609935, "learning_rate": 1.6054414054304919e-06, "loss": 0.962, "step": 20567 }, { "epoch": 0.7454604762422529, "grad_norm": 2.3299486710618114, "learning_rate": 1.6050104943424506e-06, "loss": 0.831, "step": 20568 }, { "epoch": 0.7454967199449096, "grad_norm": 2.92098653482759, "learning_rate": 1.604579630035315e-06, "loss": 0.9422, "step": 20569 }, { "epoch": 0.7455329636475663, "grad_norm": 2.2496897304590475, "learning_rate": 1.6041488125150168e-06, "loss": 0.8755, "step": 20570 }, { "epoch": 0.7455692073502229, "grad_norm": 2.216341463783834, "learning_rate": 1.6037180417874964e-06, "loss": 0.7854, "step": 20571 }, { "epoch": 0.7456054510528796, "grad_norm": 2.6576652322388874, "learning_rate": 1.603287317858684e-06, "loss": 0.9019, "step": 20572 }, { "epoch": 0.7456416947555362, "grad_norm": 2.1702250088620745, "learning_rate": 1.602856640734521e-06, "loss": 0.7057, "step": 20573 }, { "epoch": 0.7456779384581929, "grad_norm": 2.5298514086885566, "learning_rate": 1.602426010420937e-06, "loss": 0.9608, "step": 20574 }, { "epoch": 0.7457141821608495, "grad_norm": 2.3884668278376218, "learning_rate": 1.6019954269238703e-06, "loss": 0.9118, "step": 20575 }, { "epoch": 0.7457504258635063, "grad_norm": 2.3621579195364415, "learning_rate": 1.6015648902492491e-06, "loss": 1.0437, "step": 20576 }, { "epoch": 0.7457866695661629, "grad_norm": 2.449581121930175, "learning_rate": 1.6011344004030088e-06, "loss": 0.8705, "step": 20577 }, { "epoch": 0.7458229132688196, "grad_norm": 2.422423431992779, "learning_rate": 1.6007039573910826e-06, "loss": 0.9554, "step": 20578 }, { "epoch": 0.7458591569714762, "grad_norm": 2.3007536190230735, "learning_rate": 1.6002735612193988e-06, "loss": 0.8496, "step": 20579 }, { "epoch": 0.7458954006741328, "grad_norm": 2.273621100528526, "learning_rate": 1.5998432118938883e-06, "loss": 1.0099, "step": 20580 }, { "epoch": 0.7459316443767895, "grad_norm": 2.3622265987575926, "learning_rate": 1.599412909420483e-06, "loss": 1.1007, "step": 20581 }, { "epoch": 0.7459678880794463, "grad_norm": 2.419820641199781, "learning_rate": 1.5989826538051124e-06, "loss": 0.8501, "step": 20582 }, { "epoch": 0.7460041317821029, "grad_norm": 2.387995499654844, "learning_rate": 1.5985524450537021e-06, "loss": 0.9832, "step": 20583 }, { "epoch": 0.7460403754847595, "grad_norm": 2.2725461107127356, "learning_rate": 1.5981222831721821e-06, "loss": 0.8499, "step": 20584 }, { "epoch": 0.7460766191874162, "grad_norm": 2.5116080085753762, "learning_rate": 1.5976921681664809e-06, "loss": 1.0529, "step": 20585 }, { "epoch": 0.7461128628900728, "grad_norm": 2.4284737374627077, "learning_rate": 1.597262100042523e-06, "loss": 1.0887, "step": 20586 }, { "epoch": 0.7461491065927295, "grad_norm": 2.6634341844832536, "learning_rate": 1.5968320788062369e-06, "loss": 0.9635, "step": 20587 }, { "epoch": 0.7461853502953861, "grad_norm": 2.1019735791365353, "learning_rate": 1.596402104463543e-06, "loss": 0.7914, "step": 20588 }, { "epoch": 0.7462215939980429, "grad_norm": 2.16629294079517, "learning_rate": 1.5959721770203735e-06, "loss": 0.749, "step": 20589 }, { "epoch": 0.7462578377006995, "grad_norm": 2.3441999224544054, "learning_rate": 1.5955422964826467e-06, "loss": 0.734, "step": 20590 }, { "epoch": 0.7462940814033562, "grad_norm": 2.4555993843422974, "learning_rate": 1.5951124628562898e-06, "loss": 0.7111, "step": 20591 }, { "epoch": 0.7463303251060128, "grad_norm": 2.2917183559587158, "learning_rate": 1.5946826761472207e-06, "loss": 1.0259, "step": 20592 }, { "epoch": 0.7463665688086695, "grad_norm": 2.2379040437715076, "learning_rate": 1.5942529363613678e-06, "loss": 0.9305, "step": 20593 }, { "epoch": 0.7464028125113261, "grad_norm": 2.3634039026613354, "learning_rate": 1.5938232435046485e-06, "loss": 0.9766, "step": 20594 }, { "epoch": 0.7464390562139828, "grad_norm": 2.2336525208742084, "learning_rate": 1.5933935975829868e-06, "loss": 0.904, "step": 20595 }, { "epoch": 0.7464752999166395, "grad_norm": 2.1250439459157033, "learning_rate": 1.5929639986022988e-06, "loss": 0.9442, "step": 20596 }, { "epoch": 0.7465115436192962, "grad_norm": 2.3503722875697743, "learning_rate": 1.5925344465685067e-06, "loss": 0.9403, "step": 20597 }, { "epoch": 0.7465477873219528, "grad_norm": 2.3823219687148014, "learning_rate": 1.5921049414875306e-06, "loss": 0.9086, "step": 20598 }, { "epoch": 0.7465840310246095, "grad_norm": 2.4349634891457854, "learning_rate": 1.5916754833652843e-06, "loss": 0.8762, "step": 20599 }, { "epoch": 0.7466202747272661, "grad_norm": 2.309442765554195, "learning_rate": 1.5912460722076916e-06, "loss": 0.8753, "step": 20600 }, { "epoch": 0.7466565184299228, "grad_norm": 2.5617524203773185, "learning_rate": 1.5908167080206654e-06, "loss": 0.8912, "step": 20601 }, { "epoch": 0.7466927621325795, "grad_norm": 2.439932298340321, "learning_rate": 1.5903873908101242e-06, "loss": 0.8613, "step": 20602 }, { "epoch": 0.7467290058352362, "grad_norm": 2.528639315194033, "learning_rate": 1.5899581205819814e-06, "loss": 0.9041, "step": 20603 }, { "epoch": 0.7467652495378928, "grad_norm": 2.4148024395204253, "learning_rate": 1.589528897342153e-06, "loss": 0.7626, "step": 20604 }, { "epoch": 0.7468014932405495, "grad_norm": 2.3859843059562986, "learning_rate": 1.589099721096556e-06, "loss": 0.7043, "step": 20605 }, { "epoch": 0.7468377369432061, "grad_norm": 2.1033201543419073, "learning_rate": 1.5886705918510996e-06, "loss": 0.9114, "step": 20606 }, { "epoch": 0.7468739806458627, "grad_norm": 2.348569071020352, "learning_rate": 1.5882415096116999e-06, "loss": 0.8937, "step": 20607 }, { "epoch": 0.7469102243485194, "grad_norm": 2.6446775636125204, "learning_rate": 1.5878124743842688e-06, "loss": 0.8671, "step": 20608 }, { "epoch": 0.7469464680511761, "grad_norm": 2.5910793433284622, "learning_rate": 1.58738348617472e-06, "loss": 1.0081, "step": 20609 }, { "epoch": 0.7469827117538328, "grad_norm": 2.1704741023601373, "learning_rate": 1.5869545449889606e-06, "loss": 0.9568, "step": 20610 }, { "epoch": 0.7470189554564894, "grad_norm": 2.090001811711837, "learning_rate": 1.586525650832904e-06, "loss": 0.9145, "step": 20611 }, { "epoch": 0.7470551991591461, "grad_norm": 2.517822293466274, "learning_rate": 1.5860968037124607e-06, "loss": 0.9877, "step": 20612 }, { "epoch": 0.7470914428618027, "grad_norm": 2.4378434513124607, "learning_rate": 1.5856680036335377e-06, "loss": 0.856, "step": 20613 }, { "epoch": 0.7471276865644594, "grad_norm": 2.4548963673191615, "learning_rate": 1.5852392506020458e-06, "loss": 0.8384, "step": 20614 }, { "epoch": 0.7471639302671161, "grad_norm": 2.0695444295261614, "learning_rate": 1.5848105446238893e-06, "loss": 0.8677, "step": 20615 }, { "epoch": 0.7472001739697728, "grad_norm": 2.4219163107697663, "learning_rate": 1.5843818857049803e-06, "loss": 0.8604, "step": 20616 }, { "epoch": 0.7472364176724294, "grad_norm": 2.311468140551847, "learning_rate": 1.5839532738512226e-06, "loss": 0.9987, "step": 20617 }, { "epoch": 0.7472726613750861, "grad_norm": 2.379094444815011, "learning_rate": 1.5835247090685236e-06, "loss": 0.856, "step": 20618 }, { "epoch": 0.7473089050777427, "grad_norm": 2.2439186485011238, "learning_rate": 1.583096191362785e-06, "loss": 0.9431, "step": 20619 }, { "epoch": 0.7473451487803994, "grad_norm": 2.42993956576364, "learning_rate": 1.5826677207399177e-06, "loss": 0.8508, "step": 20620 }, { "epoch": 0.747381392483056, "grad_norm": 2.5858350147458213, "learning_rate": 1.582239297205821e-06, "loss": 0.9494, "step": 20621 }, { "epoch": 0.7474176361857128, "grad_norm": 2.350810873254822, "learning_rate": 1.5818109207664012e-06, "loss": 0.8308, "step": 20622 }, { "epoch": 0.7474538798883694, "grad_norm": 2.5584978607710367, "learning_rate": 1.5813825914275577e-06, "loss": 0.8319, "step": 20623 }, { "epoch": 0.7474901235910261, "grad_norm": 2.1054809377346033, "learning_rate": 1.580954309195195e-06, "loss": 0.7881, "step": 20624 }, { "epoch": 0.7475263672936827, "grad_norm": 2.5128845736232073, "learning_rate": 1.580526074075216e-06, "loss": 0.9221, "step": 20625 }, { "epoch": 0.7475626109963394, "grad_norm": 2.2244007269632835, "learning_rate": 1.5800978860735177e-06, "loss": 0.9032, "step": 20626 }, { "epoch": 0.747598854698996, "grad_norm": 2.6237716661461623, "learning_rate": 1.5796697451960025e-06, "loss": 0.8751, "step": 20627 }, { "epoch": 0.7476350984016528, "grad_norm": 2.0642501463559735, "learning_rate": 1.5792416514485692e-06, "loss": 0.9021, "step": 20628 }, { "epoch": 0.7476713421043094, "grad_norm": 2.1669886473404416, "learning_rate": 1.5788136048371189e-06, "loss": 1.0068, "step": 20629 }, { "epoch": 0.7477075858069661, "grad_norm": 2.342764039902496, "learning_rate": 1.5783856053675466e-06, "loss": 0.782, "step": 20630 }, { "epoch": 0.7477438295096227, "grad_norm": 2.4724537513787777, "learning_rate": 1.5779576530457514e-06, "loss": 0.9345, "step": 20631 }, { "epoch": 0.7477800732122794, "grad_norm": 2.527191856555252, "learning_rate": 1.5775297478776313e-06, "loss": 0.9257, "step": 20632 }, { "epoch": 0.747816316914936, "grad_norm": 2.182607720682211, "learning_rate": 1.5771018898690805e-06, "loss": 0.9692, "step": 20633 }, { "epoch": 0.7478525606175926, "grad_norm": 2.5453549164442295, "learning_rate": 1.5766740790259965e-06, "loss": 0.9331, "step": 20634 }, { "epoch": 0.7478888043202494, "grad_norm": 2.3139895846253604, "learning_rate": 1.5762463153542707e-06, "loss": 0.9454, "step": 20635 }, { "epoch": 0.747925048022906, "grad_norm": 2.8241453043590736, "learning_rate": 1.575818598859803e-06, "loss": 1.0672, "step": 20636 }, { "epoch": 0.7479612917255627, "grad_norm": 2.7272095722901684, "learning_rate": 1.5753909295484826e-06, "loss": 1.0253, "step": 20637 }, { "epoch": 0.7479975354282193, "grad_norm": 2.4178266144830243, "learning_rate": 1.5749633074262055e-06, "loss": 0.9673, "step": 20638 }, { "epoch": 0.748033779130876, "grad_norm": 2.6318802063709255, "learning_rate": 1.5745357324988598e-06, "loss": 0.9062, "step": 20639 }, { "epoch": 0.7480700228335326, "grad_norm": 2.1450193353298537, "learning_rate": 1.5741082047723426e-06, "loss": 0.6855, "step": 20640 }, { "epoch": 0.7481062665361894, "grad_norm": 2.262850969163401, "learning_rate": 1.5736807242525414e-06, "loss": 0.9379, "step": 20641 }, { "epoch": 0.748142510238846, "grad_norm": 2.3195487813327857, "learning_rate": 1.5732532909453491e-06, "loss": 0.8167, "step": 20642 }, { "epoch": 0.7481787539415027, "grad_norm": 2.3373383556145093, "learning_rate": 1.5728259048566518e-06, "loss": 0.8253, "step": 20643 }, { "epoch": 0.7482149976441593, "grad_norm": 2.2525747085246963, "learning_rate": 1.5723985659923418e-06, "loss": 0.8415, "step": 20644 }, { "epoch": 0.748251241346816, "grad_norm": 2.4796528638107964, "learning_rate": 1.5719712743583076e-06, "loss": 0.9092, "step": 20645 }, { "epoch": 0.7482874850494726, "grad_norm": 2.2745168399932605, "learning_rate": 1.5715440299604346e-06, "loss": 0.8739, "step": 20646 }, { "epoch": 0.7483237287521293, "grad_norm": 1.9566521798258933, "learning_rate": 1.5711168328046118e-06, "loss": 0.771, "step": 20647 }, { "epoch": 0.748359972454786, "grad_norm": 2.562528562227542, "learning_rate": 1.5706896828967249e-06, "loss": 0.9061, "step": 20648 }, { "epoch": 0.7483962161574427, "grad_norm": 2.340287752016078, "learning_rate": 1.5702625802426618e-06, "loss": 0.9767, "step": 20649 }, { "epoch": 0.7484324598600993, "grad_norm": 2.644173369807631, "learning_rate": 1.5698355248483044e-06, "loss": 0.8181, "step": 20650 }, { "epoch": 0.748468703562756, "grad_norm": 2.557500350007571, "learning_rate": 1.5694085167195394e-06, "loss": 0.942, "step": 20651 }, { "epoch": 0.7485049472654126, "grad_norm": 2.3856483008612535, "learning_rate": 1.5689815558622518e-06, "loss": 0.8907, "step": 20652 }, { "epoch": 0.7485411909680693, "grad_norm": 2.3995585704457794, "learning_rate": 1.568554642282321e-06, "loss": 0.9607, "step": 20653 }, { "epoch": 0.748577434670726, "grad_norm": 2.5984540033584724, "learning_rate": 1.5681277759856346e-06, "loss": 0.8601, "step": 20654 }, { "epoch": 0.7486136783733827, "grad_norm": 2.265726645677951, "learning_rate": 1.5677009569780682e-06, "loss": 0.9195, "step": 20655 }, { "epoch": 0.7486499220760393, "grad_norm": 2.7437895568718993, "learning_rate": 1.56727418526551e-06, "loss": 0.8129, "step": 20656 }, { "epoch": 0.748686165778696, "grad_norm": 2.5010020268650117, "learning_rate": 1.5668474608538359e-06, "loss": 0.8261, "step": 20657 }, { "epoch": 0.7487224094813526, "grad_norm": 2.3737142385053263, "learning_rate": 1.5664207837489292e-06, "loss": 0.6968, "step": 20658 }, { "epoch": 0.7487586531840092, "grad_norm": 2.1491486835500733, "learning_rate": 1.5659941539566642e-06, "loss": 0.8048, "step": 20659 }, { "epoch": 0.7487948968866659, "grad_norm": 2.355118554815851, "learning_rate": 1.565567571482926e-06, "loss": 1.0099, "step": 20660 }, { "epoch": 0.7488311405893227, "grad_norm": 2.404777417681686, "learning_rate": 1.5651410363335883e-06, "loss": 0.9763, "step": 20661 }, { "epoch": 0.7488673842919793, "grad_norm": 2.2918088525578746, "learning_rate": 1.5647145485145315e-06, "loss": 0.8463, "step": 20662 }, { "epoch": 0.748903627994636, "grad_norm": 2.198000549054615, "learning_rate": 1.5642881080316285e-06, "loss": 0.9473, "step": 20663 }, { "epoch": 0.7489398716972926, "grad_norm": 2.3727186137144525, "learning_rate": 1.563861714890758e-06, "loss": 0.8451, "step": 20664 }, { "epoch": 0.7489761153999492, "grad_norm": 2.2353929744931604, "learning_rate": 1.5634353690977965e-06, "loss": 0.7284, "step": 20665 }, { "epoch": 0.7490123591026059, "grad_norm": 2.4733217521892032, "learning_rate": 1.5630090706586153e-06, "loss": 0.9423, "step": 20666 }, { "epoch": 0.7490486028052625, "grad_norm": 2.4382410438457014, "learning_rate": 1.562582819579091e-06, "loss": 0.7004, "step": 20667 }, { "epoch": 0.7490848465079193, "grad_norm": 2.341657863469088, "learning_rate": 1.5621566158650965e-06, "loss": 0.9112, "step": 20668 }, { "epoch": 0.7491210902105759, "grad_norm": 2.23558226089689, "learning_rate": 1.561730459522506e-06, "loss": 0.9293, "step": 20669 }, { "epoch": 0.7491573339132326, "grad_norm": 2.3487334550900685, "learning_rate": 1.5613043505571885e-06, "loss": 0.9017, "step": 20670 }, { "epoch": 0.7491935776158892, "grad_norm": 2.2972006312851008, "learning_rate": 1.560878288975018e-06, "loss": 0.8568, "step": 20671 }, { "epoch": 0.7492298213185459, "grad_norm": 2.2589877217949264, "learning_rate": 1.5604522747818656e-06, "loss": 1.0041, "step": 20672 }, { "epoch": 0.7492660650212025, "grad_norm": 2.274142011820699, "learning_rate": 1.5600263079835998e-06, "loss": 0.8382, "step": 20673 }, { "epoch": 0.7493023087238593, "grad_norm": 2.436940739515951, "learning_rate": 1.5596003885860928e-06, "loss": 0.862, "step": 20674 }, { "epoch": 0.7493385524265159, "grad_norm": 2.2650004208706425, "learning_rate": 1.5591745165952076e-06, "loss": 0.8583, "step": 20675 }, { "epoch": 0.7493747961291726, "grad_norm": 2.3174092253242415, "learning_rate": 1.5587486920168204e-06, "loss": 0.9534, "step": 20676 }, { "epoch": 0.7494110398318292, "grad_norm": 2.801777848430578, "learning_rate": 1.5583229148567936e-06, "loss": 0.909, "step": 20677 }, { "epoch": 0.7494472835344859, "grad_norm": 2.520722254380204, "learning_rate": 1.5578971851209957e-06, "loss": 0.8624, "step": 20678 }, { "epoch": 0.7494835272371425, "grad_norm": 2.5040281467037246, "learning_rate": 1.5574715028152926e-06, "loss": 1.0161, "step": 20679 }, { "epoch": 0.7495197709397992, "grad_norm": 2.4351101878810018, "learning_rate": 1.5570458679455513e-06, "loss": 0.7082, "step": 20680 }, { "epoch": 0.7495560146424559, "grad_norm": 2.340652998396189, "learning_rate": 1.556620280517635e-06, "loss": 0.84, "step": 20681 }, { "epoch": 0.7495922583451126, "grad_norm": 2.4483034909402814, "learning_rate": 1.5561947405374083e-06, "loss": 0.8405, "step": 20682 }, { "epoch": 0.7496285020477692, "grad_norm": 2.2353969954308033, "learning_rate": 1.5557692480107366e-06, "loss": 0.7721, "step": 20683 }, { "epoch": 0.7496647457504259, "grad_norm": 2.394537586316594, "learning_rate": 1.5553438029434808e-06, "loss": 0.8256, "step": 20684 }, { "epoch": 0.7497009894530825, "grad_norm": 2.72468200098047, "learning_rate": 1.5549184053415051e-06, "loss": 0.8619, "step": 20685 }, { "epoch": 0.7497372331557391, "grad_norm": 2.0236959339862084, "learning_rate": 1.5544930552106675e-06, "loss": 0.8293, "step": 20686 }, { "epoch": 0.7497734768583959, "grad_norm": 2.5760823535007757, "learning_rate": 1.5540677525568348e-06, "loss": 0.8796, "step": 20687 }, { "epoch": 0.7498097205610526, "grad_norm": 2.3195095259916148, "learning_rate": 1.5536424973858626e-06, "loss": 0.842, "step": 20688 }, { "epoch": 0.7498459642637092, "grad_norm": 2.2073982782570085, "learning_rate": 1.5532172897036145e-06, "loss": 0.729, "step": 20689 }, { "epoch": 0.7498822079663658, "grad_norm": 2.439543073722506, "learning_rate": 1.5527921295159459e-06, "loss": 0.9215, "step": 20690 }, { "epoch": 0.7499184516690225, "grad_norm": 2.4075449159886197, "learning_rate": 1.5523670168287169e-06, "loss": 1.0307, "step": 20691 }, { "epoch": 0.7499546953716791, "grad_norm": 2.3341098619718244, "learning_rate": 1.5519419516477868e-06, "loss": 0.878, "step": 20692 }, { "epoch": 0.7499909390743358, "grad_norm": 2.3979323465691045, "learning_rate": 1.5515169339790104e-06, "loss": 0.8937, "step": 20693 }, { "epoch": 0.7500271827769925, "grad_norm": 2.399179257039188, "learning_rate": 1.551091963828244e-06, "loss": 1.0062, "step": 20694 }, { "epoch": 0.7500634264796492, "grad_norm": 2.460444746801676, "learning_rate": 1.5506670412013453e-06, "loss": 0.8668, "step": 20695 }, { "epoch": 0.7500996701823058, "grad_norm": 2.2929538874845643, "learning_rate": 1.55024216610417e-06, "loss": 0.9716, "step": 20696 }, { "epoch": 0.7501359138849625, "grad_norm": 2.4007402289790742, "learning_rate": 1.5498173385425697e-06, "loss": 0.888, "step": 20697 }, { "epoch": 0.7501721575876191, "grad_norm": 2.3594463043915788, "learning_rate": 1.5493925585224002e-06, "loss": 0.9442, "step": 20698 }, { "epoch": 0.7502084012902758, "grad_norm": 2.092923828879897, "learning_rate": 1.5489678260495155e-06, "loss": 1.0375, "step": 20699 }, { "epoch": 0.7502446449929325, "grad_norm": 2.1981222263018885, "learning_rate": 1.5485431411297659e-06, "loss": 0.9642, "step": 20700 }, { "epoch": 0.7502808886955892, "grad_norm": 2.193425314990065, "learning_rate": 1.5481185037690038e-06, "loss": 0.7264, "step": 20701 }, { "epoch": 0.7503171323982458, "grad_norm": 2.3871108788000375, "learning_rate": 1.5476939139730813e-06, "loss": 0.9495, "step": 20702 }, { "epoch": 0.7503533761009025, "grad_norm": 2.2422121088405325, "learning_rate": 1.5472693717478504e-06, "loss": 0.8541, "step": 20703 }, { "epoch": 0.7503896198035591, "grad_norm": 2.2512841526941756, "learning_rate": 1.5468448770991583e-06, "loss": 0.9136, "step": 20704 }, { "epoch": 0.7504258635062158, "grad_norm": 2.450320279648864, "learning_rate": 1.5464204300328567e-06, "loss": 0.9563, "step": 20705 }, { "epoch": 0.7504621072088724, "grad_norm": 2.3666338060863947, "learning_rate": 1.5459960305547893e-06, "loss": 0.8873, "step": 20706 }, { "epoch": 0.7504983509115292, "grad_norm": 2.235621823168538, "learning_rate": 1.5455716786708114e-06, "loss": 0.8278, "step": 20707 }, { "epoch": 0.7505345946141858, "grad_norm": 2.274106008169618, "learning_rate": 1.5451473743867645e-06, "loss": 0.8104, "step": 20708 }, { "epoch": 0.7505708383168425, "grad_norm": 2.4552178991806475, "learning_rate": 1.5447231177084998e-06, "loss": 0.8347, "step": 20709 }, { "epoch": 0.7506070820194991, "grad_norm": 2.2901839427370776, "learning_rate": 1.5442989086418585e-06, "loss": 0.7698, "step": 20710 }, { "epoch": 0.7506433257221558, "grad_norm": 2.133528780669831, "learning_rate": 1.5438747471926885e-06, "loss": 0.8874, "step": 20711 }, { "epoch": 0.7506795694248124, "grad_norm": 2.0777166592242033, "learning_rate": 1.5434506333668359e-06, "loss": 0.7604, "step": 20712 }, { "epoch": 0.7507158131274692, "grad_norm": 2.335064426281611, "learning_rate": 1.5430265671701416e-06, "loss": 1.0215, "step": 20713 }, { "epoch": 0.7507520568301258, "grad_norm": 2.532111937036092, "learning_rate": 1.5426025486084506e-06, "loss": 1.0484, "step": 20714 }, { "epoch": 0.7507883005327824, "grad_norm": 2.2337187036005517, "learning_rate": 1.5421785776876052e-06, "loss": 0.9165, "step": 20715 }, { "epoch": 0.7508245442354391, "grad_norm": 2.3805924202718938, "learning_rate": 1.5417546544134493e-06, "loss": 0.9537, "step": 20716 }, { "epoch": 0.7508607879380957, "grad_norm": 2.3370563922895715, "learning_rate": 1.5413307787918214e-06, "loss": 0.8041, "step": 20717 }, { "epoch": 0.7508970316407524, "grad_norm": 2.0542348991699115, "learning_rate": 1.540906950828564e-06, "loss": 0.8021, "step": 20718 }, { "epoch": 0.750933275343409, "grad_norm": 2.405098575137997, "learning_rate": 1.5404831705295176e-06, "loss": 1.0201, "step": 20719 }, { "epoch": 0.7509695190460658, "grad_norm": 2.4521316165314153, "learning_rate": 1.5400594379005201e-06, "loss": 0.879, "step": 20720 }, { "epoch": 0.7510057627487224, "grad_norm": 2.1536604590501898, "learning_rate": 1.539635752947412e-06, "loss": 0.8105, "step": 20721 }, { "epoch": 0.7510420064513791, "grad_norm": 2.526649558125741, "learning_rate": 1.5392121156760276e-06, "loss": 1.0804, "step": 20722 }, { "epoch": 0.7510782501540357, "grad_norm": 2.1112270050751, "learning_rate": 1.5387885260922102e-06, "loss": 0.8637, "step": 20723 }, { "epoch": 0.7511144938566924, "grad_norm": 2.302229329920281, "learning_rate": 1.5383649842017923e-06, "loss": 1.0127, "step": 20724 }, { "epoch": 0.751150737559349, "grad_norm": 2.6985453701727766, "learning_rate": 1.5379414900106132e-06, "loss": 0.9165, "step": 20725 }, { "epoch": 0.7511869812620058, "grad_norm": 2.2048270234901945, "learning_rate": 1.537518043524503e-06, "loss": 0.9696, "step": 20726 }, { "epoch": 0.7512232249646624, "grad_norm": 2.3876030859980535, "learning_rate": 1.5370946447493028e-06, "loss": 0.8427, "step": 20727 }, { "epoch": 0.7512594686673191, "grad_norm": 2.005460624322965, "learning_rate": 1.5366712936908434e-06, "loss": 0.6152, "step": 20728 }, { "epoch": 0.7512957123699757, "grad_norm": 2.4341117073169833, "learning_rate": 1.5362479903549604e-06, "loss": 1.0114, "step": 20729 }, { "epoch": 0.7513319560726324, "grad_norm": 2.384411096456625, "learning_rate": 1.5358247347474836e-06, "loss": 0.8164, "step": 20730 }, { "epoch": 0.751368199775289, "grad_norm": 2.2800324706595294, "learning_rate": 1.5354015268742466e-06, "loss": 1.0929, "step": 20731 }, { "epoch": 0.7514044434779457, "grad_norm": 2.692526914894455, "learning_rate": 1.5349783667410833e-06, "loss": 0.8722, "step": 20732 }, { "epoch": 0.7514406871806024, "grad_norm": 2.353030238853418, "learning_rate": 1.5345552543538205e-06, "loss": 0.8264, "step": 20733 }, { "epoch": 0.7514769308832591, "grad_norm": 2.151336007952619, "learning_rate": 1.534132189718291e-06, "loss": 0.8431, "step": 20734 }, { "epoch": 0.7515131745859157, "grad_norm": 2.416599623562652, "learning_rate": 1.5337091728403237e-06, "loss": 0.9673, "step": 20735 }, { "epoch": 0.7515494182885724, "grad_norm": 2.3991266668800186, "learning_rate": 1.533286203725749e-06, "loss": 0.8818, "step": 20736 }, { "epoch": 0.751585661991229, "grad_norm": 2.3691493212226282, "learning_rate": 1.5328632823803919e-06, "loss": 0.7409, "step": 20737 }, { "epoch": 0.7516219056938857, "grad_norm": 2.2896625715779493, "learning_rate": 1.5324404088100826e-06, "loss": 0.7841, "step": 20738 }, { "epoch": 0.7516581493965423, "grad_norm": 2.27245288892251, "learning_rate": 1.5320175830206486e-06, "loss": 0.6085, "step": 20739 }, { "epoch": 0.751694393099199, "grad_norm": 2.041221926383519, "learning_rate": 1.5315948050179137e-06, "loss": 0.9595, "step": 20740 }, { "epoch": 0.7517306368018557, "grad_norm": 2.1317767221139468, "learning_rate": 1.5311720748077058e-06, "loss": 0.811, "step": 20741 }, { "epoch": 0.7517668805045123, "grad_norm": 2.499077703224111, "learning_rate": 1.5307493923958454e-06, "loss": 0.7626, "step": 20742 }, { "epoch": 0.751803124207169, "grad_norm": 2.439313669393078, "learning_rate": 1.5303267577881641e-06, "loss": 0.8915, "step": 20743 }, { "epoch": 0.7518393679098256, "grad_norm": 2.635283741452126, "learning_rate": 1.52990417099048e-06, "loss": 0.9745, "step": 20744 }, { "epoch": 0.7518756116124823, "grad_norm": 2.2346446379420417, "learning_rate": 1.5294816320086192e-06, "loss": 1.165, "step": 20745 }, { "epoch": 0.751911855315139, "grad_norm": 2.3244871025854588, "learning_rate": 1.5290591408483995e-06, "loss": 0.9296, "step": 20746 }, { "epoch": 0.7519480990177957, "grad_norm": 2.246771027928786, "learning_rate": 1.5286366975156492e-06, "loss": 0.7411, "step": 20747 }, { "epoch": 0.7519843427204523, "grad_norm": 2.4901691524880367, "learning_rate": 1.5282143020161839e-06, "loss": 1.03, "step": 20748 }, { "epoch": 0.752020586423109, "grad_norm": 2.5617755363902575, "learning_rate": 1.5277919543558274e-06, "loss": 1.0657, "step": 20749 }, { "epoch": 0.7520568301257656, "grad_norm": 2.224967551619004, "learning_rate": 1.527369654540396e-06, "loss": 0.9279, "step": 20750 }, { "epoch": 0.7520930738284223, "grad_norm": 2.282094267122825, "learning_rate": 1.5269474025757109e-06, "loss": 0.7334, "step": 20751 }, { "epoch": 0.7521293175310789, "grad_norm": 2.314725620412501, "learning_rate": 1.5265251984675917e-06, "loss": 0.7222, "step": 20752 }, { "epoch": 0.7521655612337357, "grad_norm": 2.392156845865502, "learning_rate": 1.526103042221853e-06, "loss": 0.9636, "step": 20753 }, { "epoch": 0.7522018049363923, "grad_norm": 2.3203767448739177, "learning_rate": 1.5256809338443136e-06, "loss": 1.0023, "step": 20754 }, { "epoch": 0.752238048639049, "grad_norm": 2.3345715342989344, "learning_rate": 1.5252588733407891e-06, "loss": 0.9049, "step": 20755 }, { "epoch": 0.7522742923417056, "grad_norm": 2.2000295189226104, "learning_rate": 1.5248368607170982e-06, "loss": 0.9893, "step": 20756 }, { "epoch": 0.7523105360443623, "grad_norm": 2.3208998803690473, "learning_rate": 1.5244148959790516e-06, "loss": 0.6355, "step": 20757 }, { "epoch": 0.7523467797470189, "grad_norm": 2.27416884365744, "learning_rate": 1.523992979132466e-06, "loss": 0.9407, "step": 20758 }, { "epoch": 0.7523830234496757, "grad_norm": 2.524889027152973, "learning_rate": 1.5235711101831562e-06, "loss": 0.9443, "step": 20759 }, { "epoch": 0.7524192671523323, "grad_norm": 2.194994548374562, "learning_rate": 1.523149289136933e-06, "loss": 0.9429, "step": 20760 }, { "epoch": 0.752455510854989, "grad_norm": 2.209109248038064, "learning_rate": 1.5227275159996096e-06, "loss": 0.8901, "step": 20761 }, { "epoch": 0.7524917545576456, "grad_norm": 2.5340334846221344, "learning_rate": 1.5223057907769978e-06, "loss": 0.9061, "step": 20762 }, { "epoch": 0.7525279982603023, "grad_norm": 2.2455255707421693, "learning_rate": 1.5218841134749107e-06, "loss": 0.8845, "step": 20763 }, { "epoch": 0.7525642419629589, "grad_norm": 2.3913388785995187, "learning_rate": 1.5214624840991554e-06, "loss": 1.1515, "step": 20764 }, { "epoch": 0.7526004856656155, "grad_norm": 2.5627840676059814, "learning_rate": 1.5210409026555434e-06, "loss": 0.8212, "step": 20765 }, { "epoch": 0.7526367293682723, "grad_norm": 2.3290327618342035, "learning_rate": 1.520619369149884e-06, "loss": 0.7962, "step": 20766 }, { "epoch": 0.752672973070929, "grad_norm": 2.2490747504792967, "learning_rate": 1.5201978835879867e-06, "loss": 0.8156, "step": 20767 }, { "epoch": 0.7527092167735856, "grad_norm": 2.403523671296423, "learning_rate": 1.5197764459756564e-06, "loss": 0.8223, "step": 20768 }, { "epoch": 0.7527454604762422, "grad_norm": 2.666410503377506, "learning_rate": 1.5193550563187021e-06, "loss": 0.8399, "step": 20769 }, { "epoch": 0.7527817041788989, "grad_norm": 2.2417875688295967, "learning_rate": 1.5189337146229316e-06, "loss": 0.7748, "step": 20770 }, { "epoch": 0.7528179478815555, "grad_norm": 2.284226277749937, "learning_rate": 1.5185124208941476e-06, "loss": 0.8715, "step": 20771 }, { "epoch": 0.7528541915842123, "grad_norm": 2.243082481382663, "learning_rate": 1.5180911751381583e-06, "loss": 0.9312, "step": 20772 }, { "epoch": 0.7528904352868689, "grad_norm": 2.3156021892177474, "learning_rate": 1.517669977360764e-06, "loss": 0.7768, "step": 20773 }, { "epoch": 0.7529266789895256, "grad_norm": 2.0302605639039726, "learning_rate": 1.5172488275677743e-06, "loss": 0.6506, "step": 20774 }, { "epoch": 0.7529629226921822, "grad_norm": 2.2416362458303487, "learning_rate": 1.5168277257649882e-06, "loss": 0.8985, "step": 20775 }, { "epoch": 0.7529991663948389, "grad_norm": 2.770008769404411, "learning_rate": 1.516406671958211e-06, "loss": 0.8189, "step": 20776 }, { "epoch": 0.7530354100974955, "grad_norm": 2.3734000347128963, "learning_rate": 1.5159856661532412e-06, "loss": 0.9532, "step": 20777 }, { "epoch": 0.7530716538001522, "grad_norm": 2.4001318968565757, "learning_rate": 1.5155647083558822e-06, "loss": 0.7958, "step": 20778 }, { "epoch": 0.7531078975028089, "grad_norm": 2.301013938442898, "learning_rate": 1.5151437985719352e-06, "loss": 0.925, "step": 20779 }, { "epoch": 0.7531441412054656, "grad_norm": 2.588162529979221, "learning_rate": 1.5147229368071981e-06, "loss": 0.9131, "step": 20780 }, { "epoch": 0.7531803849081222, "grad_norm": 2.4082293026825066, "learning_rate": 1.5143021230674709e-06, "loss": 0.8717, "step": 20781 }, { "epoch": 0.7532166286107789, "grad_norm": 2.6796230013545985, "learning_rate": 1.513881357358552e-06, "loss": 0.8456, "step": 20782 }, { "epoch": 0.7532528723134355, "grad_norm": 2.719920959457825, "learning_rate": 1.5134606396862418e-06, "loss": 0.9235, "step": 20783 }, { "epoch": 0.7532891160160922, "grad_norm": 2.42765439410181, "learning_rate": 1.5130399700563337e-06, "loss": 1.1208, "step": 20784 }, { "epoch": 0.7533253597187489, "grad_norm": 2.3138770529081496, "learning_rate": 1.5126193484746254e-06, "loss": 1.0066, "step": 20785 }, { "epoch": 0.7533616034214056, "grad_norm": 2.098206112802151, "learning_rate": 1.5121987749469141e-06, "loss": 0.8393, "step": 20786 }, { "epoch": 0.7533978471240622, "grad_norm": 2.4346072971976187, "learning_rate": 1.5117782494789957e-06, "loss": 0.8286, "step": 20787 }, { "epoch": 0.7534340908267189, "grad_norm": 2.389950865151677, "learning_rate": 1.5113577720766615e-06, "loss": 0.9823, "step": 20788 }, { "epoch": 0.7534703345293755, "grad_norm": 2.315111446920937, "learning_rate": 1.510937342745708e-06, "loss": 0.7153, "step": 20789 }, { "epoch": 0.7535065782320322, "grad_norm": 1.8906468011206203, "learning_rate": 1.510516961491929e-06, "loss": 0.7445, "step": 20790 }, { "epoch": 0.7535428219346888, "grad_norm": 2.412050812149053, "learning_rate": 1.5100966283211138e-06, "loss": 0.942, "step": 20791 }, { "epoch": 0.7535790656373456, "grad_norm": 2.2538872163906447, "learning_rate": 1.5096763432390587e-06, "loss": 1.0784, "step": 20792 }, { "epoch": 0.7536153093400022, "grad_norm": 2.3455952185458395, "learning_rate": 1.5092561062515492e-06, "loss": 0.8987, "step": 20793 }, { "epoch": 0.7536515530426589, "grad_norm": 2.202991619536567, "learning_rate": 1.508835917364383e-06, "loss": 0.8211, "step": 20794 }, { "epoch": 0.7536877967453155, "grad_norm": 2.071190045622498, "learning_rate": 1.5084157765833447e-06, "loss": 0.7789, "step": 20795 }, { "epoch": 0.7537240404479721, "grad_norm": 2.38717904852599, "learning_rate": 1.5079956839142267e-06, "loss": 1.0433, "step": 20796 }, { "epoch": 0.7537602841506288, "grad_norm": 2.4325475524439844, "learning_rate": 1.507575639362815e-06, "loss": 0.7493, "step": 20797 }, { "epoch": 0.7537965278532854, "grad_norm": 2.481964892752853, "learning_rate": 1.5071556429348988e-06, "loss": 0.9082, "step": 20798 }, { "epoch": 0.7538327715559422, "grad_norm": 2.2658197769594906, "learning_rate": 1.5067356946362666e-06, "loss": 0.7734, "step": 20799 }, { "epoch": 0.7538690152585988, "grad_norm": 2.505749327554398, "learning_rate": 1.506315794472703e-06, "loss": 0.7811, "step": 20800 }, { "epoch": 0.7539052589612555, "grad_norm": 2.2744691733039706, "learning_rate": 1.5058959424499942e-06, "loss": 1.074, "step": 20801 }, { "epoch": 0.7539415026639121, "grad_norm": 2.340505902953727, "learning_rate": 1.5054761385739263e-06, "loss": 1.0149, "step": 20802 }, { "epoch": 0.7539777463665688, "grad_norm": 2.401212539065594, "learning_rate": 1.5050563828502857e-06, "loss": 0.9389, "step": 20803 }, { "epoch": 0.7540139900692254, "grad_norm": 2.1077091933277363, "learning_rate": 1.5046366752848524e-06, "loss": 1.0446, "step": 20804 }, { "epoch": 0.7540502337718822, "grad_norm": 2.0574991336870925, "learning_rate": 1.5042170158834123e-06, "loss": 0.8609, "step": 20805 }, { "epoch": 0.7540864774745388, "grad_norm": 2.1370847580734584, "learning_rate": 1.5037974046517489e-06, "loss": 0.9425, "step": 20806 }, { "epoch": 0.7541227211771955, "grad_norm": 2.014419854544545, "learning_rate": 1.5033778415956413e-06, "loss": 0.7902, "step": 20807 }, { "epoch": 0.7541589648798521, "grad_norm": 2.45005683283863, "learning_rate": 1.5029583267208736e-06, "loss": 0.7505, "step": 20808 }, { "epoch": 0.7541952085825088, "grad_norm": 2.5266139351599373, "learning_rate": 1.5025388600332225e-06, "loss": 1.0331, "step": 20809 }, { "epoch": 0.7542314522851654, "grad_norm": 2.495718981870034, "learning_rate": 1.5021194415384738e-06, "loss": 0.9565, "step": 20810 }, { "epoch": 0.7542676959878221, "grad_norm": 2.5289330408751485, "learning_rate": 1.5017000712424019e-06, "loss": 0.7273, "step": 20811 }, { "epoch": 0.7543039396904788, "grad_norm": 2.4445816102156046, "learning_rate": 1.501280749150789e-06, "loss": 0.9356, "step": 20812 }, { "epoch": 0.7543401833931355, "grad_norm": 1.9867657645617507, "learning_rate": 1.5008614752694084e-06, "loss": 0.7754, "step": 20813 }, { "epoch": 0.7543764270957921, "grad_norm": 2.3883512406321064, "learning_rate": 1.5004422496040432e-06, "loss": 0.8323, "step": 20814 }, { "epoch": 0.7544126707984488, "grad_norm": 2.431496459591891, "learning_rate": 1.5000230721604665e-06, "loss": 0.9406, "step": 20815 }, { "epoch": 0.7544489145011054, "grad_norm": 2.6527030192581975, "learning_rate": 1.4996039429444558e-06, "loss": 0.8495, "step": 20816 }, { "epoch": 0.754485158203762, "grad_norm": 2.386086813902351, "learning_rate": 1.499184861961785e-06, "loss": 1.0513, "step": 20817 }, { "epoch": 0.7545214019064188, "grad_norm": 2.242452963954013, "learning_rate": 1.4987658292182295e-06, "loss": 0.9041, "step": 20818 }, { "epoch": 0.7545576456090755, "grad_norm": 2.589708679189708, "learning_rate": 1.4983468447195642e-06, "loss": 0.77, "step": 20819 }, { "epoch": 0.7545938893117321, "grad_norm": 2.2319661016876156, "learning_rate": 1.497927908471561e-06, "loss": 0.7892, "step": 20820 }, { "epoch": 0.7546301330143887, "grad_norm": 2.3132709258704587, "learning_rate": 1.497509020479993e-06, "loss": 0.7116, "step": 20821 }, { "epoch": 0.7546663767170454, "grad_norm": 2.029623899585549, "learning_rate": 1.4970901807506321e-06, "loss": 0.6572, "step": 20822 }, { "epoch": 0.754702620419702, "grad_norm": 2.3408620181812205, "learning_rate": 1.496671389289252e-06, "loss": 0.8557, "step": 20823 }, { "epoch": 0.7547388641223587, "grad_norm": 2.25146509156054, "learning_rate": 1.4962526461016197e-06, "loss": 0.737, "step": 20824 }, { "epoch": 0.7547751078250154, "grad_norm": 2.430810848443044, "learning_rate": 1.4958339511935065e-06, "loss": 0.9557, "step": 20825 }, { "epoch": 0.7548113515276721, "grad_norm": 2.3186526874543256, "learning_rate": 1.4954153045706844e-06, "loss": 0.9238, "step": 20826 }, { "epoch": 0.7548475952303287, "grad_norm": 2.2022523860269616, "learning_rate": 1.4949967062389181e-06, "loss": 0.8946, "step": 20827 }, { "epoch": 0.7548838389329854, "grad_norm": 2.4562373482345987, "learning_rate": 1.494578156203979e-06, "loss": 0.8229, "step": 20828 }, { "epoch": 0.754920082635642, "grad_norm": 2.2110494263153027, "learning_rate": 1.4941596544716296e-06, "loss": 0.8686, "step": 20829 }, { "epoch": 0.7549563263382987, "grad_norm": 2.076596901107356, "learning_rate": 1.4937412010476431e-06, "loss": 0.905, "step": 20830 }, { "epoch": 0.7549925700409554, "grad_norm": 2.43290975777883, "learning_rate": 1.493322795937781e-06, "loss": 0.8267, "step": 20831 }, { "epoch": 0.7550288137436121, "grad_norm": 2.382200651939294, "learning_rate": 1.4929044391478115e-06, "loss": 1.12, "step": 20832 }, { "epoch": 0.7550650574462687, "grad_norm": 2.516624350995315, "learning_rate": 1.4924861306834948e-06, "loss": 0.9503, "step": 20833 }, { "epoch": 0.7551013011489254, "grad_norm": 2.333453663531225, "learning_rate": 1.4920678705506008e-06, "loss": 0.8152, "step": 20834 }, { "epoch": 0.755137544851582, "grad_norm": 2.1486226750704738, "learning_rate": 1.4916496587548884e-06, "loss": 0.7608, "step": 20835 }, { "epoch": 0.7551737885542387, "grad_norm": 2.3410045506453687, "learning_rate": 1.4912314953021233e-06, "loss": 0.8599, "step": 20836 }, { "epoch": 0.7552100322568953, "grad_norm": 2.199788250216837, "learning_rate": 1.4908133801980651e-06, "loss": 0.7864, "step": 20837 }, { "epoch": 0.7552462759595521, "grad_norm": 2.4582266868724587, "learning_rate": 1.4903953134484762e-06, "loss": 0.8558, "step": 20838 }, { "epoch": 0.7552825196622087, "grad_norm": 2.500068434561949, "learning_rate": 1.4899772950591185e-06, "loss": 0.9655, "step": 20839 }, { "epoch": 0.7553187633648654, "grad_norm": 2.576653928746702, "learning_rate": 1.4895593250357482e-06, "loss": 0.7999, "step": 20840 }, { "epoch": 0.755355007067522, "grad_norm": 2.421703416998175, "learning_rate": 1.48914140338413e-06, "loss": 0.8314, "step": 20841 }, { "epoch": 0.7553912507701787, "grad_norm": 2.4967627958122796, "learning_rate": 1.4887235301100188e-06, "loss": 1.0175, "step": 20842 }, { "epoch": 0.7554274944728353, "grad_norm": 2.2187110263928718, "learning_rate": 1.4883057052191751e-06, "loss": 0.8173, "step": 20843 }, { "epoch": 0.7554637381754921, "grad_norm": 2.6589293986874445, "learning_rate": 1.4878879287173537e-06, "loss": 0.9391, "step": 20844 }, { "epoch": 0.7554999818781487, "grad_norm": 2.1115004421745405, "learning_rate": 1.4874702006103125e-06, "loss": 0.7286, "step": 20845 }, { "epoch": 0.7555362255808054, "grad_norm": 2.37966326731774, "learning_rate": 1.487052520903809e-06, "loss": 0.8529, "step": 20846 }, { "epoch": 0.755572469283462, "grad_norm": 2.1983930996826504, "learning_rate": 1.486634889603596e-06, "loss": 1.0106, "step": 20847 }, { "epoch": 0.7556087129861186, "grad_norm": 2.0726944759449752, "learning_rate": 1.486217306715429e-06, "loss": 0.9321, "step": 20848 }, { "epoch": 0.7556449566887753, "grad_norm": 2.32119878712892, "learning_rate": 1.4857997722450624e-06, "loss": 1.1058, "step": 20849 }, { "epoch": 0.7556812003914319, "grad_norm": 2.260611129509849, "learning_rate": 1.4853822861982514e-06, "loss": 0.7816, "step": 20850 }, { "epoch": 0.7557174440940887, "grad_norm": 2.7035787027432026, "learning_rate": 1.4849648485807454e-06, "loss": 0.929, "step": 20851 }, { "epoch": 0.7557536877967453, "grad_norm": 2.4051163924519874, "learning_rate": 1.4845474593982978e-06, "loss": 0.8803, "step": 20852 }, { "epoch": 0.755789931499402, "grad_norm": 2.4318657587676293, "learning_rate": 1.4841301186566603e-06, "loss": 0.9137, "step": 20853 }, { "epoch": 0.7558261752020586, "grad_norm": 2.426739005428503, "learning_rate": 1.4837128263615853e-06, "loss": 0.7844, "step": 20854 }, { "epoch": 0.7558624189047153, "grad_norm": 2.4723008198958314, "learning_rate": 1.4832955825188188e-06, "loss": 0.9204, "step": 20855 }, { "epoch": 0.7558986626073719, "grad_norm": 2.3217681988995906, "learning_rate": 1.4828783871341123e-06, "loss": 0.8533, "step": 20856 }, { "epoch": 0.7559349063100287, "grad_norm": 2.1105267408485067, "learning_rate": 1.4824612402132166e-06, "loss": 0.7785, "step": 20857 }, { "epoch": 0.7559711500126853, "grad_norm": 2.4021129806657657, "learning_rate": 1.4820441417618757e-06, "loss": 0.9399, "step": 20858 }, { "epoch": 0.756007393715342, "grad_norm": 2.0824689239119305, "learning_rate": 1.481627091785841e-06, "loss": 0.788, "step": 20859 }, { "epoch": 0.7560436374179986, "grad_norm": 2.521966051999644, "learning_rate": 1.4812100902908538e-06, "loss": 0.8548, "step": 20860 }, { "epoch": 0.7560798811206553, "grad_norm": 2.3812773383927395, "learning_rate": 1.4807931372826662e-06, "loss": 1.0313, "step": 20861 }, { "epoch": 0.7561161248233119, "grad_norm": 2.101764817502146, "learning_rate": 1.4803762327670195e-06, "loss": 0.7388, "step": 20862 }, { "epoch": 0.7561523685259686, "grad_norm": 2.091646411348569, "learning_rate": 1.4799593767496617e-06, "loss": 0.9417, "step": 20863 }, { "epoch": 0.7561886122286253, "grad_norm": 2.2382613185072304, "learning_rate": 1.479542569236333e-06, "loss": 0.8991, "step": 20864 }, { "epoch": 0.756224855931282, "grad_norm": 2.3244176398908247, "learning_rate": 1.4791258102327782e-06, "loss": 0.7585, "step": 20865 }, { "epoch": 0.7562610996339386, "grad_norm": 2.4777394773579338, "learning_rate": 1.4787090997447423e-06, "loss": 0.8769, "step": 20866 }, { "epoch": 0.7562973433365953, "grad_norm": 2.42214675455369, "learning_rate": 1.4782924377779639e-06, "loss": 0.8517, "step": 20867 }, { "epoch": 0.7563335870392519, "grad_norm": 2.8152505893084414, "learning_rate": 1.4778758243381851e-06, "loss": 0.8032, "step": 20868 }, { "epoch": 0.7563698307419086, "grad_norm": 2.323555764644534, "learning_rate": 1.4774592594311482e-06, "loss": 0.8413, "step": 20869 }, { "epoch": 0.7564060744445652, "grad_norm": 2.284370765595766, "learning_rate": 1.4770427430625938e-06, "loss": 0.9843, "step": 20870 }, { "epoch": 0.756442318147222, "grad_norm": 2.5312533843184863, "learning_rate": 1.4766262752382582e-06, "loss": 0.8302, "step": 20871 }, { "epoch": 0.7564785618498786, "grad_norm": 2.0209589346979127, "learning_rate": 1.4762098559638816e-06, "loss": 0.8585, "step": 20872 }, { "epoch": 0.7565148055525353, "grad_norm": 2.6132465172268002, "learning_rate": 1.4757934852452015e-06, "loss": 0.6619, "step": 20873 }, { "epoch": 0.7565510492551919, "grad_norm": 2.2752773396471273, "learning_rate": 1.4753771630879582e-06, "loss": 0.8956, "step": 20874 }, { "epoch": 0.7565872929578485, "grad_norm": 2.4249372335027957, "learning_rate": 1.474960889497884e-06, "loss": 0.6676, "step": 20875 }, { "epoch": 0.7566235366605052, "grad_norm": 2.4445458664594977, "learning_rate": 1.4745446644807164e-06, "loss": 0.8466, "step": 20876 }, { "epoch": 0.756659780363162, "grad_norm": 2.4411885317272706, "learning_rate": 1.4741284880421935e-06, "loss": 0.7889, "step": 20877 }, { "epoch": 0.7566960240658186, "grad_norm": 2.3588214995393817, "learning_rate": 1.4737123601880459e-06, "loss": 1.0415, "step": 20878 }, { "epoch": 0.7567322677684752, "grad_norm": 1.9579081707069235, "learning_rate": 1.4732962809240109e-06, "loss": 0.7003, "step": 20879 }, { "epoch": 0.7567685114711319, "grad_norm": 2.7938284546015173, "learning_rate": 1.4728802502558171e-06, "loss": 0.9028, "step": 20880 }, { "epoch": 0.7568047551737885, "grad_norm": 2.4300662886107487, "learning_rate": 1.4724642681892037e-06, "loss": 0.9552, "step": 20881 }, { "epoch": 0.7568409988764452, "grad_norm": 2.237905476049308, "learning_rate": 1.4720483347298974e-06, "loss": 0.7678, "step": 20882 }, { "epoch": 0.7568772425791018, "grad_norm": 2.203423850532032, "learning_rate": 1.4716324498836331e-06, "loss": 0.9061, "step": 20883 }, { "epoch": 0.7569134862817586, "grad_norm": 2.2805027651202208, "learning_rate": 1.4712166136561378e-06, "loss": 0.8204, "step": 20884 }, { "epoch": 0.7569497299844152, "grad_norm": 2.40461166544744, "learning_rate": 1.4708008260531443e-06, "loss": 0.8557, "step": 20885 }, { "epoch": 0.7569859736870719, "grad_norm": 2.483458536687641, "learning_rate": 1.470385087080382e-06, "loss": 0.9112, "step": 20886 }, { "epoch": 0.7570222173897285, "grad_norm": 2.342983232042369, "learning_rate": 1.4699693967435768e-06, "loss": 0.841, "step": 20887 }, { "epoch": 0.7570584610923852, "grad_norm": 2.301004289553339, "learning_rate": 1.469553755048459e-06, "loss": 0.9961, "step": 20888 }, { "epoch": 0.7570947047950418, "grad_norm": 2.415643693252661, "learning_rate": 1.469138162000755e-06, "loss": 0.7934, "step": 20889 }, { "epoch": 0.7571309484976986, "grad_norm": 2.7740680421751316, "learning_rate": 1.4687226176061931e-06, "loss": 1.0613, "step": 20890 }, { "epoch": 0.7571671922003552, "grad_norm": 2.7282176910740725, "learning_rate": 1.4683071218704965e-06, "loss": 0.9146, "step": 20891 }, { "epoch": 0.7572034359030119, "grad_norm": 2.1194348534907057, "learning_rate": 1.467891674799392e-06, "loss": 0.7355, "step": 20892 }, { "epoch": 0.7572396796056685, "grad_norm": 2.885443858252896, "learning_rate": 1.4674762763986055e-06, "loss": 0.9531, "step": 20893 }, { "epoch": 0.7572759233083252, "grad_norm": 2.0428393098744912, "learning_rate": 1.4670609266738577e-06, "loss": 0.9092, "step": 20894 }, { "epoch": 0.7573121670109818, "grad_norm": 2.3300082630419103, "learning_rate": 1.466645625630876e-06, "loss": 0.9161, "step": 20895 }, { "epoch": 0.7573484107136385, "grad_norm": 2.364159902160023, "learning_rate": 1.466230373275377e-06, "loss": 0.8831, "step": 20896 }, { "epoch": 0.7573846544162952, "grad_norm": 2.373277997306248, "learning_rate": 1.465815169613089e-06, "loss": 0.9073, "step": 20897 }, { "epoch": 0.7574208981189519, "grad_norm": 2.1890583400644323, "learning_rate": 1.46540001464973e-06, "loss": 0.7848, "step": 20898 }, { "epoch": 0.7574571418216085, "grad_norm": 2.4432446844663365, "learning_rate": 1.4649849083910218e-06, "loss": 0.9544, "step": 20899 }, { "epoch": 0.7574933855242652, "grad_norm": 2.678079579474186, "learning_rate": 1.464569850842681e-06, "loss": 0.7329, "step": 20900 }, { "epoch": 0.7575296292269218, "grad_norm": 2.4404283521182437, "learning_rate": 1.4641548420104322e-06, "loss": 0.98, "step": 20901 }, { "epoch": 0.7575658729295784, "grad_norm": 2.2595620054971235, "learning_rate": 1.4637398818999903e-06, "loss": 1.0002, "step": 20902 }, { "epoch": 0.7576021166322352, "grad_norm": 2.291179944162992, "learning_rate": 1.4633249705170755e-06, "loss": 1.0, "step": 20903 }, { "epoch": 0.7576383603348918, "grad_norm": 2.4367264074847967, "learning_rate": 1.462910107867402e-06, "loss": 0.9139, "step": 20904 }, { "epoch": 0.7576746040375485, "grad_norm": 2.6310244628000587, "learning_rate": 1.4624952939566883e-06, "loss": 0.8564, "step": 20905 }, { "epoch": 0.7577108477402051, "grad_norm": 2.382507674368358, "learning_rate": 1.4620805287906515e-06, "loss": 0.8571, "step": 20906 }, { "epoch": 0.7577470914428618, "grad_norm": 2.3848795998286136, "learning_rate": 1.4616658123750033e-06, "loss": 0.951, "step": 20907 }, { "epoch": 0.7577833351455184, "grad_norm": 2.5447797962464462, "learning_rate": 1.4612511447154609e-06, "loss": 0.9603, "step": 20908 }, { "epoch": 0.7578195788481751, "grad_norm": 2.7642624385097236, "learning_rate": 1.4608365258177371e-06, "loss": 0.8341, "step": 20909 }, { "epoch": 0.7578558225508318, "grad_norm": 2.2879446248948203, "learning_rate": 1.4604219556875476e-06, "loss": 0.8343, "step": 20910 }, { "epoch": 0.7578920662534885, "grad_norm": 2.325715018110206, "learning_rate": 1.4600074343306008e-06, "loss": 0.8569, "step": 20911 }, { "epoch": 0.7579283099561451, "grad_norm": 2.355890905199205, "learning_rate": 1.4595929617526112e-06, "loss": 0.8576, "step": 20912 }, { "epoch": 0.7579645536588018, "grad_norm": 2.398432723585453, "learning_rate": 1.459178537959291e-06, "loss": 0.9434, "step": 20913 }, { "epoch": 0.7580007973614584, "grad_norm": 2.339846231100225, "learning_rate": 1.4587641629563471e-06, "loss": 0.7904, "step": 20914 }, { "epoch": 0.7580370410641151, "grad_norm": 2.6051302814062507, "learning_rate": 1.458349836749493e-06, "loss": 0.8131, "step": 20915 }, { "epoch": 0.7580732847667718, "grad_norm": 3.8974168346565214, "learning_rate": 1.4579355593444332e-06, "loss": 0.6789, "step": 20916 }, { "epoch": 0.7581095284694285, "grad_norm": 2.4488085270662188, "learning_rate": 1.4575213307468822e-06, "loss": 0.8793, "step": 20917 }, { "epoch": 0.7581457721720851, "grad_norm": 2.0766547123305283, "learning_rate": 1.4571071509625433e-06, "loss": 0.8065, "step": 20918 }, { "epoch": 0.7581820158747418, "grad_norm": 2.400467567585364, "learning_rate": 1.4566930199971273e-06, "loss": 0.8398, "step": 20919 }, { "epoch": 0.7582182595773984, "grad_norm": 2.236696254144258, "learning_rate": 1.4562789378563347e-06, "loss": 0.8919, "step": 20920 }, { "epoch": 0.7582545032800551, "grad_norm": 2.574746628220638, "learning_rate": 1.455864904545879e-06, "loss": 0.8492, "step": 20921 }, { "epoch": 0.7582907469827117, "grad_norm": 2.164724360454309, "learning_rate": 1.4554509200714594e-06, "loss": 0.8464, "step": 20922 }, { "epoch": 0.7583269906853685, "grad_norm": 2.25544691797922, "learning_rate": 1.4550369844387823e-06, "loss": 0.8465, "step": 20923 }, { "epoch": 0.7583632343880251, "grad_norm": 2.289528770735871, "learning_rate": 1.4546230976535535e-06, "loss": 0.8534, "step": 20924 }, { "epoch": 0.7583994780906818, "grad_norm": 2.391137213188531, "learning_rate": 1.454209259721473e-06, "loss": 1.0737, "step": 20925 }, { "epoch": 0.7584357217933384, "grad_norm": 2.089693812494987, "learning_rate": 1.4537954706482454e-06, "loss": 0.9357, "step": 20926 }, { "epoch": 0.758471965495995, "grad_norm": 2.556338995523332, "learning_rate": 1.4533817304395687e-06, "loss": 0.8723, "step": 20927 }, { "epoch": 0.7585082091986517, "grad_norm": 2.657260112218436, "learning_rate": 1.4529680391011502e-06, "loss": 1.0805, "step": 20928 }, { "epoch": 0.7585444529013085, "grad_norm": 2.4749238750071108, "learning_rate": 1.4525543966386851e-06, "loss": 0.9379, "step": 20929 }, { "epoch": 0.7585806966039651, "grad_norm": 2.444360812639178, "learning_rate": 1.4521408030578766e-06, "loss": 1.0043, "step": 20930 }, { "epoch": 0.7586169403066217, "grad_norm": 2.4045873016898143, "learning_rate": 1.4517272583644205e-06, "loss": 0.9479, "step": 20931 }, { "epoch": 0.7586531840092784, "grad_norm": 2.1630330922618026, "learning_rate": 1.4513137625640167e-06, "loss": 0.7999, "step": 20932 }, { "epoch": 0.758689427711935, "grad_norm": 2.2959208472245933, "learning_rate": 1.4509003156623653e-06, "loss": 0.8609, "step": 20933 }, { "epoch": 0.7587256714145917, "grad_norm": 2.3701313011585117, "learning_rate": 1.4504869176651587e-06, "loss": 0.8781, "step": 20934 }, { "epoch": 0.7587619151172483, "grad_norm": 2.4672151140498033, "learning_rate": 1.4500735685780965e-06, "loss": 0.8034, "step": 20935 }, { "epoch": 0.7587981588199051, "grad_norm": 2.584036272036278, "learning_rate": 1.4496602684068729e-06, "loss": 1.0146, "step": 20936 }, { "epoch": 0.7588344025225617, "grad_norm": 2.3162462543127793, "learning_rate": 1.4492470171571854e-06, "loss": 0.8444, "step": 20937 }, { "epoch": 0.7588706462252184, "grad_norm": 2.1759736204108022, "learning_rate": 1.4488338148347252e-06, "loss": 0.919, "step": 20938 }, { "epoch": 0.758906889927875, "grad_norm": 2.461930937449324, "learning_rate": 1.4484206614451872e-06, "loss": 1.0352, "step": 20939 }, { "epoch": 0.7589431336305317, "grad_norm": 2.1965232101522942, "learning_rate": 1.4480075569942647e-06, "loss": 1.1001, "step": 20940 }, { "epoch": 0.7589793773331883, "grad_norm": 2.774323495500176, "learning_rate": 1.4475945014876512e-06, "loss": 0.877, "step": 20941 }, { "epoch": 0.759015621035845, "grad_norm": 2.6641993779496933, "learning_rate": 1.4471814949310358e-06, "loss": 1.024, "step": 20942 }, { "epoch": 0.7590518647385017, "grad_norm": 2.4069166255961294, "learning_rate": 1.446768537330111e-06, "loss": 0.9064, "step": 20943 }, { "epoch": 0.7590881084411584, "grad_norm": 2.106503980889169, "learning_rate": 1.4463556286905678e-06, "loss": 0.7949, "step": 20944 }, { "epoch": 0.759124352143815, "grad_norm": 2.4980788828344895, "learning_rate": 1.445942769018094e-06, "loss": 0.834, "step": 20945 }, { "epoch": 0.7591605958464717, "grad_norm": 1.9654672121561865, "learning_rate": 1.4455299583183802e-06, "loss": 0.9089, "step": 20946 }, { "epoch": 0.7591968395491283, "grad_norm": 2.5829931546436056, "learning_rate": 1.445117196597111e-06, "loss": 0.9346, "step": 20947 }, { "epoch": 0.759233083251785, "grad_norm": 2.189221628370802, "learning_rate": 1.44470448385998e-06, "loss": 0.738, "step": 20948 }, { "epoch": 0.7592693269544417, "grad_norm": 2.3282617802537486, "learning_rate": 1.4442918201126694e-06, "loss": 0.8599, "step": 20949 }, { "epoch": 0.7593055706570984, "grad_norm": 2.615964554607517, "learning_rate": 1.4438792053608692e-06, "loss": 0.9691, "step": 20950 }, { "epoch": 0.759341814359755, "grad_norm": 2.108439092622129, "learning_rate": 1.4434666396102608e-06, "loss": 0.7985, "step": 20951 }, { "epoch": 0.7593780580624117, "grad_norm": 2.347813524119812, "learning_rate": 1.4430541228665312e-06, "loss": 0.8814, "step": 20952 }, { "epoch": 0.7594143017650683, "grad_norm": 2.3332709490129426, "learning_rate": 1.4426416551353662e-06, "loss": 0.8424, "step": 20953 }, { "epoch": 0.759450545467725, "grad_norm": 2.4686006705932964, "learning_rate": 1.4422292364224466e-06, "loss": 0.9616, "step": 20954 }, { "epoch": 0.7594867891703816, "grad_norm": 2.4868720621716314, "learning_rate": 1.4418168667334559e-06, "loss": 0.9775, "step": 20955 }, { "epoch": 0.7595230328730384, "grad_norm": 2.5361820550430574, "learning_rate": 1.4414045460740771e-06, "loss": 0.8854, "step": 20956 }, { "epoch": 0.759559276575695, "grad_norm": 2.1884600429753496, "learning_rate": 1.4409922744499932e-06, "loss": 0.7296, "step": 20957 }, { "epoch": 0.7595955202783516, "grad_norm": 2.5369224331302442, "learning_rate": 1.4405800518668817e-06, "loss": 0.9973, "step": 20958 }, { "epoch": 0.7596317639810083, "grad_norm": 2.29517153381179, "learning_rate": 1.440167878330424e-06, "loss": 0.9926, "step": 20959 }, { "epoch": 0.7596680076836649, "grad_norm": 2.286913918008635, "learning_rate": 1.4397557538463003e-06, "loss": 0.7416, "step": 20960 }, { "epoch": 0.7597042513863216, "grad_norm": 2.351685026324835, "learning_rate": 1.4393436784201908e-06, "loss": 0.8797, "step": 20961 }, { "epoch": 0.7597404950889783, "grad_norm": 2.377132338067886, "learning_rate": 1.4389316520577706e-06, "loss": 0.9971, "step": 20962 }, { "epoch": 0.759776738791635, "grad_norm": 5.866370283771031, "learning_rate": 1.4385196747647184e-06, "loss": 0.7373, "step": 20963 }, { "epoch": 0.7598129824942916, "grad_norm": 2.308146084394812, "learning_rate": 1.4381077465467125e-06, "loss": 0.8752, "step": 20964 }, { "epoch": 0.7598492261969483, "grad_norm": 2.4343178927413827, "learning_rate": 1.4376958674094267e-06, "loss": 0.7765, "step": 20965 }, { "epoch": 0.7598854698996049, "grad_norm": 2.5257045631654367, "learning_rate": 1.4372840373585385e-06, "loss": 0.7343, "step": 20966 }, { "epoch": 0.7599217136022616, "grad_norm": 2.39038862494518, "learning_rate": 1.4368722563997183e-06, "loss": 0.9603, "step": 20967 }, { "epoch": 0.7599579573049182, "grad_norm": 2.5024494279888927, "learning_rate": 1.436460524538647e-06, "loss": 0.9349, "step": 20968 }, { "epoch": 0.759994201007575, "grad_norm": 2.1702992438732185, "learning_rate": 1.436048841780993e-06, "loss": 0.8933, "step": 20969 }, { "epoch": 0.7600304447102316, "grad_norm": 2.117335555969886, "learning_rate": 1.435637208132432e-06, "loss": 0.9886, "step": 20970 }, { "epoch": 0.7600666884128883, "grad_norm": 2.483147503706449, "learning_rate": 1.4352256235986328e-06, "loss": 0.9248, "step": 20971 }, { "epoch": 0.7601029321155449, "grad_norm": 2.207262971638738, "learning_rate": 1.434814088185269e-06, "loss": 0.8876, "step": 20972 }, { "epoch": 0.7601391758182016, "grad_norm": 2.5122495517198753, "learning_rate": 1.4344026018980123e-06, "loss": 0.7954, "step": 20973 }, { "epoch": 0.7601754195208582, "grad_norm": 2.564830469881282, "learning_rate": 1.4339911647425297e-06, "loss": 0.9802, "step": 20974 }, { "epoch": 0.760211663223515, "grad_norm": 2.442569412021971, "learning_rate": 1.4335797767244925e-06, "loss": 1.0539, "step": 20975 }, { "epoch": 0.7602479069261716, "grad_norm": 2.3765149852664096, "learning_rate": 1.4331684378495686e-06, "loss": 1.0915, "step": 20976 }, { "epoch": 0.7602841506288283, "grad_norm": 2.320115555076681, "learning_rate": 1.4327571481234286e-06, "loss": 0.8486, "step": 20977 }, { "epoch": 0.7603203943314849, "grad_norm": 2.3226434087077283, "learning_rate": 1.4323459075517359e-06, "loss": 1.0119, "step": 20978 }, { "epoch": 0.7603566380341416, "grad_norm": 2.4504927414118485, "learning_rate": 1.4319347161401587e-06, "loss": 0.9459, "step": 20979 }, { "epoch": 0.7603928817367982, "grad_norm": 2.11860488974162, "learning_rate": 1.431523573894365e-06, "loss": 0.8258, "step": 20980 }, { "epoch": 0.7604291254394548, "grad_norm": 2.5669714311514, "learning_rate": 1.4311124808200167e-06, "loss": 0.946, "step": 20981 }, { "epoch": 0.7604653691421116, "grad_norm": 2.2821585807982427, "learning_rate": 1.4307014369227817e-06, "loss": 0.7924, "step": 20982 }, { "epoch": 0.7605016128447682, "grad_norm": 2.7049593963934098, "learning_rate": 1.4302904422083192e-06, "loss": 1.0968, "step": 20983 }, { "epoch": 0.7605378565474249, "grad_norm": 2.2689625549640264, "learning_rate": 1.4298794966822982e-06, "loss": 0.8482, "step": 20984 }, { "epoch": 0.7605741002500815, "grad_norm": 2.034547621399074, "learning_rate": 1.4294686003503771e-06, "loss": 0.8021, "step": 20985 }, { "epoch": 0.7606103439527382, "grad_norm": 2.526200758322041, "learning_rate": 1.4290577532182215e-06, "loss": 0.8012, "step": 20986 }, { "epoch": 0.7606465876553948, "grad_norm": 2.508425601310932, "learning_rate": 1.4286469552914866e-06, "loss": 1.088, "step": 20987 }, { "epoch": 0.7606828313580516, "grad_norm": 2.3304659222401, "learning_rate": 1.4282362065758403e-06, "loss": 0.7784, "step": 20988 }, { "epoch": 0.7607190750607082, "grad_norm": 2.2825039633257234, "learning_rate": 1.427825507076937e-06, "loss": 0.7821, "step": 20989 }, { "epoch": 0.7607553187633649, "grad_norm": 2.5114646442524777, "learning_rate": 1.4274148568004398e-06, "loss": 0.8362, "step": 20990 }, { "epoch": 0.7607915624660215, "grad_norm": 2.6298538945437917, "learning_rate": 1.4270042557520036e-06, "loss": 0.8829, "step": 20991 }, { "epoch": 0.7608278061686782, "grad_norm": 2.500825930568993, "learning_rate": 1.4265937039372874e-06, "loss": 0.866, "step": 20992 }, { "epoch": 0.7608640498713348, "grad_norm": 2.4089496935552286, "learning_rate": 1.4261832013619509e-06, "loss": 1.0219, "step": 20993 }, { "epoch": 0.7609002935739915, "grad_norm": 2.350690043637273, "learning_rate": 1.4257727480316468e-06, "loss": 0.8656, "step": 20994 }, { "epoch": 0.7609365372766482, "grad_norm": 2.255323034292415, "learning_rate": 1.4253623439520325e-06, "loss": 0.952, "step": 20995 }, { "epoch": 0.7609727809793049, "grad_norm": 2.4390058819550062, "learning_rate": 1.424951989128764e-06, "loss": 0.8418, "step": 20996 }, { "epoch": 0.7610090246819615, "grad_norm": 2.356714272746666, "learning_rate": 1.4245416835674958e-06, "loss": 0.9177, "step": 20997 }, { "epoch": 0.7610452683846182, "grad_norm": 2.4734681721250444, "learning_rate": 1.4241314272738792e-06, "loss": 0.95, "step": 20998 }, { "epoch": 0.7610815120872748, "grad_norm": 2.4696192509217654, "learning_rate": 1.4237212202535688e-06, "loss": 0.9544, "step": 20999 }, { "epoch": 0.7611177557899315, "grad_norm": 2.6626784502904846, "learning_rate": 1.4233110625122193e-06, "loss": 0.8132, "step": 21000 }, { "epoch": 0.7611539994925882, "grad_norm": 2.370654829320031, "learning_rate": 1.4229009540554784e-06, "loss": 0.9734, "step": 21001 }, { "epoch": 0.7611902431952449, "grad_norm": 2.376311704385187, "learning_rate": 1.4224908948889987e-06, "loss": 0.8341, "step": 21002 }, { "epoch": 0.7612264868979015, "grad_norm": 2.3798895727821114, "learning_rate": 1.422080885018431e-06, "loss": 0.9495, "step": 21003 }, { "epoch": 0.7612627306005582, "grad_norm": 2.1085648885562027, "learning_rate": 1.4216709244494264e-06, "loss": 0.8048, "step": 21004 }, { "epoch": 0.7612989743032148, "grad_norm": 2.3684859864528938, "learning_rate": 1.421261013187631e-06, "loss": 0.8661, "step": 21005 }, { "epoch": 0.7613352180058715, "grad_norm": 2.403516753527202, "learning_rate": 1.4208511512386947e-06, "loss": 0.8101, "step": 21006 }, { "epoch": 0.7613714617085281, "grad_norm": 2.2388612277579893, "learning_rate": 1.4204413386082643e-06, "loss": 0.9427, "step": 21007 }, { "epoch": 0.7614077054111849, "grad_norm": 2.1226217243144165, "learning_rate": 1.4200315753019895e-06, "loss": 0.674, "step": 21008 }, { "epoch": 0.7614439491138415, "grad_norm": 2.032422866816003, "learning_rate": 1.4196218613255126e-06, "loss": 0.7756, "step": 21009 }, { "epoch": 0.7614801928164981, "grad_norm": 2.3882134401999258, "learning_rate": 1.4192121966844813e-06, "loss": 0.9224, "step": 21010 }, { "epoch": 0.7615164365191548, "grad_norm": 2.268541892707426, "learning_rate": 1.4188025813845418e-06, "loss": 0.7932, "step": 21011 }, { "epoch": 0.7615526802218114, "grad_norm": 2.299520740758901, "learning_rate": 1.4183930154313352e-06, "loss": 0.7938, "step": 21012 }, { "epoch": 0.7615889239244681, "grad_norm": 2.4131510845734105, "learning_rate": 1.4179834988305085e-06, "loss": 1.0134, "step": 21013 }, { "epoch": 0.7616251676271247, "grad_norm": 2.218886624379914, "learning_rate": 1.4175740315877002e-06, "loss": 0.7184, "step": 21014 }, { "epoch": 0.7616614113297815, "grad_norm": 2.3604003381206082, "learning_rate": 1.4171646137085577e-06, "loss": 0.9652, "step": 21015 }, { "epoch": 0.7616976550324381, "grad_norm": 2.2065557789081525, "learning_rate": 1.4167552451987183e-06, "loss": 0.7668, "step": 21016 }, { "epoch": 0.7617338987350948, "grad_norm": 2.426660212346082, "learning_rate": 1.4163459260638263e-06, "loss": 0.9486, "step": 21017 }, { "epoch": 0.7617701424377514, "grad_norm": 2.41508434655598, "learning_rate": 1.415936656309519e-06, "loss": 0.885, "step": 21018 }, { "epoch": 0.7618063861404081, "grad_norm": 2.3141889031457494, "learning_rate": 1.4155274359414362e-06, "loss": 0.7112, "step": 21019 }, { "epoch": 0.7618426298430647, "grad_norm": 2.559511878595236, "learning_rate": 1.4151182649652194e-06, "loss": 1.0106, "step": 21020 }, { "epoch": 0.7618788735457215, "grad_norm": 2.4280160925120016, "learning_rate": 1.4147091433865035e-06, "loss": 0.8347, "step": 21021 }, { "epoch": 0.7619151172483781, "grad_norm": 2.352702834785969, "learning_rate": 1.4143000712109272e-06, "loss": 0.8807, "step": 21022 }, { "epoch": 0.7619513609510348, "grad_norm": 2.2130224033591905, "learning_rate": 1.413891048444127e-06, "loss": 0.8346, "step": 21023 }, { "epoch": 0.7619876046536914, "grad_norm": 2.2923224845197954, "learning_rate": 1.4134820750917417e-06, "loss": 0.8196, "step": 21024 }, { "epoch": 0.7620238483563481, "grad_norm": 2.464603306309074, "learning_rate": 1.413073151159402e-06, "loss": 0.8626, "step": 21025 }, { "epoch": 0.7620600920590047, "grad_norm": 2.4079106812981848, "learning_rate": 1.4126642766527455e-06, "loss": 0.8546, "step": 21026 }, { "epoch": 0.7620963357616614, "grad_norm": 2.654812449715738, "learning_rate": 1.412255451577405e-06, "loss": 0.9083, "step": 21027 }, { "epoch": 0.7621325794643181, "grad_norm": 2.5944921748676766, "learning_rate": 1.4118466759390171e-06, "loss": 0.918, "step": 21028 }, { "epoch": 0.7621688231669748, "grad_norm": 2.5635359142435705, "learning_rate": 1.4114379497432096e-06, "loss": 0.9031, "step": 21029 }, { "epoch": 0.7622050668696314, "grad_norm": 2.285136363961786, "learning_rate": 1.4110292729956171e-06, "loss": 0.8967, "step": 21030 }, { "epoch": 0.7622413105722881, "grad_norm": 2.3489640265696776, "learning_rate": 1.4106206457018724e-06, "loss": 0.892, "step": 21031 }, { "epoch": 0.7622775542749447, "grad_norm": 2.3766565627992158, "learning_rate": 1.4102120678676028e-06, "loss": 0.9447, "step": 21032 }, { "epoch": 0.7623137979776013, "grad_norm": 2.5909837011326515, "learning_rate": 1.4098035394984417e-06, "loss": 0.9061, "step": 21033 }, { "epoch": 0.7623500416802581, "grad_norm": 2.279764240100034, "learning_rate": 1.4093950606000128e-06, "loss": 0.8961, "step": 21034 }, { "epoch": 0.7623862853829148, "grad_norm": 2.3510872782850156, "learning_rate": 1.4089866311779522e-06, "loss": 0.8163, "step": 21035 }, { "epoch": 0.7624225290855714, "grad_norm": 2.4954132607953046, "learning_rate": 1.4085782512378821e-06, "loss": 0.9055, "step": 21036 }, { "epoch": 0.762458772788228, "grad_norm": 2.8054455645493035, "learning_rate": 1.4081699207854332e-06, "loss": 1.0034, "step": 21037 }, { "epoch": 0.7624950164908847, "grad_norm": 2.7211118411987605, "learning_rate": 1.407761639826229e-06, "loss": 0.9858, "step": 21038 }, { "epoch": 0.7625312601935413, "grad_norm": 2.544926195737655, "learning_rate": 1.4073534083658969e-06, "loss": 0.9595, "step": 21039 }, { "epoch": 0.762567503896198, "grad_norm": 2.1361664014291755, "learning_rate": 1.4069452264100637e-06, "loss": 0.8164, "step": 21040 }, { "epoch": 0.7626037475988547, "grad_norm": 2.1833695371552326, "learning_rate": 1.4065370939643508e-06, "loss": 0.7779, "step": 21041 }, { "epoch": 0.7626399913015114, "grad_norm": 2.271323062720447, "learning_rate": 1.4061290110343835e-06, "loss": 0.8918, "step": 21042 }, { "epoch": 0.762676235004168, "grad_norm": 2.534409276758831, "learning_rate": 1.4057209776257847e-06, "loss": 0.9938, "step": 21043 }, { "epoch": 0.7627124787068247, "grad_norm": 2.2046613183166075, "learning_rate": 1.405312993744179e-06, "loss": 0.7596, "step": 21044 }, { "epoch": 0.7627487224094813, "grad_norm": 2.82895302408339, "learning_rate": 1.404905059395184e-06, "loss": 0.9997, "step": 21045 }, { "epoch": 0.762784966112138, "grad_norm": 2.2742454820463474, "learning_rate": 1.404497174584424e-06, "loss": 0.7074, "step": 21046 }, { "epoch": 0.7628212098147947, "grad_norm": 2.6348425889617726, "learning_rate": 1.4040893393175175e-06, "loss": 1.0626, "step": 21047 }, { "epoch": 0.7628574535174514, "grad_norm": 2.3106344211054117, "learning_rate": 1.4036815536000876e-06, "loss": 0.9567, "step": 21048 }, { "epoch": 0.762893697220108, "grad_norm": 2.4556684899530166, "learning_rate": 1.4032738174377485e-06, "loss": 0.938, "step": 21049 }, { "epoch": 0.7629299409227647, "grad_norm": 2.163595305118774, "learning_rate": 1.4028661308361214e-06, "loss": 0.5877, "step": 21050 }, { "epoch": 0.7629661846254213, "grad_norm": 2.429403607177488, "learning_rate": 1.4024584938008252e-06, "loss": 0.9406, "step": 21051 }, { "epoch": 0.763002428328078, "grad_norm": 2.036705204710678, "learning_rate": 1.4020509063374733e-06, "loss": 0.6757, "step": 21052 }, { "epoch": 0.7630386720307346, "grad_norm": 2.414821189088222, "learning_rate": 1.4016433684516857e-06, "loss": 0.8704, "step": 21053 }, { "epoch": 0.7630749157333914, "grad_norm": 2.233531946359892, "learning_rate": 1.4012358801490733e-06, "loss": 0.83, "step": 21054 }, { "epoch": 0.763111159436048, "grad_norm": 2.2950752502379226, "learning_rate": 1.400828441435257e-06, "loss": 0.7709, "step": 21055 }, { "epoch": 0.7631474031387047, "grad_norm": 2.15310436317991, "learning_rate": 1.4004210523158468e-06, "loss": 0.8138, "step": 21056 }, { "epoch": 0.7631836468413613, "grad_norm": 2.4609712373387747, "learning_rate": 1.4000137127964586e-06, "loss": 0.935, "step": 21057 }, { "epoch": 0.763219890544018, "grad_norm": 2.1896695177501053, "learning_rate": 1.3996064228827027e-06, "loss": 0.8567, "step": 21058 }, { "epoch": 0.7632561342466746, "grad_norm": 2.0344807754329444, "learning_rate": 1.399199182580193e-06, "loss": 0.9102, "step": 21059 }, { "epoch": 0.7632923779493314, "grad_norm": 2.1389550269914364, "learning_rate": 1.3987919918945425e-06, "loss": 0.773, "step": 21060 }, { "epoch": 0.763328621651988, "grad_norm": 2.469595143642445, "learning_rate": 1.3983848508313592e-06, "loss": 0.8269, "step": 21061 }, { "epoch": 0.7633648653546447, "grad_norm": 2.3608258067727435, "learning_rate": 1.397977759396254e-06, "loss": 0.8756, "step": 21062 }, { "epoch": 0.7634011090573013, "grad_norm": 2.1924244914046476, "learning_rate": 1.3975707175948367e-06, "loss": 0.7493, "step": 21063 }, { "epoch": 0.7634373527599579, "grad_norm": 2.50131392043432, "learning_rate": 1.397163725432718e-06, "loss": 0.8706, "step": 21064 }, { "epoch": 0.7634735964626146, "grad_norm": 2.515243665151623, "learning_rate": 1.3967567829155027e-06, "loss": 0.8983, "step": 21065 }, { "epoch": 0.7635098401652712, "grad_norm": 2.1207081588723407, "learning_rate": 1.3963498900488e-06, "loss": 0.7245, "step": 21066 }, { "epoch": 0.763546083867928, "grad_norm": 2.618728336095662, "learning_rate": 1.3959430468382174e-06, "loss": 0.9639, "step": 21067 }, { "epoch": 0.7635823275705846, "grad_norm": 2.5391066894347554, "learning_rate": 1.395536253289359e-06, "loss": 0.9256, "step": 21068 }, { "epoch": 0.7636185712732413, "grad_norm": 2.4569546498288166, "learning_rate": 1.3951295094078322e-06, "loss": 0.9811, "step": 21069 }, { "epoch": 0.7636548149758979, "grad_norm": 2.585852537431132, "learning_rate": 1.3947228151992376e-06, "loss": 0.903, "step": 21070 }, { "epoch": 0.7636910586785546, "grad_norm": 2.5373018918051375, "learning_rate": 1.3943161706691854e-06, "loss": 0.9277, "step": 21071 }, { "epoch": 0.7637273023812112, "grad_norm": 2.3185160904813618, "learning_rate": 1.3939095758232745e-06, "loss": 1.0388, "step": 21072 }, { "epoch": 0.7637635460838679, "grad_norm": 2.343971159363276, "learning_rate": 1.3935030306671099e-06, "loss": 0.7757, "step": 21073 }, { "epoch": 0.7637997897865246, "grad_norm": 2.418751417020801, "learning_rate": 1.3930965352062892e-06, "loss": 0.9418, "step": 21074 }, { "epoch": 0.7638360334891813, "grad_norm": 1.9931194901392453, "learning_rate": 1.3926900894464202e-06, "loss": 0.9298, "step": 21075 }, { "epoch": 0.7638722771918379, "grad_norm": 2.2880475756737644, "learning_rate": 1.3922836933930983e-06, "loss": 0.9236, "step": 21076 }, { "epoch": 0.7639085208944946, "grad_norm": 2.446363505175316, "learning_rate": 1.391877347051927e-06, "loss": 1.0976, "step": 21077 }, { "epoch": 0.7639447645971512, "grad_norm": 2.376196093805384, "learning_rate": 1.3914710504285023e-06, "loss": 0.9965, "step": 21078 }, { "epoch": 0.7639810082998079, "grad_norm": 2.324340639815685, "learning_rate": 1.391064803528424e-06, "loss": 0.7543, "step": 21079 }, { "epoch": 0.7640172520024646, "grad_norm": 2.400860371635029, "learning_rate": 1.390658606357292e-06, "loss": 1.0872, "step": 21080 }, { "epoch": 0.7640534957051213, "grad_norm": 2.8878159952416915, "learning_rate": 1.3902524589206996e-06, "loss": 1.0074, "step": 21081 }, { "epoch": 0.7640897394077779, "grad_norm": 2.486929935662882, "learning_rate": 1.3898463612242451e-06, "loss": 0.8224, "step": 21082 }, { "epoch": 0.7641259831104346, "grad_norm": 2.4016316422411266, "learning_rate": 1.3894403132735245e-06, "loss": 0.9076, "step": 21083 }, { "epoch": 0.7641622268130912, "grad_norm": 1.909254053497673, "learning_rate": 1.3890343150741343e-06, "loss": 0.7009, "step": 21084 }, { "epoch": 0.7641984705157479, "grad_norm": 2.5056606467034093, "learning_rate": 1.3886283666316663e-06, "loss": 0.9188, "step": 21085 }, { "epoch": 0.7642347142184045, "grad_norm": 2.458274355693653, "learning_rate": 1.3882224679517148e-06, "loss": 0.9335, "step": 21086 }, { "epoch": 0.7642709579210613, "grad_norm": 2.414796246326793, "learning_rate": 1.387816619039875e-06, "loss": 0.794, "step": 21087 }, { "epoch": 0.7643072016237179, "grad_norm": 2.3022087437496053, "learning_rate": 1.387410819901736e-06, "loss": 0.8941, "step": 21088 }, { "epoch": 0.7643434453263745, "grad_norm": 2.518103738438347, "learning_rate": 1.3870050705428911e-06, "loss": 0.8739, "step": 21089 }, { "epoch": 0.7643796890290312, "grad_norm": 2.317708534898981, "learning_rate": 1.3865993709689318e-06, "loss": 0.9707, "step": 21090 }, { "epoch": 0.7644159327316878, "grad_norm": 2.506067147681742, "learning_rate": 1.3861937211854487e-06, "loss": 0.8176, "step": 21091 }, { "epoch": 0.7644521764343445, "grad_norm": 2.042860547833446, "learning_rate": 1.3857881211980295e-06, "loss": 0.9217, "step": 21092 }, { "epoch": 0.7644884201370012, "grad_norm": 1.8838579050794093, "learning_rate": 1.3853825710122637e-06, "loss": 0.6684, "step": 21093 }, { "epoch": 0.7645246638396579, "grad_norm": 2.441419137314851, "learning_rate": 1.3849770706337406e-06, "loss": 0.9336, "step": 21094 }, { "epoch": 0.7645609075423145, "grad_norm": 2.3140350480199117, "learning_rate": 1.384571620068048e-06, "loss": 0.947, "step": 21095 }, { "epoch": 0.7645971512449712, "grad_norm": 2.569015569263086, "learning_rate": 1.3841662193207711e-06, "loss": 0.8352, "step": 21096 }, { "epoch": 0.7646333949476278, "grad_norm": 2.1467160978950464, "learning_rate": 1.3837608683974968e-06, "loss": 1.0372, "step": 21097 }, { "epoch": 0.7646696386502845, "grad_norm": 2.7216798139141307, "learning_rate": 1.383355567303812e-06, "loss": 0.942, "step": 21098 }, { "epoch": 0.7647058823529411, "grad_norm": 2.2298056426859496, "learning_rate": 1.3829503160452984e-06, "loss": 0.9215, "step": 21099 }, { "epoch": 0.7647421260555979, "grad_norm": 2.3963230881905218, "learning_rate": 1.3825451146275443e-06, "loss": 0.8267, "step": 21100 }, { "epoch": 0.7647783697582545, "grad_norm": 2.3627896201324035, "learning_rate": 1.3821399630561266e-06, "loss": 0.9156, "step": 21101 }, { "epoch": 0.7648146134609112, "grad_norm": 2.306961681990158, "learning_rate": 1.381734861336636e-06, "loss": 0.7088, "step": 21102 }, { "epoch": 0.7648508571635678, "grad_norm": 2.170551479546024, "learning_rate": 1.3813298094746491e-06, "loss": 0.854, "step": 21103 }, { "epoch": 0.7648871008662245, "grad_norm": 2.536559316457615, "learning_rate": 1.3809248074757503e-06, "loss": 1.028, "step": 21104 }, { "epoch": 0.7649233445688811, "grad_norm": 2.3041642929462265, "learning_rate": 1.380519855345518e-06, "loss": 0.9588, "step": 21105 }, { "epoch": 0.7649595882715379, "grad_norm": 2.4306292942793055, "learning_rate": 1.3801149530895325e-06, "loss": 0.964, "step": 21106 }, { "epoch": 0.7649958319741945, "grad_norm": 2.391372963705103, "learning_rate": 1.379710100713375e-06, "loss": 0.9262, "step": 21107 }, { "epoch": 0.7650320756768512, "grad_norm": 3.1841306205165805, "learning_rate": 1.3793052982226219e-06, "loss": 0.8989, "step": 21108 }, { "epoch": 0.7650683193795078, "grad_norm": 2.5267037685779883, "learning_rate": 1.3789005456228515e-06, "loss": 0.8085, "step": 21109 }, { "epoch": 0.7651045630821645, "grad_norm": 2.318319931880126, "learning_rate": 1.378495842919642e-06, "loss": 0.9507, "step": 21110 }, { "epoch": 0.7651408067848211, "grad_norm": 2.1818959751906823, "learning_rate": 1.3780911901185713e-06, "loss": 0.9876, "step": 21111 }, { "epoch": 0.7651770504874778, "grad_norm": 2.5852151058231607, "learning_rate": 1.3776865872252116e-06, "loss": 0.9645, "step": 21112 }, { "epoch": 0.7652132941901345, "grad_norm": 2.388532358280988, "learning_rate": 1.3772820342451398e-06, "loss": 0.9111, "step": 21113 }, { "epoch": 0.7652495378927912, "grad_norm": 2.6355518636036073, "learning_rate": 1.3768775311839311e-06, "loss": 0.9348, "step": 21114 }, { "epoch": 0.7652857815954478, "grad_norm": 2.412827524776173, "learning_rate": 1.3764730780471603e-06, "loss": 0.9604, "step": 21115 }, { "epoch": 0.7653220252981044, "grad_norm": 2.269554568145192, "learning_rate": 1.3760686748403972e-06, "loss": 0.9141, "step": 21116 }, { "epoch": 0.7653582690007611, "grad_norm": 2.3607509272703715, "learning_rate": 1.3756643215692166e-06, "loss": 1.0017, "step": 21117 }, { "epoch": 0.7653945127034177, "grad_norm": 2.0595679124369437, "learning_rate": 1.3752600182391907e-06, "loss": 0.8399, "step": 21118 }, { "epoch": 0.7654307564060745, "grad_norm": 2.5619498101193896, "learning_rate": 1.3748557648558885e-06, "loss": 0.9903, "step": 21119 }, { "epoch": 0.7654670001087311, "grad_norm": 2.1980910356854095, "learning_rate": 1.3744515614248826e-06, "loss": 0.8768, "step": 21120 }, { "epoch": 0.7655032438113878, "grad_norm": 2.5584287263352907, "learning_rate": 1.3740474079517385e-06, "loss": 0.829, "step": 21121 }, { "epoch": 0.7655394875140444, "grad_norm": 2.3837490152608836, "learning_rate": 1.3736433044420316e-06, "loss": 0.824, "step": 21122 }, { "epoch": 0.7655757312167011, "grad_norm": 2.2294632587846808, "learning_rate": 1.3732392509013253e-06, "loss": 0.9135, "step": 21123 }, { "epoch": 0.7656119749193577, "grad_norm": 2.48612402004554, "learning_rate": 1.3728352473351896e-06, "loss": 0.8935, "step": 21124 }, { "epoch": 0.7656482186220144, "grad_norm": 2.661015003690036, "learning_rate": 1.3724312937491895e-06, "loss": 0.8377, "step": 21125 }, { "epoch": 0.7656844623246711, "grad_norm": 2.349382586973581, "learning_rate": 1.3720273901488923e-06, "loss": 0.8807, "step": 21126 }, { "epoch": 0.7657207060273278, "grad_norm": 2.3855886120631813, "learning_rate": 1.371623536539865e-06, "loss": 0.7408, "step": 21127 }, { "epoch": 0.7657569497299844, "grad_norm": 2.1975087547908205, "learning_rate": 1.37121973292767e-06, "loss": 0.7911, "step": 21128 }, { "epoch": 0.7657931934326411, "grad_norm": 2.1887436612977815, "learning_rate": 1.3708159793178717e-06, "loss": 0.6689, "step": 21129 }, { "epoch": 0.7658294371352977, "grad_norm": 2.4782646422312267, "learning_rate": 1.3704122757160342e-06, "loss": 0.7206, "step": 21130 }, { "epoch": 0.7658656808379544, "grad_norm": 2.391740056506046, "learning_rate": 1.3700086221277226e-06, "loss": 0.8565, "step": 21131 }, { "epoch": 0.7659019245406111, "grad_norm": 2.430498353062309, "learning_rate": 1.3696050185584952e-06, "loss": 0.8244, "step": 21132 }, { "epoch": 0.7659381682432678, "grad_norm": 2.1079790693518183, "learning_rate": 1.3692014650139151e-06, "loss": 0.8571, "step": 21133 }, { "epoch": 0.7659744119459244, "grad_norm": 2.3936759337427476, "learning_rate": 1.3687979614995428e-06, "loss": 0.8642, "step": 21134 }, { "epoch": 0.7660106556485811, "grad_norm": 2.512480799549367, "learning_rate": 1.3683945080209398e-06, "loss": 0.8684, "step": 21135 }, { "epoch": 0.7660468993512377, "grad_norm": 2.2406368910495136, "learning_rate": 1.367991104583663e-06, "loss": 0.9856, "step": 21136 }, { "epoch": 0.7660831430538944, "grad_norm": 2.249616512683663, "learning_rate": 1.3675877511932724e-06, "loss": 0.906, "step": 21137 }, { "epoch": 0.766119386756551, "grad_norm": 2.3688739908728245, "learning_rate": 1.3671844478553265e-06, "loss": 0.9225, "step": 21138 }, { "epoch": 0.7661556304592078, "grad_norm": 2.4533013269044854, "learning_rate": 1.3667811945753812e-06, "loss": 0.7026, "step": 21139 }, { "epoch": 0.7661918741618644, "grad_norm": 2.2844169792673417, "learning_rate": 1.3663779913589946e-06, "loss": 0.9471, "step": 21140 }, { "epoch": 0.766228117864521, "grad_norm": 2.3431210078470444, "learning_rate": 1.3659748382117188e-06, "loss": 0.7808, "step": 21141 }, { "epoch": 0.7662643615671777, "grad_norm": 2.546004890619539, "learning_rate": 1.3655717351391152e-06, "loss": 1.0151, "step": 21142 }, { "epoch": 0.7663006052698343, "grad_norm": 2.2619945409514295, "learning_rate": 1.3651686821467336e-06, "loss": 0.9127, "step": 21143 }, { "epoch": 0.766336848972491, "grad_norm": 2.4331578100651607, "learning_rate": 1.3647656792401308e-06, "loss": 0.7633, "step": 21144 }, { "epoch": 0.7663730926751476, "grad_norm": 2.4208346464355213, "learning_rate": 1.3643627264248566e-06, "loss": 0.8149, "step": 21145 }, { "epoch": 0.7664093363778044, "grad_norm": 2.4900197272263638, "learning_rate": 1.3639598237064655e-06, "loss": 0.8309, "step": 21146 }, { "epoch": 0.766445580080461, "grad_norm": 2.205933278896708, "learning_rate": 1.3635569710905106e-06, "loss": 0.6651, "step": 21147 }, { "epoch": 0.7664818237831177, "grad_norm": 2.667467049876766, "learning_rate": 1.3631541685825394e-06, "loss": 0.7974, "step": 21148 }, { "epoch": 0.7665180674857743, "grad_norm": 2.410279713874863, "learning_rate": 1.362751416188105e-06, "loss": 0.8854, "step": 21149 }, { "epoch": 0.766554311188431, "grad_norm": 2.4054481800674137, "learning_rate": 1.3623487139127562e-06, "loss": 1.0279, "step": 21150 }, { "epoch": 0.7665905548910876, "grad_norm": 2.3912953520788673, "learning_rate": 1.3619460617620433e-06, "loss": 0.943, "step": 21151 }, { "epoch": 0.7666267985937444, "grad_norm": 2.276707552033342, "learning_rate": 1.3615434597415117e-06, "loss": 1.0226, "step": 21152 }, { "epoch": 0.766663042296401, "grad_norm": 2.1149550220415643, "learning_rate": 1.3611409078567112e-06, "loss": 0.7275, "step": 21153 }, { "epoch": 0.7666992859990577, "grad_norm": 2.327672794906603, "learning_rate": 1.36073840611319e-06, "loss": 0.9306, "step": 21154 }, { "epoch": 0.7667355297017143, "grad_norm": 2.450688317712508, "learning_rate": 1.3603359545164907e-06, "loss": 0.905, "step": 21155 }, { "epoch": 0.766771773404371, "grad_norm": 3.1056470214703964, "learning_rate": 1.3599335530721625e-06, "loss": 0.9071, "step": 21156 }, { "epoch": 0.7668080171070276, "grad_norm": 2.6272871503268544, "learning_rate": 1.3595312017857453e-06, "loss": 0.9769, "step": 21157 }, { "epoch": 0.7668442608096843, "grad_norm": 2.203539990373865, "learning_rate": 1.3591289006627894e-06, "loss": 0.8276, "step": 21158 }, { "epoch": 0.766880504512341, "grad_norm": 2.147415564762208, "learning_rate": 1.3587266497088336e-06, "loss": 0.7826, "step": 21159 }, { "epoch": 0.7669167482149977, "grad_norm": 2.2745663185638176, "learning_rate": 1.3583244489294244e-06, "loss": 0.9103, "step": 21160 }, { "epoch": 0.7669529919176543, "grad_norm": 2.565466068867202, "learning_rate": 1.3579222983300988e-06, "loss": 0.8508, "step": 21161 }, { "epoch": 0.766989235620311, "grad_norm": 2.3378793916601768, "learning_rate": 1.3575201979164043e-06, "loss": 0.9611, "step": 21162 }, { "epoch": 0.7670254793229676, "grad_norm": 2.012676794201904, "learning_rate": 1.3571181476938767e-06, "loss": 0.8385, "step": 21163 }, { "epoch": 0.7670617230256243, "grad_norm": 2.366800728016738, "learning_rate": 1.3567161476680596e-06, "loss": 0.9179, "step": 21164 }, { "epoch": 0.767097966728281, "grad_norm": 2.289263678423033, "learning_rate": 1.3563141978444893e-06, "loss": 0.9616, "step": 21165 }, { "epoch": 0.7671342104309377, "grad_norm": 2.6170542239626426, "learning_rate": 1.3559122982287054e-06, "loss": 0.9712, "step": 21166 }, { "epoch": 0.7671704541335943, "grad_norm": 2.674277511990595, "learning_rate": 1.3555104488262478e-06, "loss": 1.0627, "step": 21167 }, { "epoch": 0.767206697836251, "grad_norm": 2.1401420299653635, "learning_rate": 1.355108649642649e-06, "loss": 0.6814, "step": 21168 }, { "epoch": 0.7672429415389076, "grad_norm": 2.0589104451020246, "learning_rate": 1.3547069006834518e-06, "loss": 0.8081, "step": 21169 }, { "epoch": 0.7672791852415642, "grad_norm": 2.13820190437036, "learning_rate": 1.3543052019541875e-06, "loss": 0.7152, "step": 21170 }, { "epoch": 0.7673154289442209, "grad_norm": 2.525361230715803, "learning_rate": 1.353903553460394e-06, "loss": 0.8866, "step": 21171 }, { "epoch": 0.7673516726468776, "grad_norm": 2.604027907263564, "learning_rate": 1.353501955207603e-06, "loss": 0.9544, "step": 21172 }, { "epoch": 0.7673879163495343, "grad_norm": 2.363072320571552, "learning_rate": 1.3531004072013498e-06, "loss": 0.8968, "step": 21173 }, { "epoch": 0.7674241600521909, "grad_norm": 2.3127136110885123, "learning_rate": 1.3526989094471688e-06, "loss": 0.8427, "step": 21174 }, { "epoch": 0.7674604037548476, "grad_norm": 2.0673908247432213, "learning_rate": 1.3522974619505897e-06, "loss": 0.6592, "step": 21175 }, { "epoch": 0.7674966474575042, "grad_norm": 2.53496072982058, "learning_rate": 1.3518960647171454e-06, "loss": 0.865, "step": 21176 }, { "epoch": 0.7675328911601609, "grad_norm": 2.465722214499164, "learning_rate": 1.3514947177523667e-06, "loss": 0.9941, "step": 21177 }, { "epoch": 0.7675691348628176, "grad_norm": 2.1493911337867355, "learning_rate": 1.351093421061786e-06, "loss": 0.854, "step": 21178 }, { "epoch": 0.7676053785654743, "grad_norm": 2.3386621438920954, "learning_rate": 1.35069217465093e-06, "loss": 0.8547, "step": 21179 }, { "epoch": 0.7676416222681309, "grad_norm": 2.5962029805016282, "learning_rate": 1.3502909785253282e-06, "loss": 0.8841, "step": 21180 }, { "epoch": 0.7676778659707876, "grad_norm": 2.7566837077695188, "learning_rate": 1.3498898326905097e-06, "loss": 0.9138, "step": 21181 }, { "epoch": 0.7677141096734442, "grad_norm": 2.799710793443786, "learning_rate": 1.3494887371520033e-06, "loss": 0.859, "step": 21182 }, { "epoch": 0.7677503533761009, "grad_norm": 2.6325371050931414, "learning_rate": 1.3490876919153329e-06, "loss": 1.1524, "step": 21183 }, { "epoch": 0.7677865970787575, "grad_norm": 2.3227428592178136, "learning_rate": 1.3486866969860263e-06, "loss": 0.8454, "step": 21184 }, { "epoch": 0.7678228407814143, "grad_norm": 2.3939796446370494, "learning_rate": 1.3482857523696097e-06, "loss": 0.8733, "step": 21185 }, { "epoch": 0.7678590844840709, "grad_norm": 2.4082696403987294, "learning_rate": 1.3478848580716058e-06, "loss": 0.8985, "step": 21186 }, { "epoch": 0.7678953281867276, "grad_norm": 1.9460546568589085, "learning_rate": 1.3474840140975414e-06, "loss": 0.7917, "step": 21187 }, { "epoch": 0.7679315718893842, "grad_norm": 2.3545884664552026, "learning_rate": 1.3470832204529349e-06, "loss": 0.7919, "step": 21188 }, { "epoch": 0.7679678155920409, "grad_norm": 2.6077720675291904, "learning_rate": 1.3466824771433163e-06, "loss": 0.8222, "step": 21189 }, { "epoch": 0.7680040592946975, "grad_norm": 2.22183769089814, "learning_rate": 1.3462817841742016e-06, "loss": 1.0541, "step": 21190 }, { "epoch": 0.7680403029973543, "grad_norm": 2.471409222945225, "learning_rate": 1.3458811415511152e-06, "loss": 0.9277, "step": 21191 }, { "epoch": 0.7680765467000109, "grad_norm": 2.0239872329487, "learning_rate": 1.3454805492795763e-06, "loss": 0.8988, "step": 21192 }, { "epoch": 0.7681127904026676, "grad_norm": 2.1162575920763143, "learning_rate": 1.3450800073651043e-06, "loss": 0.8748, "step": 21193 }, { "epoch": 0.7681490341053242, "grad_norm": 2.4420455304604367, "learning_rate": 1.3446795158132214e-06, "loss": 0.9252, "step": 21194 }, { "epoch": 0.7681852778079808, "grad_norm": 2.6829781393273326, "learning_rate": 1.3442790746294426e-06, "loss": 0.9368, "step": 21195 }, { "epoch": 0.7682215215106375, "grad_norm": 2.2679118744409013, "learning_rate": 1.3438786838192874e-06, "loss": 0.816, "step": 21196 }, { "epoch": 0.7682577652132941, "grad_norm": 2.3241387731099987, "learning_rate": 1.3434783433882725e-06, "loss": 0.7523, "step": 21197 }, { "epoch": 0.7682940089159509, "grad_norm": 2.5645559060132257, "learning_rate": 1.3430780533419163e-06, "loss": 0.7691, "step": 21198 }, { "epoch": 0.7683302526186075, "grad_norm": 2.4548865770237107, "learning_rate": 1.342677813685731e-06, "loss": 0.9248, "step": 21199 }, { "epoch": 0.7683664963212642, "grad_norm": 2.3558310528272375, "learning_rate": 1.3422776244252338e-06, "loss": 1.0848, "step": 21200 }, { "epoch": 0.7684027400239208, "grad_norm": 2.4386850197663836, "learning_rate": 1.341877485565939e-06, "loss": 1.087, "step": 21201 }, { "epoch": 0.7684389837265775, "grad_norm": 2.6160584035386503, "learning_rate": 1.3414773971133616e-06, "loss": 0.9228, "step": 21202 }, { "epoch": 0.7684752274292341, "grad_norm": 2.107665544906239, "learning_rate": 1.341077359073011e-06, "loss": 1.027, "step": 21203 }, { "epoch": 0.7685114711318909, "grad_norm": 2.385732053685595, "learning_rate": 1.3406773714504018e-06, "loss": 0.8807, "step": 21204 }, { "epoch": 0.7685477148345475, "grad_norm": 2.176218854317253, "learning_rate": 1.3402774342510472e-06, "loss": 0.8142, "step": 21205 }, { "epoch": 0.7685839585372042, "grad_norm": 2.4447688356638104, "learning_rate": 1.3398775474804543e-06, "loss": 0.9067, "step": 21206 }, { "epoch": 0.7686202022398608, "grad_norm": 2.4119381247546756, "learning_rate": 1.339477711144137e-06, "loss": 1.0086, "step": 21207 }, { "epoch": 0.7686564459425175, "grad_norm": 2.413031076671827, "learning_rate": 1.3390779252476e-06, "loss": 0.7575, "step": 21208 }, { "epoch": 0.7686926896451741, "grad_norm": 2.167365901748198, "learning_rate": 1.3386781897963579e-06, "loss": 0.872, "step": 21209 }, { "epoch": 0.7687289333478308, "grad_norm": 2.271015153741356, "learning_rate": 1.338278504795915e-06, "loss": 0.7999, "step": 21210 }, { "epoch": 0.7687651770504875, "grad_norm": 2.67309395290292, "learning_rate": 1.337878870251781e-06, "loss": 0.9694, "step": 21211 }, { "epoch": 0.7688014207531442, "grad_norm": 2.823932893760299, "learning_rate": 1.3374792861694597e-06, "loss": 0.8498, "step": 21212 }, { "epoch": 0.7688376644558008, "grad_norm": 2.5867336829082115, "learning_rate": 1.3370797525544594e-06, "loss": 1.0257, "step": 21213 }, { "epoch": 0.7688739081584575, "grad_norm": 2.825087463750539, "learning_rate": 1.3366802694122854e-06, "loss": 0.916, "step": 21214 }, { "epoch": 0.7689101518611141, "grad_norm": 2.404517908573439, "learning_rate": 1.3362808367484408e-06, "loss": 0.7801, "step": 21215 }, { "epoch": 0.7689463955637708, "grad_norm": 2.5403813400723037, "learning_rate": 1.3358814545684307e-06, "loss": 0.8681, "step": 21216 }, { "epoch": 0.7689826392664274, "grad_norm": 2.363606005560319, "learning_rate": 1.3354821228777581e-06, "loss": 1.0374, "step": 21217 }, { "epoch": 0.7690188829690842, "grad_norm": 2.2024392143511817, "learning_rate": 1.3350828416819272e-06, "loss": 0.953, "step": 21218 }, { "epoch": 0.7690551266717408, "grad_norm": 2.0465687590605306, "learning_rate": 1.3346836109864363e-06, "loss": 0.684, "step": 21219 }, { "epoch": 0.7690913703743975, "grad_norm": 2.208107520983892, "learning_rate": 1.334284430796789e-06, "loss": 0.8008, "step": 21220 }, { "epoch": 0.7691276140770541, "grad_norm": 2.242721686037377, "learning_rate": 1.3338853011184849e-06, "loss": 0.8658, "step": 21221 }, { "epoch": 0.7691638577797107, "grad_norm": 2.089125362807092, "learning_rate": 1.333486221957026e-06, "loss": 0.8245, "step": 21222 }, { "epoch": 0.7692001014823674, "grad_norm": 2.433925787935089, "learning_rate": 1.333087193317908e-06, "loss": 1.0481, "step": 21223 }, { "epoch": 0.7692363451850242, "grad_norm": 2.1717029262412644, "learning_rate": 1.3326882152066306e-06, "loss": 0.828, "step": 21224 }, { "epoch": 0.7692725888876808, "grad_norm": 2.1205636246008743, "learning_rate": 1.3322892876286936e-06, "loss": 0.8158, "step": 21225 }, { "epoch": 0.7693088325903374, "grad_norm": 2.3276924450215746, "learning_rate": 1.3318904105895903e-06, "loss": 0.9148, "step": 21226 }, { "epoch": 0.7693450762929941, "grad_norm": 2.4019139757910266, "learning_rate": 1.33149158409482e-06, "loss": 1.0743, "step": 21227 }, { "epoch": 0.7693813199956507, "grad_norm": 2.628316681188065, "learning_rate": 1.3310928081498742e-06, "loss": 0.8643, "step": 21228 }, { "epoch": 0.7694175636983074, "grad_norm": 2.1286121391251713, "learning_rate": 1.3306940827602533e-06, "loss": 0.7419, "step": 21229 }, { "epoch": 0.769453807400964, "grad_norm": 2.2259047419834816, "learning_rate": 1.3302954079314478e-06, "loss": 0.9695, "step": 21230 }, { "epoch": 0.7694900511036208, "grad_norm": 2.5394843986014437, "learning_rate": 1.3298967836689536e-06, "loss": 0.9239, "step": 21231 }, { "epoch": 0.7695262948062774, "grad_norm": 2.205008288056621, "learning_rate": 1.3294982099782605e-06, "loss": 0.9098, "step": 21232 }, { "epoch": 0.7695625385089341, "grad_norm": 2.415910999049114, "learning_rate": 1.3290996868648626e-06, "loss": 0.8, "step": 21233 }, { "epoch": 0.7695987822115907, "grad_norm": 2.193353392903579, "learning_rate": 1.328701214334252e-06, "loss": 0.7815, "step": 21234 }, { "epoch": 0.7696350259142474, "grad_norm": 2.112932555915087, "learning_rate": 1.3283027923919167e-06, "loss": 0.8433, "step": 21235 }, { "epoch": 0.769671269616904, "grad_norm": 2.21272956920401, "learning_rate": 1.3279044210433485e-06, "loss": 0.9296, "step": 21236 }, { "epoch": 0.7697075133195608, "grad_norm": 2.385793990188078, "learning_rate": 1.327506100294037e-06, "loss": 1.025, "step": 21237 }, { "epoch": 0.7697437570222174, "grad_norm": 2.4128445348408705, "learning_rate": 1.327107830149471e-06, "loss": 0.9917, "step": 21238 }, { "epoch": 0.7697800007248741, "grad_norm": 2.727539423762697, "learning_rate": 1.3267096106151372e-06, "loss": 0.9646, "step": 21239 }, { "epoch": 0.7698162444275307, "grad_norm": 2.2520289540207825, "learning_rate": 1.3263114416965228e-06, "loss": 0.7111, "step": 21240 }, { "epoch": 0.7698524881301874, "grad_norm": 2.255302200459064, "learning_rate": 1.3259133233991173e-06, "loss": 0.9592, "step": 21241 }, { "epoch": 0.769888731832844, "grad_norm": 2.7143811319368085, "learning_rate": 1.3255152557284018e-06, "loss": 0.8823, "step": 21242 }, { "epoch": 0.7699249755355007, "grad_norm": 2.3425752000331768, "learning_rate": 1.3251172386898659e-06, "loss": 0.8462, "step": 21243 }, { "epoch": 0.7699612192381574, "grad_norm": 2.5646964806282213, "learning_rate": 1.3247192722889884e-06, "loss": 0.9125, "step": 21244 }, { "epoch": 0.7699974629408141, "grad_norm": 2.444729570307442, "learning_rate": 1.32432135653126e-06, "loss": 0.7052, "step": 21245 }, { "epoch": 0.7700337066434707, "grad_norm": 2.147199596809453, "learning_rate": 1.3239234914221588e-06, "loss": 0.8708, "step": 21246 }, { "epoch": 0.7700699503461274, "grad_norm": 2.3522228854461997, "learning_rate": 1.3235256769671683e-06, "loss": 0.9284, "step": 21247 }, { "epoch": 0.770106194048784, "grad_norm": 1.9356040521772009, "learning_rate": 1.323127913171771e-06, "loss": 0.6924, "step": 21248 }, { "epoch": 0.7701424377514406, "grad_norm": 2.3685882601426527, "learning_rate": 1.3227302000414488e-06, "loss": 0.7906, "step": 21249 }, { "epoch": 0.7701786814540974, "grad_norm": 2.3799504188428613, "learning_rate": 1.3223325375816787e-06, "loss": 0.7477, "step": 21250 }, { "epoch": 0.770214925156754, "grad_norm": 2.0677760214578957, "learning_rate": 1.3219349257979418e-06, "loss": 0.8588, "step": 21251 }, { "epoch": 0.7702511688594107, "grad_norm": 2.220298298083516, "learning_rate": 1.3215373646957196e-06, "loss": 0.9182, "step": 21252 }, { "epoch": 0.7702874125620673, "grad_norm": 2.2997000604844637, "learning_rate": 1.3211398542804853e-06, "loss": 0.8644, "step": 21253 }, { "epoch": 0.770323656264724, "grad_norm": 2.6539541967118745, "learning_rate": 1.3207423945577214e-06, "loss": 0.8858, "step": 21254 }, { "epoch": 0.7703598999673806, "grad_norm": 2.5451143685759448, "learning_rate": 1.3203449855328986e-06, "loss": 0.9229, "step": 21255 }, { "epoch": 0.7703961436700373, "grad_norm": 2.394120772055878, "learning_rate": 1.3199476272114998e-06, "loss": 0.9441, "step": 21256 }, { "epoch": 0.770432387372694, "grad_norm": 2.3976637805340792, "learning_rate": 1.319550319598995e-06, "loss": 0.8797, "step": 21257 }, { "epoch": 0.7704686310753507, "grad_norm": 2.3522910010442253, "learning_rate": 1.319153062700863e-06, "loss": 0.837, "step": 21258 }, { "epoch": 0.7705048747780073, "grad_norm": 2.1007739300884065, "learning_rate": 1.318755856522574e-06, "loss": 0.6596, "step": 21259 }, { "epoch": 0.770541118480664, "grad_norm": 2.3312119663821917, "learning_rate": 1.318358701069603e-06, "loss": 0.6929, "step": 21260 }, { "epoch": 0.7705773621833206, "grad_norm": 2.4311356486734828, "learning_rate": 1.317961596347424e-06, "loss": 1.0282, "step": 21261 }, { "epoch": 0.7706136058859773, "grad_norm": 2.5478017141014107, "learning_rate": 1.3175645423615058e-06, "loss": 0.9167, "step": 21262 }, { "epoch": 0.770649849588634, "grad_norm": 2.5320518777561603, "learning_rate": 1.3171675391173216e-06, "loss": 0.8467, "step": 21263 }, { "epoch": 0.7706860932912907, "grad_norm": 2.251168995019316, "learning_rate": 1.3167705866203405e-06, "loss": 0.9493, "step": 21264 }, { "epoch": 0.7707223369939473, "grad_norm": 2.6842867100406833, "learning_rate": 1.3163736848760356e-06, "loss": 0.8182, "step": 21265 }, { "epoch": 0.770758580696604, "grad_norm": 2.369820868451482, "learning_rate": 1.3159768338898715e-06, "loss": 0.9831, "step": 21266 }, { "epoch": 0.7707948243992606, "grad_norm": 2.0338510792335773, "learning_rate": 1.3155800336673186e-06, "loss": 0.7508, "step": 21267 }, { "epoch": 0.7708310681019173, "grad_norm": 2.3005487812293732, "learning_rate": 1.3151832842138445e-06, "loss": 0.9721, "step": 21268 }, { "epoch": 0.7708673118045739, "grad_norm": 2.482927613879291, "learning_rate": 1.3147865855349185e-06, "loss": 1.1226, "step": 21269 }, { "epoch": 0.7709035555072307, "grad_norm": 2.427838319119995, "learning_rate": 1.3143899376360026e-06, "loss": 0.8755, "step": 21270 }, { "epoch": 0.7709397992098873, "grad_norm": 2.6096355145895873, "learning_rate": 1.313993340522564e-06, "loss": 0.8983, "step": 21271 }, { "epoch": 0.770976042912544, "grad_norm": 2.53065123958378, "learning_rate": 1.3135967942000698e-06, "loss": 0.8866, "step": 21272 }, { "epoch": 0.7710122866152006, "grad_norm": 2.478218272889101, "learning_rate": 1.3132002986739805e-06, "loss": 0.9745, "step": 21273 }, { "epoch": 0.7710485303178573, "grad_norm": 2.3679244160368516, "learning_rate": 1.3128038539497635e-06, "loss": 0.883, "step": 21274 }, { "epoch": 0.7710847740205139, "grad_norm": 2.2793438616981727, "learning_rate": 1.3124074600328758e-06, "loss": 0.8836, "step": 21275 }, { "epoch": 0.7711210177231707, "grad_norm": 2.411861441683177, "learning_rate": 1.3120111169287864e-06, "loss": 0.9051, "step": 21276 }, { "epoch": 0.7711572614258273, "grad_norm": 2.387742364866397, "learning_rate": 1.3116148246429523e-06, "loss": 0.7769, "step": 21277 }, { "epoch": 0.771193505128484, "grad_norm": 2.4897065519988733, "learning_rate": 1.311218583180836e-06, "loss": 0.7816, "step": 21278 }, { "epoch": 0.7712297488311406, "grad_norm": 2.4696173880955197, "learning_rate": 1.3108223925478958e-06, "loss": 0.9208, "step": 21279 }, { "epoch": 0.7712659925337972, "grad_norm": 2.5470427939224973, "learning_rate": 1.3104262527495915e-06, "loss": 0.795, "step": 21280 }, { "epoch": 0.7713022362364539, "grad_norm": 2.32658188693878, "learning_rate": 1.3100301637913837e-06, "loss": 0.9015, "step": 21281 }, { "epoch": 0.7713384799391105, "grad_norm": 2.602080926374385, "learning_rate": 1.309634125678727e-06, "loss": 0.6825, "step": 21282 }, { "epoch": 0.7713747236417673, "grad_norm": 2.2774021385132914, "learning_rate": 1.3092381384170805e-06, "loss": 0.7809, "step": 21283 }, { "epoch": 0.7714109673444239, "grad_norm": 2.31902043548654, "learning_rate": 1.3088422020119002e-06, "loss": 0.7354, "step": 21284 }, { "epoch": 0.7714472110470806, "grad_norm": 2.1405465664341143, "learning_rate": 1.3084463164686433e-06, "loss": 0.8466, "step": 21285 }, { "epoch": 0.7714834547497372, "grad_norm": 2.7083329358062134, "learning_rate": 1.3080504817927626e-06, "loss": 0.9477, "step": 21286 }, { "epoch": 0.7715196984523939, "grad_norm": 2.4060879754807725, "learning_rate": 1.3076546979897132e-06, "loss": 0.8545, "step": 21287 }, { "epoch": 0.7715559421550505, "grad_norm": 2.3908759208982455, "learning_rate": 1.3072589650649487e-06, "loss": 0.771, "step": 21288 }, { "epoch": 0.7715921858577072, "grad_norm": 2.2928513104709145, "learning_rate": 1.3068632830239243e-06, "loss": 0.7627, "step": 21289 }, { "epoch": 0.7716284295603639, "grad_norm": 2.3284360251014173, "learning_rate": 1.3064676518720882e-06, "loss": 0.9737, "step": 21290 }, { "epoch": 0.7716646732630206, "grad_norm": 2.498962996601028, "learning_rate": 1.3060720716148946e-06, "loss": 0.8752, "step": 21291 }, { "epoch": 0.7717009169656772, "grad_norm": 2.877945888682123, "learning_rate": 1.305676542257795e-06, "loss": 0.9384, "step": 21292 }, { "epoch": 0.7717371606683339, "grad_norm": 2.325762281524164, "learning_rate": 1.3052810638062373e-06, "loss": 0.9658, "step": 21293 }, { "epoch": 0.7717734043709905, "grad_norm": 2.4100556130638573, "learning_rate": 1.3048856362656731e-06, "loss": 0.7507, "step": 21294 }, { "epoch": 0.7718096480736472, "grad_norm": 2.6684857777330335, "learning_rate": 1.3044902596415477e-06, "loss": 0.9392, "step": 21295 }, { "epoch": 0.7718458917763039, "grad_norm": 2.0819357588297316, "learning_rate": 1.3040949339393139e-06, "loss": 0.7341, "step": 21296 }, { "epoch": 0.7718821354789606, "grad_norm": 2.442461776737471, "learning_rate": 1.3036996591644157e-06, "loss": 0.8568, "step": 21297 }, { "epoch": 0.7719183791816172, "grad_norm": 2.200556510881997, "learning_rate": 1.3033044353223017e-06, "loss": 1.0432, "step": 21298 }, { "epoch": 0.7719546228842739, "grad_norm": 2.429030990990786, "learning_rate": 1.3029092624184158e-06, "loss": 0.9166, "step": 21299 }, { "epoch": 0.7719908665869305, "grad_norm": 2.3322176408752786, "learning_rate": 1.3025141404582042e-06, "loss": 0.778, "step": 21300 }, { "epoch": 0.7720271102895871, "grad_norm": 2.466583592207307, "learning_rate": 1.3021190694471131e-06, "loss": 1.0358, "step": 21301 }, { "epoch": 0.7720633539922438, "grad_norm": 2.3654074148581543, "learning_rate": 1.3017240493905837e-06, "loss": 0.897, "step": 21302 }, { "epoch": 0.7720995976949006, "grad_norm": 1.9899796668464251, "learning_rate": 1.3013290802940597e-06, "loss": 0.826, "step": 21303 }, { "epoch": 0.7721358413975572, "grad_norm": 2.36608633695481, "learning_rate": 1.3009341621629845e-06, "loss": 1.0335, "step": 21304 }, { "epoch": 0.7721720851002138, "grad_norm": 2.2745655489124292, "learning_rate": 1.3005392950028007e-06, "loss": 0.8191, "step": 21305 }, { "epoch": 0.7722083288028705, "grad_norm": 2.2993893289454737, "learning_rate": 1.3001444788189466e-06, "loss": 0.6382, "step": 21306 }, { "epoch": 0.7722445725055271, "grad_norm": 2.246064909289165, "learning_rate": 1.299749713616864e-06, "loss": 0.9143, "step": 21307 }, { "epoch": 0.7722808162081838, "grad_norm": 2.5036206645519017, "learning_rate": 1.2993549994019922e-06, "loss": 0.8401, "step": 21308 }, { "epoch": 0.7723170599108405, "grad_norm": 2.0205873329229713, "learning_rate": 1.2989603361797727e-06, "loss": 0.7504, "step": 21309 }, { "epoch": 0.7723533036134972, "grad_norm": 2.4123259551856195, "learning_rate": 1.2985657239556393e-06, "loss": 0.9069, "step": 21310 }, { "epoch": 0.7723895473161538, "grad_norm": 2.724425118781066, "learning_rate": 1.2981711627350314e-06, "loss": 0.8391, "step": 21311 }, { "epoch": 0.7724257910188105, "grad_norm": 2.1927449680941726, "learning_rate": 1.2977766525233881e-06, "loss": 0.9364, "step": 21312 }, { "epoch": 0.7724620347214671, "grad_norm": 2.232131451852219, "learning_rate": 1.2973821933261422e-06, "loss": 0.7482, "step": 21313 }, { "epoch": 0.7724982784241238, "grad_norm": 2.484260288575976, "learning_rate": 1.2969877851487312e-06, "loss": 1.053, "step": 21314 }, { "epoch": 0.7725345221267804, "grad_norm": 2.5068181663197397, "learning_rate": 1.2965934279965863e-06, "loss": 0.8992, "step": 21315 }, { "epoch": 0.7725707658294372, "grad_norm": 2.435514455145205, "learning_rate": 1.2961991218751473e-06, "loss": 0.8548, "step": 21316 }, { "epoch": 0.7726070095320938, "grad_norm": 2.242766125921433, "learning_rate": 1.2958048667898432e-06, "loss": 0.9951, "step": 21317 }, { "epoch": 0.7726432532347505, "grad_norm": 2.46330459822463, "learning_rate": 1.2954106627461088e-06, "loss": 0.9779, "step": 21318 }, { "epoch": 0.7726794969374071, "grad_norm": 2.042233377785144, "learning_rate": 1.2950165097493738e-06, "loss": 0.8308, "step": 21319 }, { "epoch": 0.7727157406400638, "grad_norm": 2.394236490201581, "learning_rate": 1.2946224078050706e-06, "loss": 0.9304, "step": 21320 }, { "epoch": 0.7727519843427204, "grad_norm": 2.543983685114667, "learning_rate": 1.2942283569186316e-06, "loss": 0.8431, "step": 21321 }, { "epoch": 0.7727882280453772, "grad_norm": 2.4437282874986805, "learning_rate": 1.2938343570954827e-06, "loss": 1.0166, "step": 21322 }, { "epoch": 0.7728244717480338, "grad_norm": 2.3484279931127907, "learning_rate": 1.2934404083410547e-06, "loss": 0.8027, "step": 21323 }, { "epoch": 0.7728607154506905, "grad_norm": 2.4581417725419175, "learning_rate": 1.2930465106607764e-06, "loss": 0.8983, "step": 21324 }, { "epoch": 0.7728969591533471, "grad_norm": 2.2368795782677737, "learning_rate": 1.2926526640600768e-06, "loss": 0.9789, "step": 21325 }, { "epoch": 0.7729332028560038, "grad_norm": 2.007834636289042, "learning_rate": 1.2922588685443798e-06, "loss": 0.6991, "step": 21326 }, { "epoch": 0.7729694465586604, "grad_norm": 2.0555658251079514, "learning_rate": 1.2918651241191131e-06, "loss": 0.9198, "step": 21327 }, { "epoch": 0.773005690261317, "grad_norm": 2.057755129822995, "learning_rate": 1.2914714307897042e-06, "loss": 0.8906, "step": 21328 }, { "epoch": 0.7730419339639738, "grad_norm": 2.224801949390486, "learning_rate": 1.2910777885615745e-06, "loss": 0.7418, "step": 21329 }, { "epoch": 0.7730781776666305, "grad_norm": 2.3027465598406036, "learning_rate": 1.2906841974401502e-06, "loss": 0.9485, "step": 21330 }, { "epoch": 0.7731144213692871, "grad_norm": 2.0497270207668343, "learning_rate": 1.2902906574308537e-06, "loss": 0.8099, "step": 21331 }, { "epoch": 0.7731506650719437, "grad_norm": 2.4595532680382286, "learning_rate": 1.2898971685391098e-06, "loss": 0.6649, "step": 21332 }, { "epoch": 0.7731869087746004, "grad_norm": 2.1919715231845425, "learning_rate": 1.2895037307703384e-06, "loss": 0.8565, "step": 21333 }, { "epoch": 0.773223152477257, "grad_norm": 2.433286367811101, "learning_rate": 1.2891103441299608e-06, "loss": 0.8773, "step": 21334 }, { "epoch": 0.7732593961799138, "grad_norm": 2.392371560952618, "learning_rate": 1.288717008623399e-06, "loss": 0.8914, "step": 21335 }, { "epoch": 0.7732956398825704, "grad_norm": 2.8486438965182175, "learning_rate": 1.2883237242560732e-06, "loss": 0.875, "step": 21336 }, { "epoch": 0.7733318835852271, "grad_norm": 2.2419264861183796, "learning_rate": 1.2879304910334006e-06, "loss": 0.8292, "step": 21337 }, { "epoch": 0.7733681272878837, "grad_norm": 2.182961752832596, "learning_rate": 1.2875373089608007e-06, "loss": 0.6805, "step": 21338 }, { "epoch": 0.7734043709905404, "grad_norm": 2.4394995505765196, "learning_rate": 1.2871441780436932e-06, "loss": 0.9483, "step": 21339 }, { "epoch": 0.773440614693197, "grad_norm": 2.777646075079815, "learning_rate": 1.286751098287492e-06, "loss": 0.8455, "step": 21340 }, { "epoch": 0.7734768583958537, "grad_norm": 2.3111642145410154, "learning_rate": 1.2863580696976163e-06, "loss": 0.7769, "step": 21341 }, { "epoch": 0.7735131020985104, "grad_norm": 2.5822868403215073, "learning_rate": 1.2859650922794769e-06, "loss": 0.8979, "step": 21342 }, { "epoch": 0.7735493458011671, "grad_norm": 2.798389409047094, "learning_rate": 1.2855721660384962e-06, "loss": 0.8515, "step": 21343 }, { "epoch": 0.7735855895038237, "grad_norm": 2.335317376657981, "learning_rate": 1.2851792909800826e-06, "loss": 1.1224, "step": 21344 }, { "epoch": 0.7736218332064804, "grad_norm": 2.4218895322417753, "learning_rate": 1.2847864671096538e-06, "loss": 1.0808, "step": 21345 }, { "epoch": 0.773658076909137, "grad_norm": 2.378855698272121, "learning_rate": 1.2843936944326186e-06, "loss": 0.8629, "step": 21346 }, { "epoch": 0.7736943206117937, "grad_norm": 2.354432890622266, "learning_rate": 1.2840009729543912e-06, "loss": 0.877, "step": 21347 }, { "epoch": 0.7737305643144504, "grad_norm": 2.7776083877795656, "learning_rate": 1.2836083026803846e-06, "loss": 0.9798, "step": 21348 }, { "epoch": 0.7737668080171071, "grad_norm": 2.6875920728313227, "learning_rate": 1.2832156836160065e-06, "loss": 0.855, "step": 21349 }, { "epoch": 0.7738030517197637, "grad_norm": 2.2726075085583903, "learning_rate": 1.2828231157666687e-06, "loss": 0.8926, "step": 21350 }, { "epoch": 0.7738392954224204, "grad_norm": 2.34276069526293, "learning_rate": 1.28243059913778e-06, "loss": 0.8964, "step": 21351 }, { "epoch": 0.773875539125077, "grad_norm": 3.1723183561368233, "learning_rate": 1.282038133734751e-06, "loss": 1.0829, "step": 21352 }, { "epoch": 0.7739117828277337, "grad_norm": 2.631465266697728, "learning_rate": 1.281645719562986e-06, "loss": 1.011, "step": 21353 }, { "epoch": 0.7739480265303903, "grad_norm": 2.8954101833015895, "learning_rate": 1.2812533566278944e-06, "loss": 0.9183, "step": 21354 }, { "epoch": 0.7739842702330471, "grad_norm": 2.4267196000264075, "learning_rate": 1.280861044934883e-06, "loss": 0.8963, "step": 21355 }, { "epoch": 0.7740205139357037, "grad_norm": 2.7179809093405507, "learning_rate": 1.2804687844893582e-06, "loss": 1.1072, "step": 21356 }, { "epoch": 0.7740567576383603, "grad_norm": 2.273730499768889, "learning_rate": 1.2800765752967225e-06, "loss": 0.8609, "step": 21357 }, { "epoch": 0.774093001341017, "grad_norm": 2.6698559011015943, "learning_rate": 1.2796844173623818e-06, "loss": 0.9368, "step": 21358 }, { "epoch": 0.7741292450436736, "grad_norm": 2.361579353434205, "learning_rate": 1.2792923106917416e-06, "loss": 1.0216, "step": 21359 }, { "epoch": 0.7741654887463303, "grad_norm": 2.452748034239002, "learning_rate": 1.2789002552902019e-06, "loss": 1.0334, "step": 21360 }, { "epoch": 0.7742017324489869, "grad_norm": 2.154715886680559, "learning_rate": 1.2785082511631674e-06, "loss": 0.9849, "step": 21361 }, { "epoch": 0.7742379761516437, "grad_norm": 2.3329835040744835, "learning_rate": 1.2781162983160355e-06, "loss": 0.7778, "step": 21362 }, { "epoch": 0.7742742198543003, "grad_norm": 2.2932033667474374, "learning_rate": 1.277724396754213e-06, "loss": 0.9294, "step": 21363 }, { "epoch": 0.774310463556957, "grad_norm": 2.3406824894756078, "learning_rate": 1.277332546483096e-06, "loss": 0.834, "step": 21364 }, { "epoch": 0.7743467072596136, "grad_norm": 2.281299130947733, "learning_rate": 1.2769407475080865e-06, "loss": 0.7343, "step": 21365 }, { "epoch": 0.7743829509622703, "grad_norm": 2.4638758746748284, "learning_rate": 1.2765489998345798e-06, "loss": 0.9861, "step": 21366 }, { "epoch": 0.7744191946649269, "grad_norm": 2.349132613087885, "learning_rate": 1.2761573034679764e-06, "loss": 1.0212, "step": 21367 }, { "epoch": 0.7744554383675837, "grad_norm": 2.4184300475197698, "learning_rate": 1.275765658413675e-06, "loss": 0.8054, "step": 21368 }, { "epoch": 0.7744916820702403, "grad_norm": 2.5055946706914805, "learning_rate": 1.2753740646770684e-06, "loss": 0.7203, "step": 21369 }, { "epoch": 0.774527925772897, "grad_norm": 2.5028397808278764, "learning_rate": 1.2749825222635553e-06, "loss": 0.8536, "step": 21370 }, { "epoch": 0.7745641694755536, "grad_norm": 2.577112005734824, "learning_rate": 1.2745910311785298e-06, "loss": 0.872, "step": 21371 }, { "epoch": 0.7746004131782103, "grad_norm": 2.369847484420301, "learning_rate": 1.2741995914273886e-06, "loss": 0.915, "step": 21372 }, { "epoch": 0.7746366568808669, "grad_norm": 2.526305827199712, "learning_rate": 1.2738082030155219e-06, "loss": 1.0006, "step": 21373 }, { "epoch": 0.7746729005835236, "grad_norm": 2.2506847902749616, "learning_rate": 1.2734168659483248e-06, "loss": 0.8028, "step": 21374 }, { "epoch": 0.7747091442861803, "grad_norm": 2.3102638813943086, "learning_rate": 1.2730255802311898e-06, "loss": 0.8532, "step": 21375 }, { "epoch": 0.774745387988837, "grad_norm": 2.2252419846633376, "learning_rate": 1.2726343458695095e-06, "loss": 0.8466, "step": 21376 }, { "epoch": 0.7747816316914936, "grad_norm": 2.2069901206329834, "learning_rate": 1.272243162868672e-06, "loss": 0.9422, "step": 21377 }, { "epoch": 0.7748178753941503, "grad_norm": 2.2103154043388002, "learning_rate": 1.27185203123407e-06, "loss": 0.773, "step": 21378 }, { "epoch": 0.7748541190968069, "grad_norm": 2.4193771848477628, "learning_rate": 1.2714609509710928e-06, "loss": 0.8651, "step": 21379 }, { "epoch": 0.7748903627994636, "grad_norm": 2.0392195500400994, "learning_rate": 1.271069922085128e-06, "loss": 0.8018, "step": 21380 }, { "epoch": 0.7749266065021203, "grad_norm": 2.3546208269363675, "learning_rate": 1.2706789445815659e-06, "loss": 0.795, "step": 21381 }, { "epoch": 0.774962850204777, "grad_norm": 2.410465917741987, "learning_rate": 1.270288018465789e-06, "loss": 0.8849, "step": 21382 }, { "epoch": 0.7749990939074336, "grad_norm": 2.015103384445014, "learning_rate": 1.2698971437431912e-06, "loss": 0.6923, "step": 21383 }, { "epoch": 0.7750353376100902, "grad_norm": 2.7563045686274914, "learning_rate": 1.2695063204191527e-06, "loss": 0.9322, "step": 21384 }, { "epoch": 0.7750715813127469, "grad_norm": 2.3354213698007342, "learning_rate": 1.269115548499063e-06, "loss": 0.8866, "step": 21385 }, { "epoch": 0.7751078250154035, "grad_norm": 2.5241206731007333, "learning_rate": 1.268724827988303e-06, "loss": 0.8803, "step": 21386 }, { "epoch": 0.7751440687180602, "grad_norm": 2.1901261629054836, "learning_rate": 1.2683341588922582e-06, "loss": 1.0148, "step": 21387 }, { "epoch": 0.7751803124207169, "grad_norm": 2.6823762986841757, "learning_rate": 1.2679435412163132e-06, "loss": 0.9503, "step": 21388 }, { "epoch": 0.7752165561233736, "grad_norm": 2.4596772111001917, "learning_rate": 1.2675529749658482e-06, "loss": 0.6858, "step": 21389 }, { "epoch": 0.7752527998260302, "grad_norm": 2.3380077772554713, "learning_rate": 1.267162460146245e-06, "loss": 0.8278, "step": 21390 }, { "epoch": 0.7752890435286869, "grad_norm": 2.477345046343818, "learning_rate": 1.2667719967628867e-06, "loss": 0.8442, "step": 21391 }, { "epoch": 0.7753252872313435, "grad_norm": 2.433670684885375, "learning_rate": 1.266381584821153e-06, "loss": 0.86, "step": 21392 }, { "epoch": 0.7753615309340002, "grad_norm": 2.3045803380199783, "learning_rate": 1.2659912243264221e-06, "loss": 0.9915, "step": 21393 }, { "epoch": 0.7753977746366569, "grad_norm": 1.85218815060063, "learning_rate": 1.2656009152840737e-06, "loss": 0.8073, "step": 21394 }, { "epoch": 0.7754340183393136, "grad_norm": 2.3735335663459507, "learning_rate": 1.265210657699486e-06, "loss": 0.7044, "step": 21395 }, { "epoch": 0.7754702620419702, "grad_norm": 2.2983974603238977, "learning_rate": 1.2648204515780387e-06, "loss": 0.8526, "step": 21396 }, { "epoch": 0.7755065057446269, "grad_norm": 2.4396343351196843, "learning_rate": 1.264430296925105e-06, "loss": 0.899, "step": 21397 }, { "epoch": 0.7755427494472835, "grad_norm": 2.7601594158642335, "learning_rate": 1.2640401937460623e-06, "loss": 0.9614, "step": 21398 }, { "epoch": 0.7755789931499402, "grad_norm": 2.2899998126815633, "learning_rate": 1.2636501420462877e-06, "loss": 0.8301, "step": 21399 }, { "epoch": 0.7756152368525968, "grad_norm": 2.399066351972015, "learning_rate": 1.263260141831153e-06, "loss": 0.9292, "step": 21400 }, { "epoch": 0.7756514805552536, "grad_norm": 2.5281287414911926, "learning_rate": 1.2628701931060356e-06, "loss": 1.2694, "step": 21401 }, { "epoch": 0.7756877242579102, "grad_norm": 2.388369977746962, "learning_rate": 1.2624802958763032e-06, "loss": 0.9769, "step": 21402 }, { "epoch": 0.7757239679605669, "grad_norm": 2.721716563314223, "learning_rate": 1.2620904501473353e-06, "loss": 0.9049, "step": 21403 }, { "epoch": 0.7757602116632235, "grad_norm": 2.5969522129642497, "learning_rate": 1.2617006559244993e-06, "loss": 1.0192, "step": 21404 }, { "epoch": 0.7757964553658802, "grad_norm": 2.410573857121997, "learning_rate": 1.2613109132131685e-06, "loss": 0.8735, "step": 21405 }, { "epoch": 0.7758326990685368, "grad_norm": 2.34948185351359, "learning_rate": 1.2609212220187105e-06, "loss": 0.8103, "step": 21406 }, { "epoch": 0.7758689427711936, "grad_norm": 2.4050142372618213, "learning_rate": 1.2605315823464969e-06, "loss": 0.9667, "step": 21407 }, { "epoch": 0.7759051864738502, "grad_norm": 2.49393155115854, "learning_rate": 1.2601419942018978e-06, "loss": 0.8487, "step": 21408 }, { "epoch": 0.7759414301765069, "grad_norm": 2.359879218312765, "learning_rate": 1.2597524575902776e-06, "loss": 0.8009, "step": 21409 }, { "epoch": 0.7759776738791635, "grad_norm": 2.598327137692689, "learning_rate": 1.2593629725170093e-06, "loss": 0.7311, "step": 21410 }, { "epoch": 0.7760139175818201, "grad_norm": 2.237766931955799, "learning_rate": 1.2589735389874558e-06, "loss": 0.7167, "step": 21411 }, { "epoch": 0.7760501612844768, "grad_norm": 2.762601108808655, "learning_rate": 1.2585841570069856e-06, "loss": 0.8872, "step": 21412 }, { "epoch": 0.7760864049871334, "grad_norm": 2.608341030398409, "learning_rate": 1.2581948265809618e-06, "loss": 0.874, "step": 21413 }, { "epoch": 0.7761226486897902, "grad_norm": 2.2663817086397655, "learning_rate": 1.2578055477147499e-06, "loss": 0.6906, "step": 21414 }, { "epoch": 0.7761588923924468, "grad_norm": 2.1916185091820948, "learning_rate": 1.2574163204137145e-06, "loss": 0.8058, "step": 21415 }, { "epoch": 0.7761951360951035, "grad_norm": 2.096962641724996, "learning_rate": 1.2570271446832206e-06, "loss": 0.7432, "step": 21416 }, { "epoch": 0.7762313797977601, "grad_norm": 2.27428117007824, "learning_rate": 1.2566380205286271e-06, "loss": 0.7791, "step": 21417 }, { "epoch": 0.7762676235004168, "grad_norm": 2.3177108415564094, "learning_rate": 1.2562489479552975e-06, "loss": 0.8636, "step": 21418 }, { "epoch": 0.7763038672030734, "grad_norm": 2.467484702958712, "learning_rate": 1.2558599269685951e-06, "loss": 0.9729, "step": 21419 }, { "epoch": 0.7763401109057301, "grad_norm": 2.04482908401454, "learning_rate": 1.2554709575738772e-06, "loss": 0.7586, "step": 21420 }, { "epoch": 0.7763763546083868, "grad_norm": 2.3176411397701813, "learning_rate": 1.2550820397765046e-06, "loss": 0.8879, "step": 21421 }, { "epoch": 0.7764125983110435, "grad_norm": 2.752894806914535, "learning_rate": 1.2546931735818368e-06, "loss": 0.9442, "step": 21422 }, { "epoch": 0.7764488420137001, "grad_norm": 2.154398458272423, "learning_rate": 1.2543043589952336e-06, "loss": 0.6364, "step": 21423 }, { "epoch": 0.7764850857163568, "grad_norm": 2.2471169348408493, "learning_rate": 1.253915596022049e-06, "loss": 0.8229, "step": 21424 }, { "epoch": 0.7765213294190134, "grad_norm": 2.4869645529117945, "learning_rate": 1.2535268846676424e-06, "loss": 0.9148, "step": 21425 }, { "epoch": 0.7765575731216701, "grad_norm": 2.2221490517394114, "learning_rate": 1.2531382249373708e-06, "loss": 0.9764, "step": 21426 }, { "epoch": 0.7765938168243268, "grad_norm": 2.169003324004803, "learning_rate": 1.2527496168365872e-06, "loss": 0.9556, "step": 21427 }, { "epoch": 0.7766300605269835, "grad_norm": 2.57572440180802, "learning_rate": 1.2523610603706488e-06, "loss": 0.9515, "step": 21428 }, { "epoch": 0.7766663042296401, "grad_norm": 2.043501429777662, "learning_rate": 1.2519725555449057e-06, "loss": 0.8723, "step": 21429 }, { "epoch": 0.7767025479322968, "grad_norm": 2.1617857200398434, "learning_rate": 1.2515841023647169e-06, "loss": 0.8528, "step": 21430 }, { "epoch": 0.7767387916349534, "grad_norm": 2.278746106964597, "learning_rate": 1.2511957008354308e-06, "loss": 0.8854, "step": 21431 }, { "epoch": 0.77677503533761, "grad_norm": 2.2624990046186255, "learning_rate": 1.250807350962402e-06, "loss": 0.9887, "step": 21432 }, { "epoch": 0.7768112790402667, "grad_norm": 2.178075973840455, "learning_rate": 1.250419052750979e-06, "loss": 0.8493, "step": 21433 }, { "epoch": 0.7768475227429235, "grad_norm": 2.101967165158078, "learning_rate": 1.250030806206514e-06, "loss": 0.8948, "step": 21434 }, { "epoch": 0.7768837664455801, "grad_norm": 2.265199692261699, "learning_rate": 1.2496426113343586e-06, "loss": 0.8155, "step": 21435 }, { "epoch": 0.7769200101482368, "grad_norm": 2.507609230484819, "learning_rate": 1.2492544681398577e-06, "loss": 0.9572, "step": 21436 }, { "epoch": 0.7769562538508934, "grad_norm": 2.6143664725498588, "learning_rate": 1.2488663766283615e-06, "loss": 0.925, "step": 21437 }, { "epoch": 0.77699249755355, "grad_norm": 2.055769947660484, "learning_rate": 1.248478336805219e-06, "loss": 0.7034, "step": 21438 }, { "epoch": 0.7770287412562067, "grad_norm": 2.287271054220181, "learning_rate": 1.2480903486757767e-06, "loss": 0.9259, "step": 21439 }, { "epoch": 0.7770649849588634, "grad_norm": 1.8808506561009617, "learning_rate": 1.2477024122453795e-06, "loss": 0.5697, "step": 21440 }, { "epoch": 0.7771012286615201, "grad_norm": 2.404860945340326, "learning_rate": 1.2473145275193733e-06, "loss": 0.9167, "step": 21441 }, { "epoch": 0.7771374723641767, "grad_norm": 2.368993364951073, "learning_rate": 1.2469266945031038e-06, "loss": 0.8722, "step": 21442 }, { "epoch": 0.7771737160668334, "grad_norm": 2.1754373891497134, "learning_rate": 1.2465389132019156e-06, "loss": 1.0123, "step": 21443 }, { "epoch": 0.77720995976949, "grad_norm": 2.4102150434848677, "learning_rate": 1.2461511836211503e-06, "loss": 0.9873, "step": 21444 }, { "epoch": 0.7772462034721467, "grad_norm": 2.2069344974372886, "learning_rate": 1.2457635057661511e-06, "loss": 0.961, "step": 21445 }, { "epoch": 0.7772824471748033, "grad_norm": 2.1676262732528655, "learning_rate": 1.2453758796422615e-06, "loss": 0.8021, "step": 21446 }, { "epoch": 0.7773186908774601, "grad_norm": 2.2465514415894585, "learning_rate": 1.2449883052548205e-06, "loss": 0.7721, "step": 21447 }, { "epoch": 0.7773549345801167, "grad_norm": 2.4130055472377085, "learning_rate": 1.244600782609171e-06, "loss": 0.9503, "step": 21448 }, { "epoch": 0.7773911782827734, "grad_norm": 2.1506976056892504, "learning_rate": 1.2442133117106486e-06, "loss": 0.753, "step": 21449 }, { "epoch": 0.77742742198543, "grad_norm": 2.946134546525323, "learning_rate": 1.2438258925645985e-06, "loss": 0.8823, "step": 21450 }, { "epoch": 0.7774636656880867, "grad_norm": 2.1442565838171728, "learning_rate": 1.2434385251763543e-06, "loss": 0.8127, "step": 21451 }, { "epoch": 0.7774999093907433, "grad_norm": 2.2823192820739955, "learning_rate": 1.2430512095512564e-06, "loss": 0.8421, "step": 21452 }, { "epoch": 0.7775361530934001, "grad_norm": 2.6506386377452116, "learning_rate": 1.2426639456946398e-06, "loss": 0.9154, "step": 21453 }, { "epoch": 0.7775723967960567, "grad_norm": 2.294321811003489, "learning_rate": 1.2422767336118413e-06, "loss": 0.9073, "step": 21454 }, { "epoch": 0.7776086404987134, "grad_norm": 2.5464324659254887, "learning_rate": 1.2418895733081988e-06, "loss": 1.0281, "step": 21455 }, { "epoch": 0.77764488420137, "grad_norm": 2.376477574239836, "learning_rate": 1.2415024647890433e-06, "loss": 0.858, "step": 21456 }, { "epoch": 0.7776811279040267, "grad_norm": 2.20283239163907, "learning_rate": 1.241115408059711e-06, "loss": 0.974, "step": 21457 }, { "epoch": 0.7777173716066833, "grad_norm": 2.6382758411692677, "learning_rate": 1.2407284031255346e-06, "loss": 0.8204, "step": 21458 }, { "epoch": 0.77775361530934, "grad_norm": 2.2104196534043083, "learning_rate": 1.2403414499918492e-06, "loss": 0.9515, "step": 21459 }, { "epoch": 0.7777898590119967, "grad_norm": 2.586109199434268, "learning_rate": 1.239954548663983e-06, "loss": 1.0222, "step": 21460 }, { "epoch": 0.7778261027146534, "grad_norm": 2.509923902907753, "learning_rate": 1.2395676991472694e-06, "loss": 0.7907, "step": 21461 }, { "epoch": 0.77786234641731, "grad_norm": 2.093423605818181, "learning_rate": 1.2391809014470386e-06, "loss": 0.8234, "step": 21462 }, { "epoch": 0.7778985901199666, "grad_norm": 2.4528410267176706, "learning_rate": 1.2387941555686217e-06, "loss": 0.8783, "step": 21463 }, { "epoch": 0.7779348338226233, "grad_norm": 2.1938872638506384, "learning_rate": 1.2384074615173458e-06, "loss": 0.7675, "step": 21464 }, { "epoch": 0.7779710775252799, "grad_norm": 2.5021380503405766, "learning_rate": 1.2380208192985394e-06, "loss": 0.9179, "step": 21465 }, { "epoch": 0.7780073212279367, "grad_norm": 2.399980510912567, "learning_rate": 1.237634228917533e-06, "loss": 0.9255, "step": 21466 }, { "epoch": 0.7780435649305933, "grad_norm": 2.593280388220866, "learning_rate": 1.2372476903796499e-06, "loss": 0.8991, "step": 21467 }, { "epoch": 0.77807980863325, "grad_norm": 2.016436842501773, "learning_rate": 1.2368612036902194e-06, "loss": 0.6216, "step": 21468 }, { "epoch": 0.7781160523359066, "grad_norm": 2.190169863166608, "learning_rate": 1.2364747688545625e-06, "loss": 0.8477, "step": 21469 }, { "epoch": 0.7781522960385633, "grad_norm": 2.3228415346207654, "learning_rate": 1.2360883858780104e-06, "loss": 0.899, "step": 21470 }, { "epoch": 0.7781885397412199, "grad_norm": 2.1869936034467345, "learning_rate": 1.235702054765882e-06, "loss": 0.6935, "step": 21471 }, { "epoch": 0.7782247834438766, "grad_norm": 2.295892540550368, "learning_rate": 1.2353157755235051e-06, "loss": 0.8684, "step": 21472 }, { "epoch": 0.7782610271465333, "grad_norm": 2.2423627502406456, "learning_rate": 1.234929548156198e-06, "loss": 0.9338, "step": 21473 }, { "epoch": 0.77829727084919, "grad_norm": 2.238874792399708, "learning_rate": 1.2345433726692846e-06, "loss": 0.9615, "step": 21474 }, { "epoch": 0.7783335145518466, "grad_norm": 2.015814658652308, "learning_rate": 1.2341572490680875e-06, "loss": 0.7509, "step": 21475 }, { "epoch": 0.7783697582545033, "grad_norm": 2.396310550715614, "learning_rate": 1.2337711773579253e-06, "loss": 0.892, "step": 21476 }, { "epoch": 0.7784060019571599, "grad_norm": 2.6906970820791565, "learning_rate": 1.233385157544118e-06, "loss": 0.8513, "step": 21477 }, { "epoch": 0.7784422456598166, "grad_norm": 2.5002703085489624, "learning_rate": 1.2329991896319855e-06, "loss": 0.8499, "step": 21478 }, { "epoch": 0.7784784893624733, "grad_norm": 2.5278519732805615, "learning_rate": 1.232613273626847e-06, "loss": 0.7233, "step": 21479 }, { "epoch": 0.77851473306513, "grad_norm": 2.393131268313212, "learning_rate": 1.232227409534018e-06, "loss": 0.8851, "step": 21480 }, { "epoch": 0.7785509767677866, "grad_norm": 2.5831071676126935, "learning_rate": 1.2318415973588165e-06, "loss": 1.0368, "step": 21481 }, { "epoch": 0.7785872204704433, "grad_norm": 1.9491270056842513, "learning_rate": 1.2314558371065593e-06, "loss": 0.7766, "step": 21482 }, { "epoch": 0.7786234641730999, "grad_norm": 2.1720246241246874, "learning_rate": 1.2310701287825627e-06, "loss": 0.9392, "step": 21483 }, { "epoch": 0.7786597078757566, "grad_norm": 2.2895284674065355, "learning_rate": 1.2306844723921385e-06, "loss": 0.9566, "step": 21484 }, { "epoch": 0.7786959515784132, "grad_norm": 2.3262453061621664, "learning_rate": 1.2302988679406031e-06, "loss": 0.793, "step": 21485 }, { "epoch": 0.77873219528107, "grad_norm": 2.5249643646471758, "learning_rate": 1.2299133154332709e-06, "loss": 0.9155, "step": 21486 }, { "epoch": 0.7787684389837266, "grad_norm": 2.2560718974723195, "learning_rate": 1.2295278148754515e-06, "loss": 0.9706, "step": 21487 }, { "epoch": 0.7788046826863833, "grad_norm": 2.215929329512083, "learning_rate": 1.22914236627246e-06, "loss": 0.9145, "step": 21488 }, { "epoch": 0.7788409263890399, "grad_norm": 2.4730563845526174, "learning_rate": 1.2287569696296037e-06, "loss": 0.7945, "step": 21489 }, { "epoch": 0.7788771700916965, "grad_norm": 2.3764192797309325, "learning_rate": 1.2283716249521975e-06, "loss": 0.9852, "step": 21490 }, { "epoch": 0.7789134137943532, "grad_norm": 2.3228830518158228, "learning_rate": 1.2279863322455488e-06, "loss": 0.786, "step": 21491 }, { "epoch": 0.7789496574970098, "grad_norm": 2.5887130504904734, "learning_rate": 1.2276010915149668e-06, "loss": 0.9755, "step": 21492 }, { "epoch": 0.7789859011996666, "grad_norm": 2.6325879454516743, "learning_rate": 1.2272159027657616e-06, "loss": 0.9329, "step": 21493 }, { "epoch": 0.7790221449023232, "grad_norm": 2.169517305161526, "learning_rate": 1.2268307660032385e-06, "loss": 0.7585, "step": 21494 }, { "epoch": 0.7790583886049799, "grad_norm": 2.310035442946661, "learning_rate": 1.2264456812327069e-06, "loss": 0.9099, "step": 21495 }, { "epoch": 0.7790946323076365, "grad_norm": 2.105950383139008, "learning_rate": 1.2260606484594683e-06, "loss": 0.7957, "step": 21496 }, { "epoch": 0.7791308760102932, "grad_norm": 2.1355783422831855, "learning_rate": 1.2256756676888348e-06, "loss": 0.7763, "step": 21497 }, { "epoch": 0.7791671197129498, "grad_norm": 2.24885561625909, "learning_rate": 1.2252907389261064e-06, "loss": 0.81, "step": 21498 }, { "epoch": 0.7792033634156066, "grad_norm": 2.4000429366212774, "learning_rate": 1.2249058621765903e-06, "loss": 0.9326, "step": 21499 }, { "epoch": 0.7792396071182632, "grad_norm": 2.382882345504795, "learning_rate": 1.2245210374455868e-06, "loss": 0.9684, "step": 21500 }, { "epoch": 0.7792758508209199, "grad_norm": 2.141672848092645, "learning_rate": 1.2241362647383998e-06, "loss": 1.0171, "step": 21501 }, { "epoch": 0.7793120945235765, "grad_norm": 2.2883604742963506, "learning_rate": 1.223751544060332e-06, "loss": 0.7059, "step": 21502 }, { "epoch": 0.7793483382262332, "grad_norm": 2.3109685645608042, "learning_rate": 1.2233668754166856e-06, "loss": 0.7112, "step": 21503 }, { "epoch": 0.7793845819288898, "grad_norm": 2.277335390791919, "learning_rate": 1.2229822588127578e-06, "loss": 0.8715, "step": 21504 }, { "epoch": 0.7794208256315465, "grad_norm": 2.672838763929056, "learning_rate": 1.2225976942538503e-06, "loss": 0.8576, "step": 21505 }, { "epoch": 0.7794570693342032, "grad_norm": 2.805965914901894, "learning_rate": 1.2222131817452632e-06, "loss": 0.8684, "step": 21506 }, { "epoch": 0.7794933130368599, "grad_norm": 2.634109870889934, "learning_rate": 1.221828721292293e-06, "loss": 0.9417, "step": 21507 }, { "epoch": 0.7795295567395165, "grad_norm": 2.560027204979027, "learning_rate": 1.2214443129002373e-06, "loss": 0.9119, "step": 21508 }, { "epoch": 0.7795658004421732, "grad_norm": 2.2288958188532426, "learning_rate": 1.221059956574394e-06, "loss": 0.7088, "step": 21509 }, { "epoch": 0.7796020441448298, "grad_norm": 2.241577841573015, "learning_rate": 1.2206756523200607e-06, "loss": 0.8377, "step": 21510 }, { "epoch": 0.7796382878474865, "grad_norm": 2.2552960962002553, "learning_rate": 1.2202914001425304e-06, "loss": 0.962, "step": 21511 }, { "epoch": 0.7796745315501432, "grad_norm": 2.576069690107435, "learning_rate": 1.219907200047098e-06, "loss": 0.9592, "step": 21512 }, { "epoch": 0.7797107752527999, "grad_norm": 2.6402201528514726, "learning_rate": 1.2195230520390593e-06, "loss": 1.1067, "step": 21513 }, { "epoch": 0.7797470189554565, "grad_norm": 2.60710852531342, "learning_rate": 1.219138956123706e-06, "loss": 1.0543, "step": 21514 }, { "epoch": 0.7797832626581132, "grad_norm": 2.535949191006715, "learning_rate": 1.2187549123063324e-06, "loss": 0.8952, "step": 21515 }, { "epoch": 0.7798195063607698, "grad_norm": 2.2105034583019108, "learning_rate": 1.2183709205922262e-06, "loss": 0.8627, "step": 21516 }, { "epoch": 0.7798557500634264, "grad_norm": 2.374776701124664, "learning_rate": 1.2179869809866846e-06, "loss": 0.8335, "step": 21517 }, { "epoch": 0.7798919937660831, "grad_norm": 2.121869796528491, "learning_rate": 1.2176030934949939e-06, "loss": 0.8997, "step": 21518 }, { "epoch": 0.7799282374687398, "grad_norm": 2.2873096187142696, "learning_rate": 1.2172192581224462e-06, "loss": 0.8092, "step": 21519 }, { "epoch": 0.7799644811713965, "grad_norm": 2.5100305452174876, "learning_rate": 1.2168354748743282e-06, "loss": 0.8532, "step": 21520 }, { "epoch": 0.7800007248740531, "grad_norm": 2.417778910465026, "learning_rate": 1.2164517437559291e-06, "loss": 1.078, "step": 21521 }, { "epoch": 0.7800369685767098, "grad_norm": 2.312299779646947, "learning_rate": 1.2160680647725386e-06, "loss": 1.0095, "step": 21522 }, { "epoch": 0.7800732122793664, "grad_norm": 2.221168765919856, "learning_rate": 1.2156844379294398e-06, "loss": 0.8116, "step": 21523 }, { "epoch": 0.7801094559820231, "grad_norm": 2.323514261871507, "learning_rate": 1.2153008632319213e-06, "loss": 0.8079, "step": 21524 }, { "epoch": 0.7801456996846798, "grad_norm": 2.533340219915493, "learning_rate": 1.2149173406852677e-06, "loss": 0.8692, "step": 21525 }, { "epoch": 0.7801819433873365, "grad_norm": 2.121440475128462, "learning_rate": 1.2145338702947652e-06, "loss": 0.7002, "step": 21526 }, { "epoch": 0.7802181870899931, "grad_norm": 2.4249212775461944, "learning_rate": 1.2141504520656955e-06, "loss": 0.7655, "step": 21527 }, { "epoch": 0.7802544307926498, "grad_norm": 2.422002212287699, "learning_rate": 1.213767086003343e-06, "loss": 0.8152, "step": 21528 }, { "epoch": 0.7802906744953064, "grad_norm": 2.2767645583667893, "learning_rate": 1.2133837721129899e-06, "loss": 0.8964, "step": 21529 }, { "epoch": 0.7803269181979631, "grad_norm": 2.673565910723443, "learning_rate": 1.2130005103999198e-06, "loss": 0.8335, "step": 21530 }, { "epoch": 0.7803631619006197, "grad_norm": 2.0674656315372237, "learning_rate": 1.2126173008694115e-06, "loss": 0.9968, "step": 21531 }, { "epoch": 0.7803994056032765, "grad_norm": 2.277828471909573, "learning_rate": 1.2122341435267453e-06, "loss": 0.9603, "step": 21532 }, { "epoch": 0.7804356493059331, "grad_norm": 2.669109194474923, "learning_rate": 1.2118510383772041e-06, "loss": 1.0728, "step": 21533 }, { "epoch": 0.7804718930085898, "grad_norm": 2.1296805480746905, "learning_rate": 1.211467985426063e-06, "loss": 0.6681, "step": 21534 }, { "epoch": 0.7805081367112464, "grad_norm": 1.9926317411863566, "learning_rate": 1.2110849846786027e-06, "loss": 0.875, "step": 21535 }, { "epoch": 0.7805443804139031, "grad_norm": 2.443613742937212, "learning_rate": 1.2107020361400979e-06, "loss": 0.7788, "step": 21536 }, { "epoch": 0.7805806241165597, "grad_norm": 2.5854020332650225, "learning_rate": 1.2103191398158293e-06, "loss": 0.95, "step": 21537 }, { "epoch": 0.7806168678192165, "grad_norm": 2.860562742562788, "learning_rate": 1.2099362957110699e-06, "loss": 0.8113, "step": 21538 }, { "epoch": 0.7806531115218731, "grad_norm": 2.617008513570323, "learning_rate": 1.209553503831098e-06, "loss": 0.8792, "step": 21539 }, { "epoch": 0.7806893552245298, "grad_norm": 2.5150774892221466, "learning_rate": 1.2091707641811844e-06, "loss": 0.8148, "step": 21540 }, { "epoch": 0.7807255989271864, "grad_norm": 2.504158488709562, "learning_rate": 1.2087880767666054e-06, "loss": 0.9292, "step": 21541 }, { "epoch": 0.780761842629843, "grad_norm": 2.3421018255937374, "learning_rate": 1.2084054415926356e-06, "loss": 0.9411, "step": 21542 }, { "epoch": 0.7807980863324997, "grad_norm": 2.34756183468043, "learning_rate": 1.2080228586645438e-06, "loss": 0.8138, "step": 21543 }, { "epoch": 0.7808343300351563, "grad_norm": 2.624289586361598, "learning_rate": 1.2076403279876042e-06, "loss": 0.943, "step": 21544 }, { "epoch": 0.7808705737378131, "grad_norm": 2.4834580114411042, "learning_rate": 1.2072578495670866e-06, "loss": 1.0331, "step": 21545 }, { "epoch": 0.7809068174404697, "grad_norm": 2.3998233391869137, "learning_rate": 1.206875423408264e-06, "loss": 0.832, "step": 21546 }, { "epoch": 0.7809430611431264, "grad_norm": 2.2329764257327573, "learning_rate": 1.2064930495164024e-06, "loss": 0.8468, "step": 21547 }, { "epoch": 0.780979304845783, "grad_norm": 2.3048287086859385, "learning_rate": 1.2061107278967727e-06, "loss": 0.7838, "step": 21548 }, { "epoch": 0.7810155485484397, "grad_norm": 2.176465666661922, "learning_rate": 1.2057284585546425e-06, "loss": 0.8667, "step": 21549 }, { "epoch": 0.7810517922510963, "grad_norm": 2.1719899134953105, "learning_rate": 1.2053462414952805e-06, "loss": 0.899, "step": 21550 }, { "epoch": 0.7810880359537531, "grad_norm": 2.3258805384568055, "learning_rate": 1.2049640767239513e-06, "loss": 0.9116, "step": 21551 }, { "epoch": 0.7811242796564097, "grad_norm": 2.52056150921026, "learning_rate": 1.2045819642459221e-06, "loss": 0.8412, "step": 21552 }, { "epoch": 0.7811605233590664, "grad_norm": 2.7107194865624944, "learning_rate": 1.2041999040664592e-06, "loss": 1.0979, "step": 21553 }, { "epoch": 0.781196767061723, "grad_norm": 2.3407401949617905, "learning_rate": 1.2038178961908248e-06, "loss": 0.7536, "step": 21554 }, { "epoch": 0.7812330107643797, "grad_norm": 2.2559310797004195, "learning_rate": 1.2034359406242852e-06, "loss": 0.7505, "step": 21555 }, { "epoch": 0.7812692544670363, "grad_norm": 2.464690934568092, "learning_rate": 1.2030540373720988e-06, "loss": 0.923, "step": 21556 }, { "epoch": 0.781305498169693, "grad_norm": 2.489410737266253, "learning_rate": 1.2026721864395352e-06, "loss": 0.9683, "step": 21557 }, { "epoch": 0.7813417418723497, "grad_norm": 2.7188827655819607, "learning_rate": 1.2022903878318504e-06, "loss": 0.9113, "step": 21558 }, { "epoch": 0.7813779855750064, "grad_norm": 2.306835014878613, "learning_rate": 1.2019086415543091e-06, "loss": 0.9214, "step": 21559 }, { "epoch": 0.781414229277663, "grad_norm": 2.697592195252785, "learning_rate": 1.2015269476121682e-06, "loss": 0.9618, "step": 21560 }, { "epoch": 0.7814504729803197, "grad_norm": 2.1091768258063506, "learning_rate": 1.2011453060106888e-06, "loss": 0.802, "step": 21561 }, { "epoch": 0.7814867166829763, "grad_norm": 2.3185807727163716, "learning_rate": 1.2007637167551306e-06, "loss": 0.9204, "step": 21562 }, { "epoch": 0.781522960385633, "grad_norm": 2.5451886860213095, "learning_rate": 1.2003821798507493e-06, "loss": 0.9944, "step": 21563 }, { "epoch": 0.7815592040882896, "grad_norm": 2.436594585879178, "learning_rate": 1.2000006953028037e-06, "loss": 0.8562, "step": 21564 }, { "epoch": 0.7815954477909464, "grad_norm": 2.6726560764576566, "learning_rate": 1.1996192631165505e-06, "loss": 1.0724, "step": 21565 }, { "epoch": 0.781631691493603, "grad_norm": 2.37859132352453, "learning_rate": 1.1992378832972467e-06, "loss": 0.8676, "step": 21566 }, { "epoch": 0.7816679351962597, "grad_norm": 2.307395856272458, "learning_rate": 1.1988565558501453e-06, "loss": 0.8375, "step": 21567 }, { "epoch": 0.7817041788989163, "grad_norm": 2.1271293651141177, "learning_rate": 1.1984752807805012e-06, "loss": 0.8657, "step": 21568 }, { "epoch": 0.781740422601573, "grad_norm": 2.164204808223401, "learning_rate": 1.1980940580935684e-06, "loss": 0.6569, "step": 21569 }, { "epoch": 0.7817766663042296, "grad_norm": 2.380211082964396, "learning_rate": 1.1977128877946025e-06, "loss": 0.8569, "step": 21570 }, { "epoch": 0.7818129100068864, "grad_norm": 2.206619412141306, "learning_rate": 1.1973317698888515e-06, "loss": 0.8651, "step": 21571 }, { "epoch": 0.781849153709543, "grad_norm": 2.4431437613524407, "learning_rate": 1.1969507043815687e-06, "loss": 0.8867, "step": 21572 }, { "epoch": 0.7818853974121996, "grad_norm": 2.35339945921515, "learning_rate": 1.1965696912780068e-06, "loss": 0.8289, "step": 21573 }, { "epoch": 0.7819216411148563, "grad_norm": 2.2139553520279436, "learning_rate": 1.196188730583413e-06, "loss": 0.8796, "step": 21574 }, { "epoch": 0.7819578848175129, "grad_norm": 2.3257347413826577, "learning_rate": 1.1958078223030377e-06, "loss": 0.7895, "step": 21575 }, { "epoch": 0.7819941285201696, "grad_norm": 2.3397631507138903, "learning_rate": 1.1954269664421302e-06, "loss": 0.8529, "step": 21576 }, { "epoch": 0.7820303722228262, "grad_norm": 2.437825767429982, "learning_rate": 1.19504616300594e-06, "loss": 1.0631, "step": 21577 }, { "epoch": 0.782066615925483, "grad_norm": 2.1927901971229193, "learning_rate": 1.1946654119997104e-06, "loss": 0.9863, "step": 21578 }, { "epoch": 0.7821028596281396, "grad_norm": 2.4606616351816384, "learning_rate": 1.1942847134286906e-06, "loss": 0.9392, "step": 21579 }, { "epoch": 0.7821391033307963, "grad_norm": 2.1942057421752463, "learning_rate": 1.1939040672981267e-06, "loss": 0.718, "step": 21580 }, { "epoch": 0.7821753470334529, "grad_norm": 2.006461499164301, "learning_rate": 1.1935234736132616e-06, "loss": 0.9301, "step": 21581 }, { "epoch": 0.7822115907361096, "grad_norm": 2.3616821485274575, "learning_rate": 1.1931429323793426e-06, "loss": 0.8486, "step": 21582 }, { "epoch": 0.7822478344387662, "grad_norm": 2.4725533838835676, "learning_rate": 1.1927624436016088e-06, "loss": 0.8618, "step": 21583 }, { "epoch": 0.782284078141423, "grad_norm": 2.255996284027659, "learning_rate": 1.1923820072853087e-06, "loss": 0.9133, "step": 21584 }, { "epoch": 0.7823203218440796, "grad_norm": 2.257021325506912, "learning_rate": 1.1920016234356801e-06, "loss": 0.8179, "step": 21585 }, { "epoch": 0.7823565655467363, "grad_norm": 2.1881303489303683, "learning_rate": 1.1916212920579679e-06, "loss": 1.007, "step": 21586 }, { "epoch": 0.7823928092493929, "grad_norm": 2.1157845142740497, "learning_rate": 1.191241013157408e-06, "loss": 0.9971, "step": 21587 }, { "epoch": 0.7824290529520496, "grad_norm": 2.397309438350946, "learning_rate": 1.190860786739247e-06, "loss": 0.907, "step": 21588 }, { "epoch": 0.7824652966547062, "grad_norm": 2.2543745350292834, "learning_rate": 1.1904806128087187e-06, "loss": 0.7009, "step": 21589 }, { "epoch": 0.7825015403573629, "grad_norm": 2.5433132721254594, "learning_rate": 1.190100491371065e-06, "loss": 0.9604, "step": 21590 }, { "epoch": 0.7825377840600196, "grad_norm": 2.1532472090637125, "learning_rate": 1.1897204224315207e-06, "loss": 0.8267, "step": 21591 }, { "epoch": 0.7825740277626763, "grad_norm": 2.311908165630942, "learning_rate": 1.189340405995325e-06, "loss": 1.0233, "step": 21592 }, { "epoch": 0.7826102714653329, "grad_norm": 2.4328143025897524, "learning_rate": 1.1889604420677152e-06, "loss": 0.8549, "step": 21593 }, { "epoch": 0.7826465151679896, "grad_norm": 2.345465257221605, "learning_rate": 1.1885805306539245e-06, "loss": 1.0278, "step": 21594 }, { "epoch": 0.7826827588706462, "grad_norm": 1.8275753491341693, "learning_rate": 1.1882006717591888e-06, "loss": 0.6684, "step": 21595 }, { "epoch": 0.7827190025733028, "grad_norm": 2.547039843155413, "learning_rate": 1.1878208653887425e-06, "loss": 0.88, "step": 21596 }, { "epoch": 0.7827552462759596, "grad_norm": 2.240965967110918, "learning_rate": 1.1874411115478207e-06, "loss": 0.9368, "step": 21597 }, { "epoch": 0.7827914899786163, "grad_norm": 2.1985026188035808, "learning_rate": 1.1870614102416528e-06, "loss": 0.763, "step": 21598 }, { "epoch": 0.7828277336812729, "grad_norm": 2.3037451872315993, "learning_rate": 1.1866817614754729e-06, "loss": 0.9707, "step": 21599 }, { "epoch": 0.7828639773839295, "grad_norm": 2.5553069648895397, "learning_rate": 1.1863021652545136e-06, "loss": 0.8486, "step": 21600 }, { "epoch": 0.7829002210865862, "grad_norm": 2.2305715884502084, "learning_rate": 1.1859226215840025e-06, "loss": 0.672, "step": 21601 }, { "epoch": 0.7829364647892428, "grad_norm": 2.2468700243437407, "learning_rate": 1.1855431304691727e-06, "loss": 0.866, "step": 21602 }, { "epoch": 0.7829727084918995, "grad_norm": 2.4418794051400865, "learning_rate": 1.1851636919152487e-06, "loss": 0.9082, "step": 21603 }, { "epoch": 0.7830089521945562, "grad_norm": 2.1335448572197335, "learning_rate": 1.1847843059274644e-06, "loss": 0.9069, "step": 21604 }, { "epoch": 0.7830451958972129, "grad_norm": 2.316732995107072, "learning_rate": 1.184404972511044e-06, "loss": 1.0053, "step": 21605 }, { "epoch": 0.7830814395998695, "grad_norm": 1.9704947241409951, "learning_rate": 1.1840256916712168e-06, "loss": 0.9284, "step": 21606 }, { "epoch": 0.7831176833025262, "grad_norm": 2.4978034796708637, "learning_rate": 1.183646463413206e-06, "loss": 0.9024, "step": 21607 }, { "epoch": 0.7831539270051828, "grad_norm": 2.6614550330361397, "learning_rate": 1.183267287742239e-06, "loss": 0.8445, "step": 21608 }, { "epoch": 0.7831901707078395, "grad_norm": 2.438154062967271, "learning_rate": 1.182888164663542e-06, "loss": 0.9297, "step": 21609 }, { "epoch": 0.7832264144104962, "grad_norm": 2.4881230288395075, "learning_rate": 1.1825090941823364e-06, "loss": 0.9068, "step": 21610 }, { "epoch": 0.7832626581131529, "grad_norm": 2.4831145085007935, "learning_rate": 1.1821300763038467e-06, "loss": 0.9486, "step": 21611 }, { "epoch": 0.7832989018158095, "grad_norm": 2.3017688576364885, "learning_rate": 1.1817511110332952e-06, "loss": 0.8504, "step": 21612 }, { "epoch": 0.7833351455184662, "grad_norm": 2.316948210644623, "learning_rate": 1.1813721983759058e-06, "loss": 0.8926, "step": 21613 }, { "epoch": 0.7833713892211228, "grad_norm": 2.5468630893305737, "learning_rate": 1.1809933383368965e-06, "loss": 0.9146, "step": 21614 }, { "epoch": 0.7834076329237795, "grad_norm": 2.2761982516798076, "learning_rate": 1.1806145309214894e-06, "loss": 0.6475, "step": 21615 }, { "epoch": 0.7834438766264361, "grad_norm": 2.10066377710081, "learning_rate": 1.1802357761349043e-06, "loss": 0.6085, "step": 21616 }, { "epoch": 0.7834801203290929, "grad_norm": 2.5085716784951066, "learning_rate": 1.1798570739823617e-06, "loss": 0.8897, "step": 21617 }, { "epoch": 0.7835163640317495, "grad_norm": 2.2778836773256734, "learning_rate": 1.1794784244690767e-06, "loss": 0.8034, "step": 21618 }, { "epoch": 0.7835526077344062, "grad_norm": 2.8185664025836736, "learning_rate": 1.179099827600268e-06, "loss": 0.8995, "step": 21619 }, { "epoch": 0.7835888514370628, "grad_norm": 2.706259537540921, "learning_rate": 1.1787212833811551e-06, "loss": 0.8175, "step": 21620 }, { "epoch": 0.7836250951397195, "grad_norm": 2.5857663366053085, "learning_rate": 1.1783427918169498e-06, "loss": 0.8949, "step": 21621 }, { "epoch": 0.7836613388423761, "grad_norm": 2.425555664282602, "learning_rate": 1.177964352912871e-06, "loss": 0.9808, "step": 21622 }, { "epoch": 0.7836975825450329, "grad_norm": 2.7269398994828222, "learning_rate": 1.1775859666741297e-06, "loss": 1.1847, "step": 21623 }, { "epoch": 0.7837338262476895, "grad_norm": 2.4267714810029455, "learning_rate": 1.177207633105944e-06, "loss": 1.0836, "step": 21624 }, { "epoch": 0.7837700699503461, "grad_norm": 2.2410565659194246, "learning_rate": 1.1768293522135244e-06, "loss": 0.8913, "step": 21625 }, { "epoch": 0.7838063136530028, "grad_norm": 2.452183435028393, "learning_rate": 1.1764511240020854e-06, "loss": 0.7294, "step": 21626 }, { "epoch": 0.7838425573556594, "grad_norm": 2.3911399010750007, "learning_rate": 1.1760729484768363e-06, "loss": 1.0243, "step": 21627 }, { "epoch": 0.7838788010583161, "grad_norm": 2.421187845748047, "learning_rate": 1.175694825642989e-06, "loss": 0.8271, "step": 21628 }, { "epoch": 0.7839150447609727, "grad_norm": 2.456006265329228, "learning_rate": 1.1753167555057554e-06, "loss": 0.8907, "step": 21629 }, { "epoch": 0.7839512884636295, "grad_norm": 2.189972072341452, "learning_rate": 1.1749387380703426e-06, "loss": 0.7904, "step": 21630 }, { "epoch": 0.7839875321662861, "grad_norm": 2.3950866440794822, "learning_rate": 1.1745607733419606e-06, "loss": 0.8319, "step": 21631 }, { "epoch": 0.7840237758689428, "grad_norm": 2.752880714101718, "learning_rate": 1.1741828613258177e-06, "loss": 0.8767, "step": 21632 }, { "epoch": 0.7840600195715994, "grad_norm": 2.3315063574078514, "learning_rate": 1.173805002027123e-06, "loss": 0.9337, "step": 21633 }, { "epoch": 0.7840962632742561, "grad_norm": 2.494848742169623, "learning_rate": 1.1734271954510795e-06, "loss": 0.8585, "step": 21634 }, { "epoch": 0.7841325069769127, "grad_norm": 2.47813294024494, "learning_rate": 1.1730494416028949e-06, "loss": 0.8167, "step": 21635 }, { "epoch": 0.7841687506795694, "grad_norm": 2.324079611906047, "learning_rate": 1.1726717404877747e-06, "loss": 0.7755, "step": 21636 }, { "epoch": 0.7842049943822261, "grad_norm": 1.8153296435431905, "learning_rate": 1.1722940921109248e-06, "loss": 0.8556, "step": 21637 }, { "epoch": 0.7842412380848828, "grad_norm": 2.353873234722176, "learning_rate": 1.1719164964775458e-06, "loss": 0.8928, "step": 21638 }, { "epoch": 0.7842774817875394, "grad_norm": 2.402965325983248, "learning_rate": 1.1715389535928423e-06, "loss": 0.9854, "step": 21639 }, { "epoch": 0.7843137254901961, "grad_norm": 2.329933106738803, "learning_rate": 1.1711614634620183e-06, "loss": 0.9544, "step": 21640 }, { "epoch": 0.7843499691928527, "grad_norm": 2.6774768886595868, "learning_rate": 1.170784026090272e-06, "loss": 0.8204, "step": 21641 }, { "epoch": 0.7843862128955094, "grad_norm": 2.6096004422087598, "learning_rate": 1.1704066414828074e-06, "loss": 0.8673, "step": 21642 }, { "epoch": 0.7844224565981661, "grad_norm": 2.8499775848499684, "learning_rate": 1.1700293096448207e-06, "loss": 0.9661, "step": 21643 }, { "epoch": 0.7844587003008228, "grad_norm": 2.5060887552939657, "learning_rate": 1.1696520305815161e-06, "loss": 0.9726, "step": 21644 }, { "epoch": 0.7844949440034794, "grad_norm": 2.099672085403802, "learning_rate": 1.169274804298089e-06, "loss": 0.7789, "step": 21645 }, { "epoch": 0.7845311877061361, "grad_norm": 2.6355073848179513, "learning_rate": 1.1688976307997395e-06, "loss": 0.9267, "step": 21646 }, { "epoch": 0.7845674314087927, "grad_norm": 2.523175245166578, "learning_rate": 1.1685205100916625e-06, "loss": 0.8296, "step": 21647 }, { "epoch": 0.7846036751114494, "grad_norm": 2.3812820770097147, "learning_rate": 1.1681434421790555e-06, "loss": 0.9139, "step": 21648 }, { "epoch": 0.784639918814106, "grad_norm": 2.413889727842464, "learning_rate": 1.1677664270671163e-06, "loss": 0.903, "step": 21649 }, { "epoch": 0.7846761625167628, "grad_norm": 2.517668457090149, "learning_rate": 1.1673894647610362e-06, "loss": 0.7901, "step": 21650 }, { "epoch": 0.7847124062194194, "grad_norm": 2.368864368162511, "learning_rate": 1.1670125552660117e-06, "loss": 0.7256, "step": 21651 }, { "epoch": 0.784748649922076, "grad_norm": 2.2785129413724245, "learning_rate": 1.1666356985872357e-06, "loss": 0.7278, "step": 21652 }, { "epoch": 0.7847848936247327, "grad_norm": 2.227513237784517, "learning_rate": 1.166258894729903e-06, "loss": 0.8811, "step": 21653 }, { "epoch": 0.7848211373273893, "grad_norm": 2.4096614264694662, "learning_rate": 1.1658821436992012e-06, "loss": 1.091, "step": 21654 }, { "epoch": 0.784857381030046, "grad_norm": 2.2203895198254444, "learning_rate": 1.1655054455003278e-06, "loss": 0.8053, "step": 21655 }, { "epoch": 0.7848936247327027, "grad_norm": 2.3054544245600392, "learning_rate": 1.1651288001384687e-06, "loss": 0.8377, "step": 21656 }, { "epoch": 0.7849298684353594, "grad_norm": 2.30236137923276, "learning_rate": 1.1647522076188173e-06, "loss": 0.9399, "step": 21657 }, { "epoch": 0.784966112138016, "grad_norm": 2.484040536429952, "learning_rate": 1.164375667946559e-06, "loss": 0.7868, "step": 21658 }, { "epoch": 0.7850023558406727, "grad_norm": 2.2104161700269787, "learning_rate": 1.1639991811268848e-06, "loss": 0.9015, "step": 21659 }, { "epoch": 0.7850385995433293, "grad_norm": 2.313214272721825, "learning_rate": 1.163622747164983e-06, "loss": 0.7651, "step": 21660 }, { "epoch": 0.785074843245986, "grad_norm": 2.2594679344128394, "learning_rate": 1.163246366066038e-06, "loss": 0.8645, "step": 21661 }, { "epoch": 0.7851110869486426, "grad_norm": 2.336330440221906, "learning_rate": 1.1628700378352382e-06, "loss": 0.7381, "step": 21662 }, { "epoch": 0.7851473306512994, "grad_norm": 2.491351409525803, "learning_rate": 1.1624937624777682e-06, "loss": 0.8236, "step": 21663 }, { "epoch": 0.785183574353956, "grad_norm": 2.2544209043666745, "learning_rate": 1.162117539998815e-06, "loss": 0.8146, "step": 21664 }, { "epoch": 0.7852198180566127, "grad_norm": 2.1962447150920164, "learning_rate": 1.161741370403559e-06, "loss": 0.9289, "step": 21665 }, { "epoch": 0.7852560617592693, "grad_norm": 2.244356883229944, "learning_rate": 1.1613652536971865e-06, "loss": 0.9774, "step": 21666 }, { "epoch": 0.785292305461926, "grad_norm": 2.0977166222624506, "learning_rate": 1.1609891898848801e-06, "loss": 0.611, "step": 21667 }, { "epoch": 0.7853285491645826, "grad_norm": 2.776751668782929, "learning_rate": 1.16061317897182e-06, "loss": 0.9587, "step": 21668 }, { "epoch": 0.7853647928672394, "grad_norm": 2.745474150116244, "learning_rate": 1.160237220963189e-06, "loss": 0.9623, "step": 21669 }, { "epoch": 0.785401036569896, "grad_norm": 2.400213315097084, "learning_rate": 1.1598613158641647e-06, "loss": 0.8345, "step": 21670 }, { "epoch": 0.7854372802725527, "grad_norm": 2.4525514733919067, "learning_rate": 1.159485463679932e-06, "loss": 0.973, "step": 21671 }, { "epoch": 0.7854735239752093, "grad_norm": 2.417471318165013, "learning_rate": 1.1591096644156646e-06, "loss": 0.9716, "step": 21672 }, { "epoch": 0.785509767677866, "grad_norm": 2.107105061984736, "learning_rate": 1.158733918076545e-06, "loss": 0.8177, "step": 21673 }, { "epoch": 0.7855460113805226, "grad_norm": 2.4637290626012436, "learning_rate": 1.1583582246677466e-06, "loss": 0.8307, "step": 21674 }, { "epoch": 0.7855822550831792, "grad_norm": 2.262959882446957, "learning_rate": 1.157982584194451e-06, "loss": 0.9998, "step": 21675 }, { "epoch": 0.785618498785836, "grad_norm": 2.339894657356183, "learning_rate": 1.1576069966618297e-06, "loss": 1.1052, "step": 21676 }, { "epoch": 0.7856547424884927, "grad_norm": 2.4637226686547167, "learning_rate": 1.1572314620750625e-06, "loss": 1.108, "step": 21677 }, { "epoch": 0.7856909861911493, "grad_norm": 2.516924364993079, "learning_rate": 1.1568559804393199e-06, "loss": 0.9298, "step": 21678 }, { "epoch": 0.785727229893806, "grad_norm": 2.2873366485325097, "learning_rate": 1.1564805517597777e-06, "loss": 0.7818, "step": 21679 }, { "epoch": 0.7857634735964626, "grad_norm": 2.4272798744692023, "learning_rate": 1.1561051760416102e-06, "loss": 1.0026, "step": 21680 }, { "epoch": 0.7857997172991192, "grad_norm": 2.3408965145095553, "learning_rate": 1.155729853289988e-06, "loss": 0.9828, "step": 21681 }, { "epoch": 0.785835961001776, "grad_norm": 2.6657513226737306, "learning_rate": 1.1553545835100827e-06, "loss": 0.7413, "step": 21682 }, { "epoch": 0.7858722047044326, "grad_norm": 2.522275474245449, "learning_rate": 1.1549793667070663e-06, "loss": 0.8619, "step": 21683 }, { "epoch": 0.7859084484070893, "grad_norm": 2.4637762826484617, "learning_rate": 1.1546042028861105e-06, "loss": 0.8762, "step": 21684 }, { "epoch": 0.7859446921097459, "grad_norm": 2.5695055574391192, "learning_rate": 1.1542290920523813e-06, "loss": 0.8936, "step": 21685 }, { "epoch": 0.7859809358124026, "grad_norm": 2.304573438276198, "learning_rate": 1.15385403421105e-06, "loss": 0.786, "step": 21686 }, { "epoch": 0.7860171795150592, "grad_norm": 2.3299082060856455, "learning_rate": 1.1534790293672848e-06, "loss": 0.9537, "step": 21687 }, { "epoch": 0.7860534232177159, "grad_norm": 2.2901829795704542, "learning_rate": 1.1531040775262509e-06, "loss": 1.0504, "step": 21688 }, { "epoch": 0.7860896669203726, "grad_norm": 2.348336457400197, "learning_rate": 1.1527291786931183e-06, "loss": 0.8195, "step": 21689 }, { "epoch": 0.7861259106230293, "grad_norm": 2.79856416468873, "learning_rate": 1.1523543328730474e-06, "loss": 1.0853, "step": 21690 }, { "epoch": 0.7861621543256859, "grad_norm": 2.090767564730145, "learning_rate": 1.1519795400712097e-06, "loss": 0.6777, "step": 21691 }, { "epoch": 0.7861983980283426, "grad_norm": 2.473061288790417, "learning_rate": 1.1516048002927655e-06, "loss": 0.8494, "step": 21692 }, { "epoch": 0.7862346417309992, "grad_norm": 2.1361247597231783, "learning_rate": 1.1512301135428816e-06, "loss": 0.7094, "step": 21693 }, { "epoch": 0.7862708854336559, "grad_norm": 2.27309180877238, "learning_rate": 1.150855479826717e-06, "loss": 0.7033, "step": 21694 }, { "epoch": 0.7863071291363125, "grad_norm": 2.513223421001467, "learning_rate": 1.150480899149436e-06, "loss": 0.9077, "step": 21695 }, { "epoch": 0.7863433728389693, "grad_norm": 2.1871254776731472, "learning_rate": 1.150106371516202e-06, "loss": 0.8066, "step": 21696 }, { "epoch": 0.7863796165416259, "grad_norm": 2.545116477921009, "learning_rate": 1.1497318969321719e-06, "loss": 0.943, "step": 21697 }, { "epoch": 0.7864158602442826, "grad_norm": 2.5069850259050805, "learning_rate": 1.1493574754025077e-06, "loss": 0.9701, "step": 21698 }, { "epoch": 0.7864521039469392, "grad_norm": 2.3572413555679566, "learning_rate": 1.1489831069323692e-06, "loss": 0.8722, "step": 21699 }, { "epoch": 0.7864883476495959, "grad_norm": 2.3804633165813684, "learning_rate": 1.1486087915269156e-06, "loss": 0.691, "step": 21700 }, { "epoch": 0.7865245913522525, "grad_norm": 2.1568026683236377, "learning_rate": 1.1482345291913023e-06, "loss": 0.7988, "step": 21701 }, { "epoch": 0.7865608350549093, "grad_norm": 2.5397533697458603, "learning_rate": 1.1478603199306876e-06, "loss": 0.798, "step": 21702 }, { "epoch": 0.7865970787575659, "grad_norm": 2.3408358001991916, "learning_rate": 1.1474861637502278e-06, "loss": 0.8927, "step": 21703 }, { "epoch": 0.7866333224602226, "grad_norm": 2.1868230100495483, "learning_rate": 1.1471120606550806e-06, "loss": 0.9012, "step": 21704 }, { "epoch": 0.7866695661628792, "grad_norm": 2.5007967240873654, "learning_rate": 1.1467380106503972e-06, "loss": 1.0843, "step": 21705 }, { "epoch": 0.7867058098655358, "grad_norm": 2.580050284933183, "learning_rate": 1.146364013741334e-06, "loss": 0.8508, "step": 21706 }, { "epoch": 0.7867420535681925, "grad_norm": 2.3501611534721953, "learning_rate": 1.1459900699330451e-06, "loss": 0.9066, "step": 21707 }, { "epoch": 0.7867782972708491, "grad_norm": 2.393446000514528, "learning_rate": 1.1456161792306814e-06, "loss": 0.7028, "step": 21708 }, { "epoch": 0.7868145409735059, "grad_norm": 2.5498403064255215, "learning_rate": 1.1452423416393966e-06, "loss": 0.9874, "step": 21709 }, { "epoch": 0.7868507846761625, "grad_norm": 2.447232219604816, "learning_rate": 1.1448685571643382e-06, "loss": 0.8494, "step": 21710 }, { "epoch": 0.7868870283788192, "grad_norm": 2.4140119193657976, "learning_rate": 1.1444948258106626e-06, "loss": 0.9381, "step": 21711 }, { "epoch": 0.7869232720814758, "grad_norm": 2.3205887725505194, "learning_rate": 1.1441211475835152e-06, "loss": 0.8266, "step": 21712 }, { "epoch": 0.7869595157841325, "grad_norm": 2.2601690748185654, "learning_rate": 1.1437475224880479e-06, "loss": 0.7447, "step": 21713 }, { "epoch": 0.7869957594867891, "grad_norm": 2.45920117077467, "learning_rate": 1.1433739505294056e-06, "loss": 0.8753, "step": 21714 }, { "epoch": 0.7870320031894459, "grad_norm": 2.682770140975211, "learning_rate": 1.1430004317127385e-06, "loss": 0.9683, "step": 21715 }, { "epoch": 0.7870682468921025, "grad_norm": 2.3177896788481305, "learning_rate": 1.1426269660431939e-06, "loss": 0.8642, "step": 21716 }, { "epoch": 0.7871044905947592, "grad_norm": 2.545388239547486, "learning_rate": 1.1422535535259156e-06, "loss": 0.8213, "step": 21717 }, { "epoch": 0.7871407342974158, "grad_norm": 2.4945756665479393, "learning_rate": 1.1418801941660502e-06, "loss": 0.9495, "step": 21718 }, { "epoch": 0.7871769780000725, "grad_norm": 2.511793852380198, "learning_rate": 1.1415068879687424e-06, "loss": 0.7103, "step": 21719 }, { "epoch": 0.7872132217027291, "grad_norm": 2.313846769578863, "learning_rate": 1.1411336349391372e-06, "loss": 0.9245, "step": 21720 }, { "epoch": 0.7872494654053858, "grad_norm": 2.512682096273855, "learning_rate": 1.140760435082376e-06, "loss": 0.8606, "step": 21721 }, { "epoch": 0.7872857091080425, "grad_norm": 2.0560600296986933, "learning_rate": 1.1403872884036016e-06, "loss": 0.741, "step": 21722 }, { "epoch": 0.7873219528106992, "grad_norm": 2.3138004919452033, "learning_rate": 1.1400141949079557e-06, "loss": 0.7275, "step": 21723 }, { "epoch": 0.7873581965133558, "grad_norm": 2.2286973016833795, "learning_rate": 1.139641154600582e-06, "loss": 0.9002, "step": 21724 }, { "epoch": 0.7873944402160125, "grad_norm": 2.319386380065174, "learning_rate": 1.139268167486617e-06, "loss": 1.0016, "step": 21725 }, { "epoch": 0.7874306839186691, "grad_norm": 2.475237467266165, "learning_rate": 1.138895233571201e-06, "loss": 0.8794, "step": 21726 }, { "epoch": 0.7874669276213258, "grad_norm": 2.383680094251901, "learning_rate": 1.1385223528594758e-06, "loss": 0.7376, "step": 21727 }, { "epoch": 0.7875031713239825, "grad_norm": 2.186523318706007, "learning_rate": 1.1381495253565756e-06, "loss": 0.8399, "step": 21728 }, { "epoch": 0.7875394150266392, "grad_norm": 2.200023140839702, "learning_rate": 1.1377767510676402e-06, "loss": 0.7141, "step": 21729 }, { "epoch": 0.7875756587292958, "grad_norm": 2.3017273074686364, "learning_rate": 1.1374040299978033e-06, "loss": 0.8808, "step": 21730 }, { "epoch": 0.7876119024319524, "grad_norm": 2.3523800832351482, "learning_rate": 1.1370313621522056e-06, "loss": 0.8697, "step": 21731 }, { "epoch": 0.7876481461346091, "grad_norm": 2.215401670549135, "learning_rate": 1.1366587475359775e-06, "loss": 0.8812, "step": 21732 }, { "epoch": 0.7876843898372657, "grad_norm": 2.3485564947285607, "learning_rate": 1.1362861861542573e-06, "loss": 1.1227, "step": 21733 }, { "epoch": 0.7877206335399224, "grad_norm": 2.4697662344919076, "learning_rate": 1.1359136780121755e-06, "loss": 0.8989, "step": 21734 }, { "epoch": 0.7877568772425791, "grad_norm": 2.1329180708674524, "learning_rate": 1.1355412231148654e-06, "loss": 0.8389, "step": 21735 }, { "epoch": 0.7877931209452358, "grad_norm": 2.2792895569200256, "learning_rate": 1.1351688214674623e-06, "loss": 0.7681, "step": 21736 }, { "epoch": 0.7878293646478924, "grad_norm": 2.304827627502202, "learning_rate": 1.1347964730750926e-06, "loss": 0.8123, "step": 21737 }, { "epoch": 0.7878656083505491, "grad_norm": 2.4044241062777365, "learning_rate": 1.1344241779428928e-06, "loss": 0.9163, "step": 21738 }, { "epoch": 0.7879018520532057, "grad_norm": 2.3418183479723695, "learning_rate": 1.1340519360759884e-06, "loss": 0.9357, "step": 21739 }, { "epoch": 0.7879380957558624, "grad_norm": 2.4912301780882795, "learning_rate": 1.1336797474795114e-06, "loss": 0.8427, "step": 21740 }, { "epoch": 0.7879743394585191, "grad_norm": 2.3548374231093177, "learning_rate": 1.1333076121585867e-06, "loss": 0.985, "step": 21741 }, { "epoch": 0.7880105831611758, "grad_norm": 2.4324973693735927, "learning_rate": 1.132935530118347e-06, "loss": 0.968, "step": 21742 }, { "epoch": 0.7880468268638324, "grad_norm": 2.2483754397350295, "learning_rate": 1.1325635013639163e-06, "loss": 0.8944, "step": 21743 }, { "epoch": 0.7880830705664891, "grad_norm": 2.3074799769508614, "learning_rate": 1.1321915259004223e-06, "loss": 0.9527, "step": 21744 }, { "epoch": 0.7881193142691457, "grad_norm": 2.3507508998371507, "learning_rate": 1.1318196037329886e-06, "loss": 0.812, "step": 21745 }, { "epoch": 0.7881555579718024, "grad_norm": 2.3150970452399817, "learning_rate": 1.131447734866742e-06, "loss": 0.6709, "step": 21746 }, { "epoch": 0.788191801674459, "grad_norm": 1.9582580908778204, "learning_rate": 1.1310759193068066e-06, "loss": 0.7021, "step": 21747 }, { "epoch": 0.7882280453771158, "grad_norm": 2.3847584752051993, "learning_rate": 1.1307041570583044e-06, "loss": 0.8684, "step": 21748 }, { "epoch": 0.7882642890797724, "grad_norm": 2.305342585524045, "learning_rate": 1.1303324481263584e-06, "loss": 0.9229, "step": 21749 }, { "epoch": 0.7883005327824291, "grad_norm": 2.3154320159274846, "learning_rate": 1.1299607925160915e-06, "loss": 0.9109, "step": 21750 }, { "epoch": 0.7883367764850857, "grad_norm": 2.2265663405862806, "learning_rate": 1.129589190232625e-06, "loss": 0.7857, "step": 21751 }, { "epoch": 0.7883730201877424, "grad_norm": 2.3635116825489675, "learning_rate": 1.129217641281078e-06, "loss": 0.8528, "step": 21752 }, { "epoch": 0.788409263890399, "grad_norm": 2.5372321799445774, "learning_rate": 1.1288461456665706e-06, "loss": 0.893, "step": 21753 }, { "epoch": 0.7884455075930558, "grad_norm": 2.2137550130782966, "learning_rate": 1.1284747033942234e-06, "loss": 0.7899, "step": 21754 }, { "epoch": 0.7884817512957124, "grad_norm": 2.165178307362705, "learning_rate": 1.1281033144691523e-06, "loss": 0.8884, "step": 21755 }, { "epoch": 0.788517994998369, "grad_norm": 1.9983022110289053, "learning_rate": 1.127731978896477e-06, "loss": 0.9065, "step": 21756 }, { "epoch": 0.7885542387010257, "grad_norm": 2.4595193104867987, "learning_rate": 1.1273606966813105e-06, "loss": 0.9598, "step": 21757 }, { "epoch": 0.7885904824036823, "grad_norm": 2.4859097751845507, "learning_rate": 1.126989467828774e-06, "loss": 0.823, "step": 21758 }, { "epoch": 0.788626726106339, "grad_norm": 2.5781991453641293, "learning_rate": 1.1266182923439795e-06, "loss": 0.9394, "step": 21759 }, { "epoch": 0.7886629698089956, "grad_norm": 2.3294520293231034, "learning_rate": 1.1262471702320433e-06, "loss": 0.9753, "step": 21760 }, { "epoch": 0.7886992135116524, "grad_norm": 2.513801179599739, "learning_rate": 1.1258761014980756e-06, "loss": 0.8674, "step": 21761 }, { "epoch": 0.788735457214309, "grad_norm": 2.2689682274030596, "learning_rate": 1.1255050861471956e-06, "loss": 0.7875, "step": 21762 }, { "epoch": 0.7887717009169657, "grad_norm": 2.363802163507059, "learning_rate": 1.1251341241845098e-06, "loss": 0.9251, "step": 21763 }, { "epoch": 0.7888079446196223, "grad_norm": 2.539149274518648, "learning_rate": 1.1247632156151345e-06, "loss": 0.7629, "step": 21764 }, { "epoch": 0.788844188322279, "grad_norm": 2.3351651551168087, "learning_rate": 1.1243923604441769e-06, "loss": 0.923, "step": 21765 }, { "epoch": 0.7888804320249356, "grad_norm": 2.3663955073219913, "learning_rate": 1.1240215586767488e-06, "loss": 0.7615, "step": 21766 }, { "epoch": 0.7889166757275923, "grad_norm": 2.576357753157065, "learning_rate": 1.1236508103179605e-06, "loss": 0.876, "step": 21767 }, { "epoch": 0.788952919430249, "grad_norm": 2.3338025146306838, "learning_rate": 1.1232801153729184e-06, "loss": 0.8418, "step": 21768 }, { "epoch": 0.7889891631329057, "grad_norm": 2.414876533769742, "learning_rate": 1.1229094738467323e-06, "loss": 0.9983, "step": 21769 }, { "epoch": 0.7890254068355623, "grad_norm": 2.379917234599222, "learning_rate": 1.122538885744508e-06, "loss": 0.9229, "step": 21770 }, { "epoch": 0.789061650538219, "grad_norm": 2.5558276931084793, "learning_rate": 1.122168351071355e-06, "loss": 1.0994, "step": 21771 }, { "epoch": 0.7890978942408756, "grad_norm": 2.2442831865889152, "learning_rate": 1.121797869832375e-06, "loss": 0.9231, "step": 21772 }, { "epoch": 0.7891341379435323, "grad_norm": 2.2897872711796143, "learning_rate": 1.121427442032675e-06, "loss": 0.9331, "step": 21773 }, { "epoch": 0.789170381646189, "grad_norm": 2.32308371535749, "learning_rate": 1.1210570676773608e-06, "loss": 0.798, "step": 21774 }, { "epoch": 0.7892066253488457, "grad_norm": 2.4557379863213904, "learning_rate": 1.1206867467715326e-06, "loss": 0.7104, "step": 21775 }, { "epoch": 0.7892428690515023, "grad_norm": 1.9088229877713039, "learning_rate": 1.1203164793202965e-06, "loss": 0.9158, "step": 21776 }, { "epoch": 0.789279112754159, "grad_norm": 2.47782464624804, "learning_rate": 1.11994626532875e-06, "loss": 0.9652, "step": 21777 }, { "epoch": 0.7893153564568156, "grad_norm": 2.244822962902794, "learning_rate": 1.1195761048020004e-06, "loss": 0.9279, "step": 21778 }, { "epoch": 0.7893516001594723, "grad_norm": 2.455565631317475, "learning_rate": 1.1192059977451436e-06, "loss": 1.0132, "step": 21779 }, { "epoch": 0.7893878438621289, "grad_norm": 2.4774329985052357, "learning_rate": 1.1188359441632823e-06, "loss": 0.9192, "step": 21780 }, { "epoch": 0.7894240875647857, "grad_norm": 2.4161800185699533, "learning_rate": 1.1184659440615137e-06, "loss": 1.0441, "step": 21781 }, { "epoch": 0.7894603312674423, "grad_norm": 2.6269708467032005, "learning_rate": 1.1180959974449368e-06, "loss": 0.8562, "step": 21782 }, { "epoch": 0.789496574970099, "grad_norm": 2.3571420983952924, "learning_rate": 1.1177261043186506e-06, "loss": 1.0088, "step": 21783 }, { "epoch": 0.7895328186727556, "grad_norm": 2.2809258247361113, "learning_rate": 1.1173562646877489e-06, "loss": 0.6833, "step": 21784 }, { "epoch": 0.7895690623754122, "grad_norm": 2.373909061032218, "learning_rate": 1.1169864785573302e-06, "loss": 0.8913, "step": 21785 }, { "epoch": 0.7896053060780689, "grad_norm": 2.4754790591043605, "learning_rate": 1.1166167459324896e-06, "loss": 0.8159, "step": 21786 }, { "epoch": 0.7896415497807256, "grad_norm": 2.1908429820529376, "learning_rate": 1.1162470668183228e-06, "loss": 0.8395, "step": 21787 }, { "epoch": 0.7896777934833823, "grad_norm": 2.5219990369193557, "learning_rate": 1.1158774412199209e-06, "loss": 0.8273, "step": 21788 }, { "epoch": 0.7897140371860389, "grad_norm": 2.2711600705111206, "learning_rate": 1.1155078691423793e-06, "loss": 0.7662, "step": 21789 }, { "epoch": 0.7897502808886956, "grad_norm": 2.8730417220961004, "learning_rate": 1.1151383505907897e-06, "loss": 1.0075, "step": 21790 }, { "epoch": 0.7897865245913522, "grad_norm": 2.5093580067356345, "learning_rate": 1.114768885570246e-06, "loss": 0.7768, "step": 21791 }, { "epoch": 0.7898227682940089, "grad_norm": 2.434898728449689, "learning_rate": 1.1143994740858354e-06, "loss": 0.9006, "step": 21792 }, { "epoch": 0.7898590119966655, "grad_norm": 2.0842764759395553, "learning_rate": 1.1140301161426503e-06, "loss": 0.9277, "step": 21793 }, { "epoch": 0.7898952556993223, "grad_norm": 1.9420496874544422, "learning_rate": 1.1136608117457814e-06, "loss": 0.8386, "step": 21794 }, { "epoch": 0.7899314994019789, "grad_norm": 2.5840661199041786, "learning_rate": 1.1132915609003148e-06, "loss": 0.7565, "step": 21795 }, { "epoch": 0.7899677431046356, "grad_norm": 2.341386567224309, "learning_rate": 1.1129223636113412e-06, "loss": 0.978, "step": 21796 }, { "epoch": 0.7900039868072922, "grad_norm": 2.2347981552482237, "learning_rate": 1.1125532198839439e-06, "loss": 0.705, "step": 21797 }, { "epoch": 0.7900402305099489, "grad_norm": 2.46919708924559, "learning_rate": 1.112184129723215e-06, "loss": 0.8042, "step": 21798 }, { "epoch": 0.7900764742126055, "grad_norm": 2.381951707229871, "learning_rate": 1.1118150931342364e-06, "loss": 0.8457, "step": 21799 }, { "epoch": 0.7901127179152623, "grad_norm": 2.6136599231288606, "learning_rate": 1.1114461101220952e-06, "loss": 0.9789, "step": 21800 }, { "epoch": 0.7901489616179189, "grad_norm": 2.5513472100687844, "learning_rate": 1.1110771806918735e-06, "loss": 0.9601, "step": 21801 }, { "epoch": 0.7901852053205756, "grad_norm": 2.8086034895377394, "learning_rate": 1.1107083048486566e-06, "loss": 0.9447, "step": 21802 }, { "epoch": 0.7902214490232322, "grad_norm": 2.223290391937633, "learning_rate": 1.1103394825975282e-06, "loss": 0.9324, "step": 21803 }, { "epoch": 0.7902576927258889, "grad_norm": 2.591060191444106, "learning_rate": 1.1099707139435684e-06, "loss": 0.8767, "step": 21804 }, { "epoch": 0.7902939364285455, "grad_norm": 2.549731313097581, "learning_rate": 1.1096019988918594e-06, "loss": 0.8093, "step": 21805 }, { "epoch": 0.7903301801312022, "grad_norm": 2.280242620561989, "learning_rate": 1.1092333374474818e-06, "loss": 0.6961, "step": 21806 }, { "epoch": 0.7903664238338589, "grad_norm": 2.3806923262371456, "learning_rate": 1.1088647296155174e-06, "loss": 1.0295, "step": 21807 }, { "epoch": 0.7904026675365156, "grad_norm": 2.4694475324761918, "learning_rate": 1.108496175401042e-06, "loss": 0.9063, "step": 21808 }, { "epoch": 0.7904389112391722, "grad_norm": 2.3213088805407196, "learning_rate": 1.1081276748091357e-06, "loss": 0.8447, "step": 21809 }, { "epoch": 0.7904751549418289, "grad_norm": 2.2115128002108317, "learning_rate": 1.1077592278448768e-06, "loss": 0.6639, "step": 21810 }, { "epoch": 0.7905113986444855, "grad_norm": 2.4519460193942657, "learning_rate": 1.1073908345133432e-06, "loss": 0.8365, "step": 21811 }, { "epoch": 0.7905476423471421, "grad_norm": 2.674983929165846, "learning_rate": 1.107022494819608e-06, "loss": 0.9554, "step": 21812 }, { "epoch": 0.7905838860497989, "grad_norm": 2.5995452755282966, "learning_rate": 1.1066542087687487e-06, "loss": 0.985, "step": 21813 }, { "epoch": 0.7906201297524555, "grad_norm": 2.6846251811225295, "learning_rate": 1.1062859763658406e-06, "loss": 0.8816, "step": 21814 }, { "epoch": 0.7906563734551122, "grad_norm": 2.249722931252701, "learning_rate": 1.1059177976159564e-06, "loss": 0.9207, "step": 21815 }, { "epoch": 0.7906926171577688, "grad_norm": 2.240328373488706, "learning_rate": 1.1055496725241694e-06, "loss": 0.8711, "step": 21816 }, { "epoch": 0.7907288608604255, "grad_norm": 2.1748443362276855, "learning_rate": 1.1051816010955524e-06, "loss": 0.8311, "step": 21817 }, { "epoch": 0.7907651045630821, "grad_norm": 2.345824347904702, "learning_rate": 1.1048135833351792e-06, "loss": 0.8416, "step": 21818 }, { "epoch": 0.7908013482657388, "grad_norm": 2.3552194946899623, "learning_rate": 1.1044456192481173e-06, "loss": 0.9301, "step": 21819 }, { "epoch": 0.7908375919683955, "grad_norm": 2.270748943481116, "learning_rate": 1.1040777088394388e-06, "loss": 0.8665, "step": 21820 }, { "epoch": 0.7908738356710522, "grad_norm": 2.4004970373185173, "learning_rate": 1.1037098521142153e-06, "loss": 0.8173, "step": 21821 }, { "epoch": 0.7909100793737088, "grad_norm": 2.6465449219473203, "learning_rate": 1.1033420490775116e-06, "loss": 0.8124, "step": 21822 }, { "epoch": 0.7909463230763655, "grad_norm": 2.529168405133114, "learning_rate": 1.1029742997343995e-06, "loss": 0.9076, "step": 21823 }, { "epoch": 0.7909825667790221, "grad_norm": 2.5135806697833756, "learning_rate": 1.1026066040899418e-06, "loss": 1.0447, "step": 21824 }, { "epoch": 0.7910188104816788, "grad_norm": 2.2161872947879115, "learning_rate": 1.1022389621492113e-06, "loss": 0.9495, "step": 21825 }, { "epoch": 0.7910550541843355, "grad_norm": 2.0247741943976054, "learning_rate": 1.1018713739172687e-06, "loss": 0.852, "step": 21826 }, { "epoch": 0.7910912978869922, "grad_norm": 2.3491124494342928, "learning_rate": 1.1015038393991827e-06, "loss": 0.8107, "step": 21827 }, { "epoch": 0.7911275415896488, "grad_norm": 2.2818592398041133, "learning_rate": 1.1011363586000135e-06, "loss": 0.8127, "step": 21828 }, { "epoch": 0.7911637852923055, "grad_norm": 2.2508324755665363, "learning_rate": 1.1007689315248294e-06, "loss": 0.9242, "step": 21829 }, { "epoch": 0.7912000289949621, "grad_norm": 2.3906655351231714, "learning_rate": 1.1004015581786903e-06, "loss": 0.938, "step": 21830 }, { "epoch": 0.7912362726976188, "grad_norm": 2.223417576701598, "learning_rate": 1.1000342385666608e-06, "loss": 0.9196, "step": 21831 }, { "epoch": 0.7912725164002754, "grad_norm": 2.023590428084261, "learning_rate": 1.0996669726937993e-06, "loss": 0.7827, "step": 21832 }, { "epoch": 0.7913087601029322, "grad_norm": 2.358773271339425, "learning_rate": 1.0992997605651679e-06, "loss": 0.9881, "step": 21833 }, { "epoch": 0.7913450038055888, "grad_norm": 2.1058292782802437, "learning_rate": 1.0989326021858283e-06, "loss": 0.8665, "step": 21834 }, { "epoch": 0.7913812475082455, "grad_norm": 2.5751107176032377, "learning_rate": 1.0985654975608367e-06, "loss": 0.9903, "step": 21835 }, { "epoch": 0.7914174912109021, "grad_norm": 2.449841771726953, "learning_rate": 1.0981984466952533e-06, "loss": 0.9345, "step": 21836 }, { "epoch": 0.7914537349135587, "grad_norm": 2.0317804132277435, "learning_rate": 1.0978314495941355e-06, "loss": 0.7161, "step": 21837 }, { "epoch": 0.7914899786162154, "grad_norm": 2.301679074049399, "learning_rate": 1.0974645062625415e-06, "loss": 0.9085, "step": 21838 }, { "epoch": 0.791526222318872, "grad_norm": 2.3907188248285314, "learning_rate": 1.0970976167055254e-06, "loss": 0.9352, "step": 21839 }, { "epoch": 0.7915624660215288, "grad_norm": 2.3090791882860375, "learning_rate": 1.0967307809281436e-06, "loss": 0.5991, "step": 21840 }, { "epoch": 0.7915987097241854, "grad_norm": 2.314238278337512, "learning_rate": 1.0963639989354524e-06, "loss": 0.8764, "step": 21841 }, { "epoch": 0.7916349534268421, "grad_norm": 2.675617918318197, "learning_rate": 1.0959972707325029e-06, "loss": 0.9506, "step": 21842 }, { "epoch": 0.7916711971294987, "grad_norm": 2.4430556442562543, "learning_rate": 1.0956305963243513e-06, "loss": 0.923, "step": 21843 }, { "epoch": 0.7917074408321554, "grad_norm": 2.6878731145007833, "learning_rate": 1.095263975716046e-06, "loss": 0.9107, "step": 21844 }, { "epoch": 0.791743684534812, "grad_norm": 2.5107608439705036, "learning_rate": 1.0948974089126446e-06, "loss": 1.0442, "step": 21845 }, { "epoch": 0.7917799282374688, "grad_norm": 1.9926401081429008, "learning_rate": 1.0945308959191936e-06, "loss": 0.6957, "step": 21846 }, { "epoch": 0.7918161719401254, "grad_norm": 2.532304108239797, "learning_rate": 1.0941644367407466e-06, "loss": 0.9063, "step": 21847 }, { "epoch": 0.7918524156427821, "grad_norm": 2.3954477117078063, "learning_rate": 1.0937980313823487e-06, "loss": 0.83, "step": 21848 }, { "epoch": 0.7918886593454387, "grad_norm": 2.084252209577128, "learning_rate": 1.093431679849054e-06, "loss": 0.8513, "step": 21849 }, { "epoch": 0.7919249030480954, "grad_norm": 2.357241386789795, "learning_rate": 1.0930653821459069e-06, "loss": 0.9619, "step": 21850 }, { "epoch": 0.791961146750752, "grad_norm": 2.1495031457706437, "learning_rate": 1.0926991382779572e-06, "loss": 0.8087, "step": 21851 }, { "epoch": 0.7919973904534087, "grad_norm": 2.2908851913042256, "learning_rate": 1.0923329482502493e-06, "loss": 0.8771, "step": 21852 }, { "epoch": 0.7920336341560654, "grad_norm": 2.3294646839889492, "learning_rate": 1.0919668120678295e-06, "loss": 0.8953, "step": 21853 }, { "epoch": 0.7920698778587221, "grad_norm": 2.4185357674134624, "learning_rate": 1.0916007297357456e-06, "loss": 0.9689, "step": 21854 }, { "epoch": 0.7921061215613787, "grad_norm": 2.4482693136983764, "learning_rate": 1.091234701259038e-06, "loss": 0.8694, "step": 21855 }, { "epoch": 0.7921423652640354, "grad_norm": 2.35212600818968, "learning_rate": 1.0908687266427526e-06, "loss": 0.9059, "step": 21856 }, { "epoch": 0.792178608966692, "grad_norm": 2.1566418476642113, "learning_rate": 1.090502805891932e-06, "loss": 0.9043, "step": 21857 }, { "epoch": 0.7922148526693487, "grad_norm": 2.541963300444578, "learning_rate": 1.0901369390116196e-06, "loss": 0.7161, "step": 21858 }, { "epoch": 0.7922510963720054, "grad_norm": 2.2801356584806927, "learning_rate": 1.0897711260068544e-06, "loss": 0.7791, "step": 21859 }, { "epoch": 0.7922873400746621, "grad_norm": 2.772355643752242, "learning_rate": 1.0894053668826777e-06, "loss": 0.7735, "step": 21860 }, { "epoch": 0.7923235837773187, "grad_norm": 2.461945021162904, "learning_rate": 1.0890396616441318e-06, "loss": 0.8277, "step": 21861 }, { "epoch": 0.7923598274799754, "grad_norm": 2.3562615812824736, "learning_rate": 1.088674010296253e-06, "loss": 0.7958, "step": 21862 }, { "epoch": 0.792396071182632, "grad_norm": 2.424574954141798, "learning_rate": 1.0883084128440813e-06, "loss": 0.9546, "step": 21863 }, { "epoch": 0.7924323148852886, "grad_norm": 2.6043561973863247, "learning_rate": 1.087942869292652e-06, "loss": 1.084, "step": 21864 }, { "epoch": 0.7924685585879453, "grad_norm": 2.443615006236644, "learning_rate": 1.0875773796470063e-06, "loss": 0.7799, "step": 21865 }, { "epoch": 0.792504802290602, "grad_norm": 2.233001353369936, "learning_rate": 1.087211943912177e-06, "loss": 0.9122, "step": 21866 }, { "epoch": 0.7925410459932587, "grad_norm": 2.1761357635091327, "learning_rate": 1.0868465620932023e-06, "loss": 0.8902, "step": 21867 }, { "epoch": 0.7925772896959153, "grad_norm": 2.375268515504323, "learning_rate": 1.0864812341951136e-06, "loss": 0.8901, "step": 21868 }, { "epoch": 0.792613533398572, "grad_norm": 2.21020913131702, "learning_rate": 1.086115960222947e-06, "loss": 0.7629, "step": 21869 }, { "epoch": 0.7926497771012286, "grad_norm": 1.9812218900951863, "learning_rate": 1.085750740181737e-06, "loss": 0.7971, "step": 21870 }, { "epoch": 0.7926860208038853, "grad_norm": 2.0510139501710962, "learning_rate": 1.085385574076513e-06, "loss": 0.9693, "step": 21871 }, { "epoch": 0.792722264506542, "grad_norm": 2.1190478170036626, "learning_rate": 1.0850204619123078e-06, "loss": 0.8815, "step": 21872 }, { "epoch": 0.7927585082091987, "grad_norm": 2.428046673172093, "learning_rate": 1.0846554036941531e-06, "loss": 0.7601, "step": 21873 }, { "epoch": 0.7927947519118553, "grad_norm": 2.184268491058294, "learning_rate": 1.084290399427081e-06, "loss": 0.8055, "step": 21874 }, { "epoch": 0.792830995614512, "grad_norm": 2.3880687292689586, "learning_rate": 1.0839254491161172e-06, "loss": 0.8941, "step": 21875 }, { "epoch": 0.7928672393171686, "grad_norm": 2.7315569458799023, "learning_rate": 1.0835605527662924e-06, "loss": 0.9355, "step": 21876 }, { "epoch": 0.7929034830198253, "grad_norm": 2.492753209114381, "learning_rate": 1.0831957103826346e-06, "loss": 1.0209, "step": 21877 }, { "epoch": 0.7929397267224819, "grad_norm": 2.3884025275487333, "learning_rate": 1.0828309219701721e-06, "loss": 0.7875, "step": 21878 }, { "epoch": 0.7929759704251387, "grad_norm": 2.406907952712698, "learning_rate": 1.0824661875339298e-06, "loss": 0.8112, "step": 21879 }, { "epoch": 0.7930122141277953, "grad_norm": 2.534748360922445, "learning_rate": 1.0821015070789338e-06, "loss": 0.8701, "step": 21880 }, { "epoch": 0.793048457830452, "grad_norm": 2.4116574121162495, "learning_rate": 1.0817368806102107e-06, "loss": 0.8122, "step": 21881 }, { "epoch": 0.7930847015331086, "grad_norm": 2.502650036706591, "learning_rate": 1.0813723081327826e-06, "loss": 0.998, "step": 21882 }, { "epoch": 0.7931209452357653, "grad_norm": 2.4935392397041616, "learning_rate": 1.0810077896516757e-06, "loss": 0.915, "step": 21883 }, { "epoch": 0.7931571889384219, "grad_norm": 2.2096787292758426, "learning_rate": 1.0806433251719083e-06, "loss": 0.7198, "step": 21884 }, { "epoch": 0.7931934326410787, "grad_norm": 2.2749380814477584, "learning_rate": 1.0802789146985087e-06, "loss": 0.8793, "step": 21885 }, { "epoch": 0.7932296763437353, "grad_norm": 2.1797140697282686, "learning_rate": 1.0799145582364934e-06, "loss": 0.7252, "step": 21886 }, { "epoch": 0.793265920046392, "grad_norm": 2.4116774575403497, "learning_rate": 1.079550255790886e-06, "loss": 0.8873, "step": 21887 }, { "epoch": 0.7933021637490486, "grad_norm": 2.2015187540262207, "learning_rate": 1.0791860073667038e-06, "loss": 0.9503, "step": 21888 }, { "epoch": 0.7933384074517053, "grad_norm": 2.28084348466005, "learning_rate": 1.0788218129689677e-06, "loss": 0.8716, "step": 21889 }, { "epoch": 0.7933746511543619, "grad_norm": 2.1806620195276936, "learning_rate": 1.0784576726026962e-06, "loss": 0.9598, "step": 21890 }, { "epoch": 0.7934108948570185, "grad_norm": 2.6212127746828133, "learning_rate": 1.0780935862729048e-06, "loss": 0.8437, "step": 21891 }, { "epoch": 0.7934471385596753, "grad_norm": 2.2525336396214986, "learning_rate": 1.0777295539846123e-06, "loss": 0.9352, "step": 21892 }, { "epoch": 0.793483382262332, "grad_norm": 2.3255939102461745, "learning_rate": 1.0773655757428348e-06, "loss": 0.8901, "step": 21893 }, { "epoch": 0.7935196259649886, "grad_norm": 2.472081320176244, "learning_rate": 1.0770016515525878e-06, "loss": 0.8541, "step": 21894 }, { "epoch": 0.7935558696676452, "grad_norm": 2.538508262481638, "learning_rate": 1.076637781418885e-06, "loss": 0.9738, "step": 21895 }, { "epoch": 0.7935921133703019, "grad_norm": 2.2467830497480317, "learning_rate": 1.0762739653467402e-06, "loss": 0.9546, "step": 21896 }, { "epoch": 0.7936283570729585, "grad_norm": 2.4477395478075388, "learning_rate": 1.0759102033411673e-06, "loss": 0.9499, "step": 21897 }, { "epoch": 0.7936646007756153, "grad_norm": 2.3150259011105625, "learning_rate": 1.0755464954071799e-06, "loss": 0.8117, "step": 21898 }, { "epoch": 0.7937008444782719, "grad_norm": 2.25951832730116, "learning_rate": 1.0751828415497873e-06, "loss": 0.7775, "step": 21899 }, { "epoch": 0.7937370881809286, "grad_norm": 2.685281639723053, "learning_rate": 1.074819241774001e-06, "loss": 0.8425, "step": 21900 }, { "epoch": 0.7937733318835852, "grad_norm": 2.5086666414158345, "learning_rate": 1.0744556960848334e-06, "loss": 0.8921, "step": 21901 }, { "epoch": 0.7938095755862419, "grad_norm": 2.281243118784406, "learning_rate": 1.074092204487291e-06, "loss": 0.8843, "step": 21902 }, { "epoch": 0.7938458192888985, "grad_norm": 2.2809678955134487, "learning_rate": 1.0737287669863834e-06, "loss": 0.9038, "step": 21903 }, { "epoch": 0.7938820629915552, "grad_norm": 2.512604900746707, "learning_rate": 1.0733653835871188e-06, "loss": 0.8337, "step": 21904 }, { "epoch": 0.7939183066942119, "grad_norm": 2.2811816392542177, "learning_rate": 1.0730020542945063e-06, "loss": 0.8294, "step": 21905 }, { "epoch": 0.7939545503968686, "grad_norm": 2.420780673871922, "learning_rate": 1.0726387791135484e-06, "loss": 0.7666, "step": 21906 }, { "epoch": 0.7939907940995252, "grad_norm": 2.3362509056049676, "learning_rate": 1.0722755580492532e-06, "loss": 0.8638, "step": 21907 }, { "epoch": 0.7940270378021819, "grad_norm": 2.3016383437625834, "learning_rate": 1.071912391106627e-06, "loss": 0.8454, "step": 21908 }, { "epoch": 0.7940632815048385, "grad_norm": 2.3317209762072064, "learning_rate": 1.071549278290671e-06, "loss": 0.8454, "step": 21909 }, { "epoch": 0.7940995252074952, "grad_norm": 2.043078317610143, "learning_rate": 1.0711862196063916e-06, "loss": 0.8069, "step": 21910 }, { "epoch": 0.7941357689101518, "grad_norm": 2.4024097664899284, "learning_rate": 1.070823215058787e-06, "loss": 0.7777, "step": 21911 }, { "epoch": 0.7941720126128086, "grad_norm": 2.3041504493150735, "learning_rate": 1.0704602646528655e-06, "loss": 0.8083, "step": 21912 }, { "epoch": 0.7942082563154652, "grad_norm": 2.4171391166328084, "learning_rate": 1.0700973683936233e-06, "loss": 0.9321, "step": 21913 }, { "epoch": 0.7942445000181219, "grad_norm": 2.314534601303484, "learning_rate": 1.0697345262860638e-06, "loss": 0.8708, "step": 21914 }, { "epoch": 0.7942807437207785, "grad_norm": 2.3894022630756035, "learning_rate": 1.069371738335183e-06, "loss": 0.9086, "step": 21915 }, { "epoch": 0.7943169874234352, "grad_norm": 2.2096627684568095, "learning_rate": 1.069009004545985e-06, "loss": 0.794, "step": 21916 }, { "epoch": 0.7943532311260918, "grad_norm": 2.621193499679895, "learning_rate": 1.0686463249234647e-06, "loss": 0.8184, "step": 21917 }, { "epoch": 0.7943894748287486, "grad_norm": 2.4435943615570013, "learning_rate": 1.0682836994726214e-06, "loss": 0.8242, "step": 21918 }, { "epoch": 0.7944257185314052, "grad_norm": 2.715843996877589, "learning_rate": 1.0679211281984492e-06, "loss": 0.8847, "step": 21919 }, { "epoch": 0.7944619622340618, "grad_norm": 2.5483819722875927, "learning_rate": 1.0675586111059465e-06, "loss": 0.7747, "step": 21920 }, { "epoch": 0.7944982059367185, "grad_norm": 2.22137807818212, "learning_rate": 1.0671961482001087e-06, "loss": 0.907, "step": 21921 }, { "epoch": 0.7945344496393751, "grad_norm": 2.557663761722861, "learning_rate": 1.0668337394859286e-06, "loss": 1.1253, "step": 21922 }, { "epoch": 0.7945706933420318, "grad_norm": 2.341618512989112, "learning_rate": 1.0664713849684005e-06, "loss": 0.8318, "step": 21923 }, { "epoch": 0.7946069370446884, "grad_norm": 2.6153619919418953, "learning_rate": 1.066109084652518e-06, "loss": 0.9422, "step": 21924 }, { "epoch": 0.7946431807473452, "grad_norm": 2.1487623781442795, "learning_rate": 1.0657468385432745e-06, "loss": 0.7815, "step": 21925 }, { "epoch": 0.7946794244500018, "grad_norm": 2.3615943399582187, "learning_rate": 1.0653846466456585e-06, "loss": 0.7959, "step": 21926 }, { "epoch": 0.7947156681526585, "grad_norm": 2.436084021649687, "learning_rate": 1.0650225089646633e-06, "loss": 1.1056, "step": 21927 }, { "epoch": 0.7947519118553151, "grad_norm": 2.384846570159397, "learning_rate": 1.064660425505279e-06, "loss": 0.8139, "step": 21928 }, { "epoch": 0.7947881555579718, "grad_norm": 2.413271155875415, "learning_rate": 1.0642983962724922e-06, "loss": 0.8688, "step": 21929 }, { "epoch": 0.7948243992606284, "grad_norm": 2.227135818284934, "learning_rate": 1.0639364212712955e-06, "loss": 0.8608, "step": 21930 }, { "epoch": 0.7948606429632852, "grad_norm": 2.3936990947828147, "learning_rate": 1.063574500506671e-06, "loss": 0.8324, "step": 21931 }, { "epoch": 0.7948968866659418, "grad_norm": 2.5551310391665814, "learning_rate": 1.0632126339836124e-06, "loss": 0.7885, "step": 21932 }, { "epoch": 0.7949331303685985, "grad_norm": 2.2116305878054536, "learning_rate": 1.0628508217071014e-06, "loss": 0.8851, "step": 21933 }, { "epoch": 0.7949693740712551, "grad_norm": 2.4955403516034775, "learning_rate": 1.062489063682126e-06, "loss": 0.9173, "step": 21934 }, { "epoch": 0.7950056177739118, "grad_norm": 2.370196019850768, "learning_rate": 1.0621273599136678e-06, "loss": 0.7304, "step": 21935 }, { "epoch": 0.7950418614765684, "grad_norm": 2.268932624846126, "learning_rate": 1.0617657104067152e-06, "loss": 0.9489, "step": 21936 }, { "epoch": 0.7950781051792251, "grad_norm": 2.476189105005324, "learning_rate": 1.061404115166248e-06, "loss": 0.7725, "step": 21937 }, { "epoch": 0.7951143488818818, "grad_norm": 2.3848540199735173, "learning_rate": 1.061042574197252e-06, "loss": 0.7963, "step": 21938 }, { "epoch": 0.7951505925845385, "grad_norm": 2.1074181184313914, "learning_rate": 1.0606810875047052e-06, "loss": 0.6315, "step": 21939 }, { "epoch": 0.7951868362871951, "grad_norm": 2.5319712095160116, "learning_rate": 1.0603196550935907e-06, "loss": 0.8316, "step": 21940 }, { "epoch": 0.7952230799898518, "grad_norm": 2.1875933680489252, "learning_rate": 1.0599582769688904e-06, "loss": 0.8463, "step": 21941 }, { "epoch": 0.7952593236925084, "grad_norm": 2.839721409816366, "learning_rate": 1.0595969531355804e-06, "loss": 0.8118, "step": 21942 }, { "epoch": 0.795295567395165, "grad_norm": 2.4890242662093183, "learning_rate": 1.0592356835986417e-06, "loss": 0.9863, "step": 21943 }, { "epoch": 0.7953318110978218, "grad_norm": 2.1009839369227334, "learning_rate": 1.058874468363052e-06, "loss": 0.6594, "step": 21944 }, { "epoch": 0.7953680548004785, "grad_norm": 2.377689529851479, "learning_rate": 1.0585133074337895e-06, "loss": 0.8536, "step": 21945 }, { "epoch": 0.7954042985031351, "grad_norm": 2.6012844675889637, "learning_rate": 1.058152200815829e-06, "loss": 0.9347, "step": 21946 }, { "epoch": 0.7954405422057917, "grad_norm": 2.3565992252812573, "learning_rate": 1.0577911485141468e-06, "loss": 0.9585, "step": 21947 }, { "epoch": 0.7954767859084484, "grad_norm": 2.32857266369578, "learning_rate": 1.0574301505337203e-06, "loss": 0.7765, "step": 21948 }, { "epoch": 0.795513029611105, "grad_norm": 2.209538766726827, "learning_rate": 1.0570692068795202e-06, "loss": 0.823, "step": 21949 }, { "epoch": 0.7955492733137617, "grad_norm": 2.727848766619919, "learning_rate": 1.0567083175565229e-06, "loss": 1.3907, "step": 21950 }, { "epoch": 0.7955855170164184, "grad_norm": 2.2201184020486875, "learning_rate": 1.0563474825696979e-06, "loss": 0.8278, "step": 21951 }, { "epoch": 0.7956217607190751, "grad_norm": 2.1617919443039217, "learning_rate": 1.0559867019240222e-06, "loss": 0.8967, "step": 21952 }, { "epoch": 0.7956580044217317, "grad_norm": 2.288006987835429, "learning_rate": 1.055625975624463e-06, "loss": 0.8864, "step": 21953 }, { "epoch": 0.7956942481243884, "grad_norm": 2.059579917130634, "learning_rate": 1.055265303675994e-06, "loss": 0.7267, "step": 21954 }, { "epoch": 0.795730491827045, "grad_norm": 2.136540281750826, "learning_rate": 1.0549046860835821e-06, "loss": 0.8688, "step": 21955 }, { "epoch": 0.7957667355297017, "grad_norm": 2.2891570704682733, "learning_rate": 1.0545441228521975e-06, "loss": 0.9059, "step": 21956 }, { "epoch": 0.7958029792323584, "grad_norm": 2.505011726539925, "learning_rate": 1.05418361398681e-06, "loss": 1.0231, "step": 21957 }, { "epoch": 0.7958392229350151, "grad_norm": 2.2282108314503506, "learning_rate": 1.0538231594923843e-06, "loss": 0.8322, "step": 21958 }, { "epoch": 0.7958754666376717, "grad_norm": 2.2749125795106253, "learning_rate": 1.0534627593738895e-06, "loss": 0.9529, "step": 21959 }, { "epoch": 0.7959117103403284, "grad_norm": 2.72215825305841, "learning_rate": 1.0531024136362906e-06, "loss": 1.0293, "step": 21960 }, { "epoch": 0.795947954042985, "grad_norm": 2.362444798605254, "learning_rate": 1.0527421222845552e-06, "loss": 0.832, "step": 21961 }, { "epoch": 0.7959841977456417, "grad_norm": 2.399778703948643, "learning_rate": 1.0523818853236444e-06, "loss": 0.8343, "step": 21962 }, { "epoch": 0.7960204414482983, "grad_norm": 2.0922231177589663, "learning_rate": 1.0520217027585244e-06, "loss": 0.7619, "step": 21963 }, { "epoch": 0.7960566851509551, "grad_norm": 2.3182782940390196, "learning_rate": 1.0516615745941572e-06, "loss": 1.0683, "step": 21964 }, { "epoch": 0.7960929288536117, "grad_norm": 2.6211945910418306, "learning_rate": 1.051301500835507e-06, "loss": 0.8432, "step": 21965 }, { "epoch": 0.7961291725562684, "grad_norm": 2.2907364711367566, "learning_rate": 1.0509414814875325e-06, "loss": 0.8286, "step": 21966 }, { "epoch": 0.796165416258925, "grad_norm": 2.2195446616297176, "learning_rate": 1.0505815165551964e-06, "loss": 0.8029, "step": 21967 }, { "epoch": 0.7962016599615817, "grad_norm": 2.143293737420427, "learning_rate": 1.0502216060434595e-06, "loss": 0.8109, "step": 21968 }, { "epoch": 0.7962379036642383, "grad_norm": 2.722044239458345, "learning_rate": 1.0498617499572793e-06, "loss": 0.9044, "step": 21969 }, { "epoch": 0.796274147366895, "grad_norm": 2.299996008154757, "learning_rate": 1.049501948301616e-06, "loss": 0.7878, "step": 21970 }, { "epoch": 0.7963103910695517, "grad_norm": 2.3323652271990474, "learning_rate": 1.0491422010814246e-06, "loss": 0.9297, "step": 21971 }, { "epoch": 0.7963466347722084, "grad_norm": 2.26567674591911, "learning_rate": 1.0487825083016662e-06, "loss": 0.9232, "step": 21972 }, { "epoch": 0.796382878474865, "grad_norm": 2.367200990176653, "learning_rate": 1.0484228699672943e-06, "loss": 0.9185, "step": 21973 }, { "epoch": 0.7964191221775216, "grad_norm": 2.4588100050637003, "learning_rate": 1.0480632860832667e-06, "loss": 1.0388, "step": 21974 }, { "epoch": 0.7964553658801783, "grad_norm": 2.5144803447370894, "learning_rate": 1.0477037566545356e-06, "loss": 1.0036, "step": 21975 }, { "epoch": 0.7964916095828349, "grad_norm": 2.4125173710761616, "learning_rate": 1.0473442816860564e-06, "loss": 0.9701, "step": 21976 }, { "epoch": 0.7965278532854917, "grad_norm": 3.027768943367822, "learning_rate": 1.0469848611827843e-06, "loss": 0.7953, "step": 21977 }, { "epoch": 0.7965640969881483, "grad_norm": 2.349280138995738, "learning_rate": 1.0466254951496679e-06, "loss": 0.8122, "step": 21978 }, { "epoch": 0.796600340690805, "grad_norm": 2.1469559688924176, "learning_rate": 1.0462661835916621e-06, "loss": 0.9016, "step": 21979 }, { "epoch": 0.7966365843934616, "grad_norm": 2.303284499334439, "learning_rate": 1.0459069265137167e-06, "loss": 0.9045, "step": 21980 }, { "epoch": 0.7966728280961183, "grad_norm": 2.1338021005574785, "learning_rate": 1.0455477239207839e-06, "loss": 0.7146, "step": 21981 }, { "epoch": 0.7967090717987749, "grad_norm": 2.547352949003831, "learning_rate": 1.0451885758178094e-06, "loss": 0.9448, "step": 21982 }, { "epoch": 0.7967453155014316, "grad_norm": 2.2079308592509896, "learning_rate": 1.0448294822097471e-06, "loss": 0.898, "step": 21983 }, { "epoch": 0.7967815592040883, "grad_norm": 2.472565072293383, "learning_rate": 1.0444704431015412e-06, "loss": 0.9142, "step": 21984 }, { "epoch": 0.796817802906745, "grad_norm": 2.297069112422335, "learning_rate": 1.0441114584981415e-06, "loss": 0.8853, "step": 21985 }, { "epoch": 0.7968540466094016, "grad_norm": 2.325143158090245, "learning_rate": 1.043752528404492e-06, "loss": 0.9085, "step": 21986 }, { "epoch": 0.7968902903120583, "grad_norm": 2.3225475806796507, "learning_rate": 1.0433936528255406e-06, "loss": 1.0528, "step": 21987 }, { "epoch": 0.7969265340147149, "grad_norm": 2.12960209917195, "learning_rate": 1.0430348317662326e-06, "loss": 0.9434, "step": 21988 }, { "epoch": 0.7969627777173716, "grad_norm": 2.417514248584127, "learning_rate": 1.0426760652315105e-06, "loss": 0.7719, "step": 21989 }, { "epoch": 0.7969990214200283, "grad_norm": 2.539807464351704, "learning_rate": 1.0423173532263186e-06, "loss": 0.9663, "step": 21990 }, { "epoch": 0.797035265122685, "grad_norm": 2.4914674608224794, "learning_rate": 1.0419586957556005e-06, "loss": 0.8915, "step": 21991 }, { "epoch": 0.7970715088253416, "grad_norm": 2.3306864663178484, "learning_rate": 1.0416000928242986e-06, "loss": 1.0417, "step": 21992 }, { "epoch": 0.7971077525279983, "grad_norm": 2.2529716375889097, "learning_rate": 1.0412415444373526e-06, "loss": 0.8384, "step": 21993 }, { "epoch": 0.7971439962306549, "grad_norm": 2.134746180919484, "learning_rate": 1.0408830505997035e-06, "loss": 0.97, "step": 21994 }, { "epoch": 0.7971802399333116, "grad_norm": 2.6019225045390186, "learning_rate": 1.0405246113162932e-06, "loss": 0.9765, "step": 21995 }, { "epoch": 0.7972164836359682, "grad_norm": 2.5509208483820434, "learning_rate": 1.0401662265920575e-06, "loss": 0.7939, "step": 21996 }, { "epoch": 0.797252727338625, "grad_norm": 2.539963392049432, "learning_rate": 1.039807896431938e-06, "loss": 0.7118, "step": 21997 }, { "epoch": 0.7972889710412816, "grad_norm": 4.153172633247867, "learning_rate": 1.0394496208408684e-06, "loss": 0.7402, "step": 21998 }, { "epoch": 0.7973252147439382, "grad_norm": 2.477020206591558, "learning_rate": 1.0390913998237895e-06, "loss": 0.9773, "step": 21999 }, { "epoch": 0.7973614584465949, "grad_norm": 2.4720706166352557, "learning_rate": 1.038733233385635e-06, "loss": 1.0224, "step": 22000 }, { "epoch": 0.7973977021492515, "grad_norm": 2.34593272904911, "learning_rate": 1.038375121531342e-06, "loss": 1.0481, "step": 22001 }, { "epoch": 0.7974339458519082, "grad_norm": 2.550312180634333, "learning_rate": 1.038017064265841e-06, "loss": 0.9143, "step": 22002 }, { "epoch": 0.797470189554565, "grad_norm": 2.4068499424379404, "learning_rate": 1.0376590615940717e-06, "loss": 0.9083, "step": 22003 }, { "epoch": 0.7975064332572216, "grad_norm": 2.2262728185946536, "learning_rate": 1.0373011135209627e-06, "loss": 0.81, "step": 22004 }, { "epoch": 0.7975426769598782, "grad_norm": 2.447093800349986, "learning_rate": 1.036943220051449e-06, "loss": 0.8262, "step": 22005 }, { "epoch": 0.7975789206625349, "grad_norm": 2.2383854590981036, "learning_rate": 1.0365853811904598e-06, "loss": 0.8003, "step": 22006 }, { "epoch": 0.7976151643651915, "grad_norm": 2.343798591030339, "learning_rate": 1.0362275969429269e-06, "loss": 1.0334, "step": 22007 }, { "epoch": 0.7976514080678482, "grad_norm": 2.391820526549108, "learning_rate": 1.0358698673137818e-06, "loss": 0.8096, "step": 22008 }, { "epoch": 0.7976876517705048, "grad_norm": 1.9539778521676723, "learning_rate": 1.0355121923079514e-06, "loss": 0.7719, "step": 22009 }, { "epoch": 0.7977238954731616, "grad_norm": 2.3934221966061595, "learning_rate": 1.0351545719303652e-06, "loss": 0.7222, "step": 22010 }, { "epoch": 0.7977601391758182, "grad_norm": 2.638751604830108, "learning_rate": 1.0347970061859514e-06, "loss": 0.9004, "step": 22011 }, { "epoch": 0.7977963828784749, "grad_norm": 2.3236262080185583, "learning_rate": 1.0344394950796376e-06, "loss": 0.8261, "step": 22012 }, { "epoch": 0.7978326265811315, "grad_norm": 2.3244887588020346, "learning_rate": 1.0340820386163486e-06, "loss": 0.79, "step": 22013 }, { "epoch": 0.7978688702837882, "grad_norm": 2.469864948141117, "learning_rate": 1.03372463680101e-06, "loss": 1.1186, "step": 22014 }, { "epoch": 0.7979051139864448, "grad_norm": 2.498907228026315, "learning_rate": 1.0333672896385489e-06, "loss": 0.9847, "step": 22015 }, { "epoch": 0.7979413576891016, "grad_norm": 2.544911904126521, "learning_rate": 1.0330099971338859e-06, "loss": 0.8489, "step": 22016 }, { "epoch": 0.7979776013917582, "grad_norm": 2.6274132270969255, "learning_rate": 1.0326527592919478e-06, "loss": 0.9805, "step": 22017 }, { "epoch": 0.7980138450944149, "grad_norm": 2.2591591519016756, "learning_rate": 1.0322955761176523e-06, "loss": 0.9349, "step": 22018 }, { "epoch": 0.7980500887970715, "grad_norm": 2.28252070437799, "learning_rate": 1.0319384476159266e-06, "loss": 0.6893, "step": 22019 }, { "epoch": 0.7980863324997282, "grad_norm": 2.097950610721774, "learning_rate": 1.0315813737916885e-06, "loss": 0.7708, "step": 22020 }, { "epoch": 0.7981225762023848, "grad_norm": 2.4807302785156784, "learning_rate": 1.0312243546498602e-06, "loss": 0.9197, "step": 22021 }, { "epoch": 0.7981588199050415, "grad_norm": 2.3114141597982374, "learning_rate": 1.030867390195357e-06, "loss": 0.9822, "step": 22022 }, { "epoch": 0.7981950636076982, "grad_norm": 2.374173619228906, "learning_rate": 1.030510480433104e-06, "loss": 0.8169, "step": 22023 }, { "epoch": 0.7982313073103549, "grad_norm": 2.399349457552493, "learning_rate": 1.030153625368014e-06, "loss": 0.9512, "step": 22024 }, { "epoch": 0.7982675510130115, "grad_norm": 2.5967064584316373, "learning_rate": 1.0297968250050078e-06, "loss": 0.9908, "step": 22025 }, { "epoch": 0.7983037947156681, "grad_norm": 2.3097874723746656, "learning_rate": 1.029440079348999e-06, "loss": 1.1822, "step": 22026 }, { "epoch": 0.7983400384183248, "grad_norm": 2.459150412470028, "learning_rate": 1.0290833884049045e-06, "loss": 0.8585, "step": 22027 }, { "epoch": 0.7983762821209814, "grad_norm": 2.300105559460212, "learning_rate": 1.0287267521776406e-06, "loss": 0.8619, "step": 22028 }, { "epoch": 0.7984125258236382, "grad_norm": 2.2975321004574667, "learning_rate": 1.028370170672119e-06, "loss": 0.8673, "step": 22029 }, { "epoch": 0.7984487695262948, "grad_norm": 2.505529972279846, "learning_rate": 1.0280136438932542e-06, "loss": 0.8168, "step": 22030 }, { "epoch": 0.7984850132289515, "grad_norm": 2.1168050599291663, "learning_rate": 1.0276571718459598e-06, "loss": 0.922, "step": 22031 }, { "epoch": 0.7985212569316081, "grad_norm": 2.538266999942458, "learning_rate": 1.0273007545351483e-06, "loss": 0.7668, "step": 22032 }, { "epoch": 0.7985575006342648, "grad_norm": 1.9781551744530481, "learning_rate": 1.0269443919657286e-06, "loss": 0.956, "step": 22033 }, { "epoch": 0.7985937443369214, "grad_norm": 2.633291641975498, "learning_rate": 1.0265880841426118e-06, "loss": 0.9073, "step": 22034 }, { "epoch": 0.7986299880395781, "grad_norm": 2.39202992737457, "learning_rate": 1.0262318310707099e-06, "loss": 0.7895, "step": 22035 }, { "epoch": 0.7986662317422348, "grad_norm": 2.463607255552945, "learning_rate": 1.0258756327549286e-06, "loss": 0.9518, "step": 22036 }, { "epoch": 0.7987024754448915, "grad_norm": 2.6841574070668592, "learning_rate": 1.0255194892001791e-06, "loss": 0.9124, "step": 22037 }, { "epoch": 0.7987387191475481, "grad_norm": 2.295930369738116, "learning_rate": 1.0251634004113647e-06, "loss": 0.83, "step": 22038 }, { "epoch": 0.7987749628502048, "grad_norm": 2.43277335518591, "learning_rate": 1.0248073663933972e-06, "loss": 0.948, "step": 22039 }, { "epoch": 0.7988112065528614, "grad_norm": 2.034174723799939, "learning_rate": 1.0244513871511791e-06, "loss": 0.7792, "step": 22040 }, { "epoch": 0.7988474502555181, "grad_norm": 2.3685508738827097, "learning_rate": 1.0240954626896177e-06, "loss": 0.8333, "step": 22041 }, { "epoch": 0.7988836939581747, "grad_norm": 2.6041198565572277, "learning_rate": 1.0237395930136146e-06, "loss": 0.9106, "step": 22042 }, { "epoch": 0.7989199376608315, "grad_norm": 2.389073204882304, "learning_rate": 1.0233837781280752e-06, "loss": 0.7611, "step": 22043 }, { "epoch": 0.7989561813634881, "grad_norm": 2.360378660685125, "learning_rate": 1.0230280180379032e-06, "loss": 0.9825, "step": 22044 }, { "epoch": 0.7989924250661448, "grad_norm": 2.0143303283624623, "learning_rate": 1.0226723127480003e-06, "loss": 0.8568, "step": 22045 }, { "epoch": 0.7990286687688014, "grad_norm": 2.289896704560228, "learning_rate": 1.022316662263267e-06, "loss": 0.8784, "step": 22046 }, { "epoch": 0.799064912471458, "grad_norm": 2.4936786285084107, "learning_rate": 1.0219610665886038e-06, "loss": 0.6949, "step": 22047 }, { "epoch": 0.7991011561741147, "grad_norm": 2.740641247638932, "learning_rate": 1.0216055257289126e-06, "loss": 0.9592, "step": 22048 }, { "epoch": 0.7991373998767715, "grad_norm": 2.6099791720860566, "learning_rate": 1.02125003968909e-06, "loss": 0.8882, "step": 22049 }, { "epoch": 0.7991736435794281, "grad_norm": 2.544151467991274, "learning_rate": 1.0208946084740356e-06, "loss": 0.9234, "step": 22050 }, { "epoch": 0.7992098872820848, "grad_norm": 2.0400317935064107, "learning_rate": 1.020539232088647e-06, "loss": 0.712, "step": 22051 }, { "epoch": 0.7992461309847414, "grad_norm": 2.5406854636990217, "learning_rate": 1.0201839105378226e-06, "loss": 0.851, "step": 22052 }, { "epoch": 0.799282374687398, "grad_norm": 2.127676297080128, "learning_rate": 1.0198286438264553e-06, "loss": 0.9207, "step": 22053 }, { "epoch": 0.7993186183900547, "grad_norm": 2.734272675458833, "learning_rate": 1.019473431959443e-06, "loss": 0.8619, "step": 22054 }, { "epoch": 0.7993548620927113, "grad_norm": 1.8308349868624123, "learning_rate": 1.0191182749416801e-06, "loss": 0.8897, "step": 22055 }, { "epoch": 0.7993911057953681, "grad_norm": 2.5212857925301746, "learning_rate": 1.0187631727780583e-06, "loss": 1.0845, "step": 22056 }, { "epoch": 0.7994273494980247, "grad_norm": 2.612631161940084, "learning_rate": 1.018408125473474e-06, "loss": 0.8623, "step": 22057 }, { "epoch": 0.7994635932006814, "grad_norm": 2.346171981733284, "learning_rate": 1.0180531330328147e-06, "loss": 0.943, "step": 22058 }, { "epoch": 0.799499836903338, "grad_norm": 2.5507349179099306, "learning_rate": 1.0176981954609777e-06, "loss": 0.9022, "step": 22059 }, { "epoch": 0.7995360806059947, "grad_norm": 2.7886275564680627, "learning_rate": 1.0173433127628501e-06, "loss": 0.9919, "step": 22060 }, { "epoch": 0.7995723243086513, "grad_norm": 2.1835561804564247, "learning_rate": 1.0169884849433231e-06, "loss": 0.9294, "step": 22061 }, { "epoch": 0.7996085680113081, "grad_norm": 2.640197405008707, "learning_rate": 1.0166337120072873e-06, "loss": 0.7977, "step": 22062 }, { "epoch": 0.7996448117139647, "grad_norm": 2.549839301858627, "learning_rate": 1.016278993959628e-06, "loss": 0.9774, "step": 22063 }, { "epoch": 0.7996810554166214, "grad_norm": 2.3504958947494248, "learning_rate": 1.0159243308052364e-06, "loss": 0.8904, "step": 22064 }, { "epoch": 0.799717299119278, "grad_norm": 2.5684440078405606, "learning_rate": 1.0155697225489959e-06, "loss": 0.849, "step": 22065 }, { "epoch": 0.7997535428219347, "grad_norm": 2.2184619841298123, "learning_rate": 1.0152151691957969e-06, "loss": 0.9289, "step": 22066 }, { "epoch": 0.7997897865245913, "grad_norm": 2.510614257838778, "learning_rate": 1.014860670750522e-06, "loss": 0.9747, "step": 22067 }, { "epoch": 0.799826030227248, "grad_norm": 2.411775988351984, "learning_rate": 1.0145062272180583e-06, "loss": 0.8502, "step": 22068 }, { "epoch": 0.7998622739299047, "grad_norm": 2.3691652280072146, "learning_rate": 1.0141518386032856e-06, "loss": 0.6888, "step": 22069 }, { "epoch": 0.7998985176325614, "grad_norm": 2.470853031499408, "learning_rate": 1.0137975049110926e-06, "loss": 0.9223, "step": 22070 }, { "epoch": 0.799934761335218, "grad_norm": 2.311865569297922, "learning_rate": 1.0134432261463578e-06, "loss": 0.9032, "step": 22071 }, { "epoch": 0.7999710050378747, "grad_norm": 2.3181298898977696, "learning_rate": 1.0130890023139661e-06, "loss": 0.8443, "step": 22072 }, { "epoch": 0.8000072487405313, "grad_norm": 2.3284423695501175, "learning_rate": 1.0127348334187948e-06, "loss": 0.882, "step": 22073 }, { "epoch": 0.800043492443188, "grad_norm": 2.797202032150411, "learning_rate": 1.0123807194657265e-06, "loss": 1.044, "step": 22074 }, { "epoch": 0.8000797361458447, "grad_norm": 2.324133460587437, "learning_rate": 1.0120266604596417e-06, "loss": 0.7678, "step": 22075 }, { "epoch": 0.8001159798485014, "grad_norm": 2.4159927294447234, "learning_rate": 1.0116726564054157e-06, "loss": 0.8687, "step": 22076 }, { "epoch": 0.800152223551158, "grad_norm": 2.3181540604872026, "learning_rate": 1.0113187073079295e-06, "loss": 0.8147, "step": 22077 }, { "epoch": 0.8001884672538147, "grad_norm": 2.3625347505445737, "learning_rate": 1.0109648131720583e-06, "loss": 0.9877, "step": 22078 }, { "epoch": 0.8002247109564713, "grad_norm": 2.3166337929444567, "learning_rate": 1.010610974002681e-06, "loss": 1.1255, "step": 22079 }, { "epoch": 0.8002609546591279, "grad_norm": 2.3261253887772577, "learning_rate": 1.0102571898046708e-06, "loss": 0.8493, "step": 22080 }, { "epoch": 0.8002971983617846, "grad_norm": 2.472311132774257, "learning_rate": 1.0099034605829034e-06, "loss": 0.937, "step": 22081 }, { "epoch": 0.8003334420644413, "grad_norm": 2.2783302833172807, "learning_rate": 1.0095497863422543e-06, "loss": 0.9143, "step": 22082 }, { "epoch": 0.800369685767098, "grad_norm": 2.0904040413639793, "learning_rate": 1.0091961670875948e-06, "loss": 0.7844, "step": 22083 }, { "epoch": 0.8004059294697546, "grad_norm": 2.3645923353964737, "learning_rate": 1.0088426028237996e-06, "loss": 0.7401, "step": 22084 }, { "epoch": 0.8004421731724113, "grad_norm": 2.465164215038353, "learning_rate": 1.0084890935557374e-06, "loss": 0.7771, "step": 22085 }, { "epoch": 0.8004784168750679, "grad_norm": 1.9507882389260331, "learning_rate": 1.0081356392882836e-06, "loss": 0.8062, "step": 22086 }, { "epoch": 0.8005146605777246, "grad_norm": 2.3494872198053636, "learning_rate": 1.0077822400263055e-06, "loss": 0.7301, "step": 22087 }, { "epoch": 0.8005509042803813, "grad_norm": 2.275976439786354, "learning_rate": 1.0074288957746747e-06, "loss": 0.8797, "step": 22088 }, { "epoch": 0.800587147983038, "grad_norm": 2.485185268871756, "learning_rate": 1.0070756065382565e-06, "loss": 1.0161, "step": 22089 }, { "epoch": 0.8006233916856946, "grad_norm": 2.2788849838234797, "learning_rate": 1.0067223723219243e-06, "loss": 0.9167, "step": 22090 }, { "epoch": 0.8006596353883513, "grad_norm": 2.3697062614241444, "learning_rate": 1.006369193130542e-06, "loss": 0.7829, "step": 22091 }, { "epoch": 0.8006958790910079, "grad_norm": 2.2734299057139515, "learning_rate": 1.0060160689689774e-06, "loss": 0.8912, "step": 22092 }, { "epoch": 0.8007321227936646, "grad_norm": 3.150691693570365, "learning_rate": 1.0056629998420953e-06, "loss": 0.9183, "step": 22093 }, { "epoch": 0.8007683664963212, "grad_norm": 2.600341322786703, "learning_rate": 1.005309985754761e-06, "loss": 0.8995, "step": 22094 }, { "epoch": 0.800804610198978, "grad_norm": 2.73325522505839, "learning_rate": 1.0049570267118402e-06, "loss": 0.8713, "step": 22095 }, { "epoch": 0.8008408539016346, "grad_norm": 2.444599280452531, "learning_rate": 1.0046041227181946e-06, "loss": 1.0535, "step": 22096 }, { "epoch": 0.8008770976042913, "grad_norm": 2.399084800454122, "learning_rate": 1.004251273778688e-06, "loss": 0.8974, "step": 22097 }, { "epoch": 0.8009133413069479, "grad_norm": 2.1132663929886553, "learning_rate": 1.0038984798981816e-06, "loss": 0.8595, "step": 22098 }, { "epoch": 0.8009495850096046, "grad_norm": 2.4147355194188185, "learning_rate": 1.0035457410815392e-06, "loss": 0.8701, "step": 22099 }, { "epoch": 0.8009858287122612, "grad_norm": 2.2465410131179846, "learning_rate": 1.003193057333618e-06, "loss": 0.7454, "step": 22100 }, { "epoch": 0.801022072414918, "grad_norm": 2.585353484154699, "learning_rate": 1.0028404286592797e-06, "loss": 0.906, "step": 22101 }, { "epoch": 0.8010583161175746, "grad_norm": 2.379927514362312, "learning_rate": 1.0024878550633837e-06, "loss": 0.7912, "step": 22102 }, { "epoch": 0.8010945598202313, "grad_norm": 2.420468598484072, "learning_rate": 1.0021353365507863e-06, "loss": 0.9705, "step": 22103 }, { "epoch": 0.8011308035228879, "grad_norm": 1.95413062989055, "learning_rate": 1.0017828731263474e-06, "loss": 0.8487, "step": 22104 }, { "epoch": 0.8011670472255445, "grad_norm": 2.3105637806811092, "learning_rate": 1.0014304647949197e-06, "loss": 0.8418, "step": 22105 }, { "epoch": 0.8012032909282012, "grad_norm": 2.363421583080218, "learning_rate": 1.0010781115613649e-06, "loss": 0.825, "step": 22106 }, { "epoch": 0.8012395346308578, "grad_norm": 2.3242989838098276, "learning_rate": 1.0007258134305337e-06, "loss": 0.9855, "step": 22107 }, { "epoch": 0.8012757783335146, "grad_norm": 2.364579919024273, "learning_rate": 1.0003735704072838e-06, "loss": 0.842, "step": 22108 }, { "epoch": 0.8013120220361712, "grad_norm": 2.3416402712115216, "learning_rate": 1.0000213824964638e-06, "loss": 0.8668, "step": 22109 }, { "epoch": 0.8013482657388279, "grad_norm": 2.493283471108187, "learning_rate": 9.996692497029337e-07, "loss": 0.8916, "step": 22110 }, { "epoch": 0.8013845094414845, "grad_norm": 2.1887470892175225, "learning_rate": 9.993171720315398e-07, "loss": 0.8984, "step": 22111 }, { "epoch": 0.8014207531441412, "grad_norm": 2.2987873160293817, "learning_rate": 9.989651494871377e-07, "loss": 0.7992, "step": 22112 }, { "epoch": 0.8014569968467978, "grad_norm": 2.469517483667703, "learning_rate": 9.986131820745743e-07, "loss": 0.5882, "step": 22113 }, { "epoch": 0.8014932405494545, "grad_norm": 2.520465806857719, "learning_rate": 9.982612697987015e-07, "loss": 0.7781, "step": 22114 }, { "epoch": 0.8015294842521112, "grad_norm": 2.3264810651933114, "learning_rate": 9.979094126643695e-07, "loss": 0.7669, "step": 22115 }, { "epoch": 0.8015657279547679, "grad_norm": 2.3840390248601095, "learning_rate": 9.97557610676424e-07, "loss": 0.9079, "step": 22116 }, { "epoch": 0.8016019716574245, "grad_norm": 2.4358831342563914, "learning_rate": 9.97205863839714e-07, "loss": 0.8151, "step": 22117 }, { "epoch": 0.8016382153600812, "grad_norm": 2.505926271413652, "learning_rate": 9.96854172159087e-07, "loss": 0.8827, "step": 22118 }, { "epoch": 0.8016744590627378, "grad_norm": 2.2898394469440095, "learning_rate": 9.965025356393893e-07, "loss": 0.7903, "step": 22119 }, { "epoch": 0.8017107027653945, "grad_norm": 2.3637682047375623, "learning_rate": 9.961509542854635e-07, "loss": 0.7211, "step": 22120 }, { "epoch": 0.8017469464680512, "grad_norm": 2.471578255504758, "learning_rate": 9.957994281021571e-07, "loss": 0.9425, "step": 22121 }, { "epoch": 0.8017831901707079, "grad_norm": 2.2459301723746887, "learning_rate": 9.954479570943138e-07, "loss": 0.8274, "step": 22122 }, { "epoch": 0.8018194338733645, "grad_norm": 2.3613715237477235, "learning_rate": 9.950965412667746e-07, "loss": 1.1599, "step": 22123 }, { "epoch": 0.8018556775760212, "grad_norm": 2.1698671431617607, "learning_rate": 9.947451806243842e-07, "loss": 0.9711, "step": 22124 }, { "epoch": 0.8018919212786778, "grad_norm": 2.5889589377729765, "learning_rate": 9.9439387517198e-07, "loss": 1.0924, "step": 22125 }, { "epoch": 0.8019281649813345, "grad_norm": 2.166104941790884, "learning_rate": 9.940426249144087e-07, "loss": 0.8932, "step": 22126 }, { "epoch": 0.8019644086839911, "grad_norm": 2.3973452873143053, "learning_rate": 9.93691429856506e-07, "loss": 0.9581, "step": 22127 }, { "epoch": 0.8020006523866479, "grad_norm": 2.2198538609778566, "learning_rate": 9.933402900031136e-07, "loss": 0.7932, "step": 22128 }, { "epoch": 0.8020368960893045, "grad_norm": 2.288840046248601, "learning_rate": 9.929892053590678e-07, "loss": 0.9771, "step": 22129 }, { "epoch": 0.8020731397919612, "grad_norm": 2.282286010524255, "learning_rate": 9.92638175929207e-07, "loss": 0.7951, "step": 22130 }, { "epoch": 0.8021093834946178, "grad_norm": 2.6982909741921013, "learning_rate": 9.922872017183687e-07, "loss": 1.0069, "step": 22131 }, { "epoch": 0.8021456271972744, "grad_norm": 2.4903170031612314, "learning_rate": 9.919362827313906e-07, "loss": 0.8833, "step": 22132 }, { "epoch": 0.8021818708999311, "grad_norm": 2.2810288872716296, "learning_rate": 9.915854189731045e-07, "loss": 0.8265, "step": 22133 }, { "epoch": 0.8022181146025879, "grad_norm": 2.321537795015826, "learning_rate": 9.912346104483478e-07, "loss": 0.9351, "step": 22134 }, { "epoch": 0.8022543583052445, "grad_norm": 2.334745763246668, "learning_rate": 9.90883857161955e-07, "loss": 0.9792, "step": 22135 }, { "epoch": 0.8022906020079011, "grad_norm": 2.3310405809846997, "learning_rate": 9.905331591187561e-07, "loss": 0.8199, "step": 22136 }, { "epoch": 0.8023268457105578, "grad_norm": 2.2524963470776984, "learning_rate": 9.901825163235862e-07, "loss": 0.7602, "step": 22137 }, { "epoch": 0.8023630894132144, "grad_norm": 2.4057348756101256, "learning_rate": 9.898319287812757e-07, "loss": 0.9314, "step": 22138 }, { "epoch": 0.8023993331158711, "grad_norm": 2.0728983341007097, "learning_rate": 9.89481396496657e-07, "loss": 0.9043, "step": 22139 }, { "epoch": 0.8024355768185277, "grad_norm": 2.211374213915448, "learning_rate": 9.891309194745585e-07, "loss": 0.9855, "step": 22140 }, { "epoch": 0.8024718205211845, "grad_norm": 2.2807565746965586, "learning_rate": 9.887804977198101e-07, "loss": 0.7673, "step": 22141 }, { "epoch": 0.8025080642238411, "grad_norm": 2.3803171402201895, "learning_rate": 9.884301312372413e-07, "loss": 1.0073, "step": 22142 }, { "epoch": 0.8025443079264978, "grad_norm": 2.757971325867131, "learning_rate": 9.880798200316782e-07, "loss": 0.9527, "step": 22143 }, { "epoch": 0.8025805516291544, "grad_norm": 2.424828707239695, "learning_rate": 9.877295641079492e-07, "loss": 0.8749, "step": 22144 }, { "epoch": 0.8026167953318111, "grad_norm": 2.2870607474426703, "learning_rate": 9.8737936347088e-07, "loss": 0.9999, "step": 22145 }, { "epoch": 0.8026530390344677, "grad_norm": 1.953412966274069, "learning_rate": 9.870292181252977e-07, "loss": 0.8153, "step": 22146 }, { "epoch": 0.8026892827371245, "grad_norm": 2.505400940495217, "learning_rate": 9.866791280760247e-07, "loss": 0.7156, "step": 22147 }, { "epoch": 0.8027255264397811, "grad_norm": 2.6413895446159446, "learning_rate": 9.863290933278858e-07, "loss": 0.7676, "step": 22148 }, { "epoch": 0.8027617701424378, "grad_norm": 2.279098593910225, "learning_rate": 9.85979113885706e-07, "loss": 0.8505, "step": 22149 }, { "epoch": 0.8027980138450944, "grad_norm": 2.406837093798468, "learning_rate": 9.856291897543051e-07, "loss": 0.9188, "step": 22150 }, { "epoch": 0.8028342575477511, "grad_norm": 2.4208841695428935, "learning_rate": 9.852793209385075e-07, "loss": 0.6768, "step": 22151 }, { "epoch": 0.8028705012504077, "grad_norm": 2.3851389154051414, "learning_rate": 9.849295074431302e-07, "loss": 0.7346, "step": 22152 }, { "epoch": 0.8029067449530644, "grad_norm": 2.007709039891529, "learning_rate": 9.845797492729987e-07, "loss": 0.9138, "step": 22153 }, { "epoch": 0.8029429886557211, "grad_norm": 2.6797786799651107, "learning_rate": 9.842300464329285e-07, "loss": 0.9526, "step": 22154 }, { "epoch": 0.8029792323583778, "grad_norm": 2.325356452471844, "learning_rate": 9.838803989277406e-07, "loss": 0.959, "step": 22155 }, { "epoch": 0.8030154760610344, "grad_norm": 2.5875168473505608, "learning_rate": 9.835308067622502e-07, "loss": 0.829, "step": 22156 }, { "epoch": 0.803051719763691, "grad_norm": 2.5457597373192113, "learning_rate": 9.83181269941278e-07, "loss": 0.9352, "step": 22157 }, { "epoch": 0.8030879634663477, "grad_norm": 2.5710096634023003, "learning_rate": 9.828317884696375e-07, "loss": 1.1346, "step": 22158 }, { "epoch": 0.8031242071690043, "grad_norm": 2.722479436592342, "learning_rate": 9.824823623521468e-07, "loss": 0.8853, "step": 22159 }, { "epoch": 0.8031604508716611, "grad_norm": 2.358156330867181, "learning_rate": 9.821329915936179e-07, "loss": 0.8315, "step": 22160 }, { "epoch": 0.8031966945743177, "grad_norm": 2.232028547752142, "learning_rate": 9.81783676198867e-07, "loss": 0.9148, "step": 22161 }, { "epoch": 0.8032329382769744, "grad_norm": 2.2401259327013876, "learning_rate": 9.814344161727074e-07, "loss": 0.8455, "step": 22162 }, { "epoch": 0.803269181979631, "grad_norm": 2.099204526904503, "learning_rate": 9.810852115199504e-07, "loss": 0.5943, "step": 22163 }, { "epoch": 0.8033054256822877, "grad_norm": 2.4243859360223556, "learning_rate": 9.807360622454087e-07, "loss": 0.7442, "step": 22164 }, { "epoch": 0.8033416693849443, "grad_norm": 2.861199630686847, "learning_rate": 9.80386968353893e-07, "loss": 0.8677, "step": 22165 }, { "epoch": 0.803377913087601, "grad_norm": 2.330245418702199, "learning_rate": 9.800379298502154e-07, "loss": 0.9465, "step": 22166 }, { "epoch": 0.8034141567902577, "grad_norm": 2.553835867044937, "learning_rate": 9.796889467391828e-07, "loss": 0.9484, "step": 22167 }, { "epoch": 0.8034504004929144, "grad_norm": 2.6656340678423907, "learning_rate": 9.793400190256046e-07, "loss": 0.7734, "step": 22168 }, { "epoch": 0.803486644195571, "grad_norm": 2.3522689618204287, "learning_rate": 9.78991146714291e-07, "loss": 1.0639, "step": 22169 }, { "epoch": 0.8035228878982277, "grad_norm": 2.416306359401958, "learning_rate": 9.786423298100462e-07, "loss": 0.906, "step": 22170 }, { "epoch": 0.8035591316008843, "grad_norm": 2.2953757605832377, "learning_rate": 9.782935683176793e-07, "loss": 1.06, "step": 22171 }, { "epoch": 0.803595375303541, "grad_norm": 2.4053473744277794, "learning_rate": 9.779448622419923e-07, "loss": 0.8876, "step": 22172 }, { "epoch": 0.8036316190061977, "grad_norm": 2.39804754480315, "learning_rate": 9.775962115877952e-07, "loss": 0.7328, "step": 22173 }, { "epoch": 0.8036678627088544, "grad_norm": 2.2621799330745045, "learning_rate": 9.772476163598883e-07, "loss": 1.0422, "step": 22174 }, { "epoch": 0.803704106411511, "grad_norm": 2.1847296802750704, "learning_rate": 9.768990765630776e-07, "loss": 0.9025, "step": 22175 }, { "epoch": 0.8037403501141677, "grad_norm": 2.5879667833284548, "learning_rate": 9.765505922021618e-07, "loss": 0.9349, "step": 22176 }, { "epoch": 0.8037765938168243, "grad_norm": 2.4094042786931884, "learning_rate": 9.762021632819484e-07, "loss": 0.6981, "step": 22177 }, { "epoch": 0.803812837519481, "grad_norm": 2.7925356818428395, "learning_rate": 9.758537898072345e-07, "loss": 0.7814, "step": 22178 }, { "epoch": 0.8038490812221376, "grad_norm": 2.3145636472472764, "learning_rate": 9.755054717828228e-07, "loss": 0.6878, "step": 22179 }, { "epoch": 0.8038853249247944, "grad_norm": 2.2735713143929077, "learning_rate": 9.751572092135102e-07, "loss": 0.8519, "step": 22180 }, { "epoch": 0.803921568627451, "grad_norm": 2.332527591941073, "learning_rate": 9.748090021040974e-07, "loss": 0.9207, "step": 22181 }, { "epoch": 0.8039578123301077, "grad_norm": 2.4611642051178375, "learning_rate": 9.744608504593833e-07, "loss": 0.8936, "step": 22182 }, { "epoch": 0.8039940560327643, "grad_norm": 2.6538042629183978, "learning_rate": 9.741127542841633e-07, "loss": 0.8268, "step": 22183 }, { "epoch": 0.804030299735421, "grad_norm": 2.4907275941302154, "learning_rate": 9.73764713583234e-07, "loss": 0.8674, "step": 22184 }, { "epoch": 0.8040665434380776, "grad_norm": 2.4223220920726027, "learning_rate": 9.734167283613927e-07, "loss": 0.9417, "step": 22185 }, { "epoch": 0.8041027871407342, "grad_norm": 2.3686333351298914, "learning_rate": 9.730687986234344e-07, "loss": 1.0284, "step": 22186 }, { "epoch": 0.804139030843391, "grad_norm": 2.0029435340710418, "learning_rate": 9.727209243741515e-07, "loss": 0.7356, "step": 22187 }, { "epoch": 0.8041752745460476, "grad_norm": 2.0130470360894974, "learning_rate": 9.723731056183389e-07, "loss": 0.7417, "step": 22188 }, { "epoch": 0.8042115182487043, "grad_norm": 2.607960922243237, "learning_rate": 9.7202534236079e-07, "loss": 0.8966, "step": 22189 }, { "epoch": 0.8042477619513609, "grad_norm": 2.553241138940767, "learning_rate": 9.71677634606295e-07, "loss": 0.8588, "step": 22190 }, { "epoch": 0.8042840056540176, "grad_norm": 2.6605733295413097, "learning_rate": 9.713299823596466e-07, "loss": 0.7997, "step": 22191 }, { "epoch": 0.8043202493566742, "grad_norm": 2.32381277338034, "learning_rate": 9.709823856256324e-07, "loss": 0.9568, "step": 22192 }, { "epoch": 0.804356493059331, "grad_norm": 2.2922828692475257, "learning_rate": 9.706348444090469e-07, "loss": 1.1139, "step": 22193 }, { "epoch": 0.8043927367619876, "grad_norm": 2.5907198793183928, "learning_rate": 9.702873587146749e-07, "loss": 0.8497, "step": 22194 }, { "epoch": 0.8044289804646443, "grad_norm": 2.4259458132987097, "learning_rate": 9.699399285473076e-07, "loss": 0.779, "step": 22195 }, { "epoch": 0.8044652241673009, "grad_norm": 2.402908989764728, "learning_rate": 9.695925539117278e-07, "loss": 0.6916, "step": 22196 }, { "epoch": 0.8045014678699576, "grad_norm": 2.391922197815866, "learning_rate": 9.692452348127278e-07, "loss": 1.0355, "step": 22197 }, { "epoch": 0.8045377115726142, "grad_norm": 2.4279006748303384, "learning_rate": 9.688979712550894e-07, "loss": 0.8261, "step": 22198 }, { "epoch": 0.8045739552752709, "grad_norm": 2.5805939607237764, "learning_rate": 9.68550763243601e-07, "loss": 0.926, "step": 22199 }, { "epoch": 0.8046101989779276, "grad_norm": 2.550868189056172, "learning_rate": 9.682036107830429e-07, "loss": 0.9349, "step": 22200 }, { "epoch": 0.8046464426805843, "grad_norm": 2.7211328351961486, "learning_rate": 9.67856513878201e-07, "loss": 1.0397, "step": 22201 }, { "epoch": 0.8046826863832409, "grad_norm": 2.3815553871715083, "learning_rate": 9.675094725338585e-07, "loss": 0.8807, "step": 22202 }, { "epoch": 0.8047189300858976, "grad_norm": 2.550880109626572, "learning_rate": 9.671624867547958e-07, "loss": 0.9407, "step": 22203 }, { "epoch": 0.8047551737885542, "grad_norm": 2.3073773933974184, "learning_rate": 9.668155565457953e-07, "loss": 1.0351, "step": 22204 }, { "epoch": 0.8047914174912109, "grad_norm": 2.2134842754365174, "learning_rate": 9.66468681911637e-07, "loss": 0.8375, "step": 22205 }, { "epoch": 0.8048276611938676, "grad_norm": 2.204256907714168, "learning_rate": 9.661218628571023e-07, "loss": 0.8238, "step": 22206 }, { "epoch": 0.8048639048965243, "grad_norm": 2.2894718538709986, "learning_rate": 9.65775099386967e-07, "loss": 0.787, "step": 22207 }, { "epoch": 0.8049001485991809, "grad_norm": 2.1303763897006367, "learning_rate": 9.654283915060114e-07, "loss": 0.9185, "step": 22208 }, { "epoch": 0.8049363923018376, "grad_norm": 2.106793210342951, "learning_rate": 9.650817392190137e-07, "loss": 0.8913, "step": 22209 }, { "epoch": 0.8049726360044942, "grad_norm": 2.773762217681576, "learning_rate": 9.647351425307484e-07, "loss": 0.9335, "step": 22210 }, { "epoch": 0.8050088797071508, "grad_norm": 2.1112755366822147, "learning_rate": 9.64388601445993e-07, "loss": 0.8611, "step": 22211 }, { "epoch": 0.8050451234098075, "grad_norm": 2.2779821146537085, "learning_rate": 9.6404211596952e-07, "loss": 0.8276, "step": 22212 }, { "epoch": 0.8050813671124643, "grad_norm": 2.1768536150292426, "learning_rate": 9.63695686106108e-07, "loss": 0.7852, "step": 22213 }, { "epoch": 0.8051176108151209, "grad_norm": 2.28772481514949, "learning_rate": 9.633493118605274e-07, "loss": 0.8677, "step": 22214 }, { "epoch": 0.8051538545177775, "grad_norm": 2.7241079514626243, "learning_rate": 9.630029932375534e-07, "loss": 0.9006, "step": 22215 }, { "epoch": 0.8051900982204342, "grad_norm": 2.250235000137492, "learning_rate": 9.626567302419538e-07, "loss": 0.7384, "step": 22216 }, { "epoch": 0.8052263419230908, "grad_norm": 2.695939979163774, "learning_rate": 9.623105228785052e-07, "loss": 1.0475, "step": 22217 }, { "epoch": 0.8052625856257475, "grad_norm": 2.2531416880174353, "learning_rate": 9.619643711519743e-07, "loss": 0.7931, "step": 22218 }, { "epoch": 0.8052988293284042, "grad_norm": 2.2654940918456794, "learning_rate": 9.616182750671343e-07, "loss": 0.8494, "step": 22219 }, { "epoch": 0.8053350730310609, "grad_norm": 2.1198316498167213, "learning_rate": 9.6127223462875e-07, "loss": 0.8397, "step": 22220 }, { "epoch": 0.8053713167337175, "grad_norm": 2.6206204458176443, "learning_rate": 9.609262498415923e-07, "loss": 0.9545, "step": 22221 }, { "epoch": 0.8054075604363742, "grad_norm": 2.54975669596169, "learning_rate": 9.605803207104291e-07, "loss": 0.9754, "step": 22222 }, { "epoch": 0.8054438041390308, "grad_norm": 2.407233166918051, "learning_rate": 9.602344472400238e-07, "loss": 0.7236, "step": 22223 }, { "epoch": 0.8054800478416875, "grad_norm": 2.2973344894883896, "learning_rate": 9.598886294351467e-07, "loss": 0.8737, "step": 22224 }, { "epoch": 0.8055162915443441, "grad_norm": 2.2974274840891327, "learning_rate": 9.595428673005602e-07, "loss": 0.8933, "step": 22225 }, { "epoch": 0.8055525352470009, "grad_norm": 2.313691121848001, "learning_rate": 9.591971608410306e-07, "loss": 0.9483, "step": 22226 }, { "epoch": 0.8055887789496575, "grad_norm": 2.373883024552536, "learning_rate": 9.588515100613187e-07, "loss": 0.838, "step": 22227 }, { "epoch": 0.8056250226523142, "grad_norm": 2.1875429622167193, "learning_rate": 9.58505914966189e-07, "loss": 0.9579, "step": 22228 }, { "epoch": 0.8056612663549708, "grad_norm": 2.4583120411762245, "learning_rate": 9.581603755604052e-07, "loss": 0.8109, "step": 22229 }, { "epoch": 0.8056975100576275, "grad_norm": 2.3385802700770975, "learning_rate": 9.578148918487256e-07, "loss": 0.8281, "step": 22230 }, { "epoch": 0.8057337537602841, "grad_norm": 2.2411275991616137, "learning_rate": 9.574694638359123e-07, "loss": 0.9532, "step": 22231 }, { "epoch": 0.8057699974629409, "grad_norm": 2.19749799070348, "learning_rate": 9.571240915267244e-07, "loss": 0.89, "step": 22232 }, { "epoch": 0.8058062411655975, "grad_norm": 2.464721029141548, "learning_rate": 9.567787749259232e-07, "loss": 1.0709, "step": 22233 }, { "epoch": 0.8058424848682542, "grad_norm": 2.3750011057531366, "learning_rate": 9.564335140382642e-07, "loss": 0.848, "step": 22234 }, { "epoch": 0.8058787285709108, "grad_norm": 2.1926899856781947, "learning_rate": 9.560883088685052e-07, "loss": 0.7183, "step": 22235 }, { "epoch": 0.8059149722735675, "grad_norm": 2.4697202968914334, "learning_rate": 9.557431594214056e-07, "loss": 0.8854, "step": 22236 }, { "epoch": 0.8059512159762241, "grad_norm": 1.9272678024179486, "learning_rate": 9.553980657017175e-07, "loss": 0.7039, "step": 22237 }, { "epoch": 0.8059874596788807, "grad_norm": 2.4808006874102753, "learning_rate": 9.550530277141995e-07, "loss": 1.0065, "step": 22238 }, { "epoch": 0.8060237033815375, "grad_norm": 2.119892192998389, "learning_rate": 9.54708045463602e-07, "loss": 0.9624, "step": 22239 }, { "epoch": 0.8060599470841942, "grad_norm": 1.9795313286436396, "learning_rate": 9.543631189546836e-07, "loss": 0.8597, "step": 22240 }, { "epoch": 0.8060961907868508, "grad_norm": 2.189020249510214, "learning_rate": 9.540182481921933e-07, "loss": 0.8283, "step": 22241 }, { "epoch": 0.8061324344895074, "grad_norm": 2.8736886773643433, "learning_rate": 9.536734331808862e-07, "loss": 0.9498, "step": 22242 }, { "epoch": 0.8061686781921641, "grad_norm": 2.1923908438322357, "learning_rate": 9.533286739255094e-07, "loss": 0.8317, "step": 22243 }, { "epoch": 0.8062049218948207, "grad_norm": 2.3459155120349857, "learning_rate": 9.52983970430818e-07, "loss": 0.7645, "step": 22244 }, { "epoch": 0.8062411655974775, "grad_norm": 2.412610489211905, "learning_rate": 9.526393227015596e-07, "loss": 0.9168, "step": 22245 }, { "epoch": 0.8062774093001341, "grad_norm": 2.3904089556006762, "learning_rate": 9.522947307424845e-07, "loss": 0.8703, "step": 22246 }, { "epoch": 0.8063136530027908, "grad_norm": 2.2235985568012047, "learning_rate": 9.519501945583387e-07, "loss": 1.0892, "step": 22247 }, { "epoch": 0.8063498967054474, "grad_norm": 2.465560154331822, "learning_rate": 9.516057141538709e-07, "loss": 1.0312, "step": 22248 }, { "epoch": 0.8063861404081041, "grad_norm": 2.321553742923016, "learning_rate": 9.512612895338291e-07, "loss": 0.897, "step": 22249 }, { "epoch": 0.8064223841107607, "grad_norm": 2.750529220331682, "learning_rate": 9.509169207029572e-07, "loss": 0.896, "step": 22250 }, { "epoch": 0.8064586278134174, "grad_norm": 2.114276326405148, "learning_rate": 9.505726076660016e-07, "loss": 0.8246, "step": 22251 }, { "epoch": 0.8064948715160741, "grad_norm": 3.143081832767298, "learning_rate": 9.50228350427706e-07, "loss": 1.021, "step": 22252 }, { "epoch": 0.8065311152187308, "grad_norm": 2.622963200630436, "learning_rate": 9.498841489928162e-07, "loss": 1.0225, "step": 22253 }, { "epoch": 0.8065673589213874, "grad_norm": 2.237257918715645, "learning_rate": 9.495400033660718e-07, "loss": 0.706, "step": 22254 }, { "epoch": 0.8066036026240441, "grad_norm": 1.9828106194333688, "learning_rate": 9.491959135522166e-07, "loss": 0.7608, "step": 22255 }, { "epoch": 0.8066398463267007, "grad_norm": 2.29231181526192, "learning_rate": 9.488518795559931e-07, "loss": 0.7692, "step": 22256 }, { "epoch": 0.8066760900293574, "grad_norm": 2.165424499617104, "learning_rate": 9.485079013821391e-07, "loss": 0.6802, "step": 22257 }, { "epoch": 0.806712333732014, "grad_norm": 2.5765499604147997, "learning_rate": 9.481639790353975e-07, "loss": 0.9015, "step": 22258 }, { "epoch": 0.8067485774346708, "grad_norm": 2.458520790198623, "learning_rate": 9.478201125205033e-07, "loss": 0.826, "step": 22259 }, { "epoch": 0.8067848211373274, "grad_norm": 2.14535331899958, "learning_rate": 9.474763018421995e-07, "loss": 0.8591, "step": 22260 }, { "epoch": 0.8068210648399841, "grad_norm": 2.581197288949268, "learning_rate": 9.471325470052206e-07, "loss": 0.8234, "step": 22261 }, { "epoch": 0.8068573085426407, "grad_norm": 2.417978039819911, "learning_rate": 9.467888480143051e-07, "loss": 0.9849, "step": 22262 }, { "epoch": 0.8068935522452974, "grad_norm": 2.405774334669121, "learning_rate": 9.464452048741857e-07, "loss": 1.1471, "step": 22263 }, { "epoch": 0.806929795947954, "grad_norm": 2.362214810581002, "learning_rate": 9.46101617589602e-07, "loss": 0.8622, "step": 22264 }, { "epoch": 0.8069660396506108, "grad_norm": 2.5706107695374225, "learning_rate": 9.457580861652848e-07, "loss": 0.885, "step": 22265 }, { "epoch": 0.8070022833532674, "grad_norm": 2.3873231423946586, "learning_rate": 9.454146106059714e-07, "loss": 0.9345, "step": 22266 }, { "epoch": 0.807038527055924, "grad_norm": 2.9472817697495386, "learning_rate": 9.450711909163906e-07, "loss": 1.1576, "step": 22267 }, { "epoch": 0.8070747707585807, "grad_norm": 2.6012068289445662, "learning_rate": 9.447278271012766e-07, "loss": 0.9083, "step": 22268 }, { "epoch": 0.8071110144612373, "grad_norm": 2.20631568486452, "learning_rate": 9.443845191653617e-07, "loss": 0.8606, "step": 22269 }, { "epoch": 0.807147258163894, "grad_norm": 2.264355081201818, "learning_rate": 9.440412671133742e-07, "loss": 0.9816, "step": 22270 }, { "epoch": 0.8071835018665506, "grad_norm": 2.481817513966419, "learning_rate": 9.436980709500454e-07, "loss": 0.6539, "step": 22271 }, { "epoch": 0.8072197455692074, "grad_norm": 2.136065731087902, "learning_rate": 9.433549306801037e-07, "loss": 0.9337, "step": 22272 }, { "epoch": 0.807255989271864, "grad_norm": 2.4760887151015147, "learning_rate": 9.430118463082788e-07, "loss": 0.8111, "step": 22273 }, { "epoch": 0.8072922329745207, "grad_norm": 2.0110632041314913, "learning_rate": 9.426688178392962e-07, "loss": 0.6869, "step": 22274 }, { "epoch": 0.8073284766771773, "grad_norm": 2.207925416337867, "learning_rate": 9.423258452778833e-07, "loss": 0.6647, "step": 22275 }, { "epoch": 0.807364720379834, "grad_norm": 1.959601300034279, "learning_rate": 9.419829286287674e-07, "loss": 0.7039, "step": 22276 }, { "epoch": 0.8074009640824906, "grad_norm": 2.70896227654765, "learning_rate": 9.416400678966714e-07, "loss": 0.7967, "step": 22277 }, { "epoch": 0.8074372077851474, "grad_norm": 2.3710404206213287, "learning_rate": 9.412972630863221e-07, "loss": 0.9349, "step": 22278 }, { "epoch": 0.807473451487804, "grad_norm": 2.4694517475290714, "learning_rate": 9.409545142024395e-07, "loss": 0.9434, "step": 22279 }, { "epoch": 0.8075096951904607, "grad_norm": 2.325536433453654, "learning_rate": 9.406118212497511e-07, "loss": 0.9157, "step": 22280 }, { "epoch": 0.8075459388931173, "grad_norm": 2.3439285280182465, "learning_rate": 9.402691842329758e-07, "loss": 0.7745, "step": 22281 }, { "epoch": 0.807582182595774, "grad_norm": 2.4044482017028326, "learning_rate": 9.399266031568372e-07, "loss": 1.0483, "step": 22282 }, { "epoch": 0.8076184262984306, "grad_norm": 2.376195916643234, "learning_rate": 9.395840780260518e-07, "loss": 1.0025, "step": 22283 }, { "epoch": 0.8076546700010873, "grad_norm": 2.3345791786307464, "learning_rate": 9.392416088453443e-07, "loss": 0.7873, "step": 22284 }, { "epoch": 0.807690913703744, "grad_norm": 2.185532564236829, "learning_rate": 9.38899195619431e-07, "loss": 0.8376, "step": 22285 }, { "epoch": 0.8077271574064007, "grad_norm": 2.47515623996743, "learning_rate": 9.385568383530314e-07, "loss": 1.0269, "step": 22286 }, { "epoch": 0.8077634011090573, "grad_norm": 2.1854320543163515, "learning_rate": 9.382145370508617e-07, "loss": 0.9353, "step": 22287 }, { "epoch": 0.807799644811714, "grad_norm": 2.2814378898400536, "learning_rate": 9.378722917176386e-07, "loss": 0.9299, "step": 22288 }, { "epoch": 0.8078358885143706, "grad_norm": 2.4919173184699246, "learning_rate": 9.375301023580802e-07, "loss": 0.8466, "step": 22289 }, { "epoch": 0.8078721322170273, "grad_norm": 2.2853218602693777, "learning_rate": 9.371879689768986e-07, "loss": 0.8873, "step": 22290 }, { "epoch": 0.807908375919684, "grad_norm": 2.432162620552823, "learning_rate": 9.368458915788098e-07, "loss": 0.9736, "step": 22291 }, { "epoch": 0.8079446196223407, "grad_norm": 2.276629812391843, "learning_rate": 9.36503870168527e-07, "loss": 0.887, "step": 22292 }, { "epoch": 0.8079808633249973, "grad_norm": 2.24661478866386, "learning_rate": 9.361619047507647e-07, "loss": 0.8204, "step": 22293 }, { "epoch": 0.808017107027654, "grad_norm": 2.4309809367112676, "learning_rate": 9.358199953302327e-07, "loss": 0.9658, "step": 22294 }, { "epoch": 0.8080533507303106, "grad_norm": 2.1746130521413076, "learning_rate": 9.354781419116432e-07, "loss": 0.8482, "step": 22295 }, { "epoch": 0.8080895944329672, "grad_norm": 2.2641084635053508, "learning_rate": 9.35136344499708e-07, "loss": 0.9346, "step": 22296 }, { "epoch": 0.8081258381356239, "grad_norm": 2.3013652267155194, "learning_rate": 9.347946030991339e-07, "loss": 0.8815, "step": 22297 }, { "epoch": 0.8081620818382806, "grad_norm": 2.5328961947392905, "learning_rate": 9.344529177146333e-07, "loss": 0.9429, "step": 22298 }, { "epoch": 0.8081983255409373, "grad_norm": 2.4407201146996016, "learning_rate": 9.341112883509101e-07, "loss": 0.7749, "step": 22299 }, { "epoch": 0.8082345692435939, "grad_norm": 2.1124345655068137, "learning_rate": 9.337697150126773e-07, "loss": 0.7776, "step": 22300 }, { "epoch": 0.8082708129462506, "grad_norm": 2.577813379672096, "learning_rate": 9.334281977046367e-07, "loss": 0.8952, "step": 22301 }, { "epoch": 0.8083070566489072, "grad_norm": 2.118604998446966, "learning_rate": 9.330867364314977e-07, "loss": 0.6131, "step": 22302 }, { "epoch": 0.8083433003515639, "grad_norm": 2.2757319907487386, "learning_rate": 9.327453311979618e-07, "loss": 0.8731, "step": 22303 }, { "epoch": 0.8083795440542206, "grad_norm": 2.1150386830104932, "learning_rate": 9.324039820087371e-07, "loss": 0.8835, "step": 22304 }, { "epoch": 0.8084157877568773, "grad_norm": 2.643413618440071, "learning_rate": 9.320626888685247e-07, "loss": 1.0002, "step": 22305 }, { "epoch": 0.8084520314595339, "grad_norm": 2.281791107974015, "learning_rate": 9.317214517820284e-07, "loss": 0.7579, "step": 22306 }, { "epoch": 0.8084882751621906, "grad_norm": 2.3236909246013386, "learning_rate": 9.31380270753951e-07, "loss": 0.8069, "step": 22307 }, { "epoch": 0.8085245188648472, "grad_norm": 2.233638847789308, "learning_rate": 9.310391457889922e-07, "loss": 0.6797, "step": 22308 }, { "epoch": 0.8085607625675039, "grad_norm": 2.5554387716776072, "learning_rate": 9.306980768918539e-07, "loss": 0.9566, "step": 22309 }, { "epoch": 0.8085970062701605, "grad_norm": 2.3737671791774098, "learning_rate": 9.303570640672327e-07, "loss": 0.7621, "step": 22310 }, { "epoch": 0.8086332499728173, "grad_norm": 2.4518751830680836, "learning_rate": 9.300161073198322e-07, "loss": 0.7793, "step": 22311 }, { "epoch": 0.8086694936754739, "grad_norm": 2.5869624225719554, "learning_rate": 9.296752066543468e-07, "loss": 1.031, "step": 22312 }, { "epoch": 0.8087057373781306, "grad_norm": 2.312759886062942, "learning_rate": 9.293343620754775e-07, "loss": 0.7324, "step": 22313 }, { "epoch": 0.8087419810807872, "grad_norm": 2.25283403619365, "learning_rate": 9.289935735879169e-07, "loss": 0.8672, "step": 22314 }, { "epoch": 0.8087782247834439, "grad_norm": 2.627487292758329, "learning_rate": 9.28652841196363e-07, "loss": 0.9701, "step": 22315 }, { "epoch": 0.8088144684861005, "grad_norm": 2.2006546983318436, "learning_rate": 9.283121649055121e-07, "loss": 0.8944, "step": 22316 }, { "epoch": 0.8088507121887571, "grad_norm": 2.4757848480569757, "learning_rate": 9.279715447200555e-07, "loss": 0.8147, "step": 22317 }, { "epoch": 0.8088869558914139, "grad_norm": 2.2835288894039096, "learning_rate": 9.276309806446887e-07, "loss": 0.7638, "step": 22318 }, { "epoch": 0.8089231995940706, "grad_norm": 2.403972195654474, "learning_rate": 9.272904726841037e-07, "loss": 0.8577, "step": 22319 }, { "epoch": 0.8089594432967272, "grad_norm": 2.3153057123919147, "learning_rate": 9.269500208429943e-07, "loss": 0.9697, "step": 22320 }, { "epoch": 0.8089956869993838, "grad_norm": 2.4371334746126783, "learning_rate": 9.266096251260492e-07, "loss": 1.0238, "step": 22321 }, { "epoch": 0.8090319307020405, "grad_norm": 2.3531687398338117, "learning_rate": 9.262692855379601e-07, "loss": 0.9582, "step": 22322 }, { "epoch": 0.8090681744046971, "grad_norm": 2.323033485214259, "learning_rate": 9.259290020834171e-07, "loss": 0.781, "step": 22323 }, { "epoch": 0.8091044181073539, "grad_norm": 2.396906267086775, "learning_rate": 9.255887747671083e-07, "loss": 0.9088, "step": 22324 }, { "epoch": 0.8091406618100105, "grad_norm": 2.277773813981084, "learning_rate": 9.252486035937225e-07, "loss": 0.9343, "step": 22325 }, { "epoch": 0.8091769055126672, "grad_norm": 2.6805748879746867, "learning_rate": 9.249084885679444e-07, "loss": 0.8431, "step": 22326 }, { "epoch": 0.8092131492153238, "grad_norm": 2.1774174326493765, "learning_rate": 9.245684296944651e-07, "loss": 0.8247, "step": 22327 }, { "epoch": 0.8092493929179805, "grad_norm": 2.3379635616405565, "learning_rate": 9.242284269779673e-07, "loss": 0.8493, "step": 22328 }, { "epoch": 0.8092856366206371, "grad_norm": 2.4611407408161368, "learning_rate": 9.238884804231374e-07, "loss": 0.8524, "step": 22329 }, { "epoch": 0.8093218803232938, "grad_norm": 2.3355806500887346, "learning_rate": 9.235485900346569e-07, "loss": 0.7805, "step": 22330 }, { "epoch": 0.8093581240259505, "grad_norm": 2.3707763297067306, "learning_rate": 9.232087558172142e-07, "loss": 0.8891, "step": 22331 }, { "epoch": 0.8093943677286072, "grad_norm": 2.479672610626586, "learning_rate": 9.228689777754879e-07, "loss": 0.9292, "step": 22332 }, { "epoch": 0.8094306114312638, "grad_norm": 2.052495836828689, "learning_rate": 9.225292559141624e-07, "loss": 0.9477, "step": 22333 }, { "epoch": 0.8094668551339205, "grad_norm": 2.384971401819944, "learning_rate": 9.221895902379169e-07, "loss": 0.9511, "step": 22334 }, { "epoch": 0.8095030988365771, "grad_norm": 1.9797184004715656, "learning_rate": 9.218499807514325e-07, "loss": 0.8825, "step": 22335 }, { "epoch": 0.8095393425392338, "grad_norm": 2.499070848449559, "learning_rate": 9.2151042745939e-07, "loss": 0.8977, "step": 22336 }, { "epoch": 0.8095755862418905, "grad_norm": 2.557249597747146, "learning_rate": 9.211709303664662e-07, "loss": 1.0144, "step": 22337 }, { "epoch": 0.8096118299445472, "grad_norm": 2.2217613622417636, "learning_rate": 9.208314894773401e-07, "loss": 0.6685, "step": 22338 }, { "epoch": 0.8096480736472038, "grad_norm": 2.4801931079976396, "learning_rate": 9.204921047966886e-07, "loss": 0.9129, "step": 22339 }, { "epoch": 0.8096843173498605, "grad_norm": 2.468953790716037, "learning_rate": 9.201527763291907e-07, "loss": 0.8849, "step": 22340 }, { "epoch": 0.8097205610525171, "grad_norm": 2.3491934862268256, "learning_rate": 9.198135040795186e-07, "loss": 0.7626, "step": 22341 }, { "epoch": 0.8097568047551738, "grad_norm": 2.2720155382199483, "learning_rate": 9.194742880523488e-07, "loss": 0.8879, "step": 22342 }, { "epoch": 0.8097930484578304, "grad_norm": 2.2633358998398143, "learning_rate": 9.191351282523564e-07, "loss": 0.8907, "step": 22343 }, { "epoch": 0.8098292921604872, "grad_norm": 2.6472434921176142, "learning_rate": 9.187960246842132e-07, "loss": 0.8379, "step": 22344 }, { "epoch": 0.8098655358631438, "grad_norm": 2.2137430307203654, "learning_rate": 9.184569773525936e-07, "loss": 0.9871, "step": 22345 }, { "epoch": 0.8099017795658005, "grad_norm": 2.2286881535795775, "learning_rate": 9.181179862621659e-07, "loss": 0.7838, "step": 22346 }, { "epoch": 0.8099380232684571, "grad_norm": 2.1713634615091837, "learning_rate": 9.17779051417606e-07, "loss": 0.778, "step": 22347 }, { "epoch": 0.8099742669711137, "grad_norm": 2.3553662744534702, "learning_rate": 9.17440172823581e-07, "loss": 0.8157, "step": 22348 }, { "epoch": 0.8100105106737704, "grad_norm": 2.3475150764626864, "learning_rate": 9.171013504847626e-07, "loss": 1.027, "step": 22349 }, { "epoch": 0.8100467543764271, "grad_norm": 2.3273151916919366, "learning_rate": 9.167625844058154e-07, "loss": 1.011, "step": 22350 }, { "epoch": 0.8100829980790838, "grad_norm": 2.2650606400743722, "learning_rate": 9.164238745914134e-07, "loss": 0.9252, "step": 22351 }, { "epoch": 0.8101192417817404, "grad_norm": 2.204828302231696, "learning_rate": 9.16085221046219e-07, "loss": 0.8653, "step": 22352 }, { "epoch": 0.8101554854843971, "grad_norm": 2.1558442783856133, "learning_rate": 9.157466237749019e-07, "loss": 0.7581, "step": 22353 }, { "epoch": 0.8101917291870537, "grad_norm": 2.3480289804565024, "learning_rate": 9.154080827821249e-07, "loss": 0.9218, "step": 22354 }, { "epoch": 0.8102279728897104, "grad_norm": 2.244320027823289, "learning_rate": 9.150695980725544e-07, "loss": 0.7236, "step": 22355 }, { "epoch": 0.810264216592367, "grad_norm": 2.6118366827101442, "learning_rate": 9.147311696508554e-07, "loss": 0.8284, "step": 22356 }, { "epoch": 0.8103004602950238, "grad_norm": 2.4855852592264567, "learning_rate": 9.143927975216893e-07, "loss": 1.2225, "step": 22357 }, { "epoch": 0.8103367039976804, "grad_norm": 2.3548077412311765, "learning_rate": 9.14054481689719e-07, "loss": 0.6881, "step": 22358 }, { "epoch": 0.8103729477003371, "grad_norm": 2.4704457067967165, "learning_rate": 9.137162221596074e-07, "loss": 0.8317, "step": 22359 }, { "epoch": 0.8104091914029937, "grad_norm": 2.277863490191971, "learning_rate": 9.133780189360159e-07, "loss": 0.8988, "step": 22360 }, { "epoch": 0.8104454351056504, "grad_norm": 2.067933761751362, "learning_rate": 9.130398720236028e-07, "loss": 0.8952, "step": 22361 }, { "epoch": 0.810481678808307, "grad_norm": 2.8379703265867686, "learning_rate": 9.127017814270278e-07, "loss": 0.8422, "step": 22362 }, { "epoch": 0.8105179225109638, "grad_norm": 2.3840625269940126, "learning_rate": 9.123637471509522e-07, "loss": 1.1025, "step": 22363 }, { "epoch": 0.8105541662136204, "grad_norm": 2.6089424071678593, "learning_rate": 9.120257692000312e-07, "loss": 0.9064, "step": 22364 }, { "epoch": 0.8105904099162771, "grad_norm": 2.2487163504994716, "learning_rate": 9.116878475789231e-07, "loss": 0.8809, "step": 22365 }, { "epoch": 0.8106266536189337, "grad_norm": 2.499426012471774, "learning_rate": 9.113499822922816e-07, "loss": 0.8263, "step": 22366 }, { "epoch": 0.8106628973215904, "grad_norm": 2.493305065275136, "learning_rate": 9.110121733447674e-07, "loss": 0.9241, "step": 22367 }, { "epoch": 0.810699141024247, "grad_norm": 2.5395279963226143, "learning_rate": 9.106744207410317e-07, "loss": 0.8671, "step": 22368 }, { "epoch": 0.8107353847269037, "grad_norm": 2.5189723073996686, "learning_rate": 9.103367244857298e-07, "loss": 0.8955, "step": 22369 }, { "epoch": 0.8107716284295604, "grad_norm": 2.336367857502815, "learning_rate": 9.099990845835121e-07, "loss": 0.8535, "step": 22370 }, { "epoch": 0.8108078721322171, "grad_norm": 2.409839739645847, "learning_rate": 9.096615010390364e-07, "loss": 1.0358, "step": 22371 }, { "epoch": 0.8108441158348737, "grad_norm": 2.4434382341718925, "learning_rate": 9.093239738569499e-07, "loss": 0.9697, "step": 22372 }, { "epoch": 0.8108803595375303, "grad_norm": 2.200803036572468, "learning_rate": 9.089865030419065e-07, "loss": 0.7919, "step": 22373 }, { "epoch": 0.810916603240187, "grad_norm": 2.1752874860248785, "learning_rate": 9.086490885985539e-07, "loss": 0.8617, "step": 22374 }, { "epoch": 0.8109528469428436, "grad_norm": 2.224491927913118, "learning_rate": 9.08311730531542e-07, "loss": 0.7104, "step": 22375 }, { "epoch": 0.8109890906455004, "grad_norm": 2.4526590290892756, "learning_rate": 9.079744288455211e-07, "loss": 0.876, "step": 22376 }, { "epoch": 0.811025334348157, "grad_norm": 2.276767369461406, "learning_rate": 9.07637183545137e-07, "loss": 0.821, "step": 22377 }, { "epoch": 0.8110615780508137, "grad_norm": 2.782939235071973, "learning_rate": 9.07299994635037e-07, "loss": 0.8891, "step": 22378 }, { "epoch": 0.8110978217534703, "grad_norm": 2.0725137847504285, "learning_rate": 9.069628621198684e-07, "loss": 0.8941, "step": 22379 }, { "epoch": 0.811134065456127, "grad_norm": 2.548986962211652, "learning_rate": 9.066257860042771e-07, "loss": 0.8617, "step": 22380 }, { "epoch": 0.8111703091587836, "grad_norm": 2.7994282965705, "learning_rate": 9.062887662929054e-07, "loss": 0.8426, "step": 22381 }, { "epoch": 0.8112065528614403, "grad_norm": 2.560989736090542, "learning_rate": 9.05951802990399e-07, "loss": 0.8773, "step": 22382 }, { "epoch": 0.811242796564097, "grad_norm": 2.482262725744008, "learning_rate": 9.056148961014022e-07, "loss": 0.994, "step": 22383 }, { "epoch": 0.8112790402667537, "grad_norm": 2.6245782165698746, "learning_rate": 9.052780456305543e-07, "loss": 0.7906, "step": 22384 }, { "epoch": 0.8113152839694103, "grad_norm": 2.40639410446944, "learning_rate": 9.049412515824985e-07, "loss": 0.9814, "step": 22385 }, { "epoch": 0.811351527672067, "grad_norm": 2.480451743817031, "learning_rate": 9.046045139618758e-07, "loss": 0.864, "step": 22386 }, { "epoch": 0.8113877713747236, "grad_norm": 2.494086175763546, "learning_rate": 9.042678327733279e-07, "loss": 0.8899, "step": 22387 }, { "epoch": 0.8114240150773803, "grad_norm": 2.2509735305107355, "learning_rate": 9.039312080214901e-07, "loss": 1.0015, "step": 22388 }, { "epoch": 0.8114602587800369, "grad_norm": 2.450590443952479, "learning_rate": 9.035946397110034e-07, "loss": 0.6984, "step": 22389 }, { "epoch": 0.8114965024826937, "grad_norm": 2.152269753265812, "learning_rate": 9.032581278465053e-07, "loss": 0.9195, "step": 22390 }, { "epoch": 0.8115327461853503, "grad_norm": 2.3953167087807925, "learning_rate": 9.029216724326334e-07, "loss": 0.9727, "step": 22391 }, { "epoch": 0.811568989888007, "grad_norm": 1.972875255190591, "learning_rate": 9.025852734740221e-07, "loss": 0.7802, "step": 22392 }, { "epoch": 0.8116052335906636, "grad_norm": 2.2859317511762396, "learning_rate": 9.022489309753075e-07, "loss": 0.6989, "step": 22393 }, { "epoch": 0.8116414772933203, "grad_norm": 2.195681675511912, "learning_rate": 9.019126449411258e-07, "loss": 0.8549, "step": 22394 }, { "epoch": 0.8116777209959769, "grad_norm": 2.3216374229449346, "learning_rate": 9.015764153761081e-07, "loss": 0.7481, "step": 22395 }, { "epoch": 0.8117139646986337, "grad_norm": 2.4812033946397984, "learning_rate": 9.012402422848903e-07, "loss": 1.1642, "step": 22396 }, { "epoch": 0.8117502084012903, "grad_norm": 2.439935837259854, "learning_rate": 9.009041256721002e-07, "loss": 0.9136, "step": 22397 }, { "epoch": 0.811786452103947, "grad_norm": 2.409292037988764, "learning_rate": 9.005680655423749e-07, "loss": 0.9505, "step": 22398 }, { "epoch": 0.8118226958066036, "grad_norm": 2.3735554802521612, "learning_rate": 9.002320619003408e-07, "loss": 1.0079, "step": 22399 }, { "epoch": 0.8118589395092602, "grad_norm": 2.3916864427843856, "learning_rate": 8.998961147506313e-07, "loss": 0.9801, "step": 22400 }, { "epoch": 0.8118951832119169, "grad_norm": 2.27046337592941, "learning_rate": 8.995602240978718e-07, "loss": 0.8287, "step": 22401 }, { "epoch": 0.8119314269145735, "grad_norm": 2.1550496749891863, "learning_rate": 8.992243899466929e-07, "loss": 0.8929, "step": 22402 }, { "epoch": 0.8119676706172303, "grad_norm": 2.2678365639773643, "learning_rate": 8.988886123017232e-07, "loss": 0.8644, "step": 22403 }, { "epoch": 0.8120039143198869, "grad_norm": 2.270126662082303, "learning_rate": 8.985528911675867e-07, "loss": 0.7181, "step": 22404 }, { "epoch": 0.8120401580225436, "grad_norm": 2.2110613720941976, "learning_rate": 8.982172265489109e-07, "loss": 0.7205, "step": 22405 }, { "epoch": 0.8120764017252002, "grad_norm": 2.307390423939867, "learning_rate": 8.978816184503209e-07, "loss": 1.1005, "step": 22406 }, { "epoch": 0.8121126454278569, "grad_norm": 2.493915971635255, "learning_rate": 8.975460668764424e-07, "loss": 0.8927, "step": 22407 }, { "epoch": 0.8121488891305135, "grad_norm": 2.542438168047068, "learning_rate": 8.972105718318974e-07, "loss": 1.0159, "step": 22408 }, { "epoch": 0.8121851328331703, "grad_norm": 2.1551422630940333, "learning_rate": 8.968751333213089e-07, "loss": 0.949, "step": 22409 }, { "epoch": 0.8122213765358269, "grad_norm": 2.3070712599076315, "learning_rate": 8.965397513493012e-07, "loss": 0.9808, "step": 22410 }, { "epoch": 0.8122576202384836, "grad_norm": 2.263821262174595, "learning_rate": 8.962044259204921e-07, "loss": 0.8795, "step": 22411 }, { "epoch": 0.8122938639411402, "grad_norm": 2.3291874092007325, "learning_rate": 8.958691570395061e-07, "loss": 1.036, "step": 22412 }, { "epoch": 0.8123301076437969, "grad_norm": 2.4284388244943638, "learning_rate": 8.955339447109584e-07, "loss": 0.9088, "step": 22413 }, { "epoch": 0.8123663513464535, "grad_norm": 2.2918905266365237, "learning_rate": 8.951987889394731e-07, "loss": 0.8031, "step": 22414 }, { "epoch": 0.8124025950491102, "grad_norm": 2.52537239805253, "learning_rate": 8.948636897296648e-07, "loss": 1.0103, "step": 22415 }, { "epoch": 0.8124388387517669, "grad_norm": 2.446249795228973, "learning_rate": 8.94528647086153e-07, "loss": 0.8055, "step": 22416 }, { "epoch": 0.8124750824544236, "grad_norm": 2.2826161904970994, "learning_rate": 8.94193661013551e-07, "loss": 1.0424, "step": 22417 }, { "epoch": 0.8125113261570802, "grad_norm": 2.2572822065287705, "learning_rate": 8.938587315164804e-07, "loss": 0.8783, "step": 22418 }, { "epoch": 0.8125475698597369, "grad_norm": 2.0586667013578346, "learning_rate": 8.935238585995515e-07, "loss": 0.8884, "step": 22419 }, { "epoch": 0.8125838135623935, "grad_norm": 2.450123054235613, "learning_rate": 8.931890422673817e-07, "loss": 0.8756, "step": 22420 }, { "epoch": 0.8126200572650502, "grad_norm": 2.5111851578383795, "learning_rate": 8.928542825245817e-07, "loss": 0.7964, "step": 22421 }, { "epoch": 0.8126563009677069, "grad_norm": 2.575186759764968, "learning_rate": 8.925195793757657e-07, "loss": 0.8748, "step": 22422 }, { "epoch": 0.8126925446703636, "grad_norm": 2.7997917421578147, "learning_rate": 8.921849328255472e-07, "loss": 0.8869, "step": 22423 }, { "epoch": 0.8127287883730202, "grad_norm": 2.3868504209233197, "learning_rate": 8.918503428785347e-07, "loss": 0.9129, "step": 22424 }, { "epoch": 0.8127650320756769, "grad_norm": 2.4460484935375253, "learning_rate": 8.9151580953934e-07, "loss": 0.837, "step": 22425 }, { "epoch": 0.8128012757783335, "grad_norm": 2.3007702096318754, "learning_rate": 8.911813328125729e-07, "loss": 0.7986, "step": 22426 }, { "epoch": 0.8128375194809901, "grad_norm": 2.3516136405616472, "learning_rate": 8.90846912702843e-07, "loss": 0.758, "step": 22427 }, { "epoch": 0.8128737631836468, "grad_norm": 2.260693238497062, "learning_rate": 8.905125492147565e-07, "loss": 0.9313, "step": 22428 }, { "epoch": 0.8129100068863035, "grad_norm": 2.1201933220920983, "learning_rate": 8.901782423529215e-07, "loss": 0.8975, "step": 22429 }, { "epoch": 0.8129462505889602, "grad_norm": 2.197548173515121, "learning_rate": 8.898439921219465e-07, "loss": 0.8979, "step": 22430 }, { "epoch": 0.8129824942916168, "grad_norm": 2.4908635044193246, "learning_rate": 8.895097985264339e-07, "loss": 1.0334, "step": 22431 }, { "epoch": 0.8130187379942735, "grad_norm": 2.4219331029490188, "learning_rate": 8.891756615709918e-07, "loss": 0.8467, "step": 22432 }, { "epoch": 0.8130549816969301, "grad_norm": 2.4167797814788754, "learning_rate": 8.888415812602202e-07, "loss": 0.9492, "step": 22433 }, { "epoch": 0.8130912253995868, "grad_norm": 2.288428573750232, "learning_rate": 8.885075575987278e-07, "loss": 0.8579, "step": 22434 }, { "epoch": 0.8131274691022435, "grad_norm": 2.383195438509106, "learning_rate": 8.881735905911137e-07, "loss": 1.0055, "step": 22435 }, { "epoch": 0.8131637128049002, "grad_norm": 2.4199163399052623, "learning_rate": 8.878396802419814e-07, "loss": 0.7917, "step": 22436 }, { "epoch": 0.8131999565075568, "grad_norm": 2.378017628391414, "learning_rate": 8.875058265559295e-07, "loss": 1.1226, "step": 22437 }, { "epoch": 0.8132362002102135, "grad_norm": 2.177182001917117, "learning_rate": 8.871720295375624e-07, "loss": 0.957, "step": 22438 }, { "epoch": 0.8132724439128701, "grad_norm": 2.391374667090737, "learning_rate": 8.868382891914761e-07, "loss": 0.9553, "step": 22439 }, { "epoch": 0.8133086876155268, "grad_norm": 2.798148493724018, "learning_rate": 8.86504605522272e-07, "loss": 0.8851, "step": 22440 }, { "epoch": 0.8133449313181834, "grad_norm": 2.4637669114334724, "learning_rate": 8.861709785345457e-07, "loss": 0.9029, "step": 22441 }, { "epoch": 0.8133811750208402, "grad_norm": 2.3067753254279904, "learning_rate": 8.858374082328947e-07, "loss": 0.9772, "step": 22442 }, { "epoch": 0.8134174187234968, "grad_norm": 2.3907712350050203, "learning_rate": 8.855038946219174e-07, "loss": 0.7279, "step": 22443 }, { "epoch": 0.8134536624261535, "grad_norm": 2.6738429672582686, "learning_rate": 8.851704377062071e-07, "loss": 0.9321, "step": 22444 }, { "epoch": 0.8134899061288101, "grad_norm": 2.7487411013126453, "learning_rate": 8.848370374903597e-07, "loss": 0.9217, "step": 22445 }, { "epoch": 0.8135261498314668, "grad_norm": 2.4781555804300153, "learning_rate": 8.845036939789691e-07, "loss": 0.934, "step": 22446 }, { "epoch": 0.8135623935341234, "grad_norm": 2.1288739239734435, "learning_rate": 8.841704071766294e-07, "loss": 0.8567, "step": 22447 }, { "epoch": 0.8135986372367802, "grad_norm": 2.5681593903422133, "learning_rate": 8.838371770879317e-07, "loss": 0.9538, "step": 22448 }, { "epoch": 0.8136348809394368, "grad_norm": 2.7063530127261513, "learning_rate": 8.83504003717468e-07, "loss": 0.8013, "step": 22449 }, { "epoch": 0.8136711246420935, "grad_norm": 2.459162261101102, "learning_rate": 8.831708870698308e-07, "loss": 0.9882, "step": 22450 }, { "epoch": 0.8137073683447501, "grad_norm": 2.1568701373588017, "learning_rate": 8.82837827149608e-07, "loss": 0.9293, "step": 22451 }, { "epoch": 0.8137436120474068, "grad_norm": 2.349563057405508, "learning_rate": 8.825048239613909e-07, "loss": 0.7893, "step": 22452 }, { "epoch": 0.8137798557500634, "grad_norm": 2.1977878254231875, "learning_rate": 8.821718775097648e-07, "loss": 0.8646, "step": 22453 }, { "epoch": 0.81381609945272, "grad_norm": 2.14207916523666, "learning_rate": 8.818389877993222e-07, "loss": 0.9064, "step": 22454 }, { "epoch": 0.8138523431553768, "grad_norm": 1.92670451061284, "learning_rate": 8.815061548346465e-07, "loss": 0.7364, "step": 22455 }, { "epoch": 0.8138885868580334, "grad_norm": 2.4637107646583725, "learning_rate": 8.811733786203264e-07, "loss": 0.9089, "step": 22456 }, { "epoch": 0.8139248305606901, "grad_norm": 2.464569476854319, "learning_rate": 8.808406591609442e-07, "loss": 1.0127, "step": 22457 }, { "epoch": 0.8139610742633467, "grad_norm": 2.341106012881356, "learning_rate": 8.805079964610885e-07, "loss": 0.766, "step": 22458 }, { "epoch": 0.8139973179660034, "grad_norm": 2.4539976667907046, "learning_rate": 8.801753905253401e-07, "loss": 0.7683, "step": 22459 }, { "epoch": 0.81403356166866, "grad_norm": 2.065243737491003, "learning_rate": 8.798428413582844e-07, "loss": 0.8231, "step": 22460 }, { "epoch": 0.8140698053713167, "grad_norm": 2.3344201662993265, "learning_rate": 8.795103489645012e-07, "loss": 0.8762, "step": 22461 }, { "epoch": 0.8141060490739734, "grad_norm": 2.386251761383719, "learning_rate": 8.79177913348574e-07, "loss": 1.048, "step": 22462 }, { "epoch": 0.8141422927766301, "grad_norm": 2.36097699806562, "learning_rate": 8.788455345150837e-07, "loss": 0.781, "step": 22463 }, { "epoch": 0.8141785364792867, "grad_norm": 2.6572126328159213, "learning_rate": 8.78513212468608e-07, "loss": 1.0323, "step": 22464 }, { "epoch": 0.8142147801819434, "grad_norm": 2.36861722554243, "learning_rate": 8.781809472137282e-07, "loss": 0.7412, "step": 22465 }, { "epoch": 0.8142510238846, "grad_norm": 2.4167564305737943, "learning_rate": 8.778487387550222e-07, "loss": 1.0871, "step": 22466 }, { "epoch": 0.8142872675872567, "grad_norm": 2.449921357019107, "learning_rate": 8.775165870970687e-07, "loss": 0.8839, "step": 22467 }, { "epoch": 0.8143235112899134, "grad_norm": 2.090409417550996, "learning_rate": 8.771844922444417e-07, "loss": 0.8252, "step": 22468 }, { "epoch": 0.8143597549925701, "grad_norm": 2.222914477889529, "learning_rate": 8.768524542017192e-07, "loss": 0.7695, "step": 22469 }, { "epoch": 0.8143959986952267, "grad_norm": 2.0415034959537253, "learning_rate": 8.765204729734783e-07, "loss": 0.8958, "step": 22470 }, { "epoch": 0.8144322423978834, "grad_norm": 2.4554280545311147, "learning_rate": 8.761885485642896e-07, "loss": 0.8465, "step": 22471 }, { "epoch": 0.81446848610054, "grad_norm": 2.183332028601584, "learning_rate": 8.758566809787283e-07, "loss": 0.8743, "step": 22472 }, { "epoch": 0.8145047298031967, "grad_norm": 2.3097849823339405, "learning_rate": 8.755248702213687e-07, "loss": 1.1141, "step": 22473 }, { "epoch": 0.8145409735058533, "grad_norm": 2.3111647708378755, "learning_rate": 8.751931162967825e-07, "loss": 0.8544, "step": 22474 }, { "epoch": 0.8145772172085101, "grad_norm": 2.203373175071573, "learning_rate": 8.748614192095395e-07, "loss": 0.9929, "step": 22475 }, { "epoch": 0.8146134609111667, "grad_norm": 2.144474985967037, "learning_rate": 8.745297789642116e-07, "loss": 1.0039, "step": 22476 }, { "epoch": 0.8146497046138234, "grad_norm": 2.1952814148983677, "learning_rate": 8.741981955653678e-07, "loss": 0.8726, "step": 22477 }, { "epoch": 0.81468594831648, "grad_norm": 2.4173796482944345, "learning_rate": 8.73866669017579e-07, "loss": 0.7848, "step": 22478 }, { "epoch": 0.8147221920191366, "grad_norm": 2.411874338491857, "learning_rate": 8.735351993254115e-07, "loss": 0.9754, "step": 22479 }, { "epoch": 0.8147584357217933, "grad_norm": 2.328433993946899, "learning_rate": 8.732037864934328e-07, "loss": 0.8078, "step": 22480 }, { "epoch": 0.81479467942445, "grad_norm": 2.138496530091467, "learning_rate": 8.728724305262115e-07, "loss": 0.7603, "step": 22481 }, { "epoch": 0.8148309231271067, "grad_norm": 2.3180924169788066, "learning_rate": 8.725411314283105e-07, "loss": 0.9628, "step": 22482 }, { "epoch": 0.8148671668297633, "grad_norm": 2.4205937829972126, "learning_rate": 8.722098892042985e-07, "loss": 0.8298, "step": 22483 }, { "epoch": 0.81490341053242, "grad_norm": 2.6328664589692172, "learning_rate": 8.718787038587345e-07, "loss": 0.8771, "step": 22484 }, { "epoch": 0.8149396542350766, "grad_norm": 2.0243505635487833, "learning_rate": 8.715475753961883e-07, "loss": 0.745, "step": 22485 }, { "epoch": 0.8149758979377333, "grad_norm": 2.30970860357977, "learning_rate": 8.712165038212184e-07, "loss": 0.9727, "step": 22486 }, { "epoch": 0.8150121416403899, "grad_norm": 2.0587891073035722, "learning_rate": 8.708854891383894e-07, "loss": 1.0053, "step": 22487 }, { "epoch": 0.8150483853430467, "grad_norm": 2.370373987942024, "learning_rate": 8.705545313522601e-07, "loss": 0.8735, "step": 22488 }, { "epoch": 0.8150846290457033, "grad_norm": 2.3398516311890645, "learning_rate": 8.702236304673916e-07, "loss": 1.1158, "step": 22489 }, { "epoch": 0.81512087274836, "grad_norm": 2.3242540537633665, "learning_rate": 8.69892786488345e-07, "loss": 0.7696, "step": 22490 }, { "epoch": 0.8151571164510166, "grad_norm": 2.3322689676595223, "learning_rate": 8.695619994196769e-07, "loss": 1.0213, "step": 22491 }, { "epoch": 0.8151933601536733, "grad_norm": 2.1879561392968947, "learning_rate": 8.692312692659465e-07, "loss": 0.8451, "step": 22492 }, { "epoch": 0.8152296038563299, "grad_norm": 2.325385381545477, "learning_rate": 8.68900596031711e-07, "loss": 0.7159, "step": 22493 }, { "epoch": 0.8152658475589867, "grad_norm": 2.0533161713458004, "learning_rate": 8.685699797215281e-07, "loss": 0.8861, "step": 22494 }, { "epoch": 0.8153020912616433, "grad_norm": 2.5214645148935597, "learning_rate": 8.682394203399508e-07, "loss": 0.9108, "step": 22495 }, { "epoch": 0.8153383349643, "grad_norm": 2.3991078312248306, "learning_rate": 8.679089178915357e-07, "loss": 1.0004, "step": 22496 }, { "epoch": 0.8153745786669566, "grad_norm": 2.3654832522454146, "learning_rate": 8.675784723808384e-07, "loss": 0.8843, "step": 22497 }, { "epoch": 0.8154108223696133, "grad_norm": 2.4574832724877425, "learning_rate": 8.672480838124086e-07, "loss": 0.9162, "step": 22498 }, { "epoch": 0.8154470660722699, "grad_norm": 1.9297423711105097, "learning_rate": 8.669177521908024e-07, "loss": 0.6635, "step": 22499 }, { "epoch": 0.8154833097749266, "grad_norm": 2.2898897468263084, "learning_rate": 8.665874775205674e-07, "loss": 0.7556, "step": 22500 }, { "epoch": 0.8155195534775833, "grad_norm": 2.524241365626763, "learning_rate": 8.662572598062597e-07, "loss": 0.8891, "step": 22501 }, { "epoch": 0.81555579718024, "grad_norm": 2.0947992408318368, "learning_rate": 8.659270990524254e-07, "loss": 0.7875, "step": 22502 }, { "epoch": 0.8155920408828966, "grad_norm": 2.0828691460709448, "learning_rate": 8.655969952636167e-07, "loss": 0.7472, "step": 22503 }, { "epoch": 0.8156282845855533, "grad_norm": 2.692119254933207, "learning_rate": 8.652669484443787e-07, "loss": 0.8496, "step": 22504 }, { "epoch": 0.8156645282882099, "grad_norm": 2.4056477178514064, "learning_rate": 8.649369585992645e-07, "loss": 0.9227, "step": 22505 }, { "epoch": 0.8157007719908665, "grad_norm": 2.468398566285324, "learning_rate": 8.646070257328165e-07, "loss": 0.9075, "step": 22506 }, { "epoch": 0.8157370156935233, "grad_norm": 2.7873038639989325, "learning_rate": 8.642771498495839e-07, "loss": 0.8594, "step": 22507 }, { "epoch": 0.81577325939618, "grad_norm": 2.216089800027739, "learning_rate": 8.639473309541096e-07, "loss": 1.0182, "step": 22508 }, { "epoch": 0.8158095030988366, "grad_norm": 2.717356106888158, "learning_rate": 8.636175690509396e-07, "loss": 0.9942, "step": 22509 }, { "epoch": 0.8158457468014932, "grad_norm": 2.3877811275274317, "learning_rate": 8.632878641446196e-07, "loss": 0.8429, "step": 22510 }, { "epoch": 0.8158819905041499, "grad_norm": 2.2212774008838045, "learning_rate": 8.629582162396899e-07, "loss": 0.8561, "step": 22511 }, { "epoch": 0.8159182342068065, "grad_norm": 2.3437543714185054, "learning_rate": 8.626286253406935e-07, "loss": 0.9958, "step": 22512 }, { "epoch": 0.8159544779094632, "grad_norm": 2.3691701810573456, "learning_rate": 8.622990914521723e-07, "loss": 0.8744, "step": 22513 }, { "epoch": 0.8159907216121199, "grad_norm": 2.3491373165165523, "learning_rate": 8.619696145786688e-07, "loss": 0.9073, "step": 22514 }, { "epoch": 0.8160269653147766, "grad_norm": 2.3499574059103625, "learning_rate": 8.616401947247205e-07, "loss": 0.8647, "step": 22515 }, { "epoch": 0.8160632090174332, "grad_norm": 2.2873923237019884, "learning_rate": 8.61310831894867e-07, "loss": 0.8953, "step": 22516 }, { "epoch": 0.8160994527200899, "grad_norm": 2.450872612566552, "learning_rate": 8.609815260936483e-07, "loss": 0.8437, "step": 22517 }, { "epoch": 0.8161356964227465, "grad_norm": 1.9948443257148634, "learning_rate": 8.606522773256004e-07, "loss": 0.8512, "step": 22518 }, { "epoch": 0.8161719401254032, "grad_norm": 2.3831754072455116, "learning_rate": 8.603230855952615e-07, "loss": 0.8856, "step": 22519 }, { "epoch": 0.8162081838280599, "grad_norm": 2.5949132631942207, "learning_rate": 8.599939509071642e-07, "loss": 0.9712, "step": 22520 }, { "epoch": 0.8162444275307166, "grad_norm": 2.4241811501797117, "learning_rate": 8.596648732658497e-07, "loss": 1.0551, "step": 22521 }, { "epoch": 0.8162806712333732, "grad_norm": 2.3428152906186854, "learning_rate": 8.593358526758472e-07, "loss": 0.9838, "step": 22522 }, { "epoch": 0.8163169149360299, "grad_norm": 2.0261155464339033, "learning_rate": 8.590068891416942e-07, "loss": 0.8231, "step": 22523 }, { "epoch": 0.8163531586386865, "grad_norm": 2.568911613677223, "learning_rate": 8.586779826679192e-07, "loss": 0.8317, "step": 22524 }, { "epoch": 0.8163894023413432, "grad_norm": 2.5239139825832417, "learning_rate": 8.583491332590599e-07, "loss": 0.9151, "step": 22525 }, { "epoch": 0.8164256460439998, "grad_norm": 2.7368577880169074, "learning_rate": 8.58020340919643e-07, "loss": 0.9471, "step": 22526 }, { "epoch": 0.8164618897466566, "grad_norm": 2.254647240461163, "learning_rate": 8.576916056542023e-07, "loss": 0.7429, "step": 22527 }, { "epoch": 0.8164981334493132, "grad_norm": 2.3732453330105647, "learning_rate": 8.573629274672646e-07, "loss": 0.7809, "step": 22528 }, { "epoch": 0.8165343771519699, "grad_norm": 2.900732794495114, "learning_rate": 8.570343063633607e-07, "loss": 0.9465, "step": 22529 }, { "epoch": 0.8165706208546265, "grad_norm": 2.3468397617237877, "learning_rate": 8.567057423470193e-07, "loss": 0.9137, "step": 22530 }, { "epoch": 0.8166068645572832, "grad_norm": 2.4058751534057836, "learning_rate": 8.563772354227662e-07, "loss": 0.638, "step": 22531 }, { "epoch": 0.8166431082599398, "grad_norm": 2.476808820318611, "learning_rate": 8.560487855951283e-07, "loss": 0.8517, "step": 22532 }, { "epoch": 0.8166793519625964, "grad_norm": 2.4050250393530788, "learning_rate": 8.557203928686325e-07, "loss": 0.8982, "step": 22533 }, { "epoch": 0.8167155956652532, "grad_norm": 2.279578533960996, "learning_rate": 8.553920572478041e-07, "loss": 0.8419, "step": 22534 }, { "epoch": 0.8167518393679098, "grad_norm": 2.441748002661244, "learning_rate": 8.550637787371658e-07, "loss": 1.0207, "step": 22535 }, { "epoch": 0.8167880830705665, "grad_norm": 2.3031438104190873, "learning_rate": 8.547355573412414e-07, "loss": 0.8476, "step": 22536 }, { "epoch": 0.8168243267732231, "grad_norm": 2.4780193296252633, "learning_rate": 8.544073930645558e-07, "loss": 0.7971, "step": 22537 }, { "epoch": 0.8168605704758798, "grad_norm": 2.298127362677266, "learning_rate": 8.540792859116271e-07, "loss": 1.0601, "step": 22538 }, { "epoch": 0.8168968141785364, "grad_norm": 2.043362706197158, "learning_rate": 8.537512358869804e-07, "loss": 0.8751, "step": 22539 }, { "epoch": 0.8169330578811932, "grad_norm": 2.1533612538306603, "learning_rate": 8.534232429951311e-07, "loss": 0.8248, "step": 22540 }, { "epoch": 0.8169693015838498, "grad_norm": 2.7281835584956133, "learning_rate": 8.530953072406051e-07, "loss": 0.8616, "step": 22541 }, { "epoch": 0.8170055452865065, "grad_norm": 2.334437191448359, "learning_rate": 8.527674286279158e-07, "loss": 0.8813, "step": 22542 }, { "epoch": 0.8170417889891631, "grad_norm": 2.3377334518798474, "learning_rate": 8.524396071615842e-07, "loss": 0.8622, "step": 22543 }, { "epoch": 0.8170780326918198, "grad_norm": 2.237070347183116, "learning_rate": 8.521118428461244e-07, "loss": 0.7988, "step": 22544 }, { "epoch": 0.8171142763944764, "grad_norm": 2.2339691898501273, "learning_rate": 8.517841356860573e-07, "loss": 0.6264, "step": 22545 }, { "epoch": 0.8171505200971331, "grad_norm": 2.0035234590148723, "learning_rate": 8.514564856858948e-07, "loss": 0.732, "step": 22546 }, { "epoch": 0.8171867637997898, "grad_norm": 2.5030385401189235, "learning_rate": 8.511288928501543e-07, "loss": 1.0889, "step": 22547 }, { "epoch": 0.8172230075024465, "grad_norm": 2.8374004210218224, "learning_rate": 8.508013571833473e-07, "loss": 0.9433, "step": 22548 }, { "epoch": 0.8172592512051031, "grad_norm": 2.5242510416337103, "learning_rate": 8.50473878689988e-07, "loss": 0.8093, "step": 22549 }, { "epoch": 0.8172954949077598, "grad_norm": 2.6276022155148744, "learning_rate": 8.501464573745905e-07, "loss": 1.0416, "step": 22550 }, { "epoch": 0.8173317386104164, "grad_norm": 2.5775371595052405, "learning_rate": 8.498190932416627e-07, "loss": 0.9828, "step": 22551 }, { "epoch": 0.8173679823130731, "grad_norm": 2.5029660052108533, "learning_rate": 8.494917862957203e-07, "loss": 0.8204, "step": 22552 }, { "epoch": 0.8174042260157298, "grad_norm": 2.5960866860614584, "learning_rate": 8.491645365412693e-07, "loss": 1.0675, "step": 22553 }, { "epoch": 0.8174404697183865, "grad_norm": 2.263941132875884, "learning_rate": 8.488373439828224e-07, "loss": 0.8095, "step": 22554 }, { "epoch": 0.8174767134210431, "grad_norm": 2.2251241888193047, "learning_rate": 8.485102086248848e-07, "loss": 0.8709, "step": 22555 }, { "epoch": 0.8175129571236998, "grad_norm": 2.2465252893443894, "learning_rate": 8.481831304719657e-07, "loss": 0.8286, "step": 22556 }, { "epoch": 0.8175492008263564, "grad_norm": 2.078022200244893, "learning_rate": 8.478561095285736e-07, "loss": 0.7643, "step": 22557 }, { "epoch": 0.817585444529013, "grad_norm": 2.6636883161381157, "learning_rate": 8.475291457992119e-07, "loss": 1.0666, "step": 22558 }, { "epoch": 0.8176216882316697, "grad_norm": 2.4588539232606923, "learning_rate": 8.472022392883872e-07, "loss": 0.926, "step": 22559 }, { "epoch": 0.8176579319343265, "grad_norm": 2.344930794180255, "learning_rate": 8.468753900006038e-07, "loss": 0.9454, "step": 22560 }, { "epoch": 0.8176941756369831, "grad_norm": 2.367094164454338, "learning_rate": 8.465485979403676e-07, "loss": 0.7265, "step": 22561 }, { "epoch": 0.8177304193396397, "grad_norm": 1.9800741665171577, "learning_rate": 8.462218631121782e-07, "loss": 0.6876, "step": 22562 }, { "epoch": 0.8177666630422964, "grad_norm": 2.843064130234635, "learning_rate": 8.458951855205394e-07, "loss": 0.8467, "step": 22563 }, { "epoch": 0.817802906744953, "grad_norm": 2.256254043802886, "learning_rate": 8.455685651699524e-07, "loss": 0.7676, "step": 22564 }, { "epoch": 0.8178391504476097, "grad_norm": 2.317813499763431, "learning_rate": 8.452420020649199e-07, "loss": 0.8569, "step": 22565 }, { "epoch": 0.8178753941502664, "grad_norm": 2.581590015558969, "learning_rate": 8.449154962099382e-07, "loss": 0.9018, "step": 22566 }, { "epoch": 0.8179116378529231, "grad_norm": 2.4896664774049677, "learning_rate": 8.445890476095081e-07, "loss": 0.8538, "step": 22567 }, { "epoch": 0.8179478815555797, "grad_norm": 2.267089187170199, "learning_rate": 8.442626562681294e-07, "loss": 0.8365, "step": 22568 }, { "epoch": 0.8179841252582364, "grad_norm": 2.289658319156138, "learning_rate": 8.439363221902964e-07, "loss": 0.9041, "step": 22569 }, { "epoch": 0.818020368960893, "grad_norm": 2.49310489404613, "learning_rate": 8.436100453805085e-07, "loss": 0.8305, "step": 22570 }, { "epoch": 0.8180566126635497, "grad_norm": 2.644359528694727, "learning_rate": 8.432838258432585e-07, "loss": 0.8002, "step": 22571 }, { "epoch": 0.8180928563662063, "grad_norm": 2.4828705486317086, "learning_rate": 8.429576635830456e-07, "loss": 0.8568, "step": 22572 }, { "epoch": 0.8181291000688631, "grad_norm": 2.166813777729731, "learning_rate": 8.42631558604361e-07, "loss": 0.7427, "step": 22573 }, { "epoch": 0.8181653437715197, "grad_norm": 2.262699212397045, "learning_rate": 8.423055109117001e-07, "loss": 0.8268, "step": 22574 }, { "epoch": 0.8182015874741764, "grad_norm": 2.3226190773654394, "learning_rate": 8.419795205095538e-07, "loss": 1.0188, "step": 22575 }, { "epoch": 0.818237831176833, "grad_norm": 2.2716185775044444, "learning_rate": 8.416535874024146e-07, "loss": 0.7736, "step": 22576 }, { "epoch": 0.8182740748794897, "grad_norm": 2.361904296866603, "learning_rate": 8.41327711594776e-07, "loss": 0.8822, "step": 22577 }, { "epoch": 0.8183103185821463, "grad_norm": 2.358994671309241, "learning_rate": 8.410018930911246e-07, "loss": 0.826, "step": 22578 }, { "epoch": 0.8183465622848031, "grad_norm": 2.288430602656772, "learning_rate": 8.406761318959523e-07, "loss": 0.7839, "step": 22579 }, { "epoch": 0.8183828059874597, "grad_norm": 2.390718136686902, "learning_rate": 8.40350428013747e-07, "loss": 0.9674, "step": 22580 }, { "epoch": 0.8184190496901164, "grad_norm": 2.5580162926689636, "learning_rate": 8.400247814489987e-07, "loss": 0.7452, "step": 22581 }, { "epoch": 0.818455293392773, "grad_norm": 2.428798828856709, "learning_rate": 8.396991922061914e-07, "loss": 0.8378, "step": 22582 }, { "epoch": 0.8184915370954297, "grad_norm": 2.4290094112865335, "learning_rate": 8.393736602898139e-07, "loss": 0.9897, "step": 22583 }, { "epoch": 0.8185277807980863, "grad_norm": 2.1416879101811492, "learning_rate": 8.390481857043514e-07, "loss": 0.9427, "step": 22584 }, { "epoch": 0.818564024500743, "grad_norm": 2.3277963032534648, "learning_rate": 8.387227684542881e-07, "loss": 0.8605, "step": 22585 }, { "epoch": 0.8186002682033997, "grad_norm": 2.072836340862438, "learning_rate": 8.383974085441088e-07, "loss": 0.7259, "step": 22586 }, { "epoch": 0.8186365119060564, "grad_norm": 2.2706502013602754, "learning_rate": 8.380721059782943e-07, "loss": 0.6411, "step": 22587 }, { "epoch": 0.818672755608713, "grad_norm": 2.634745925989472, "learning_rate": 8.377468607613315e-07, "loss": 1.0283, "step": 22588 }, { "epoch": 0.8187089993113696, "grad_norm": 2.4885646553150083, "learning_rate": 8.374216728976986e-07, "loss": 0.9181, "step": 22589 }, { "epoch": 0.8187452430140263, "grad_norm": 2.4035007147532665, "learning_rate": 8.370965423918792e-07, "loss": 0.7669, "step": 22590 }, { "epoch": 0.8187814867166829, "grad_norm": 2.8320117930025597, "learning_rate": 8.367714692483486e-07, "loss": 0.8582, "step": 22591 }, { "epoch": 0.8188177304193396, "grad_norm": 2.34739934698602, "learning_rate": 8.364464534715922e-07, "loss": 0.9174, "step": 22592 }, { "epoch": 0.8188539741219963, "grad_norm": 2.225828928498712, "learning_rate": 8.361214950660845e-07, "loss": 0.8407, "step": 22593 }, { "epoch": 0.818890217824653, "grad_norm": 2.411305182219581, "learning_rate": 8.357965940363049e-07, "loss": 0.9847, "step": 22594 }, { "epoch": 0.8189264615273096, "grad_norm": 2.3812952129009863, "learning_rate": 8.354717503867293e-07, "loss": 0.8344, "step": 22595 }, { "epoch": 0.8189627052299663, "grad_norm": 2.554768637770359, "learning_rate": 8.35146964121834e-07, "loss": 0.9288, "step": 22596 }, { "epoch": 0.8189989489326229, "grad_norm": 2.059051082659197, "learning_rate": 8.348222352460961e-07, "loss": 0.7151, "step": 22597 }, { "epoch": 0.8190351926352796, "grad_norm": 2.4027698241929585, "learning_rate": 8.344975637639879e-07, "loss": 0.8532, "step": 22598 }, { "epoch": 0.8190714363379363, "grad_norm": 2.3131664774727088, "learning_rate": 8.341729496799833e-07, "loss": 0.9005, "step": 22599 }, { "epoch": 0.819107680040593, "grad_norm": 2.7928912053549246, "learning_rate": 8.338483929985564e-07, "loss": 0.874, "step": 22600 }, { "epoch": 0.8191439237432496, "grad_norm": 2.5916366144932894, "learning_rate": 8.335238937241808e-07, "loss": 0.7248, "step": 22601 }, { "epoch": 0.8191801674459063, "grad_norm": 2.3150349735193845, "learning_rate": 8.33199451861324e-07, "loss": 0.7562, "step": 22602 }, { "epoch": 0.8192164111485629, "grad_norm": 2.385640273050296, "learning_rate": 8.328750674144592e-07, "loss": 0.917, "step": 22603 }, { "epoch": 0.8192526548512196, "grad_norm": 2.2497353204500525, "learning_rate": 8.325507403880567e-07, "loss": 0.8324, "step": 22604 }, { "epoch": 0.8192888985538762, "grad_norm": 2.415152186816104, "learning_rate": 8.322264707865841e-07, "loss": 0.9494, "step": 22605 }, { "epoch": 0.819325142256533, "grad_norm": 2.310135061026949, "learning_rate": 8.319022586145109e-07, "loss": 0.8696, "step": 22606 }, { "epoch": 0.8193613859591896, "grad_norm": 2.4686930560872256, "learning_rate": 8.31578103876301e-07, "loss": 0.9903, "step": 22607 }, { "epoch": 0.8193976296618463, "grad_norm": 2.286402045046947, "learning_rate": 8.312540065764269e-07, "loss": 0.9247, "step": 22608 }, { "epoch": 0.8194338733645029, "grad_norm": 2.3865032798095016, "learning_rate": 8.309299667193498e-07, "loss": 1.058, "step": 22609 }, { "epoch": 0.8194701170671596, "grad_norm": 2.336564983037543, "learning_rate": 8.306059843095376e-07, "loss": 0.8217, "step": 22610 }, { "epoch": 0.8195063607698162, "grad_norm": 2.328633563006359, "learning_rate": 8.30282059351451e-07, "loss": 1.0039, "step": 22611 }, { "epoch": 0.819542604472473, "grad_norm": 2.063814595177554, "learning_rate": 8.299581918495586e-07, "loss": 0.8432, "step": 22612 }, { "epoch": 0.8195788481751296, "grad_norm": 2.390398181128773, "learning_rate": 8.296343818083185e-07, "loss": 1.0114, "step": 22613 }, { "epoch": 0.8196150918777863, "grad_norm": 2.250055883679169, "learning_rate": 8.293106292321961e-07, "loss": 0.7647, "step": 22614 }, { "epoch": 0.8196513355804429, "grad_norm": 2.261018904024101, "learning_rate": 8.289869341256501e-07, "loss": 1.0015, "step": 22615 }, { "epoch": 0.8196875792830995, "grad_norm": 2.1895867114272423, "learning_rate": 8.286632964931407e-07, "loss": 0.9172, "step": 22616 }, { "epoch": 0.8197238229857562, "grad_norm": 2.4703225296380156, "learning_rate": 8.283397163391304e-07, "loss": 0.823, "step": 22617 }, { "epoch": 0.8197600666884128, "grad_norm": 2.5624727176703255, "learning_rate": 8.280161936680742e-07, "loss": 0.8077, "step": 22618 }, { "epoch": 0.8197963103910696, "grad_norm": 2.3508280248638584, "learning_rate": 8.276927284844316e-07, "loss": 0.7948, "step": 22619 }, { "epoch": 0.8198325540937262, "grad_norm": 2.420530832546433, "learning_rate": 8.273693207926598e-07, "loss": 0.8895, "step": 22620 }, { "epoch": 0.8198687977963829, "grad_norm": 2.383219170597504, "learning_rate": 8.270459705972166e-07, "loss": 0.8107, "step": 22621 }, { "epoch": 0.8199050414990395, "grad_norm": 2.4190224454192237, "learning_rate": 8.267226779025556e-07, "loss": 0.9536, "step": 22622 }, { "epoch": 0.8199412852016962, "grad_norm": 2.361394074516566, "learning_rate": 8.263994427131311e-07, "loss": 0.6316, "step": 22623 }, { "epoch": 0.8199775289043528, "grad_norm": 2.192789866408221, "learning_rate": 8.260762650334003e-07, "loss": 0.9553, "step": 22624 }, { "epoch": 0.8200137726070096, "grad_norm": 2.168834431196627, "learning_rate": 8.257531448678124e-07, "loss": 0.9463, "step": 22625 }, { "epoch": 0.8200500163096662, "grad_norm": 2.2592729189053653, "learning_rate": 8.254300822208233e-07, "loss": 1.0043, "step": 22626 }, { "epoch": 0.8200862600123229, "grad_norm": 2.3323035660851232, "learning_rate": 8.251070770968806e-07, "loss": 0.778, "step": 22627 }, { "epoch": 0.8201225037149795, "grad_norm": 2.360293110974132, "learning_rate": 8.247841295004394e-07, "loss": 0.8359, "step": 22628 }, { "epoch": 0.8201587474176362, "grad_norm": 2.5535512168889283, "learning_rate": 8.244612394359469e-07, "loss": 0.8996, "step": 22629 }, { "epoch": 0.8201949911202928, "grad_norm": 2.4204251462225215, "learning_rate": 8.241384069078534e-07, "loss": 0.7988, "step": 22630 }, { "epoch": 0.8202312348229495, "grad_norm": 2.172561105861897, "learning_rate": 8.23815631920607e-07, "loss": 0.8414, "step": 22631 }, { "epoch": 0.8202674785256062, "grad_norm": 2.3545151446329475, "learning_rate": 8.234929144786564e-07, "loss": 1.014, "step": 22632 }, { "epoch": 0.8203037222282629, "grad_norm": 2.7403982597764096, "learning_rate": 8.231702545864468e-07, "loss": 0.9624, "step": 22633 }, { "epoch": 0.8203399659309195, "grad_norm": 2.460955586695247, "learning_rate": 8.228476522484246e-07, "loss": 0.8208, "step": 22634 }, { "epoch": 0.8203762096335762, "grad_norm": 2.477548461031647, "learning_rate": 8.225251074690372e-07, "loss": 0.937, "step": 22635 }, { "epoch": 0.8204124533362328, "grad_norm": 2.5476480626354463, "learning_rate": 8.222026202527262e-07, "loss": 0.9676, "step": 22636 }, { "epoch": 0.8204486970388895, "grad_norm": 2.288610469624825, "learning_rate": 8.218801906039375e-07, "loss": 0.6743, "step": 22637 }, { "epoch": 0.8204849407415462, "grad_norm": 2.324728677619495, "learning_rate": 8.215578185271101e-07, "loss": 0.6599, "step": 22638 }, { "epoch": 0.8205211844442029, "grad_norm": 2.655394190297953, "learning_rate": 8.212355040266917e-07, "loss": 0.9537, "step": 22639 }, { "epoch": 0.8205574281468595, "grad_norm": 2.28037829804316, "learning_rate": 8.209132471071196e-07, "loss": 0.8653, "step": 22640 }, { "epoch": 0.8205936718495161, "grad_norm": 2.4611666251879587, "learning_rate": 8.205910477728368e-07, "loss": 1.0519, "step": 22641 }, { "epoch": 0.8206299155521728, "grad_norm": 2.4212294275174404, "learning_rate": 8.202689060282804e-07, "loss": 0.9505, "step": 22642 }, { "epoch": 0.8206661592548294, "grad_norm": 2.3547301366997426, "learning_rate": 8.199468218778906e-07, "loss": 0.8821, "step": 22643 }, { "epoch": 0.8207024029574861, "grad_norm": 2.545183189144888, "learning_rate": 8.196247953261066e-07, "loss": 0.9188, "step": 22644 }, { "epoch": 0.8207386466601428, "grad_norm": 2.430546579861847, "learning_rate": 8.193028263773639e-07, "loss": 1.0262, "step": 22645 }, { "epoch": 0.8207748903627995, "grad_norm": 2.2522480183910125, "learning_rate": 8.189809150361e-07, "loss": 0.7043, "step": 22646 }, { "epoch": 0.8208111340654561, "grad_norm": 2.4435018420677896, "learning_rate": 8.186590613067503e-07, "loss": 1.0607, "step": 22647 }, { "epoch": 0.8208473777681128, "grad_norm": 2.3563429256480832, "learning_rate": 8.18337265193751e-07, "loss": 1.1067, "step": 22648 }, { "epoch": 0.8208836214707694, "grad_norm": 2.3128631061642486, "learning_rate": 8.180155267015338e-07, "loss": 0.7906, "step": 22649 }, { "epoch": 0.8209198651734261, "grad_norm": 2.576269633464881, "learning_rate": 8.176938458345341e-07, "loss": 0.801, "step": 22650 }, { "epoch": 0.8209561088760828, "grad_norm": 2.348269760051234, "learning_rate": 8.17372222597183e-07, "loss": 0.9356, "step": 22651 }, { "epoch": 0.8209923525787395, "grad_norm": 2.3057230463122207, "learning_rate": 8.170506569939146e-07, "loss": 0.9413, "step": 22652 }, { "epoch": 0.8210285962813961, "grad_norm": 2.072994338179277, "learning_rate": 8.167291490291573e-07, "loss": 0.9004, "step": 22653 }, { "epoch": 0.8210648399840528, "grad_norm": 2.420390678169053, "learning_rate": 8.164076987073427e-07, "loss": 0.7506, "step": 22654 }, { "epoch": 0.8211010836867094, "grad_norm": 2.4034921432466456, "learning_rate": 8.160863060329006e-07, "loss": 0.7292, "step": 22655 }, { "epoch": 0.8211373273893661, "grad_norm": 2.2866597498337784, "learning_rate": 8.157649710102578e-07, "loss": 0.8423, "step": 22656 }, { "epoch": 0.8211735710920227, "grad_norm": 2.238025222943544, "learning_rate": 8.154436936438442e-07, "loss": 0.8742, "step": 22657 }, { "epoch": 0.8212098147946795, "grad_norm": 2.617932236316559, "learning_rate": 8.151224739380836e-07, "loss": 0.7014, "step": 22658 }, { "epoch": 0.8212460584973361, "grad_norm": 2.319754675863709, "learning_rate": 8.148013118974069e-07, "loss": 0.9986, "step": 22659 }, { "epoch": 0.8212823021999928, "grad_norm": 2.4100400019158608, "learning_rate": 8.144802075262354e-07, "loss": 0.8591, "step": 22660 }, { "epoch": 0.8213185459026494, "grad_norm": 2.373908543449336, "learning_rate": 8.141591608289967e-07, "loss": 0.7993, "step": 22661 }, { "epoch": 0.8213547896053061, "grad_norm": 2.397359890176784, "learning_rate": 8.138381718101124e-07, "loss": 0.7912, "step": 22662 }, { "epoch": 0.8213910333079627, "grad_norm": 2.557824676583134, "learning_rate": 8.135172404740054e-07, "loss": 0.9804, "step": 22663 }, { "epoch": 0.8214272770106193, "grad_norm": 2.587256810620341, "learning_rate": 8.131963668251009e-07, "loss": 1.016, "step": 22664 }, { "epoch": 0.8214635207132761, "grad_norm": 2.341346726120538, "learning_rate": 8.128755508678171e-07, "loss": 0.7607, "step": 22665 }, { "epoch": 0.8214997644159328, "grad_norm": 2.3187629172250412, "learning_rate": 8.125547926065757e-07, "loss": 0.8376, "step": 22666 }, { "epoch": 0.8215360081185894, "grad_norm": 2.4284940653906477, "learning_rate": 8.122340920457972e-07, "loss": 0.9066, "step": 22667 }, { "epoch": 0.821572251821246, "grad_norm": 2.4013489161069863, "learning_rate": 8.119134491899011e-07, "loss": 0.8285, "step": 22668 }, { "epoch": 0.8216084955239027, "grad_norm": 2.438904494465263, "learning_rate": 8.115928640433035e-07, "loss": 0.9423, "step": 22669 }, { "epoch": 0.8216447392265593, "grad_norm": 2.47195798382934, "learning_rate": 8.112723366104236e-07, "loss": 0.7016, "step": 22670 }, { "epoch": 0.8216809829292161, "grad_norm": 2.085303938525375, "learning_rate": 8.109518668956784e-07, "loss": 0.8261, "step": 22671 }, { "epoch": 0.8217172266318727, "grad_norm": 2.5053173122234, "learning_rate": 8.106314549034822e-07, "loss": 0.8521, "step": 22672 }, { "epoch": 0.8217534703345294, "grad_norm": 2.7434483851006797, "learning_rate": 8.103111006382503e-07, "loss": 0.7617, "step": 22673 }, { "epoch": 0.821789714037186, "grad_norm": 2.43542610323766, "learning_rate": 8.09990804104398e-07, "loss": 0.8385, "step": 22674 }, { "epoch": 0.8218259577398427, "grad_norm": 2.1515119005338543, "learning_rate": 8.096705653063397e-07, "loss": 0.7451, "step": 22675 }, { "epoch": 0.8218622014424993, "grad_norm": 2.0747354809580436, "learning_rate": 8.093503842484851e-07, "loss": 0.7414, "step": 22676 }, { "epoch": 0.821898445145156, "grad_norm": 2.4505295107310303, "learning_rate": 8.090302609352496e-07, "loss": 0.7088, "step": 22677 }, { "epoch": 0.8219346888478127, "grad_norm": 2.5502947343851976, "learning_rate": 8.087101953710397e-07, "loss": 0.7814, "step": 22678 }, { "epoch": 0.8219709325504694, "grad_norm": 2.010559705442605, "learning_rate": 8.083901875602713e-07, "loss": 0.8364, "step": 22679 }, { "epoch": 0.822007176253126, "grad_norm": 2.2939403211446976, "learning_rate": 8.08070237507349e-07, "loss": 0.9179, "step": 22680 }, { "epoch": 0.8220434199557827, "grad_norm": 2.3658487491702145, "learning_rate": 8.077503452166857e-07, "loss": 1.1292, "step": 22681 }, { "epoch": 0.8220796636584393, "grad_norm": 2.170367711027013, "learning_rate": 8.074305106926856e-07, "loss": 0.7679, "step": 22682 }, { "epoch": 0.822115907361096, "grad_norm": 2.3118791006393815, "learning_rate": 8.071107339397577e-07, "loss": 0.826, "step": 22683 }, { "epoch": 0.8221521510637527, "grad_norm": 2.4180202838345064, "learning_rate": 8.067910149623092e-07, "loss": 0.804, "step": 22684 }, { "epoch": 0.8221883947664094, "grad_norm": 2.596467457852881, "learning_rate": 8.064713537647434e-07, "loss": 0.9063, "step": 22685 }, { "epoch": 0.822224638469066, "grad_norm": 2.490952963978521, "learning_rate": 8.061517503514666e-07, "loss": 1.0364, "step": 22686 }, { "epoch": 0.8222608821717227, "grad_norm": 2.5728261369787524, "learning_rate": 8.058322047268819e-07, "loss": 0.9424, "step": 22687 }, { "epoch": 0.8222971258743793, "grad_norm": 2.101863178765053, "learning_rate": 8.055127168953936e-07, "loss": 0.8649, "step": 22688 }, { "epoch": 0.822333369577036, "grad_norm": 2.3403442083616857, "learning_rate": 8.051932868614027e-07, "loss": 0.8637, "step": 22689 }, { "epoch": 0.8223696132796926, "grad_norm": 2.18168883801684, "learning_rate": 8.048739146293116e-07, "loss": 0.9685, "step": 22690 }, { "epoch": 0.8224058569823494, "grad_norm": 2.390867025164365, "learning_rate": 8.045546002035215e-07, "loss": 0.9237, "step": 22691 }, { "epoch": 0.822442100685006, "grad_norm": 2.490961725399187, "learning_rate": 8.042353435884303e-07, "loss": 0.8709, "step": 22692 }, { "epoch": 0.8224783443876627, "grad_norm": 2.2329696967332535, "learning_rate": 8.039161447884402e-07, "loss": 0.8706, "step": 22693 }, { "epoch": 0.8225145880903193, "grad_norm": 2.684446650254728, "learning_rate": 8.035970038079455e-07, "loss": 0.9499, "step": 22694 }, { "epoch": 0.822550831792976, "grad_norm": 2.534124431726652, "learning_rate": 8.032779206513486e-07, "loss": 0.8103, "step": 22695 }, { "epoch": 0.8225870754956326, "grad_norm": 2.500637285291459, "learning_rate": 8.029588953230427e-07, "loss": 0.987, "step": 22696 }, { "epoch": 0.8226233191982893, "grad_norm": 2.1717936065087575, "learning_rate": 8.026399278274261e-07, "loss": 0.7104, "step": 22697 }, { "epoch": 0.822659562900946, "grad_norm": 2.6273413991088232, "learning_rate": 8.0232101816889e-07, "loss": 0.9748, "step": 22698 }, { "epoch": 0.8226958066036026, "grad_norm": 2.719520399276835, "learning_rate": 8.020021663518345e-07, "loss": 0.8858, "step": 22699 }, { "epoch": 0.8227320503062593, "grad_norm": 2.4004976698590585, "learning_rate": 8.016833723806489e-07, "loss": 0.8701, "step": 22700 }, { "epoch": 0.8227682940089159, "grad_norm": 2.3402495407584385, "learning_rate": 8.013646362597288e-07, "loss": 0.9179, "step": 22701 }, { "epoch": 0.8228045377115726, "grad_norm": 2.492554752568561, "learning_rate": 8.010459579934632e-07, "loss": 0.8794, "step": 22702 }, { "epoch": 0.8228407814142292, "grad_norm": 2.5001008195431997, "learning_rate": 8.007273375862451e-07, "loss": 0.8581, "step": 22703 }, { "epoch": 0.822877025116886, "grad_norm": 2.499838760758293, "learning_rate": 8.004087750424655e-07, "loss": 1.0172, "step": 22704 }, { "epoch": 0.8229132688195426, "grad_norm": 2.419611580807021, "learning_rate": 8.000902703665119e-07, "loss": 0.8906, "step": 22705 }, { "epoch": 0.8229495125221993, "grad_norm": 2.348093453194894, "learning_rate": 7.99771823562775e-07, "loss": 0.8368, "step": 22706 }, { "epoch": 0.8229857562248559, "grad_norm": 2.6573321402358854, "learning_rate": 7.994534346356414e-07, "loss": 1.0285, "step": 22707 }, { "epoch": 0.8230219999275126, "grad_norm": 2.534406886652211, "learning_rate": 7.991351035895006e-07, "loss": 0.8344, "step": 22708 }, { "epoch": 0.8230582436301692, "grad_norm": 2.254129076300161, "learning_rate": 7.988168304287364e-07, "loss": 0.764, "step": 22709 }, { "epoch": 0.823094487332826, "grad_norm": 2.383455504375866, "learning_rate": 7.984986151577357e-07, "loss": 0.8857, "step": 22710 }, { "epoch": 0.8231307310354826, "grad_norm": 2.601496941764073, "learning_rate": 7.981804577808838e-07, "loss": 0.8734, "step": 22711 }, { "epoch": 0.8231669747381393, "grad_norm": 2.1873156687219146, "learning_rate": 7.97862358302563e-07, "loss": 0.8485, "step": 22712 }, { "epoch": 0.8232032184407959, "grad_norm": 2.3590051501316687, "learning_rate": 7.975443167271574e-07, "loss": 1.0709, "step": 22713 }, { "epoch": 0.8232394621434526, "grad_norm": 2.5664393999058217, "learning_rate": 7.972263330590496e-07, "loss": 0.9442, "step": 22714 }, { "epoch": 0.8232757058461092, "grad_norm": 2.2420364174354397, "learning_rate": 7.969084073026229e-07, "loss": 0.9264, "step": 22715 }, { "epoch": 0.8233119495487659, "grad_norm": 2.3193519656877553, "learning_rate": 7.965905394622547e-07, "loss": 0.8891, "step": 22716 }, { "epoch": 0.8233481932514226, "grad_norm": 2.2412853954145344, "learning_rate": 7.962727295423267e-07, "loss": 0.8498, "step": 22717 }, { "epoch": 0.8233844369540793, "grad_norm": 2.624034385218202, "learning_rate": 7.95954977547218e-07, "loss": 0.9067, "step": 22718 }, { "epoch": 0.8234206806567359, "grad_norm": 2.439208698837564, "learning_rate": 7.956372834813087e-07, "loss": 1.0547, "step": 22719 }, { "epoch": 0.8234569243593926, "grad_norm": 2.0475249074664306, "learning_rate": 7.953196473489738e-07, "loss": 1.0339, "step": 22720 }, { "epoch": 0.8234931680620492, "grad_norm": 2.3001418439437935, "learning_rate": 7.950020691545912e-07, "loss": 0.8152, "step": 22721 }, { "epoch": 0.8235294117647058, "grad_norm": 2.4241111880985726, "learning_rate": 7.946845489025379e-07, "loss": 0.6562, "step": 22722 }, { "epoch": 0.8235656554673626, "grad_norm": 2.2525552420214483, "learning_rate": 7.943670865971869e-07, "loss": 0.7906, "step": 22723 }, { "epoch": 0.8236018991700192, "grad_norm": 2.473528833535619, "learning_rate": 7.940496822429156e-07, "loss": 0.9276, "step": 22724 }, { "epoch": 0.8236381428726759, "grad_norm": 2.381096842436834, "learning_rate": 7.937323358440935e-07, "loss": 0.9095, "step": 22725 }, { "epoch": 0.8236743865753325, "grad_norm": 2.732024369518559, "learning_rate": 7.934150474050983e-07, "loss": 0.8339, "step": 22726 }, { "epoch": 0.8237106302779892, "grad_norm": 2.2503719404142037, "learning_rate": 7.930978169302988e-07, "loss": 0.9331, "step": 22727 }, { "epoch": 0.8237468739806458, "grad_norm": 2.2769824275453447, "learning_rate": 7.927806444240676e-07, "loss": 0.926, "step": 22728 }, { "epoch": 0.8237831176833025, "grad_norm": 2.1884200392834052, "learning_rate": 7.92463529890774e-07, "loss": 0.8569, "step": 22729 }, { "epoch": 0.8238193613859592, "grad_norm": 2.5752225658515724, "learning_rate": 7.921464733347883e-07, "loss": 0.9192, "step": 22730 }, { "epoch": 0.8238556050886159, "grad_norm": 2.2315449323984584, "learning_rate": 7.918294747604804e-07, "loss": 0.9818, "step": 22731 }, { "epoch": 0.8238918487912725, "grad_norm": 2.3264782599177374, "learning_rate": 7.915125341722163e-07, "loss": 0.7186, "step": 22732 }, { "epoch": 0.8239280924939292, "grad_norm": 2.6145680309598043, "learning_rate": 7.911956515743641e-07, "loss": 0.8828, "step": 22733 }, { "epoch": 0.8239643361965858, "grad_norm": 2.572281796844709, "learning_rate": 7.908788269712909e-07, "loss": 0.9435, "step": 22734 }, { "epoch": 0.8240005798992425, "grad_norm": 2.5776839820692894, "learning_rate": 7.905620603673625e-07, "loss": 0.7309, "step": 22735 }, { "epoch": 0.8240368236018991, "grad_norm": 2.484600020236797, "learning_rate": 7.902453517669417e-07, "loss": 1.0371, "step": 22736 }, { "epoch": 0.8240730673045559, "grad_norm": 2.428779400814798, "learning_rate": 7.899287011743944e-07, "loss": 0.9768, "step": 22737 }, { "epoch": 0.8241093110072125, "grad_norm": 2.592664762247887, "learning_rate": 7.896121085940833e-07, "loss": 0.8689, "step": 22738 }, { "epoch": 0.8241455547098692, "grad_norm": 2.486308695668539, "learning_rate": 7.892955740303726e-07, "loss": 0.898, "step": 22739 }, { "epoch": 0.8241817984125258, "grad_norm": 2.2077339000506195, "learning_rate": 7.889790974876204e-07, "loss": 0.8117, "step": 22740 }, { "epoch": 0.8242180421151825, "grad_norm": 1.9103325718008626, "learning_rate": 7.886626789701902e-07, "loss": 0.843, "step": 22741 }, { "epoch": 0.8242542858178391, "grad_norm": 2.275464452504766, "learning_rate": 7.883463184824425e-07, "loss": 0.8926, "step": 22742 }, { "epoch": 0.8242905295204959, "grad_norm": 2.6105597586220313, "learning_rate": 7.880300160287335e-07, "loss": 0.8309, "step": 22743 }, { "epoch": 0.8243267732231525, "grad_norm": 2.59467572193352, "learning_rate": 7.877137716134254e-07, "loss": 0.9375, "step": 22744 }, { "epoch": 0.8243630169258092, "grad_norm": 2.4719078736743145, "learning_rate": 7.873975852408716e-07, "loss": 0.7955, "step": 22745 }, { "epoch": 0.8243992606284658, "grad_norm": 2.1138675933443527, "learning_rate": 7.870814569154333e-07, "loss": 0.7995, "step": 22746 }, { "epoch": 0.8244355043311224, "grad_norm": 2.2680523805430637, "learning_rate": 7.867653866414637e-07, "loss": 1.0837, "step": 22747 }, { "epoch": 0.8244717480337791, "grad_norm": 2.5262843800263233, "learning_rate": 7.864493744233203e-07, "loss": 0.935, "step": 22748 }, { "epoch": 0.8245079917364357, "grad_norm": 2.2917035152596408, "learning_rate": 7.861334202653548e-07, "loss": 1.1081, "step": 22749 }, { "epoch": 0.8245442354390925, "grad_norm": 2.14792534454076, "learning_rate": 7.858175241719229e-07, "loss": 0.8098, "step": 22750 }, { "epoch": 0.8245804791417491, "grad_norm": 2.228512339051929, "learning_rate": 7.855016861473774e-07, "loss": 0.8934, "step": 22751 }, { "epoch": 0.8246167228444058, "grad_norm": 2.664583502613476, "learning_rate": 7.851859061960687e-07, "loss": 1.0098, "step": 22752 }, { "epoch": 0.8246529665470624, "grad_norm": 2.065386908394119, "learning_rate": 7.848701843223494e-07, "loss": 0.7498, "step": 22753 }, { "epoch": 0.8246892102497191, "grad_norm": 2.446840667261894, "learning_rate": 7.8455452053057e-07, "loss": 0.8161, "step": 22754 }, { "epoch": 0.8247254539523757, "grad_norm": 2.286613282525308, "learning_rate": 7.842389148250807e-07, "loss": 0.6834, "step": 22755 }, { "epoch": 0.8247616976550325, "grad_norm": 2.5239256957364837, "learning_rate": 7.83923367210228e-07, "loss": 0.8931, "step": 22756 }, { "epoch": 0.8247979413576891, "grad_norm": 2.442494169438302, "learning_rate": 7.836078776903621e-07, "loss": 0.8824, "step": 22757 }, { "epoch": 0.8248341850603458, "grad_norm": 2.6472675368249714, "learning_rate": 7.832924462698305e-07, "loss": 1.186, "step": 22758 }, { "epoch": 0.8248704287630024, "grad_norm": 2.5360504909226207, "learning_rate": 7.829770729529778e-07, "loss": 1.008, "step": 22759 }, { "epoch": 0.8249066724656591, "grad_norm": 2.1770240073422293, "learning_rate": 7.826617577441509e-07, "loss": 0.994, "step": 22760 }, { "epoch": 0.8249429161683157, "grad_norm": 2.46049951570699, "learning_rate": 7.823465006476938e-07, "loss": 0.8047, "step": 22761 }, { "epoch": 0.8249791598709724, "grad_norm": 2.126246540885044, "learning_rate": 7.820313016679526e-07, "loss": 0.7347, "step": 22762 }, { "epoch": 0.8250154035736291, "grad_norm": 2.2835067419076203, "learning_rate": 7.817161608092682e-07, "loss": 0.8428, "step": 22763 }, { "epoch": 0.8250516472762858, "grad_norm": 2.0657211070201944, "learning_rate": 7.81401078075985e-07, "loss": 0.8119, "step": 22764 }, { "epoch": 0.8250878909789424, "grad_norm": 2.1505544633909044, "learning_rate": 7.810860534724412e-07, "loss": 1.0527, "step": 22765 }, { "epoch": 0.8251241346815991, "grad_norm": 2.479283192214179, "learning_rate": 7.807710870029822e-07, "loss": 0.9827, "step": 22766 }, { "epoch": 0.8251603783842557, "grad_norm": 2.401071233879653, "learning_rate": 7.80456178671945e-07, "loss": 0.8012, "step": 22767 }, { "epoch": 0.8251966220869124, "grad_norm": 2.3494724629048833, "learning_rate": 7.801413284836712e-07, "loss": 0.9079, "step": 22768 }, { "epoch": 0.8252328657895691, "grad_norm": 1.9550948790091465, "learning_rate": 7.798265364424962e-07, "loss": 0.6262, "step": 22769 }, { "epoch": 0.8252691094922258, "grad_norm": 2.0206653708399935, "learning_rate": 7.795118025527592e-07, "loss": 0.8968, "step": 22770 }, { "epoch": 0.8253053531948824, "grad_norm": 2.1440147332691804, "learning_rate": 7.791971268187986e-07, "loss": 1.0046, "step": 22771 }, { "epoch": 0.825341596897539, "grad_norm": 2.2198206192914243, "learning_rate": 7.788825092449476e-07, "loss": 0.7352, "step": 22772 }, { "epoch": 0.8253778406001957, "grad_norm": 2.379155123452557, "learning_rate": 7.785679498355431e-07, "loss": 0.9133, "step": 22773 }, { "epoch": 0.8254140843028523, "grad_norm": 2.4395708900283113, "learning_rate": 7.782534485949189e-07, "loss": 0.8258, "step": 22774 }, { "epoch": 0.825450328005509, "grad_norm": 2.3506401781736677, "learning_rate": 7.7793900552741e-07, "loss": 0.8395, "step": 22775 }, { "epoch": 0.8254865717081658, "grad_norm": 2.348736031027772, "learning_rate": 7.776246206373472e-07, "loss": 1.0605, "step": 22776 }, { "epoch": 0.8255228154108224, "grad_norm": 2.426515511536818, "learning_rate": 7.773102939290633e-07, "loss": 0.8185, "step": 22777 }, { "epoch": 0.825559059113479, "grad_norm": 2.335326160583478, "learning_rate": 7.769960254068909e-07, "loss": 0.9387, "step": 22778 }, { "epoch": 0.8255953028161357, "grad_norm": 2.3293710709516176, "learning_rate": 7.766818150751582e-07, "loss": 0.9794, "step": 22779 }, { "epoch": 0.8256315465187923, "grad_norm": 2.187654001064949, "learning_rate": 7.763676629381972e-07, "loss": 1.0783, "step": 22780 }, { "epoch": 0.825667790221449, "grad_norm": 2.0194981948853363, "learning_rate": 7.760535690003329e-07, "loss": 0.9019, "step": 22781 }, { "epoch": 0.8257040339241057, "grad_norm": 2.3756445191542626, "learning_rate": 7.757395332658984e-07, "loss": 0.8878, "step": 22782 }, { "epoch": 0.8257402776267624, "grad_norm": 2.5565549784128985, "learning_rate": 7.754255557392176e-07, "loss": 0.9102, "step": 22783 }, { "epoch": 0.825776521329419, "grad_norm": 2.4792563647137738, "learning_rate": 7.751116364246181e-07, "loss": 0.8877, "step": 22784 }, { "epoch": 0.8258127650320757, "grad_norm": 2.100584652178219, "learning_rate": 7.747977753264229e-07, "loss": 0.7079, "step": 22785 }, { "epoch": 0.8258490087347323, "grad_norm": 2.62098090950621, "learning_rate": 7.744839724489617e-07, "loss": 0.8237, "step": 22786 }, { "epoch": 0.825885252437389, "grad_norm": 2.2481037860293074, "learning_rate": 7.741702277965546e-07, "loss": 0.9753, "step": 22787 }, { "epoch": 0.8259214961400456, "grad_norm": 2.1956366730929537, "learning_rate": 7.738565413735272e-07, "loss": 0.8397, "step": 22788 }, { "epoch": 0.8259577398427024, "grad_norm": 2.3531669816257352, "learning_rate": 7.735429131841998e-07, "loss": 0.9443, "step": 22789 }, { "epoch": 0.825993983545359, "grad_norm": 2.257095455728173, "learning_rate": 7.732293432328947e-07, "loss": 0.9025, "step": 22790 }, { "epoch": 0.8260302272480157, "grad_norm": 2.1809819706075038, "learning_rate": 7.729158315239343e-07, "loss": 0.8686, "step": 22791 }, { "epoch": 0.8260664709506723, "grad_norm": 2.4258863913650974, "learning_rate": 7.726023780616348e-07, "loss": 0.8311, "step": 22792 }, { "epoch": 0.826102714653329, "grad_norm": 2.292893836415938, "learning_rate": 7.722889828503205e-07, "loss": 1.021, "step": 22793 }, { "epoch": 0.8261389583559856, "grad_norm": 2.3407939869415304, "learning_rate": 7.719756458943056e-07, "loss": 0.8411, "step": 22794 }, { "epoch": 0.8261752020586424, "grad_norm": 2.5133125564761696, "learning_rate": 7.716623671979107e-07, "loss": 0.8647, "step": 22795 }, { "epoch": 0.826211445761299, "grad_norm": 2.310529748438842, "learning_rate": 7.713491467654499e-07, "loss": 0.8437, "step": 22796 }, { "epoch": 0.8262476894639557, "grad_norm": 2.124604563264865, "learning_rate": 7.710359846012405e-07, "loss": 0.7947, "step": 22797 }, { "epoch": 0.8262839331666123, "grad_norm": 2.392351906836986, "learning_rate": 7.707228807095984e-07, "loss": 0.7755, "step": 22798 }, { "epoch": 0.826320176869269, "grad_norm": 2.2809108877751374, "learning_rate": 7.704098350948369e-07, "loss": 0.8298, "step": 22799 }, { "epoch": 0.8263564205719256, "grad_norm": 2.125107315041678, "learning_rate": 7.700968477612691e-07, "loss": 0.6923, "step": 22800 }, { "epoch": 0.8263926642745822, "grad_norm": 2.8327132043586003, "learning_rate": 7.697839187132089e-07, "loss": 0.8108, "step": 22801 }, { "epoch": 0.826428907977239, "grad_norm": 2.427804388453257, "learning_rate": 7.694710479549694e-07, "loss": 0.8976, "step": 22802 }, { "epoch": 0.8264651516798956, "grad_norm": 2.332138250295397, "learning_rate": 7.691582354908589e-07, "loss": 0.8114, "step": 22803 }, { "epoch": 0.8265013953825523, "grad_norm": 2.2288552546447744, "learning_rate": 7.688454813251888e-07, "loss": 0.8467, "step": 22804 }, { "epoch": 0.8265376390852089, "grad_norm": 2.10873739392035, "learning_rate": 7.685327854622699e-07, "loss": 0.7122, "step": 22805 }, { "epoch": 0.8265738827878656, "grad_norm": 2.421183576125954, "learning_rate": 7.682201479064105e-07, "loss": 0.959, "step": 22806 }, { "epoch": 0.8266101264905222, "grad_norm": 2.600332352090447, "learning_rate": 7.67907568661917e-07, "loss": 0.9111, "step": 22807 }, { "epoch": 0.8266463701931789, "grad_norm": 2.404110060632321, "learning_rate": 7.675950477330984e-07, "loss": 1.0599, "step": 22808 }, { "epoch": 0.8266826138958356, "grad_norm": 2.346527843646089, "learning_rate": 7.67282585124261e-07, "loss": 0.7888, "step": 22809 }, { "epoch": 0.8267188575984923, "grad_norm": 2.1897396542715883, "learning_rate": 7.669701808397084e-07, "loss": 0.9088, "step": 22810 }, { "epoch": 0.8267551013011489, "grad_norm": 2.321020980776434, "learning_rate": 7.666578348837484e-07, "loss": 0.8921, "step": 22811 }, { "epoch": 0.8267913450038056, "grad_norm": 2.428983624169515, "learning_rate": 7.663455472606807e-07, "loss": 0.863, "step": 22812 }, { "epoch": 0.8268275887064622, "grad_norm": 2.4836238108547444, "learning_rate": 7.660333179748131e-07, "loss": 0.9032, "step": 22813 }, { "epoch": 0.8268638324091189, "grad_norm": 2.5140533312232543, "learning_rate": 7.657211470304444e-07, "loss": 0.9152, "step": 22814 }, { "epoch": 0.8269000761117756, "grad_norm": 2.3735005846877666, "learning_rate": 7.654090344318788e-07, "loss": 0.9284, "step": 22815 }, { "epoch": 0.8269363198144323, "grad_norm": 2.436478056453074, "learning_rate": 7.650969801834147e-07, "loss": 0.7953, "step": 22816 }, { "epoch": 0.8269725635170889, "grad_norm": 2.3678742875122634, "learning_rate": 7.647849842893528e-07, "loss": 0.8592, "step": 22817 }, { "epoch": 0.8270088072197456, "grad_norm": 2.2535700429450105, "learning_rate": 7.644730467539935e-07, "loss": 0.948, "step": 22818 }, { "epoch": 0.8270450509224022, "grad_norm": 2.3617727339698895, "learning_rate": 7.641611675816335e-07, "loss": 0.8656, "step": 22819 }, { "epoch": 0.8270812946250589, "grad_norm": 2.8170172433056586, "learning_rate": 7.6384934677657e-07, "loss": 0.8376, "step": 22820 }, { "epoch": 0.8271175383277155, "grad_norm": 2.4646968490304366, "learning_rate": 7.635375843431014e-07, "loss": 0.8787, "step": 22821 }, { "epoch": 0.8271537820303723, "grad_norm": 2.3309874103166686, "learning_rate": 7.632258802855236e-07, "loss": 0.7405, "step": 22822 }, { "epoch": 0.8271900257330289, "grad_norm": 2.3827542401510073, "learning_rate": 7.629142346081297e-07, "loss": 0.7896, "step": 22823 }, { "epoch": 0.8272262694356856, "grad_norm": 2.381571761710644, "learning_rate": 7.626026473152148e-07, "loss": 0.9543, "step": 22824 }, { "epoch": 0.8272625131383422, "grad_norm": 2.5329290281279175, "learning_rate": 7.622911184110732e-07, "loss": 1.0488, "step": 22825 }, { "epoch": 0.8272987568409989, "grad_norm": 2.577055387451119, "learning_rate": 7.61979647899998e-07, "loss": 0.9528, "step": 22826 }, { "epoch": 0.8273350005436555, "grad_norm": 2.516472027369796, "learning_rate": 7.616682357862792e-07, "loss": 0.8267, "step": 22827 }, { "epoch": 0.8273712442463123, "grad_norm": 2.7998845411744484, "learning_rate": 7.613568820742085e-07, "loss": 0.9059, "step": 22828 }, { "epoch": 0.8274074879489689, "grad_norm": 2.306869472310527, "learning_rate": 7.610455867680777e-07, "loss": 0.9264, "step": 22829 }, { "epoch": 0.8274437316516255, "grad_norm": 2.3976695245765636, "learning_rate": 7.607343498721742e-07, "loss": 0.9508, "step": 22830 }, { "epoch": 0.8274799753542822, "grad_norm": 2.386187283916551, "learning_rate": 7.60423171390789e-07, "loss": 0.9637, "step": 22831 }, { "epoch": 0.8275162190569388, "grad_norm": 2.6919368852444774, "learning_rate": 7.601120513282056e-07, "loss": 1.0095, "step": 22832 }, { "epoch": 0.8275524627595955, "grad_norm": 2.4195246296639503, "learning_rate": 7.598009896887166e-07, "loss": 0.8161, "step": 22833 }, { "epoch": 0.8275887064622521, "grad_norm": 2.804383976840894, "learning_rate": 7.594899864766042e-07, "loss": 0.963, "step": 22834 }, { "epoch": 0.8276249501649089, "grad_norm": 2.495902127064491, "learning_rate": 7.591790416961564e-07, "loss": 0.8078, "step": 22835 }, { "epoch": 0.8276611938675655, "grad_norm": 2.4659951099852613, "learning_rate": 7.588681553516553e-07, "loss": 0.9281, "step": 22836 }, { "epoch": 0.8276974375702222, "grad_norm": 2.315235783498571, "learning_rate": 7.585573274473862e-07, "loss": 0.9194, "step": 22837 }, { "epoch": 0.8277336812728788, "grad_norm": 2.7578346409592345, "learning_rate": 7.58246557987633e-07, "loss": 0.7908, "step": 22838 }, { "epoch": 0.8277699249755355, "grad_norm": 2.263586009312361, "learning_rate": 7.579358469766762e-07, "loss": 1.0782, "step": 22839 }, { "epoch": 0.8278061686781921, "grad_norm": 2.5937591214402813, "learning_rate": 7.576251944187979e-07, "loss": 0.9278, "step": 22840 }, { "epoch": 0.8278424123808489, "grad_norm": 2.6567873312184784, "learning_rate": 7.57314600318278e-07, "loss": 0.9286, "step": 22841 }, { "epoch": 0.8278786560835055, "grad_norm": 2.509958489640987, "learning_rate": 7.570040646793991e-07, "loss": 0.8789, "step": 22842 }, { "epoch": 0.8279148997861622, "grad_norm": 2.305295841376485, "learning_rate": 7.566935875064363e-07, "loss": 0.7745, "step": 22843 }, { "epoch": 0.8279511434888188, "grad_norm": 2.151420633449871, "learning_rate": 7.5638316880367e-07, "loss": 0.8751, "step": 22844 }, { "epoch": 0.8279873871914755, "grad_norm": 2.3315178226926663, "learning_rate": 7.560728085753766e-07, "loss": 0.8859, "step": 22845 }, { "epoch": 0.8280236308941321, "grad_norm": 2.2087220874957043, "learning_rate": 7.557625068258351e-07, "loss": 0.7895, "step": 22846 }, { "epoch": 0.8280598745967888, "grad_norm": 2.170640966630694, "learning_rate": 7.554522635593175e-07, "loss": 0.8041, "step": 22847 }, { "epoch": 0.8280961182994455, "grad_norm": 2.50327038347328, "learning_rate": 7.551420787801012e-07, "loss": 0.907, "step": 22848 }, { "epoch": 0.8281323620021022, "grad_norm": 2.621032294451861, "learning_rate": 7.548319524924608e-07, "loss": 0.8709, "step": 22849 }, { "epoch": 0.8281686057047588, "grad_norm": 2.4478124125474925, "learning_rate": 7.54521884700668e-07, "loss": 1.0685, "step": 22850 }, { "epoch": 0.8282048494074155, "grad_norm": 2.3582446225889413, "learning_rate": 7.542118754089966e-07, "loss": 0.885, "step": 22851 }, { "epoch": 0.8282410931100721, "grad_norm": 2.2796861995438245, "learning_rate": 7.53901924621716e-07, "loss": 0.7814, "step": 22852 }, { "epoch": 0.8282773368127287, "grad_norm": 2.685782383247565, "learning_rate": 7.535920323431006e-07, "loss": 0.9122, "step": 22853 }, { "epoch": 0.8283135805153855, "grad_norm": 2.519107748892544, "learning_rate": 7.532821985774186e-07, "loss": 1.2432, "step": 22854 }, { "epoch": 0.8283498242180422, "grad_norm": 2.373636898427144, "learning_rate": 7.529724233289398e-07, "loss": 0.982, "step": 22855 }, { "epoch": 0.8283860679206988, "grad_norm": 2.0390583767550727, "learning_rate": 7.52662706601932e-07, "loss": 1.008, "step": 22856 }, { "epoch": 0.8284223116233554, "grad_norm": 2.2539174155484605, "learning_rate": 7.52353048400663e-07, "loss": 0.7347, "step": 22857 }, { "epoch": 0.8284585553260121, "grad_norm": 2.4217198715212263, "learning_rate": 7.520434487294015e-07, "loss": 0.9337, "step": 22858 }, { "epoch": 0.8284947990286687, "grad_norm": 2.3281234473939003, "learning_rate": 7.517339075924107e-07, "loss": 0.8709, "step": 22859 }, { "epoch": 0.8285310427313254, "grad_norm": 2.3390009562481153, "learning_rate": 7.514244249939578e-07, "loss": 1.0134, "step": 22860 }, { "epoch": 0.8285672864339821, "grad_norm": 2.4165077629133407, "learning_rate": 7.511150009383067e-07, "loss": 0.9195, "step": 22861 }, { "epoch": 0.8286035301366388, "grad_norm": 2.4474497468970684, "learning_rate": 7.508056354297221e-07, "loss": 0.9284, "step": 22862 }, { "epoch": 0.8286397738392954, "grad_norm": 2.19406195025475, "learning_rate": 7.504963284724653e-07, "loss": 0.9482, "step": 22863 }, { "epoch": 0.8286760175419521, "grad_norm": 2.24634213502759, "learning_rate": 7.501870800707989e-07, "loss": 1.0196, "step": 22864 }, { "epoch": 0.8287122612446087, "grad_norm": 2.395599381612966, "learning_rate": 7.498778902289855e-07, "loss": 0.9479, "step": 22865 }, { "epoch": 0.8287485049472654, "grad_norm": 2.46898224614288, "learning_rate": 7.495687589512835e-07, "loss": 0.9642, "step": 22866 }, { "epoch": 0.828784748649922, "grad_norm": 2.4298120296524863, "learning_rate": 7.492596862419537e-07, "loss": 0.8446, "step": 22867 }, { "epoch": 0.8288209923525788, "grad_norm": 2.219693039231543, "learning_rate": 7.489506721052531e-07, "loss": 0.9385, "step": 22868 }, { "epoch": 0.8288572360552354, "grad_norm": 2.3624339709412427, "learning_rate": 7.486417165454435e-07, "loss": 0.909, "step": 22869 }, { "epoch": 0.8288934797578921, "grad_norm": 2.4942364629126645, "learning_rate": 7.483328195667788e-07, "loss": 0.8704, "step": 22870 }, { "epoch": 0.8289297234605487, "grad_norm": 2.68676332082505, "learning_rate": 7.480239811735174e-07, "loss": 1.0257, "step": 22871 }, { "epoch": 0.8289659671632054, "grad_norm": 2.4431286591756596, "learning_rate": 7.477152013699118e-07, "loss": 0.9802, "step": 22872 }, { "epoch": 0.829002210865862, "grad_norm": 2.268775158841013, "learning_rate": 7.474064801602215e-07, "loss": 0.9758, "step": 22873 }, { "epoch": 0.8290384545685188, "grad_norm": 2.1461570205395275, "learning_rate": 7.470978175486971e-07, "loss": 0.7376, "step": 22874 }, { "epoch": 0.8290746982711754, "grad_norm": 2.4053064603372385, "learning_rate": 7.467892135395921e-07, "loss": 0.9884, "step": 22875 }, { "epoch": 0.8291109419738321, "grad_norm": 2.2646240595717795, "learning_rate": 7.464806681371611e-07, "loss": 0.6175, "step": 22876 }, { "epoch": 0.8291471856764887, "grad_norm": 2.5252131226799457, "learning_rate": 7.461721813456524e-07, "loss": 0.9634, "step": 22877 }, { "epoch": 0.8291834293791454, "grad_norm": 2.443292318398708, "learning_rate": 7.458637531693202e-07, "loss": 0.964, "step": 22878 }, { "epoch": 0.829219673081802, "grad_norm": 2.970520578723595, "learning_rate": 7.455553836124096e-07, "loss": 0.9146, "step": 22879 }, { "epoch": 0.8292559167844586, "grad_norm": 2.3938662523202487, "learning_rate": 7.452470726791761e-07, "loss": 0.936, "step": 22880 }, { "epoch": 0.8292921604871154, "grad_norm": 2.589339422039074, "learning_rate": 7.449388203738628e-07, "loss": 0.8399, "step": 22881 }, { "epoch": 0.829328404189772, "grad_norm": 2.0869981745639015, "learning_rate": 7.446306267007202e-07, "loss": 0.8566, "step": 22882 }, { "epoch": 0.8293646478924287, "grad_norm": 2.353757763535335, "learning_rate": 7.443224916639935e-07, "loss": 0.9228, "step": 22883 }, { "epoch": 0.8294008915950853, "grad_norm": 2.250663405009835, "learning_rate": 7.440144152679291e-07, "loss": 0.9811, "step": 22884 }, { "epoch": 0.829437135297742, "grad_norm": 2.2396084600854125, "learning_rate": 7.43706397516773e-07, "loss": 0.693, "step": 22885 }, { "epoch": 0.8294733790003986, "grad_norm": 2.4314330645009945, "learning_rate": 7.433984384147675e-07, "loss": 0.9493, "step": 22886 }, { "epoch": 0.8295096227030554, "grad_norm": 2.375717767717242, "learning_rate": 7.430905379661568e-07, "loss": 0.9437, "step": 22887 }, { "epoch": 0.829545866405712, "grad_norm": 2.2745094459198674, "learning_rate": 7.427826961751844e-07, "loss": 0.869, "step": 22888 }, { "epoch": 0.8295821101083687, "grad_norm": 2.283483127762298, "learning_rate": 7.424749130460929e-07, "loss": 0.7421, "step": 22889 }, { "epoch": 0.8296183538110253, "grad_norm": 2.1271503869674775, "learning_rate": 7.42167188583121e-07, "loss": 0.7821, "step": 22890 }, { "epoch": 0.829654597513682, "grad_norm": 2.239738632512777, "learning_rate": 7.418595227905101e-07, "loss": 0.6058, "step": 22891 }, { "epoch": 0.8296908412163386, "grad_norm": 2.628896150063081, "learning_rate": 7.415519156724999e-07, "loss": 0.9553, "step": 22892 }, { "epoch": 0.8297270849189953, "grad_norm": 2.8071893227728686, "learning_rate": 7.412443672333303e-07, "loss": 1.0039, "step": 22893 }, { "epoch": 0.829763328621652, "grad_norm": 2.7253406202777923, "learning_rate": 7.409368774772363e-07, "loss": 1.0074, "step": 22894 }, { "epoch": 0.8297995723243087, "grad_norm": 2.133846179463317, "learning_rate": 7.406294464084563e-07, "loss": 0.7743, "step": 22895 }, { "epoch": 0.8298358160269653, "grad_norm": 2.543075412976289, "learning_rate": 7.403220740312272e-07, "loss": 0.9312, "step": 22896 }, { "epoch": 0.829872059729622, "grad_norm": 2.3541978025543684, "learning_rate": 7.400147603497832e-07, "loss": 0.6542, "step": 22897 }, { "epoch": 0.8299083034322786, "grad_norm": 2.502373736996795, "learning_rate": 7.397075053683605e-07, "loss": 0.9259, "step": 22898 }, { "epoch": 0.8299445471349353, "grad_norm": 2.290583944783179, "learning_rate": 7.394003090911895e-07, "loss": 0.6478, "step": 22899 }, { "epoch": 0.829980790837592, "grad_norm": 2.368938304271449, "learning_rate": 7.39093171522508e-07, "loss": 0.9197, "step": 22900 }, { "epoch": 0.8300170345402487, "grad_norm": 2.4203074156330255, "learning_rate": 7.387860926665441e-07, "loss": 0.8135, "step": 22901 }, { "epoch": 0.8300532782429053, "grad_norm": 2.1590150960364767, "learning_rate": 7.384790725275325e-07, "loss": 0.8489, "step": 22902 }, { "epoch": 0.830089521945562, "grad_norm": 2.330343046008891, "learning_rate": 7.381721111097001e-07, "loss": 0.8229, "step": 22903 }, { "epoch": 0.8301257656482186, "grad_norm": 2.3091320981480723, "learning_rate": 7.378652084172794e-07, "loss": 0.8523, "step": 22904 }, { "epoch": 0.8301620093508753, "grad_norm": 2.130957540446432, "learning_rate": 7.375583644544987e-07, "loss": 0.7912, "step": 22905 }, { "epoch": 0.8301982530535319, "grad_norm": 2.560494834326804, "learning_rate": 7.372515792255852e-07, "loss": 0.9014, "step": 22906 }, { "epoch": 0.8302344967561887, "grad_norm": 2.4407364337096147, "learning_rate": 7.369448527347673e-07, "loss": 0.7867, "step": 22907 }, { "epoch": 0.8302707404588453, "grad_norm": 2.4138662785412253, "learning_rate": 7.366381849862709e-07, "loss": 0.9788, "step": 22908 }, { "epoch": 0.830306984161502, "grad_norm": 2.134766251813576, "learning_rate": 7.363315759843226e-07, "loss": 0.8157, "step": 22909 }, { "epoch": 0.8303432278641586, "grad_norm": 2.3732834726313037, "learning_rate": 7.360250257331458e-07, "loss": 0.879, "step": 22910 }, { "epoch": 0.8303794715668152, "grad_norm": 2.4219799752083317, "learning_rate": 7.357185342369649e-07, "loss": 0.964, "step": 22911 }, { "epoch": 0.8304157152694719, "grad_norm": 2.5914041776803747, "learning_rate": 7.354121015000043e-07, "loss": 0.7982, "step": 22912 }, { "epoch": 0.8304519589721286, "grad_norm": 2.3593707052295514, "learning_rate": 7.351057275264866e-07, "loss": 0.9639, "step": 22913 }, { "epoch": 0.8304882026747853, "grad_norm": 2.4548444587695015, "learning_rate": 7.34799412320632e-07, "loss": 0.8991, "step": 22914 }, { "epoch": 0.8305244463774419, "grad_norm": 2.3380571141661024, "learning_rate": 7.344931558866613e-07, "loss": 0.9671, "step": 22915 }, { "epoch": 0.8305606900800986, "grad_norm": 2.144435052179542, "learning_rate": 7.34186958228797e-07, "loss": 0.8893, "step": 22916 }, { "epoch": 0.8305969337827552, "grad_norm": 2.223998864809092, "learning_rate": 7.338808193512548e-07, "loss": 0.7707, "step": 22917 }, { "epoch": 0.8306331774854119, "grad_norm": 2.2623618915311323, "learning_rate": 7.335747392582559e-07, "loss": 0.8139, "step": 22918 }, { "epoch": 0.8306694211880685, "grad_norm": 2.2548553506366487, "learning_rate": 7.332687179540155e-07, "loss": 0.8923, "step": 22919 }, { "epoch": 0.8307056648907253, "grad_norm": 2.3196504455117233, "learning_rate": 7.329627554427532e-07, "loss": 0.9182, "step": 22920 }, { "epoch": 0.8307419085933819, "grad_norm": 1.961728248019644, "learning_rate": 7.326568517286825e-07, "loss": 0.8019, "step": 22921 }, { "epoch": 0.8307781522960386, "grad_norm": 2.1426688741333, "learning_rate": 7.323510068160205e-07, "loss": 0.7933, "step": 22922 }, { "epoch": 0.8308143959986952, "grad_norm": 2.2912410365216997, "learning_rate": 7.320452207089801e-07, "loss": 1.0113, "step": 22923 }, { "epoch": 0.8308506397013519, "grad_norm": 2.1296330370798957, "learning_rate": 7.317394934117749e-07, "loss": 0.6603, "step": 22924 }, { "epoch": 0.8308868834040085, "grad_norm": 2.335927376963539, "learning_rate": 7.314338249286196e-07, "loss": 0.7788, "step": 22925 }, { "epoch": 0.8309231271066653, "grad_norm": 2.5863126819042814, "learning_rate": 7.311282152637233e-07, "loss": 0.8714, "step": 22926 }, { "epoch": 0.8309593708093219, "grad_norm": 2.3005764755356135, "learning_rate": 7.308226644212984e-07, "loss": 0.8934, "step": 22927 }, { "epoch": 0.8309956145119786, "grad_norm": 2.2311285258039666, "learning_rate": 7.305171724055554e-07, "loss": 0.8473, "step": 22928 }, { "epoch": 0.8310318582146352, "grad_norm": 2.368879479894436, "learning_rate": 7.302117392207047e-07, "loss": 0.8534, "step": 22929 }, { "epoch": 0.8310681019172919, "grad_norm": 2.5950151112186366, "learning_rate": 7.299063648709532e-07, "loss": 0.9827, "step": 22930 }, { "epoch": 0.8311043456199485, "grad_norm": 2.156231643360342, "learning_rate": 7.296010493605088e-07, "loss": 0.9991, "step": 22931 }, { "epoch": 0.8311405893226052, "grad_norm": 2.3013385796676924, "learning_rate": 7.292957926935795e-07, "loss": 0.9792, "step": 22932 }, { "epoch": 0.8311768330252619, "grad_norm": 2.2761981174902037, "learning_rate": 7.289905948743731e-07, "loss": 0.9316, "step": 22933 }, { "epoch": 0.8312130767279186, "grad_norm": 2.0431044848506748, "learning_rate": 7.286854559070916e-07, "loss": 0.6374, "step": 22934 }, { "epoch": 0.8312493204305752, "grad_norm": 2.5286785999742527, "learning_rate": 7.283803757959412e-07, "loss": 0.9139, "step": 22935 }, { "epoch": 0.8312855641332318, "grad_norm": 2.695286808445112, "learning_rate": 7.280753545451275e-07, "loss": 0.933, "step": 22936 }, { "epoch": 0.8313218078358885, "grad_norm": 2.1294648504799376, "learning_rate": 7.277703921588502e-07, "loss": 0.6928, "step": 22937 }, { "epoch": 0.8313580515385451, "grad_norm": 2.319214959592009, "learning_rate": 7.274654886413146e-07, "loss": 0.8332, "step": 22938 }, { "epoch": 0.8313942952412018, "grad_norm": 2.5551940169817517, "learning_rate": 7.271606439967182e-07, "loss": 0.9134, "step": 22939 }, { "epoch": 0.8314305389438585, "grad_norm": 2.2424617494301318, "learning_rate": 7.268558582292661e-07, "loss": 0.8804, "step": 22940 }, { "epoch": 0.8314667826465152, "grad_norm": 2.326669616249281, "learning_rate": 7.26551131343155e-07, "loss": 0.8118, "step": 22941 }, { "epoch": 0.8315030263491718, "grad_norm": 2.2989922879372995, "learning_rate": 7.262464633425859e-07, "loss": 0.9004, "step": 22942 }, { "epoch": 0.8315392700518285, "grad_norm": 2.1139101259367328, "learning_rate": 7.259418542317548e-07, "loss": 0.7232, "step": 22943 }, { "epoch": 0.8315755137544851, "grad_norm": 2.603413973339553, "learning_rate": 7.256373040148601e-07, "loss": 0.7711, "step": 22944 }, { "epoch": 0.8316117574571418, "grad_norm": 2.169411757475681, "learning_rate": 7.25332812696099e-07, "loss": 0.94, "step": 22945 }, { "epoch": 0.8316480011597985, "grad_norm": 2.3411071401346177, "learning_rate": 7.250283802796659e-07, "loss": 0.9067, "step": 22946 }, { "epoch": 0.8316842448624552, "grad_norm": 2.148327832035172, "learning_rate": 7.247240067697564e-07, "loss": 0.942, "step": 22947 }, { "epoch": 0.8317204885651118, "grad_norm": 2.422708252593563, "learning_rate": 7.244196921705637e-07, "loss": 0.8779, "step": 22948 }, { "epoch": 0.8317567322677685, "grad_norm": 2.001904025285801, "learning_rate": 7.24115436486284e-07, "loss": 0.6006, "step": 22949 }, { "epoch": 0.8317929759704251, "grad_norm": 2.4625061340547623, "learning_rate": 7.238112397211056e-07, "loss": 0.8608, "step": 22950 }, { "epoch": 0.8318292196730818, "grad_norm": 2.265330692160024, "learning_rate": 7.235071018792228e-07, "loss": 0.8749, "step": 22951 }, { "epoch": 0.8318654633757384, "grad_norm": 2.3058561367747883, "learning_rate": 7.232030229648268e-07, "loss": 0.8957, "step": 22952 }, { "epoch": 0.8319017070783952, "grad_norm": 2.41978287894321, "learning_rate": 7.228990029821053e-07, "loss": 1.0763, "step": 22953 }, { "epoch": 0.8319379507810518, "grad_norm": 2.213892848641649, "learning_rate": 7.225950419352501e-07, "loss": 1.0141, "step": 22954 }, { "epoch": 0.8319741944837085, "grad_norm": 2.5487949286840084, "learning_rate": 7.22291139828446e-07, "loss": 0.9421, "step": 22955 }, { "epoch": 0.8320104381863651, "grad_norm": 2.389700452468881, "learning_rate": 7.219872966658847e-07, "loss": 0.9278, "step": 22956 }, { "epoch": 0.8320466818890218, "grad_norm": 2.364641096347552, "learning_rate": 7.216835124517508e-07, "loss": 0.8401, "step": 22957 }, { "epoch": 0.8320829255916784, "grad_norm": 2.1620546589033176, "learning_rate": 7.213797871902301e-07, "loss": 0.9742, "step": 22958 }, { "epoch": 0.8321191692943352, "grad_norm": 2.2501648235102594, "learning_rate": 7.210761208855088e-07, "loss": 0.8396, "step": 22959 }, { "epoch": 0.8321554129969918, "grad_norm": 2.570241725122918, "learning_rate": 7.207725135417715e-07, "loss": 0.8742, "step": 22960 }, { "epoch": 0.8321916566996485, "grad_norm": 2.193115652190926, "learning_rate": 7.204689651632002e-07, "loss": 0.6982, "step": 22961 }, { "epoch": 0.8322279004023051, "grad_norm": 2.2556246405601454, "learning_rate": 7.201654757539784e-07, "loss": 0.6919, "step": 22962 }, { "epoch": 0.8322641441049617, "grad_norm": 2.0347148278880094, "learning_rate": 7.198620453182887e-07, "loss": 0.6606, "step": 22963 }, { "epoch": 0.8323003878076184, "grad_norm": 2.3152587288504867, "learning_rate": 7.195586738603106e-07, "loss": 0.7237, "step": 22964 }, { "epoch": 0.832336631510275, "grad_norm": 2.3669173057601567, "learning_rate": 7.192553613842262e-07, "loss": 0.8782, "step": 22965 }, { "epoch": 0.8323728752129318, "grad_norm": 2.0746623273095772, "learning_rate": 7.189521078942119e-07, "loss": 1.017, "step": 22966 }, { "epoch": 0.8324091189155884, "grad_norm": 2.1717078430690666, "learning_rate": 7.186489133944507e-07, "loss": 0.8723, "step": 22967 }, { "epoch": 0.8324453626182451, "grad_norm": 2.4823834393994013, "learning_rate": 7.183457778891167e-07, "loss": 0.8457, "step": 22968 }, { "epoch": 0.8324816063209017, "grad_norm": 2.7141193543379325, "learning_rate": 7.180427013823904e-07, "loss": 0.836, "step": 22969 }, { "epoch": 0.8325178500235584, "grad_norm": 2.299884653987262, "learning_rate": 7.17739683878444e-07, "loss": 0.8052, "step": 22970 }, { "epoch": 0.832554093726215, "grad_norm": 2.533348872418166, "learning_rate": 7.174367253814557e-07, "loss": 0.8961, "step": 22971 }, { "epoch": 0.8325903374288718, "grad_norm": 2.226954004850195, "learning_rate": 7.171338258956001e-07, "loss": 0.9942, "step": 22972 }, { "epoch": 0.8326265811315284, "grad_norm": 2.411063811169645, "learning_rate": 7.168309854250488e-07, "loss": 0.7849, "step": 22973 }, { "epoch": 0.8326628248341851, "grad_norm": 2.188881085944172, "learning_rate": 7.165282039739763e-07, "loss": 0.6893, "step": 22974 }, { "epoch": 0.8326990685368417, "grad_norm": 2.2937731078068224, "learning_rate": 7.162254815465547e-07, "loss": 0.9619, "step": 22975 }, { "epoch": 0.8327353122394984, "grad_norm": 2.2188554812915857, "learning_rate": 7.159228181469563e-07, "loss": 0.8384, "step": 22976 }, { "epoch": 0.832771555942155, "grad_norm": 2.2381489375901524, "learning_rate": 7.156202137793494e-07, "loss": 0.8655, "step": 22977 }, { "epoch": 0.8328077996448117, "grad_norm": 2.387004740593798, "learning_rate": 7.153176684479052e-07, "loss": 1.0299, "step": 22978 }, { "epoch": 0.8328440433474684, "grad_norm": 2.276994508942076, "learning_rate": 7.150151821567919e-07, "loss": 0.8981, "step": 22979 }, { "epoch": 0.8328802870501251, "grad_norm": 2.2966769016815283, "learning_rate": 7.147127549101789e-07, "loss": 0.8108, "step": 22980 }, { "epoch": 0.8329165307527817, "grad_norm": 2.303187478210566, "learning_rate": 7.144103867122315e-07, "loss": 0.899, "step": 22981 }, { "epoch": 0.8329527744554384, "grad_norm": 2.32761463061026, "learning_rate": 7.141080775671172e-07, "loss": 0.8229, "step": 22982 }, { "epoch": 0.832989018158095, "grad_norm": 2.3934777153104756, "learning_rate": 7.138058274790022e-07, "loss": 0.8549, "step": 22983 }, { "epoch": 0.8330252618607517, "grad_norm": 2.363339178354787, "learning_rate": 7.135036364520504e-07, "loss": 0.8824, "step": 22984 }, { "epoch": 0.8330615055634084, "grad_norm": 2.280336904632783, "learning_rate": 7.132015044904267e-07, "loss": 0.9699, "step": 22985 }, { "epoch": 0.8330977492660651, "grad_norm": 2.3806431061242765, "learning_rate": 7.128994315982912e-07, "loss": 0.829, "step": 22986 }, { "epoch": 0.8331339929687217, "grad_norm": 2.7149844374242456, "learning_rate": 7.125974177798117e-07, "loss": 1.0517, "step": 22987 }, { "epoch": 0.8331702366713784, "grad_norm": 2.1507450783257767, "learning_rate": 7.122954630391454e-07, "loss": 0.877, "step": 22988 }, { "epoch": 0.833206480374035, "grad_norm": 2.1655009401044274, "learning_rate": 7.119935673804556e-07, "loss": 0.8902, "step": 22989 }, { "epoch": 0.8332427240766916, "grad_norm": 2.3384659908272805, "learning_rate": 7.116917308078997e-07, "loss": 0.9609, "step": 22990 }, { "epoch": 0.8332789677793483, "grad_norm": 2.557447772357935, "learning_rate": 7.113899533256385e-07, "loss": 0.9882, "step": 22991 }, { "epoch": 0.833315211482005, "grad_norm": 2.195742313266722, "learning_rate": 7.110882349378318e-07, "loss": 0.6799, "step": 22992 }, { "epoch": 0.8333514551846617, "grad_norm": 2.2653633812153267, "learning_rate": 7.107865756486337e-07, "loss": 0.754, "step": 22993 }, { "epoch": 0.8333876988873183, "grad_norm": 2.425325339504184, "learning_rate": 7.104849754622023e-07, "loss": 1.0015, "step": 22994 }, { "epoch": 0.833423942589975, "grad_norm": 2.4133770552654545, "learning_rate": 7.101834343826941e-07, "loss": 0.9154, "step": 22995 }, { "epoch": 0.8334601862926316, "grad_norm": 2.286106677730873, "learning_rate": 7.098819524142647e-07, "loss": 0.9245, "step": 22996 }, { "epoch": 0.8334964299952883, "grad_norm": 2.4347764682904316, "learning_rate": 7.095805295610664e-07, "loss": 0.9399, "step": 22997 }, { "epoch": 0.833532673697945, "grad_norm": 1.7338096020883491, "learning_rate": 7.09279165827254e-07, "loss": 0.6302, "step": 22998 }, { "epoch": 0.8335689174006017, "grad_norm": 2.3357050151362424, "learning_rate": 7.08977861216979e-07, "loss": 1.0284, "step": 22999 }, { "epoch": 0.8336051611032583, "grad_norm": 2.5891984559678685, "learning_rate": 7.086766157343955e-07, "loss": 0.9679, "step": 23000 }, { "epoch": 0.833641404805915, "grad_norm": 2.1358412984620885, "learning_rate": 7.08375429383652e-07, "loss": 0.9526, "step": 23001 }, { "epoch": 0.8336776485085716, "grad_norm": 2.2773284565872944, "learning_rate": 7.080743021688996e-07, "loss": 0.8406, "step": 23002 }, { "epoch": 0.8337138922112283, "grad_norm": 2.409794052558456, "learning_rate": 7.077732340942883e-07, "loss": 1.0355, "step": 23003 }, { "epoch": 0.8337501359138849, "grad_norm": 2.7375053566431498, "learning_rate": 7.074722251639649e-07, "loss": 0.9015, "step": 23004 }, { "epoch": 0.8337863796165417, "grad_norm": 2.3711101453430223, "learning_rate": 7.071712753820798e-07, "loss": 0.8027, "step": 23005 }, { "epoch": 0.8338226233191983, "grad_norm": 2.4157612532149235, "learning_rate": 7.068703847527759e-07, "loss": 1.0215, "step": 23006 }, { "epoch": 0.833858867021855, "grad_norm": 2.0811531675522894, "learning_rate": 7.065695532802036e-07, "loss": 0.6768, "step": 23007 }, { "epoch": 0.8338951107245116, "grad_norm": 2.0894693096216432, "learning_rate": 7.062687809685054e-07, "loss": 0.8839, "step": 23008 }, { "epoch": 0.8339313544271683, "grad_norm": 2.306051500738051, "learning_rate": 7.059680678218278e-07, "loss": 0.7882, "step": 23009 }, { "epoch": 0.8339675981298249, "grad_norm": 2.4598093420705585, "learning_rate": 7.05667413844312e-07, "loss": 0.7399, "step": 23010 }, { "epoch": 0.8340038418324816, "grad_norm": 2.2874200549273063, "learning_rate": 7.05366819040102e-07, "loss": 0.9781, "step": 23011 }, { "epoch": 0.8340400855351383, "grad_norm": 2.412265858488159, "learning_rate": 7.050662834133415e-07, "loss": 0.7325, "step": 23012 }, { "epoch": 0.834076329237795, "grad_norm": 2.5441746086803896, "learning_rate": 7.047658069681685e-07, "loss": 1.0474, "step": 23013 }, { "epoch": 0.8341125729404516, "grad_norm": 2.0742781191471167, "learning_rate": 7.04465389708725e-07, "loss": 0.7883, "step": 23014 }, { "epoch": 0.8341488166431082, "grad_norm": 2.6087778091965554, "learning_rate": 7.041650316391513e-07, "loss": 0.9607, "step": 23015 }, { "epoch": 0.8341850603457649, "grad_norm": 2.3236407422786467, "learning_rate": 7.038647327635856e-07, "loss": 0.9393, "step": 23016 }, { "epoch": 0.8342213040484215, "grad_norm": 2.281160573007279, "learning_rate": 7.035644930861651e-07, "loss": 0.9008, "step": 23017 }, { "epoch": 0.8342575477510783, "grad_norm": 2.4282314174589184, "learning_rate": 7.032643126110273e-07, "loss": 0.9964, "step": 23018 }, { "epoch": 0.834293791453735, "grad_norm": 1.9401685039084655, "learning_rate": 7.029641913423091e-07, "loss": 0.5431, "step": 23019 }, { "epoch": 0.8343300351563916, "grad_norm": 2.588239047074641, "learning_rate": 7.026641292841463e-07, "loss": 0.8169, "step": 23020 }, { "epoch": 0.8343662788590482, "grad_norm": 2.4358140353509055, "learning_rate": 7.023641264406717e-07, "loss": 0.816, "step": 23021 }, { "epoch": 0.8344025225617049, "grad_norm": 2.577265671250707, "learning_rate": 7.020641828160202e-07, "loss": 1.0933, "step": 23022 }, { "epoch": 0.8344387662643615, "grad_norm": 2.217533908390137, "learning_rate": 7.017642984143264e-07, "loss": 0.9781, "step": 23023 }, { "epoch": 0.8344750099670182, "grad_norm": 2.3854943948554546, "learning_rate": 7.014644732397197e-07, "loss": 0.7559, "step": 23024 }, { "epoch": 0.8345112536696749, "grad_norm": 2.447990706053964, "learning_rate": 7.011647072963345e-07, "loss": 1.0039, "step": 23025 }, { "epoch": 0.8345474973723316, "grad_norm": 2.1604712984263346, "learning_rate": 7.008650005882972e-07, "loss": 0.9998, "step": 23026 }, { "epoch": 0.8345837410749882, "grad_norm": 2.460420881782946, "learning_rate": 7.005653531197421e-07, "loss": 0.8458, "step": 23027 }, { "epoch": 0.8346199847776449, "grad_norm": 2.578071099981117, "learning_rate": 7.002657648947952e-07, "loss": 0.8845, "step": 23028 }, { "epoch": 0.8346562284803015, "grad_norm": 2.4089391484574274, "learning_rate": 6.999662359175874e-07, "loss": 0.8915, "step": 23029 }, { "epoch": 0.8346924721829582, "grad_norm": 2.437638704481143, "learning_rate": 6.996667661922424e-07, "loss": 0.7143, "step": 23030 }, { "epoch": 0.8347287158856149, "grad_norm": 2.3190495243487637, "learning_rate": 6.993673557228891e-07, "loss": 0.8629, "step": 23031 }, { "epoch": 0.8347649595882716, "grad_norm": 2.4440134485127953, "learning_rate": 6.990680045136539e-07, "loss": 0.9026, "step": 23032 }, { "epoch": 0.8348012032909282, "grad_norm": 2.084020712401896, "learning_rate": 6.987687125686587e-07, "loss": 0.7397, "step": 23033 }, { "epoch": 0.8348374469935849, "grad_norm": 2.4267125978472524, "learning_rate": 6.9846947989203e-07, "loss": 0.7533, "step": 23034 }, { "epoch": 0.8348736906962415, "grad_norm": 2.5240873376305135, "learning_rate": 6.981703064878904e-07, "loss": 0.9853, "step": 23035 }, { "epoch": 0.8349099343988982, "grad_norm": 2.9141651465036715, "learning_rate": 6.978711923603632e-07, "loss": 0.9681, "step": 23036 }, { "epoch": 0.8349461781015548, "grad_norm": 2.5816338104553567, "learning_rate": 6.975721375135674e-07, "loss": 0.876, "step": 23037 }, { "epoch": 0.8349824218042116, "grad_norm": 2.4920300127385273, "learning_rate": 6.972731419516265e-07, "loss": 1.0172, "step": 23038 }, { "epoch": 0.8350186655068682, "grad_norm": 2.2896197062870183, "learning_rate": 6.969742056786599e-07, "loss": 0.9878, "step": 23039 }, { "epoch": 0.8350549092095249, "grad_norm": 2.423212527300648, "learning_rate": 6.96675328698786e-07, "loss": 0.902, "step": 23040 }, { "epoch": 0.8350911529121815, "grad_norm": 2.3999952758575156, "learning_rate": 6.96376511016123e-07, "loss": 0.8962, "step": 23041 }, { "epoch": 0.8351273966148381, "grad_norm": 2.4559136666116954, "learning_rate": 6.960777526347889e-07, "loss": 0.8988, "step": 23042 }, { "epoch": 0.8351636403174948, "grad_norm": 2.5258443653371847, "learning_rate": 6.957790535589016e-07, "loss": 0.8602, "step": 23043 }, { "epoch": 0.8351998840201516, "grad_norm": 2.046902805451003, "learning_rate": 6.954804137925747e-07, "loss": 0.7587, "step": 23044 }, { "epoch": 0.8352361277228082, "grad_norm": 2.4716965398559276, "learning_rate": 6.951818333399246e-07, "loss": 0.8298, "step": 23045 }, { "epoch": 0.8352723714254648, "grad_norm": 2.1466268537300466, "learning_rate": 6.948833122050652e-07, "loss": 0.8084, "step": 23046 }, { "epoch": 0.8353086151281215, "grad_norm": 2.017138694564685, "learning_rate": 6.94584850392111e-07, "loss": 0.9345, "step": 23047 }, { "epoch": 0.8353448588307781, "grad_norm": 2.191425059375313, "learning_rate": 6.942864479051731e-07, "loss": 0.7238, "step": 23048 }, { "epoch": 0.8353811025334348, "grad_norm": 2.3620447581872694, "learning_rate": 6.939881047483638e-07, "loss": 0.7767, "step": 23049 }, { "epoch": 0.8354173462360914, "grad_norm": 2.3373620032787015, "learning_rate": 6.936898209257953e-07, "loss": 0.7596, "step": 23050 }, { "epoch": 0.8354535899387482, "grad_norm": 2.7941764609352404, "learning_rate": 6.933915964415755e-07, "loss": 0.9805, "step": 23051 }, { "epoch": 0.8354898336414048, "grad_norm": 2.0525561282770357, "learning_rate": 6.930934312998161e-07, "loss": 0.7468, "step": 23052 }, { "epoch": 0.8355260773440615, "grad_norm": 2.558698114416695, "learning_rate": 6.927953255046221e-07, "loss": 0.8582, "step": 23053 }, { "epoch": 0.8355623210467181, "grad_norm": 2.4277195558404743, "learning_rate": 6.924972790601059e-07, "loss": 0.7852, "step": 23054 }, { "epoch": 0.8355985647493748, "grad_norm": 2.4783670468721475, "learning_rate": 6.921992919703713e-07, "loss": 1.0266, "step": 23055 }, { "epoch": 0.8356348084520314, "grad_norm": 2.270743534123811, "learning_rate": 6.919013642395262e-07, "loss": 0.869, "step": 23056 }, { "epoch": 0.8356710521546882, "grad_norm": 2.2283919992576853, "learning_rate": 6.916034958716733e-07, "loss": 0.8579, "step": 23057 }, { "epoch": 0.8357072958573448, "grad_norm": 2.281902916550822, "learning_rate": 6.913056868709189e-07, "loss": 0.7764, "step": 23058 }, { "epoch": 0.8357435395600015, "grad_norm": 2.7533163522373285, "learning_rate": 6.910079372413675e-07, "loss": 0.9949, "step": 23059 }, { "epoch": 0.8357797832626581, "grad_norm": 2.5535883321217794, "learning_rate": 6.907102469871196e-07, "loss": 0.9044, "step": 23060 }, { "epoch": 0.8358160269653148, "grad_norm": 2.050717008311951, "learning_rate": 6.904126161122781e-07, "loss": 0.7631, "step": 23061 }, { "epoch": 0.8358522706679714, "grad_norm": 2.717468626735123, "learning_rate": 6.901150446209448e-07, "loss": 0.8921, "step": 23062 }, { "epoch": 0.835888514370628, "grad_norm": 2.5411484856213997, "learning_rate": 6.898175325172201e-07, "loss": 0.8354, "step": 23063 }, { "epoch": 0.8359247580732848, "grad_norm": 2.1216926101538056, "learning_rate": 6.895200798052027e-07, "loss": 0.6579, "step": 23064 }, { "epoch": 0.8359610017759415, "grad_norm": 2.286693247715015, "learning_rate": 6.892226864889912e-07, "loss": 0.8087, "step": 23065 }, { "epoch": 0.8359972454785981, "grad_norm": 2.216558675659309, "learning_rate": 6.889253525726841e-07, "loss": 0.966, "step": 23066 }, { "epoch": 0.8360334891812548, "grad_norm": 2.4738432706406868, "learning_rate": 6.886280780603799e-07, "loss": 0.8152, "step": 23067 }, { "epoch": 0.8360697328839114, "grad_norm": 2.602080761767582, "learning_rate": 6.88330862956172e-07, "loss": 0.9492, "step": 23068 }, { "epoch": 0.836105976586568, "grad_norm": 2.1225847880694886, "learning_rate": 6.880337072641569e-07, "loss": 0.7684, "step": 23069 }, { "epoch": 0.8361422202892248, "grad_norm": 2.4075326882081134, "learning_rate": 6.877366109884309e-07, "loss": 0.9861, "step": 23070 }, { "epoch": 0.8361784639918814, "grad_norm": 2.3593669034017366, "learning_rate": 6.874395741330852e-07, "loss": 0.905, "step": 23071 }, { "epoch": 0.8362147076945381, "grad_norm": 2.729407317374979, "learning_rate": 6.871425967022155e-07, "loss": 0.7685, "step": 23072 }, { "epoch": 0.8362509513971947, "grad_norm": 2.2261016136906395, "learning_rate": 6.8684567869991e-07, "loss": 0.9379, "step": 23073 }, { "epoch": 0.8362871950998514, "grad_norm": 2.2025791800379877, "learning_rate": 6.865488201302644e-07, "loss": 0.8272, "step": 23074 }, { "epoch": 0.836323438802508, "grad_norm": 2.089710924431979, "learning_rate": 6.862520209973667e-07, "loss": 0.8109, "step": 23075 }, { "epoch": 0.8363596825051647, "grad_norm": 2.4258421821725245, "learning_rate": 6.859552813053083e-07, "loss": 0.8586, "step": 23076 }, { "epoch": 0.8363959262078214, "grad_norm": 2.5175796952206353, "learning_rate": 6.856586010581761e-07, "loss": 0.8129, "step": 23077 }, { "epoch": 0.8364321699104781, "grad_norm": 2.3150801520807107, "learning_rate": 6.853619802600587e-07, "loss": 0.7111, "step": 23078 }, { "epoch": 0.8364684136131347, "grad_norm": 2.352797288095093, "learning_rate": 6.850654189150452e-07, "loss": 0.9177, "step": 23079 }, { "epoch": 0.8365046573157914, "grad_norm": 2.1824400056382145, "learning_rate": 6.847689170272198e-07, "loss": 0.8242, "step": 23080 }, { "epoch": 0.836540901018448, "grad_norm": 2.4940521083070837, "learning_rate": 6.844724746006687e-07, "loss": 0.9142, "step": 23081 }, { "epoch": 0.8365771447211047, "grad_norm": 2.45175908167887, "learning_rate": 6.841760916394769e-07, "loss": 0.9328, "step": 23082 }, { "epoch": 0.8366133884237613, "grad_norm": 2.5554624359288245, "learning_rate": 6.838797681477293e-07, "loss": 1.0603, "step": 23083 }, { "epoch": 0.8366496321264181, "grad_norm": 2.4588876097177037, "learning_rate": 6.835835041295075e-07, "loss": 0.746, "step": 23084 }, { "epoch": 0.8366858758290747, "grad_norm": 2.4797850296651163, "learning_rate": 6.832872995888945e-07, "loss": 0.9002, "step": 23085 }, { "epoch": 0.8367221195317314, "grad_norm": 2.219267223472661, "learning_rate": 6.829911545299716e-07, "loss": 0.7851, "step": 23086 }, { "epoch": 0.836758363234388, "grad_norm": 2.41257847914684, "learning_rate": 6.826950689568207e-07, "loss": 0.7829, "step": 23087 }, { "epoch": 0.8367946069370447, "grad_norm": 2.2975389568946203, "learning_rate": 6.8239904287352e-07, "loss": 0.8863, "step": 23088 }, { "epoch": 0.8368308506397013, "grad_norm": 2.3428236512180804, "learning_rate": 6.821030762841491e-07, "loss": 0.7236, "step": 23089 }, { "epoch": 0.8368670943423581, "grad_norm": 2.6072309135312137, "learning_rate": 6.81807169192788e-07, "loss": 0.7837, "step": 23090 }, { "epoch": 0.8369033380450147, "grad_norm": 2.507545609216239, "learning_rate": 6.815113216035107e-07, "loss": 1.1221, "step": 23091 }, { "epoch": 0.8369395817476714, "grad_norm": 2.4918361023852578, "learning_rate": 6.812155335203974e-07, "loss": 0.8895, "step": 23092 }, { "epoch": 0.836975825450328, "grad_norm": 2.1116614834121292, "learning_rate": 6.809198049475196e-07, "loss": 0.8368, "step": 23093 }, { "epoch": 0.8370120691529847, "grad_norm": 2.2394538846222476, "learning_rate": 6.80624135888957e-07, "loss": 0.9169, "step": 23094 }, { "epoch": 0.8370483128556413, "grad_norm": 2.46210176235975, "learning_rate": 6.803285263487807e-07, "loss": 0.885, "step": 23095 }, { "epoch": 0.8370845565582979, "grad_norm": 2.830885564578396, "learning_rate": 6.800329763310654e-07, "loss": 0.8791, "step": 23096 }, { "epoch": 0.8371208002609547, "grad_norm": 2.4308248162477906, "learning_rate": 6.797374858398826e-07, "loss": 0.9887, "step": 23097 }, { "epoch": 0.8371570439636113, "grad_norm": 2.39938050723512, "learning_rate": 6.79442054879304e-07, "loss": 0.861, "step": 23098 }, { "epoch": 0.837193287666268, "grad_norm": 2.215221515578687, "learning_rate": 6.791466834534022e-07, "loss": 0.9633, "step": 23099 }, { "epoch": 0.8372295313689246, "grad_norm": 2.526661577483228, "learning_rate": 6.788513715662448e-07, "loss": 0.9747, "step": 23100 }, { "epoch": 0.8372657750715813, "grad_norm": 2.461579963454722, "learning_rate": 6.78556119221902e-07, "loss": 0.8053, "step": 23101 }, { "epoch": 0.8373020187742379, "grad_norm": 2.564987018782487, "learning_rate": 6.782609264244422e-07, "loss": 0.8156, "step": 23102 }, { "epoch": 0.8373382624768947, "grad_norm": 2.4690690986207873, "learning_rate": 6.779657931779348e-07, "loss": 0.845, "step": 23103 }, { "epoch": 0.8373745061795513, "grad_norm": 2.4359046222548355, "learning_rate": 6.776707194864435e-07, "loss": 0.9128, "step": 23104 }, { "epoch": 0.837410749882208, "grad_norm": 2.4277587850075864, "learning_rate": 6.773757053540353e-07, "loss": 0.8071, "step": 23105 }, { "epoch": 0.8374469935848646, "grad_norm": 2.120411467243474, "learning_rate": 6.77080750784776e-07, "loss": 0.8673, "step": 23106 }, { "epoch": 0.8374832372875213, "grad_norm": 2.664597324431818, "learning_rate": 6.767858557827306e-07, "loss": 0.8414, "step": 23107 }, { "epoch": 0.8375194809901779, "grad_norm": 2.057211384580261, "learning_rate": 6.764910203519603e-07, "loss": 0.6749, "step": 23108 }, { "epoch": 0.8375557246928346, "grad_norm": 2.889328498455867, "learning_rate": 6.761962444965292e-07, "loss": 1.083, "step": 23109 }, { "epoch": 0.8375919683954913, "grad_norm": 2.2818546431730042, "learning_rate": 6.759015282204995e-07, "loss": 0.8787, "step": 23110 }, { "epoch": 0.837628212098148, "grad_norm": 2.2720333678999194, "learning_rate": 6.756068715279313e-07, "loss": 0.792, "step": 23111 }, { "epoch": 0.8376644558008046, "grad_norm": 2.3070885952894615, "learning_rate": 6.753122744228857e-07, "loss": 0.7455, "step": 23112 }, { "epoch": 0.8377006995034613, "grad_norm": 2.3973149399732274, "learning_rate": 6.750177369094191e-07, "loss": 0.8582, "step": 23113 }, { "epoch": 0.8377369432061179, "grad_norm": 2.3338123461032474, "learning_rate": 6.747232589915947e-07, "loss": 0.9933, "step": 23114 }, { "epoch": 0.8377731869087746, "grad_norm": 2.1022864348808787, "learning_rate": 6.744288406734673e-07, "loss": 0.7377, "step": 23115 }, { "epoch": 0.8378094306114313, "grad_norm": 2.2814771109218603, "learning_rate": 6.741344819590951e-07, "loss": 1.0183, "step": 23116 }, { "epoch": 0.837845674314088, "grad_norm": 2.1350255335429367, "learning_rate": 6.738401828525326e-07, "loss": 0.7909, "step": 23117 }, { "epoch": 0.8378819180167446, "grad_norm": 1.9827881135490248, "learning_rate": 6.735459433578362e-07, "loss": 0.8324, "step": 23118 }, { "epoch": 0.8379181617194013, "grad_norm": 2.446880664142647, "learning_rate": 6.732517634790609e-07, "loss": 0.8331, "step": 23119 }, { "epoch": 0.8379544054220579, "grad_norm": 2.6175800839018466, "learning_rate": 6.729576432202573e-07, "loss": 0.9468, "step": 23120 }, { "epoch": 0.8379906491247145, "grad_norm": 2.1223505464728922, "learning_rate": 6.726635825854832e-07, "loss": 0.9058, "step": 23121 }, { "epoch": 0.8380268928273712, "grad_norm": 2.4390262909937803, "learning_rate": 6.723695815787862e-07, "loss": 0.7649, "step": 23122 }, { "epoch": 0.838063136530028, "grad_norm": 2.268295157299537, "learning_rate": 6.720756402042205e-07, "loss": 0.7917, "step": 23123 }, { "epoch": 0.8380993802326846, "grad_norm": 2.290612622113731, "learning_rate": 6.717817584658337e-07, "loss": 1.0792, "step": 23124 }, { "epoch": 0.8381356239353412, "grad_norm": 2.312772846943085, "learning_rate": 6.714879363676768e-07, "loss": 0.7565, "step": 23125 }, { "epoch": 0.8381718676379979, "grad_norm": 2.147665316037158, "learning_rate": 6.711941739137994e-07, "loss": 0.9698, "step": 23126 }, { "epoch": 0.8382081113406545, "grad_norm": 2.2172008793078937, "learning_rate": 6.709004711082473e-07, "loss": 0.9226, "step": 23127 }, { "epoch": 0.8382443550433112, "grad_norm": 2.3449909161851474, "learning_rate": 6.706068279550687e-07, "loss": 0.92, "step": 23128 }, { "epoch": 0.8382805987459679, "grad_norm": 2.0111968249279024, "learning_rate": 6.703132444583093e-07, "loss": 0.6977, "step": 23129 }, { "epoch": 0.8383168424486246, "grad_norm": 2.4604869432517185, "learning_rate": 6.70019720622016e-07, "loss": 0.811, "step": 23130 }, { "epoch": 0.8383530861512812, "grad_norm": 2.3813011118283964, "learning_rate": 6.697262564502315e-07, "loss": 1.0045, "step": 23131 }, { "epoch": 0.8383893298539379, "grad_norm": 2.7285705545619288, "learning_rate": 6.69432851947e-07, "loss": 0.809, "step": 23132 }, { "epoch": 0.8384255735565945, "grad_norm": 2.5092366014200094, "learning_rate": 6.691395071163648e-07, "loss": 0.8645, "step": 23133 }, { "epoch": 0.8384618172592512, "grad_norm": 2.079018711595414, "learning_rate": 6.688462219623693e-07, "loss": 0.9998, "step": 23134 }, { "epoch": 0.8384980609619078, "grad_norm": 2.3659451482564466, "learning_rate": 6.685529964890519e-07, "loss": 0.8506, "step": 23135 }, { "epoch": 0.8385343046645646, "grad_norm": 2.546989402454346, "learning_rate": 6.682598307004551e-07, "loss": 1.1064, "step": 23136 }, { "epoch": 0.8385705483672212, "grad_norm": 2.2241461975836856, "learning_rate": 6.679667246006189e-07, "loss": 0.824, "step": 23137 }, { "epoch": 0.8386067920698779, "grad_norm": 2.1596459812946995, "learning_rate": 6.676736781935805e-07, "loss": 0.6955, "step": 23138 }, { "epoch": 0.8386430357725345, "grad_norm": 2.3857517639154975, "learning_rate": 6.673806914833797e-07, "loss": 0.9402, "step": 23139 }, { "epoch": 0.8386792794751912, "grad_norm": 2.469853228800537, "learning_rate": 6.670877644740504e-07, "loss": 0.9277, "step": 23140 }, { "epoch": 0.8387155231778478, "grad_norm": 2.0808644788095725, "learning_rate": 6.667948971696336e-07, "loss": 1.0166, "step": 23141 }, { "epoch": 0.8387517668805045, "grad_norm": 2.615967899760355, "learning_rate": 6.665020895741608e-07, "loss": 0.8334, "step": 23142 }, { "epoch": 0.8387880105831612, "grad_norm": 2.169649851172508, "learning_rate": 6.662093416916703e-07, "loss": 0.9235, "step": 23143 }, { "epoch": 0.8388242542858179, "grad_norm": 2.4621646003835966, "learning_rate": 6.659166535261924e-07, "loss": 0.966, "step": 23144 }, { "epoch": 0.8388604979884745, "grad_norm": 2.030847088234186, "learning_rate": 6.656240250817619e-07, "loss": 0.8565, "step": 23145 }, { "epoch": 0.8388967416911312, "grad_norm": 2.5582767707955294, "learning_rate": 6.653314563624124e-07, "loss": 0.963, "step": 23146 }, { "epoch": 0.8389329853937878, "grad_norm": 2.469485447417643, "learning_rate": 6.65038947372173e-07, "loss": 0.9152, "step": 23147 }, { "epoch": 0.8389692290964444, "grad_norm": 2.490705545664555, "learning_rate": 6.647464981150748e-07, "loss": 0.8972, "step": 23148 }, { "epoch": 0.8390054727991012, "grad_norm": 2.4590753213199683, "learning_rate": 6.644541085951478e-07, "loss": 0.8018, "step": 23149 }, { "epoch": 0.8390417165017579, "grad_norm": 2.5760675538598026, "learning_rate": 6.641617788164223e-07, "loss": 0.8915, "step": 23150 }, { "epoch": 0.8390779602044145, "grad_norm": 2.380851476152797, "learning_rate": 6.638695087829245e-07, "loss": 0.8854, "step": 23151 }, { "epoch": 0.8391142039070711, "grad_norm": 2.294300471307619, "learning_rate": 6.635772984986821e-07, "loss": 0.8718, "step": 23152 }, { "epoch": 0.8391504476097278, "grad_norm": 2.488245384942543, "learning_rate": 6.632851479677221e-07, "loss": 0.9059, "step": 23153 }, { "epoch": 0.8391866913123844, "grad_norm": 2.3102003654737833, "learning_rate": 6.629930571940713e-07, "loss": 1.0363, "step": 23154 }, { "epoch": 0.8392229350150411, "grad_norm": 2.4050492025778385, "learning_rate": 6.627010261817523e-07, "loss": 0.9093, "step": 23155 }, { "epoch": 0.8392591787176978, "grad_norm": 2.471213188083937, "learning_rate": 6.624090549347895e-07, "loss": 0.9391, "step": 23156 }, { "epoch": 0.8392954224203545, "grad_norm": 1.9869674239030335, "learning_rate": 6.621171434572076e-07, "loss": 0.6945, "step": 23157 }, { "epoch": 0.8393316661230111, "grad_norm": 2.3942351366754555, "learning_rate": 6.618252917530277e-07, "loss": 0.875, "step": 23158 }, { "epoch": 0.8393679098256678, "grad_norm": 2.328378568922632, "learning_rate": 6.615334998262724e-07, "loss": 0.9669, "step": 23159 }, { "epoch": 0.8394041535283244, "grad_norm": 2.586782849103749, "learning_rate": 6.61241767680959e-07, "loss": 0.8117, "step": 23160 }, { "epoch": 0.8394403972309811, "grad_norm": 2.4143013899525725, "learning_rate": 6.609500953211129e-07, "loss": 0.9627, "step": 23161 }, { "epoch": 0.8394766409336378, "grad_norm": 2.397429619009769, "learning_rate": 6.606584827507484e-07, "loss": 0.9044, "step": 23162 }, { "epoch": 0.8395128846362945, "grad_norm": 2.5408097258296594, "learning_rate": 6.603669299738874e-07, "loss": 0.9417, "step": 23163 }, { "epoch": 0.8395491283389511, "grad_norm": 2.737508586066667, "learning_rate": 6.600754369945445e-07, "loss": 0.9146, "step": 23164 }, { "epoch": 0.8395853720416078, "grad_norm": 2.5719447595097256, "learning_rate": 6.597840038167369e-07, "loss": 0.9146, "step": 23165 }, { "epoch": 0.8396216157442644, "grad_norm": 2.363603785531299, "learning_rate": 6.594926304444826e-07, "loss": 0.9488, "step": 23166 }, { "epoch": 0.8396578594469211, "grad_norm": 3.6052045174068925, "learning_rate": 6.592013168817929e-07, "loss": 1.0818, "step": 23167 }, { "epoch": 0.8396941031495777, "grad_norm": 2.469915441962761, "learning_rate": 6.589100631326845e-07, "loss": 0.9625, "step": 23168 }, { "epoch": 0.8397303468522345, "grad_norm": 2.0254272753870968, "learning_rate": 6.586188692011697e-07, "loss": 0.8771, "step": 23169 }, { "epoch": 0.8397665905548911, "grad_norm": 2.189599536775283, "learning_rate": 6.583277350912625e-07, "loss": 0.8179, "step": 23170 }, { "epoch": 0.8398028342575478, "grad_norm": 1.9091101865854325, "learning_rate": 6.580366608069722e-07, "loss": 0.684, "step": 23171 }, { "epoch": 0.8398390779602044, "grad_norm": 2.712539219040141, "learning_rate": 6.577456463523107e-07, "loss": 0.9888, "step": 23172 }, { "epoch": 0.839875321662861, "grad_norm": 2.431741191007831, "learning_rate": 6.574546917312885e-07, "loss": 0.9092, "step": 23173 }, { "epoch": 0.8399115653655177, "grad_norm": 2.2974671968070437, "learning_rate": 6.57163796947915e-07, "loss": 0.7762, "step": 23174 }, { "epoch": 0.8399478090681745, "grad_norm": 2.584858916633219, "learning_rate": 6.56872962006197e-07, "loss": 0.921, "step": 23175 }, { "epoch": 0.8399840527708311, "grad_norm": 2.391764350184708, "learning_rate": 6.56582186910143e-07, "loss": 0.9548, "step": 23176 }, { "epoch": 0.8400202964734877, "grad_norm": 2.1957374096629705, "learning_rate": 6.562914716637608e-07, "loss": 0.7107, "step": 23177 }, { "epoch": 0.8400565401761444, "grad_norm": 2.511065861202695, "learning_rate": 6.560008162710534e-07, "loss": 0.9718, "step": 23178 }, { "epoch": 0.840092783878801, "grad_norm": 1.9938893668201907, "learning_rate": 6.557102207360294e-07, "loss": 0.9736, "step": 23179 }, { "epoch": 0.8401290275814577, "grad_norm": 2.701083375951439, "learning_rate": 6.554196850626887e-07, "loss": 1.033, "step": 23180 }, { "epoch": 0.8401652712841143, "grad_norm": 2.659871787650957, "learning_rate": 6.551292092550399e-07, "loss": 0.9952, "step": 23181 }, { "epoch": 0.8402015149867711, "grad_norm": 2.3136686999056897, "learning_rate": 6.548387933170814e-07, "loss": 0.8691, "step": 23182 }, { "epoch": 0.8402377586894277, "grad_norm": 2.550777522853275, "learning_rate": 6.545484372528177e-07, "loss": 0.9877, "step": 23183 }, { "epoch": 0.8402740023920844, "grad_norm": 2.462336622557082, "learning_rate": 6.542581410662469e-07, "loss": 0.8028, "step": 23184 }, { "epoch": 0.840310246094741, "grad_norm": 2.61982807869552, "learning_rate": 6.539679047613712e-07, "loss": 1.107, "step": 23185 }, { "epoch": 0.8403464897973977, "grad_norm": 2.597073178105301, "learning_rate": 6.536777283421903e-07, "loss": 0.9461, "step": 23186 }, { "epoch": 0.8403827335000543, "grad_norm": 2.233803122661675, "learning_rate": 6.533876118127008e-07, "loss": 0.9568, "step": 23187 }, { "epoch": 0.8404189772027111, "grad_norm": 2.0872133953016094, "learning_rate": 6.530975551769009e-07, "loss": 0.8114, "step": 23188 }, { "epoch": 0.8404552209053677, "grad_norm": 2.338815532479499, "learning_rate": 6.528075584387883e-07, "loss": 0.9231, "step": 23189 }, { "epoch": 0.8404914646080244, "grad_norm": 2.7985696326782143, "learning_rate": 6.525176216023593e-07, "loss": 0.7609, "step": 23190 }, { "epoch": 0.840527708310681, "grad_norm": 2.3481448929437048, "learning_rate": 6.522277446716068e-07, "loss": 0.9355, "step": 23191 }, { "epoch": 0.8405639520133377, "grad_norm": 2.5218755467544796, "learning_rate": 6.519379276505267e-07, "loss": 0.9074, "step": 23192 }, { "epoch": 0.8406001957159943, "grad_norm": 2.417584097075321, "learning_rate": 6.516481705431127e-07, "loss": 0.8744, "step": 23193 }, { "epoch": 0.840636439418651, "grad_norm": 2.33533858206803, "learning_rate": 6.513584733533578e-07, "loss": 0.8656, "step": 23194 }, { "epoch": 0.8406726831213077, "grad_norm": 2.317377873573769, "learning_rate": 6.510688360852524e-07, "loss": 0.8584, "step": 23195 }, { "epoch": 0.8407089268239644, "grad_norm": 2.164186988134531, "learning_rate": 6.50779258742788e-07, "loss": 1.1598, "step": 23196 }, { "epoch": 0.840745170526621, "grad_norm": 2.319708134954212, "learning_rate": 6.50489741329956e-07, "loss": 0.9714, "step": 23197 }, { "epoch": 0.8407814142292777, "grad_norm": 2.2325893369919076, "learning_rate": 6.502002838507443e-07, "loss": 0.7786, "step": 23198 }, { "epoch": 0.8408176579319343, "grad_norm": 2.305565056524404, "learning_rate": 6.499108863091413e-07, "loss": 0.8274, "step": 23199 }, { "epoch": 0.840853901634591, "grad_norm": 2.3754086400002277, "learning_rate": 6.496215487091362e-07, "loss": 0.7247, "step": 23200 }, { "epoch": 0.8408901453372477, "grad_norm": 2.4451122891707655, "learning_rate": 6.493322710547156e-07, "loss": 0.8594, "step": 23201 }, { "epoch": 0.8409263890399044, "grad_norm": 2.3160006332848035, "learning_rate": 6.490430533498648e-07, "loss": 0.9894, "step": 23202 }, { "epoch": 0.840962632742561, "grad_norm": 2.3083424014013674, "learning_rate": 6.487538955985689e-07, "loss": 0.8218, "step": 23203 }, { "epoch": 0.8409988764452176, "grad_norm": 2.4394336682265316, "learning_rate": 6.484647978048137e-07, "loss": 0.9163, "step": 23204 }, { "epoch": 0.8410351201478743, "grad_norm": 2.185202769700659, "learning_rate": 6.481757599725807e-07, "loss": 0.638, "step": 23205 }, { "epoch": 0.8410713638505309, "grad_norm": 2.356919197173373, "learning_rate": 6.478867821058554e-07, "loss": 0.7612, "step": 23206 }, { "epoch": 0.8411076075531876, "grad_norm": 2.11109091989046, "learning_rate": 6.475978642086156e-07, "loss": 0.779, "step": 23207 }, { "epoch": 0.8411438512558443, "grad_norm": 2.189801065455277, "learning_rate": 6.473090062848475e-07, "loss": 0.8344, "step": 23208 }, { "epoch": 0.841180094958501, "grad_norm": 2.316765253153706, "learning_rate": 6.470202083385275e-07, "loss": 0.8455, "step": 23209 }, { "epoch": 0.8412163386611576, "grad_norm": 2.517283657926676, "learning_rate": 6.467314703736371e-07, "loss": 0.7273, "step": 23210 }, { "epoch": 0.8412525823638143, "grad_norm": 2.5250624216976076, "learning_rate": 6.46442792394154e-07, "loss": 0.8357, "step": 23211 }, { "epoch": 0.8412888260664709, "grad_norm": 2.2487553908191686, "learning_rate": 6.461541744040562e-07, "loss": 0.9435, "step": 23212 }, { "epoch": 0.8413250697691276, "grad_norm": 2.374280569634131, "learning_rate": 6.458656164073212e-07, "loss": 0.8467, "step": 23213 }, { "epoch": 0.8413613134717842, "grad_norm": 2.257813584567392, "learning_rate": 6.45577118407924e-07, "loss": 0.8299, "step": 23214 }, { "epoch": 0.841397557174441, "grad_norm": 2.014618681757555, "learning_rate": 6.452886804098407e-07, "loss": 0.8711, "step": 23215 }, { "epoch": 0.8414338008770976, "grad_norm": 2.2438251648696976, "learning_rate": 6.450003024170454e-07, "loss": 0.8214, "step": 23216 }, { "epoch": 0.8414700445797543, "grad_norm": 2.1371300955565182, "learning_rate": 6.447119844335137e-07, "loss": 0.7664, "step": 23217 }, { "epoch": 0.8415062882824109, "grad_norm": 2.3035722695107554, "learning_rate": 6.444237264632159e-07, "loss": 0.9775, "step": 23218 }, { "epoch": 0.8415425319850676, "grad_norm": 2.338706962556013, "learning_rate": 6.441355285101241e-07, "loss": 0.9527, "step": 23219 }, { "epoch": 0.8415787756877242, "grad_norm": 2.5115708042603333, "learning_rate": 6.438473905782111e-07, "loss": 0.7796, "step": 23220 }, { "epoch": 0.841615019390381, "grad_norm": 2.0816230935909257, "learning_rate": 6.435593126714473e-07, "loss": 0.8184, "step": 23221 }, { "epoch": 0.8416512630930376, "grad_norm": 2.3624794272105043, "learning_rate": 6.432712947938008e-07, "loss": 0.8407, "step": 23222 }, { "epoch": 0.8416875067956943, "grad_norm": 2.218306412611705, "learning_rate": 6.429833369492405e-07, "loss": 0.8484, "step": 23223 }, { "epoch": 0.8417237504983509, "grad_norm": 2.2083144204249154, "learning_rate": 6.426954391417361e-07, "loss": 0.903, "step": 23224 }, { "epoch": 0.8417599942010076, "grad_norm": 2.1573696550478245, "learning_rate": 6.424076013752522e-07, "loss": 0.8496, "step": 23225 }, { "epoch": 0.8417962379036642, "grad_norm": 2.3224259456941376, "learning_rate": 6.421198236537574e-07, "loss": 0.9933, "step": 23226 }, { "epoch": 0.8418324816063208, "grad_norm": 2.5955908166008466, "learning_rate": 6.418321059812133e-07, "loss": 0.7635, "step": 23227 }, { "epoch": 0.8418687253089776, "grad_norm": 2.408271251290363, "learning_rate": 6.415444483615896e-07, "loss": 1.024, "step": 23228 }, { "epoch": 0.8419049690116343, "grad_norm": 2.4338619234477363, "learning_rate": 6.412568507988465e-07, "loss": 0.8664, "step": 23229 }, { "epoch": 0.8419412127142909, "grad_norm": 2.097318508906517, "learning_rate": 6.409693132969485e-07, "loss": 0.7578, "step": 23230 }, { "epoch": 0.8419774564169475, "grad_norm": 2.631984949083242, "learning_rate": 6.406818358598565e-07, "loss": 0.9196, "step": 23231 }, { "epoch": 0.8420137001196042, "grad_norm": 2.302306923945774, "learning_rate": 6.403944184915323e-07, "loss": 0.8175, "step": 23232 }, { "epoch": 0.8420499438222608, "grad_norm": 2.4792472636075704, "learning_rate": 6.401070611959376e-07, "loss": 0.9862, "step": 23233 }, { "epoch": 0.8420861875249176, "grad_norm": 2.305493065102483, "learning_rate": 6.398197639770293e-07, "loss": 0.7323, "step": 23234 }, { "epoch": 0.8421224312275742, "grad_norm": 2.05778225096087, "learning_rate": 6.395325268387681e-07, "loss": 0.6885, "step": 23235 }, { "epoch": 0.8421586749302309, "grad_norm": 2.2716796898594445, "learning_rate": 6.392453497851108e-07, "loss": 0.8434, "step": 23236 }, { "epoch": 0.8421949186328875, "grad_norm": 2.1633485111377713, "learning_rate": 6.389582328200172e-07, "loss": 0.7414, "step": 23237 }, { "epoch": 0.8422311623355442, "grad_norm": 2.400936767455237, "learning_rate": 6.386711759474401e-07, "loss": 0.8628, "step": 23238 }, { "epoch": 0.8422674060382008, "grad_norm": 2.563668394489827, "learning_rate": 6.383841791713369e-07, "loss": 0.8014, "step": 23239 }, { "epoch": 0.8423036497408575, "grad_norm": 2.097453505216586, "learning_rate": 6.380972424956622e-07, "loss": 0.7819, "step": 23240 }, { "epoch": 0.8423398934435142, "grad_norm": 2.509838440532239, "learning_rate": 6.378103659243701e-07, "loss": 0.9636, "step": 23241 }, { "epoch": 0.8423761371461709, "grad_norm": 2.3435368932972414, "learning_rate": 6.375235494614118e-07, "loss": 1.074, "step": 23242 }, { "epoch": 0.8424123808488275, "grad_norm": 2.239321343092117, "learning_rate": 6.37236793110741e-07, "loss": 0.782, "step": 23243 }, { "epoch": 0.8424486245514842, "grad_norm": 2.584048289727952, "learning_rate": 6.369500968763093e-07, "loss": 0.7739, "step": 23244 }, { "epoch": 0.8424848682541408, "grad_norm": 2.5447312484279796, "learning_rate": 6.366634607620659e-07, "loss": 1.0521, "step": 23245 }, { "epoch": 0.8425211119567975, "grad_norm": 2.2798539251730836, "learning_rate": 6.363768847719621e-07, "loss": 0.9446, "step": 23246 }, { "epoch": 0.8425573556594542, "grad_norm": 2.327330047227623, "learning_rate": 6.360903689099435e-07, "loss": 0.7852, "step": 23247 }, { "epoch": 0.8425935993621109, "grad_norm": 2.223357984749873, "learning_rate": 6.358039131799625e-07, "loss": 0.8415, "step": 23248 }, { "epoch": 0.8426298430647675, "grad_norm": 2.236587923696558, "learning_rate": 6.355175175859634e-07, "loss": 0.6746, "step": 23249 }, { "epoch": 0.8426660867674242, "grad_norm": 2.310065126867058, "learning_rate": 6.352311821318946e-07, "loss": 0.9313, "step": 23250 }, { "epoch": 0.8427023304700808, "grad_norm": 2.397799407893778, "learning_rate": 6.349449068216989e-07, "loss": 0.9112, "step": 23251 }, { "epoch": 0.8427385741727375, "grad_norm": 2.306699113667713, "learning_rate": 6.346586916593228e-07, "loss": 0.8085, "step": 23252 }, { "epoch": 0.8427748178753941, "grad_norm": 2.4723478981991076, "learning_rate": 6.343725366487108e-07, "loss": 0.8432, "step": 23253 }, { "epoch": 0.8428110615780509, "grad_norm": 2.450324472465171, "learning_rate": 6.340864417938036e-07, "loss": 0.798, "step": 23254 }, { "epoch": 0.8428473052807075, "grad_norm": 2.0205679861460153, "learning_rate": 6.338004070985448e-07, "loss": 0.7906, "step": 23255 }, { "epoch": 0.8428835489833642, "grad_norm": 2.7174005819093856, "learning_rate": 6.335144325668763e-07, "loss": 1.0975, "step": 23256 }, { "epoch": 0.8429197926860208, "grad_norm": 2.3177369046427034, "learning_rate": 6.332285182027393e-07, "loss": 0.785, "step": 23257 }, { "epoch": 0.8429560363886774, "grad_norm": 2.1276940045770383, "learning_rate": 6.329426640100711e-07, "loss": 0.8826, "step": 23258 }, { "epoch": 0.8429922800913341, "grad_norm": 2.600230761722533, "learning_rate": 6.326568699928115e-07, "loss": 0.9159, "step": 23259 }, { "epoch": 0.8430285237939908, "grad_norm": 2.338003667104849, "learning_rate": 6.323711361548995e-07, "loss": 0.9436, "step": 23260 }, { "epoch": 0.8430647674966475, "grad_norm": 2.2628114627011917, "learning_rate": 6.320854625002726e-07, "loss": 0.9983, "step": 23261 }, { "epoch": 0.8431010111993041, "grad_norm": 2.140365383983584, "learning_rate": 6.317998490328653e-07, "loss": 0.7504, "step": 23262 }, { "epoch": 0.8431372549019608, "grad_norm": 2.5492473065380543, "learning_rate": 6.31514295756614e-07, "loss": 0.9408, "step": 23263 }, { "epoch": 0.8431734986046174, "grad_norm": 2.2764433528927572, "learning_rate": 6.312288026754554e-07, "loss": 0.8803, "step": 23264 }, { "epoch": 0.8432097423072741, "grad_norm": 2.3652297469472385, "learning_rate": 6.309433697933204e-07, "loss": 0.8716, "step": 23265 }, { "epoch": 0.8432459860099307, "grad_norm": 2.4481622794884728, "learning_rate": 6.306579971141441e-07, "loss": 1.0061, "step": 23266 }, { "epoch": 0.8432822297125875, "grad_norm": 2.5939931194392134, "learning_rate": 6.303726846418567e-07, "loss": 0.7703, "step": 23267 }, { "epoch": 0.8433184734152441, "grad_norm": 2.322799412143429, "learning_rate": 6.300874323803929e-07, "loss": 0.829, "step": 23268 }, { "epoch": 0.8433547171179008, "grad_norm": 1.955189627284772, "learning_rate": 6.298022403336801e-07, "loss": 0.7383, "step": 23269 }, { "epoch": 0.8433909608205574, "grad_norm": 2.529063458592504, "learning_rate": 6.295171085056506e-07, "loss": 0.8826, "step": 23270 }, { "epoch": 0.8434272045232141, "grad_norm": 2.4274954636496564, "learning_rate": 6.292320369002314e-07, "loss": 0.958, "step": 23271 }, { "epoch": 0.8434634482258707, "grad_norm": 2.6741903784538854, "learning_rate": 6.289470255213515e-07, "loss": 0.9311, "step": 23272 }, { "epoch": 0.8434996919285275, "grad_norm": 2.1902463669151837, "learning_rate": 6.286620743729382e-07, "loss": 0.7188, "step": 23273 }, { "epoch": 0.8435359356311841, "grad_norm": 2.148318770803824, "learning_rate": 6.283771834589175e-07, "loss": 0.8107, "step": 23274 }, { "epoch": 0.8435721793338408, "grad_norm": 2.1761016432808344, "learning_rate": 6.280923527832155e-07, "loss": 0.8105, "step": 23275 }, { "epoch": 0.8436084230364974, "grad_norm": 2.181801679542834, "learning_rate": 6.278075823497564e-07, "loss": 0.8325, "step": 23276 }, { "epoch": 0.8436446667391541, "grad_norm": 2.609945434821648, "learning_rate": 6.27522872162466e-07, "loss": 0.9677, "step": 23277 }, { "epoch": 0.8436809104418107, "grad_norm": 2.380464357937294, "learning_rate": 6.272382222252643e-07, "loss": 0.8316, "step": 23278 }, { "epoch": 0.8437171541444674, "grad_norm": 2.502611293193738, "learning_rate": 6.269536325420761e-07, "loss": 0.7889, "step": 23279 }, { "epoch": 0.8437533978471241, "grad_norm": 2.4002020233381463, "learning_rate": 6.266691031168215e-07, "loss": 1.1465, "step": 23280 }, { "epoch": 0.8437896415497808, "grad_norm": 2.7120118456520443, "learning_rate": 6.263846339534236e-07, "loss": 0.8945, "step": 23281 }, { "epoch": 0.8438258852524374, "grad_norm": 2.3770645049071133, "learning_rate": 6.261002250557985e-07, "loss": 1.0177, "step": 23282 }, { "epoch": 0.843862128955094, "grad_norm": 2.3500742161631383, "learning_rate": 6.258158764278677e-07, "loss": 0.8187, "step": 23283 }, { "epoch": 0.8438983726577507, "grad_norm": 2.0742978625723993, "learning_rate": 6.25531588073549e-07, "loss": 0.8304, "step": 23284 }, { "epoch": 0.8439346163604073, "grad_norm": 2.338187875822981, "learning_rate": 6.25247359996759e-07, "loss": 0.8382, "step": 23285 }, { "epoch": 0.843970860063064, "grad_norm": 2.241609701575088, "learning_rate": 6.249631922014143e-07, "loss": 0.7843, "step": 23286 }, { "epoch": 0.8440071037657207, "grad_norm": 2.4884558217479396, "learning_rate": 6.246790846914308e-07, "loss": 0.9599, "step": 23287 }, { "epoch": 0.8440433474683774, "grad_norm": 2.3534112924844113, "learning_rate": 6.243950374707247e-07, "loss": 0.9426, "step": 23288 }, { "epoch": 0.844079591171034, "grad_norm": 2.131531547305724, "learning_rate": 6.24111050543208e-07, "loss": 0.7813, "step": 23289 }, { "epoch": 0.8441158348736907, "grad_norm": 2.670913346450912, "learning_rate": 6.238271239127941e-07, "loss": 0.805, "step": 23290 }, { "epoch": 0.8441520785763473, "grad_norm": 2.5530782460039227, "learning_rate": 6.235432575833966e-07, "loss": 0.7487, "step": 23291 }, { "epoch": 0.844188322279004, "grad_norm": 2.4492562302681073, "learning_rate": 6.232594515589257e-07, "loss": 0.9816, "step": 23292 }, { "epoch": 0.8442245659816607, "grad_norm": 2.3105832164053695, "learning_rate": 6.229757058432934e-07, "loss": 0.9222, "step": 23293 }, { "epoch": 0.8442608096843174, "grad_norm": 2.327620418985407, "learning_rate": 6.226920204404069e-07, "loss": 0.5997, "step": 23294 }, { "epoch": 0.844297053386974, "grad_norm": 2.28091927459213, "learning_rate": 6.224083953541787e-07, "loss": 0.9167, "step": 23295 }, { "epoch": 0.8443332970896307, "grad_norm": 2.107697065636297, "learning_rate": 6.221248305885147e-07, "loss": 0.753, "step": 23296 }, { "epoch": 0.8443695407922873, "grad_norm": 2.3499657350835133, "learning_rate": 6.218413261473233e-07, "loss": 0.8825, "step": 23297 }, { "epoch": 0.844405784494944, "grad_norm": 2.4018181525381244, "learning_rate": 6.215578820345102e-07, "loss": 0.8517, "step": 23298 }, { "epoch": 0.8444420281976006, "grad_norm": 2.6505259472984752, "learning_rate": 6.212744982539815e-07, "loss": 0.7655, "step": 23299 }, { "epoch": 0.8444782719002574, "grad_norm": 2.7065991963820832, "learning_rate": 6.209911748096425e-07, "loss": 0.962, "step": 23300 }, { "epoch": 0.844514515602914, "grad_norm": 2.371289327506141, "learning_rate": 6.207079117053961e-07, "loss": 0.7841, "step": 23301 }, { "epoch": 0.8445507593055707, "grad_norm": 2.283090833328356, "learning_rate": 6.204247089451459e-07, "loss": 0.944, "step": 23302 }, { "epoch": 0.8445870030082273, "grad_norm": 2.31515575754125, "learning_rate": 6.201415665327947e-07, "loss": 0.8359, "step": 23303 }, { "epoch": 0.844623246710884, "grad_norm": 2.5043801777306745, "learning_rate": 6.198584844722455e-07, "loss": 1.0079, "step": 23304 }, { "epoch": 0.8446594904135406, "grad_norm": 2.376508157284716, "learning_rate": 6.195754627673955e-07, "loss": 0.7917, "step": 23305 }, { "epoch": 0.8446957341161974, "grad_norm": 2.1624051047395145, "learning_rate": 6.192925014221468e-07, "loss": 0.9164, "step": 23306 }, { "epoch": 0.844731977818854, "grad_norm": 2.5083320754582172, "learning_rate": 6.190096004403979e-07, "loss": 1.0277, "step": 23307 }, { "epoch": 0.8447682215215107, "grad_norm": 2.549446527383352, "learning_rate": 6.187267598260482e-07, "loss": 1.0023, "step": 23308 }, { "epoch": 0.8448044652241673, "grad_norm": 2.299608699956903, "learning_rate": 6.184439795829932e-07, "loss": 0.7911, "step": 23309 }, { "epoch": 0.844840708926824, "grad_norm": 2.266115486251932, "learning_rate": 6.181612597151304e-07, "loss": 0.7723, "step": 23310 }, { "epoch": 0.8448769526294806, "grad_norm": 2.2073067109985467, "learning_rate": 6.178786002263559e-07, "loss": 0.7846, "step": 23311 }, { "epoch": 0.8449131963321372, "grad_norm": 2.116304357877795, "learning_rate": 6.175960011205634e-07, "loss": 0.9028, "step": 23312 }, { "epoch": 0.844949440034794, "grad_norm": 2.6580184326850462, "learning_rate": 6.173134624016485e-07, "loss": 0.9284, "step": 23313 }, { "epoch": 0.8449856837374506, "grad_norm": 2.476384582466129, "learning_rate": 6.170309840735017e-07, "loss": 0.8173, "step": 23314 }, { "epoch": 0.8450219274401073, "grad_norm": 2.39335277697833, "learning_rate": 6.167485661400192e-07, "loss": 0.895, "step": 23315 }, { "epoch": 0.8450581711427639, "grad_norm": 2.418894887883127, "learning_rate": 6.164662086050893e-07, "loss": 0.7754, "step": 23316 }, { "epoch": 0.8450944148454206, "grad_norm": 2.2011902840616053, "learning_rate": 6.161839114726053e-07, "loss": 0.7954, "step": 23317 }, { "epoch": 0.8451306585480772, "grad_norm": 2.328375413630421, "learning_rate": 6.159016747464542e-07, "loss": 0.9169, "step": 23318 }, { "epoch": 0.845166902250734, "grad_norm": 2.2301361011864267, "learning_rate": 6.156194984305269e-07, "loss": 0.8782, "step": 23319 }, { "epoch": 0.8452031459533906, "grad_norm": 2.5256696457267, "learning_rate": 6.153373825287129e-07, "loss": 1.0395, "step": 23320 }, { "epoch": 0.8452393896560473, "grad_norm": 2.225496966751191, "learning_rate": 6.150553270448961e-07, "loss": 0.7911, "step": 23321 }, { "epoch": 0.8452756333587039, "grad_norm": 2.5993875162808515, "learning_rate": 6.147733319829652e-07, "loss": 0.8575, "step": 23322 }, { "epoch": 0.8453118770613606, "grad_norm": 2.297548874766748, "learning_rate": 6.144913973468058e-07, "loss": 0.9081, "step": 23323 }, { "epoch": 0.8453481207640172, "grad_norm": 2.1817962411727443, "learning_rate": 6.142095231403039e-07, "loss": 0.8119, "step": 23324 }, { "epoch": 0.8453843644666739, "grad_norm": 2.584859297826453, "learning_rate": 6.139277093673412e-07, "loss": 1.0356, "step": 23325 }, { "epoch": 0.8454206081693306, "grad_norm": 2.2612956287743797, "learning_rate": 6.136459560318019e-07, "loss": 0.782, "step": 23326 }, { "epoch": 0.8454568518719873, "grad_norm": 2.7646911134521037, "learning_rate": 6.133642631375686e-07, "loss": 0.9527, "step": 23327 }, { "epoch": 0.8454930955746439, "grad_norm": 2.3980230179067785, "learning_rate": 6.130826306885241e-07, "loss": 0.7875, "step": 23328 }, { "epoch": 0.8455293392773006, "grad_norm": 2.2845056373303474, "learning_rate": 6.128010586885463e-07, "loss": 0.8328, "step": 23329 }, { "epoch": 0.8455655829799572, "grad_norm": 2.344489671431984, "learning_rate": 6.125195471415169e-07, "loss": 0.785, "step": 23330 }, { "epoch": 0.8456018266826139, "grad_norm": 2.3847441021785043, "learning_rate": 6.122380960513158e-07, "loss": 0.8838, "step": 23331 }, { "epoch": 0.8456380703852706, "grad_norm": 3.086076371741944, "learning_rate": 6.119567054218189e-07, "loss": 1.025, "step": 23332 }, { "epoch": 0.8456743140879273, "grad_norm": 2.5771784371013053, "learning_rate": 6.11675375256906e-07, "loss": 0.999, "step": 23333 }, { "epoch": 0.8457105577905839, "grad_norm": 2.313259430884224, "learning_rate": 6.113941055604506e-07, "loss": 0.8584, "step": 23334 }, { "epoch": 0.8457468014932406, "grad_norm": 2.2090946269244442, "learning_rate": 6.111128963363316e-07, "loss": 1.0476, "step": 23335 }, { "epoch": 0.8457830451958972, "grad_norm": 2.47649931807891, "learning_rate": 6.108317475884218e-07, "loss": 0.7762, "step": 23336 }, { "epoch": 0.8458192888985538, "grad_norm": 2.4355208374792627, "learning_rate": 6.105506593205973e-07, "loss": 1.1269, "step": 23337 }, { "epoch": 0.8458555326012105, "grad_norm": 2.7109321543794507, "learning_rate": 6.102696315367291e-07, "loss": 0.9585, "step": 23338 }, { "epoch": 0.8458917763038672, "grad_norm": 2.3741542973298753, "learning_rate": 6.099886642406905e-07, "loss": 0.9939, "step": 23339 }, { "epoch": 0.8459280200065239, "grad_norm": 2.050018743338115, "learning_rate": 6.097077574363541e-07, "loss": 0.8028, "step": 23340 }, { "epoch": 0.8459642637091805, "grad_norm": 2.345529300400224, "learning_rate": 6.094269111275885e-07, "loss": 0.889, "step": 23341 }, { "epoch": 0.8460005074118372, "grad_norm": 2.5001777180775338, "learning_rate": 6.091461253182645e-07, "loss": 0.9875, "step": 23342 }, { "epoch": 0.8460367511144938, "grad_norm": 2.5893051383188372, "learning_rate": 6.088654000122518e-07, "loss": 0.9177, "step": 23343 }, { "epoch": 0.8460729948171505, "grad_norm": 2.502365364091458, "learning_rate": 6.085847352134195e-07, "loss": 0.9741, "step": 23344 }, { "epoch": 0.8461092385198072, "grad_norm": 2.870827185165794, "learning_rate": 6.083041309256321e-07, "loss": 0.8783, "step": 23345 }, { "epoch": 0.8461454822224639, "grad_norm": 2.3147283033625192, "learning_rate": 6.080235871527579e-07, "loss": 1.0507, "step": 23346 }, { "epoch": 0.8461817259251205, "grad_norm": 2.356440512618352, "learning_rate": 6.077431038986626e-07, "loss": 0.8645, "step": 23347 }, { "epoch": 0.8462179696277772, "grad_norm": 2.5539615739388, "learning_rate": 6.074626811672119e-07, "loss": 0.9981, "step": 23348 }, { "epoch": 0.8462542133304338, "grad_norm": 2.3104866031461384, "learning_rate": 6.071823189622683e-07, "loss": 0.9066, "step": 23349 }, { "epoch": 0.8462904570330905, "grad_norm": 2.6059023612584267, "learning_rate": 6.069020172876949e-07, "loss": 0.9067, "step": 23350 }, { "epoch": 0.8463267007357471, "grad_norm": 2.4707536336811304, "learning_rate": 6.066217761473564e-07, "loss": 0.8821, "step": 23351 }, { "epoch": 0.8463629444384039, "grad_norm": 2.4878652506601635, "learning_rate": 6.063415955451119e-07, "loss": 0.9608, "step": 23352 }, { "epoch": 0.8463991881410605, "grad_norm": 2.5890108247007344, "learning_rate": 6.060614754848238e-07, "loss": 0.8844, "step": 23353 }, { "epoch": 0.8464354318437172, "grad_norm": 2.5867906646332393, "learning_rate": 6.05781415970349e-07, "loss": 0.9278, "step": 23354 }, { "epoch": 0.8464716755463738, "grad_norm": 2.1650005675925477, "learning_rate": 6.055014170055512e-07, "loss": 0.7307, "step": 23355 }, { "epoch": 0.8465079192490305, "grad_norm": 2.4436148748824014, "learning_rate": 6.052214785942845e-07, "loss": 1.0561, "step": 23356 }, { "epoch": 0.8465441629516871, "grad_norm": 2.3915864993342746, "learning_rate": 6.049416007404097e-07, "loss": 0.8959, "step": 23357 }, { "epoch": 0.8465804066543438, "grad_norm": 2.358304984575947, "learning_rate": 6.046617834477802e-07, "loss": 0.8959, "step": 23358 }, { "epoch": 0.8466166503570005, "grad_norm": 2.417196131049795, "learning_rate": 6.043820267202527e-07, "loss": 1.022, "step": 23359 }, { "epoch": 0.8466528940596572, "grad_norm": 2.4247383381638383, "learning_rate": 6.041023305616844e-07, "loss": 1.0408, "step": 23360 }, { "epoch": 0.8466891377623138, "grad_norm": 2.8862053354446804, "learning_rate": 6.038226949759246e-07, "loss": 0.9275, "step": 23361 }, { "epoch": 0.8467253814649705, "grad_norm": 2.368625200909937, "learning_rate": 6.035431199668318e-07, "loss": 0.6871, "step": 23362 }, { "epoch": 0.8467616251676271, "grad_norm": 2.3892275890737165, "learning_rate": 6.032636055382546e-07, "loss": 0.848, "step": 23363 }, { "epoch": 0.8467978688702837, "grad_norm": 2.4251434888222105, "learning_rate": 6.029841516940471e-07, "loss": 0.969, "step": 23364 }, { "epoch": 0.8468341125729405, "grad_norm": 2.16822377974009, "learning_rate": 6.027047584380568e-07, "loss": 0.9524, "step": 23365 }, { "epoch": 0.8468703562755971, "grad_norm": 2.6166889559971795, "learning_rate": 6.024254257741375e-07, "loss": 0.8351, "step": 23366 }, { "epoch": 0.8469065999782538, "grad_norm": 2.3965185554029262, "learning_rate": 6.021461537061352e-07, "loss": 0.8383, "step": 23367 }, { "epoch": 0.8469428436809104, "grad_norm": 2.1966726909821737, "learning_rate": 6.018669422379003e-07, "loss": 0.8141, "step": 23368 }, { "epoch": 0.8469790873835671, "grad_norm": 2.5006999719540266, "learning_rate": 6.015877913732782e-07, "loss": 0.8889, "step": 23369 }, { "epoch": 0.8470153310862237, "grad_norm": 2.4699297757293177, "learning_rate": 6.013087011161156e-07, "loss": 0.8359, "step": 23370 }, { "epoch": 0.8470515747888804, "grad_norm": 2.5363911273581725, "learning_rate": 6.010296714702601e-07, "loss": 1.032, "step": 23371 }, { "epoch": 0.8470878184915371, "grad_norm": 2.416578810096805, "learning_rate": 6.007507024395548e-07, "loss": 0.9381, "step": 23372 }, { "epoch": 0.8471240621941938, "grad_norm": 2.1392713289830305, "learning_rate": 6.004717940278437e-07, "loss": 0.696, "step": 23373 }, { "epoch": 0.8471603058968504, "grad_norm": 2.3278230979167738, "learning_rate": 6.001929462389711e-07, "loss": 0.7157, "step": 23374 }, { "epoch": 0.8471965495995071, "grad_norm": 2.424838970857889, "learning_rate": 5.999141590767793e-07, "loss": 1.0559, "step": 23375 }, { "epoch": 0.8472327933021637, "grad_norm": 2.169301578624403, "learning_rate": 5.996354325451081e-07, "loss": 0.9063, "step": 23376 }, { "epoch": 0.8472690370048204, "grad_norm": 2.1478313009365886, "learning_rate": 5.993567666478001e-07, "loss": 0.5456, "step": 23377 }, { "epoch": 0.8473052807074771, "grad_norm": 2.3730227589984794, "learning_rate": 5.990781613886948e-07, "loss": 0.8692, "step": 23378 }, { "epoch": 0.8473415244101338, "grad_norm": 2.8969836262351834, "learning_rate": 5.987996167716304e-07, "loss": 0.9446, "step": 23379 }, { "epoch": 0.8473777681127904, "grad_norm": 2.660597455385752, "learning_rate": 5.985211328004459e-07, "loss": 0.8112, "step": 23380 }, { "epoch": 0.8474140118154471, "grad_norm": 2.350869887002829, "learning_rate": 5.982427094789767e-07, "loss": 1.0848, "step": 23381 }, { "epoch": 0.8474502555181037, "grad_norm": 2.440874471734456, "learning_rate": 5.979643468110624e-07, "loss": 0.8557, "step": 23382 }, { "epoch": 0.8474864992207604, "grad_norm": 2.1899005289271267, "learning_rate": 5.976860448005368e-07, "loss": 0.8001, "step": 23383 }, { "epoch": 0.847522742923417, "grad_norm": 2.2628522089009224, "learning_rate": 5.974078034512354e-07, "loss": 0.7496, "step": 23384 }, { "epoch": 0.8475589866260738, "grad_norm": 2.330397246541413, "learning_rate": 5.971296227669915e-07, "loss": 0.9067, "step": 23385 }, { "epoch": 0.8475952303287304, "grad_norm": 2.3602360144019694, "learning_rate": 5.968515027516381e-07, "loss": 0.7664, "step": 23386 }, { "epoch": 0.847631474031387, "grad_norm": 2.3245400094634086, "learning_rate": 5.965734434090093e-07, "loss": 0.8741, "step": 23387 }, { "epoch": 0.8476677177340437, "grad_norm": 2.1650294342858007, "learning_rate": 5.962954447429342e-07, "loss": 0.7802, "step": 23388 }, { "epoch": 0.8477039614367003, "grad_norm": 2.3325366515447135, "learning_rate": 5.960175067572449e-07, "loss": 0.8135, "step": 23389 }, { "epoch": 0.847740205139357, "grad_norm": 2.523526521887508, "learning_rate": 5.957396294557705e-07, "loss": 1.1143, "step": 23390 }, { "epoch": 0.8477764488420138, "grad_norm": 2.263281650246838, "learning_rate": 5.954618128423417e-07, "loss": 0.7797, "step": 23391 }, { "epoch": 0.8478126925446704, "grad_norm": 1.9502903242294474, "learning_rate": 5.95184056920784e-07, "loss": 0.8842, "step": 23392 }, { "epoch": 0.847848936247327, "grad_norm": 1.913445727760006, "learning_rate": 5.949063616949264e-07, "loss": 0.7128, "step": 23393 }, { "epoch": 0.8478851799499837, "grad_norm": 2.544819012147318, "learning_rate": 5.946287271685953e-07, "loss": 0.9697, "step": 23394 }, { "epoch": 0.8479214236526403, "grad_norm": 2.6492469730106487, "learning_rate": 5.943511533456165e-07, "loss": 0.7673, "step": 23395 }, { "epoch": 0.847957667355297, "grad_norm": 2.5417545192985584, "learning_rate": 5.940736402298136e-07, "loss": 0.8503, "step": 23396 }, { "epoch": 0.8479939110579536, "grad_norm": 2.1552627187734053, "learning_rate": 5.937961878250109e-07, "loss": 0.8179, "step": 23397 }, { "epoch": 0.8480301547606104, "grad_norm": 2.2823156095253996, "learning_rate": 5.935187961350336e-07, "loss": 0.9171, "step": 23398 }, { "epoch": 0.848066398463267, "grad_norm": 2.242658567093229, "learning_rate": 5.932414651637008e-07, "loss": 1.0262, "step": 23399 }, { "epoch": 0.8481026421659237, "grad_norm": 2.2883733459360993, "learning_rate": 5.929641949148368e-07, "loss": 0.926, "step": 23400 }, { "epoch": 0.8481388858685803, "grad_norm": 2.1781838420973174, "learning_rate": 5.92686985392259e-07, "loss": 0.8736, "step": 23401 }, { "epoch": 0.848175129571237, "grad_norm": 2.548510561450318, "learning_rate": 5.924098365997911e-07, "loss": 0.9284, "step": 23402 }, { "epoch": 0.8482113732738936, "grad_norm": 2.3641659018491485, "learning_rate": 5.921327485412487e-07, "loss": 0.9006, "step": 23403 }, { "epoch": 0.8482476169765504, "grad_norm": 2.456424359114425, "learning_rate": 5.918557212204523e-07, "loss": 0.9924, "step": 23404 }, { "epoch": 0.848283860679207, "grad_norm": 2.5869800520462967, "learning_rate": 5.915787546412172e-07, "loss": 0.8879, "step": 23405 }, { "epoch": 0.8483201043818637, "grad_norm": 2.5433032626517584, "learning_rate": 5.913018488073608e-07, "loss": 0.8237, "step": 23406 }, { "epoch": 0.8483563480845203, "grad_norm": 2.5874716876264063, "learning_rate": 5.910250037226994e-07, "loss": 1.031, "step": 23407 }, { "epoch": 0.848392591787177, "grad_norm": 2.386423256567421, "learning_rate": 5.907482193910458e-07, "loss": 0.7163, "step": 23408 }, { "epoch": 0.8484288354898336, "grad_norm": 2.4564001665580943, "learning_rate": 5.904714958162155e-07, "loss": 0.7896, "step": 23409 }, { "epoch": 0.8484650791924903, "grad_norm": 2.250668292288026, "learning_rate": 5.901948330020213e-07, "loss": 0.8087, "step": 23410 }, { "epoch": 0.848501322895147, "grad_norm": 2.0970558250347917, "learning_rate": 5.899182309522761e-07, "loss": 0.833, "step": 23411 }, { "epoch": 0.8485375665978037, "grad_norm": 2.4361596127661174, "learning_rate": 5.896416896707896e-07, "loss": 0.9243, "step": 23412 }, { "epoch": 0.8485738103004603, "grad_norm": 2.5018434757255648, "learning_rate": 5.893652091613733e-07, "loss": 0.8475, "step": 23413 }, { "epoch": 0.848610054003117, "grad_norm": 2.1399373388557112, "learning_rate": 5.890887894278369e-07, "loss": 0.7167, "step": 23414 }, { "epoch": 0.8486462977057736, "grad_norm": 2.516148104842301, "learning_rate": 5.888124304739906e-07, "loss": 0.8829, "step": 23415 }, { "epoch": 0.8486825414084302, "grad_norm": 2.44276754236775, "learning_rate": 5.885361323036399e-07, "loss": 0.9512, "step": 23416 }, { "epoch": 0.848718785111087, "grad_norm": 2.422262573852087, "learning_rate": 5.882598949205937e-07, "loss": 0.762, "step": 23417 }, { "epoch": 0.8487550288137437, "grad_norm": 2.4675670327035535, "learning_rate": 5.879837183286586e-07, "loss": 1.0367, "step": 23418 }, { "epoch": 0.8487912725164003, "grad_norm": 2.29610898162277, "learning_rate": 5.877076025316386e-07, "loss": 0.9094, "step": 23419 }, { "epoch": 0.8488275162190569, "grad_norm": 2.4979405251042883, "learning_rate": 5.874315475333403e-07, "loss": 0.9533, "step": 23420 }, { "epoch": 0.8488637599217136, "grad_norm": 2.2432349845524517, "learning_rate": 5.871555533375651e-07, "loss": 0.8287, "step": 23421 }, { "epoch": 0.8489000036243702, "grad_norm": 2.3976291245144417, "learning_rate": 5.868796199481191e-07, "loss": 0.912, "step": 23422 }, { "epoch": 0.8489362473270269, "grad_norm": 2.4806998421868043, "learning_rate": 5.866037473688024e-07, "loss": 0.9078, "step": 23423 }, { "epoch": 0.8489724910296836, "grad_norm": 2.5603148573506984, "learning_rate": 5.863279356034174e-07, "loss": 0.8569, "step": 23424 }, { "epoch": 0.8490087347323403, "grad_norm": 2.4278677634349966, "learning_rate": 5.860521846557638e-07, "loss": 0.7934, "step": 23425 }, { "epoch": 0.8490449784349969, "grad_norm": 2.054126335921649, "learning_rate": 5.857764945296413e-07, "loss": 0.7008, "step": 23426 }, { "epoch": 0.8490812221376536, "grad_norm": 2.3141028854207977, "learning_rate": 5.855008652288497e-07, "loss": 0.8527, "step": 23427 }, { "epoch": 0.8491174658403102, "grad_norm": 2.3242758214779102, "learning_rate": 5.852252967571858e-07, "loss": 0.9717, "step": 23428 }, { "epoch": 0.8491537095429669, "grad_norm": 2.1025356417068966, "learning_rate": 5.849497891184475e-07, "loss": 0.8795, "step": 23429 }, { "epoch": 0.8491899532456235, "grad_norm": 2.5139819769916425, "learning_rate": 5.846743423164303e-07, "loss": 0.6509, "step": 23430 }, { "epoch": 0.8492261969482803, "grad_norm": 2.1725324176176293, "learning_rate": 5.843989563549318e-07, "loss": 0.8305, "step": 23431 }, { "epoch": 0.8492624406509369, "grad_norm": 2.25766061655793, "learning_rate": 5.84123631237744e-07, "loss": 0.8596, "step": 23432 }, { "epoch": 0.8492986843535936, "grad_norm": 2.4037396109395193, "learning_rate": 5.838483669686624e-07, "loss": 0.8602, "step": 23433 }, { "epoch": 0.8493349280562502, "grad_norm": 2.383986259499084, "learning_rate": 5.835731635514791e-07, "loss": 0.9411, "step": 23434 }, { "epoch": 0.8493711717589069, "grad_norm": 2.3796334800066825, "learning_rate": 5.83298020989988e-07, "loss": 0.8677, "step": 23435 }, { "epoch": 0.8494074154615635, "grad_norm": 1.9776886549854282, "learning_rate": 5.830229392879777e-07, "loss": 0.7086, "step": 23436 }, { "epoch": 0.8494436591642203, "grad_norm": 2.4897947950198756, "learning_rate": 5.827479184492401e-07, "loss": 1.1286, "step": 23437 }, { "epoch": 0.8494799028668769, "grad_norm": 1.9656829313933273, "learning_rate": 5.824729584775657e-07, "loss": 0.6364, "step": 23438 }, { "epoch": 0.8495161465695336, "grad_norm": 2.1884851740841302, "learning_rate": 5.821980593767413e-07, "loss": 0.9093, "step": 23439 }, { "epoch": 0.8495523902721902, "grad_norm": 2.4453938525993952, "learning_rate": 5.819232211505566e-07, "loss": 0.8687, "step": 23440 }, { "epoch": 0.8495886339748469, "grad_norm": 2.0421752131780706, "learning_rate": 5.816484438027964e-07, "loss": 0.742, "step": 23441 }, { "epoch": 0.8496248776775035, "grad_norm": 2.457979895806132, "learning_rate": 5.813737273372499e-07, "loss": 0.8556, "step": 23442 }, { "epoch": 0.8496611213801601, "grad_norm": 2.4127575657017344, "learning_rate": 5.810990717577009e-07, "loss": 0.9318, "step": 23443 }, { "epoch": 0.8496973650828169, "grad_norm": 2.5736938537204455, "learning_rate": 5.808244770679339e-07, "loss": 0.8225, "step": 23444 }, { "epoch": 0.8497336087854735, "grad_norm": 2.406627588114035, "learning_rate": 5.805499432717338e-07, "loss": 0.8591, "step": 23445 }, { "epoch": 0.8497698524881302, "grad_norm": 2.3485609171085677, "learning_rate": 5.802754703728819e-07, "loss": 0.8574, "step": 23446 }, { "epoch": 0.8498060961907868, "grad_norm": 2.4858646907350246, "learning_rate": 5.80001058375162e-07, "loss": 0.6944, "step": 23447 }, { "epoch": 0.8498423398934435, "grad_norm": 2.4229958615223226, "learning_rate": 5.79726707282352e-07, "loss": 0.7215, "step": 23448 }, { "epoch": 0.8498785835961001, "grad_norm": 2.4439683592471027, "learning_rate": 5.794524170982374e-07, "loss": 0.9828, "step": 23449 }, { "epoch": 0.8499148272987569, "grad_norm": 2.476991710913651, "learning_rate": 5.791781878265934e-07, "loss": 0.9774, "step": 23450 }, { "epoch": 0.8499510710014135, "grad_norm": 2.4986000123776284, "learning_rate": 5.789040194712014e-07, "loss": 0.986, "step": 23451 }, { "epoch": 0.8499873147040702, "grad_norm": 2.126892845466995, "learning_rate": 5.786299120358368e-07, "loss": 0.7442, "step": 23452 }, { "epoch": 0.8500235584067268, "grad_norm": 2.450236853718844, "learning_rate": 5.783558655242793e-07, "loss": 0.8604, "step": 23453 }, { "epoch": 0.8500598021093835, "grad_norm": 2.221423390127517, "learning_rate": 5.780818799403032e-07, "loss": 0.8148, "step": 23454 }, { "epoch": 0.8500960458120401, "grad_norm": 2.274995215190853, "learning_rate": 5.778079552876859e-07, "loss": 0.8501, "step": 23455 }, { "epoch": 0.8501322895146968, "grad_norm": 2.515830469776948, "learning_rate": 5.775340915701988e-07, "loss": 0.841, "step": 23456 }, { "epoch": 0.8501685332173535, "grad_norm": 2.3261549375775887, "learning_rate": 5.772602887916178e-07, "loss": 0.8043, "step": 23457 }, { "epoch": 0.8502047769200102, "grad_norm": 2.5159085053009, "learning_rate": 5.769865469557162e-07, "loss": 1.0749, "step": 23458 }, { "epoch": 0.8502410206226668, "grad_norm": 2.3720090493477106, "learning_rate": 5.767128660662641e-07, "loss": 0.8737, "step": 23459 }, { "epoch": 0.8502772643253235, "grad_norm": 2.301707178808243, "learning_rate": 5.76439246127033e-07, "loss": 0.7405, "step": 23460 }, { "epoch": 0.8503135080279801, "grad_norm": 2.4363670660625902, "learning_rate": 5.761656871417942e-07, "loss": 0.8235, "step": 23461 }, { "epoch": 0.8503497517306368, "grad_norm": 2.430693454629848, "learning_rate": 5.758921891143177e-07, "loss": 0.7921, "step": 23462 }, { "epoch": 0.8503859954332935, "grad_norm": 2.4060751806014844, "learning_rate": 5.756187520483702e-07, "loss": 0.819, "step": 23463 }, { "epoch": 0.8504222391359502, "grad_norm": 2.5428135574188757, "learning_rate": 5.753453759477207e-07, "loss": 0.8169, "step": 23464 }, { "epoch": 0.8504584828386068, "grad_norm": 2.355486184492497, "learning_rate": 5.75072060816137e-07, "loss": 1.0258, "step": 23465 }, { "epoch": 0.8504947265412635, "grad_norm": 2.1484080594960675, "learning_rate": 5.747988066573834e-07, "loss": 0.8402, "step": 23466 }, { "epoch": 0.8505309702439201, "grad_norm": 2.316662760695707, "learning_rate": 5.745256134752264e-07, "loss": 0.8883, "step": 23467 }, { "epoch": 0.8505672139465768, "grad_norm": 2.375913332429866, "learning_rate": 5.742524812734284e-07, "loss": 0.8129, "step": 23468 }, { "epoch": 0.8506034576492334, "grad_norm": 2.3860190531950165, "learning_rate": 5.73979410055756e-07, "loss": 0.9195, "step": 23469 }, { "epoch": 0.8506397013518902, "grad_norm": 2.4157594198983627, "learning_rate": 5.737063998259701e-07, "loss": 0.9924, "step": 23470 }, { "epoch": 0.8506759450545468, "grad_norm": 2.5641436239334805, "learning_rate": 5.734334505878342e-07, "loss": 0.9546, "step": 23471 }, { "epoch": 0.8507121887572034, "grad_norm": 2.2887795015730896, "learning_rate": 5.73160562345107e-07, "loss": 0.8222, "step": 23472 }, { "epoch": 0.8507484324598601, "grad_norm": 2.419300763968264, "learning_rate": 5.7288773510155e-07, "loss": 0.8482, "step": 23473 }, { "epoch": 0.8507846761625167, "grad_norm": 2.1675925552062014, "learning_rate": 5.726149688609228e-07, "loss": 0.7203, "step": 23474 }, { "epoch": 0.8508209198651734, "grad_norm": 2.134898470928968, "learning_rate": 5.723422636269843e-07, "loss": 0.7304, "step": 23475 }, { "epoch": 0.8508571635678301, "grad_norm": 2.536095072148905, "learning_rate": 5.720696194034908e-07, "loss": 0.9278, "step": 23476 }, { "epoch": 0.8508934072704868, "grad_norm": 2.2332912774801637, "learning_rate": 5.717970361942004e-07, "loss": 0.9835, "step": 23477 }, { "epoch": 0.8509296509731434, "grad_norm": 2.365000975209688, "learning_rate": 5.715245140028692e-07, "loss": 0.7751, "step": 23478 }, { "epoch": 0.8509658946758001, "grad_norm": 2.3209598510726455, "learning_rate": 5.71252052833251e-07, "loss": 0.8999, "step": 23479 }, { "epoch": 0.8510021383784567, "grad_norm": 2.3657180339927324, "learning_rate": 5.709796526891015e-07, "loss": 0.8256, "step": 23480 }, { "epoch": 0.8510383820811134, "grad_norm": 2.483596327102626, "learning_rate": 5.707073135741736e-07, "loss": 0.8359, "step": 23481 }, { "epoch": 0.85107462578377, "grad_norm": 2.2633951736444855, "learning_rate": 5.704350354922211e-07, "loss": 0.8295, "step": 23482 }, { "epoch": 0.8511108694864268, "grad_norm": 2.1186012404963406, "learning_rate": 5.701628184469938e-07, "loss": 0.868, "step": 23483 }, { "epoch": 0.8511471131890834, "grad_norm": 2.264009265034237, "learning_rate": 5.69890662442244e-07, "loss": 0.8457, "step": 23484 }, { "epoch": 0.8511833568917401, "grad_norm": 2.45740474553005, "learning_rate": 5.696185674817228e-07, "loss": 0.8196, "step": 23485 }, { "epoch": 0.8512196005943967, "grad_norm": 2.6228296260923347, "learning_rate": 5.693465335691767e-07, "loss": 0.9084, "step": 23486 }, { "epoch": 0.8512558442970534, "grad_norm": 2.5286084004930727, "learning_rate": 5.690745607083576e-07, "loss": 1.0692, "step": 23487 }, { "epoch": 0.85129208799971, "grad_norm": 2.5771565858033885, "learning_rate": 5.688026489030085e-07, "loss": 0.8436, "step": 23488 }, { "epoch": 0.8513283317023667, "grad_norm": 2.4422014397716603, "learning_rate": 5.685307981568816e-07, "loss": 0.9335, "step": 23489 }, { "epoch": 0.8513645754050234, "grad_norm": 2.3692573956579452, "learning_rate": 5.682590084737188e-07, "loss": 1.0299, "step": 23490 }, { "epoch": 0.8514008191076801, "grad_norm": 2.4282578478614814, "learning_rate": 5.679872798572683e-07, "loss": 0.962, "step": 23491 }, { "epoch": 0.8514370628103367, "grad_norm": 2.671417726470875, "learning_rate": 5.677156123112709e-07, "loss": 0.9812, "step": 23492 }, { "epoch": 0.8514733065129934, "grad_norm": 2.2592375227567616, "learning_rate": 5.67444005839472e-07, "loss": 0.913, "step": 23493 }, { "epoch": 0.85150955021565, "grad_norm": 2.450381095652251, "learning_rate": 5.671724604456152e-07, "loss": 0.838, "step": 23494 }, { "epoch": 0.8515457939183066, "grad_norm": 2.0892357384972637, "learning_rate": 5.669009761334398e-07, "loss": 0.8649, "step": 23495 }, { "epoch": 0.8515820376209634, "grad_norm": 2.427587064787884, "learning_rate": 5.666295529066884e-07, "loss": 0.9163, "step": 23496 }, { "epoch": 0.85161828132362, "grad_norm": 2.2828191372846827, "learning_rate": 5.663581907691001e-07, "loss": 0.898, "step": 23497 }, { "epoch": 0.8516545250262767, "grad_norm": 2.2872478090966295, "learning_rate": 5.660868897244153e-07, "loss": 0.9042, "step": 23498 }, { "epoch": 0.8516907687289333, "grad_norm": 2.408255282357744, "learning_rate": 5.658156497763712e-07, "loss": 0.9129, "step": 23499 }, { "epoch": 0.85172701243159, "grad_norm": 2.291744307504679, "learning_rate": 5.655444709287055e-07, "loss": 0.9279, "step": 23500 }, { "epoch": 0.8517632561342466, "grad_norm": 2.327528942393573, "learning_rate": 5.652733531851551e-07, "loss": 0.6727, "step": 23501 }, { "epoch": 0.8517994998369033, "grad_norm": 2.193758477400455, "learning_rate": 5.650022965494567e-07, "loss": 0.8963, "step": 23502 }, { "epoch": 0.85183574353956, "grad_norm": 2.452454268201673, "learning_rate": 5.647313010253436e-07, "loss": 0.7625, "step": 23503 }, { "epoch": 0.8518719872422167, "grad_norm": 2.3040973646909966, "learning_rate": 5.64460366616551e-07, "loss": 0.827, "step": 23504 }, { "epoch": 0.8519082309448733, "grad_norm": 2.362521766067754, "learning_rate": 5.641894933268128e-07, "loss": 0.9188, "step": 23505 }, { "epoch": 0.85194447464753, "grad_norm": 2.194566577097491, "learning_rate": 5.639186811598596e-07, "loss": 0.7102, "step": 23506 }, { "epoch": 0.8519807183501866, "grad_norm": 2.3099119787561118, "learning_rate": 5.636479301194253e-07, "loss": 0.9044, "step": 23507 }, { "epoch": 0.8520169620528433, "grad_norm": 2.48941350466348, "learning_rate": 5.633772402092375e-07, "loss": 1.0092, "step": 23508 }, { "epoch": 0.8520532057555, "grad_norm": 2.723703379637112, "learning_rate": 5.631066114330302e-07, "loss": 1.0303, "step": 23509 }, { "epoch": 0.8520894494581567, "grad_norm": 2.3559716570490625, "learning_rate": 5.628360437945291e-07, "loss": 0.8637, "step": 23510 }, { "epoch": 0.8521256931608133, "grad_norm": 2.457096169044109, "learning_rate": 5.625655372974648e-07, "loss": 0.9859, "step": 23511 }, { "epoch": 0.85216193686347, "grad_norm": 2.6265035068545335, "learning_rate": 5.622950919455633e-07, "loss": 1.0579, "step": 23512 }, { "epoch": 0.8521981805661266, "grad_norm": 2.5857772897538966, "learning_rate": 5.620247077425511e-07, "loss": 1.0012, "step": 23513 }, { "epoch": 0.8522344242687833, "grad_norm": 2.377349580323975, "learning_rate": 5.617543846921558e-07, "loss": 0.8684, "step": 23514 }, { "epoch": 0.8522706679714399, "grad_norm": 2.5734981720558463, "learning_rate": 5.614841227980994e-07, "loss": 0.8979, "step": 23515 }, { "epoch": 0.8523069116740967, "grad_norm": 2.1682770592749736, "learning_rate": 5.612139220641077e-07, "loss": 0.8376, "step": 23516 }, { "epoch": 0.8523431553767533, "grad_norm": 2.3026366192057206, "learning_rate": 5.609437824939041e-07, "loss": 0.9193, "step": 23517 }, { "epoch": 0.85237939907941, "grad_norm": 2.4074584650122146, "learning_rate": 5.606737040912108e-07, "loss": 0.912, "step": 23518 }, { "epoch": 0.8524156427820666, "grad_norm": 2.2262347279717782, "learning_rate": 5.604036868597485e-07, "loss": 1.0531, "step": 23519 }, { "epoch": 0.8524518864847233, "grad_norm": 2.5149271085437417, "learning_rate": 5.601337308032384e-07, "loss": 1.1191, "step": 23520 }, { "epoch": 0.8524881301873799, "grad_norm": 2.424645935571499, "learning_rate": 5.598638359253999e-07, "loss": 0.8517, "step": 23521 }, { "epoch": 0.8525243738900367, "grad_norm": 2.2635351934756276, "learning_rate": 5.595940022299539e-07, "loss": 0.6481, "step": 23522 }, { "epoch": 0.8525606175926933, "grad_norm": 2.425357725847031, "learning_rate": 5.593242297206158e-07, "loss": 1.0367, "step": 23523 }, { "epoch": 0.85259686129535, "grad_norm": 2.4855787300657766, "learning_rate": 5.590545184011042e-07, "loss": 1.086, "step": 23524 }, { "epoch": 0.8526331049980066, "grad_norm": 2.5227515988522695, "learning_rate": 5.587848682751367e-07, "loss": 1.0536, "step": 23525 }, { "epoch": 0.8526693487006632, "grad_norm": 2.413495209583966, "learning_rate": 5.585152793464271e-07, "loss": 0.9282, "step": 23526 }, { "epoch": 0.8527055924033199, "grad_norm": 2.436063526637367, "learning_rate": 5.582457516186901e-07, "loss": 0.8654, "step": 23527 }, { "epoch": 0.8527418361059765, "grad_norm": 2.717892209664416, "learning_rate": 5.579762850956411e-07, "loss": 1.1339, "step": 23528 }, { "epoch": 0.8527780798086333, "grad_norm": 2.394856342777634, "learning_rate": 5.577068797809926e-07, "loss": 0.902, "step": 23529 }, { "epoch": 0.8528143235112899, "grad_norm": 2.4932646834039947, "learning_rate": 5.574375356784562e-07, "loss": 0.8737, "step": 23530 }, { "epoch": 0.8528505672139466, "grad_norm": 2.263571752635919, "learning_rate": 5.571682527917438e-07, "loss": 0.9695, "step": 23531 }, { "epoch": 0.8528868109166032, "grad_norm": 2.3427039859289915, "learning_rate": 5.56899031124567e-07, "loss": 0.8122, "step": 23532 }, { "epoch": 0.8529230546192599, "grad_norm": 2.5284743260490954, "learning_rate": 5.566298706806334e-07, "loss": 0.8169, "step": 23533 }, { "epoch": 0.8529592983219165, "grad_norm": 2.2559294049677288, "learning_rate": 5.563607714636537e-07, "loss": 0.8644, "step": 23534 }, { "epoch": 0.8529955420245733, "grad_norm": 2.021305820184467, "learning_rate": 5.560917334773331e-07, "loss": 0.7526, "step": 23535 }, { "epoch": 0.8530317857272299, "grad_norm": 2.1374668928569074, "learning_rate": 5.558227567253832e-07, "loss": 0.9263, "step": 23536 }, { "epoch": 0.8530680294298866, "grad_norm": 2.30189302412863, "learning_rate": 5.555538412115063e-07, "loss": 0.8519, "step": 23537 }, { "epoch": 0.8531042731325432, "grad_norm": 2.506381632342872, "learning_rate": 5.552849869394112e-07, "loss": 0.7632, "step": 23538 }, { "epoch": 0.8531405168351999, "grad_norm": 2.507426372498607, "learning_rate": 5.550161939127984e-07, "loss": 0.931, "step": 23539 }, { "epoch": 0.8531767605378565, "grad_norm": 2.53813566894301, "learning_rate": 5.547474621353766e-07, "loss": 0.808, "step": 23540 }, { "epoch": 0.8532130042405132, "grad_norm": 2.1345729941962017, "learning_rate": 5.544787916108457e-07, "loss": 0.674, "step": 23541 }, { "epoch": 0.8532492479431699, "grad_norm": 2.624024647905522, "learning_rate": 5.542101823429086e-07, "loss": 0.7706, "step": 23542 }, { "epoch": 0.8532854916458266, "grad_norm": 2.403646622878012, "learning_rate": 5.539416343352661e-07, "loss": 0.9749, "step": 23543 }, { "epoch": 0.8533217353484832, "grad_norm": 2.4298837157622515, "learning_rate": 5.53673147591619e-07, "loss": 0.9241, "step": 23544 }, { "epoch": 0.8533579790511399, "grad_norm": 2.0167057301543996, "learning_rate": 5.534047221156674e-07, "loss": 0.7466, "step": 23545 }, { "epoch": 0.8533942227537965, "grad_norm": 2.2323301168585608, "learning_rate": 5.531363579111088e-07, "loss": 1.0643, "step": 23546 }, { "epoch": 0.8534304664564532, "grad_norm": 2.124072037508794, "learning_rate": 5.528680549816417e-07, "loss": 0.8113, "step": 23547 }, { "epoch": 0.8534667101591099, "grad_norm": 2.6012833479775614, "learning_rate": 5.525998133309635e-07, "loss": 0.923, "step": 23548 }, { "epoch": 0.8535029538617666, "grad_norm": 2.3750243428841737, "learning_rate": 5.52331632962771e-07, "loss": 0.8565, "step": 23549 }, { "epoch": 0.8535391975644232, "grad_norm": 2.2103837820790946, "learning_rate": 5.520635138807578e-07, "loss": 1.0298, "step": 23550 }, { "epoch": 0.8535754412670798, "grad_norm": 2.1041080348049555, "learning_rate": 5.517954560886196e-07, "loss": 0.6926, "step": 23551 }, { "epoch": 0.8536116849697365, "grad_norm": 2.4716910806054777, "learning_rate": 5.515274595900505e-07, "loss": 0.9414, "step": 23552 }, { "epoch": 0.8536479286723931, "grad_norm": 2.4899376708833985, "learning_rate": 5.512595243887414e-07, "loss": 1.0642, "step": 23553 }, { "epoch": 0.8536841723750498, "grad_norm": 2.2789935404593553, "learning_rate": 5.509916504883872e-07, "loss": 0.7993, "step": 23554 }, { "epoch": 0.8537204160777065, "grad_norm": 2.170454744445918, "learning_rate": 5.507238378926749e-07, "loss": 0.853, "step": 23555 }, { "epoch": 0.8537566597803632, "grad_norm": 2.123476724326167, "learning_rate": 5.504560866052994e-07, "loss": 0.8648, "step": 23556 }, { "epoch": 0.8537929034830198, "grad_norm": 2.1398994819443518, "learning_rate": 5.501883966299471e-07, "loss": 0.9604, "step": 23557 }, { "epoch": 0.8538291471856765, "grad_norm": 2.248545553582832, "learning_rate": 5.499207679703084e-07, "loss": 0.8358, "step": 23558 }, { "epoch": 0.8538653908883331, "grad_norm": 2.26991908479485, "learning_rate": 5.496532006300692e-07, "loss": 0.9698, "step": 23559 }, { "epoch": 0.8539016345909898, "grad_norm": 2.3390684076760313, "learning_rate": 5.493856946129172e-07, "loss": 0.8647, "step": 23560 }, { "epoch": 0.8539378782936464, "grad_norm": 2.212407543474068, "learning_rate": 5.491182499225395e-07, "loss": 0.833, "step": 23561 }, { "epoch": 0.8539741219963032, "grad_norm": 2.089065513436881, "learning_rate": 5.488508665626208e-07, "loss": 0.7194, "step": 23562 }, { "epoch": 0.8540103656989598, "grad_norm": 2.5104460482073905, "learning_rate": 5.485835445368442e-07, "loss": 0.8641, "step": 23563 }, { "epoch": 0.8540466094016165, "grad_norm": 2.2467443886354475, "learning_rate": 5.483162838488942e-07, "loss": 1.0364, "step": 23564 }, { "epoch": 0.8540828531042731, "grad_norm": 2.589933426548851, "learning_rate": 5.48049084502455e-07, "loss": 0.878, "step": 23565 }, { "epoch": 0.8541190968069298, "grad_norm": 2.292983846477822, "learning_rate": 5.477819465012057e-07, "loss": 0.8591, "step": 23566 }, { "epoch": 0.8541553405095864, "grad_norm": 2.3105016660542224, "learning_rate": 5.475148698488286e-07, "loss": 0.8057, "step": 23567 }, { "epoch": 0.8541915842122432, "grad_norm": 2.3196013935655317, "learning_rate": 5.472478545490034e-07, "loss": 0.8366, "step": 23568 }, { "epoch": 0.8542278279148998, "grad_norm": 2.470917844934162, "learning_rate": 5.469809006054117e-07, "loss": 0.8906, "step": 23569 }, { "epoch": 0.8542640716175565, "grad_norm": 2.49811606350182, "learning_rate": 5.467140080217282e-07, "loss": 0.7756, "step": 23570 }, { "epoch": 0.8543003153202131, "grad_norm": 2.3232716670664644, "learning_rate": 5.464471768016327e-07, "loss": 0.8485, "step": 23571 }, { "epoch": 0.8543365590228698, "grad_norm": 2.0942480780536186, "learning_rate": 5.46180406948803e-07, "loss": 0.9334, "step": 23572 }, { "epoch": 0.8543728027255264, "grad_norm": 2.391882268677091, "learning_rate": 5.459136984669123e-07, "loss": 1.0006, "step": 23573 }, { "epoch": 0.854409046428183, "grad_norm": 2.5712423664948694, "learning_rate": 5.456470513596385e-07, "loss": 0.9884, "step": 23574 }, { "epoch": 0.8544452901308398, "grad_norm": 2.3552302383339305, "learning_rate": 5.453804656306522e-07, "loss": 0.8687, "step": 23575 }, { "epoch": 0.8544815338334965, "grad_norm": 2.505502539549265, "learning_rate": 5.451139412836309e-07, "loss": 0.8667, "step": 23576 }, { "epoch": 0.8545177775361531, "grad_norm": 2.2591554233331905, "learning_rate": 5.448474783222441e-07, "loss": 0.9852, "step": 23577 }, { "epoch": 0.8545540212388097, "grad_norm": 2.3598873220777254, "learning_rate": 5.445810767501658e-07, "loss": 0.7961, "step": 23578 }, { "epoch": 0.8545902649414664, "grad_norm": 2.3492734084556997, "learning_rate": 5.44314736571065e-07, "loss": 0.7438, "step": 23579 }, { "epoch": 0.854626508644123, "grad_norm": 2.194091275743635, "learning_rate": 5.440484577886124e-07, "loss": 0.8541, "step": 23580 }, { "epoch": 0.8546627523467798, "grad_norm": 2.670663346028707, "learning_rate": 5.43782240406478e-07, "loss": 0.8656, "step": 23581 }, { "epoch": 0.8546989960494364, "grad_norm": 2.3156579824854897, "learning_rate": 5.435160844283282e-07, "loss": 0.91, "step": 23582 }, { "epoch": 0.8547352397520931, "grad_norm": 2.049355562642213, "learning_rate": 5.432499898578314e-07, "loss": 0.8577, "step": 23583 }, { "epoch": 0.8547714834547497, "grad_norm": 2.271495908272275, "learning_rate": 5.42983956698655e-07, "loss": 0.9975, "step": 23584 }, { "epoch": 0.8548077271574064, "grad_norm": 2.4811912174702475, "learning_rate": 5.427179849544645e-07, "loss": 0.9738, "step": 23585 }, { "epoch": 0.854843970860063, "grad_norm": 2.4565023311504586, "learning_rate": 5.424520746289242e-07, "loss": 0.8997, "step": 23586 }, { "epoch": 0.8548802145627197, "grad_norm": 2.537791061325942, "learning_rate": 5.421862257256977e-07, "loss": 0.7573, "step": 23587 }, { "epoch": 0.8549164582653764, "grad_norm": 2.3843731583383287, "learning_rate": 5.419204382484495e-07, "loss": 0.8397, "step": 23588 }, { "epoch": 0.8549527019680331, "grad_norm": 2.7684717272292945, "learning_rate": 5.416547122008425e-07, "loss": 0.9979, "step": 23589 }, { "epoch": 0.8549889456706897, "grad_norm": 2.423219674597424, "learning_rate": 5.413890475865363e-07, "loss": 0.8948, "step": 23590 }, { "epoch": 0.8550251893733464, "grad_norm": 2.803116721577519, "learning_rate": 5.411234444091923e-07, "loss": 0.9859, "step": 23591 }, { "epoch": 0.855061433076003, "grad_norm": 1.998014168588001, "learning_rate": 5.408579026724714e-07, "loss": 0.9092, "step": 23592 }, { "epoch": 0.8550976767786597, "grad_norm": 2.497974331963056, "learning_rate": 5.405924223800313e-07, "loss": 0.8039, "step": 23593 }, { "epoch": 0.8551339204813164, "grad_norm": 2.5773222477898896, "learning_rate": 5.403270035355312e-07, "loss": 0.8326, "step": 23594 }, { "epoch": 0.8551701641839731, "grad_norm": 2.1611708810868038, "learning_rate": 5.400616461426261e-07, "loss": 0.9168, "step": 23595 }, { "epoch": 0.8552064078866297, "grad_norm": 2.555088990625704, "learning_rate": 5.397963502049764e-07, "loss": 0.8094, "step": 23596 }, { "epoch": 0.8552426515892864, "grad_norm": 2.289044655575472, "learning_rate": 5.395311157262345e-07, "loss": 0.9253, "step": 23597 }, { "epoch": 0.855278895291943, "grad_norm": 2.2889579599200944, "learning_rate": 5.392659427100572e-07, "loss": 0.7816, "step": 23598 }, { "epoch": 0.8553151389945997, "grad_norm": 2.711259192022298, "learning_rate": 5.390008311600958e-07, "loss": 0.8262, "step": 23599 }, { "epoch": 0.8553513826972563, "grad_norm": 2.3763422904017437, "learning_rate": 5.387357810800059e-07, "loss": 0.7975, "step": 23600 }, { "epoch": 0.8553876263999131, "grad_norm": 2.2168648213096516, "learning_rate": 5.384707924734395e-07, "loss": 1.0274, "step": 23601 }, { "epoch": 0.8554238701025697, "grad_norm": 2.1593719169480283, "learning_rate": 5.382058653440464e-07, "loss": 0.7779, "step": 23602 }, { "epoch": 0.8554601138052264, "grad_norm": 2.5028137169942886, "learning_rate": 5.379409996954776e-07, "loss": 0.815, "step": 23603 }, { "epoch": 0.855496357507883, "grad_norm": 2.5789024249795336, "learning_rate": 5.376761955313842e-07, "loss": 0.8992, "step": 23604 }, { "epoch": 0.8555326012105396, "grad_norm": 2.197273233412459, "learning_rate": 5.374114528554142e-07, "loss": 0.9887, "step": 23605 }, { "epoch": 0.8555688449131963, "grad_norm": 2.422832303174945, "learning_rate": 5.371467716712142e-07, "loss": 0.8446, "step": 23606 }, { "epoch": 0.855605088615853, "grad_norm": 2.310028622414761, "learning_rate": 5.368821519824341e-07, "loss": 0.831, "step": 23607 }, { "epoch": 0.8556413323185097, "grad_norm": 2.5528985509524023, "learning_rate": 5.366175937927176e-07, "loss": 0.7803, "step": 23608 }, { "epoch": 0.8556775760211663, "grad_norm": 2.45739647454388, "learning_rate": 5.36353097105713e-07, "loss": 0.8007, "step": 23609 }, { "epoch": 0.855713819723823, "grad_norm": 2.3658290132076307, "learning_rate": 5.360886619250616e-07, "loss": 0.9088, "step": 23610 }, { "epoch": 0.8557500634264796, "grad_norm": 2.545242421962136, "learning_rate": 5.35824288254409e-07, "loss": 0.8743, "step": 23611 }, { "epoch": 0.8557863071291363, "grad_norm": 2.54929767208882, "learning_rate": 5.355599760973989e-07, "loss": 0.714, "step": 23612 }, { "epoch": 0.8558225508317929, "grad_norm": 2.223589642797872, "learning_rate": 5.352957254576713e-07, "loss": 0.9483, "step": 23613 }, { "epoch": 0.8558587945344497, "grad_norm": 2.17669704152557, "learning_rate": 5.350315363388687e-07, "loss": 0.8575, "step": 23614 }, { "epoch": 0.8558950382371063, "grad_norm": 2.525716794523026, "learning_rate": 5.347674087446302e-07, "loss": 0.8602, "step": 23615 }, { "epoch": 0.855931281939763, "grad_norm": 2.5700915188798663, "learning_rate": 5.345033426785984e-07, "loss": 0.7897, "step": 23616 }, { "epoch": 0.8559675256424196, "grad_norm": 2.4010431784103416, "learning_rate": 5.342393381444083e-07, "loss": 0.9653, "step": 23617 }, { "epoch": 0.8560037693450763, "grad_norm": 2.4541912700370436, "learning_rate": 5.339753951456999e-07, "loss": 0.9111, "step": 23618 }, { "epoch": 0.8560400130477329, "grad_norm": 2.1639362340342965, "learning_rate": 5.337115136861098e-07, "loss": 0.7934, "step": 23619 }, { "epoch": 0.8560762567503897, "grad_norm": 2.3700927521601547, "learning_rate": 5.334476937692734e-07, "loss": 1.0094, "step": 23620 }, { "epoch": 0.8561125004530463, "grad_norm": 2.3759929384677307, "learning_rate": 5.331839353988278e-07, "loss": 0.8977, "step": 23621 }, { "epoch": 0.856148744155703, "grad_norm": 2.570164969091211, "learning_rate": 5.329202385784038e-07, "loss": 0.9004, "step": 23622 }, { "epoch": 0.8561849878583596, "grad_norm": 2.63071600369386, "learning_rate": 5.326566033116392e-07, "loss": 1.0336, "step": 23623 }, { "epoch": 0.8562212315610163, "grad_norm": 2.3582321712249037, "learning_rate": 5.323930296021634e-07, "loss": 0.8438, "step": 23624 }, { "epoch": 0.8562574752636729, "grad_norm": 2.221727979075115, "learning_rate": 5.321295174536117e-07, "loss": 0.9, "step": 23625 }, { "epoch": 0.8562937189663296, "grad_norm": 2.302587955340991, "learning_rate": 5.318660668696107e-07, "loss": 0.8497, "step": 23626 }, { "epoch": 0.8563299626689863, "grad_norm": 2.2345315450545886, "learning_rate": 5.316026778537947e-07, "loss": 1.0077, "step": 23627 }, { "epoch": 0.856366206371643, "grad_norm": 2.372841870763713, "learning_rate": 5.313393504097907e-07, "loss": 1.041, "step": 23628 }, { "epoch": 0.8564024500742996, "grad_norm": 2.003809202861867, "learning_rate": 5.310760845412294e-07, "loss": 0.8694, "step": 23629 }, { "epoch": 0.8564386937769563, "grad_norm": 2.564456807467333, "learning_rate": 5.308128802517354e-07, "loss": 0.7802, "step": 23630 }, { "epoch": 0.8564749374796129, "grad_norm": 2.5507562507358266, "learning_rate": 5.30549737544937e-07, "loss": 0.9475, "step": 23631 }, { "epoch": 0.8565111811822695, "grad_norm": 2.493527246898028, "learning_rate": 5.302866564244614e-07, "loss": 0.8782, "step": 23632 }, { "epoch": 0.8565474248849262, "grad_norm": 2.3059893037875296, "learning_rate": 5.300236368939316e-07, "loss": 0.9215, "step": 23633 }, { "epoch": 0.856583668587583, "grad_norm": 2.5944990444947242, "learning_rate": 5.297606789569721e-07, "loss": 0.8028, "step": 23634 }, { "epoch": 0.8566199122902396, "grad_norm": 2.334280072577997, "learning_rate": 5.294977826172077e-07, "loss": 0.7149, "step": 23635 }, { "epoch": 0.8566561559928962, "grad_norm": 2.251172193737707, "learning_rate": 5.292349478782605e-07, "loss": 0.8072, "step": 23636 }, { "epoch": 0.8566923996955529, "grad_norm": 2.721063542879561, "learning_rate": 5.289721747437515e-07, "loss": 0.9809, "step": 23637 }, { "epoch": 0.8567286433982095, "grad_norm": 2.4680011282630265, "learning_rate": 5.287094632173013e-07, "loss": 0.8049, "step": 23638 }, { "epoch": 0.8567648871008662, "grad_norm": 2.050548035033278, "learning_rate": 5.284468133025322e-07, "loss": 0.7384, "step": 23639 }, { "epoch": 0.8568011308035229, "grad_norm": 2.1740049813171662, "learning_rate": 5.281842250030599e-07, "loss": 0.9362, "step": 23640 }, { "epoch": 0.8568373745061796, "grad_norm": 2.4429122476748333, "learning_rate": 5.279216983225061e-07, "loss": 0.965, "step": 23641 }, { "epoch": 0.8568736182088362, "grad_norm": 2.140420806299874, "learning_rate": 5.276592332644847e-07, "loss": 1.0721, "step": 23642 }, { "epoch": 0.8569098619114929, "grad_norm": 2.3365212200019294, "learning_rate": 5.27396829832616e-07, "loss": 0.8701, "step": 23643 }, { "epoch": 0.8569461056141495, "grad_norm": 2.768533424645787, "learning_rate": 5.271344880305135e-07, "loss": 0.8059, "step": 23644 }, { "epoch": 0.8569823493168062, "grad_norm": 2.4254324072805584, "learning_rate": 5.268722078617933e-07, "loss": 0.9049, "step": 23645 }, { "epoch": 0.8570185930194628, "grad_norm": 2.4573221932136375, "learning_rate": 5.266099893300669e-07, "loss": 0.9157, "step": 23646 }, { "epoch": 0.8570548367221196, "grad_norm": 2.952447815997714, "learning_rate": 5.263478324389514e-07, "loss": 1.0385, "step": 23647 }, { "epoch": 0.8570910804247762, "grad_norm": 2.527748122098825, "learning_rate": 5.260857371920563e-07, "loss": 0.9295, "step": 23648 }, { "epoch": 0.8571273241274329, "grad_norm": 2.497598450788528, "learning_rate": 5.258237035929948e-07, "loss": 1.0342, "step": 23649 }, { "epoch": 0.8571635678300895, "grad_norm": 2.099461976663004, "learning_rate": 5.25561731645376e-07, "loss": 0.7649, "step": 23650 }, { "epoch": 0.8571998115327462, "grad_norm": 2.378593614916376, "learning_rate": 5.252998213528098e-07, "loss": 0.9858, "step": 23651 }, { "epoch": 0.8572360552354028, "grad_norm": 2.507417087497265, "learning_rate": 5.250379727189075e-07, "loss": 0.9344, "step": 23652 }, { "epoch": 0.8572722989380596, "grad_norm": 2.5032007690442963, "learning_rate": 5.24776185747275e-07, "loss": 0.8798, "step": 23653 }, { "epoch": 0.8573085426407162, "grad_norm": 2.2859170502945156, "learning_rate": 5.245144604415192e-07, "loss": 0.6811, "step": 23654 }, { "epoch": 0.8573447863433729, "grad_norm": 2.3219376791483386, "learning_rate": 5.242527968052479e-07, "loss": 0.8698, "step": 23655 }, { "epoch": 0.8573810300460295, "grad_norm": 2.323930902962145, "learning_rate": 5.239911948420673e-07, "loss": 0.9573, "step": 23656 }, { "epoch": 0.8574172737486861, "grad_norm": 2.39469889248686, "learning_rate": 5.237296545555798e-07, "loss": 1.0885, "step": 23657 }, { "epoch": 0.8574535174513428, "grad_norm": 2.6559679377770222, "learning_rate": 5.234681759493903e-07, "loss": 0.8481, "step": 23658 }, { "epoch": 0.8574897611539994, "grad_norm": 2.30183251728415, "learning_rate": 5.232067590271033e-07, "loss": 0.9555, "step": 23659 }, { "epoch": 0.8575260048566562, "grad_norm": 2.0588473327397065, "learning_rate": 5.229454037923187e-07, "loss": 0.7508, "step": 23660 }, { "epoch": 0.8575622485593128, "grad_norm": 2.5879554499196087, "learning_rate": 5.226841102486402e-07, "loss": 0.8601, "step": 23661 }, { "epoch": 0.8575984922619695, "grad_norm": 2.3970781888064194, "learning_rate": 5.224228783996643e-07, "loss": 1.035, "step": 23662 }, { "epoch": 0.8576347359646261, "grad_norm": 2.3620053928844236, "learning_rate": 5.221617082489955e-07, "loss": 0.8419, "step": 23663 }, { "epoch": 0.8576709796672828, "grad_norm": 2.211164275908949, "learning_rate": 5.219005998002291e-07, "loss": 0.7147, "step": 23664 }, { "epoch": 0.8577072233699394, "grad_norm": 2.5785178743132184, "learning_rate": 5.216395530569651e-07, "loss": 0.8836, "step": 23665 }, { "epoch": 0.8577434670725962, "grad_norm": 2.617004746467961, "learning_rate": 5.213785680227984e-07, "loss": 0.8937, "step": 23666 }, { "epoch": 0.8577797107752528, "grad_norm": 2.579145281792979, "learning_rate": 5.211176447013267e-07, "loss": 0.9222, "step": 23667 }, { "epoch": 0.8578159544779095, "grad_norm": 2.2423479443949343, "learning_rate": 5.208567830961464e-07, "loss": 0.8075, "step": 23668 }, { "epoch": 0.8578521981805661, "grad_norm": 2.1130942270145985, "learning_rate": 5.205959832108492e-07, "loss": 0.8841, "step": 23669 }, { "epoch": 0.8578884418832228, "grad_norm": 2.382348509420855, "learning_rate": 5.203352450490307e-07, "loss": 0.7579, "step": 23670 }, { "epoch": 0.8579246855858794, "grad_norm": 2.369853256800722, "learning_rate": 5.200745686142833e-07, "loss": 0.8481, "step": 23671 }, { "epoch": 0.8579609292885361, "grad_norm": 2.3147528272054108, "learning_rate": 5.198139539101999e-07, "loss": 0.6583, "step": 23672 }, { "epoch": 0.8579971729911928, "grad_norm": 2.3738074905253517, "learning_rate": 5.195534009403696e-07, "loss": 0.8737, "step": 23673 }, { "epoch": 0.8580334166938495, "grad_norm": 2.2145618381971874, "learning_rate": 5.192929097083838e-07, "loss": 0.8131, "step": 23674 }, { "epoch": 0.8580696603965061, "grad_norm": 2.0631049521269564, "learning_rate": 5.19032480217832e-07, "loss": 0.779, "step": 23675 }, { "epoch": 0.8581059040991628, "grad_norm": 2.1448884775982298, "learning_rate": 5.187721124723033e-07, "loss": 0.8841, "step": 23676 }, { "epoch": 0.8581421478018194, "grad_norm": 2.430785839698244, "learning_rate": 5.185118064753842e-07, "loss": 0.8606, "step": 23677 }, { "epoch": 0.8581783915044761, "grad_norm": 2.15522393550954, "learning_rate": 5.182515622306616e-07, "loss": 0.7696, "step": 23678 }, { "epoch": 0.8582146352071328, "grad_norm": 2.2475610799521837, "learning_rate": 5.17991379741723e-07, "loss": 0.8596, "step": 23679 }, { "epoch": 0.8582508789097895, "grad_norm": 2.176064834716318, "learning_rate": 5.177312590121519e-07, "loss": 0.8195, "step": 23680 }, { "epoch": 0.8582871226124461, "grad_norm": 2.241902038166419, "learning_rate": 5.17471200045534e-07, "loss": 0.8048, "step": 23681 }, { "epoch": 0.8583233663151028, "grad_norm": 2.244981432276892, "learning_rate": 5.172112028454501e-07, "loss": 0.9148, "step": 23682 }, { "epoch": 0.8583596100177594, "grad_norm": 2.578969476427579, "learning_rate": 5.169512674154869e-07, "loss": 0.9693, "step": 23683 }, { "epoch": 0.858395853720416, "grad_norm": 2.512181122957812, "learning_rate": 5.166913937592227e-07, "loss": 0.8541, "step": 23684 }, { "epoch": 0.8584320974230727, "grad_norm": 2.2580925181934877, "learning_rate": 5.164315818802407e-07, "loss": 0.9413, "step": 23685 }, { "epoch": 0.8584683411257295, "grad_norm": 2.681731816275446, "learning_rate": 5.16171831782119e-07, "loss": 0.8135, "step": 23686 }, { "epoch": 0.8585045848283861, "grad_norm": 2.337292598152935, "learning_rate": 5.159121434684378e-07, "loss": 0.9674, "step": 23687 }, { "epoch": 0.8585408285310427, "grad_norm": 2.4114819402246654, "learning_rate": 5.156525169427762e-07, "loss": 0.8276, "step": 23688 }, { "epoch": 0.8585770722336994, "grad_norm": 2.524506443132893, "learning_rate": 5.153929522087093e-07, "loss": 0.8156, "step": 23689 }, { "epoch": 0.858613315936356, "grad_norm": 2.242280880746249, "learning_rate": 5.151334492698168e-07, "loss": 0.6581, "step": 23690 }, { "epoch": 0.8586495596390127, "grad_norm": 2.369517552561366, "learning_rate": 5.148740081296722e-07, "loss": 0.8188, "step": 23691 }, { "epoch": 0.8586858033416694, "grad_norm": 2.463578155517866, "learning_rate": 5.146146287918524e-07, "loss": 0.9195, "step": 23692 }, { "epoch": 0.8587220470443261, "grad_norm": 2.2156744617514685, "learning_rate": 5.143553112599281e-07, "loss": 0.7904, "step": 23693 }, { "epoch": 0.8587582907469827, "grad_norm": 2.488622591907528, "learning_rate": 5.140960555374769e-07, "loss": 1.0454, "step": 23694 }, { "epoch": 0.8587945344496394, "grad_norm": 2.450508527245817, "learning_rate": 5.138368616280681e-07, "loss": 0.8971, "step": 23695 }, { "epoch": 0.858830778152296, "grad_norm": 2.5061566187872373, "learning_rate": 5.135777295352751e-07, "loss": 0.7867, "step": 23696 }, { "epoch": 0.8588670218549527, "grad_norm": 2.582768047857821, "learning_rate": 5.133186592626666e-07, "loss": 1.053, "step": 23697 }, { "epoch": 0.8589032655576093, "grad_norm": 2.2220481164112886, "learning_rate": 5.130596508138135e-07, "loss": 0.7497, "step": 23698 }, { "epoch": 0.8589395092602661, "grad_norm": 2.286034487380582, "learning_rate": 5.128007041922861e-07, "loss": 0.8443, "step": 23699 }, { "epoch": 0.8589757529629227, "grad_norm": 2.1148450648232013, "learning_rate": 5.125418194016501e-07, "loss": 0.9087, "step": 23700 }, { "epoch": 0.8590119966655794, "grad_norm": 2.1172808314492584, "learning_rate": 5.12282996445474e-07, "loss": 0.8301, "step": 23701 }, { "epoch": 0.859048240368236, "grad_norm": 2.650113884296141, "learning_rate": 5.120242353273242e-07, "loss": 1.005, "step": 23702 }, { "epoch": 0.8590844840708927, "grad_norm": 2.6410452819755283, "learning_rate": 5.117655360507668e-07, "loss": 0.855, "step": 23703 }, { "epoch": 0.8591207277735493, "grad_norm": 2.3821998686903454, "learning_rate": 5.11506898619365e-07, "loss": 0.8254, "step": 23704 }, { "epoch": 0.859156971476206, "grad_norm": 2.3880564198938354, "learning_rate": 5.112483230366839e-07, "loss": 0.7552, "step": 23705 }, { "epoch": 0.8591932151788627, "grad_norm": 2.333161794394551, "learning_rate": 5.109898093062871e-07, "loss": 0.8772, "step": 23706 }, { "epoch": 0.8592294588815194, "grad_norm": 2.1330494017882162, "learning_rate": 5.107313574317347e-07, "loss": 1.0583, "step": 23707 }, { "epoch": 0.859265702584176, "grad_norm": 2.406344395322222, "learning_rate": 5.104729674165898e-07, "loss": 0.9407, "step": 23708 }, { "epoch": 0.8593019462868327, "grad_norm": 2.4002356855515865, "learning_rate": 5.102146392644108e-07, "loss": 0.9805, "step": 23709 }, { "epoch": 0.8593381899894893, "grad_norm": 2.4579951448844457, "learning_rate": 5.099563729787605e-07, "loss": 0.8535, "step": 23710 }, { "epoch": 0.859374433692146, "grad_norm": 2.2770768093332734, "learning_rate": 5.096981685631952e-07, "loss": 1.0931, "step": 23711 }, { "epoch": 0.8594106773948027, "grad_norm": 2.0588132497687015, "learning_rate": 5.094400260212745e-07, "loss": 0.7451, "step": 23712 }, { "epoch": 0.8594469210974593, "grad_norm": 2.162160116947025, "learning_rate": 5.091819453565521e-07, "loss": 1.0328, "step": 23713 }, { "epoch": 0.859483164800116, "grad_norm": 2.1138059433114345, "learning_rate": 5.089239265725887e-07, "loss": 0.7969, "step": 23714 }, { "epoch": 0.8595194085027726, "grad_norm": 2.4983092621526977, "learning_rate": 5.086659696729362e-07, "loss": 0.9463, "step": 23715 }, { "epoch": 0.8595556522054293, "grad_norm": 2.4841297957237165, "learning_rate": 5.084080746611514e-07, "loss": 0.8683, "step": 23716 }, { "epoch": 0.8595918959080859, "grad_norm": 2.3492918980021624, "learning_rate": 5.081502415407863e-07, "loss": 0.9031, "step": 23717 }, { "epoch": 0.8596281396107426, "grad_norm": 2.2638948647080164, "learning_rate": 5.078924703153942e-07, "loss": 1.0213, "step": 23718 }, { "epoch": 0.8596643833133993, "grad_norm": 1.9605723472685705, "learning_rate": 5.076347609885279e-07, "loss": 0.9201, "step": 23719 }, { "epoch": 0.859700627016056, "grad_norm": 2.5140355339822547, "learning_rate": 5.073771135637367e-07, "loss": 0.9284, "step": 23720 }, { "epoch": 0.8597368707187126, "grad_norm": 2.27729227758416, "learning_rate": 5.071195280445718e-07, "loss": 0.8678, "step": 23721 }, { "epoch": 0.8597731144213693, "grad_norm": 2.3361515972552516, "learning_rate": 5.068620044345828e-07, "loss": 0.762, "step": 23722 }, { "epoch": 0.8598093581240259, "grad_norm": 2.5585969647665388, "learning_rate": 5.06604542737319e-07, "loss": 0.8712, "step": 23723 }, { "epoch": 0.8598456018266826, "grad_norm": 2.3866787070141986, "learning_rate": 5.063471429563261e-07, "loss": 1.0237, "step": 23724 }, { "epoch": 0.8598818455293393, "grad_norm": 2.3880064432801027, "learning_rate": 5.060898050951519e-07, "loss": 0.8762, "step": 23725 }, { "epoch": 0.859918089231996, "grad_norm": 2.468053990176584, "learning_rate": 5.05832529157343e-07, "loss": 0.7448, "step": 23726 }, { "epoch": 0.8599543329346526, "grad_norm": 2.302833657744789, "learning_rate": 5.055753151464432e-07, "loss": 0.9452, "step": 23727 }, { "epoch": 0.8599905766373093, "grad_norm": 2.2628513192015047, "learning_rate": 5.053181630659981e-07, "loss": 0.7781, "step": 23728 }, { "epoch": 0.8600268203399659, "grad_norm": 2.508697242081482, "learning_rate": 5.050610729195488e-07, "loss": 0.9122, "step": 23729 }, { "epoch": 0.8600630640426226, "grad_norm": 2.196144075954544, "learning_rate": 5.048040447106417e-07, "loss": 0.7043, "step": 23730 }, { "epoch": 0.8600993077452792, "grad_norm": 2.6826832062849926, "learning_rate": 5.045470784428147e-07, "loss": 0.8053, "step": 23731 }, { "epoch": 0.860135551447936, "grad_norm": 2.379371525288736, "learning_rate": 5.042901741196116e-07, "loss": 0.8273, "step": 23732 }, { "epoch": 0.8601717951505926, "grad_norm": 2.5571391129924477, "learning_rate": 5.040333317445684e-07, "loss": 0.9155, "step": 23733 }, { "epoch": 0.8602080388532493, "grad_norm": 2.5270109295439016, "learning_rate": 5.037765513212295e-07, "loss": 0.9309, "step": 23734 }, { "epoch": 0.8602442825559059, "grad_norm": 2.3418355448210315, "learning_rate": 5.035198328531288e-07, "loss": 0.841, "step": 23735 }, { "epoch": 0.8602805262585626, "grad_norm": 2.630114477286102, "learning_rate": 5.032631763438067e-07, "loss": 0.8368, "step": 23736 }, { "epoch": 0.8603167699612192, "grad_norm": 2.7672175721324868, "learning_rate": 5.030065817967977e-07, "loss": 0.9361, "step": 23737 }, { "epoch": 0.860353013663876, "grad_norm": 2.175019050224867, "learning_rate": 5.027500492156384e-07, "loss": 1.0075, "step": 23738 }, { "epoch": 0.8603892573665326, "grad_norm": 2.1975396754679717, "learning_rate": 5.024935786038642e-07, "loss": 0.6045, "step": 23739 }, { "epoch": 0.8604255010691892, "grad_norm": 2.2911970418290593, "learning_rate": 5.022371699650075e-07, "loss": 0.8323, "step": 23740 }, { "epoch": 0.8604617447718459, "grad_norm": 2.1392280352068322, "learning_rate": 5.01980823302603e-07, "loss": 0.9821, "step": 23741 }, { "epoch": 0.8604979884745025, "grad_norm": 2.0759032939873023, "learning_rate": 5.017245386201819e-07, "loss": 0.8659, "step": 23742 }, { "epoch": 0.8605342321771592, "grad_norm": 2.582962679087937, "learning_rate": 5.014683159212774e-07, "loss": 0.8762, "step": 23743 }, { "epoch": 0.8605704758798158, "grad_norm": 2.2558707858839417, "learning_rate": 5.012121552094174e-07, "loss": 0.834, "step": 23744 }, { "epoch": 0.8606067195824726, "grad_norm": 2.277904215220378, "learning_rate": 5.009560564881339e-07, "loss": 0.8102, "step": 23745 }, { "epoch": 0.8606429632851292, "grad_norm": 2.1764608105168954, "learning_rate": 5.007000197609551e-07, "loss": 0.7955, "step": 23746 }, { "epoch": 0.8606792069877859, "grad_norm": 2.615970587559134, "learning_rate": 5.004440450314085e-07, "loss": 1.017, "step": 23747 }, { "epoch": 0.8607154506904425, "grad_norm": 2.2764105780976416, "learning_rate": 5.001881323030228e-07, "loss": 1.0184, "step": 23748 }, { "epoch": 0.8607516943930992, "grad_norm": 2.1805737411844577, "learning_rate": 4.999322815793212e-07, "loss": 0.6761, "step": 23749 }, { "epoch": 0.8607879380957558, "grad_norm": 2.2576454111698, "learning_rate": 4.996764928638331e-07, "loss": 0.8625, "step": 23750 }, { "epoch": 0.8608241817984126, "grad_norm": 2.5385271625420245, "learning_rate": 4.994207661600803e-07, "loss": 1.0641, "step": 23751 }, { "epoch": 0.8608604255010692, "grad_norm": 2.1377361910878157, "learning_rate": 4.991651014715882e-07, "loss": 0.8854, "step": 23752 }, { "epoch": 0.8608966692037259, "grad_norm": 2.4168299794685275, "learning_rate": 4.989094988018783e-07, "loss": 0.8051, "step": 23753 }, { "epoch": 0.8609329129063825, "grad_norm": 2.192687903344941, "learning_rate": 4.986539581544736e-07, "loss": 0.896, "step": 23754 }, { "epoch": 0.8609691566090392, "grad_norm": 2.165715312533317, "learning_rate": 4.983984795328956e-07, "loss": 0.8297, "step": 23755 }, { "epoch": 0.8610054003116958, "grad_norm": 2.3634057685205145, "learning_rate": 4.981430629406631e-07, "loss": 1.0573, "step": 23756 }, { "epoch": 0.8610416440143525, "grad_norm": 2.2872708228845062, "learning_rate": 4.978877083812967e-07, "loss": 0.6505, "step": 23757 }, { "epoch": 0.8610778877170092, "grad_norm": 2.694754108588291, "learning_rate": 4.976324158583145e-07, "loss": 0.923, "step": 23758 }, { "epoch": 0.8611141314196659, "grad_norm": 2.4995111745919627, "learning_rate": 4.973771853752357e-07, "loss": 0.9589, "step": 23759 }, { "epoch": 0.8611503751223225, "grad_norm": 2.245554146660163, "learning_rate": 4.971220169355756e-07, "loss": 0.7922, "step": 23760 }, { "epoch": 0.8611866188249792, "grad_norm": 2.0767975348281458, "learning_rate": 4.968669105428508e-07, "loss": 0.5479, "step": 23761 }, { "epoch": 0.8612228625276358, "grad_norm": 2.4273109800642563, "learning_rate": 4.966118662005764e-07, "loss": 0.8864, "step": 23762 }, { "epoch": 0.8612591062302924, "grad_norm": 1.9612987252805025, "learning_rate": 4.963568839122679e-07, "loss": 0.8986, "step": 23763 }, { "epoch": 0.8612953499329491, "grad_norm": 2.394829098164574, "learning_rate": 4.961019636814368e-07, "loss": 0.7473, "step": 23764 }, { "epoch": 0.8613315936356059, "grad_norm": 2.0740863549384096, "learning_rate": 4.958471055115971e-07, "loss": 0.8233, "step": 23765 }, { "epoch": 0.8613678373382625, "grad_norm": 2.26222748788928, "learning_rate": 4.95592309406261e-07, "loss": 0.7412, "step": 23766 }, { "epoch": 0.8614040810409191, "grad_norm": 2.4555622507438923, "learning_rate": 4.953375753689377e-07, "loss": 0.7206, "step": 23767 }, { "epoch": 0.8614403247435758, "grad_norm": 2.573035463035175, "learning_rate": 4.950829034031385e-07, "loss": 0.7174, "step": 23768 }, { "epoch": 0.8614765684462324, "grad_norm": 2.3312375669478977, "learning_rate": 4.948282935123722e-07, "loss": 0.9044, "step": 23769 }, { "epoch": 0.8615128121488891, "grad_norm": 2.356371448380544, "learning_rate": 4.945737457001482e-07, "loss": 0.9457, "step": 23770 }, { "epoch": 0.8615490558515458, "grad_norm": 2.3825314171371135, "learning_rate": 4.943192599699725e-07, "loss": 0.9188, "step": 23771 }, { "epoch": 0.8615852995542025, "grad_norm": 2.132688754095017, "learning_rate": 4.94064836325352e-07, "loss": 0.8431, "step": 23772 }, { "epoch": 0.8616215432568591, "grad_norm": 2.388177606478999, "learning_rate": 4.938104747697942e-07, "loss": 0.7467, "step": 23773 }, { "epoch": 0.8616577869595158, "grad_norm": 2.439523053080796, "learning_rate": 4.935561753068019e-07, "loss": 0.9766, "step": 23774 }, { "epoch": 0.8616940306621724, "grad_norm": 2.5637030154224183, "learning_rate": 4.933019379398812e-07, "loss": 1.0001, "step": 23775 }, { "epoch": 0.8617302743648291, "grad_norm": 2.3362672274169802, "learning_rate": 4.930477626725322e-07, "loss": 0.7663, "step": 23776 }, { "epoch": 0.8617665180674857, "grad_norm": 2.128542239799197, "learning_rate": 4.927936495082614e-07, "loss": 0.9499, "step": 23777 }, { "epoch": 0.8618027617701425, "grad_norm": 2.554659759435048, "learning_rate": 4.925395984505671e-07, "loss": 0.9276, "step": 23778 }, { "epoch": 0.8618390054727991, "grad_norm": 2.6472528668100743, "learning_rate": 4.922856095029516e-07, "loss": 0.8945, "step": 23779 }, { "epoch": 0.8618752491754558, "grad_norm": 2.1873177474222913, "learning_rate": 4.920316826689131e-07, "loss": 1.0245, "step": 23780 }, { "epoch": 0.8619114928781124, "grad_norm": 2.273084386236067, "learning_rate": 4.917778179519528e-07, "loss": 1.0098, "step": 23781 }, { "epoch": 0.8619477365807691, "grad_norm": 2.377714205388039, "learning_rate": 4.915240153555672e-07, "loss": 0.9849, "step": 23782 }, { "epoch": 0.8619839802834257, "grad_norm": 2.341448098027636, "learning_rate": 4.912702748832554e-07, "loss": 0.9399, "step": 23783 }, { "epoch": 0.8620202239860825, "grad_norm": 2.53582193135651, "learning_rate": 4.910165965385105e-07, "loss": 0.8643, "step": 23784 }, { "epoch": 0.8620564676887391, "grad_norm": 2.097798529699001, "learning_rate": 4.90762980324831e-07, "loss": 0.7226, "step": 23785 }, { "epoch": 0.8620927113913958, "grad_norm": 2.5571578995229514, "learning_rate": 4.905094262457105e-07, "loss": 0.8013, "step": 23786 }, { "epoch": 0.8621289550940524, "grad_norm": 2.5523506799398934, "learning_rate": 4.902559343046426e-07, "loss": 1.0208, "step": 23787 }, { "epoch": 0.862165198796709, "grad_norm": 2.224968398076282, "learning_rate": 4.900025045051204e-07, "loss": 1.0544, "step": 23788 }, { "epoch": 0.8622014424993657, "grad_norm": 2.23642992422303, "learning_rate": 4.897491368506358e-07, "loss": 0.7469, "step": 23789 }, { "epoch": 0.8622376862020223, "grad_norm": 2.3590582682477885, "learning_rate": 4.894958313446813e-07, "loss": 0.9992, "step": 23790 }, { "epoch": 0.8622739299046791, "grad_norm": 2.4451543065756907, "learning_rate": 4.892425879907459e-07, "loss": 0.9588, "step": 23791 }, { "epoch": 0.8623101736073358, "grad_norm": 2.37388477077256, "learning_rate": 4.889894067923195e-07, "loss": 0.7956, "step": 23792 }, { "epoch": 0.8623464173099924, "grad_norm": 2.1039917957025476, "learning_rate": 4.887362877528917e-07, "loss": 0.7406, "step": 23793 }, { "epoch": 0.862382661012649, "grad_norm": 2.310165232059892, "learning_rate": 4.884832308759485e-07, "loss": 0.886, "step": 23794 }, { "epoch": 0.8624189047153057, "grad_norm": 2.8029182889669664, "learning_rate": 4.882302361649788e-07, "loss": 1.1415, "step": 23795 }, { "epoch": 0.8624551484179623, "grad_norm": 2.7838929778916186, "learning_rate": 4.879773036234664e-07, "loss": 0.9552, "step": 23796 }, { "epoch": 0.8624913921206191, "grad_norm": 2.060974855369053, "learning_rate": 4.877244332548991e-07, "loss": 0.8784, "step": 23797 }, { "epoch": 0.8625276358232757, "grad_norm": 2.274621466649618, "learning_rate": 4.874716250627598e-07, "loss": 0.8313, "step": 23798 }, { "epoch": 0.8625638795259324, "grad_norm": 2.4108311490474894, "learning_rate": 4.872188790505333e-07, "loss": 0.6906, "step": 23799 }, { "epoch": 0.862600123228589, "grad_norm": 2.4157977144052314, "learning_rate": 4.869661952216998e-07, "loss": 0.8314, "step": 23800 }, { "epoch": 0.8626363669312457, "grad_norm": 2.286350382093778, "learning_rate": 4.86713573579744e-07, "loss": 0.92, "step": 23801 }, { "epoch": 0.8626726106339023, "grad_norm": 2.207881306113326, "learning_rate": 4.86461014128145e-07, "loss": 0.9325, "step": 23802 }, { "epoch": 0.862708854336559, "grad_norm": 2.2282214840156467, "learning_rate": 4.862085168703845e-07, "loss": 0.6681, "step": 23803 }, { "epoch": 0.8627450980392157, "grad_norm": 2.053375996186393, "learning_rate": 4.859560818099396e-07, "loss": 0.7668, "step": 23804 }, { "epoch": 0.8627813417418724, "grad_norm": 2.49753141160297, "learning_rate": 4.857037089502903e-07, "loss": 0.879, "step": 23805 }, { "epoch": 0.862817585444529, "grad_norm": 2.2508515490065255, "learning_rate": 4.854513982949144e-07, "loss": 0.8317, "step": 23806 }, { "epoch": 0.8628538291471857, "grad_norm": 2.3043358321061724, "learning_rate": 4.851991498472869e-07, "loss": 0.6594, "step": 23807 }, { "epoch": 0.8628900728498423, "grad_norm": 2.238441299134431, "learning_rate": 4.849469636108844e-07, "loss": 0.7199, "step": 23808 }, { "epoch": 0.862926316552499, "grad_norm": 2.385444359841279, "learning_rate": 4.846948395891826e-07, "loss": 0.8251, "step": 23809 }, { "epoch": 0.8629625602551557, "grad_norm": 2.3841970530217007, "learning_rate": 4.844427777856558e-07, "loss": 0.8311, "step": 23810 }, { "epoch": 0.8629988039578124, "grad_norm": 2.2121133517323996, "learning_rate": 4.841907782037758e-07, "loss": 0.8529, "step": 23811 }, { "epoch": 0.863035047660469, "grad_norm": 2.324622364086846, "learning_rate": 4.839388408470158e-07, "loss": 0.7623, "step": 23812 }, { "epoch": 0.8630712913631257, "grad_norm": 2.3439655652753495, "learning_rate": 4.836869657188487e-07, "loss": 0.6685, "step": 23813 }, { "epoch": 0.8631075350657823, "grad_norm": 2.34217637159779, "learning_rate": 4.834351528227421e-07, "loss": 0.769, "step": 23814 }, { "epoch": 0.863143778768439, "grad_norm": 2.4241886660220073, "learning_rate": 4.831834021621689e-07, "loss": 0.8997, "step": 23815 }, { "epoch": 0.8631800224710956, "grad_norm": 2.2776723479196006, "learning_rate": 4.829317137405948e-07, "loss": 0.8758, "step": 23816 }, { "epoch": 0.8632162661737524, "grad_norm": 2.7664495318241706, "learning_rate": 4.826800875614917e-07, "loss": 0.8205, "step": 23817 }, { "epoch": 0.863252509876409, "grad_norm": 2.299450940613353, "learning_rate": 4.824285236283243e-07, "loss": 0.9359, "step": 23818 }, { "epoch": 0.8632887535790656, "grad_norm": 2.427918636151599, "learning_rate": 4.821770219445604e-07, "loss": 0.9737, "step": 23819 }, { "epoch": 0.8633249972817223, "grad_norm": 2.2983423412335022, "learning_rate": 4.819255825136632e-07, "loss": 0.8203, "step": 23820 }, { "epoch": 0.8633612409843789, "grad_norm": 2.475652982596107, "learning_rate": 4.816742053391004e-07, "loss": 0.7443, "step": 23821 }, { "epoch": 0.8633974846870356, "grad_norm": 2.565826251763643, "learning_rate": 4.814228904243339e-07, "loss": 0.8876, "step": 23822 }, { "epoch": 0.8634337283896923, "grad_norm": 2.35478260289247, "learning_rate": 4.811716377728287e-07, "loss": 0.8379, "step": 23823 }, { "epoch": 0.863469972092349, "grad_norm": 2.4998526481488264, "learning_rate": 4.809204473880436e-07, "loss": 0.742, "step": 23824 }, { "epoch": 0.8635062157950056, "grad_norm": 2.6362491779561226, "learning_rate": 4.806693192734418e-07, "loss": 1.0368, "step": 23825 }, { "epoch": 0.8635424594976623, "grad_norm": 2.353557986725375, "learning_rate": 4.804182534324853e-07, "loss": 0.8439, "step": 23826 }, { "epoch": 0.8635787032003189, "grad_norm": 2.2188139564253504, "learning_rate": 4.801672498686299e-07, "loss": 0.8174, "step": 23827 }, { "epoch": 0.8636149469029756, "grad_norm": 2.590203849889045, "learning_rate": 4.79916308585337e-07, "loss": 0.8775, "step": 23828 }, { "epoch": 0.8636511906056322, "grad_norm": 2.4690995967708216, "learning_rate": 4.796654295860637e-07, "loss": 0.7502, "step": 23829 }, { "epoch": 0.863687434308289, "grad_norm": 2.3601225332130067, "learning_rate": 4.794146128742677e-07, "loss": 0.8955, "step": 23830 }, { "epoch": 0.8637236780109456, "grad_norm": 2.517149890176492, "learning_rate": 4.791638584534031e-07, "loss": 0.9112, "step": 23831 }, { "epoch": 0.8637599217136023, "grad_norm": 2.3039957323850775, "learning_rate": 4.78913166326927e-07, "loss": 0.9939, "step": 23832 }, { "epoch": 0.8637961654162589, "grad_norm": 2.1231540881399584, "learning_rate": 4.786625364982938e-07, "loss": 0.6692, "step": 23833 }, { "epoch": 0.8638324091189156, "grad_norm": 2.0576049628174937, "learning_rate": 4.784119689709554e-07, "loss": 0.9522, "step": 23834 }, { "epoch": 0.8638686528215722, "grad_norm": 2.3513253155312737, "learning_rate": 4.781614637483667e-07, "loss": 0.8797, "step": 23835 }, { "epoch": 0.8639048965242289, "grad_norm": 2.4706590581511154, "learning_rate": 4.779110208339765e-07, "loss": 0.947, "step": 23836 }, { "epoch": 0.8639411402268856, "grad_norm": 2.505311285269817, "learning_rate": 4.776606402312395e-07, "loss": 0.8258, "step": 23837 }, { "epoch": 0.8639773839295423, "grad_norm": 2.3592850883600476, "learning_rate": 4.774103219436027e-07, "loss": 0.6762, "step": 23838 }, { "epoch": 0.8640136276321989, "grad_norm": 2.2536326647012643, "learning_rate": 4.771600659745174e-07, "loss": 0.8213, "step": 23839 }, { "epoch": 0.8640498713348556, "grad_norm": 2.53233021504405, "learning_rate": 4.769098723274302e-07, "loss": 1.0088, "step": 23840 }, { "epoch": 0.8640861150375122, "grad_norm": 2.5318206564998293, "learning_rate": 4.7665974100578936e-07, "loss": 0.9928, "step": 23841 }, { "epoch": 0.8641223587401688, "grad_norm": 2.1239208405768246, "learning_rate": 4.7640967201304234e-07, "loss": 0.83, "step": 23842 }, { "epoch": 0.8641586024428256, "grad_norm": 2.4888270847975864, "learning_rate": 4.76159665352634e-07, "loss": 0.7885, "step": 23843 }, { "epoch": 0.8641948461454823, "grad_norm": 2.452746636965475, "learning_rate": 4.759097210280089e-07, "loss": 0.9904, "step": 23844 }, { "epoch": 0.8642310898481389, "grad_norm": 2.352868351903671, "learning_rate": 4.75659839042612e-07, "loss": 0.7667, "step": 23845 }, { "epoch": 0.8642673335507955, "grad_norm": 2.509665107148742, "learning_rate": 4.754100193998873e-07, "loss": 0.9109, "step": 23846 }, { "epoch": 0.8643035772534522, "grad_norm": 2.456031177658493, "learning_rate": 4.7516026210327526e-07, "loss": 1.0261, "step": 23847 }, { "epoch": 0.8643398209561088, "grad_norm": 3.051770050146515, "learning_rate": 4.749105671562182e-07, "loss": 0.8444, "step": 23848 }, { "epoch": 0.8643760646587655, "grad_norm": 2.144355485754092, "learning_rate": 4.7466093456215723e-07, "loss": 0.8651, "step": 23849 }, { "epoch": 0.8644123083614222, "grad_norm": 2.1656809871322045, "learning_rate": 4.7441136432453183e-07, "loss": 1.0152, "step": 23850 }, { "epoch": 0.8644485520640789, "grad_norm": 2.4670462092294088, "learning_rate": 4.7416185644678094e-07, "loss": 1.0719, "step": 23851 }, { "epoch": 0.8644847957667355, "grad_norm": 2.192075708334397, "learning_rate": 4.7391241093234176e-07, "loss": 0.952, "step": 23852 }, { "epoch": 0.8645210394693922, "grad_norm": 2.581394120353402, "learning_rate": 4.7366302778465325e-07, "loss": 0.9424, "step": 23853 }, { "epoch": 0.8645572831720488, "grad_norm": 2.1751627301139083, "learning_rate": 4.7341370700714986e-07, "loss": 0.8479, "step": 23854 }, { "epoch": 0.8645935268747055, "grad_norm": 2.2609739543387493, "learning_rate": 4.7316444860326827e-07, "loss": 0.9655, "step": 23855 }, { "epoch": 0.8646297705773622, "grad_norm": 2.3278884784700082, "learning_rate": 4.72915252576443e-07, "loss": 1.0046, "step": 23856 }, { "epoch": 0.8646660142800189, "grad_norm": 2.3743181850588098, "learning_rate": 4.7266611893010794e-07, "loss": 0.8657, "step": 23857 }, { "epoch": 0.8647022579826755, "grad_norm": 2.4284495813084654, "learning_rate": 4.7241704766769535e-07, "loss": 0.8854, "step": 23858 }, { "epoch": 0.8647385016853322, "grad_norm": 2.0851377337433523, "learning_rate": 4.721680387926375e-07, "loss": 0.8073, "step": 23859 }, { "epoch": 0.8647747453879888, "grad_norm": 2.4462722049162156, "learning_rate": 4.7191909230836663e-07, "loss": 0.8711, "step": 23860 }, { "epoch": 0.8648109890906455, "grad_norm": 2.2944327215331666, "learning_rate": 4.716702082183111e-07, "loss": 0.9074, "step": 23861 }, { "epoch": 0.8648472327933021, "grad_norm": 2.135207597680098, "learning_rate": 4.7142138652590266e-07, "loss": 0.8568, "step": 23862 }, { "epoch": 0.8648834764959589, "grad_norm": 2.0120098422524064, "learning_rate": 4.7117262723456624e-07, "loss": 0.707, "step": 23863 }, { "epoch": 0.8649197201986155, "grad_norm": 2.2960067141340326, "learning_rate": 4.7092393034773476e-07, "loss": 0.699, "step": 23864 }, { "epoch": 0.8649559639012722, "grad_norm": 2.5457026816009045, "learning_rate": 4.70675295868831e-07, "loss": 0.8784, "step": 23865 }, { "epoch": 0.8649922076039288, "grad_norm": 2.290057079347987, "learning_rate": 4.704267238012833e-07, "loss": 0.9128, "step": 23866 }, { "epoch": 0.8650284513065855, "grad_norm": 2.263025766848789, "learning_rate": 4.701782141485145e-07, "loss": 0.9598, "step": 23867 }, { "epoch": 0.8650646950092421, "grad_norm": 2.3586708709049167, "learning_rate": 4.6992976691395186e-07, "loss": 0.9008, "step": 23868 }, { "epoch": 0.8651009387118989, "grad_norm": 2.0192142026316886, "learning_rate": 4.6968138210101656e-07, "loss": 0.827, "step": 23869 }, { "epoch": 0.8651371824145555, "grad_norm": 2.2511605066912255, "learning_rate": 4.6943305971313305e-07, "loss": 0.9008, "step": 23870 }, { "epoch": 0.8651734261172122, "grad_norm": 2.26181967324152, "learning_rate": 4.691847997537208e-07, "loss": 0.7268, "step": 23871 }, { "epoch": 0.8652096698198688, "grad_norm": 2.3061047499346934, "learning_rate": 4.689366022262015e-07, "loss": 0.9356, "step": 23872 }, { "epoch": 0.8652459135225254, "grad_norm": 2.2086668761793855, "learning_rate": 4.6868846713399695e-07, "loss": 0.7579, "step": 23873 }, { "epoch": 0.8652821572251821, "grad_norm": 2.180978564129676, "learning_rate": 4.6844039448052383e-07, "loss": 0.9102, "step": 23874 }, { "epoch": 0.8653184009278387, "grad_norm": 2.3146627471014836, "learning_rate": 4.6819238426920155e-07, "loss": 0.8849, "step": 23875 }, { "epoch": 0.8653546446304955, "grad_norm": 2.5507490994640114, "learning_rate": 4.6794443650344747e-07, "loss": 0.8816, "step": 23876 }, { "epoch": 0.8653908883331521, "grad_norm": 2.5062009353421444, "learning_rate": 4.676965511866788e-07, "loss": 0.9971, "step": 23877 }, { "epoch": 0.8654271320358088, "grad_norm": 2.62679512405682, "learning_rate": 4.6744872832231e-07, "loss": 1.0037, "step": 23878 }, { "epoch": 0.8654633757384654, "grad_norm": 2.423212510267871, "learning_rate": 4.672009679137568e-07, "loss": 0.9467, "step": 23879 }, { "epoch": 0.8654996194411221, "grad_norm": 2.8483824582145245, "learning_rate": 4.66953269964433e-07, "loss": 0.8659, "step": 23880 }, { "epoch": 0.8655358631437787, "grad_norm": 2.1203104341284704, "learning_rate": 4.6670563447775154e-07, "loss": 0.9041, "step": 23881 }, { "epoch": 0.8655721068464355, "grad_norm": 2.358625533523453, "learning_rate": 4.664580614571251e-07, "loss": 0.8018, "step": 23882 }, { "epoch": 0.8656083505490921, "grad_norm": 2.2320940590436398, "learning_rate": 4.6621055090596333e-07, "loss": 0.8946, "step": 23883 }, { "epoch": 0.8656445942517488, "grad_norm": 2.2745634201943488, "learning_rate": 4.659631028276801e-07, "loss": 0.7462, "step": 23884 }, { "epoch": 0.8656808379544054, "grad_norm": 2.4343349195038124, "learning_rate": 4.657157172256821e-07, "loss": 0.9177, "step": 23885 }, { "epoch": 0.8657170816570621, "grad_norm": 2.0928869362722984, "learning_rate": 4.6546839410337994e-07, "loss": 0.882, "step": 23886 }, { "epoch": 0.8657533253597187, "grad_norm": 2.391680109549659, "learning_rate": 4.652211334641793e-07, "loss": 0.6748, "step": 23887 }, { "epoch": 0.8657895690623754, "grad_norm": 2.350828360309917, "learning_rate": 4.649739353114907e-07, "loss": 0.8679, "step": 23888 }, { "epoch": 0.8658258127650321, "grad_norm": 2.545483467646494, "learning_rate": 4.647267996487176e-07, "loss": 0.8937, "step": 23889 }, { "epoch": 0.8658620564676888, "grad_norm": 2.319468120826993, "learning_rate": 4.6447972647926776e-07, "loss": 0.8091, "step": 23890 }, { "epoch": 0.8658983001703454, "grad_norm": 2.5850282590744817, "learning_rate": 4.6423271580654294e-07, "loss": 0.8556, "step": 23891 }, { "epoch": 0.8659345438730021, "grad_norm": 2.4359496482132488, "learning_rate": 4.639857676339482e-07, "loss": 1.118, "step": 23892 }, { "epoch": 0.8659707875756587, "grad_norm": 2.0994793740480744, "learning_rate": 4.637388819648875e-07, "loss": 0.9815, "step": 23893 }, { "epoch": 0.8660070312783154, "grad_norm": 2.8685942644699223, "learning_rate": 4.6349205880276026e-07, "loss": 0.8889, "step": 23894 }, { "epoch": 0.8660432749809721, "grad_norm": 2.2777281889701966, "learning_rate": 4.6324529815096886e-07, "loss": 0.8702, "step": 23895 }, { "epoch": 0.8660795186836288, "grad_norm": 2.5891090122251152, "learning_rate": 4.6299860001291387e-07, "loss": 0.8705, "step": 23896 }, { "epoch": 0.8661157623862854, "grad_norm": 2.4225254853775273, "learning_rate": 4.627519643919948e-07, "loss": 0.9838, "step": 23897 }, { "epoch": 0.866152006088942, "grad_norm": 2.6837895995453747, "learning_rate": 4.6250539129160887e-07, "loss": 0.9191, "step": 23898 }, { "epoch": 0.8661882497915987, "grad_norm": 2.5724640861673684, "learning_rate": 4.6225888071515455e-07, "loss": 0.7721, "step": 23899 }, { "epoch": 0.8662244934942553, "grad_norm": 2.3048685651542904, "learning_rate": 4.6201243266602913e-07, "loss": 0.8552, "step": 23900 }, { "epoch": 0.866260737196912, "grad_norm": 2.489371219105013, "learning_rate": 4.617660471476271e-07, "loss": 0.8529, "step": 23901 }, { "epoch": 0.8662969808995687, "grad_norm": 2.2479481591915094, "learning_rate": 4.6151972416334514e-07, "loss": 0.8041, "step": 23902 }, { "epoch": 0.8663332246022254, "grad_norm": 2.5011418273443478, "learning_rate": 4.61273463716575e-07, "loss": 0.7888, "step": 23903 }, { "epoch": 0.866369468304882, "grad_norm": 2.406671513248866, "learning_rate": 4.610272658107134e-07, "loss": 0.9242, "step": 23904 }, { "epoch": 0.8664057120075387, "grad_norm": 2.58563796004495, "learning_rate": 4.6078113044914996e-07, "loss": 0.9415, "step": 23905 }, { "epoch": 0.8664419557101953, "grad_norm": 2.453602386515844, "learning_rate": 4.605350576352785e-07, "loss": 0.9974, "step": 23906 }, { "epoch": 0.866478199412852, "grad_norm": 2.1561233163345084, "learning_rate": 4.602890473724864e-07, "loss": 0.7483, "step": 23907 }, { "epoch": 0.8665144431155086, "grad_norm": 2.4226046183462118, "learning_rate": 4.600430996641675e-07, "loss": 1.0061, "step": 23908 }, { "epoch": 0.8665506868181654, "grad_norm": 2.2994211264083595, "learning_rate": 4.5979721451370863e-07, "loss": 1.0002, "step": 23909 }, { "epoch": 0.866586930520822, "grad_norm": 2.6003234951912106, "learning_rate": 4.595513919244987e-07, "loss": 0.7452, "step": 23910 }, { "epoch": 0.8666231742234787, "grad_norm": 2.3170171670479327, "learning_rate": 4.5930563189992394e-07, "loss": 0.9048, "step": 23911 }, { "epoch": 0.8666594179261353, "grad_norm": 2.3212320310715264, "learning_rate": 4.590599344433716e-07, "loss": 0.8926, "step": 23912 }, { "epoch": 0.866695661628792, "grad_norm": 2.5874926043539226, "learning_rate": 4.588142995582273e-07, "loss": 1.0506, "step": 23913 }, { "epoch": 0.8667319053314486, "grad_norm": 2.3570600403241087, "learning_rate": 4.5856872724787503e-07, "loss": 0.975, "step": 23914 }, { "epoch": 0.8667681490341054, "grad_norm": 2.3330532301865348, "learning_rate": 4.583232175156993e-07, "loss": 0.9096, "step": 23915 }, { "epoch": 0.866804392736762, "grad_norm": 2.409213857473894, "learning_rate": 4.5807777036508295e-07, "loss": 0.9109, "step": 23916 }, { "epoch": 0.8668406364394187, "grad_norm": 2.35367737333534, "learning_rate": 4.578323857994088e-07, "loss": 0.8639, "step": 23917 }, { "epoch": 0.8668768801420753, "grad_norm": 2.085933221291665, "learning_rate": 4.5758706382205643e-07, "loss": 0.9399, "step": 23918 }, { "epoch": 0.866913123844732, "grad_norm": 2.3921679135829748, "learning_rate": 4.5734180443640754e-07, "loss": 0.9107, "step": 23919 }, { "epoch": 0.8669493675473886, "grad_norm": 2.691212101201168, "learning_rate": 4.570966076458416e-07, "loss": 0.8708, "step": 23920 }, { "epoch": 0.8669856112500453, "grad_norm": 2.4861698887830683, "learning_rate": 4.5685147345373603e-07, "loss": 0.9169, "step": 23921 }, { "epoch": 0.867021854952702, "grad_norm": 2.4819617124299547, "learning_rate": 4.566064018634708e-07, "loss": 0.8712, "step": 23922 }, { "epoch": 0.8670580986553587, "grad_norm": 2.3216088638759875, "learning_rate": 4.5636139287842e-07, "loss": 0.9195, "step": 23923 }, { "epoch": 0.8670943423580153, "grad_norm": 2.641316480899877, "learning_rate": 4.5611644650196243e-07, "loss": 0.8928, "step": 23924 }, { "epoch": 0.867130586060672, "grad_norm": 2.1909590293079564, "learning_rate": 4.558715627374716e-07, "loss": 0.8427, "step": 23925 }, { "epoch": 0.8671668297633286, "grad_norm": 2.3070101861478327, "learning_rate": 4.5562674158832367e-07, "loss": 0.7716, "step": 23926 }, { "epoch": 0.8672030734659852, "grad_norm": 2.245049081578127, "learning_rate": 4.553819830578898e-07, "loss": 0.8343, "step": 23927 }, { "epoch": 0.867239317168642, "grad_norm": 2.178239360258683, "learning_rate": 4.5513728714954355e-07, "loss": 0.9475, "step": 23928 }, { "epoch": 0.8672755608712986, "grad_norm": 2.363123580218539, "learning_rate": 4.548926538666576e-07, "loss": 0.6886, "step": 23929 }, { "epoch": 0.8673118045739553, "grad_norm": 2.4672591470336718, "learning_rate": 4.5464808321260156e-07, "loss": 0.9171, "step": 23930 }, { "epoch": 0.8673480482766119, "grad_norm": 2.2628449025328243, "learning_rate": 4.544035751907455e-07, "loss": 0.9422, "step": 23931 }, { "epoch": 0.8673842919792686, "grad_norm": 2.3672275392313975, "learning_rate": 4.5415912980445954e-07, "loss": 0.8442, "step": 23932 }, { "epoch": 0.8674205356819252, "grad_norm": 2.5683603908103327, "learning_rate": 4.539147470571126e-07, "loss": 0.9832, "step": 23933 }, { "epoch": 0.8674567793845819, "grad_norm": 2.4988347505068997, "learning_rate": 4.5367042695206865e-07, "loss": 0.9624, "step": 23934 }, { "epoch": 0.8674930230872386, "grad_norm": 2.197263447169871, "learning_rate": 4.534261694926989e-07, "loss": 0.9014, "step": 23935 }, { "epoch": 0.8675292667898953, "grad_norm": 2.5434298475134254, "learning_rate": 4.5318197468236567e-07, "loss": 0.8406, "step": 23936 }, { "epoch": 0.8675655104925519, "grad_norm": 2.550283778253167, "learning_rate": 4.529378425244363e-07, "loss": 0.9904, "step": 23937 }, { "epoch": 0.8676017541952086, "grad_norm": 2.5584356549324623, "learning_rate": 4.5269377302227245e-07, "loss": 1.0912, "step": 23938 }, { "epoch": 0.8676379978978652, "grad_norm": 2.3748514798426856, "learning_rate": 4.524497661792382e-07, "loss": 0.7933, "step": 23939 }, { "epoch": 0.8676742416005219, "grad_norm": 2.5158759571774256, "learning_rate": 4.5220582199869635e-07, "loss": 0.8371, "step": 23940 }, { "epoch": 0.8677104853031786, "grad_norm": 2.0491332880234068, "learning_rate": 4.519619404840075e-07, "loss": 0.7993, "step": 23941 }, { "epoch": 0.8677467290058353, "grad_norm": 2.492604674149336, "learning_rate": 4.5171812163853244e-07, "loss": 0.9338, "step": 23942 }, { "epoch": 0.8677829727084919, "grad_norm": 2.404991531881203, "learning_rate": 4.514743654656312e-07, "loss": 0.9045, "step": 23943 }, { "epoch": 0.8678192164111486, "grad_norm": 2.4664662497862886, "learning_rate": 4.5123067196866264e-07, "loss": 0.7464, "step": 23944 }, { "epoch": 0.8678554601138052, "grad_norm": 2.2110230438886442, "learning_rate": 4.5098704115098426e-07, "loss": 0.8576, "step": 23945 }, { "epoch": 0.8678917038164619, "grad_norm": 2.3199855792652357, "learning_rate": 4.5074347301595267e-07, "loss": 0.8693, "step": 23946 }, { "epoch": 0.8679279475191185, "grad_norm": 2.6989486666801414, "learning_rate": 4.504999675669258e-07, "loss": 0.8894, "step": 23947 }, { "epoch": 0.8679641912217753, "grad_norm": 2.222480894291884, "learning_rate": 4.5025652480725713e-07, "loss": 0.865, "step": 23948 }, { "epoch": 0.8680004349244319, "grad_norm": 2.517734669608076, "learning_rate": 4.500131447403028e-07, "loss": 0.863, "step": 23949 }, { "epoch": 0.8680366786270886, "grad_norm": 2.569882434515411, "learning_rate": 4.49769827369414e-07, "loss": 0.9253, "step": 23950 }, { "epoch": 0.8680729223297452, "grad_norm": 2.259948034978307, "learning_rate": 4.495265726979464e-07, "loss": 0.8455, "step": 23951 }, { "epoch": 0.8681091660324018, "grad_norm": 2.422729401696986, "learning_rate": 4.4928338072925015e-07, "loss": 0.9173, "step": 23952 }, { "epoch": 0.8681454097350585, "grad_norm": 2.369118576098491, "learning_rate": 4.4904025146667753e-07, "loss": 0.8893, "step": 23953 }, { "epoch": 0.8681816534377153, "grad_norm": 2.261234195556091, "learning_rate": 4.487971849135758e-07, "loss": 0.7306, "step": 23954 }, { "epoch": 0.8682178971403719, "grad_norm": 2.362571173437951, "learning_rate": 4.4855418107329853e-07, "loss": 0.7792, "step": 23955 }, { "epoch": 0.8682541408430285, "grad_norm": 2.2787258606844563, "learning_rate": 4.483112399491912e-07, "loss": 0.8915, "step": 23956 }, { "epoch": 0.8682903845456852, "grad_norm": 2.4584594991016773, "learning_rate": 4.4806836154460296e-07, "loss": 0.9852, "step": 23957 }, { "epoch": 0.8683266282483418, "grad_norm": 2.589393185288741, "learning_rate": 4.4782554586287886e-07, "loss": 0.9795, "step": 23958 }, { "epoch": 0.8683628719509985, "grad_norm": 2.474643906472245, "learning_rate": 4.4758279290736563e-07, "loss": 0.7852, "step": 23959 }, { "epoch": 0.8683991156536551, "grad_norm": 2.7155796032519253, "learning_rate": 4.473401026814089e-07, "loss": 0.9708, "step": 23960 }, { "epoch": 0.8684353593563119, "grad_norm": 2.230134576210552, "learning_rate": 4.4709747518835167e-07, "loss": 0.8911, "step": 23961 }, { "epoch": 0.8684716030589685, "grad_norm": 2.194680713006252, "learning_rate": 4.4685491043153785e-07, "loss": 0.9461, "step": 23962 }, { "epoch": 0.8685078467616252, "grad_norm": 2.439882138780517, "learning_rate": 4.4661240841430917e-07, "loss": 0.9219, "step": 23963 }, { "epoch": 0.8685440904642818, "grad_norm": 2.607011006814174, "learning_rate": 4.463699691400092e-07, "loss": 0.9738, "step": 23964 }, { "epoch": 0.8685803341669385, "grad_norm": 2.3959586124525565, "learning_rate": 4.4612759261197626e-07, "loss": 0.7701, "step": 23965 }, { "epoch": 0.8686165778695951, "grad_norm": 2.3853057908468425, "learning_rate": 4.4588527883355046e-07, "loss": 0.8406, "step": 23966 }, { "epoch": 0.8686528215722519, "grad_norm": 2.2046347235070414, "learning_rate": 4.4564302780807254e-07, "loss": 0.8642, "step": 23967 }, { "epoch": 0.8686890652749085, "grad_norm": 2.346698940032401, "learning_rate": 4.4540083953887816e-07, "loss": 0.7823, "step": 23968 }, { "epoch": 0.8687253089775652, "grad_norm": 2.3675856981541017, "learning_rate": 4.4515871402930675e-07, "loss": 0.916, "step": 23969 }, { "epoch": 0.8687615526802218, "grad_norm": 2.4373459210929864, "learning_rate": 4.449166512826919e-07, "loss": 0.8046, "step": 23970 }, { "epoch": 0.8687977963828785, "grad_norm": 2.4153570764597667, "learning_rate": 4.446746513023725e-07, "loss": 0.9235, "step": 23971 }, { "epoch": 0.8688340400855351, "grad_norm": 2.5482212192801583, "learning_rate": 4.444327140916804e-07, "loss": 0.9619, "step": 23972 }, { "epoch": 0.8688702837881918, "grad_norm": 2.2313570139623975, "learning_rate": 4.441908396539513e-07, "loss": 0.8338, "step": 23973 }, { "epoch": 0.8689065274908485, "grad_norm": 2.345323644045819, "learning_rate": 4.4394902799251516e-07, "loss": 0.8246, "step": 23974 }, { "epoch": 0.8689427711935052, "grad_norm": 2.346561366266101, "learning_rate": 4.4370727911070834e-07, "loss": 0.7253, "step": 23975 }, { "epoch": 0.8689790148961618, "grad_norm": 2.323503591371531, "learning_rate": 4.434655930118581e-07, "loss": 0.878, "step": 23976 }, { "epoch": 0.8690152585988185, "grad_norm": 2.175879527806312, "learning_rate": 4.432239696992974e-07, "loss": 0.6419, "step": 23977 }, { "epoch": 0.8690515023014751, "grad_norm": 2.7567875944398135, "learning_rate": 4.42982409176354e-07, "loss": 0.8191, "step": 23978 }, { "epoch": 0.8690877460041317, "grad_norm": 2.403368520446927, "learning_rate": 4.4274091144635653e-07, "loss": 0.897, "step": 23979 }, { "epoch": 0.8691239897067884, "grad_norm": 2.368582754045505, "learning_rate": 4.4249947651263445e-07, "loss": 0.8278, "step": 23980 }, { "epoch": 0.8691602334094451, "grad_norm": 2.1590927969777485, "learning_rate": 4.4225810437851234e-07, "loss": 0.7251, "step": 23981 }, { "epoch": 0.8691964771121018, "grad_norm": 2.0345038631206247, "learning_rate": 4.4201679504731696e-07, "loss": 0.833, "step": 23982 }, { "epoch": 0.8692327208147584, "grad_norm": 2.087689278802048, "learning_rate": 4.4177554852237403e-07, "loss": 0.8052, "step": 23983 }, { "epoch": 0.8692689645174151, "grad_norm": 2.4579078630187166, "learning_rate": 4.4153436480700753e-07, "loss": 0.7355, "step": 23984 }, { "epoch": 0.8693052082200717, "grad_norm": 2.433667319663084, "learning_rate": 4.412932439045403e-07, "loss": 0.9588, "step": 23985 }, { "epoch": 0.8693414519227284, "grad_norm": 2.6950035605032907, "learning_rate": 4.410521858182948e-07, "loss": 0.9277, "step": 23986 }, { "epoch": 0.8693776956253851, "grad_norm": 2.2825836491167038, "learning_rate": 4.408111905515938e-07, "loss": 0.9114, "step": 23987 }, { "epoch": 0.8694139393280418, "grad_norm": 3.4561376115432845, "learning_rate": 4.405702581077564e-07, "loss": 0.9818, "step": 23988 }, { "epoch": 0.8694501830306984, "grad_norm": 2.466370524328721, "learning_rate": 4.403293884901044e-07, "loss": 0.8678, "step": 23989 }, { "epoch": 0.8694864267333551, "grad_norm": 2.490668211264791, "learning_rate": 4.400885817019546e-07, "loss": 0.9607, "step": 23990 }, { "epoch": 0.8695226704360117, "grad_norm": 2.698899838688806, "learning_rate": 4.398478377466275e-07, "loss": 0.8658, "step": 23991 }, { "epoch": 0.8695589141386684, "grad_norm": 2.517092802586739, "learning_rate": 4.3960715662743845e-07, "loss": 0.8004, "step": 23992 }, { "epoch": 0.869595157841325, "grad_norm": 2.3748909584865507, "learning_rate": 4.393665383477058e-07, "loss": 0.8375, "step": 23993 }, { "epoch": 0.8696314015439818, "grad_norm": 2.2587508105017284, "learning_rate": 4.391259829107425e-07, "loss": 0.9939, "step": 23994 }, { "epoch": 0.8696676452466384, "grad_norm": 2.552365488222776, "learning_rate": 4.3888549031986636e-07, "loss": 0.9812, "step": 23995 }, { "epoch": 0.8697038889492951, "grad_norm": 2.3663105817655206, "learning_rate": 4.386450605783887e-07, "loss": 0.9769, "step": 23996 }, { "epoch": 0.8697401326519517, "grad_norm": 2.37681649879721, "learning_rate": 4.3840469368962414e-07, "loss": 0.7684, "step": 23997 }, { "epoch": 0.8697763763546084, "grad_norm": 2.4027548052522825, "learning_rate": 4.381643896568838e-07, "loss": 0.8769, "step": 23998 }, { "epoch": 0.869812620057265, "grad_norm": 2.3660147472561786, "learning_rate": 4.3792414848347895e-07, "loss": 0.9445, "step": 23999 }, { "epoch": 0.8698488637599218, "grad_norm": 2.3910571464435897, "learning_rate": 4.3768397017272144e-07, "loss": 0.777, "step": 24000 }, { "epoch": 0.8698851074625784, "grad_norm": 2.5079449216410308, "learning_rate": 4.374438547279186e-07, "loss": 0.9585, "step": 24001 }, { "epoch": 0.8699213511652351, "grad_norm": 2.4175815040767246, "learning_rate": 4.3720380215238e-07, "loss": 0.7979, "step": 24002 }, { "epoch": 0.8699575948678917, "grad_norm": 2.1910327220748194, "learning_rate": 4.3696381244941353e-07, "loss": 0.9265, "step": 24003 }, { "epoch": 0.8699938385705484, "grad_norm": 2.480920594966816, "learning_rate": 4.3672388562232714e-07, "loss": 0.7897, "step": 24004 }, { "epoch": 0.870030082273205, "grad_norm": 2.2642209848211268, "learning_rate": 4.364840216744248e-07, "loss": 0.813, "step": 24005 }, { "epoch": 0.8700663259758616, "grad_norm": 2.709964445118315, "learning_rate": 4.3624422060901283e-07, "loss": 0.9411, "step": 24006 }, { "epoch": 0.8701025696785184, "grad_norm": 2.32552531919069, "learning_rate": 4.360044824293957e-07, "loss": 0.8856, "step": 24007 }, { "epoch": 0.870138813381175, "grad_norm": 2.2625847440549594, "learning_rate": 4.357648071388765e-07, "loss": 0.8837, "step": 24008 }, { "epoch": 0.8701750570838317, "grad_norm": 2.6488643264540777, "learning_rate": 4.355251947407585e-07, "loss": 0.869, "step": 24009 }, { "epoch": 0.8702113007864883, "grad_norm": 2.3598192301061043, "learning_rate": 4.3528564523834084e-07, "loss": 0.9186, "step": 24010 }, { "epoch": 0.870247544489145, "grad_norm": 2.4309683689373176, "learning_rate": 4.350461586349286e-07, "loss": 1.0004, "step": 24011 }, { "epoch": 0.8702837881918016, "grad_norm": 2.3715073796968085, "learning_rate": 4.3480673493381806e-07, "loss": 0.939, "step": 24012 }, { "epoch": 0.8703200318944584, "grad_norm": 2.092314729412826, "learning_rate": 4.3456737413830995e-07, "loss": 0.8807, "step": 24013 }, { "epoch": 0.870356275597115, "grad_norm": 2.048884549573572, "learning_rate": 4.3432807625170326e-07, "loss": 0.8421, "step": 24014 }, { "epoch": 0.8703925192997717, "grad_norm": 2.162267144238185, "learning_rate": 4.340888412772937e-07, "loss": 0.9136, "step": 24015 }, { "epoch": 0.8704287630024283, "grad_norm": 2.2628202321947954, "learning_rate": 4.3384966921837965e-07, "loss": 0.9923, "step": 24016 }, { "epoch": 0.870465006705085, "grad_norm": 2.2481151886507034, "learning_rate": 4.3361056007825366e-07, "loss": 0.7864, "step": 24017 }, { "epoch": 0.8705012504077416, "grad_norm": 2.1765150914748728, "learning_rate": 4.3337151386021406e-07, "loss": 0.9101, "step": 24018 }, { "epoch": 0.8705374941103983, "grad_norm": 2.453832220819569, "learning_rate": 4.3313253056755213e-07, "loss": 1.0111, "step": 24019 }, { "epoch": 0.870573737813055, "grad_norm": 2.359684365889092, "learning_rate": 4.32893610203563e-07, "loss": 0.9134, "step": 24020 }, { "epoch": 0.8706099815157117, "grad_norm": 2.4414604161817812, "learning_rate": 4.3265475277153635e-07, "loss": 0.9062, "step": 24021 }, { "epoch": 0.8706462252183683, "grad_norm": 2.443499112533795, "learning_rate": 4.324159582747667e-07, "loss": 0.7677, "step": 24022 }, { "epoch": 0.870682468921025, "grad_norm": 2.241901496383675, "learning_rate": 4.3217722671654137e-07, "loss": 0.9335, "step": 24023 }, { "epoch": 0.8707187126236816, "grad_norm": 2.2619426240432214, "learning_rate": 4.3193855810015286e-07, "loss": 0.7595, "step": 24024 }, { "epoch": 0.8707549563263383, "grad_norm": 2.5124807294789346, "learning_rate": 4.316999524288873e-07, "loss": 0.8282, "step": 24025 }, { "epoch": 0.870791200028995, "grad_norm": 2.3614448448599177, "learning_rate": 4.3146140970603325e-07, "loss": 0.5612, "step": 24026 }, { "epoch": 0.8708274437316517, "grad_norm": 2.084493375072217, "learning_rate": 4.312229299348791e-07, "loss": 0.828, "step": 24027 }, { "epoch": 0.8708636874343083, "grad_norm": 2.434645586734507, "learning_rate": 4.3098451311870904e-07, "loss": 0.7999, "step": 24028 }, { "epoch": 0.870899931136965, "grad_norm": 2.4828221383473865, "learning_rate": 4.307461592608092e-07, "loss": 0.8908, "step": 24029 }, { "epoch": 0.8709361748396216, "grad_norm": 2.3509909502004636, "learning_rate": 4.3050786836446367e-07, "loss": 0.873, "step": 24030 }, { "epoch": 0.8709724185422782, "grad_norm": 2.6184308696328427, "learning_rate": 4.3026964043295704e-07, "loss": 0.923, "step": 24031 }, { "epoch": 0.8710086622449349, "grad_norm": 2.2099899984254545, "learning_rate": 4.300314754695706e-07, "loss": 1.053, "step": 24032 }, { "epoch": 0.8710449059475917, "grad_norm": 2.3209765739383257, "learning_rate": 4.2979337347758607e-07, "loss": 0.8802, "step": 24033 }, { "epoch": 0.8710811496502483, "grad_norm": 2.3770400949123482, "learning_rate": 4.2955533446028597e-07, "loss": 0.7631, "step": 24034 }, { "epoch": 0.871117393352905, "grad_norm": 2.7382057273857496, "learning_rate": 4.2931735842094814e-07, "loss": 0.971, "step": 24035 }, { "epoch": 0.8711536370555616, "grad_norm": 2.197675487276423, "learning_rate": 4.290794453628544e-07, "loss": 1.0806, "step": 24036 }, { "epoch": 0.8711898807582182, "grad_norm": 2.324565727944959, "learning_rate": 4.2884159528927936e-07, "loss": 0.7963, "step": 24037 }, { "epoch": 0.8712261244608749, "grad_norm": 2.810252792377439, "learning_rate": 4.2860380820350433e-07, "loss": 0.8237, "step": 24038 }, { "epoch": 0.8712623681635315, "grad_norm": 2.2861545980958664, "learning_rate": 4.2836608410880276e-07, "loss": 0.6815, "step": 24039 }, { "epoch": 0.8712986118661883, "grad_norm": 2.563248343395592, "learning_rate": 4.2812842300845313e-07, "loss": 0.7702, "step": 24040 }, { "epoch": 0.8713348555688449, "grad_norm": 2.1412956312026417, "learning_rate": 4.2789082490572733e-07, "loss": 0.9351, "step": 24041 }, { "epoch": 0.8713710992715016, "grad_norm": 2.476473892089016, "learning_rate": 4.276532898039021e-07, "loss": 0.9242, "step": 24042 }, { "epoch": 0.8714073429741582, "grad_norm": 2.2069428476849526, "learning_rate": 4.2741581770624873e-07, "loss": 0.9129, "step": 24043 }, { "epoch": 0.8714435866768149, "grad_norm": 2.220682114391224, "learning_rate": 4.2717840861604075e-07, "loss": 1.1393, "step": 24044 }, { "epoch": 0.8714798303794715, "grad_norm": 2.4460597402327267, "learning_rate": 4.2694106253654775e-07, "loss": 0.8513, "step": 24045 }, { "epoch": 0.8715160740821283, "grad_norm": 2.4567826034950575, "learning_rate": 4.2670377947104103e-07, "loss": 0.8487, "step": 24046 }, { "epoch": 0.8715523177847849, "grad_norm": 2.2020761770652735, "learning_rate": 4.2646655942279183e-07, "loss": 0.8336, "step": 24047 }, { "epoch": 0.8715885614874416, "grad_norm": 2.351153789150388, "learning_rate": 4.2622940239506636e-07, "loss": 0.803, "step": 24048 }, { "epoch": 0.8716248051900982, "grad_norm": 2.439309283448983, "learning_rate": 4.2599230839113327e-07, "loss": 0.7808, "step": 24049 }, { "epoch": 0.8716610488927549, "grad_norm": 2.4346832439794848, "learning_rate": 4.2575527741426035e-07, "loss": 1.0066, "step": 24050 }, { "epoch": 0.8716972925954115, "grad_norm": 2.5908652310845377, "learning_rate": 4.25518309467714e-07, "loss": 0.7536, "step": 24051 }, { "epoch": 0.8717335362980682, "grad_norm": 2.3203919538519564, "learning_rate": 4.2528140455475763e-07, "loss": 0.8231, "step": 24052 }, { "epoch": 0.8717697800007249, "grad_norm": 2.4684942981581544, "learning_rate": 4.25044562678657e-07, "loss": 0.8855, "step": 24053 }, { "epoch": 0.8718060237033816, "grad_norm": 2.619451453474916, "learning_rate": 4.248077838426767e-07, "loss": 0.9398, "step": 24054 }, { "epoch": 0.8718422674060382, "grad_norm": 2.5276788851683643, "learning_rate": 4.2457106805007696e-07, "loss": 0.9126, "step": 24055 }, { "epoch": 0.8718785111086949, "grad_norm": 2.471714154744998, "learning_rate": 4.243344153041218e-07, "loss": 0.8583, "step": 24056 }, { "epoch": 0.8719147548113515, "grad_norm": 2.584054615211184, "learning_rate": 4.240978256080691e-07, "loss": 1.0542, "step": 24057 }, { "epoch": 0.8719509985140081, "grad_norm": 2.1536001277381205, "learning_rate": 4.238612989651825e-07, "loss": 0.7815, "step": 24058 }, { "epoch": 0.8719872422166649, "grad_norm": 2.547659672211685, "learning_rate": 4.236248353787192e-07, "loss": 0.8999, "step": 24059 }, { "epoch": 0.8720234859193216, "grad_norm": 2.187448349687528, "learning_rate": 4.233884348519385e-07, "loss": 0.8801, "step": 24060 }, { "epoch": 0.8720597296219782, "grad_norm": 2.4059806264358814, "learning_rate": 4.2315209738809595e-07, "loss": 0.8326, "step": 24061 }, { "epoch": 0.8720959733246348, "grad_norm": 2.389778399446369, "learning_rate": 4.229158229904512e-07, "loss": 0.9562, "step": 24062 }, { "epoch": 0.8721322170272915, "grad_norm": 1.9997690340924594, "learning_rate": 4.2267961166225725e-07, "loss": 0.8096, "step": 24063 }, { "epoch": 0.8721684607299481, "grad_norm": 2.6106594333230064, "learning_rate": 4.224434634067709e-07, "loss": 0.8401, "step": 24064 }, { "epoch": 0.8722047044326048, "grad_norm": 2.5376430367105, "learning_rate": 4.222073782272446e-07, "loss": 0.9359, "step": 24065 }, { "epoch": 0.8722409481352615, "grad_norm": 2.0642316235553118, "learning_rate": 4.2197135612693176e-07, "loss": 0.891, "step": 24066 }, { "epoch": 0.8722771918379182, "grad_norm": 2.2481176580194435, "learning_rate": 4.217353971090854e-07, "loss": 0.9673, "step": 24067 }, { "epoch": 0.8723134355405748, "grad_norm": 2.3444784177288858, "learning_rate": 4.2149950117695625e-07, "loss": 1.0333, "step": 24068 }, { "epoch": 0.8723496792432315, "grad_norm": 2.370107393119226, "learning_rate": 4.2126366833379453e-07, "loss": 1.0728, "step": 24069 }, { "epoch": 0.8723859229458881, "grad_norm": 2.5683155041064616, "learning_rate": 4.210278985828509e-07, "loss": 0.8842, "step": 24070 }, { "epoch": 0.8724221666485448, "grad_norm": 2.2788485701689267, "learning_rate": 4.2079219192737387e-07, "loss": 0.8362, "step": 24071 }, { "epoch": 0.8724584103512015, "grad_norm": 2.0442045755196094, "learning_rate": 4.2055654837061035e-07, "loss": 0.6621, "step": 24072 }, { "epoch": 0.8724946540538582, "grad_norm": 2.2499759048946566, "learning_rate": 4.203209679158082e-07, "loss": 0.7277, "step": 24073 }, { "epoch": 0.8725308977565148, "grad_norm": 2.2675978553124905, "learning_rate": 4.2008545056621385e-07, "loss": 0.7593, "step": 24074 }, { "epoch": 0.8725671414591715, "grad_norm": 2.7111584924662515, "learning_rate": 4.1984999632507185e-07, "loss": 0.8666, "step": 24075 }, { "epoch": 0.8726033851618281, "grad_norm": 2.110226360044645, "learning_rate": 4.1961460519562735e-07, "loss": 0.6843, "step": 24076 }, { "epoch": 0.8726396288644848, "grad_norm": 2.567051330074142, "learning_rate": 4.193792771811217e-07, "loss": 0.9957, "step": 24077 }, { "epoch": 0.8726758725671414, "grad_norm": 2.411209790731752, "learning_rate": 4.191440122848012e-07, "loss": 0.995, "step": 24078 }, { "epoch": 0.8727121162697982, "grad_norm": 2.1998164772277575, "learning_rate": 4.1890881050990484e-07, "loss": 0.9452, "step": 24079 }, { "epoch": 0.8727483599724548, "grad_norm": 2.3942699201529916, "learning_rate": 4.1867367185967566e-07, "loss": 0.7819, "step": 24080 }, { "epoch": 0.8727846036751115, "grad_norm": 2.163944242883407, "learning_rate": 4.1843859633735053e-07, "loss": 0.5929, "step": 24081 }, { "epoch": 0.8728208473777681, "grad_norm": 2.1770018489090015, "learning_rate": 4.182035839461729e-07, "loss": 0.62, "step": 24082 }, { "epoch": 0.8728570910804248, "grad_norm": 2.5708240778891693, "learning_rate": 4.179686346893774e-07, "loss": 0.7828, "step": 24083 }, { "epoch": 0.8728933347830814, "grad_norm": 2.176634952930897, "learning_rate": 4.1773374857020433e-07, "loss": 0.6099, "step": 24084 }, { "epoch": 0.8729295784857382, "grad_norm": 2.3655293674701663, "learning_rate": 4.1749892559188766e-07, "loss": 1.0558, "step": 24085 }, { "epoch": 0.8729658221883948, "grad_norm": 2.497956991829672, "learning_rate": 4.172641657576648e-07, "loss": 0.8223, "step": 24086 }, { "epoch": 0.8730020658910514, "grad_norm": 2.3066709381992565, "learning_rate": 4.1702946907077103e-07, "loss": 0.944, "step": 24087 }, { "epoch": 0.8730383095937081, "grad_norm": 2.315315106067622, "learning_rate": 4.167948355344381e-07, "loss": 0.8614, "step": 24088 }, { "epoch": 0.8730745532963647, "grad_norm": 2.18321353601846, "learning_rate": 4.165602651519013e-07, "loss": 0.8145, "step": 24089 }, { "epoch": 0.8731107969990214, "grad_norm": 2.8727232405397696, "learning_rate": 4.163257579263913e-07, "loss": 0.7954, "step": 24090 }, { "epoch": 0.873147040701678, "grad_norm": 2.4855608179644433, "learning_rate": 4.1609131386114167e-07, "loss": 0.8924, "step": 24091 }, { "epoch": 0.8731832844043348, "grad_norm": 2.479769720671973, "learning_rate": 4.1585693295938033e-07, "loss": 0.8466, "step": 24092 }, { "epoch": 0.8732195281069914, "grad_norm": 2.103167382976947, "learning_rate": 4.1562261522433813e-07, "loss": 0.9265, "step": 24093 }, { "epoch": 0.8732557718096481, "grad_norm": 2.572459079929148, "learning_rate": 4.153883606592446e-07, "loss": 0.9905, "step": 24094 }, { "epoch": 0.8732920155123047, "grad_norm": 2.71491143207162, "learning_rate": 4.151541692673261e-07, "loss": 0.9642, "step": 24095 }, { "epoch": 0.8733282592149614, "grad_norm": 2.399432131541178, "learning_rate": 4.1492004105181004e-07, "loss": 1.0414, "step": 24096 }, { "epoch": 0.873364502917618, "grad_norm": 2.364897683709974, "learning_rate": 4.1468597601592277e-07, "loss": 0.9067, "step": 24097 }, { "epoch": 0.8734007466202748, "grad_norm": 2.392061971027998, "learning_rate": 4.144519741628911e-07, "loss": 0.8802, "step": 24098 }, { "epoch": 0.8734369903229314, "grad_norm": 2.2659180878174885, "learning_rate": 4.142180354959363e-07, "loss": 0.9651, "step": 24099 }, { "epoch": 0.8734732340255881, "grad_norm": 2.3657561490589534, "learning_rate": 4.139841600182842e-07, "loss": 0.8481, "step": 24100 }, { "epoch": 0.8735094777282447, "grad_norm": 2.1848436936373696, "learning_rate": 4.1375034773315725e-07, "loss": 0.9458, "step": 24101 }, { "epoch": 0.8735457214309014, "grad_norm": 2.2366539595894572, "learning_rate": 4.135165986437761e-07, "loss": 0.8276, "step": 24102 }, { "epoch": 0.873581965133558, "grad_norm": 2.2573072097259463, "learning_rate": 4.1328291275336273e-07, "loss": 1.0747, "step": 24103 }, { "epoch": 0.8736182088362147, "grad_norm": 2.2412737411986186, "learning_rate": 4.130492900651367e-07, "loss": 0.7534, "step": 24104 }, { "epoch": 0.8736544525388714, "grad_norm": 2.5906047062948963, "learning_rate": 4.128157305823183e-07, "loss": 0.9383, "step": 24105 }, { "epoch": 0.8736906962415281, "grad_norm": 2.253190848151043, "learning_rate": 4.125822343081237e-07, "loss": 0.7803, "step": 24106 }, { "epoch": 0.8737269399441847, "grad_norm": 2.459631473285569, "learning_rate": 4.123488012457727e-07, "loss": 0.7962, "step": 24107 }, { "epoch": 0.8737631836468414, "grad_norm": 2.4156979389320417, "learning_rate": 4.1211543139847877e-07, "loss": 0.8956, "step": 24108 }, { "epoch": 0.873799427349498, "grad_norm": 2.687875433124126, "learning_rate": 4.1188212476946155e-07, "loss": 0.9317, "step": 24109 }, { "epoch": 0.8738356710521546, "grad_norm": 2.4387607839871426, "learning_rate": 4.116488813619324e-07, "loss": 1.1772, "step": 24110 }, { "epoch": 0.8738719147548113, "grad_norm": 2.4873740185380218, "learning_rate": 4.114157011791081e-07, "loss": 0.89, "step": 24111 }, { "epoch": 0.873908158457468, "grad_norm": 2.0995245516455467, "learning_rate": 4.111825842241995e-07, "loss": 0.93, "step": 24112 }, { "epoch": 0.8739444021601247, "grad_norm": 2.460326371047269, "learning_rate": 4.1094953050041907e-07, "loss": 0.9688, "step": 24113 }, { "epoch": 0.8739806458627813, "grad_norm": 2.640014884762319, "learning_rate": 4.1071654001098026e-07, "loss": 0.8677, "step": 24114 }, { "epoch": 0.874016889565438, "grad_norm": 2.4221207087079106, "learning_rate": 4.104836127590911e-07, "loss": 0.9013, "step": 24115 }, { "epoch": 0.8740531332680946, "grad_norm": 2.4824975105128613, "learning_rate": 4.102507487479618e-07, "loss": 0.898, "step": 24116 }, { "epoch": 0.8740893769707513, "grad_norm": 2.4878563450173856, "learning_rate": 4.100179479808014e-07, "loss": 0.9223, "step": 24117 }, { "epoch": 0.874125620673408, "grad_norm": 2.548324316278896, "learning_rate": 4.0978521046081854e-07, "loss": 0.8953, "step": 24118 }, { "epoch": 0.8741618643760647, "grad_norm": 2.406443411420067, "learning_rate": 4.095525361912189e-07, "loss": 0.8684, "step": 24119 }, { "epoch": 0.8741981080787213, "grad_norm": 2.1624614208903528, "learning_rate": 4.0931992517520893e-07, "loss": 0.9518, "step": 24120 }, { "epoch": 0.874234351781378, "grad_norm": 1.9541771029879775, "learning_rate": 4.090873774159948e-07, "loss": 0.7616, "step": 24121 }, { "epoch": 0.8742705954840346, "grad_norm": 2.5256828207230124, "learning_rate": 4.088548929167796e-07, "loss": 0.914, "step": 24122 }, { "epoch": 0.8743068391866913, "grad_norm": 2.345406175215391, "learning_rate": 4.08622471680768e-07, "loss": 0.9065, "step": 24123 }, { "epoch": 0.8743430828893479, "grad_norm": 2.43639042149115, "learning_rate": 4.0839011371116013e-07, "loss": 0.9976, "step": 24124 }, { "epoch": 0.8743793265920047, "grad_norm": 2.330792240203339, "learning_rate": 4.0815781901116125e-07, "loss": 0.929, "step": 24125 }, { "epoch": 0.8744155702946613, "grad_norm": 2.6107574931385975, "learning_rate": 4.0792558758396993e-07, "loss": 0.7997, "step": 24126 }, { "epoch": 0.874451813997318, "grad_norm": 2.309786471910233, "learning_rate": 4.076934194327875e-07, "loss": 0.8184, "step": 24127 }, { "epoch": 0.8744880576999746, "grad_norm": 2.608358593036869, "learning_rate": 4.0746131456081085e-07, "loss": 0.895, "step": 24128 }, { "epoch": 0.8745243014026313, "grad_norm": 2.3122220699909026, "learning_rate": 4.0722927297124126e-07, "loss": 1.0072, "step": 24129 }, { "epoch": 0.8745605451052879, "grad_norm": 2.3775707804042376, "learning_rate": 4.06997294667274e-07, "loss": 0.9677, "step": 24130 }, { "epoch": 0.8745967888079447, "grad_norm": 2.097903757930675, "learning_rate": 4.0676537965210706e-07, "loss": 0.8331, "step": 24131 }, { "epoch": 0.8746330325106013, "grad_norm": 2.1229911246479634, "learning_rate": 4.0653352792893453e-07, "loss": 0.9038, "step": 24132 }, { "epoch": 0.874669276213258, "grad_norm": 2.362652382720429, "learning_rate": 4.063017395009522e-07, "loss": 0.6794, "step": 24133 }, { "epoch": 0.8747055199159146, "grad_norm": 2.648535257664506, "learning_rate": 4.060700143713542e-07, "loss": 0.9464, "step": 24134 }, { "epoch": 0.8747417636185713, "grad_norm": 2.2754658487866197, "learning_rate": 4.0583835254333235e-07, "loss": 0.8631, "step": 24135 }, { "epoch": 0.8747780073212279, "grad_norm": 2.257943347388047, "learning_rate": 4.056067540200792e-07, "loss": 0.9355, "step": 24136 }, { "epoch": 0.8748142510238845, "grad_norm": 2.3427074006494615, "learning_rate": 4.053752188047866e-07, "loss": 0.9395, "step": 24137 }, { "epoch": 0.8748504947265413, "grad_norm": 2.22104678957916, "learning_rate": 4.051437469006458e-07, "loss": 0.8985, "step": 24138 }, { "epoch": 0.874886738429198, "grad_norm": 2.0851326659443243, "learning_rate": 4.049123383108444e-07, "loss": 0.8674, "step": 24139 }, { "epoch": 0.8749229821318546, "grad_norm": 2.255145135321393, "learning_rate": 4.04680993038572e-07, "loss": 0.7397, "step": 24140 }, { "epoch": 0.8749592258345112, "grad_norm": 2.400678592793964, "learning_rate": 4.0444971108701713e-07, "loss": 0.9433, "step": 24141 }, { "epoch": 0.8749954695371679, "grad_norm": 2.430047526724241, "learning_rate": 4.0421849245936506e-07, "loss": 0.8918, "step": 24142 }, { "epoch": 0.8750317132398245, "grad_norm": 2.379425511775649, "learning_rate": 4.0398733715880325e-07, "loss": 0.7115, "step": 24143 }, { "epoch": 0.8750679569424813, "grad_norm": 2.502104125672229, "learning_rate": 4.0375624518851517e-07, "loss": 0.7957, "step": 24144 }, { "epoch": 0.8751042006451379, "grad_norm": 2.4253771757845186, "learning_rate": 4.035252165516879e-07, "loss": 0.7817, "step": 24145 }, { "epoch": 0.8751404443477946, "grad_norm": 2.3912125921283662, "learning_rate": 4.03294251251502e-07, "loss": 0.7637, "step": 24146 }, { "epoch": 0.8751766880504512, "grad_norm": 2.383142926963914, "learning_rate": 4.030633492911423e-07, "loss": 0.8747, "step": 24147 }, { "epoch": 0.8752129317531079, "grad_norm": 2.2612104101054933, "learning_rate": 4.0283251067378847e-07, "loss": 0.7199, "step": 24148 }, { "epoch": 0.8752491754557645, "grad_norm": 2.3797734531530756, "learning_rate": 4.0260173540262347e-07, "loss": 1.0401, "step": 24149 }, { "epoch": 0.8752854191584212, "grad_norm": 2.7011207644695445, "learning_rate": 4.023710234808254e-07, "loss": 0.9029, "step": 24150 }, { "epoch": 0.8753216628610779, "grad_norm": 2.108354915257629, "learning_rate": 4.0214037491157485e-07, "loss": 0.9714, "step": 24151 }, { "epoch": 0.8753579065637346, "grad_norm": 2.4454149760859014, "learning_rate": 4.019097896980484e-07, "loss": 0.8211, "step": 24152 }, { "epoch": 0.8753941502663912, "grad_norm": 2.341613799265348, "learning_rate": 4.016792678434245e-07, "loss": 0.7659, "step": 24153 }, { "epoch": 0.8754303939690479, "grad_norm": 2.1839805667170347, "learning_rate": 4.0144880935088e-07, "loss": 0.8873, "step": 24154 }, { "epoch": 0.8754666376717045, "grad_norm": 2.290310312792143, "learning_rate": 4.0121841422358865e-07, "loss": 0.8587, "step": 24155 }, { "epoch": 0.8755028813743612, "grad_norm": 2.451493499886195, "learning_rate": 4.0098808246472666e-07, "loss": 0.8725, "step": 24156 }, { "epoch": 0.8755391250770179, "grad_norm": 2.2188315619460837, "learning_rate": 4.0075781407746705e-07, "loss": 0.8898, "step": 24157 }, { "epoch": 0.8755753687796746, "grad_norm": 2.539783171891479, "learning_rate": 4.0052760906498454e-07, "loss": 0.8132, "step": 24158 }, { "epoch": 0.8756116124823312, "grad_norm": 2.2290709910688133, "learning_rate": 4.0029746743044883e-07, "loss": 0.7265, "step": 24159 }, { "epoch": 0.8756478561849879, "grad_norm": 2.617348589723212, "learning_rate": 4.000673891770324e-07, "loss": 1.0382, "step": 24160 }, { "epoch": 0.8756840998876445, "grad_norm": 2.508040724366002, "learning_rate": 3.998373743079065e-07, "loss": 0.9126, "step": 24161 }, { "epoch": 0.8757203435903012, "grad_norm": 2.309088894277193, "learning_rate": 3.996074228262381e-07, "loss": 0.8592, "step": 24162 }, { "epoch": 0.8757565872929578, "grad_norm": 2.181707934528266, "learning_rate": 3.9937753473519804e-07, "loss": 0.8119, "step": 24163 }, { "epoch": 0.8757928309956146, "grad_norm": 2.535781093679864, "learning_rate": 3.9914771003795203e-07, "loss": 0.8945, "step": 24164 }, { "epoch": 0.8758290746982712, "grad_norm": 2.3360591697835, "learning_rate": 3.989179487376693e-07, "loss": 0.8233, "step": 24165 }, { "epoch": 0.8758653184009279, "grad_norm": 2.5696856489134223, "learning_rate": 3.986882508375139e-07, "loss": 0.7838, "step": 24166 }, { "epoch": 0.8759015621035845, "grad_norm": 2.610976538795601, "learning_rate": 3.984586163406523e-07, "loss": 0.8833, "step": 24167 }, { "epoch": 0.8759378058062411, "grad_norm": 2.038967076593383, "learning_rate": 3.982290452502463e-07, "loss": 0.7602, "step": 24168 }, { "epoch": 0.8759740495088978, "grad_norm": 2.571302725318973, "learning_rate": 3.9799953756946285e-07, "loss": 0.9845, "step": 24169 }, { "epoch": 0.8760102932115545, "grad_norm": 2.3986311015931503, "learning_rate": 3.9777009330146163e-07, "loss": 0.8017, "step": 24170 }, { "epoch": 0.8760465369142112, "grad_norm": 2.540387795228691, "learning_rate": 3.975407124494063e-07, "loss": 0.9665, "step": 24171 }, { "epoch": 0.8760827806168678, "grad_norm": 2.6885216744892437, "learning_rate": 3.9731139501645534e-07, "loss": 0.8592, "step": 24172 }, { "epoch": 0.8761190243195245, "grad_norm": 2.4874349956757813, "learning_rate": 3.9708214100577014e-07, "loss": 0.9762, "step": 24173 }, { "epoch": 0.8761552680221811, "grad_norm": 2.3517744342404923, "learning_rate": 3.9685295042050984e-07, "loss": 0.7452, "step": 24174 }, { "epoch": 0.8761915117248378, "grad_norm": 2.3510451699711234, "learning_rate": 3.966238232638303e-07, "loss": 0.7318, "step": 24175 }, { "epoch": 0.8762277554274944, "grad_norm": 2.215668553610225, "learning_rate": 3.963947595388923e-07, "loss": 0.894, "step": 24176 }, { "epoch": 0.8762639991301512, "grad_norm": 2.4397660269351675, "learning_rate": 3.961657592488494e-07, "loss": 0.923, "step": 24177 }, { "epoch": 0.8763002428328078, "grad_norm": 2.582284676275945, "learning_rate": 3.959368223968585e-07, "loss": 0.7725, "step": 24178 }, { "epoch": 0.8763364865354645, "grad_norm": 2.25876078659847, "learning_rate": 3.9570794898607326e-07, "loss": 0.799, "step": 24179 }, { "epoch": 0.8763727302381211, "grad_norm": 2.3630429017606396, "learning_rate": 3.954791390196477e-07, "loss": 0.8948, "step": 24180 }, { "epoch": 0.8764089739407778, "grad_norm": 2.462964412377459, "learning_rate": 3.9525039250073617e-07, "loss": 0.759, "step": 24181 }, { "epoch": 0.8764452176434344, "grad_norm": 2.5087984302377464, "learning_rate": 3.950217094324882e-07, "loss": 0.969, "step": 24182 }, { "epoch": 0.8764814613460911, "grad_norm": 2.300852666102443, "learning_rate": 3.947930898180558e-07, "loss": 0.6975, "step": 24183 }, { "epoch": 0.8765177050487478, "grad_norm": 2.2532340500913364, "learning_rate": 3.9456453366058965e-07, "loss": 0.7692, "step": 24184 }, { "epoch": 0.8765539487514045, "grad_norm": 2.383819628230249, "learning_rate": 3.943360409632402e-07, "loss": 0.9642, "step": 24185 }, { "epoch": 0.8765901924540611, "grad_norm": 2.530494375035597, "learning_rate": 3.941076117291537e-07, "loss": 0.9149, "step": 24186 }, { "epoch": 0.8766264361567178, "grad_norm": 2.1689019073793676, "learning_rate": 3.938792459614782e-07, "loss": 0.8108, "step": 24187 }, { "epoch": 0.8766626798593744, "grad_norm": 2.1407560869709754, "learning_rate": 3.9365094366336234e-07, "loss": 0.9176, "step": 24188 }, { "epoch": 0.876698923562031, "grad_norm": 2.4955084226940962, "learning_rate": 3.9342270483794963e-07, "loss": 0.9752, "step": 24189 }, { "epoch": 0.8767351672646878, "grad_norm": 2.6357711509412813, "learning_rate": 3.9319452948838655e-07, "loss": 1.0633, "step": 24190 }, { "epoch": 0.8767714109673445, "grad_norm": 2.3217597873905746, "learning_rate": 3.9296641761781607e-07, "loss": 1.0193, "step": 24191 }, { "epoch": 0.8768076546700011, "grad_norm": 2.4205092458467194, "learning_rate": 3.927383692293835e-07, "loss": 0.8792, "step": 24192 }, { "epoch": 0.8768438983726577, "grad_norm": 2.0965871369515137, "learning_rate": 3.9251038432622847e-07, "loss": 0.7725, "step": 24193 }, { "epoch": 0.8768801420753144, "grad_norm": 2.053256781711083, "learning_rate": 3.9228246291149517e-07, "loss": 0.8111, "step": 24194 }, { "epoch": 0.876916385777971, "grad_norm": 2.276305851195861, "learning_rate": 3.920546049883206e-07, "loss": 0.8483, "step": 24195 }, { "epoch": 0.8769526294806277, "grad_norm": 2.4625627253125293, "learning_rate": 3.918268105598488e-07, "loss": 1.1156, "step": 24196 }, { "epoch": 0.8769888731832844, "grad_norm": 2.3759848655714992, "learning_rate": 3.915990796292163e-07, "loss": 0.6577, "step": 24197 }, { "epoch": 0.8770251168859411, "grad_norm": 2.053538991522821, "learning_rate": 3.913714121995615e-07, "loss": 0.7996, "step": 24198 }, { "epoch": 0.8770613605885977, "grad_norm": 2.6176256994165565, "learning_rate": 3.9114380827402097e-07, "loss": 0.9415, "step": 24199 }, { "epoch": 0.8770976042912544, "grad_norm": 2.470222912182673, "learning_rate": 3.90916267855731e-07, "loss": 0.8951, "step": 24200 }, { "epoch": 0.877133847993911, "grad_norm": 2.181275136873976, "learning_rate": 3.906887909478285e-07, "loss": 0.7023, "step": 24201 }, { "epoch": 0.8771700916965677, "grad_norm": 2.246354203306002, "learning_rate": 3.90461377553446e-07, "loss": 0.7329, "step": 24202 }, { "epoch": 0.8772063353992244, "grad_norm": 2.2682783522132306, "learning_rate": 3.9023402767571775e-07, "loss": 1.0219, "step": 24203 }, { "epoch": 0.8772425791018811, "grad_norm": 2.0222505718779478, "learning_rate": 3.9000674131777714e-07, "loss": 0.8165, "step": 24204 }, { "epoch": 0.8772788228045377, "grad_norm": 2.487796376953083, "learning_rate": 3.897795184827563e-07, "loss": 0.7022, "step": 24205 }, { "epoch": 0.8773150665071944, "grad_norm": 2.2714733236792486, "learning_rate": 3.895523591737843e-07, "loss": 0.8094, "step": 24206 }, { "epoch": 0.877351310209851, "grad_norm": 2.247517604059073, "learning_rate": 3.893252633939931e-07, "loss": 0.8553, "step": 24207 }, { "epoch": 0.8773875539125077, "grad_norm": 2.580549111802046, "learning_rate": 3.8909823114651134e-07, "loss": 0.9409, "step": 24208 }, { "epoch": 0.8774237976151643, "grad_norm": 1.9818504083538575, "learning_rate": 3.8887126243446706e-07, "loss": 0.7585, "step": 24209 }, { "epoch": 0.8774600413178211, "grad_norm": 2.4686654168445696, "learning_rate": 3.886443572609888e-07, "loss": 0.9133, "step": 24210 }, { "epoch": 0.8774962850204777, "grad_norm": 2.397678408245596, "learning_rate": 3.884175156292008e-07, "loss": 0.6977, "step": 24211 }, { "epoch": 0.8775325287231344, "grad_norm": 2.375592248319835, "learning_rate": 3.8819073754223223e-07, "loss": 0.8279, "step": 24212 }, { "epoch": 0.877568772425791, "grad_norm": 2.3139888101028303, "learning_rate": 3.8796402300320557e-07, "loss": 0.8309, "step": 24213 }, { "epoch": 0.8776050161284477, "grad_norm": 2.298393175234041, "learning_rate": 3.8773737201524553e-07, "loss": 0.898, "step": 24214 }, { "epoch": 0.8776412598311043, "grad_norm": 2.27508904908496, "learning_rate": 3.875107845814741e-07, "loss": 0.8281, "step": 24215 }, { "epoch": 0.8776775035337611, "grad_norm": 2.2958208042337542, "learning_rate": 3.872842607050159e-07, "loss": 1.0036, "step": 24216 }, { "epoch": 0.8777137472364177, "grad_norm": 2.7030344538260414, "learning_rate": 3.8705780038898965e-07, "loss": 1.0082, "step": 24217 }, { "epoch": 0.8777499909390744, "grad_norm": 2.2931758615016378, "learning_rate": 3.868314036365184e-07, "loss": 0.9898, "step": 24218 }, { "epoch": 0.877786234641731, "grad_norm": 2.365637896117897, "learning_rate": 3.866050704507196e-07, "loss": 0.9343, "step": 24219 }, { "epoch": 0.8778224783443876, "grad_norm": 2.177712065195316, "learning_rate": 3.86378800834713e-07, "loss": 0.6618, "step": 24220 }, { "epoch": 0.8778587220470443, "grad_norm": 2.4265722378488306, "learning_rate": 3.861525947916167e-07, "loss": 0.748, "step": 24221 }, { "epoch": 0.8778949657497009, "grad_norm": 2.5001301361573507, "learning_rate": 3.859264523245465e-07, "loss": 0.9442, "step": 24222 }, { "epoch": 0.8779312094523577, "grad_norm": 2.384091352886271, "learning_rate": 3.857003734366194e-07, "loss": 0.9951, "step": 24223 }, { "epoch": 0.8779674531550143, "grad_norm": 2.461313188035196, "learning_rate": 3.8547435813095056e-07, "loss": 0.9473, "step": 24224 }, { "epoch": 0.878003696857671, "grad_norm": 2.291253047832804, "learning_rate": 3.852484064106554e-07, "loss": 0.9141, "step": 24225 }, { "epoch": 0.8780399405603276, "grad_norm": 2.4603076664111354, "learning_rate": 3.850225182788453e-07, "loss": 1.0382, "step": 24226 }, { "epoch": 0.8780761842629843, "grad_norm": 2.3361909589678813, "learning_rate": 3.8479669373863324e-07, "loss": 0.7844, "step": 24227 }, { "epoch": 0.8781124279656409, "grad_norm": 2.540515737923247, "learning_rate": 3.8457093279313296e-07, "loss": 0.8138, "step": 24228 }, { "epoch": 0.8781486716682977, "grad_norm": 2.047353331748091, "learning_rate": 3.84345235445453e-07, "loss": 0.878, "step": 24229 }, { "epoch": 0.8781849153709543, "grad_norm": 2.5591282596156506, "learning_rate": 3.841196016987053e-07, "loss": 0.9448, "step": 24230 }, { "epoch": 0.878221159073611, "grad_norm": 2.564658943255033, "learning_rate": 3.838940315559958e-07, "loss": 1.0189, "step": 24231 }, { "epoch": 0.8782574027762676, "grad_norm": 2.345531374728868, "learning_rate": 3.8366852502043694e-07, "loss": 0.8182, "step": 24232 }, { "epoch": 0.8782936464789243, "grad_norm": 2.0805276375146486, "learning_rate": 3.8344308209513234e-07, "loss": 0.7988, "step": 24233 }, { "epoch": 0.8783298901815809, "grad_norm": 2.377391923765643, "learning_rate": 3.832177027831918e-07, "loss": 0.8714, "step": 24234 }, { "epoch": 0.8783661338842376, "grad_norm": 2.2846643245415885, "learning_rate": 3.829923870877167e-07, "loss": 0.8617, "step": 24235 }, { "epoch": 0.8784023775868943, "grad_norm": 2.457860275224523, "learning_rate": 3.827671350118162e-07, "loss": 0.7341, "step": 24236 }, { "epoch": 0.878438621289551, "grad_norm": 2.4249806638312768, "learning_rate": 3.8254194655859165e-07, "loss": 0.8389, "step": 24237 }, { "epoch": 0.8784748649922076, "grad_norm": 2.1305594799660676, "learning_rate": 3.8231682173114736e-07, "loss": 0.7633, "step": 24238 }, { "epoch": 0.8785111086948643, "grad_norm": 2.4135982048279274, "learning_rate": 3.82091760532583e-07, "loss": 0.7916, "step": 24239 }, { "epoch": 0.8785473523975209, "grad_norm": 2.3295076663904446, "learning_rate": 3.8186676296600224e-07, "loss": 0.8762, "step": 24240 }, { "epoch": 0.8785835961001776, "grad_norm": 2.0964589808801595, "learning_rate": 3.8164182903450477e-07, "loss": 1.0424, "step": 24241 }, { "epoch": 0.8786198398028343, "grad_norm": 2.2841046211131752, "learning_rate": 3.814169587411892e-07, "loss": 0.9314, "step": 24242 }, { "epoch": 0.878656083505491, "grad_norm": 2.567571643850303, "learning_rate": 3.8119215208915484e-07, "loss": 0.8963, "step": 24243 }, { "epoch": 0.8786923272081476, "grad_norm": 2.426257796555205, "learning_rate": 3.809674090814991e-07, "loss": 0.966, "step": 24244 }, { "epoch": 0.8787285709108043, "grad_norm": 2.056632988504412, "learning_rate": 3.8074272972131955e-07, "loss": 0.8057, "step": 24245 }, { "epoch": 0.8787648146134609, "grad_norm": 2.5539542520796874, "learning_rate": 3.8051811401171093e-07, "loss": 0.8267, "step": 24246 }, { "epoch": 0.8788010583161175, "grad_norm": 2.3083972418769982, "learning_rate": 3.8029356195576903e-07, "loss": 0.8238, "step": 24247 }, { "epoch": 0.8788373020187742, "grad_norm": 2.34124343752013, "learning_rate": 3.8006907355658875e-07, "loss": 0.9095, "step": 24248 }, { "epoch": 0.878873545721431, "grad_norm": 2.449867337643997, "learning_rate": 3.7984464881726193e-07, "loss": 0.8256, "step": 24249 }, { "epoch": 0.8789097894240876, "grad_norm": 2.092933183825966, "learning_rate": 3.7962028774088224e-07, "loss": 0.8674, "step": 24250 }, { "epoch": 0.8789460331267442, "grad_norm": 2.1136507761228276, "learning_rate": 3.7939599033053887e-07, "loss": 0.8449, "step": 24251 }, { "epoch": 0.8789822768294009, "grad_norm": 2.7779779554273314, "learning_rate": 3.791717565893266e-07, "loss": 0.846, "step": 24252 }, { "epoch": 0.8790185205320575, "grad_norm": 2.477648562509791, "learning_rate": 3.7894758652033125e-07, "loss": 1.0407, "step": 24253 }, { "epoch": 0.8790547642347142, "grad_norm": 2.123090846955529, "learning_rate": 3.7872348012664484e-07, "loss": 0.7816, "step": 24254 }, { "epoch": 0.8790910079373708, "grad_norm": 2.529440644838392, "learning_rate": 3.784994374113521e-07, "loss": 1.0833, "step": 24255 }, { "epoch": 0.8791272516400276, "grad_norm": 2.5162418431325615, "learning_rate": 3.782754583775439e-07, "loss": 1.0853, "step": 24256 }, { "epoch": 0.8791634953426842, "grad_norm": 2.2760338521695216, "learning_rate": 3.7805154302830383e-07, "loss": 0.9609, "step": 24257 }, { "epoch": 0.8791997390453409, "grad_norm": 2.4816337737166965, "learning_rate": 3.778276913667184e-07, "loss": 0.7759, "step": 24258 }, { "epoch": 0.8792359827479975, "grad_norm": 2.552430785721069, "learning_rate": 3.7760390339587283e-07, "loss": 0.9597, "step": 24259 }, { "epoch": 0.8792722264506542, "grad_norm": 2.4375375625131745, "learning_rate": 3.773801791188486e-07, "loss": 0.8744, "step": 24260 }, { "epoch": 0.8793084701533108, "grad_norm": 2.4450016795061122, "learning_rate": 3.7715651853873105e-07, "loss": 0.939, "step": 24261 }, { "epoch": 0.8793447138559676, "grad_norm": 2.571823750603136, "learning_rate": 3.769329216585993e-07, "loss": 0.8681, "step": 24262 }, { "epoch": 0.8793809575586242, "grad_norm": 2.4353286178919866, "learning_rate": 3.767093884815376e-07, "loss": 0.7804, "step": 24263 }, { "epoch": 0.8794172012612809, "grad_norm": 2.4723718288947407, "learning_rate": 3.7648591901062346e-07, "loss": 0.8872, "step": 24264 }, { "epoch": 0.8794534449639375, "grad_norm": 2.3937419988881885, "learning_rate": 3.7626251324893834e-07, "loss": 0.9757, "step": 24265 }, { "epoch": 0.8794896886665942, "grad_norm": 2.2074679522515055, "learning_rate": 3.7603917119955814e-07, "loss": 0.7409, "step": 24266 }, { "epoch": 0.8795259323692508, "grad_norm": 2.8099490894011296, "learning_rate": 3.7581589286556197e-07, "loss": 0.8548, "step": 24267 }, { "epoch": 0.8795621760719075, "grad_norm": 2.298095020267233, "learning_rate": 3.755926782500269e-07, "loss": 0.7614, "step": 24268 }, { "epoch": 0.8795984197745642, "grad_norm": 2.191486037239761, "learning_rate": 3.7536952735602704e-07, "loss": 0.8508, "step": 24269 }, { "epoch": 0.8796346634772209, "grad_norm": 2.204909655224071, "learning_rate": 3.751464401866384e-07, "loss": 0.8775, "step": 24270 }, { "epoch": 0.8796709071798775, "grad_norm": 2.254129287193164, "learning_rate": 3.749234167449345e-07, "loss": 0.8569, "step": 24271 }, { "epoch": 0.8797071508825342, "grad_norm": 2.262802335810556, "learning_rate": 3.747004570339896e-07, "loss": 0.7416, "step": 24272 }, { "epoch": 0.8797433945851908, "grad_norm": 1.8541626299385114, "learning_rate": 3.744775610568746e-07, "loss": 0.6739, "step": 24273 }, { "epoch": 0.8797796382878474, "grad_norm": 2.2745822644712925, "learning_rate": 3.742547288166615e-07, "loss": 0.8199, "step": 24274 }, { "epoch": 0.8798158819905042, "grad_norm": 2.509873951850721, "learning_rate": 3.7403196031642e-07, "loss": 0.7967, "step": 24275 }, { "epoch": 0.8798521256931608, "grad_norm": 2.2257163063056695, "learning_rate": 3.7380925555922156e-07, "loss": 0.8332, "step": 24276 }, { "epoch": 0.8798883693958175, "grad_norm": 2.0582691653995195, "learning_rate": 3.735866145481332e-07, "loss": 0.7303, "step": 24277 }, { "epoch": 0.8799246130984741, "grad_norm": 2.022952362341067, "learning_rate": 3.7336403728622295e-07, "loss": 0.7615, "step": 24278 }, { "epoch": 0.8799608568011308, "grad_norm": 2.214138635279499, "learning_rate": 3.731415237765595e-07, "loss": 0.9153, "step": 24279 }, { "epoch": 0.8799971005037874, "grad_norm": 2.678469568597493, "learning_rate": 3.7291907402220604e-07, "loss": 0.8805, "step": 24280 }, { "epoch": 0.8800333442064441, "grad_norm": 2.211182759240203, "learning_rate": 3.726966880262312e-07, "loss": 0.9363, "step": 24281 }, { "epoch": 0.8800695879091008, "grad_norm": 2.19617988850109, "learning_rate": 3.724743657916952e-07, "loss": 0.9123, "step": 24282 }, { "epoch": 0.8801058316117575, "grad_norm": 2.253685937219503, "learning_rate": 3.7225210732166503e-07, "loss": 0.9765, "step": 24283 }, { "epoch": 0.8801420753144141, "grad_norm": 2.353791084643075, "learning_rate": 3.7202991261920176e-07, "loss": 0.8149, "step": 24284 }, { "epoch": 0.8801783190170708, "grad_norm": 2.384716995970874, "learning_rate": 3.718077816873683e-07, "loss": 0.9042, "step": 24285 }, { "epoch": 0.8802145627197274, "grad_norm": 2.536487767233856, "learning_rate": 3.715857145292234e-07, "loss": 0.729, "step": 24286 }, { "epoch": 0.8802508064223841, "grad_norm": 2.085879765599112, "learning_rate": 3.7136371114782856e-07, "loss": 0.6983, "step": 24287 }, { "epoch": 0.8802870501250408, "grad_norm": 2.402427019044485, "learning_rate": 3.7114177154624344e-07, "loss": 0.8048, "step": 24288 }, { "epoch": 0.8803232938276975, "grad_norm": 2.317411636877583, "learning_rate": 3.7091989572752394e-07, "loss": 0.756, "step": 24289 }, { "epoch": 0.8803595375303541, "grad_norm": 2.2764394175020093, "learning_rate": 3.7069808369472884e-07, "loss": 0.7171, "step": 24290 }, { "epoch": 0.8803957812330108, "grad_norm": 2.1452483334067804, "learning_rate": 3.7047633545091445e-07, "loss": 0.8672, "step": 24291 }, { "epoch": 0.8804320249356674, "grad_norm": 2.067061606041484, "learning_rate": 3.7025465099913725e-07, "loss": 0.8884, "step": 24292 }, { "epoch": 0.8804682686383241, "grad_norm": 2.19546703807495, "learning_rate": 3.700330303424499e-07, "loss": 0.8377, "step": 24293 }, { "epoch": 0.8805045123409807, "grad_norm": 2.4567325026911324, "learning_rate": 3.6981147348390754e-07, "loss": 0.8265, "step": 24294 }, { "epoch": 0.8805407560436375, "grad_norm": 2.718069687687877, "learning_rate": 3.6958998042656346e-07, "loss": 0.9511, "step": 24295 }, { "epoch": 0.8805769997462941, "grad_norm": 2.4282297241225392, "learning_rate": 3.693685511734685e-07, "loss": 0.8908, "step": 24296 }, { "epoch": 0.8806132434489508, "grad_norm": 2.49299671207425, "learning_rate": 3.6914718572767515e-07, "loss": 0.9775, "step": 24297 }, { "epoch": 0.8806494871516074, "grad_norm": 2.1802489141754675, "learning_rate": 3.6892588409223104e-07, "loss": 0.8525, "step": 24298 }, { "epoch": 0.880685730854264, "grad_norm": 2.3980804918030136, "learning_rate": 3.687046462701893e-07, "loss": 1.0133, "step": 24299 }, { "epoch": 0.8807219745569207, "grad_norm": 2.176428157943554, "learning_rate": 3.684834722645958e-07, "loss": 0.8436, "step": 24300 }, { "epoch": 0.8807582182595775, "grad_norm": 2.4284814135752555, "learning_rate": 3.6826236207849977e-07, "loss": 0.9843, "step": 24301 }, { "epoch": 0.8807944619622341, "grad_norm": 2.154823539681267, "learning_rate": 3.6804131571494537e-07, "loss": 0.9493, "step": 24302 }, { "epoch": 0.8808307056648907, "grad_norm": 2.4238675144793214, "learning_rate": 3.6782033317698193e-07, "loss": 0.8855, "step": 24303 }, { "epoch": 0.8808669493675474, "grad_norm": 2.064240892057223, "learning_rate": 3.675994144676526e-07, "loss": 0.8516, "step": 24304 }, { "epoch": 0.880903193070204, "grad_norm": 2.497673122591226, "learning_rate": 3.6737855959000203e-07, "loss": 0.9155, "step": 24305 }, { "epoch": 0.8809394367728607, "grad_norm": 2.5729934678234425, "learning_rate": 3.671577685470723e-07, "loss": 0.8049, "step": 24306 }, { "epoch": 0.8809756804755173, "grad_norm": 2.4634182488792153, "learning_rate": 3.669370413419071e-07, "loss": 1.0212, "step": 24307 }, { "epoch": 0.8810119241781741, "grad_norm": 2.50611294237599, "learning_rate": 3.667163779775484e-07, "loss": 1.1127, "step": 24308 }, { "epoch": 0.8810481678808307, "grad_norm": 2.34267152779284, "learning_rate": 3.6649577845703446e-07, "loss": 1.0046, "step": 24309 }, { "epoch": 0.8810844115834874, "grad_norm": 2.3690827633230183, "learning_rate": 3.662752427834071e-07, "loss": 0.8688, "step": 24310 }, { "epoch": 0.881120655286144, "grad_norm": 2.3940468926139897, "learning_rate": 3.6605477095970396e-07, "loss": 0.9314, "step": 24311 }, { "epoch": 0.8811568989888007, "grad_norm": 2.368504600867049, "learning_rate": 3.658343629889649e-07, "loss": 0.8433, "step": 24312 }, { "epoch": 0.8811931426914573, "grad_norm": 2.6358211604895136, "learning_rate": 3.656140188742241e-07, "loss": 1.1008, "step": 24313 }, { "epoch": 0.8812293863941141, "grad_norm": 2.2125432912255825, "learning_rate": 3.653937386185202e-07, "loss": 0.7818, "step": 24314 }, { "epoch": 0.8812656300967707, "grad_norm": 2.6733227037915213, "learning_rate": 3.6517352222488756e-07, "loss": 0.867, "step": 24315 }, { "epoch": 0.8813018737994274, "grad_norm": 2.5559067133361997, "learning_rate": 3.6495336969636086e-07, "loss": 0.8395, "step": 24316 }, { "epoch": 0.881338117502084, "grad_norm": 2.4403101969881766, "learning_rate": 3.647332810359738e-07, "loss": 0.703, "step": 24317 }, { "epoch": 0.8813743612047407, "grad_norm": 2.401872353615001, "learning_rate": 3.6451325624675735e-07, "loss": 0.8218, "step": 24318 }, { "epoch": 0.8814106049073973, "grad_norm": 2.2134918479287724, "learning_rate": 3.642932953317463e-07, "loss": 0.9193, "step": 24319 }, { "epoch": 0.881446848610054, "grad_norm": 2.363826019506193, "learning_rate": 3.6407339829396924e-07, "loss": 0.9496, "step": 24320 }, { "epoch": 0.8814830923127107, "grad_norm": 2.4572332445538887, "learning_rate": 3.6385356513645773e-07, "loss": 0.9278, "step": 24321 }, { "epoch": 0.8815193360153674, "grad_norm": 2.730604504176002, "learning_rate": 3.636337958622388e-07, "loss": 0.8387, "step": 24322 }, { "epoch": 0.881555579718024, "grad_norm": 2.145665728860852, "learning_rate": 3.6341409047434386e-07, "loss": 0.7255, "step": 24323 }, { "epoch": 0.8815918234206807, "grad_norm": 2.2367102745128222, "learning_rate": 3.631944489757977e-07, "loss": 0.7999, "step": 24324 }, { "epoch": 0.8816280671233373, "grad_norm": 2.2794704442003657, "learning_rate": 3.629748713696285e-07, "loss": 0.8982, "step": 24325 }, { "epoch": 0.881664310825994, "grad_norm": 2.1213766537945107, "learning_rate": 3.627553576588605e-07, "loss": 0.7362, "step": 24326 }, { "epoch": 0.8817005545286506, "grad_norm": 2.309935366103749, "learning_rate": 3.6253590784651905e-07, "loss": 0.8843, "step": 24327 }, { "epoch": 0.8817367982313074, "grad_norm": 2.287766373013632, "learning_rate": 3.623165219356295e-07, "loss": 0.9231, "step": 24328 }, { "epoch": 0.881773041933964, "grad_norm": 2.4770250728677867, "learning_rate": 3.6209719992921224e-07, "loss": 0.7849, "step": 24329 }, { "epoch": 0.8818092856366206, "grad_norm": 2.2370875783297897, "learning_rate": 3.6187794183029034e-07, "loss": 0.9452, "step": 24330 }, { "epoch": 0.8818455293392773, "grad_norm": 2.6845588061389964, "learning_rate": 3.616587476418859e-07, "loss": 0.83, "step": 24331 }, { "epoch": 0.8818817730419339, "grad_norm": 2.140579581567344, "learning_rate": 3.6143961736701924e-07, "loss": 0.8014, "step": 24332 }, { "epoch": 0.8819180167445906, "grad_norm": 2.3339943086384687, "learning_rate": 3.6122055100870855e-07, "loss": 0.8248, "step": 24333 }, { "epoch": 0.8819542604472473, "grad_norm": 2.2444489383461232, "learning_rate": 3.610015485699736e-07, "loss": 0.9442, "step": 24334 }, { "epoch": 0.881990504149904, "grad_norm": 2.178441331201244, "learning_rate": 3.60782610053832e-07, "loss": 0.9372, "step": 24335 }, { "epoch": 0.8820267478525606, "grad_norm": 2.4816650779076896, "learning_rate": 3.605637354632996e-07, "loss": 0.906, "step": 24336 }, { "epoch": 0.8820629915552173, "grad_norm": 2.481108355574995, "learning_rate": 3.6034492480139405e-07, "loss": 0.907, "step": 24337 }, { "epoch": 0.8820992352578739, "grad_norm": 1.9130108487624, "learning_rate": 3.6012617807112794e-07, "loss": 0.7352, "step": 24338 }, { "epoch": 0.8821354789605306, "grad_norm": 2.3467973732665692, "learning_rate": 3.599074952755188e-07, "loss": 0.8079, "step": 24339 }, { "epoch": 0.8821717226631872, "grad_norm": 2.411019941976087, "learning_rate": 3.596888764175771e-07, "loss": 1.0217, "step": 24340 }, { "epoch": 0.882207966365844, "grad_norm": 2.027936971999153, "learning_rate": 3.594703215003159e-07, "loss": 0.8988, "step": 24341 }, { "epoch": 0.8822442100685006, "grad_norm": 2.466993897348448, "learning_rate": 3.5925183052674783e-07, "loss": 0.8868, "step": 24342 }, { "epoch": 0.8822804537711573, "grad_norm": 2.5240309872095525, "learning_rate": 3.5903340349988323e-07, "loss": 0.9504, "step": 24343 }, { "epoch": 0.8823166974738139, "grad_norm": 1.9269927374134737, "learning_rate": 3.5881504042273084e-07, "loss": 0.8039, "step": 24344 }, { "epoch": 0.8823529411764706, "grad_norm": 2.3086789422428025, "learning_rate": 3.5859674129830047e-07, "loss": 1.0166, "step": 24345 }, { "epoch": 0.8823891848791272, "grad_norm": 2.208843200517153, "learning_rate": 3.583785061296008e-07, "loss": 0.8155, "step": 24346 }, { "epoch": 0.882425428581784, "grad_norm": 2.348995258138639, "learning_rate": 3.581603349196372e-07, "loss": 0.8869, "step": 24347 }, { "epoch": 0.8824616722844406, "grad_norm": 2.4830098616815133, "learning_rate": 3.5794222767141784e-07, "loss": 0.7554, "step": 24348 }, { "epoch": 0.8824979159870973, "grad_norm": 2.174884836485773, "learning_rate": 3.577241843879453e-07, "loss": 0.8818, "step": 24349 }, { "epoch": 0.8825341596897539, "grad_norm": 2.654017389725539, "learning_rate": 3.575062050722278e-07, "loss": 0.9283, "step": 24350 }, { "epoch": 0.8825704033924106, "grad_norm": 2.322343776393011, "learning_rate": 3.572882897272661e-07, "loss": 0.8096, "step": 24351 }, { "epoch": 0.8826066470950672, "grad_norm": 2.7560520709339653, "learning_rate": 3.570704383560647e-07, "loss": 0.8377, "step": 24352 }, { "epoch": 0.8826428907977238, "grad_norm": 2.444005443403922, "learning_rate": 3.568526509616238e-07, "loss": 1.0964, "step": 24353 }, { "epoch": 0.8826791345003806, "grad_norm": 2.3780714067477864, "learning_rate": 3.566349275469455e-07, "loss": 0.7801, "step": 24354 }, { "epoch": 0.8827153782030372, "grad_norm": 2.4506864584754755, "learning_rate": 3.5641726811503073e-07, "loss": 0.9529, "step": 24355 }, { "epoch": 0.8827516219056939, "grad_norm": 2.1709894812329797, "learning_rate": 3.5619967266887656e-07, "loss": 0.7773, "step": 24356 }, { "epoch": 0.8827878656083505, "grad_norm": 2.163815223153114, "learning_rate": 3.5598214121148277e-07, "loss": 0.7786, "step": 24357 }, { "epoch": 0.8828241093110072, "grad_norm": 2.603559252027162, "learning_rate": 3.557646737458459e-07, "loss": 0.8509, "step": 24358 }, { "epoch": 0.8828603530136638, "grad_norm": 2.460531706985388, "learning_rate": 3.555472702749641e-07, "loss": 0.9346, "step": 24359 }, { "epoch": 0.8828965967163206, "grad_norm": 2.284092585930083, "learning_rate": 3.553299308018315e-07, "loss": 0.8486, "step": 24360 }, { "epoch": 0.8829328404189772, "grad_norm": 2.2951120783268593, "learning_rate": 3.5511265532944315e-07, "loss": 0.7783, "step": 24361 }, { "epoch": 0.8829690841216339, "grad_norm": 2.1615215912998362, "learning_rate": 3.5489544386079323e-07, "loss": 0.8676, "step": 24362 }, { "epoch": 0.8830053278242905, "grad_norm": 2.2625311265019423, "learning_rate": 3.54678296398876e-07, "loss": 0.7792, "step": 24363 }, { "epoch": 0.8830415715269472, "grad_norm": 2.4673718198259538, "learning_rate": 3.5446121294668125e-07, "loss": 1.0032, "step": 24364 }, { "epoch": 0.8830778152296038, "grad_norm": 2.4402534529281987, "learning_rate": 3.542441935072016e-07, "loss": 0.8179, "step": 24365 }, { "epoch": 0.8831140589322605, "grad_norm": 2.1309017871514464, "learning_rate": 3.540272380834281e-07, "loss": 0.7613, "step": 24366 }, { "epoch": 0.8831503026349172, "grad_norm": 2.3253385563110993, "learning_rate": 3.5381034667834877e-07, "loss": 1.1003, "step": 24367 }, { "epoch": 0.8831865463375739, "grad_norm": 2.205061108642736, "learning_rate": 3.5359351929495353e-07, "loss": 0.8202, "step": 24368 }, { "epoch": 0.8832227900402305, "grad_norm": 1.8733240100763706, "learning_rate": 3.533767559362283e-07, "loss": 0.9228, "step": 24369 }, { "epoch": 0.8832590337428872, "grad_norm": 2.3220900595604217, "learning_rate": 3.531600566051624e-07, "loss": 0.9146, "step": 24370 }, { "epoch": 0.8832952774455438, "grad_norm": 2.441779510172239, "learning_rate": 3.5294342130474e-07, "loss": 0.8063, "step": 24371 }, { "epoch": 0.8833315211482005, "grad_norm": 2.397362302626856, "learning_rate": 3.5272685003794773e-07, "loss": 0.8866, "step": 24372 }, { "epoch": 0.8833677648508572, "grad_norm": 2.3207478663884498, "learning_rate": 3.525103428077675e-07, "loss": 0.8919, "step": 24373 }, { "epoch": 0.8834040085535139, "grad_norm": 2.604110798130567, "learning_rate": 3.5229389961718484e-07, "loss": 0.9295, "step": 24374 }, { "epoch": 0.8834402522561705, "grad_norm": 2.340132537073059, "learning_rate": 3.5207752046918173e-07, "loss": 0.8605, "step": 24375 }, { "epoch": 0.8834764959588272, "grad_norm": 2.424366480246529, "learning_rate": 3.5186120536673864e-07, "loss": 0.9457, "step": 24376 }, { "epoch": 0.8835127396614838, "grad_norm": 2.1629175199231385, "learning_rate": 3.5164495431283697e-07, "loss": 0.7898, "step": 24377 }, { "epoch": 0.8835489833641405, "grad_norm": 2.2368991450383047, "learning_rate": 3.514287673104566e-07, "loss": 0.8335, "step": 24378 }, { "epoch": 0.8835852270667971, "grad_norm": 2.393646219509949, "learning_rate": 3.512126443625774e-07, "loss": 1.1234, "step": 24379 }, { "epoch": 0.8836214707694539, "grad_norm": 2.600724388390226, "learning_rate": 3.509965854721759e-07, "loss": 0.8542, "step": 24380 }, { "epoch": 0.8836577144721105, "grad_norm": 2.383389946799493, "learning_rate": 3.507805906422296e-07, "loss": 0.8856, "step": 24381 }, { "epoch": 0.8836939581747671, "grad_norm": 2.6400878507699006, "learning_rate": 3.5056465987571565e-07, "loss": 0.9192, "step": 24382 }, { "epoch": 0.8837302018774238, "grad_norm": 2.333383226679559, "learning_rate": 3.503487931756078e-07, "loss": 0.8489, "step": 24383 }, { "epoch": 0.8837664455800804, "grad_norm": 2.449487002920678, "learning_rate": 3.50132990544883e-07, "loss": 0.8964, "step": 24384 }, { "epoch": 0.8838026892827371, "grad_norm": 2.365274916963265, "learning_rate": 3.4991725198651125e-07, "loss": 0.9195, "step": 24385 }, { "epoch": 0.8838389329853937, "grad_norm": 2.6006369895187724, "learning_rate": 3.4970157750346957e-07, "loss": 0.8684, "step": 24386 }, { "epoch": 0.8838751766880505, "grad_norm": 2.682476697694679, "learning_rate": 3.4948596709872664e-07, "loss": 0.9825, "step": 24387 }, { "epoch": 0.8839114203907071, "grad_norm": 2.3597930332634247, "learning_rate": 3.492704207752551e-07, "loss": 0.9006, "step": 24388 }, { "epoch": 0.8839476640933638, "grad_norm": 2.2376713201730225, "learning_rate": 3.4905493853602313e-07, "loss": 0.9168, "step": 24389 }, { "epoch": 0.8839839077960204, "grad_norm": 2.3123039468873823, "learning_rate": 3.488395203840034e-07, "loss": 0.9087, "step": 24390 }, { "epoch": 0.8840201514986771, "grad_norm": 2.496217303649939, "learning_rate": 3.4862416632216065e-07, "loss": 0.8737, "step": 24391 }, { "epoch": 0.8840563952013337, "grad_norm": 2.109681431894773, "learning_rate": 3.4840887635346487e-07, "loss": 0.7824, "step": 24392 }, { "epoch": 0.8840926389039905, "grad_norm": 2.3073687158329954, "learning_rate": 3.4819365048088083e-07, "loss": 0.9141, "step": 24393 }, { "epoch": 0.8841288826066471, "grad_norm": 2.4071869577869323, "learning_rate": 3.4797848870737505e-07, "loss": 0.8937, "step": 24394 }, { "epoch": 0.8841651263093038, "grad_norm": 2.369330145203859, "learning_rate": 3.4776339103591293e-07, "loss": 1.028, "step": 24395 }, { "epoch": 0.8842013700119604, "grad_norm": 2.4669240648926967, "learning_rate": 3.4754835746945714e-07, "loss": 0.764, "step": 24396 }, { "epoch": 0.8842376137146171, "grad_norm": 2.587678659617747, "learning_rate": 3.473333880109708e-07, "loss": 0.985, "step": 24397 }, { "epoch": 0.8842738574172737, "grad_norm": 2.36552023302327, "learning_rate": 3.4711848266341664e-07, "loss": 0.6901, "step": 24398 }, { "epoch": 0.8843101011199304, "grad_norm": 2.1381662186189256, "learning_rate": 3.469036414297566e-07, "loss": 0.8408, "step": 24399 }, { "epoch": 0.8843463448225871, "grad_norm": 2.6163077731761466, "learning_rate": 3.4668886431295014e-07, "loss": 0.8653, "step": 24400 }, { "epoch": 0.8843825885252438, "grad_norm": 2.5126969634886116, "learning_rate": 3.464741513159564e-07, "loss": 0.8306, "step": 24401 }, { "epoch": 0.8844188322279004, "grad_norm": 2.2146593133482497, "learning_rate": 3.462595024417348e-07, "loss": 0.7214, "step": 24402 }, { "epoch": 0.884455075930557, "grad_norm": 2.4569745639764906, "learning_rate": 3.460449176932429e-07, "loss": 0.8425, "step": 24403 }, { "epoch": 0.8844913196332137, "grad_norm": 2.308339371362159, "learning_rate": 3.458303970734378e-07, "loss": 0.9773, "step": 24404 }, { "epoch": 0.8845275633358703, "grad_norm": 2.6872124549276863, "learning_rate": 3.456159405852733e-07, "loss": 0.995, "step": 24405 }, { "epoch": 0.8845638070385271, "grad_norm": 2.171250458238152, "learning_rate": 3.45401548231708e-07, "loss": 0.9061, "step": 24406 }, { "epoch": 0.8846000507411838, "grad_norm": 2.2908305012386623, "learning_rate": 3.451872200156936e-07, "loss": 0.8609, "step": 24407 }, { "epoch": 0.8846362944438404, "grad_norm": 2.521577319470648, "learning_rate": 3.449729559401849e-07, "loss": 0.9976, "step": 24408 }, { "epoch": 0.884672538146497, "grad_norm": 2.7047184384808594, "learning_rate": 3.447587560081317e-07, "loss": 0.92, "step": 24409 }, { "epoch": 0.8847087818491537, "grad_norm": 2.4038129665212393, "learning_rate": 3.4454462022248947e-07, "loss": 0.9162, "step": 24410 }, { "epoch": 0.8847450255518103, "grad_norm": 2.487735801904999, "learning_rate": 3.4433054858620587e-07, "loss": 1.1365, "step": 24411 }, { "epoch": 0.884781269254467, "grad_norm": 2.3190658510617133, "learning_rate": 3.4411654110223236e-07, "loss": 0.8761, "step": 24412 }, { "epoch": 0.8848175129571237, "grad_norm": 2.5289987895530217, "learning_rate": 3.4390259777351663e-07, "loss": 0.9549, "step": 24413 }, { "epoch": 0.8848537566597804, "grad_norm": 2.5292597928631575, "learning_rate": 3.436887186030069e-07, "loss": 0.8431, "step": 24414 }, { "epoch": 0.884890000362437, "grad_norm": 2.3876138817578743, "learning_rate": 3.4347490359365187e-07, "loss": 0.8486, "step": 24415 }, { "epoch": 0.8849262440650937, "grad_norm": 2.418222153649447, "learning_rate": 3.4326115274839476e-07, "loss": 0.8889, "step": 24416 }, { "epoch": 0.8849624877677503, "grad_norm": 2.5457871266379843, "learning_rate": 3.4304746607018326e-07, "loss": 0.9538, "step": 24417 }, { "epoch": 0.884998731470407, "grad_norm": 2.4983846784162167, "learning_rate": 3.428338435619616e-07, "loss": 0.7772, "step": 24418 }, { "epoch": 0.8850349751730637, "grad_norm": 2.4753831729931055, "learning_rate": 3.42620285226673e-07, "loss": 0.8888, "step": 24419 }, { "epoch": 0.8850712188757204, "grad_norm": 2.743635304730208, "learning_rate": 3.4240679106726015e-07, "loss": 0.8872, "step": 24420 }, { "epoch": 0.885107462578377, "grad_norm": 2.1111553299557673, "learning_rate": 3.421933610866646e-07, "loss": 0.7556, "step": 24421 }, { "epoch": 0.8851437062810337, "grad_norm": 2.6225246069953467, "learning_rate": 3.4197999528782834e-07, "loss": 1.0411, "step": 24422 }, { "epoch": 0.8851799499836903, "grad_norm": 2.575542003194223, "learning_rate": 3.4176669367368966e-07, "loss": 0.9179, "step": 24423 }, { "epoch": 0.885216193686347, "grad_norm": 2.3852466231065157, "learning_rate": 3.4155345624718896e-07, "loss": 0.8652, "step": 24424 }, { "epoch": 0.8852524373890036, "grad_norm": 2.5671331271677293, "learning_rate": 3.413402830112644e-07, "loss": 0.8047, "step": 24425 }, { "epoch": 0.8852886810916604, "grad_norm": 1.9562254250155753, "learning_rate": 3.4112717396885376e-07, "loss": 0.7538, "step": 24426 }, { "epoch": 0.885324924794317, "grad_norm": 2.6282025275108176, "learning_rate": 3.4091412912289235e-07, "loss": 0.8708, "step": 24427 }, { "epoch": 0.8853611684969737, "grad_norm": 2.222388751483808, "learning_rate": 3.4070114847631676e-07, "loss": 0.7869, "step": 24428 }, { "epoch": 0.8853974121996303, "grad_norm": 2.3263798325408773, "learning_rate": 3.4048823203206125e-07, "loss": 0.7886, "step": 24429 }, { "epoch": 0.885433655902287, "grad_norm": 2.6206228507338225, "learning_rate": 3.402753797930608e-07, "loss": 0.8953, "step": 24430 }, { "epoch": 0.8854698996049436, "grad_norm": 2.20099046055013, "learning_rate": 3.4006259176224687e-07, "loss": 0.6737, "step": 24431 }, { "epoch": 0.8855061433076004, "grad_norm": 2.327524374359463, "learning_rate": 3.398498679425521e-07, "loss": 0.7444, "step": 24432 }, { "epoch": 0.885542387010257, "grad_norm": 2.479790083413218, "learning_rate": 3.396372083369082e-07, "loss": 0.9842, "step": 24433 }, { "epoch": 0.8855786307129137, "grad_norm": 2.237105979840496, "learning_rate": 3.394246129482448e-07, "loss": 0.7968, "step": 24434 }, { "epoch": 0.8856148744155703, "grad_norm": 2.233849972107049, "learning_rate": 3.392120817794919e-07, "loss": 0.873, "step": 24435 }, { "epoch": 0.8856511181182269, "grad_norm": 2.609950751762123, "learning_rate": 3.389996148335767e-07, "loss": 0.8345, "step": 24436 }, { "epoch": 0.8856873618208836, "grad_norm": 2.403066004460328, "learning_rate": 3.387872121134289e-07, "loss": 0.9287, "step": 24437 }, { "epoch": 0.8857236055235402, "grad_norm": 2.451076188999062, "learning_rate": 3.3857487362197406e-07, "loss": 0.8979, "step": 24438 }, { "epoch": 0.885759849226197, "grad_norm": 2.3892637203365323, "learning_rate": 3.383625993621392e-07, "loss": 0.8959, "step": 24439 }, { "epoch": 0.8857960929288536, "grad_norm": 2.443663698537211, "learning_rate": 3.381503893368471e-07, "loss": 0.9327, "step": 24440 }, { "epoch": 0.8858323366315103, "grad_norm": 2.6851602231340026, "learning_rate": 3.379382435490236e-07, "loss": 0.8134, "step": 24441 }, { "epoch": 0.8858685803341669, "grad_norm": 2.6801432311150313, "learning_rate": 3.377261620015926e-07, "loss": 0.923, "step": 24442 }, { "epoch": 0.8859048240368236, "grad_norm": 2.119217436658346, "learning_rate": 3.375141446974739e-07, "loss": 0.7352, "step": 24443 }, { "epoch": 0.8859410677394802, "grad_norm": 2.110864879382554, "learning_rate": 3.3730219163959134e-07, "loss": 0.9411, "step": 24444 }, { "epoch": 0.885977311442137, "grad_norm": 2.9140130231271977, "learning_rate": 3.370903028308642e-07, "loss": 0.9723, "step": 24445 }, { "epoch": 0.8860135551447936, "grad_norm": 2.7246856311219427, "learning_rate": 3.368784782742135e-07, "loss": 0.9122, "step": 24446 }, { "epoch": 0.8860497988474503, "grad_norm": 2.408299560326692, "learning_rate": 3.3666671797255636e-07, "loss": 0.9818, "step": 24447 }, { "epoch": 0.8860860425501069, "grad_norm": 2.3518656735572576, "learning_rate": 3.364550219288115e-07, "loss": 0.8709, "step": 24448 }, { "epoch": 0.8861222862527636, "grad_norm": 2.421200034405528, "learning_rate": 3.3624339014589615e-07, "loss": 0.9282, "step": 24449 }, { "epoch": 0.8861585299554202, "grad_norm": 2.756811367804326, "learning_rate": 3.3603182262672727e-07, "loss": 0.9019, "step": 24450 }, { "epoch": 0.8861947736580769, "grad_norm": 2.403814453505446, "learning_rate": 3.358203193742182e-07, "loss": 0.9417, "step": 24451 }, { "epoch": 0.8862310173607336, "grad_norm": 2.3996872936589866, "learning_rate": 3.3560888039128437e-07, "loss": 0.7831, "step": 24452 }, { "epoch": 0.8862672610633903, "grad_norm": 2.629173080384318, "learning_rate": 3.3539750568084005e-07, "loss": 0.9674, "step": 24453 }, { "epoch": 0.8863035047660469, "grad_norm": 2.3581652068166212, "learning_rate": 3.351861952457963e-07, "loss": 0.9713, "step": 24454 }, { "epoch": 0.8863397484687036, "grad_norm": 2.342481401783443, "learning_rate": 3.3497494908906636e-07, "loss": 0.8951, "step": 24455 }, { "epoch": 0.8863759921713602, "grad_norm": 2.41352544347354, "learning_rate": 3.347637672135584e-07, "loss": 0.8605, "step": 24456 }, { "epoch": 0.8864122358740169, "grad_norm": 2.3620991796576933, "learning_rate": 3.3455264962218627e-07, "loss": 0.7662, "step": 24457 }, { "epoch": 0.8864484795766735, "grad_norm": 2.452923653583196, "learning_rate": 3.343415963178559e-07, "loss": 0.8597, "step": 24458 }, { "epoch": 0.8864847232793303, "grad_norm": 2.434972441726475, "learning_rate": 3.341306073034778e-07, "loss": 0.9438, "step": 24459 }, { "epoch": 0.8865209669819869, "grad_norm": 2.341669005332815, "learning_rate": 3.339196825819574e-07, "loss": 0.8064, "step": 24460 }, { "epoch": 0.8865572106846435, "grad_norm": 2.371949933712355, "learning_rate": 3.3370882215620124e-07, "loss": 0.7541, "step": 24461 }, { "epoch": 0.8865934543873002, "grad_norm": 2.6195729684405538, "learning_rate": 3.334980260291165e-07, "loss": 0.8929, "step": 24462 }, { "epoch": 0.8866296980899568, "grad_norm": 2.4715695821770836, "learning_rate": 3.332872942036064e-07, "loss": 0.8741, "step": 24463 }, { "epoch": 0.8866659417926135, "grad_norm": 2.4351863889073537, "learning_rate": 3.330766266825747e-07, "loss": 0.965, "step": 24464 }, { "epoch": 0.8867021854952702, "grad_norm": 2.0556455227111563, "learning_rate": 3.328660234689246e-07, "loss": 0.7127, "step": 24465 }, { "epoch": 0.8867384291979269, "grad_norm": 2.184406407055758, "learning_rate": 3.3265548456555886e-07, "loss": 0.8711, "step": 24466 }, { "epoch": 0.8867746729005835, "grad_norm": 2.5590005454132476, "learning_rate": 3.3244500997537675e-07, "loss": 0.9719, "step": 24467 }, { "epoch": 0.8868109166032402, "grad_norm": 2.963278293425505, "learning_rate": 3.322345997012799e-07, "loss": 0.9057, "step": 24468 }, { "epoch": 0.8868471603058968, "grad_norm": 2.4065988773147047, "learning_rate": 3.320242537461682e-07, "loss": 0.8899, "step": 24469 }, { "epoch": 0.8868834040085535, "grad_norm": 2.4474415964308367, "learning_rate": 3.318139721129382e-07, "loss": 0.9531, "step": 24470 }, { "epoch": 0.8869196477112101, "grad_norm": 2.2765070684519304, "learning_rate": 3.316037548044893e-07, "loss": 1.0779, "step": 24471 }, { "epoch": 0.8869558914138669, "grad_norm": 2.364074816407083, "learning_rate": 3.313936018237157e-07, "loss": 0.7627, "step": 24472 }, { "epoch": 0.8869921351165235, "grad_norm": 2.4106223375893205, "learning_rate": 3.3118351317351636e-07, "loss": 0.887, "step": 24473 }, { "epoch": 0.8870283788191802, "grad_norm": 2.3152965022371914, "learning_rate": 3.3097348885678394e-07, "loss": 0.8471, "step": 24474 }, { "epoch": 0.8870646225218368, "grad_norm": 2.296476709943956, "learning_rate": 3.3076352887641384e-07, "loss": 0.8019, "step": 24475 }, { "epoch": 0.8871008662244935, "grad_norm": 2.469197441313442, "learning_rate": 3.3055363323529654e-07, "loss": 0.9855, "step": 24476 }, { "epoch": 0.8871371099271501, "grad_norm": 2.377075755883377, "learning_rate": 3.303438019363281e-07, "loss": 0.9298, "step": 24477 }, { "epoch": 0.8871733536298069, "grad_norm": 2.7951676692755005, "learning_rate": 3.3013403498239726e-07, "loss": 0.8521, "step": 24478 }, { "epoch": 0.8872095973324635, "grad_norm": 2.7139203161230134, "learning_rate": 3.2992433237639563e-07, "loss": 1.0259, "step": 24479 }, { "epoch": 0.8872458410351202, "grad_norm": 2.532602699976679, "learning_rate": 3.297146941212115e-07, "loss": 0.8099, "step": 24480 }, { "epoch": 0.8872820847377768, "grad_norm": 2.3688609129110754, "learning_rate": 3.2950512021973466e-07, "loss": 0.9786, "step": 24481 }, { "epoch": 0.8873183284404335, "grad_norm": 2.7159550299431907, "learning_rate": 3.292956106748535e-07, "loss": 0.8723, "step": 24482 }, { "epoch": 0.8873545721430901, "grad_norm": 2.2375005155217518, "learning_rate": 3.290861654894528e-07, "loss": 0.9273, "step": 24483 }, { "epoch": 0.8873908158457467, "grad_norm": 2.4651765162173422, "learning_rate": 3.288767846664204e-07, "loss": 0.8482, "step": 24484 }, { "epoch": 0.8874270595484035, "grad_norm": 2.2369354753322277, "learning_rate": 3.28667468208641e-07, "loss": 0.919, "step": 24485 }, { "epoch": 0.8874633032510602, "grad_norm": 2.3727284386635366, "learning_rate": 3.284582161189992e-07, "loss": 0.709, "step": 24486 }, { "epoch": 0.8874995469537168, "grad_norm": 2.4460437405871738, "learning_rate": 3.28249028400377e-07, "loss": 0.8356, "step": 24487 }, { "epoch": 0.8875357906563734, "grad_norm": 2.4907579005266767, "learning_rate": 3.280399050556582e-07, "loss": 0.7503, "step": 24488 }, { "epoch": 0.8875720343590301, "grad_norm": 2.568373546250263, "learning_rate": 3.2783084608772444e-07, "loss": 0.8504, "step": 24489 }, { "epoch": 0.8876082780616867, "grad_norm": 2.2033106962927613, "learning_rate": 3.27621851499455e-07, "loss": 0.9399, "step": 24490 }, { "epoch": 0.8876445217643435, "grad_norm": 2.600906303965616, "learning_rate": 3.274129212937321e-07, "loss": 0.8986, "step": 24491 }, { "epoch": 0.8876807654670001, "grad_norm": 2.62001271761658, "learning_rate": 3.2720405547343124e-07, "loss": 0.8958, "step": 24492 }, { "epoch": 0.8877170091696568, "grad_norm": 2.6369242070550736, "learning_rate": 3.2699525404143393e-07, "loss": 0.8333, "step": 24493 }, { "epoch": 0.8877532528723134, "grad_norm": 2.2909891476852167, "learning_rate": 3.267865170006157e-07, "loss": 0.7843, "step": 24494 }, { "epoch": 0.8877894965749701, "grad_norm": 2.084211849311426, "learning_rate": 3.265778443538531e-07, "loss": 0.84, "step": 24495 }, { "epoch": 0.8878257402776267, "grad_norm": 2.3256738336057348, "learning_rate": 3.263692361040205e-07, "loss": 0.8604, "step": 24496 }, { "epoch": 0.8878619839802834, "grad_norm": 2.2987941509070997, "learning_rate": 3.261606922539945e-07, "loss": 1.0228, "step": 24497 }, { "epoch": 0.8878982276829401, "grad_norm": 2.2414268151167542, "learning_rate": 3.259522128066467e-07, "loss": 0.7787, "step": 24498 }, { "epoch": 0.8879344713855968, "grad_norm": 2.4853686505644577, "learning_rate": 3.2574379776485144e-07, "loss": 0.9243, "step": 24499 }, { "epoch": 0.8879707150882534, "grad_norm": 2.217286985376187, "learning_rate": 3.255354471314792e-07, "loss": 0.8595, "step": 24500 }, { "epoch": 0.8880069587909101, "grad_norm": 2.270186876135626, "learning_rate": 3.253271609094011e-07, "loss": 0.9219, "step": 24501 }, { "epoch": 0.8880432024935667, "grad_norm": 2.3753704911026765, "learning_rate": 3.2511893910148863e-07, "loss": 0.9141, "step": 24502 }, { "epoch": 0.8880794461962234, "grad_norm": 2.460541820873361, "learning_rate": 3.2491078171060843e-07, "loss": 0.8266, "step": 24503 }, { "epoch": 0.8881156898988801, "grad_norm": 2.259087484714233, "learning_rate": 3.247026887396315e-07, "loss": 0.988, "step": 24504 }, { "epoch": 0.8881519336015368, "grad_norm": 2.5026981346384694, "learning_rate": 3.244946601914234e-07, "loss": 0.8658, "step": 24505 }, { "epoch": 0.8881881773041934, "grad_norm": 2.0966239554616872, "learning_rate": 3.242866960688518e-07, "loss": 0.7619, "step": 24506 }, { "epoch": 0.8882244210068501, "grad_norm": 2.5125260087587407, "learning_rate": 3.24078796374781e-07, "loss": 0.7912, "step": 24507 }, { "epoch": 0.8882606647095067, "grad_norm": 2.319177259855851, "learning_rate": 3.238709611120766e-07, "loss": 0.8542, "step": 24508 }, { "epoch": 0.8882969084121634, "grad_norm": 2.4022932868432694, "learning_rate": 3.2366319028360347e-07, "loss": 0.8723, "step": 24509 }, { "epoch": 0.88833315211482, "grad_norm": 2.2504783243499666, "learning_rate": 3.2345548389222214e-07, "loss": 0.8435, "step": 24510 }, { "epoch": 0.8883693958174768, "grad_norm": 2.8079690321371307, "learning_rate": 3.2324784194079583e-07, "loss": 1.0081, "step": 24511 }, { "epoch": 0.8884056395201334, "grad_norm": 2.2899976936407116, "learning_rate": 3.230402644321862e-07, "loss": 0.869, "step": 24512 }, { "epoch": 0.88844188322279, "grad_norm": 2.1338716014443686, "learning_rate": 3.228327513692542e-07, "loss": 0.9494, "step": 24513 }, { "epoch": 0.8884781269254467, "grad_norm": 2.5065908369878325, "learning_rate": 3.2262530275485705e-07, "loss": 1.0416, "step": 24514 }, { "epoch": 0.8885143706281033, "grad_norm": 2.2997151834800587, "learning_rate": 3.224179185918541e-07, "loss": 0.9101, "step": 24515 }, { "epoch": 0.88855061433076, "grad_norm": 2.599034989643978, "learning_rate": 3.222105988831037e-07, "loss": 0.9401, "step": 24516 }, { "epoch": 0.8885868580334167, "grad_norm": 2.230607671461831, "learning_rate": 3.2200334363146343e-07, "loss": 0.9582, "step": 24517 }, { "epoch": 0.8886231017360734, "grad_norm": 2.759961373619038, "learning_rate": 3.217961528397867e-07, "loss": 0.7738, "step": 24518 }, { "epoch": 0.88865934543873, "grad_norm": 2.4474704371013947, "learning_rate": 3.215890265109295e-07, "loss": 0.8567, "step": 24519 }, { "epoch": 0.8886955891413867, "grad_norm": 2.511619524195063, "learning_rate": 3.213819646477473e-07, "loss": 0.8852, "step": 24520 }, { "epoch": 0.8887318328440433, "grad_norm": 2.3428833892141703, "learning_rate": 3.2117496725309114e-07, "loss": 0.9869, "step": 24521 }, { "epoch": 0.8887680765467, "grad_norm": 2.477227544503056, "learning_rate": 3.2096803432981495e-07, "loss": 0.7671, "step": 24522 }, { "epoch": 0.8888043202493566, "grad_norm": 2.8697132839280113, "learning_rate": 3.20761165880768e-07, "loss": 0.9956, "step": 24523 }, { "epoch": 0.8888405639520134, "grad_norm": 2.339892330432699, "learning_rate": 3.205543619088036e-07, "loss": 0.9955, "step": 24524 }, { "epoch": 0.88887680765467, "grad_norm": 2.422586002309515, "learning_rate": 3.203476224167695e-07, "loss": 0.9525, "step": 24525 }, { "epoch": 0.8889130513573267, "grad_norm": 2.5183181411215414, "learning_rate": 3.2014094740751623e-07, "loss": 0.8796, "step": 24526 }, { "epoch": 0.8889492950599833, "grad_norm": 2.4877175064933907, "learning_rate": 3.199343368838892e-07, "loss": 0.9759, "step": 24527 }, { "epoch": 0.88898553876264, "grad_norm": 2.7609320436824687, "learning_rate": 3.1972779084873617e-07, "loss": 1.0447, "step": 24528 }, { "epoch": 0.8890217824652966, "grad_norm": 2.4585033095842643, "learning_rate": 3.195213093049049e-07, "loss": 0.8256, "step": 24529 }, { "epoch": 0.8890580261679533, "grad_norm": 2.4127690768909917, "learning_rate": 3.193148922552386e-07, "loss": 0.9276, "step": 24530 }, { "epoch": 0.88909426987061, "grad_norm": 2.526666446644331, "learning_rate": 3.191085397025823e-07, "loss": 0.8255, "step": 24531 }, { "epoch": 0.8891305135732667, "grad_norm": 2.3503156666010097, "learning_rate": 3.189022516497792e-07, "loss": 0.9151, "step": 24532 }, { "epoch": 0.8891667572759233, "grad_norm": 2.2837430405045347, "learning_rate": 3.186960280996726e-07, "loss": 0.7703, "step": 24533 }, { "epoch": 0.88920300097858, "grad_norm": 2.185039609392947, "learning_rate": 3.1848986905510303e-07, "loss": 0.8085, "step": 24534 }, { "epoch": 0.8892392446812366, "grad_norm": 3.625641864871516, "learning_rate": 3.182837745189121e-07, "loss": 0.8429, "step": 24535 }, { "epoch": 0.8892754883838933, "grad_norm": 2.666609266394083, "learning_rate": 3.1807774449393924e-07, "loss": 0.947, "step": 24536 }, { "epoch": 0.88931173208655, "grad_norm": 2.5823263894718176, "learning_rate": 3.178717789830238e-07, "loss": 0.915, "step": 24537 }, { "epoch": 0.8893479757892067, "grad_norm": 2.520747093688601, "learning_rate": 3.176658779890035e-07, "loss": 1.007, "step": 24538 }, { "epoch": 0.8893842194918633, "grad_norm": 2.0620340717969907, "learning_rate": 3.17460041514715e-07, "loss": 0.8906, "step": 24539 }, { "epoch": 0.88942046319452, "grad_norm": 2.1754937515167567, "learning_rate": 3.1725426956299653e-07, "loss": 0.9748, "step": 24540 }, { "epoch": 0.8894567068971766, "grad_norm": 2.528841172382461, "learning_rate": 3.170485621366809e-07, "loss": 0.8351, "step": 24541 }, { "epoch": 0.8894929505998332, "grad_norm": 2.237361847729177, "learning_rate": 3.1684291923860523e-07, "loss": 0.8415, "step": 24542 }, { "epoch": 0.8895291943024899, "grad_norm": 2.17439187611625, "learning_rate": 3.166373408716e-07, "loss": 0.7908, "step": 24543 }, { "epoch": 0.8895654380051466, "grad_norm": 2.1840523397724274, "learning_rate": 3.164318270385014e-07, "loss": 0.918, "step": 24544 }, { "epoch": 0.8896016817078033, "grad_norm": 2.5934781197160532, "learning_rate": 3.162263777421387e-07, "loss": 0.9946, "step": 24545 }, { "epoch": 0.8896379254104599, "grad_norm": 2.3128264233145908, "learning_rate": 3.1602099298534474e-07, "loss": 1.0096, "step": 24546 }, { "epoch": 0.8896741691131166, "grad_norm": 2.2263258324841937, "learning_rate": 3.1581567277094827e-07, "loss": 0.8053, "step": 24547 }, { "epoch": 0.8897104128157732, "grad_norm": 2.357876867505873, "learning_rate": 3.1561041710177877e-07, "loss": 0.9469, "step": 24548 }, { "epoch": 0.8897466565184299, "grad_norm": 2.27321836767251, "learning_rate": 3.1540522598066503e-07, "loss": 0.8212, "step": 24549 }, { "epoch": 0.8897829002210866, "grad_norm": 2.483106100028234, "learning_rate": 3.1520009941043374e-07, "loss": 0.8707, "step": 24550 }, { "epoch": 0.8898191439237433, "grad_norm": 2.1700229965286595, "learning_rate": 3.1499503739391203e-07, "loss": 0.9909, "step": 24551 }, { "epoch": 0.8898553876263999, "grad_norm": 2.365783957388166, "learning_rate": 3.147900399339249e-07, "loss": 0.8171, "step": 24552 }, { "epoch": 0.8898916313290566, "grad_norm": 2.69262703240067, "learning_rate": 3.1458510703329835e-07, "loss": 0.9383, "step": 24553 }, { "epoch": 0.8899278750317132, "grad_norm": 2.3653996952973033, "learning_rate": 3.143802386948547e-07, "loss": 1.044, "step": 24554 }, { "epoch": 0.8899641187343699, "grad_norm": 2.7127610734410705, "learning_rate": 3.1417543492141767e-07, "loss": 0.9415, "step": 24555 }, { "epoch": 0.8900003624370265, "grad_norm": 1.9927793188254426, "learning_rate": 3.139706957158095e-07, "loss": 0.746, "step": 24556 }, { "epoch": 0.8900366061396833, "grad_norm": 2.2831582116859184, "learning_rate": 3.137660210808507e-07, "loss": 0.9519, "step": 24557 }, { "epoch": 0.8900728498423399, "grad_norm": 2.311657428530812, "learning_rate": 3.13561411019363e-07, "loss": 0.8758, "step": 24558 }, { "epoch": 0.8901090935449966, "grad_norm": 2.3122550963690265, "learning_rate": 3.133568655341629e-07, "loss": 0.8408, "step": 24559 }, { "epoch": 0.8901453372476532, "grad_norm": 2.6600212247762793, "learning_rate": 3.1315238462807264e-07, "loss": 0.8233, "step": 24560 }, { "epoch": 0.8901815809503099, "grad_norm": 2.3711541401683376, "learning_rate": 3.1294796830390716e-07, "loss": 0.8081, "step": 24561 }, { "epoch": 0.8902178246529665, "grad_norm": 2.1890090932073902, "learning_rate": 3.127436165644848e-07, "loss": 0.8216, "step": 24562 }, { "epoch": 0.8902540683556233, "grad_norm": 2.4976940855096648, "learning_rate": 3.125393294126189e-07, "loss": 0.9785, "step": 24563 }, { "epoch": 0.8902903120582799, "grad_norm": 2.633456340537214, "learning_rate": 3.1233510685112823e-07, "loss": 0.8608, "step": 24564 }, { "epoch": 0.8903265557609366, "grad_norm": 2.5225897878149826, "learning_rate": 3.1213094888282335e-07, "loss": 0.6912, "step": 24565 }, { "epoch": 0.8903627994635932, "grad_norm": 2.0894616969484288, "learning_rate": 3.119268555105198e-07, "loss": 0.8157, "step": 24566 }, { "epoch": 0.8903990431662498, "grad_norm": 2.3822010811175773, "learning_rate": 3.117228267370287e-07, "loss": 0.9102, "step": 24567 }, { "epoch": 0.8904352868689065, "grad_norm": 2.273141571484402, "learning_rate": 3.1151886256516106e-07, "loss": 0.904, "step": 24568 }, { "epoch": 0.8904715305715631, "grad_norm": 2.3708401064005096, "learning_rate": 3.113149629977286e-07, "loss": 0.8775, "step": 24569 }, { "epoch": 0.8905077742742199, "grad_norm": 2.183806521770575, "learning_rate": 3.111111280375401e-07, "loss": 0.939, "step": 24570 }, { "epoch": 0.8905440179768765, "grad_norm": 2.538864839438646, "learning_rate": 3.109073576874044e-07, "loss": 0.87, "step": 24571 }, { "epoch": 0.8905802616795332, "grad_norm": 2.4041628948364497, "learning_rate": 3.107036519501294e-07, "loss": 0.8599, "step": 24572 }, { "epoch": 0.8906165053821898, "grad_norm": 2.4953788728687183, "learning_rate": 3.1050001082852275e-07, "loss": 0.9437, "step": 24573 }, { "epoch": 0.8906527490848465, "grad_norm": 2.403919546982715, "learning_rate": 3.102964343253889e-07, "loss": 0.9284, "step": 24574 }, { "epoch": 0.8906889927875031, "grad_norm": 2.3765472743625717, "learning_rate": 3.1009292244353393e-07, "loss": 0.9327, "step": 24575 }, { "epoch": 0.8907252364901599, "grad_norm": 2.60987821005751, "learning_rate": 3.098894751857634e-07, "loss": 0.7647, "step": 24576 }, { "epoch": 0.8907614801928165, "grad_norm": 2.5257745923133896, "learning_rate": 3.0968609255487834e-07, "loss": 0.9908, "step": 24577 }, { "epoch": 0.8907977238954732, "grad_norm": 2.527109589971851, "learning_rate": 3.0948277455368325e-07, "loss": 0.7854, "step": 24578 }, { "epoch": 0.8908339675981298, "grad_norm": 2.3687148432982514, "learning_rate": 3.0927952118497694e-07, "loss": 0.8891, "step": 24579 }, { "epoch": 0.8908702113007865, "grad_norm": 2.116701415965187, "learning_rate": 3.090763324515633e-07, "loss": 0.6788, "step": 24580 }, { "epoch": 0.8909064550034431, "grad_norm": 2.6097599117384687, "learning_rate": 3.088732083562407e-07, "loss": 0.8472, "step": 24581 }, { "epoch": 0.8909426987060998, "grad_norm": 2.579796274761016, "learning_rate": 3.086701489018079e-07, "loss": 1.0404, "step": 24582 }, { "epoch": 0.8909789424087565, "grad_norm": 2.2821091403435334, "learning_rate": 3.084671540910633e-07, "loss": 1.0195, "step": 24583 }, { "epoch": 0.8910151861114132, "grad_norm": 2.260369096165752, "learning_rate": 3.0826422392680466e-07, "loss": 0.9241, "step": 24584 }, { "epoch": 0.8910514298140698, "grad_norm": 2.1879428882370218, "learning_rate": 3.080613584118269e-07, "loss": 0.9858, "step": 24585 }, { "epoch": 0.8910876735167265, "grad_norm": 2.415435059264821, "learning_rate": 3.078585575489257e-07, "loss": 0.8232, "step": 24586 }, { "epoch": 0.8911239172193831, "grad_norm": 2.5990080554593447, "learning_rate": 3.07655821340897e-07, "loss": 0.9999, "step": 24587 }, { "epoch": 0.8911601609220398, "grad_norm": 2.347974886821077, "learning_rate": 3.07453149790532e-07, "loss": 0.8124, "step": 24588 }, { "epoch": 0.8911964046246965, "grad_norm": 2.6196793052458798, "learning_rate": 3.0725054290062563e-07, "loss": 0.7742, "step": 24589 }, { "epoch": 0.8912326483273532, "grad_norm": 2.2591490465349837, "learning_rate": 3.070480006739673e-07, "loss": 0.8753, "step": 24590 }, { "epoch": 0.8912688920300098, "grad_norm": 2.4159105870761732, "learning_rate": 3.0684552311335104e-07, "loss": 0.7559, "step": 24591 }, { "epoch": 0.8913051357326665, "grad_norm": 2.5543414848231514, "learning_rate": 3.066431102215639e-07, "loss": 0.9714, "step": 24592 }, { "epoch": 0.8913413794353231, "grad_norm": 2.2726612032494966, "learning_rate": 3.0644076200139707e-07, "loss": 0.9548, "step": 24593 }, { "epoch": 0.8913776231379797, "grad_norm": 2.2593277306014863, "learning_rate": 3.062384784556377e-07, "loss": 0.9482, "step": 24594 }, { "epoch": 0.8914138668406364, "grad_norm": 2.1443313429578894, "learning_rate": 3.0603625958707307e-07, "loss": 0.6461, "step": 24595 }, { "epoch": 0.8914501105432932, "grad_norm": 2.4395154498390337, "learning_rate": 3.058341053984909e-07, "loss": 0.8379, "step": 24596 }, { "epoch": 0.8914863542459498, "grad_norm": 2.091735696576715, "learning_rate": 3.0563201589267454e-07, "loss": 0.7999, "step": 24597 }, { "epoch": 0.8915225979486064, "grad_norm": 2.5406469592209557, "learning_rate": 3.054299910724101e-07, "loss": 0.8744, "step": 24598 }, { "epoch": 0.8915588416512631, "grad_norm": 2.267825742233898, "learning_rate": 3.052280309404815e-07, "loss": 0.9099, "step": 24599 }, { "epoch": 0.8915950853539197, "grad_norm": 2.4824821938004353, "learning_rate": 3.0502613549967197e-07, "loss": 0.8684, "step": 24600 }, { "epoch": 0.8916313290565764, "grad_norm": 2.330089152302045, "learning_rate": 3.0482430475276215e-07, "loss": 0.8079, "step": 24601 }, { "epoch": 0.891667572759233, "grad_norm": 2.1997012341159525, "learning_rate": 3.0462253870253366e-07, "loss": 0.7508, "step": 24602 }, { "epoch": 0.8917038164618898, "grad_norm": 2.5139626204716703, "learning_rate": 3.044208373517671e-07, "loss": 0.9347, "step": 24603 }, { "epoch": 0.8917400601645464, "grad_norm": 2.7034927713472556, "learning_rate": 3.042192007032424e-07, "loss": 0.7989, "step": 24604 }, { "epoch": 0.8917763038672031, "grad_norm": 2.473434860919973, "learning_rate": 3.0401762875973627e-07, "loss": 1.0418, "step": 24605 }, { "epoch": 0.8918125475698597, "grad_norm": 2.322591496917116, "learning_rate": 3.038161215240276e-07, "loss": 0.8993, "step": 24606 }, { "epoch": 0.8918487912725164, "grad_norm": 2.3320757339094382, "learning_rate": 3.0361467899889307e-07, "loss": 0.8293, "step": 24607 }, { "epoch": 0.891885034975173, "grad_norm": 2.3432739859382767, "learning_rate": 3.034133011871071e-07, "loss": 1.0524, "step": 24608 }, { "epoch": 0.8919212786778298, "grad_norm": 2.576680408986829, "learning_rate": 3.0321198809144635e-07, "loss": 0.9583, "step": 24609 }, { "epoch": 0.8919575223804864, "grad_norm": 2.6795486313685455, "learning_rate": 3.03010739714682e-07, "loss": 0.9436, "step": 24610 }, { "epoch": 0.8919937660831431, "grad_norm": 2.290575618513784, "learning_rate": 3.028095560595912e-07, "loss": 0.7353, "step": 24611 }, { "epoch": 0.8920300097857997, "grad_norm": 2.394245892432283, "learning_rate": 3.0260843712894285e-07, "loss": 1.0135, "step": 24612 }, { "epoch": 0.8920662534884564, "grad_norm": 2.2872886639500476, "learning_rate": 3.0240738292550975e-07, "loss": 0.7231, "step": 24613 }, { "epoch": 0.892102497191113, "grad_norm": 2.355389199550768, "learning_rate": 3.022063934520614e-07, "loss": 0.799, "step": 24614 }, { "epoch": 0.8921387408937697, "grad_norm": 2.4322210884384474, "learning_rate": 3.020054687113677e-07, "loss": 0.8899, "step": 24615 }, { "epoch": 0.8921749845964264, "grad_norm": 2.3150894437637772, "learning_rate": 3.0180460870619874e-07, "loss": 0.9761, "step": 24616 }, { "epoch": 0.8922112282990831, "grad_norm": 2.0624872075789424, "learning_rate": 3.0160381343931953e-07, "loss": 0.7315, "step": 24617 }, { "epoch": 0.8922474720017397, "grad_norm": 2.5562520586245245, "learning_rate": 3.014030829134984e-07, "loss": 0.7378, "step": 24618 }, { "epoch": 0.8922837157043964, "grad_norm": 2.4141150499836646, "learning_rate": 3.0120241713150144e-07, "loss": 0.8503, "step": 24619 }, { "epoch": 0.892319959407053, "grad_norm": 2.203318522404826, "learning_rate": 3.010018160960937e-07, "loss": 0.8197, "step": 24620 }, { "epoch": 0.8923562031097096, "grad_norm": 2.4256098946930233, "learning_rate": 3.008012798100385e-07, "loss": 1.0031, "step": 24621 }, { "epoch": 0.8923924468123664, "grad_norm": 2.3637326768574103, "learning_rate": 3.006008082760997e-07, "loss": 0.8124, "step": 24622 }, { "epoch": 0.892428690515023, "grad_norm": 2.2995572716152344, "learning_rate": 3.004004014970402e-07, "loss": 0.9447, "step": 24623 }, { "epoch": 0.8924649342176797, "grad_norm": 2.2809337644185206, "learning_rate": 3.00200059475621e-07, "loss": 0.8092, "step": 24624 }, { "epoch": 0.8925011779203363, "grad_norm": 2.0676579977289187, "learning_rate": 2.9999978221460225e-07, "loss": 0.7546, "step": 24625 }, { "epoch": 0.892537421622993, "grad_norm": 2.485304964552368, "learning_rate": 2.997995697167438e-07, "loss": 0.8838, "step": 24626 }, { "epoch": 0.8925736653256496, "grad_norm": 2.591381291514872, "learning_rate": 2.995994219848053e-07, "loss": 0.8491, "step": 24627 }, { "epoch": 0.8926099090283063, "grad_norm": 2.4604139618522636, "learning_rate": 2.9939933902154384e-07, "loss": 1.0017, "step": 24628 }, { "epoch": 0.892646152730963, "grad_norm": 2.443130369842841, "learning_rate": 2.991993208297167e-07, "loss": 0.9066, "step": 24629 }, { "epoch": 0.8926823964336197, "grad_norm": 2.180169650793223, "learning_rate": 2.9899936741207893e-07, "loss": 0.7965, "step": 24630 }, { "epoch": 0.8927186401362763, "grad_norm": 2.318997392644728, "learning_rate": 2.9879947877138826e-07, "loss": 0.8516, "step": 24631 }, { "epoch": 0.892754883838933, "grad_norm": 2.2597428328449056, "learning_rate": 2.98599654910397e-07, "loss": 0.7262, "step": 24632 }, { "epoch": 0.8927911275415896, "grad_norm": 2.340705548496293, "learning_rate": 2.983998958318596e-07, "loss": 0.8317, "step": 24633 }, { "epoch": 0.8928273712442463, "grad_norm": 2.544183492144309, "learning_rate": 2.9820020153852766e-07, "loss": 0.9764, "step": 24634 }, { "epoch": 0.892863614946903, "grad_norm": 2.7033083814911176, "learning_rate": 2.9800057203315303e-07, "loss": 0.9584, "step": 24635 }, { "epoch": 0.8928998586495597, "grad_norm": 2.182247850010856, "learning_rate": 2.978010073184878e-07, "loss": 0.9867, "step": 24636 }, { "epoch": 0.8929361023522163, "grad_norm": 2.2385559149595977, "learning_rate": 2.9760150739728045e-07, "loss": 0.8105, "step": 24637 }, { "epoch": 0.892972346054873, "grad_norm": 2.2295806509569256, "learning_rate": 2.9740207227228033e-07, "loss": 0.7906, "step": 24638 }, { "epoch": 0.8930085897575296, "grad_norm": 2.49607046845753, "learning_rate": 2.9720270194623537e-07, "loss": 0.7883, "step": 24639 }, { "epoch": 0.8930448334601863, "grad_norm": 2.331867806734157, "learning_rate": 2.970033964218938e-07, "loss": 1.0301, "step": 24640 }, { "epoch": 0.8930810771628429, "grad_norm": 2.333213722609629, "learning_rate": 2.9680415570200015e-07, "loss": 0.9019, "step": 24641 }, { "epoch": 0.8931173208654997, "grad_norm": 2.4155071493882243, "learning_rate": 2.9660497978930114e-07, "loss": 0.8809, "step": 24642 }, { "epoch": 0.8931535645681563, "grad_norm": 2.3074441621264117, "learning_rate": 2.9640586868654174e-07, "loss": 1.1311, "step": 24643 }, { "epoch": 0.893189808270813, "grad_norm": 2.45743057050105, "learning_rate": 2.9620682239646424e-07, "loss": 0.9216, "step": 24644 }, { "epoch": 0.8932260519734696, "grad_norm": 2.6765495927346223, "learning_rate": 2.960078409218126e-07, "loss": 0.9675, "step": 24645 }, { "epoch": 0.8932622956761263, "grad_norm": 2.0673398985581, "learning_rate": 2.9580892426532615e-07, "loss": 0.8211, "step": 24646 }, { "epoch": 0.8932985393787829, "grad_norm": 2.2150320398811774, "learning_rate": 2.956100724297495e-07, "loss": 0.8703, "step": 24647 }, { "epoch": 0.8933347830814397, "grad_norm": 2.4011730528320143, "learning_rate": 2.9541128541782036e-07, "loss": 0.9167, "step": 24648 }, { "epoch": 0.8933710267840963, "grad_norm": 2.440848911063144, "learning_rate": 2.9521256323227887e-07, "loss": 0.771, "step": 24649 }, { "epoch": 0.893407270486753, "grad_norm": 2.396240482792108, "learning_rate": 2.9501390587586163e-07, "loss": 0.8717, "step": 24650 }, { "epoch": 0.8934435141894096, "grad_norm": 2.412714087690211, "learning_rate": 2.9481531335130875e-07, "loss": 0.9196, "step": 24651 }, { "epoch": 0.8934797578920662, "grad_norm": 2.3294000151181353, "learning_rate": 2.9461678566135465e-07, "loss": 0.8828, "step": 24652 }, { "epoch": 0.8935160015947229, "grad_norm": 2.497501953578254, "learning_rate": 2.9441832280873606e-07, "loss": 0.69, "step": 24653 }, { "epoch": 0.8935522452973795, "grad_norm": 2.5945783375112854, "learning_rate": 2.9421992479618634e-07, "loss": 0.9833, "step": 24654 }, { "epoch": 0.8935884890000363, "grad_norm": 2.3247744092892977, "learning_rate": 2.9402159162643993e-07, "loss": 0.8997, "step": 24655 }, { "epoch": 0.8936247327026929, "grad_norm": 2.118801409228002, "learning_rate": 2.9382332330223084e-07, "loss": 0.7106, "step": 24656 }, { "epoch": 0.8936609764053496, "grad_norm": 2.1189904969571747, "learning_rate": 2.9362511982628963e-07, "loss": 0.7681, "step": 24657 }, { "epoch": 0.8936972201080062, "grad_norm": 2.182239984206613, "learning_rate": 2.9342698120134795e-07, "loss": 0.7589, "step": 24658 }, { "epoch": 0.8937334638106629, "grad_norm": 2.3067260059384167, "learning_rate": 2.9322890743013587e-07, "loss": 0.77, "step": 24659 }, { "epoch": 0.8937697075133195, "grad_norm": 2.8044162891396396, "learning_rate": 2.9303089851538344e-07, "loss": 0.9839, "step": 24660 }, { "epoch": 0.8938059512159762, "grad_norm": 2.1866492857466198, "learning_rate": 2.9283295445981795e-07, "loss": 0.6701, "step": 24661 }, { "epoch": 0.8938421949186329, "grad_norm": 2.37702559139791, "learning_rate": 2.9263507526616775e-07, "loss": 0.669, "step": 24662 }, { "epoch": 0.8938784386212896, "grad_norm": 2.157929410871631, "learning_rate": 2.9243726093715953e-07, "loss": 0.7398, "step": 24663 }, { "epoch": 0.8939146823239462, "grad_norm": 2.2506594160952957, "learning_rate": 2.922395114755183e-07, "loss": 0.6974, "step": 24664 }, { "epoch": 0.8939509260266029, "grad_norm": 2.31790362300047, "learning_rate": 2.9204182688396974e-07, "loss": 1.0622, "step": 24665 }, { "epoch": 0.8939871697292595, "grad_norm": 2.772386039483728, "learning_rate": 2.918442071652372e-07, "loss": 1.0027, "step": 24666 }, { "epoch": 0.8940234134319162, "grad_norm": 2.0218028257777396, "learning_rate": 2.9164665232204513e-07, "loss": 0.8683, "step": 24667 }, { "epoch": 0.8940596571345729, "grad_norm": 2.244642930810037, "learning_rate": 2.914491623571136e-07, "loss": 0.8116, "step": 24668 }, { "epoch": 0.8940959008372296, "grad_norm": 2.4056362998777643, "learning_rate": 2.912517372731649e-07, "loss": 0.7691, "step": 24669 }, { "epoch": 0.8941321445398862, "grad_norm": 2.214263267527937, "learning_rate": 2.9105437707292016e-07, "loss": 0.8219, "step": 24670 }, { "epoch": 0.8941683882425429, "grad_norm": 2.343542497536786, "learning_rate": 2.908570817590983e-07, "loss": 0.8423, "step": 24671 }, { "epoch": 0.8942046319451995, "grad_norm": 2.5255638598802728, "learning_rate": 2.906598513344172e-07, "loss": 1.0017, "step": 24672 }, { "epoch": 0.8942408756478561, "grad_norm": 2.7009602585344137, "learning_rate": 2.904626858015952e-07, "loss": 0.8404, "step": 24673 }, { "epoch": 0.8942771193505128, "grad_norm": 2.391287724218887, "learning_rate": 2.9026558516335014e-07, "loss": 0.8096, "step": 24674 }, { "epoch": 0.8943133630531696, "grad_norm": 2.3899686713958395, "learning_rate": 2.9006854942239646e-07, "loss": 0.8999, "step": 24675 }, { "epoch": 0.8943496067558262, "grad_norm": 2.383972275147429, "learning_rate": 2.898715785814499e-07, "loss": 0.8992, "step": 24676 }, { "epoch": 0.8943858504584828, "grad_norm": 2.424272260014848, "learning_rate": 2.8967467264322314e-07, "loss": 0.9782, "step": 24677 }, { "epoch": 0.8944220941611395, "grad_norm": 2.3820957247125487, "learning_rate": 2.8947783161043186e-07, "loss": 0.9064, "step": 24678 }, { "epoch": 0.8944583378637961, "grad_norm": 2.2245853477583974, "learning_rate": 2.8928105548578723e-07, "loss": 0.7601, "step": 24679 }, { "epoch": 0.8944945815664528, "grad_norm": 2.56983877331972, "learning_rate": 2.8908434427200094e-07, "loss": 0.8297, "step": 24680 }, { "epoch": 0.8945308252691095, "grad_norm": 2.3528369897252857, "learning_rate": 2.8888769797178253e-07, "loss": 0.9376, "step": 24681 }, { "epoch": 0.8945670689717662, "grad_norm": 2.3916228091616967, "learning_rate": 2.8869111658784256e-07, "loss": 0.7479, "step": 24682 }, { "epoch": 0.8946033126744228, "grad_norm": 2.460143617668962, "learning_rate": 2.8849460012289e-07, "loss": 0.8944, "step": 24683 }, { "epoch": 0.8946395563770795, "grad_norm": 2.2171724976149862, "learning_rate": 2.882981485796321e-07, "loss": 0.8329, "step": 24684 }, { "epoch": 0.8946758000797361, "grad_norm": 2.3147839849197624, "learning_rate": 2.881017619607762e-07, "loss": 0.9499, "step": 24685 }, { "epoch": 0.8947120437823928, "grad_norm": 2.598022455031319, "learning_rate": 2.8790544026902843e-07, "loss": 0.8484, "step": 24686 }, { "epoch": 0.8947482874850494, "grad_norm": 3.182680554409402, "learning_rate": 2.877091835070944e-07, "loss": 0.9224, "step": 24687 }, { "epoch": 0.8947845311877062, "grad_norm": 2.2083872621979848, "learning_rate": 2.8751299167767756e-07, "loss": 0.9384, "step": 24688 }, { "epoch": 0.8948207748903628, "grad_norm": 2.446959698061281, "learning_rate": 2.873168647834812e-07, "loss": 0.9455, "step": 24689 }, { "epoch": 0.8948570185930195, "grad_norm": 2.689646334564927, "learning_rate": 2.8712080282720877e-07, "loss": 0.8514, "step": 24690 }, { "epoch": 0.8948932622956761, "grad_norm": 2.452265055853761, "learning_rate": 2.86924805811562e-07, "loss": 0.7617, "step": 24691 }, { "epoch": 0.8949295059983328, "grad_norm": 2.551626785134973, "learning_rate": 2.8672887373923987e-07, "loss": 0.9296, "step": 24692 }, { "epoch": 0.8949657497009894, "grad_norm": 2.402816027734386, "learning_rate": 2.86533006612944e-07, "loss": 0.7444, "step": 24693 }, { "epoch": 0.8950019934036462, "grad_norm": 2.234136968869786, "learning_rate": 2.8633720443537295e-07, "loss": 0.9952, "step": 24694 }, { "epoch": 0.8950382371063028, "grad_norm": 2.0788973103202504, "learning_rate": 2.861414672092239e-07, "loss": 0.8326, "step": 24695 }, { "epoch": 0.8950744808089595, "grad_norm": 2.1260201446201528, "learning_rate": 2.859457949371952e-07, "loss": 0.8312, "step": 24696 }, { "epoch": 0.8951107245116161, "grad_norm": 2.443942411949211, "learning_rate": 2.857501876219815e-07, "loss": 0.9875, "step": 24697 }, { "epoch": 0.8951469682142728, "grad_norm": 2.338639820995433, "learning_rate": 2.8555464526628e-07, "loss": 0.9141, "step": 24698 }, { "epoch": 0.8951832119169294, "grad_norm": 2.3996554636952174, "learning_rate": 2.853591678727841e-07, "loss": 0.8905, "step": 24699 }, { "epoch": 0.895219455619586, "grad_norm": 2.809339903054502, "learning_rate": 2.8516375544418774e-07, "loss": 0.855, "step": 24700 }, { "epoch": 0.8952556993222428, "grad_norm": 2.3769001978273616, "learning_rate": 2.8496840798318273e-07, "loss": 0.8, "step": 24701 }, { "epoch": 0.8952919430248995, "grad_norm": 2.467597529850168, "learning_rate": 2.847731254924618e-07, "loss": 0.8787, "step": 24702 }, { "epoch": 0.8953281867275561, "grad_norm": 2.564361645508663, "learning_rate": 2.845779079747163e-07, "loss": 0.932, "step": 24703 }, { "epoch": 0.8953644304302127, "grad_norm": 2.5579772818914495, "learning_rate": 2.843827554326345e-07, "loss": 0.8399, "step": 24704 }, { "epoch": 0.8954006741328694, "grad_norm": 2.3959998193753687, "learning_rate": 2.8418766786890595e-07, "loss": 0.8354, "step": 24705 }, { "epoch": 0.895436917835526, "grad_norm": 2.392826853276726, "learning_rate": 2.8399264528622017e-07, "loss": 0.9616, "step": 24706 }, { "epoch": 0.8954731615381828, "grad_norm": 2.4752452197563963, "learning_rate": 2.837976876872639e-07, "loss": 0.9562, "step": 24707 }, { "epoch": 0.8955094052408394, "grad_norm": 2.1628414501678135, "learning_rate": 2.8360279507472223e-07, "loss": 0.7939, "step": 24708 }, { "epoch": 0.8955456489434961, "grad_norm": 2.4455962460893708, "learning_rate": 2.8340796745128243e-07, "loss": 0.9733, "step": 24709 }, { "epoch": 0.8955818926461527, "grad_norm": 2.1996008496888844, "learning_rate": 2.8321320481962734e-07, "loss": 1.0757, "step": 24710 }, { "epoch": 0.8956181363488094, "grad_norm": 2.204689283341077, "learning_rate": 2.8301850718244315e-07, "loss": 0.8363, "step": 24711 }, { "epoch": 0.895654380051466, "grad_norm": 2.346868264654005, "learning_rate": 2.8282387454241e-07, "loss": 0.8119, "step": 24712 }, { "epoch": 0.8956906237541227, "grad_norm": 2.398960802230034, "learning_rate": 2.826293069022112e-07, "loss": 0.8927, "step": 24713 }, { "epoch": 0.8957268674567794, "grad_norm": 2.1711348603805196, "learning_rate": 2.8243480426452863e-07, "loss": 0.8251, "step": 24714 }, { "epoch": 0.8957631111594361, "grad_norm": 2.544661674468475, "learning_rate": 2.8224036663204e-07, "loss": 0.9273, "step": 24715 }, { "epoch": 0.8957993548620927, "grad_norm": 2.2181464370117188, "learning_rate": 2.8204599400742714e-07, "loss": 1.0141, "step": 24716 }, { "epoch": 0.8958355985647494, "grad_norm": 2.3878849492853518, "learning_rate": 2.818516863933657e-07, "loss": 0.9636, "step": 24717 }, { "epoch": 0.895871842267406, "grad_norm": 2.652428899971871, "learning_rate": 2.816574437925357e-07, "loss": 0.9636, "step": 24718 }, { "epoch": 0.8959080859700627, "grad_norm": 2.3926277770715028, "learning_rate": 2.814632662076122e-07, "loss": 0.7634, "step": 24719 }, { "epoch": 0.8959443296727194, "grad_norm": 2.1377098901484795, "learning_rate": 2.812691536412715e-07, "loss": 0.9963, "step": 24720 }, { "epoch": 0.8959805733753761, "grad_norm": 2.554311925246499, "learning_rate": 2.810751060961875e-07, "loss": 0.8821, "step": 24721 }, { "epoch": 0.8960168170780327, "grad_norm": 2.066152115477931, "learning_rate": 2.8088112357503474e-07, "loss": 0.8788, "step": 24722 }, { "epoch": 0.8960530607806894, "grad_norm": 2.326488456329443, "learning_rate": 2.8068720608048717e-07, "loss": 0.9717, "step": 24723 }, { "epoch": 0.896089304483346, "grad_norm": 2.440992982180119, "learning_rate": 2.8049335361521436e-07, "loss": 0.8802, "step": 24724 }, { "epoch": 0.8961255481860027, "grad_norm": 2.430066266343459, "learning_rate": 2.8029956618188916e-07, "loss": 1.0429, "step": 24725 }, { "epoch": 0.8961617918886593, "grad_norm": 2.55411476791058, "learning_rate": 2.8010584378318164e-07, "loss": 0.7849, "step": 24726 }, { "epoch": 0.8961980355913161, "grad_norm": 2.586154186073609, "learning_rate": 2.7991218642176133e-07, "loss": 0.9533, "step": 24727 }, { "epoch": 0.8962342792939727, "grad_norm": 2.2711256353866354, "learning_rate": 2.797185941002961e-07, "loss": 0.9566, "step": 24728 }, { "epoch": 0.8962705229966293, "grad_norm": 2.4209837296277517, "learning_rate": 2.795250668214544e-07, "loss": 0.9029, "step": 24729 }, { "epoch": 0.896306766699286, "grad_norm": 2.4890185618458296, "learning_rate": 2.7933160458790243e-07, "loss": 0.8783, "step": 24730 }, { "epoch": 0.8963430104019426, "grad_norm": 2.3438915931814264, "learning_rate": 2.7913820740230524e-07, "loss": 0.7991, "step": 24731 }, { "epoch": 0.8963792541045993, "grad_norm": 2.5110342091569753, "learning_rate": 2.7894487526732903e-07, "loss": 0.7995, "step": 24732 }, { "epoch": 0.8964154978072559, "grad_norm": 2.6218777045875608, "learning_rate": 2.7875160818563673e-07, "loss": 0.99, "step": 24733 }, { "epoch": 0.8964517415099127, "grad_norm": 2.4108274068077433, "learning_rate": 2.7855840615989284e-07, "loss": 0.9235, "step": 24734 }, { "epoch": 0.8964879852125693, "grad_norm": 2.500113344232519, "learning_rate": 2.78365269192758e-07, "loss": 0.8716, "step": 24735 }, { "epoch": 0.896524228915226, "grad_norm": 2.160308749781817, "learning_rate": 2.78172197286895e-07, "loss": 0.8236, "step": 24736 }, { "epoch": 0.8965604726178826, "grad_norm": 2.543909972807719, "learning_rate": 2.779791904449619e-07, "loss": 0.8635, "step": 24737 }, { "epoch": 0.8965967163205393, "grad_norm": 2.3942273693380014, "learning_rate": 2.7778624866962147e-07, "loss": 0.7311, "step": 24738 }, { "epoch": 0.8966329600231959, "grad_norm": 2.1365929311298952, "learning_rate": 2.775933719635293e-07, "loss": 0.7704, "step": 24739 }, { "epoch": 0.8966692037258527, "grad_norm": 2.0857838499266133, "learning_rate": 2.774005603293456e-07, "loss": 0.6695, "step": 24740 }, { "epoch": 0.8967054474285093, "grad_norm": 2.270927471315268, "learning_rate": 2.772078137697254e-07, "loss": 0.7931, "step": 24741 }, { "epoch": 0.896741691131166, "grad_norm": 2.4348823305349567, "learning_rate": 2.770151322873249e-07, "loss": 0.8879, "step": 24742 }, { "epoch": 0.8967779348338226, "grad_norm": 2.7000737183525483, "learning_rate": 2.768225158848009e-07, "loss": 0.8176, "step": 24743 }, { "epoch": 0.8968141785364793, "grad_norm": 2.369192892848042, "learning_rate": 2.7662996456480404e-07, "loss": 0.9193, "step": 24744 }, { "epoch": 0.8968504222391359, "grad_norm": 2.151067301226762, "learning_rate": 2.7643747832999113e-07, "loss": 0.8976, "step": 24745 }, { "epoch": 0.8968866659417926, "grad_norm": 2.261295439609407, "learning_rate": 2.7624505718301276e-07, "loss": 0.945, "step": 24746 }, { "epoch": 0.8969229096444493, "grad_norm": 2.482987274647331, "learning_rate": 2.7605270112652125e-07, "loss": 0.9341, "step": 24747 }, { "epoch": 0.896959153347106, "grad_norm": 2.413538885767273, "learning_rate": 2.758604101631657e-07, "loss": 0.7893, "step": 24748 }, { "epoch": 0.8969953970497626, "grad_norm": 2.4328299144287207, "learning_rate": 2.756681842955966e-07, "loss": 0.9022, "step": 24749 }, { "epoch": 0.8970316407524193, "grad_norm": 2.4942619820758765, "learning_rate": 2.7547602352646364e-07, "loss": 0.968, "step": 24750 }, { "epoch": 0.8970678844550759, "grad_norm": 2.311936559791312, "learning_rate": 2.7528392785841296e-07, "loss": 0.8107, "step": 24751 }, { "epoch": 0.8971041281577325, "grad_norm": 2.3020952508364076, "learning_rate": 2.750918972940925e-07, "loss": 0.7805, "step": 24752 }, { "epoch": 0.8971403718603893, "grad_norm": 2.482030116252625, "learning_rate": 2.748999318361478e-07, "loss": 0.7851, "step": 24753 }, { "epoch": 0.897176615563046, "grad_norm": 2.0030337537466463, "learning_rate": 2.7470803148722527e-07, "loss": 0.9176, "step": 24754 }, { "epoch": 0.8972128592657026, "grad_norm": 1.9885213938510335, "learning_rate": 2.745161962499676e-07, "loss": 0.6248, "step": 24755 }, { "epoch": 0.8972491029683592, "grad_norm": 2.2379232941750775, "learning_rate": 2.7432442612701894e-07, "loss": 0.7441, "step": 24756 }, { "epoch": 0.8972853466710159, "grad_norm": 2.399016970418058, "learning_rate": 2.7413272112102153e-07, "loss": 0.8191, "step": 24757 }, { "epoch": 0.8973215903736725, "grad_norm": 2.322496672440333, "learning_rate": 2.7394108123461773e-07, "loss": 0.8001, "step": 24758 }, { "epoch": 0.8973578340763292, "grad_norm": 2.3318001871270524, "learning_rate": 2.7374950647044706e-07, "loss": 0.8522, "step": 24759 }, { "epoch": 0.8973940777789859, "grad_norm": 2.3119124276136374, "learning_rate": 2.7355799683115024e-07, "loss": 0.7813, "step": 24760 }, { "epoch": 0.8974303214816426, "grad_norm": 2.764110113916596, "learning_rate": 2.7336655231936573e-07, "loss": 0.8214, "step": 24761 }, { "epoch": 0.8974665651842992, "grad_norm": 2.585219852805403, "learning_rate": 2.731751729377313e-07, "loss": 1.0001, "step": 24762 }, { "epoch": 0.8975028088869559, "grad_norm": 2.326091138600415, "learning_rate": 2.72983858688885e-07, "loss": 0.7706, "step": 24763 }, { "epoch": 0.8975390525896125, "grad_norm": 2.503190725053823, "learning_rate": 2.727926095754607e-07, "loss": 0.8143, "step": 24764 }, { "epoch": 0.8975752962922692, "grad_norm": 2.952369593495888, "learning_rate": 2.7260142560009693e-07, "loss": 0.995, "step": 24765 }, { "epoch": 0.8976115399949259, "grad_norm": 2.337224796198894, "learning_rate": 2.7241030676542544e-07, "loss": 0.8609, "step": 24766 }, { "epoch": 0.8976477836975826, "grad_norm": 2.421523760745615, "learning_rate": 2.722192530740819e-07, "loss": 0.8034, "step": 24767 }, { "epoch": 0.8976840274002392, "grad_norm": 2.402621765408449, "learning_rate": 2.7202826452869704e-07, "loss": 0.7973, "step": 24768 }, { "epoch": 0.8977202711028959, "grad_norm": 2.240123187782529, "learning_rate": 2.7183734113190306e-07, "loss": 0.7904, "step": 24769 }, { "epoch": 0.8977565148055525, "grad_norm": 2.255068720866105, "learning_rate": 2.716464828863319e-07, "loss": 0.9034, "step": 24770 }, { "epoch": 0.8977927585082092, "grad_norm": 2.420706400474992, "learning_rate": 2.714556897946119e-07, "loss": 0.8158, "step": 24771 }, { "epoch": 0.8978290022108658, "grad_norm": 1.9866385711038361, "learning_rate": 2.712649618593727e-07, "loss": 0.8577, "step": 24772 }, { "epoch": 0.8978652459135226, "grad_norm": 2.534110967491118, "learning_rate": 2.7107429908324214e-07, "loss": 0.8312, "step": 24773 }, { "epoch": 0.8979014896161792, "grad_norm": 2.669208142111974, "learning_rate": 2.7088370146884923e-07, "loss": 0.975, "step": 24774 }, { "epoch": 0.8979377333188359, "grad_norm": 2.494311932159597, "learning_rate": 2.706931690188175e-07, "loss": 0.9562, "step": 24775 }, { "epoch": 0.8979739770214925, "grad_norm": 2.2829933221908, "learning_rate": 2.7050270173577363e-07, "loss": 0.973, "step": 24776 }, { "epoch": 0.8980102207241492, "grad_norm": 2.34037165898542, "learning_rate": 2.7031229962234286e-07, "loss": 0.7991, "step": 24777 }, { "epoch": 0.8980464644268058, "grad_norm": 2.335708019757727, "learning_rate": 2.7012196268114854e-07, "loss": 0.7377, "step": 24778 }, { "epoch": 0.8980827081294626, "grad_norm": 2.639035645697679, "learning_rate": 2.6993169091481197e-07, "loss": 1.0783, "step": 24779 }, { "epoch": 0.8981189518321192, "grad_norm": 2.443713642911871, "learning_rate": 2.697414843259566e-07, "loss": 0.9038, "step": 24780 }, { "epoch": 0.8981551955347759, "grad_norm": 2.338593168884319, "learning_rate": 2.695513429172031e-07, "loss": 0.7863, "step": 24781 }, { "epoch": 0.8981914392374325, "grad_norm": 2.61120870684815, "learning_rate": 2.69361266691171e-07, "loss": 1.0564, "step": 24782 }, { "epoch": 0.8982276829400891, "grad_norm": 2.291727977399183, "learning_rate": 2.6917125565048e-07, "loss": 0.9275, "step": 24783 }, { "epoch": 0.8982639266427458, "grad_norm": 2.3051072360415685, "learning_rate": 2.6898130979774675e-07, "loss": 0.8846, "step": 24784 }, { "epoch": 0.8983001703454024, "grad_norm": 2.3052301831497957, "learning_rate": 2.6879142913559087e-07, "loss": 0.7632, "step": 24785 }, { "epoch": 0.8983364140480592, "grad_norm": 2.2410870011671262, "learning_rate": 2.686016136666275e-07, "loss": 0.8893, "step": 24786 }, { "epoch": 0.8983726577507158, "grad_norm": 2.2030268400997857, "learning_rate": 2.684118633934729e-07, "loss": 0.8517, "step": 24787 }, { "epoch": 0.8984089014533725, "grad_norm": 2.5916466332140553, "learning_rate": 2.682221783187405e-07, "loss": 0.9313, "step": 24788 }, { "epoch": 0.8984451451560291, "grad_norm": 2.398374412649284, "learning_rate": 2.6803255844504494e-07, "loss": 0.751, "step": 24789 }, { "epoch": 0.8984813888586858, "grad_norm": 2.355776346718567, "learning_rate": 2.678430037749996e-07, "loss": 0.852, "step": 24790 }, { "epoch": 0.8985176325613424, "grad_norm": 2.381130598885751, "learning_rate": 2.6765351431121465e-07, "loss": 0.9972, "step": 24791 }, { "epoch": 0.8985538762639992, "grad_norm": 2.5044497710402998, "learning_rate": 2.67464090056303e-07, "loss": 0.8084, "step": 24792 }, { "epoch": 0.8985901199666558, "grad_norm": 2.6782232663406997, "learning_rate": 2.6727473101287373e-07, "loss": 0.6666, "step": 24793 }, { "epoch": 0.8986263636693125, "grad_norm": 2.607312156090845, "learning_rate": 2.6708543718353686e-07, "loss": 0.7586, "step": 24794 }, { "epoch": 0.8986626073719691, "grad_norm": 2.288537875803733, "learning_rate": 2.6689620857089983e-07, "loss": 0.9138, "step": 24795 }, { "epoch": 0.8986988510746258, "grad_norm": 2.426077183185658, "learning_rate": 2.667070451775705e-07, "loss": 0.9286, "step": 24796 }, { "epoch": 0.8987350947772824, "grad_norm": 1.9525470233016033, "learning_rate": 2.665179470061552e-07, "loss": 0.7923, "step": 24797 }, { "epoch": 0.8987713384799391, "grad_norm": 2.2039103978454686, "learning_rate": 2.6632891405926066e-07, "loss": 0.906, "step": 24798 }, { "epoch": 0.8988075821825958, "grad_norm": 2.3712031034919065, "learning_rate": 2.6613994633949037e-07, "loss": 0.8277, "step": 24799 }, { "epoch": 0.8988438258852525, "grad_norm": 2.1911194978058575, "learning_rate": 2.6595104384944894e-07, "loss": 0.6325, "step": 24800 }, { "epoch": 0.8988800695879091, "grad_norm": 2.6388106571306458, "learning_rate": 2.6576220659173926e-07, "loss": 0.9001, "step": 24801 }, { "epoch": 0.8989163132905658, "grad_norm": 2.256417760427517, "learning_rate": 2.6557343456896254e-07, "loss": 0.9728, "step": 24802 }, { "epoch": 0.8989525569932224, "grad_norm": 2.212496909959014, "learning_rate": 2.653847277837218e-07, "loss": 0.8974, "step": 24803 }, { "epoch": 0.898988800695879, "grad_norm": 2.3213853534247493, "learning_rate": 2.651960862386144e-07, "loss": 0.8299, "step": 24804 }, { "epoch": 0.8990250443985357, "grad_norm": 2.1294079592531605, "learning_rate": 2.650075099362426e-07, "loss": 0.7162, "step": 24805 }, { "epoch": 0.8990612881011925, "grad_norm": 2.427757126731905, "learning_rate": 2.6481899887920324e-07, "loss": 0.8863, "step": 24806 }, { "epoch": 0.8990975318038491, "grad_norm": 2.335703248713777, "learning_rate": 2.646305530700949e-07, "loss": 1.0727, "step": 24807 }, { "epoch": 0.8991337755065058, "grad_norm": 2.5333576159499858, "learning_rate": 2.644421725115132e-07, "loss": 0.827, "step": 24808 }, { "epoch": 0.8991700192091624, "grad_norm": 2.3815019817295764, "learning_rate": 2.642538572060538e-07, "loss": 0.9047, "step": 24809 }, { "epoch": 0.899206262911819, "grad_norm": 2.6195851217257125, "learning_rate": 2.640656071563136e-07, "loss": 0.8159, "step": 24810 }, { "epoch": 0.8992425066144757, "grad_norm": 2.2846035294366716, "learning_rate": 2.6387742236488376e-07, "loss": 0.8225, "step": 24811 }, { "epoch": 0.8992787503171324, "grad_norm": 2.4774053141972967, "learning_rate": 2.63689302834359e-07, "loss": 0.9482, "step": 24812 }, { "epoch": 0.8993149940197891, "grad_norm": 2.243210046584178, "learning_rate": 2.6350124856733163e-07, "loss": 0.8053, "step": 24813 }, { "epoch": 0.8993512377224457, "grad_norm": 2.4322659560211024, "learning_rate": 2.633132595663929e-07, "loss": 1.0164, "step": 24814 }, { "epoch": 0.8993874814251024, "grad_norm": 2.3145872145813766, "learning_rate": 2.631253358341318e-07, "loss": 0.8903, "step": 24815 }, { "epoch": 0.899423725127759, "grad_norm": 2.220571307475183, "learning_rate": 2.6293747737313914e-07, "loss": 0.7934, "step": 24816 }, { "epoch": 0.8994599688304157, "grad_norm": 2.362065881544755, "learning_rate": 2.627496841860039e-07, "loss": 0.8743, "step": 24817 }, { "epoch": 0.8994962125330723, "grad_norm": 2.6055345127822354, "learning_rate": 2.6256195627531233e-07, "loss": 0.8821, "step": 24818 }, { "epoch": 0.8995324562357291, "grad_norm": 2.2574617949155886, "learning_rate": 2.623742936436513e-07, "loss": 0.795, "step": 24819 }, { "epoch": 0.8995686999383857, "grad_norm": 2.660823513131203, "learning_rate": 2.6218669629360817e-07, "loss": 0.8539, "step": 24820 }, { "epoch": 0.8996049436410424, "grad_norm": 2.133616032299058, "learning_rate": 2.619991642277669e-07, "loss": 0.8403, "step": 24821 }, { "epoch": 0.899641187343699, "grad_norm": 2.258112947302299, "learning_rate": 2.6181169744871173e-07, "loss": 0.7809, "step": 24822 }, { "epoch": 0.8996774310463557, "grad_norm": 2.616912078143264, "learning_rate": 2.6162429595902595e-07, "loss": 1.0908, "step": 24823 }, { "epoch": 0.8997136747490123, "grad_norm": 2.399291577650903, "learning_rate": 2.6143695976129037e-07, "loss": 0.9446, "step": 24824 }, { "epoch": 0.8997499184516691, "grad_norm": 2.612637280767717, "learning_rate": 2.6124968885808955e-07, "loss": 0.8827, "step": 24825 }, { "epoch": 0.8997861621543257, "grad_norm": 2.443271214800248, "learning_rate": 2.610624832520009e-07, "loss": 0.8848, "step": 24826 }, { "epoch": 0.8998224058569824, "grad_norm": 2.772128028685843, "learning_rate": 2.608753429456057e-07, "loss": 0.8993, "step": 24827 }, { "epoch": 0.899858649559639, "grad_norm": 2.209928154112007, "learning_rate": 2.6068826794148236e-07, "loss": 0.5649, "step": 24828 }, { "epoch": 0.8998948932622957, "grad_norm": 2.186549423258562, "learning_rate": 2.6050125824220785e-07, "loss": 0.7357, "step": 24829 }, { "epoch": 0.8999311369649523, "grad_norm": 2.5636868483529462, "learning_rate": 2.603143138503605e-07, "loss": 0.9367, "step": 24830 }, { "epoch": 0.899967380667609, "grad_norm": 2.13231590141497, "learning_rate": 2.60127434768514e-07, "loss": 0.7955, "step": 24831 }, { "epoch": 0.9000036243702657, "grad_norm": 2.408749269782869, "learning_rate": 2.599406209992461e-07, "loss": 0.8492, "step": 24832 }, { "epoch": 0.9000398680729224, "grad_norm": 2.281157661249916, "learning_rate": 2.597538725451293e-07, "loss": 0.9167, "step": 24833 }, { "epoch": 0.900076111775579, "grad_norm": 2.199883728379579, "learning_rate": 2.5956718940873814e-07, "loss": 0.6459, "step": 24834 }, { "epoch": 0.9001123554782356, "grad_norm": 1.9893618427553974, "learning_rate": 2.5938057159264284e-07, "loss": 0.8, "step": 24835 }, { "epoch": 0.9001485991808923, "grad_norm": 2.587442934929091, "learning_rate": 2.591940190994169e-07, "loss": 0.7626, "step": 24836 }, { "epoch": 0.9001848428835489, "grad_norm": 2.374247477436264, "learning_rate": 2.590075319316304e-07, "loss": 0.8253, "step": 24837 }, { "epoch": 0.9002210865862057, "grad_norm": 2.381162620707409, "learning_rate": 2.588211100918525e-07, "loss": 0.9514, "step": 24838 }, { "epoch": 0.9002573302888623, "grad_norm": 2.3534243083790733, "learning_rate": 2.586347535826522e-07, "loss": 0.8978, "step": 24839 }, { "epoch": 0.900293573991519, "grad_norm": 2.3350789151162754, "learning_rate": 2.584484624065975e-07, "loss": 0.7363, "step": 24840 }, { "epoch": 0.9003298176941756, "grad_norm": 2.226894991493185, "learning_rate": 2.5826223656625624e-07, "loss": 0.8109, "step": 24841 }, { "epoch": 0.9003660613968323, "grad_norm": 2.4396730505446, "learning_rate": 2.580760760641932e-07, "loss": 1.006, "step": 24842 }, { "epoch": 0.9004023050994889, "grad_norm": 2.4131503929279634, "learning_rate": 2.5788998090297335e-07, "loss": 0.9088, "step": 24843 }, { "epoch": 0.9004385488021456, "grad_norm": 2.603647752747632, "learning_rate": 2.57703951085162e-07, "loss": 0.909, "step": 24844 }, { "epoch": 0.9004747925048023, "grad_norm": 2.5172979389951835, "learning_rate": 2.575179866133226e-07, "loss": 0.8871, "step": 24845 }, { "epoch": 0.900511036207459, "grad_norm": 2.514466557894271, "learning_rate": 2.57332087490017e-07, "loss": 0.8691, "step": 24846 }, { "epoch": 0.9005472799101156, "grad_norm": 2.199257651132232, "learning_rate": 2.5714625371780654e-07, "loss": 0.8066, "step": 24847 }, { "epoch": 0.9005835236127723, "grad_norm": 2.337316731025322, "learning_rate": 2.56960485299253e-07, "loss": 1.0836, "step": 24848 }, { "epoch": 0.9006197673154289, "grad_norm": 2.393668605279257, "learning_rate": 2.567747822369149e-07, "loss": 1.0952, "step": 24849 }, { "epoch": 0.9006560110180856, "grad_norm": 2.1697350921183296, "learning_rate": 2.5658914453335236e-07, "loss": 0.8406, "step": 24850 }, { "epoch": 0.9006922547207423, "grad_norm": 2.424746399279728, "learning_rate": 2.564035721911218e-07, "loss": 0.8947, "step": 24851 }, { "epoch": 0.900728498423399, "grad_norm": 2.6372778909501062, "learning_rate": 2.562180652127816e-07, "loss": 0.7905, "step": 24852 }, { "epoch": 0.9007647421260556, "grad_norm": 2.000650593909623, "learning_rate": 2.5603262360088754e-07, "loss": 0.7868, "step": 24853 }, { "epoch": 0.9008009858287123, "grad_norm": 2.4011673130330973, "learning_rate": 2.558472473579954e-07, "loss": 0.762, "step": 24854 }, { "epoch": 0.9008372295313689, "grad_norm": 2.445682690250952, "learning_rate": 2.5566193648665805e-07, "loss": 0.7846, "step": 24855 }, { "epoch": 0.9008734732340256, "grad_norm": 2.585501272606099, "learning_rate": 2.5547669098943016e-07, "loss": 0.9666, "step": 24856 }, { "epoch": 0.9009097169366822, "grad_norm": 2.4938601210386206, "learning_rate": 2.552915108688647e-07, "loss": 0.9732, "step": 24857 }, { "epoch": 0.900945960639339, "grad_norm": 2.097032158963063, "learning_rate": 2.551063961275124e-07, "loss": 0.7701, "step": 24858 }, { "epoch": 0.9009822043419956, "grad_norm": 2.3385908445257497, "learning_rate": 2.54921346767924e-07, "loss": 0.8343, "step": 24859 }, { "epoch": 0.9010184480446523, "grad_norm": 2.3354387170984325, "learning_rate": 2.547363627926497e-07, "loss": 0.9432, "step": 24860 }, { "epoch": 0.9010546917473089, "grad_norm": 2.156736606730196, "learning_rate": 2.5455144420423903e-07, "loss": 0.9778, "step": 24861 }, { "epoch": 0.9010909354499655, "grad_norm": 2.3992982142741623, "learning_rate": 2.54366591005239e-07, "loss": 0.7363, "step": 24862 }, { "epoch": 0.9011271791526222, "grad_norm": 2.61666882603337, "learning_rate": 2.541818031981974e-07, "loss": 0.9316, "step": 24863 }, { "epoch": 0.901163422855279, "grad_norm": 2.416276449790418, "learning_rate": 2.539970807856601e-07, "loss": 0.9157, "step": 24864 }, { "epoch": 0.9011996665579356, "grad_norm": 2.259468620248925, "learning_rate": 2.5381242377017336e-07, "loss": 0.9804, "step": 24865 }, { "epoch": 0.9012359102605922, "grad_norm": 2.270406130789257, "learning_rate": 2.5362783215428064e-07, "loss": 0.7947, "step": 24866 }, { "epoch": 0.9012721539632489, "grad_norm": 2.7171946634544306, "learning_rate": 2.534433059405256e-07, "loss": 0.9626, "step": 24867 }, { "epoch": 0.9013083976659055, "grad_norm": 2.5596852644439703, "learning_rate": 2.532588451314522e-07, "loss": 0.9728, "step": 24868 }, { "epoch": 0.9013446413685622, "grad_norm": 2.236202986226665, "learning_rate": 2.5307444972960006e-07, "loss": 0.598, "step": 24869 }, { "epoch": 0.9013808850712188, "grad_norm": 2.438787202150843, "learning_rate": 2.528901197375122e-07, "loss": 0.9622, "step": 24870 }, { "epoch": 0.9014171287738756, "grad_norm": 2.3995054023232334, "learning_rate": 2.5270585515772605e-07, "loss": 0.9374, "step": 24871 }, { "epoch": 0.9014533724765322, "grad_norm": 2.530475443661444, "learning_rate": 2.5252165599278345e-07, "loss": 0.7674, "step": 24872 }, { "epoch": 0.9014896161791889, "grad_norm": 2.3470983208396894, "learning_rate": 2.523375222452207e-07, "loss": 0.9729, "step": 24873 }, { "epoch": 0.9015258598818455, "grad_norm": 2.145355261166478, "learning_rate": 2.521534539175763e-07, "loss": 0.7669, "step": 24874 }, { "epoch": 0.9015621035845022, "grad_norm": 2.0492158087295587, "learning_rate": 2.519694510123849e-07, "loss": 0.6614, "step": 24875 }, { "epoch": 0.9015983472871588, "grad_norm": 2.270962100523092, "learning_rate": 2.5178551353218337e-07, "loss": 0.8439, "step": 24876 }, { "epoch": 0.9016345909898155, "grad_norm": 2.317183362285822, "learning_rate": 2.516016414795064e-07, "loss": 0.9282, "step": 24877 }, { "epoch": 0.9016708346924722, "grad_norm": 2.5018086241255384, "learning_rate": 2.514178348568863e-07, "loss": 0.9055, "step": 24878 }, { "epoch": 0.9017070783951289, "grad_norm": 2.107198228244406, "learning_rate": 2.512340936668567e-07, "loss": 0.7941, "step": 24879 }, { "epoch": 0.9017433220977855, "grad_norm": 2.236356952279862, "learning_rate": 2.5105041791194886e-07, "loss": 0.881, "step": 24880 }, { "epoch": 0.9017795658004422, "grad_norm": 2.129804079357583, "learning_rate": 2.508668075946952e-07, "loss": 0.8597, "step": 24881 }, { "epoch": 0.9018158095030988, "grad_norm": 1.992440076726163, "learning_rate": 2.506832627176242e-07, "loss": 0.7318, "step": 24882 }, { "epoch": 0.9018520532057555, "grad_norm": 2.3512183893685084, "learning_rate": 2.5049978328326564e-07, "loss": 0.9483, "step": 24883 }, { "epoch": 0.9018882969084122, "grad_norm": 2.3579695272103893, "learning_rate": 2.503163692941474e-07, "loss": 0.7748, "step": 24884 }, { "epoch": 0.9019245406110689, "grad_norm": 2.430067416927232, "learning_rate": 2.5013302075279744e-07, "loss": 0.8819, "step": 24885 }, { "epoch": 0.9019607843137255, "grad_norm": 2.315962802407597, "learning_rate": 2.4994973766174156e-07, "loss": 0.8489, "step": 24886 }, { "epoch": 0.9019970280163822, "grad_norm": 2.348863052533145, "learning_rate": 2.4976652002350556e-07, "loss": 0.6688, "step": 24887 }, { "epoch": 0.9020332717190388, "grad_norm": 2.3051177823414677, "learning_rate": 2.495833678406151e-07, "loss": 0.846, "step": 24888 }, { "epoch": 0.9020695154216954, "grad_norm": 2.563652595045743, "learning_rate": 2.494002811155916e-07, "loss": 0.9891, "step": 24889 }, { "epoch": 0.9021057591243521, "grad_norm": 2.237348722314821, "learning_rate": 2.492172598509601e-07, "loss": 0.987, "step": 24890 }, { "epoch": 0.9021420028270088, "grad_norm": 2.2713346097959666, "learning_rate": 2.490343040492399e-07, "loss": 0.8285, "step": 24891 }, { "epoch": 0.9021782465296655, "grad_norm": 2.370108822008746, "learning_rate": 2.488514137129555e-07, "loss": 0.9701, "step": 24892 }, { "epoch": 0.9022144902323221, "grad_norm": 2.833811050549806, "learning_rate": 2.4866858884462444e-07, "loss": 0.9174, "step": 24893 }, { "epoch": 0.9022507339349788, "grad_norm": 2.36454211578796, "learning_rate": 2.4848582944676735e-07, "loss": 0.8331, "step": 24894 }, { "epoch": 0.9022869776376354, "grad_norm": 2.297512753292135, "learning_rate": 2.483031355219012e-07, "loss": 0.8174, "step": 24895 }, { "epoch": 0.9023232213402921, "grad_norm": 2.32209759179125, "learning_rate": 2.48120507072544e-07, "loss": 0.9136, "step": 24896 }, { "epoch": 0.9023594650429488, "grad_norm": 2.4365905333896185, "learning_rate": 2.4793794410121307e-07, "loss": 0.9337, "step": 24897 }, { "epoch": 0.9023957087456055, "grad_norm": 2.2672932050653247, "learning_rate": 2.477554466104226e-07, "loss": 0.9564, "step": 24898 }, { "epoch": 0.9024319524482621, "grad_norm": 2.5898106030362458, "learning_rate": 2.4757301460268836e-07, "loss": 1.1197, "step": 24899 }, { "epoch": 0.9024681961509188, "grad_norm": 2.415903030385345, "learning_rate": 2.4739064808052326e-07, "loss": 0.8931, "step": 24900 }, { "epoch": 0.9025044398535754, "grad_norm": 2.397153333289107, "learning_rate": 2.47208347046442e-07, "loss": 0.938, "step": 24901 }, { "epoch": 0.9025406835562321, "grad_norm": 2.425289336970211, "learning_rate": 2.470261115029543e-07, "loss": 0.7872, "step": 24902 }, { "epoch": 0.9025769272588887, "grad_norm": 2.5065852646882605, "learning_rate": 2.4684394145257193e-07, "loss": 0.8345, "step": 24903 }, { "epoch": 0.9026131709615455, "grad_norm": 2.4426548500045007, "learning_rate": 2.4666183689780633e-07, "loss": 0.8132, "step": 24904 }, { "epoch": 0.9026494146642021, "grad_norm": 2.4003205471560665, "learning_rate": 2.4647979784116595e-07, "loss": 0.9285, "step": 24905 }, { "epoch": 0.9026856583668588, "grad_norm": 2.4031648993701693, "learning_rate": 2.4629782428515827e-07, "loss": 0.8322, "step": 24906 }, { "epoch": 0.9027219020695154, "grad_norm": 2.3456479864544435, "learning_rate": 2.461159162322918e-07, "loss": 0.901, "step": 24907 }, { "epoch": 0.9027581457721721, "grad_norm": 2.363975933201092, "learning_rate": 2.4593407368507404e-07, "loss": 0.8403, "step": 24908 }, { "epoch": 0.9027943894748287, "grad_norm": 2.3866433025424483, "learning_rate": 2.4575229664600853e-07, "loss": 0.9546, "step": 24909 }, { "epoch": 0.9028306331774855, "grad_norm": 2.300323247549634, "learning_rate": 2.45570585117601e-07, "loss": 0.7106, "step": 24910 }, { "epoch": 0.9028668768801421, "grad_norm": 2.441828901844052, "learning_rate": 2.4538893910235564e-07, "loss": 0.8526, "step": 24911 }, { "epoch": 0.9029031205827988, "grad_norm": 2.311572143175147, "learning_rate": 2.452073586027759e-07, "loss": 1.0008, "step": 24912 }, { "epoch": 0.9029393642854554, "grad_norm": 2.662797501154743, "learning_rate": 2.450258436213626e-07, "loss": 0.9145, "step": 24913 }, { "epoch": 0.902975607988112, "grad_norm": 2.1513694582359646, "learning_rate": 2.448443941606171e-07, "loss": 0.8933, "step": 24914 }, { "epoch": 0.9030118516907687, "grad_norm": 2.619129445697519, "learning_rate": 2.4466301022304075e-07, "loss": 0.7411, "step": 24915 }, { "epoch": 0.9030480953934253, "grad_norm": 2.495113128490465, "learning_rate": 2.444816918111315e-07, "loss": 0.853, "step": 24916 }, { "epoch": 0.9030843390960821, "grad_norm": 2.4664192613087024, "learning_rate": 2.443004389273895e-07, "loss": 0.9996, "step": 24917 }, { "epoch": 0.9031205827987387, "grad_norm": 2.3875780438250493, "learning_rate": 2.441192515743096e-07, "loss": 0.9357, "step": 24918 }, { "epoch": 0.9031568265013954, "grad_norm": 2.3474716868503913, "learning_rate": 2.439381297543919e-07, "loss": 0.8027, "step": 24919 }, { "epoch": 0.903193070204052, "grad_norm": 2.258467228568023, "learning_rate": 2.4375707347012947e-07, "loss": 0.9932, "step": 24920 }, { "epoch": 0.9032293139067087, "grad_norm": 2.310843219973655, "learning_rate": 2.4357608272401855e-07, "loss": 1.0086, "step": 24921 }, { "epoch": 0.9032655576093653, "grad_norm": 2.624670548351445, "learning_rate": 2.433951575185522e-07, "loss": 0.9634, "step": 24922 }, { "epoch": 0.9033018013120221, "grad_norm": 2.6474780317337663, "learning_rate": 2.432142978562235e-07, "loss": 0.9185, "step": 24923 }, { "epoch": 0.9033380450146787, "grad_norm": 2.2145768323566695, "learning_rate": 2.430335037395265e-07, "loss": 0.723, "step": 24924 }, { "epoch": 0.9033742887173354, "grad_norm": 2.115199051897875, "learning_rate": 2.428527751709492e-07, "loss": 0.8165, "step": 24925 }, { "epoch": 0.903410532419992, "grad_norm": 2.3424260263472245, "learning_rate": 2.42672112152984e-07, "loss": 0.7952, "step": 24926 }, { "epoch": 0.9034467761226487, "grad_norm": 2.6716375146537175, "learning_rate": 2.424915146881202e-07, "loss": 0.9899, "step": 24927 }, { "epoch": 0.9034830198253053, "grad_norm": 2.3873404266211673, "learning_rate": 2.4231098277884666e-07, "loss": 0.8119, "step": 24928 }, { "epoch": 0.903519263527962, "grad_norm": 2.4480366481354023, "learning_rate": 2.4213051642764997e-07, "loss": 1.182, "step": 24929 }, { "epoch": 0.9035555072306187, "grad_norm": 2.397079692913934, "learning_rate": 2.4195011563701686e-07, "loss": 1.0888, "step": 24930 }, { "epoch": 0.9035917509332754, "grad_norm": 2.150116498003118, "learning_rate": 2.417697804094338e-07, "loss": 0.648, "step": 24931 }, { "epoch": 0.903627994635932, "grad_norm": 2.2168680583531026, "learning_rate": 2.415895107473859e-07, "loss": 0.7978, "step": 24932 }, { "epoch": 0.9036642383385887, "grad_norm": 2.4646763525284094, "learning_rate": 2.414093066533563e-07, "loss": 0.7314, "step": 24933 }, { "epoch": 0.9037004820412453, "grad_norm": 2.3731690527839113, "learning_rate": 2.4122916812982844e-07, "loss": 0.87, "step": 24934 }, { "epoch": 0.903736725743902, "grad_norm": 2.4900863834714406, "learning_rate": 2.4104909517928543e-07, "loss": 0.866, "step": 24935 }, { "epoch": 0.9037729694465586, "grad_norm": 2.3892994753979644, "learning_rate": 2.4086908780420694e-07, "loss": 1.1414, "step": 24936 }, { "epoch": 0.9038092131492154, "grad_norm": 2.334199095337923, "learning_rate": 2.4068914600707536e-07, "loss": 0.8979, "step": 24937 }, { "epoch": 0.903845456851872, "grad_norm": 2.108834149946418, "learning_rate": 2.4050926979036715e-07, "loss": 0.8107, "step": 24938 }, { "epoch": 0.9038817005545287, "grad_norm": 2.6282713923647467, "learning_rate": 2.403294591565641e-07, "loss": 0.7933, "step": 24939 }, { "epoch": 0.9039179442571853, "grad_norm": 2.551339445713194, "learning_rate": 2.4014971410814214e-07, "loss": 0.8415, "step": 24940 }, { "epoch": 0.903954187959842, "grad_norm": 2.498986440300299, "learning_rate": 2.399700346475786e-07, "loss": 0.8467, "step": 24941 }, { "epoch": 0.9039904316624986, "grad_norm": 2.406235111824559, "learning_rate": 2.397904207773494e-07, "loss": 0.8416, "step": 24942 }, { "epoch": 0.9040266753651554, "grad_norm": 2.284050932735332, "learning_rate": 2.3961087249992853e-07, "loss": 0.9078, "step": 24943 }, { "epoch": 0.904062919067812, "grad_norm": 2.478288081294476, "learning_rate": 2.394313898177919e-07, "loss": 0.9742, "step": 24944 }, { "epoch": 0.9040991627704686, "grad_norm": 2.4349347835450446, "learning_rate": 2.392519727334108e-07, "loss": 0.884, "step": 24945 }, { "epoch": 0.9041354064731253, "grad_norm": 2.586940060048044, "learning_rate": 2.3907262124925836e-07, "loss": 0.8549, "step": 24946 }, { "epoch": 0.9041716501757819, "grad_norm": 2.238803199979601, "learning_rate": 2.388933353678058e-07, "loss": 0.7601, "step": 24947 }, { "epoch": 0.9042078938784386, "grad_norm": 2.562003595933894, "learning_rate": 2.3871411509152455e-07, "loss": 0.9333, "step": 24948 }, { "epoch": 0.9042441375810952, "grad_norm": 2.3569026941269815, "learning_rate": 2.385349604228826e-07, "loss": 0.7996, "step": 24949 }, { "epoch": 0.904280381283752, "grad_norm": 2.5511533016035646, "learning_rate": 2.3835587136434913e-07, "loss": 0.8269, "step": 24950 }, { "epoch": 0.9043166249864086, "grad_norm": 2.218148772504765, "learning_rate": 2.3817684791839214e-07, "loss": 0.7756, "step": 24951 }, { "epoch": 0.9043528686890653, "grad_norm": 2.6756829477007025, "learning_rate": 2.3799789008747853e-07, "loss": 0.8738, "step": 24952 }, { "epoch": 0.9043891123917219, "grad_norm": 2.1960011571401945, "learning_rate": 2.3781899787407358e-07, "loss": 0.8475, "step": 24953 }, { "epoch": 0.9044253560943786, "grad_norm": 2.636540372519376, "learning_rate": 2.376401712806431e-07, "loss": 0.9299, "step": 24954 }, { "epoch": 0.9044615997970352, "grad_norm": 2.5145605656098358, "learning_rate": 2.374614103096512e-07, "loss": 0.9594, "step": 24955 }, { "epoch": 0.904497843499692, "grad_norm": 2.280166789608819, "learning_rate": 2.3728271496356037e-07, "loss": 0.7712, "step": 24956 }, { "epoch": 0.9045340872023486, "grad_norm": 2.1608451533185558, "learning_rate": 2.3710408524483365e-07, "loss": 0.756, "step": 24957 }, { "epoch": 0.9045703309050053, "grad_norm": 2.1482500108989813, "learning_rate": 2.3692552115593127e-07, "loss": 0.8242, "step": 24958 }, { "epoch": 0.9046065746076619, "grad_norm": 2.335658580345588, "learning_rate": 2.3674702269931516e-07, "loss": 0.8056, "step": 24959 }, { "epoch": 0.9046428183103186, "grad_norm": 2.8010149530149278, "learning_rate": 2.3656858987744447e-07, "loss": 0.9162, "step": 24960 }, { "epoch": 0.9046790620129752, "grad_norm": 2.4201854223321737, "learning_rate": 2.3639022269277835e-07, "loss": 0.9238, "step": 24961 }, { "epoch": 0.9047153057156319, "grad_norm": 2.1318718351074266, "learning_rate": 2.3621192114777313e-07, "loss": 0.8992, "step": 24962 }, { "epoch": 0.9047515494182886, "grad_norm": 2.560308657876742, "learning_rate": 2.3603368524488633e-07, "loss": 0.8349, "step": 24963 }, { "epoch": 0.9047877931209453, "grad_norm": 2.148632499874273, "learning_rate": 2.3585551498657543e-07, "loss": 0.7263, "step": 24964 }, { "epoch": 0.9048240368236019, "grad_norm": 2.4062977587908048, "learning_rate": 2.356774103752929e-07, "loss": 0.8779, "step": 24965 }, { "epoch": 0.9048602805262586, "grad_norm": 2.206199254531497, "learning_rate": 2.354993714134951e-07, "loss": 0.8225, "step": 24966 }, { "epoch": 0.9048965242289152, "grad_norm": 2.1953299747699466, "learning_rate": 2.3532139810363396e-07, "loss": 0.8947, "step": 24967 }, { "epoch": 0.9049327679315718, "grad_norm": 2.1571914289815504, "learning_rate": 2.351434904481631e-07, "loss": 0.9909, "step": 24968 }, { "epoch": 0.9049690116342286, "grad_norm": 2.2014885471971253, "learning_rate": 2.3496564844953273e-07, "loss": 0.7919, "step": 24969 }, { "epoch": 0.9050052553368853, "grad_norm": 2.454491987848824, "learning_rate": 2.347878721101937e-07, "loss": 0.8752, "step": 24970 }, { "epoch": 0.9050414990395419, "grad_norm": 2.2842723218880394, "learning_rate": 2.3461016143259575e-07, "loss": 1.1124, "step": 24971 }, { "epoch": 0.9050777427421985, "grad_norm": 2.428465219447228, "learning_rate": 2.3443251641918907e-07, "loss": 0.9626, "step": 24972 }, { "epoch": 0.9051139864448552, "grad_norm": 2.193365588954697, "learning_rate": 2.3425493707241897e-07, "loss": 0.8709, "step": 24973 }, { "epoch": 0.9051502301475118, "grad_norm": 2.6542546663852473, "learning_rate": 2.3407742339473404e-07, "loss": 0.9322, "step": 24974 }, { "epoch": 0.9051864738501685, "grad_norm": 2.1899392446981296, "learning_rate": 2.338999753885801e-07, "loss": 0.9809, "step": 24975 }, { "epoch": 0.9052227175528252, "grad_norm": 2.3069938372287258, "learning_rate": 2.3372259305640132e-07, "loss": 0.8809, "step": 24976 }, { "epoch": 0.9052589612554819, "grad_norm": 2.5740219761621144, "learning_rate": 2.3354527640064406e-07, "loss": 1.0729, "step": 24977 }, { "epoch": 0.9052952049581385, "grad_norm": 2.4370251228315363, "learning_rate": 2.3336802542374804e-07, "loss": 0.8776, "step": 24978 }, { "epoch": 0.9053314486607952, "grad_norm": 2.4878612840887304, "learning_rate": 2.3319084012815962e-07, "loss": 0.9731, "step": 24979 }, { "epoch": 0.9053676923634518, "grad_norm": 2.397800724273135, "learning_rate": 2.33013720516318e-07, "loss": 1.0196, "step": 24980 }, { "epoch": 0.9054039360661085, "grad_norm": 2.408758281174344, "learning_rate": 2.3283666659066452e-07, "loss": 0.8525, "step": 24981 }, { "epoch": 0.9054401797687652, "grad_norm": 2.427508654626835, "learning_rate": 2.3265967835363834e-07, "loss": 1.0063, "step": 24982 }, { "epoch": 0.9054764234714219, "grad_norm": 2.2588575644625304, "learning_rate": 2.3248275580767865e-07, "loss": 1.0282, "step": 24983 }, { "epoch": 0.9055126671740785, "grad_norm": 2.2364368498180576, "learning_rate": 2.3230589895522403e-07, "loss": 1.0041, "step": 24984 }, { "epoch": 0.9055489108767352, "grad_norm": 2.130746060805502, "learning_rate": 2.321291077987098e-07, "loss": 0.9254, "step": 24985 }, { "epoch": 0.9055851545793918, "grad_norm": 2.2551524251111905, "learning_rate": 2.319523823405734e-07, "loss": 0.7107, "step": 24986 }, { "epoch": 0.9056213982820485, "grad_norm": 2.6850113722619477, "learning_rate": 2.31775722583249e-07, "loss": 0.9274, "step": 24987 }, { "epoch": 0.9056576419847051, "grad_norm": 2.6894575404859484, "learning_rate": 2.3159912852917242e-07, "loss": 1.008, "step": 24988 }, { "epoch": 0.9056938856873619, "grad_norm": 2.3522702885907596, "learning_rate": 2.3142260018077512e-07, "loss": 0.909, "step": 24989 }, { "epoch": 0.9057301293900185, "grad_norm": 2.553396317614927, "learning_rate": 2.3124613754049064e-07, "loss": 1.05, "step": 24990 }, { "epoch": 0.9057663730926752, "grad_norm": 2.5889397840266164, "learning_rate": 2.3106974061074982e-07, "loss": 0.8967, "step": 24991 }, { "epoch": 0.9058026167953318, "grad_norm": 2.3007780787339844, "learning_rate": 2.308934093939852e-07, "loss": 0.8237, "step": 24992 }, { "epoch": 0.9058388604979885, "grad_norm": 2.4502846712982618, "learning_rate": 2.3071714389262423e-07, "loss": 0.8475, "step": 24993 }, { "epoch": 0.9058751042006451, "grad_norm": 2.0806487031049024, "learning_rate": 2.3054094410909666e-07, "loss": 0.8174, "step": 24994 }, { "epoch": 0.9059113479033019, "grad_norm": 2.5412563152586136, "learning_rate": 2.3036481004583055e-07, "loss": 0.9462, "step": 24995 }, { "epoch": 0.9059475916059585, "grad_norm": 2.385865763052026, "learning_rate": 2.3018874170525285e-07, "loss": 1.0195, "step": 24996 }, { "epoch": 0.9059838353086151, "grad_norm": 2.48237457377036, "learning_rate": 2.3001273908978937e-07, "loss": 0.9711, "step": 24997 }, { "epoch": 0.9060200790112718, "grad_norm": 2.4472810420481865, "learning_rate": 2.2983680220186544e-07, "loss": 0.9488, "step": 24998 }, { "epoch": 0.9060563227139284, "grad_norm": 2.45286654489908, "learning_rate": 2.296609310439063e-07, "loss": 1.0521, "step": 24999 }, { "epoch": 0.9060925664165851, "grad_norm": 2.279270409526775, "learning_rate": 2.2948512561833336e-07, "loss": 0.7935, "step": 25000 }, { "epoch": 0.9061288101192417, "grad_norm": 2.516339744365043, "learning_rate": 2.293093859275708e-07, "loss": 0.9144, "step": 25001 }, { "epoch": 0.9061650538218985, "grad_norm": 2.472297333000079, "learning_rate": 2.2913371197404e-07, "loss": 0.8971, "step": 25002 }, { "epoch": 0.9062012975245551, "grad_norm": 2.60748400279384, "learning_rate": 2.2895810376016126e-07, "loss": 0.9661, "step": 25003 }, { "epoch": 0.9062375412272118, "grad_norm": 2.2057929597722215, "learning_rate": 2.287825612883543e-07, "loss": 0.8578, "step": 25004 }, { "epoch": 0.9062737849298684, "grad_norm": 2.1534971363049964, "learning_rate": 2.2860708456103719e-07, "loss": 0.6731, "step": 25005 }, { "epoch": 0.9063100286325251, "grad_norm": 2.6147407600068395, "learning_rate": 2.2843167358063023e-07, "loss": 0.8522, "step": 25006 }, { "epoch": 0.9063462723351817, "grad_norm": 2.29068912630507, "learning_rate": 2.2825632834954813e-07, "loss": 0.8936, "step": 25007 }, { "epoch": 0.9063825160378384, "grad_norm": 2.3980922204318733, "learning_rate": 2.28081048870209e-07, "loss": 0.9843, "step": 25008 }, { "epoch": 0.9064187597404951, "grad_norm": 2.498971234000147, "learning_rate": 2.2790583514502584e-07, "loss": 0.9656, "step": 25009 }, { "epoch": 0.9064550034431518, "grad_norm": 2.23364321049159, "learning_rate": 2.2773068717641456e-07, "loss": 0.8949, "step": 25010 }, { "epoch": 0.9064912471458084, "grad_norm": 2.396114733261827, "learning_rate": 2.2755560496678874e-07, "loss": 0.9026, "step": 25011 }, { "epoch": 0.9065274908484651, "grad_norm": 2.376178626411247, "learning_rate": 2.2738058851856038e-07, "loss": 0.7218, "step": 25012 }, { "epoch": 0.9065637345511217, "grad_norm": 2.428342190783166, "learning_rate": 2.272056378341403e-07, "loss": 0.9916, "step": 25013 }, { "epoch": 0.9065999782537784, "grad_norm": 2.4231330925998456, "learning_rate": 2.2703075291594044e-07, "loss": 0.8942, "step": 25014 }, { "epoch": 0.9066362219564351, "grad_norm": 2.292906070257831, "learning_rate": 2.268559337663706e-07, "loss": 0.8379, "step": 25015 }, { "epoch": 0.9066724656590918, "grad_norm": 2.507355920032862, "learning_rate": 2.2668118038783882e-07, "loss": 0.7861, "step": 25016 }, { "epoch": 0.9067087093617484, "grad_norm": 2.2351453748185954, "learning_rate": 2.2650649278275372e-07, "loss": 0.8911, "step": 25017 }, { "epoch": 0.9067449530644051, "grad_norm": 2.5398761634861766, "learning_rate": 2.2633187095352171e-07, "loss": 0.823, "step": 25018 }, { "epoch": 0.9067811967670617, "grad_norm": 2.4077379390795826, "learning_rate": 2.2615731490255088e-07, "loss": 0.8507, "step": 25019 }, { "epoch": 0.9068174404697183, "grad_norm": 2.1765430886326365, "learning_rate": 2.2598282463224375e-07, "loss": 0.7519, "step": 25020 }, { "epoch": 0.906853684172375, "grad_norm": 2.5677668767808353, "learning_rate": 2.2580840014500672e-07, "loss": 0.9435, "step": 25021 }, { "epoch": 0.9068899278750318, "grad_norm": 2.2625634979262585, "learning_rate": 2.2563404144324285e-07, "loss": 0.9267, "step": 25022 }, { "epoch": 0.9069261715776884, "grad_norm": 2.104541374939636, "learning_rate": 2.254597485293536e-07, "loss": 0.9013, "step": 25023 }, { "epoch": 0.906962415280345, "grad_norm": 2.444317954479952, "learning_rate": 2.2528552140574255e-07, "loss": 0.8662, "step": 25024 }, { "epoch": 0.9069986589830017, "grad_norm": 2.576562104591096, "learning_rate": 2.2511136007480782e-07, "loss": 0.9394, "step": 25025 }, { "epoch": 0.9070349026856583, "grad_norm": 2.5052779879302283, "learning_rate": 2.2493726453895193e-07, "loss": 0.8963, "step": 25026 }, { "epoch": 0.907071146388315, "grad_norm": 2.193131690873285, "learning_rate": 2.2476323480057238e-07, "loss": 0.7347, "step": 25027 }, { "epoch": 0.9071073900909717, "grad_norm": 2.1000464596271393, "learning_rate": 2.2458927086206784e-07, "loss": 0.8677, "step": 25028 }, { "epoch": 0.9071436337936284, "grad_norm": 2.5346387297289774, "learning_rate": 2.244153727258347e-07, "loss": 1.0235, "step": 25029 }, { "epoch": 0.907179877496285, "grad_norm": 2.2760051794781457, "learning_rate": 2.2424154039426938e-07, "loss": 0.7744, "step": 25030 }, { "epoch": 0.9072161211989417, "grad_norm": 2.1378154124780644, "learning_rate": 2.240677738697683e-07, "loss": 0.8331, "step": 25031 }, { "epoch": 0.9072523649015983, "grad_norm": 2.6090103779559217, "learning_rate": 2.23894073154724e-07, "loss": 0.9086, "step": 25032 }, { "epoch": 0.907288608604255, "grad_norm": 2.1065887542823907, "learning_rate": 2.2372043825153066e-07, "loss": 0.8914, "step": 25033 }, { "epoch": 0.9073248523069116, "grad_norm": 2.3330418892113958, "learning_rate": 2.235468691625814e-07, "loss": 0.8128, "step": 25034 }, { "epoch": 0.9073610960095684, "grad_norm": 2.2941567682800854, "learning_rate": 2.2337336589026815e-07, "loss": 0.989, "step": 25035 }, { "epoch": 0.907397339712225, "grad_norm": 2.3812163590387914, "learning_rate": 2.2319992843698013e-07, "loss": 1.0349, "step": 25036 }, { "epoch": 0.9074335834148817, "grad_norm": 2.2683763953247142, "learning_rate": 2.2302655680510877e-07, "loss": 0.7196, "step": 25037 }, { "epoch": 0.9074698271175383, "grad_norm": 2.5082203171948527, "learning_rate": 2.2285325099704214e-07, "loss": 0.7128, "step": 25038 }, { "epoch": 0.907506070820195, "grad_norm": 2.383300879521306, "learning_rate": 2.226800110151689e-07, "loss": 0.9798, "step": 25039 }, { "epoch": 0.9075423145228516, "grad_norm": 2.4526896951998505, "learning_rate": 2.2250683686187546e-07, "loss": 0.8359, "step": 25040 }, { "epoch": 0.9075785582255084, "grad_norm": 2.520643326848668, "learning_rate": 2.2233372853954882e-07, "loss": 0.9272, "step": 25041 }, { "epoch": 0.907614801928165, "grad_norm": 2.397201044134351, "learning_rate": 2.2216068605057427e-07, "loss": 0.8271, "step": 25042 }, { "epoch": 0.9076510456308217, "grad_norm": 2.2849245538326457, "learning_rate": 2.219877093973355e-07, "loss": 0.7024, "step": 25043 }, { "epoch": 0.9076872893334783, "grad_norm": 2.300378109641606, "learning_rate": 2.2181479858221666e-07, "loss": 0.886, "step": 25044 }, { "epoch": 0.907723533036135, "grad_norm": 2.2782337366368135, "learning_rate": 2.2164195360759921e-07, "loss": 0.9039, "step": 25045 }, { "epoch": 0.9077597767387916, "grad_norm": 2.615971606887733, "learning_rate": 2.2146917447586735e-07, "loss": 0.9393, "step": 25046 }, { "epoch": 0.9077960204414482, "grad_norm": 2.4548185404071674, "learning_rate": 2.212964611893992e-07, "loss": 1.0005, "step": 25047 }, { "epoch": 0.907832264144105, "grad_norm": 2.685189067083455, "learning_rate": 2.2112381375057667e-07, "loss": 0.8036, "step": 25048 }, { "epoch": 0.9078685078467617, "grad_norm": 2.3913795429722113, "learning_rate": 2.2095123216177683e-07, "loss": 0.9431, "step": 25049 }, { "epoch": 0.9079047515494183, "grad_norm": 2.3529530221853947, "learning_rate": 2.2077871642537886e-07, "loss": 0.6835, "step": 25050 }, { "epoch": 0.907940995252075, "grad_norm": 2.5396602154875105, "learning_rate": 2.2060626654376026e-07, "loss": 0.9423, "step": 25051 }, { "epoch": 0.9079772389547316, "grad_norm": 2.208020584677664, "learning_rate": 2.2043388251929641e-07, "loss": 0.7069, "step": 25052 }, { "epoch": 0.9080134826573882, "grad_norm": 2.840351336466585, "learning_rate": 2.202615643543632e-07, "loss": 0.8395, "step": 25053 }, { "epoch": 0.908049726360045, "grad_norm": 2.6456810891382148, "learning_rate": 2.2008931205133478e-07, "loss": 0.9234, "step": 25054 }, { "epoch": 0.9080859700627016, "grad_norm": 2.413945946460791, "learning_rate": 2.1991712561258537e-07, "loss": 0.8427, "step": 25055 }, { "epoch": 0.9081222137653583, "grad_norm": 2.3745221878307956, "learning_rate": 2.1974500504048702e-07, "loss": 0.9037, "step": 25056 }, { "epoch": 0.9081584574680149, "grad_norm": 2.1976509043762076, "learning_rate": 2.195729503374111e-07, "loss": 0.8064, "step": 25057 }, { "epoch": 0.9081947011706716, "grad_norm": 2.3478008828385604, "learning_rate": 2.1940096150572855e-07, "loss": 0.9562, "step": 25058 }, { "epoch": 0.9082309448733282, "grad_norm": 2.4705305856247737, "learning_rate": 2.1922903854781018e-07, "loss": 0.8788, "step": 25059 }, { "epoch": 0.9082671885759849, "grad_norm": 2.5398355468313327, "learning_rate": 2.1905718146602418e-07, "loss": 1.0081, "step": 25060 }, { "epoch": 0.9083034322786416, "grad_norm": 2.1654621302819685, "learning_rate": 2.1888539026273859e-07, "loss": 0.8453, "step": 25061 }, { "epoch": 0.9083396759812983, "grad_norm": 2.0716398243134253, "learning_rate": 2.1871366494032098e-07, "loss": 0.676, "step": 25062 }, { "epoch": 0.9083759196839549, "grad_norm": 2.6149142549983906, "learning_rate": 2.1854200550113724e-07, "loss": 0.8999, "step": 25063 }, { "epoch": 0.9084121633866116, "grad_norm": 2.281840186170601, "learning_rate": 2.183704119475538e-07, "loss": 0.733, "step": 25064 }, { "epoch": 0.9084484070892682, "grad_norm": 2.3096006208096873, "learning_rate": 2.181988842819327e-07, "loss": 0.8816, "step": 25065 }, { "epoch": 0.9084846507919249, "grad_norm": 2.116895611737762, "learning_rate": 2.1802742250664034e-07, "loss": 0.7671, "step": 25066 }, { "epoch": 0.9085208944945816, "grad_norm": 2.448351889163675, "learning_rate": 2.1785602662403705e-07, "loss": 0.8375, "step": 25067 }, { "epoch": 0.9085571381972383, "grad_norm": 3.7974919405474337, "learning_rate": 2.176846966364865e-07, "loss": 0.8809, "step": 25068 }, { "epoch": 0.9085933818998949, "grad_norm": 2.4916836341841555, "learning_rate": 2.1751343254634793e-07, "loss": 0.9608, "step": 25069 }, { "epoch": 0.9086296256025516, "grad_norm": 2.488533568937415, "learning_rate": 2.1734223435598224e-07, "loss": 0.9601, "step": 25070 }, { "epoch": 0.9086658693052082, "grad_norm": 2.377179780094381, "learning_rate": 2.171711020677486e-07, "loss": 0.8801, "step": 25071 }, { "epoch": 0.9087021130078649, "grad_norm": 2.052545567648903, "learning_rate": 2.1700003568400297e-07, "loss": 0.6861, "step": 25072 }, { "epoch": 0.9087383567105215, "grad_norm": 2.5137093422076777, "learning_rate": 2.1682903520710563e-07, "loss": 1.038, "step": 25073 }, { "epoch": 0.9087746004131783, "grad_norm": 2.314675303327824, "learning_rate": 2.1665810063941085e-07, "loss": 0.7805, "step": 25074 }, { "epoch": 0.9088108441158349, "grad_norm": 2.1688623554405866, "learning_rate": 2.1648723198327504e-07, "loss": 0.8618, "step": 25075 }, { "epoch": 0.9088470878184916, "grad_norm": 2.9147333702408176, "learning_rate": 2.1631642924105135e-07, "loss": 0.9951, "step": 25076 }, { "epoch": 0.9088833315211482, "grad_norm": 2.210523837115729, "learning_rate": 2.161456924150951e-07, "loss": 1.0076, "step": 25077 }, { "epoch": 0.9089195752238048, "grad_norm": 2.450334416035705, "learning_rate": 2.1597502150775718e-07, "loss": 0.9573, "step": 25078 }, { "epoch": 0.9089558189264615, "grad_norm": 2.3514563475377095, "learning_rate": 2.1580441652139128e-07, "loss": 0.9011, "step": 25079 }, { "epoch": 0.9089920626291181, "grad_norm": 2.069401770348573, "learning_rate": 2.1563387745834609e-07, "loss": 0.8911, "step": 25080 }, { "epoch": 0.9090283063317749, "grad_norm": 2.2861923947584772, "learning_rate": 2.15463404320973e-07, "loss": 0.7719, "step": 25081 }, { "epoch": 0.9090645500344315, "grad_norm": 2.5903143447266226, "learning_rate": 2.1529299711162077e-07, "loss": 0.8025, "step": 25082 }, { "epoch": 0.9091007937370882, "grad_norm": 2.3379950369615092, "learning_rate": 2.1512265583263747e-07, "loss": 0.9174, "step": 25083 }, { "epoch": 0.9091370374397448, "grad_norm": 2.6972362237192624, "learning_rate": 2.1495238048636958e-07, "loss": 1.0239, "step": 25084 }, { "epoch": 0.9091732811424015, "grad_norm": 2.1544183036141185, "learning_rate": 2.1478217107516407e-07, "loss": 0.9393, "step": 25085 }, { "epoch": 0.9092095248450581, "grad_norm": 2.1824995932137785, "learning_rate": 2.146120276013669e-07, "loss": 0.9253, "step": 25086 }, { "epoch": 0.9092457685477149, "grad_norm": 2.2071042477107, "learning_rate": 2.1444195006732116e-07, "loss": 0.8906, "step": 25087 }, { "epoch": 0.9092820122503715, "grad_norm": 2.434210739554716, "learning_rate": 2.1427193847537108e-07, "loss": 0.7986, "step": 25088 }, { "epoch": 0.9093182559530282, "grad_norm": 2.0153339994689925, "learning_rate": 2.1410199282786037e-07, "loss": 0.6983, "step": 25089 }, { "epoch": 0.9093544996556848, "grad_norm": 2.3449669954884573, "learning_rate": 2.139321131271288e-07, "loss": 0.6854, "step": 25090 }, { "epoch": 0.9093907433583415, "grad_norm": 2.0037653942418627, "learning_rate": 2.1376229937551896e-07, "loss": 0.8776, "step": 25091 }, { "epoch": 0.9094269870609981, "grad_norm": 2.6238223991256784, "learning_rate": 2.135925515753684e-07, "loss": 0.8317, "step": 25092 }, { "epoch": 0.9094632307636548, "grad_norm": 1.9838799401071148, "learning_rate": 2.1342286972901915e-07, "loss": 0.787, "step": 25093 }, { "epoch": 0.9094994744663115, "grad_norm": 2.306303684346884, "learning_rate": 2.1325325383880768e-07, "loss": 0.8552, "step": 25094 }, { "epoch": 0.9095357181689682, "grad_norm": 2.783988809251314, "learning_rate": 2.130837039070721e-07, "loss": 0.9044, "step": 25095 }, { "epoch": 0.9095719618716248, "grad_norm": 2.1870155120890855, "learning_rate": 2.1291421993614724e-07, "loss": 0.6785, "step": 25096 }, { "epoch": 0.9096082055742815, "grad_norm": 2.3898107631883545, "learning_rate": 2.12744801928369e-07, "loss": 0.9643, "step": 25097 }, { "epoch": 0.9096444492769381, "grad_norm": 2.3158777254887277, "learning_rate": 2.1257544988607325e-07, "loss": 0.7122, "step": 25098 }, { "epoch": 0.9096806929795948, "grad_norm": 2.532534194993255, "learning_rate": 2.124061638115915e-07, "loss": 0.9571, "step": 25099 }, { "epoch": 0.9097169366822515, "grad_norm": 2.6938555499786125, "learning_rate": 2.1223694370725745e-07, "loss": 1.0355, "step": 25100 }, { "epoch": 0.9097531803849082, "grad_norm": 2.4222577910422687, "learning_rate": 2.1206778957540252e-07, "loss": 0.8189, "step": 25101 }, { "epoch": 0.9097894240875648, "grad_norm": 2.8058599840032383, "learning_rate": 2.1189870141835877e-07, "loss": 1.1108, "step": 25102 }, { "epoch": 0.9098256677902214, "grad_norm": 2.2561302146630786, "learning_rate": 2.1172967923845434e-07, "loss": 0.8907, "step": 25103 }, { "epoch": 0.9098619114928781, "grad_norm": 2.3253256362018493, "learning_rate": 2.1156072303801955e-07, "loss": 0.7622, "step": 25104 }, { "epoch": 0.9098981551955347, "grad_norm": 2.239999833149521, "learning_rate": 2.1139183281938147e-07, "loss": 0.9194, "step": 25105 }, { "epoch": 0.9099343988981914, "grad_norm": 2.342706338427586, "learning_rate": 2.1122300858486877e-07, "loss": 0.6823, "step": 25106 }, { "epoch": 0.9099706426008481, "grad_norm": 2.534153954105635, "learning_rate": 2.1105425033680628e-07, "loss": 0.9626, "step": 25107 }, { "epoch": 0.9100068863035048, "grad_norm": 2.5236312174855713, "learning_rate": 2.1088555807751986e-07, "loss": 0.8732, "step": 25108 }, { "epoch": 0.9100431300061614, "grad_norm": 2.4780144731497677, "learning_rate": 2.107169318093344e-07, "loss": 0.7647, "step": 25109 }, { "epoch": 0.9100793737088181, "grad_norm": 2.3465371551104415, "learning_rate": 2.1054837153457296e-07, "loss": 1.033, "step": 25110 }, { "epoch": 0.9101156174114747, "grad_norm": 2.055847087905359, "learning_rate": 2.1037987725555875e-07, "loss": 0.8056, "step": 25111 }, { "epoch": 0.9101518611141314, "grad_norm": 2.543993436747969, "learning_rate": 2.102114489746121e-07, "loss": 0.8932, "step": 25112 }, { "epoch": 0.9101881048167881, "grad_norm": 2.6372620865695984, "learning_rate": 2.1004308669405614e-07, "loss": 0.8155, "step": 25113 }, { "epoch": 0.9102243485194448, "grad_norm": 2.2041977897948954, "learning_rate": 2.0987479041620906e-07, "loss": 0.8232, "step": 25114 }, { "epoch": 0.9102605922221014, "grad_norm": 2.339470947315469, "learning_rate": 2.0970656014339064e-07, "loss": 0.9462, "step": 25115 }, { "epoch": 0.9102968359247581, "grad_norm": 2.4147082679690905, "learning_rate": 2.0953839587791848e-07, "loss": 0.8671, "step": 25116 }, { "epoch": 0.9103330796274147, "grad_norm": 2.095324567671758, "learning_rate": 2.093702976221096e-07, "loss": 0.8565, "step": 25117 }, { "epoch": 0.9103693233300714, "grad_norm": 2.5429704700123033, "learning_rate": 2.0920226537828214e-07, "loss": 0.834, "step": 25118 }, { "epoch": 0.910405567032728, "grad_norm": 2.5694657154540272, "learning_rate": 2.0903429914874874e-07, "loss": 0.9232, "step": 25119 }, { "epoch": 0.9104418107353848, "grad_norm": 2.1144419782929615, "learning_rate": 2.0886639893582527e-07, "loss": 0.858, "step": 25120 }, { "epoch": 0.9104780544380414, "grad_norm": 2.1744374278855414, "learning_rate": 2.086985647418255e-07, "loss": 0.7405, "step": 25121 }, { "epoch": 0.9105142981406981, "grad_norm": 2.055067821403861, "learning_rate": 2.085307965690625e-07, "loss": 0.813, "step": 25122 }, { "epoch": 0.9105505418433547, "grad_norm": 2.10794939850254, "learning_rate": 2.083630944198467e-07, "loss": 0.8137, "step": 25123 }, { "epoch": 0.9105867855460114, "grad_norm": 2.223190860498223, "learning_rate": 2.0819545829648958e-07, "loss": 0.9706, "step": 25124 }, { "epoch": 0.910623029248668, "grad_norm": 2.3067271733062595, "learning_rate": 2.0802788820130093e-07, "loss": 0.6868, "step": 25125 }, { "epoch": 0.9106592729513248, "grad_norm": 2.670824087959508, "learning_rate": 2.078603841365906e-07, "loss": 0.949, "step": 25126 }, { "epoch": 0.9106955166539814, "grad_norm": 2.4294562628296177, "learning_rate": 2.0769294610466505e-07, "loss": 0.8418, "step": 25127 }, { "epoch": 0.910731760356638, "grad_norm": 2.2488600667048866, "learning_rate": 2.0752557410783247e-07, "loss": 0.9507, "step": 25128 }, { "epoch": 0.9107680040592947, "grad_norm": 2.627553175417504, "learning_rate": 2.0735826814839987e-07, "loss": 0.9188, "step": 25129 }, { "epoch": 0.9108042477619513, "grad_norm": 2.219386627131351, "learning_rate": 2.0719102822867154e-07, "loss": 0.894, "step": 25130 }, { "epoch": 0.910840491464608, "grad_norm": 2.6780911593086025, "learning_rate": 2.070238543509523e-07, "loss": 0.898, "step": 25131 }, { "epoch": 0.9108767351672646, "grad_norm": 2.5078359553333405, "learning_rate": 2.0685674651754472e-07, "loss": 0.8864, "step": 25132 }, { "epoch": 0.9109129788699214, "grad_norm": 2.1583233845295724, "learning_rate": 2.0668970473075366e-07, "loss": 0.5911, "step": 25133 }, { "epoch": 0.910949222572578, "grad_norm": 2.4457058559533493, "learning_rate": 2.0652272899287894e-07, "loss": 0.9129, "step": 25134 }, { "epoch": 0.9109854662752347, "grad_norm": 2.6872868480077194, "learning_rate": 2.063558193062226e-07, "loss": 0.9109, "step": 25135 }, { "epoch": 0.9110217099778913, "grad_norm": 2.199123965312696, "learning_rate": 2.0618897567308283e-07, "loss": 0.92, "step": 25136 }, { "epoch": 0.911057953680548, "grad_norm": 2.4614587525153375, "learning_rate": 2.060221980957605e-07, "loss": 1.1029, "step": 25137 }, { "epoch": 0.9110941973832046, "grad_norm": 2.2691588620889847, "learning_rate": 2.058554865765533e-07, "loss": 0.714, "step": 25138 }, { "epoch": 0.9111304410858614, "grad_norm": 2.4766424210310194, "learning_rate": 2.0568884111775766e-07, "loss": 0.8209, "step": 25139 }, { "epoch": 0.911166684788518, "grad_norm": 2.3632660620048447, "learning_rate": 2.055222617216701e-07, "loss": 0.8719, "step": 25140 }, { "epoch": 0.9112029284911747, "grad_norm": 2.1558205003626147, "learning_rate": 2.0535574839058602e-07, "loss": 0.9475, "step": 25141 }, { "epoch": 0.9112391721938313, "grad_norm": 2.2695941441016103, "learning_rate": 2.051893011268008e-07, "loss": 0.7507, "step": 25142 }, { "epoch": 0.911275415896488, "grad_norm": 2.5910418076261426, "learning_rate": 2.0502291993260648e-07, "loss": 0.9082, "step": 25143 }, { "epoch": 0.9113116595991446, "grad_norm": 2.3059939101846747, "learning_rate": 2.0485660481029624e-07, "loss": 0.9599, "step": 25144 }, { "epoch": 0.9113479033018013, "grad_norm": 2.436039946097237, "learning_rate": 2.0469035576216266e-07, "loss": 0.8165, "step": 25145 }, { "epoch": 0.911384147004458, "grad_norm": 2.3386133057675584, "learning_rate": 2.0452417279049564e-07, "loss": 0.8634, "step": 25146 }, { "epoch": 0.9114203907071147, "grad_norm": 2.3443190519489683, "learning_rate": 2.043580558975855e-07, "loss": 0.9141, "step": 25147 }, { "epoch": 0.9114566344097713, "grad_norm": 2.245796336861985, "learning_rate": 2.0419200508572045e-07, "loss": 0.7579, "step": 25148 }, { "epoch": 0.911492878112428, "grad_norm": 2.298730076539768, "learning_rate": 2.0402602035718977e-07, "loss": 0.9431, "step": 25149 }, { "epoch": 0.9115291218150846, "grad_norm": 2.482528596538786, "learning_rate": 2.0386010171427996e-07, "loss": 1.0087, "step": 25150 }, { "epoch": 0.9115653655177413, "grad_norm": 1.9774597563697862, "learning_rate": 2.0369424915927692e-07, "loss": 0.7152, "step": 25151 }, { "epoch": 0.9116016092203979, "grad_norm": 2.515206750576318, "learning_rate": 2.035284626944667e-07, "loss": 0.9651, "step": 25152 }, { "epoch": 0.9116378529230547, "grad_norm": 1.9802550483083603, "learning_rate": 2.033627423221335e-07, "loss": 0.7819, "step": 25153 }, { "epoch": 0.9116740966257113, "grad_norm": 2.4196969552219336, "learning_rate": 2.0319708804456052e-07, "loss": 0.9898, "step": 25154 }, { "epoch": 0.911710340328368, "grad_norm": 2.5506885455460795, "learning_rate": 2.030314998640309e-07, "loss": 1.1378, "step": 25155 }, { "epoch": 0.9117465840310246, "grad_norm": 2.434839434969536, "learning_rate": 2.0286597778282624e-07, "loss": 0.8193, "step": 25156 }, { "epoch": 0.9117828277336812, "grad_norm": 2.2559121945405103, "learning_rate": 2.0270052180322686e-07, "loss": 0.8483, "step": 25157 }, { "epoch": 0.9118190714363379, "grad_norm": 2.556445519429016, "learning_rate": 2.0253513192751374e-07, "loss": 1.0023, "step": 25158 }, { "epoch": 0.9118553151389946, "grad_norm": 1.9658306298378216, "learning_rate": 2.0236980815796336e-07, "loss": 0.5824, "step": 25159 }, { "epoch": 0.9118915588416513, "grad_norm": 2.700063349267589, "learning_rate": 2.0220455049685728e-07, "loss": 0.9414, "step": 25160 }, { "epoch": 0.9119278025443079, "grad_norm": 2.1325365197993276, "learning_rate": 2.0203935894646975e-07, "loss": 0.9451, "step": 25161 }, { "epoch": 0.9119640462469646, "grad_norm": 2.4014282981637587, "learning_rate": 2.0187423350907897e-07, "loss": 1.0859, "step": 25162 }, { "epoch": 0.9120002899496212, "grad_norm": 2.183867657058618, "learning_rate": 2.0170917418695867e-07, "loss": 0.8371, "step": 25163 }, { "epoch": 0.9120365336522779, "grad_norm": 2.606234245056515, "learning_rate": 2.0154418098238483e-07, "loss": 0.9165, "step": 25164 }, { "epoch": 0.9120727773549345, "grad_norm": 2.2046760401438745, "learning_rate": 2.0137925389762947e-07, "loss": 0.7352, "step": 25165 }, { "epoch": 0.9121090210575913, "grad_norm": 2.2627753829728676, "learning_rate": 2.0121439293496692e-07, "loss": 0.6729, "step": 25166 }, { "epoch": 0.9121452647602479, "grad_norm": 2.4882640816870545, "learning_rate": 2.0104959809666646e-07, "loss": 0.7915, "step": 25167 }, { "epoch": 0.9121815084629046, "grad_norm": 2.547326901983132, "learning_rate": 2.0088486938500074e-07, "loss": 1.1539, "step": 25168 }, { "epoch": 0.9122177521655612, "grad_norm": 2.1181121068011444, "learning_rate": 2.007202068022396e-07, "loss": 0.8664, "step": 25169 }, { "epoch": 0.9122539958682179, "grad_norm": 2.649669109957511, "learning_rate": 2.0055561035065118e-07, "loss": 0.8401, "step": 25170 }, { "epoch": 0.9122902395708745, "grad_norm": 2.3974417241996995, "learning_rate": 2.0039108003250318e-07, "loss": 0.9734, "step": 25171 }, { "epoch": 0.9123264832735313, "grad_norm": 2.208753539490771, "learning_rate": 2.002266158500643e-07, "loss": 0.7594, "step": 25172 }, { "epoch": 0.9123627269761879, "grad_norm": 2.3332265559203176, "learning_rate": 2.000622178056e-07, "loss": 0.8096, "step": 25173 }, { "epoch": 0.9123989706788446, "grad_norm": 2.5362060810697193, "learning_rate": 1.998978859013745e-07, "loss": 0.9576, "step": 25174 }, { "epoch": 0.9124352143815012, "grad_norm": 2.3475502975985965, "learning_rate": 1.9973362013965324e-07, "loss": 1.01, "step": 25175 }, { "epoch": 0.9124714580841579, "grad_norm": 2.206169343877676, "learning_rate": 1.9956942052270056e-07, "loss": 0.8246, "step": 25176 }, { "epoch": 0.9125077017868145, "grad_norm": 2.343157829791022, "learning_rate": 1.994052870527774e-07, "loss": 0.7562, "step": 25177 }, { "epoch": 0.9125439454894712, "grad_norm": 2.154174280097073, "learning_rate": 1.9924121973214638e-07, "loss": 0.7522, "step": 25178 }, { "epoch": 0.9125801891921279, "grad_norm": 2.281913162850125, "learning_rate": 1.9907721856306682e-07, "loss": 0.6708, "step": 25179 }, { "epoch": 0.9126164328947846, "grad_norm": 2.2666164667874313, "learning_rate": 1.9891328354780082e-07, "loss": 0.858, "step": 25180 }, { "epoch": 0.9126526765974412, "grad_norm": 2.4017243164731985, "learning_rate": 1.98749414688606e-07, "loss": 0.8, "step": 25181 }, { "epoch": 0.9126889203000979, "grad_norm": 2.5754275816994388, "learning_rate": 1.9858561198774052e-07, "loss": 0.9413, "step": 25182 }, { "epoch": 0.9127251640027545, "grad_norm": 2.167690492460234, "learning_rate": 1.984218754474615e-07, "loss": 0.7293, "step": 25183 }, { "epoch": 0.9127614077054111, "grad_norm": 2.528257220345596, "learning_rate": 1.982582050700249e-07, "loss": 0.8746, "step": 25184 }, { "epoch": 0.9127976514080679, "grad_norm": 2.3349219442686495, "learning_rate": 1.9809460085768672e-07, "loss": 0.9089, "step": 25185 }, { "epoch": 0.9128338951107245, "grad_norm": 2.459281119020728, "learning_rate": 1.9793106281270015e-07, "loss": 0.8021, "step": 25186 }, { "epoch": 0.9128701388133812, "grad_norm": 2.342400646814083, "learning_rate": 1.9776759093731946e-07, "loss": 0.7949, "step": 25187 }, { "epoch": 0.9129063825160378, "grad_norm": 2.3886469754426862, "learning_rate": 1.9760418523379733e-07, "loss": 0.9571, "step": 25188 }, { "epoch": 0.9129426262186945, "grad_norm": 2.189009196866621, "learning_rate": 1.9744084570438526e-07, "loss": 0.9746, "step": 25189 }, { "epoch": 0.9129788699213511, "grad_norm": 2.1028688533659046, "learning_rate": 1.972775723513337e-07, "loss": 0.8081, "step": 25190 }, { "epoch": 0.9130151136240078, "grad_norm": 2.1556414832230173, "learning_rate": 1.9711436517689254e-07, "loss": 0.8366, "step": 25191 }, { "epoch": 0.9130513573266645, "grad_norm": 2.662737961585234, "learning_rate": 1.9695122418331047e-07, "loss": 0.7888, "step": 25192 }, { "epoch": 0.9130876010293212, "grad_norm": 2.2129598878310905, "learning_rate": 1.967881493728363e-07, "loss": 0.8229, "step": 25193 }, { "epoch": 0.9131238447319778, "grad_norm": 2.338954546395413, "learning_rate": 1.9662514074771599e-07, "loss": 0.8899, "step": 25194 }, { "epoch": 0.9131600884346345, "grad_norm": 1.9772853441762623, "learning_rate": 1.9646219831019663e-07, "loss": 0.9604, "step": 25195 }, { "epoch": 0.9131963321372911, "grad_norm": 2.3451380087190197, "learning_rate": 1.962993220625231e-07, "loss": 0.9713, "step": 25196 }, { "epoch": 0.9132325758399478, "grad_norm": 2.2749452010757745, "learning_rate": 1.9613651200693972e-07, "loss": 0.7552, "step": 25197 }, { "epoch": 0.9132688195426045, "grad_norm": 2.2541286496208333, "learning_rate": 1.9597376814569023e-07, "loss": 0.6224, "step": 25198 }, { "epoch": 0.9133050632452612, "grad_norm": 2.102615544103072, "learning_rate": 1.9581109048101565e-07, "loss": 0.8673, "step": 25199 }, { "epoch": 0.9133413069479178, "grad_norm": 2.406063387659814, "learning_rate": 1.9564847901515972e-07, "loss": 0.8981, "step": 25200 }, { "epoch": 0.9133775506505745, "grad_norm": 2.4814084124461386, "learning_rate": 1.9548593375036174e-07, "loss": 0.9836, "step": 25201 }, { "epoch": 0.9134137943532311, "grad_norm": 2.3001542703244633, "learning_rate": 1.9532345468886271e-07, "loss": 1.0172, "step": 25202 }, { "epoch": 0.9134500380558878, "grad_norm": 2.341505294207406, "learning_rate": 1.951610418329003e-07, "loss": 0.7598, "step": 25203 }, { "epoch": 0.9134862817585444, "grad_norm": 2.4286833289555005, "learning_rate": 1.9499869518471215e-07, "loss": 0.8048, "step": 25204 }, { "epoch": 0.9135225254612012, "grad_norm": 2.341453235882358, "learning_rate": 1.9483641474653702e-07, "loss": 0.8762, "step": 25205 }, { "epoch": 0.9135587691638578, "grad_norm": 2.35049704735362, "learning_rate": 1.9467420052060982e-07, "loss": 0.8983, "step": 25206 }, { "epoch": 0.9135950128665145, "grad_norm": 2.5134768865204444, "learning_rate": 1.945120525091654e-07, "loss": 0.9224, "step": 25207 }, { "epoch": 0.9136312565691711, "grad_norm": 2.6942944975895187, "learning_rate": 1.943499707144386e-07, "loss": 0.8236, "step": 25208 }, { "epoch": 0.9136675002718277, "grad_norm": 2.4630752437673586, "learning_rate": 1.9418795513866383e-07, "loss": 0.8919, "step": 25209 }, { "epoch": 0.9137037439744844, "grad_norm": 2.5621259268930947, "learning_rate": 1.940260057840715e-07, "loss": 0.8902, "step": 25210 }, { "epoch": 0.9137399876771412, "grad_norm": 2.1098159896011697, "learning_rate": 1.9386412265289478e-07, "loss": 0.8168, "step": 25211 }, { "epoch": 0.9137762313797978, "grad_norm": 2.388598106017264, "learning_rate": 1.937023057473636e-07, "loss": 0.8367, "step": 25212 }, { "epoch": 0.9138124750824544, "grad_norm": 2.4685069198742102, "learning_rate": 1.9354055506970838e-07, "loss": 0.9428, "step": 25213 }, { "epoch": 0.9138487187851111, "grad_norm": 2.239453016941417, "learning_rate": 1.9337887062215677e-07, "loss": 0.7878, "step": 25214 }, { "epoch": 0.9138849624877677, "grad_norm": 2.984751979393448, "learning_rate": 1.9321725240693757e-07, "loss": 0.9677, "step": 25215 }, { "epoch": 0.9139212061904244, "grad_norm": 2.4433308861851994, "learning_rate": 1.9305570042627786e-07, "loss": 0.7829, "step": 25216 }, { "epoch": 0.913957449893081, "grad_norm": 2.416254761954842, "learning_rate": 1.928942146824031e-07, "loss": 0.8392, "step": 25217 }, { "epoch": 0.9139936935957378, "grad_norm": 2.50586255692363, "learning_rate": 1.9273279517753983e-07, "loss": 0.9253, "step": 25218 }, { "epoch": 0.9140299372983944, "grad_norm": 2.4353186850064277, "learning_rate": 1.9257144191390964e-07, "loss": 0.8362, "step": 25219 }, { "epoch": 0.9140661810010511, "grad_norm": 2.2251438768491303, "learning_rate": 1.924101548937385e-07, "loss": 0.9879, "step": 25220 }, { "epoch": 0.9141024247037077, "grad_norm": 2.4569706546167107, "learning_rate": 1.9224893411924793e-07, "loss": 0.8962, "step": 25221 }, { "epoch": 0.9141386684063644, "grad_norm": 2.281477851773511, "learning_rate": 1.9208777959265957e-07, "loss": 0.8158, "step": 25222 }, { "epoch": 0.914174912109021, "grad_norm": 2.6860751523404476, "learning_rate": 1.9192669131619323e-07, "loss": 0.9219, "step": 25223 }, { "epoch": 0.9142111558116777, "grad_norm": 2.4276060704130096, "learning_rate": 1.9176566929206942e-07, "loss": 0.874, "step": 25224 }, { "epoch": 0.9142473995143344, "grad_norm": 2.0905046133741454, "learning_rate": 1.9160471352250743e-07, "loss": 0.8581, "step": 25225 }, { "epoch": 0.9142836432169911, "grad_norm": 2.189746326560102, "learning_rate": 1.9144382400972383e-07, "loss": 0.8311, "step": 25226 }, { "epoch": 0.9143198869196477, "grad_norm": 2.6406285662567517, "learning_rate": 1.9128300075593575e-07, "loss": 0.8337, "step": 25227 }, { "epoch": 0.9143561306223044, "grad_norm": 2.2929100482966724, "learning_rate": 1.911222437633603e-07, "loss": 0.8404, "step": 25228 }, { "epoch": 0.914392374324961, "grad_norm": 2.2763084701133964, "learning_rate": 1.9096155303421238e-07, "loss": 0.8565, "step": 25229 }, { "epoch": 0.9144286180276177, "grad_norm": 2.2036838517621082, "learning_rate": 1.908009285707052e-07, "loss": 1.0511, "step": 25230 }, { "epoch": 0.9144648617302744, "grad_norm": 2.2784469923799207, "learning_rate": 1.9064037037505312e-07, "loss": 0.9537, "step": 25231 }, { "epoch": 0.9145011054329311, "grad_norm": 2.200860018021037, "learning_rate": 1.904798784494677e-07, "loss": 0.7618, "step": 25232 }, { "epoch": 0.9145373491355877, "grad_norm": 2.193560495227817, "learning_rate": 1.903194527961616e-07, "loss": 0.8219, "step": 25233 }, { "epoch": 0.9145735928382444, "grad_norm": 2.3804824888925653, "learning_rate": 1.901590934173436e-07, "loss": 0.7192, "step": 25234 }, { "epoch": 0.914609836540901, "grad_norm": 2.3259019375023526, "learning_rate": 1.8999880031522476e-07, "loss": 0.8872, "step": 25235 }, { "epoch": 0.9146460802435576, "grad_norm": 2.2838754740278575, "learning_rate": 1.898385734920144e-07, "loss": 1.0396, "step": 25236 }, { "epoch": 0.9146823239462143, "grad_norm": 2.460561317117578, "learning_rate": 1.8967841294991852e-07, "loss": 0.933, "step": 25237 }, { "epoch": 0.914718567648871, "grad_norm": 2.8184135164485156, "learning_rate": 1.895183186911448e-07, "loss": 0.9645, "step": 25238 }, { "epoch": 0.9147548113515277, "grad_norm": 2.567028199269197, "learning_rate": 1.8935829071789924e-07, "loss": 0.8679, "step": 25239 }, { "epoch": 0.9147910550541843, "grad_norm": 2.141493336285393, "learning_rate": 1.891983290323879e-07, "loss": 0.7323, "step": 25240 }, { "epoch": 0.914827298756841, "grad_norm": 2.1078109543307733, "learning_rate": 1.890384336368134e-07, "loss": 0.7389, "step": 25241 }, { "epoch": 0.9148635424594976, "grad_norm": 2.4043104188450912, "learning_rate": 1.8887860453338014e-07, "loss": 0.8898, "step": 25242 }, { "epoch": 0.9148997861621543, "grad_norm": 2.385229260550065, "learning_rate": 1.887188417242902e-07, "loss": 1.0395, "step": 25243 }, { "epoch": 0.914936029864811, "grad_norm": 2.12406332832435, "learning_rate": 1.885591452117441e-07, "loss": 0.8032, "step": 25244 }, { "epoch": 0.9149722735674677, "grad_norm": 2.491588918822167, "learning_rate": 1.8839951499794397e-07, "loss": 0.8088, "step": 25245 }, { "epoch": 0.9150085172701243, "grad_norm": 2.2638202043304867, "learning_rate": 1.882399510850874e-07, "loss": 0.8581, "step": 25246 }, { "epoch": 0.915044760972781, "grad_norm": 2.702093815480617, "learning_rate": 1.880804534753755e-07, "loss": 0.994, "step": 25247 }, { "epoch": 0.9150810046754376, "grad_norm": 2.7562917268475204, "learning_rate": 1.8792102217100428e-07, "loss": 0.9053, "step": 25248 }, { "epoch": 0.9151172483780943, "grad_norm": 2.270674880855595, "learning_rate": 1.877616571741714e-07, "loss": 0.8437, "step": 25249 }, { "epoch": 0.9151534920807509, "grad_norm": 2.0786651694757867, "learning_rate": 1.876023584870712e-07, "loss": 0.7444, "step": 25250 }, { "epoch": 0.9151897357834077, "grad_norm": 2.442284579310041, "learning_rate": 1.8744312611190142e-07, "loss": 0.8796, "step": 25251 }, { "epoch": 0.9152259794860643, "grad_norm": 2.341950248844632, "learning_rate": 1.872839600508547e-07, "loss": 0.8214, "step": 25252 }, { "epoch": 0.915262223188721, "grad_norm": 2.6965707246332142, "learning_rate": 1.8712486030612432e-07, "loss": 0.8829, "step": 25253 }, { "epoch": 0.9152984668913776, "grad_norm": 2.366548085828518, "learning_rate": 1.869658268799024e-07, "loss": 1.0019, "step": 25254 }, { "epoch": 0.9153347105940343, "grad_norm": 2.5656804047079307, "learning_rate": 1.868068597743805e-07, "loss": 0.8575, "step": 25255 }, { "epoch": 0.9153709542966909, "grad_norm": 2.3340773982580636, "learning_rate": 1.866479589917497e-07, "loss": 0.8946, "step": 25256 }, { "epoch": 0.9154071979993477, "grad_norm": 2.4937871221206325, "learning_rate": 1.864891245341982e-07, "loss": 0.9176, "step": 25257 }, { "epoch": 0.9154434417020043, "grad_norm": 2.542304141702395, "learning_rate": 1.8633035640391594e-07, "loss": 0.9503, "step": 25258 }, { "epoch": 0.915479685404661, "grad_norm": 2.296615449025194, "learning_rate": 1.861716546030895e-07, "loss": 1.0224, "step": 25259 }, { "epoch": 0.9155159291073176, "grad_norm": 2.1163157618716855, "learning_rate": 1.8601301913390768e-07, "loss": 0.8097, "step": 25260 }, { "epoch": 0.9155521728099743, "grad_norm": 2.5036191790066584, "learning_rate": 1.8585444999855373e-07, "loss": 1.013, "step": 25261 }, { "epoch": 0.9155884165126309, "grad_norm": 2.4009739254007916, "learning_rate": 1.8569594719921425e-07, "loss": 0.8877, "step": 25262 }, { "epoch": 0.9156246602152875, "grad_norm": 2.3560984032049848, "learning_rate": 1.8553751073807357e-07, "loss": 0.8714, "step": 25263 }, { "epoch": 0.9156609039179443, "grad_norm": 2.4009523559684767, "learning_rate": 1.8537914061731387e-07, "loss": 0.8014, "step": 25264 }, { "epoch": 0.915697147620601, "grad_norm": 2.5585790644850026, "learning_rate": 1.852208368391184e-07, "loss": 0.7904, "step": 25265 }, { "epoch": 0.9157333913232576, "grad_norm": 2.4464639149424254, "learning_rate": 1.8506259940566652e-07, "loss": 0.977, "step": 25266 }, { "epoch": 0.9157696350259142, "grad_norm": 2.683958730012486, "learning_rate": 1.8490442831914146e-07, "loss": 1.0014, "step": 25267 }, { "epoch": 0.9158058787285709, "grad_norm": 2.620265200012613, "learning_rate": 1.8474632358172096e-07, "loss": 0.8676, "step": 25268 }, { "epoch": 0.9158421224312275, "grad_norm": 2.3859551525032856, "learning_rate": 1.845882851955838e-07, "loss": 0.8352, "step": 25269 }, { "epoch": 0.9158783661338843, "grad_norm": 2.2377438181577505, "learning_rate": 1.8443031316290772e-07, "loss": 0.8592, "step": 25270 }, { "epoch": 0.9159146098365409, "grad_norm": 2.3406839240420836, "learning_rate": 1.8427240748586984e-07, "loss": 0.7182, "step": 25271 }, { "epoch": 0.9159508535391976, "grad_norm": 2.2848867629697054, "learning_rate": 1.8411456816664563e-07, "loss": 0.9262, "step": 25272 }, { "epoch": 0.9159870972418542, "grad_norm": 2.4444103394809242, "learning_rate": 1.8395679520741005e-07, "loss": 0.8232, "step": 25273 }, { "epoch": 0.9160233409445109, "grad_norm": 2.379065841765077, "learning_rate": 1.8379908861033691e-07, "loss": 0.8946, "step": 25274 }, { "epoch": 0.9160595846471675, "grad_norm": 2.289116781202329, "learning_rate": 1.8364144837759946e-07, "loss": 0.8131, "step": 25275 }, { "epoch": 0.9160958283498242, "grad_norm": 2.4500373415305505, "learning_rate": 1.8348387451137094e-07, "loss": 0.8566, "step": 25276 }, { "epoch": 0.9161320720524809, "grad_norm": 2.3529846126715537, "learning_rate": 1.833263670138208e-07, "loss": 1.0128, "step": 25277 }, { "epoch": 0.9161683157551376, "grad_norm": 2.3467515602335562, "learning_rate": 1.8316892588712054e-07, "loss": 0.7426, "step": 25278 }, { "epoch": 0.9162045594577942, "grad_norm": 2.179392531923828, "learning_rate": 1.8301155113343904e-07, "loss": 0.7708, "step": 25279 }, { "epoch": 0.9162408031604509, "grad_norm": 2.442827944553998, "learning_rate": 1.8285424275494623e-07, "loss": 0.9829, "step": 25280 }, { "epoch": 0.9162770468631075, "grad_norm": 2.095020334339949, "learning_rate": 1.8269700075380759e-07, "loss": 0.968, "step": 25281 }, { "epoch": 0.9163132905657642, "grad_norm": 2.3626226068084715, "learning_rate": 1.825398251321908e-07, "loss": 0.9, "step": 25282 }, { "epoch": 0.9163495342684208, "grad_norm": 2.420961435981806, "learning_rate": 1.8238271589226308e-07, "loss": 0.8735, "step": 25283 }, { "epoch": 0.9163857779710776, "grad_norm": 2.395595297384578, "learning_rate": 1.8222567303618656e-07, "loss": 0.7206, "step": 25284 }, { "epoch": 0.9164220216737342, "grad_norm": 2.44626206106937, "learning_rate": 1.8206869656612725e-07, "loss": 0.9231, "step": 25285 }, { "epoch": 0.9164582653763909, "grad_norm": 2.4558125901724175, "learning_rate": 1.819117864842468e-07, "loss": 0.8983, "step": 25286 }, { "epoch": 0.9164945090790475, "grad_norm": 2.26262707839388, "learning_rate": 1.81754942792709e-07, "loss": 0.7952, "step": 25287 }, { "epoch": 0.9165307527817041, "grad_norm": 2.3752793637680885, "learning_rate": 1.8159816549367327e-07, "loss": 0.9518, "step": 25288 }, { "epoch": 0.9165669964843608, "grad_norm": 2.198570455343702, "learning_rate": 1.8144145458930117e-07, "loss": 0.7116, "step": 25289 }, { "epoch": 0.9166032401870176, "grad_norm": 2.2707800924204364, "learning_rate": 1.812848100817516e-07, "loss": 0.7115, "step": 25290 }, { "epoch": 0.9166394838896742, "grad_norm": 2.412907052845852, "learning_rate": 1.8112823197318275e-07, "loss": 0.9184, "step": 25291 }, { "epoch": 0.9166757275923308, "grad_norm": 2.434718777648862, "learning_rate": 1.809717202657535e-07, "loss": 0.9043, "step": 25292 }, { "epoch": 0.9167119712949875, "grad_norm": 2.4043826913321493, "learning_rate": 1.8081527496161822e-07, "loss": 0.8905, "step": 25293 }, { "epoch": 0.9167482149976441, "grad_norm": 2.469399304062777, "learning_rate": 1.8065889606293462e-07, "loss": 1.1802, "step": 25294 }, { "epoch": 0.9167844587003008, "grad_norm": 2.581437975735176, "learning_rate": 1.8050258357185657e-07, "loss": 0.8689, "step": 25295 }, { "epoch": 0.9168207024029574, "grad_norm": 2.3466734606437396, "learning_rate": 1.8034633749053843e-07, "loss": 0.9006, "step": 25296 }, { "epoch": 0.9168569461056142, "grad_norm": 2.280849743308813, "learning_rate": 1.8019015782113236e-07, "loss": 0.8908, "step": 25297 }, { "epoch": 0.9168931898082708, "grad_norm": 2.1467437147871067, "learning_rate": 1.8003404456579165e-07, "loss": 0.8435, "step": 25298 }, { "epoch": 0.9169294335109275, "grad_norm": 2.1887473809872082, "learning_rate": 1.7987799772666626e-07, "loss": 0.8008, "step": 25299 }, { "epoch": 0.9169656772135841, "grad_norm": 2.1970407843324855, "learning_rate": 1.7972201730590723e-07, "loss": 0.868, "step": 25300 }, { "epoch": 0.9170019209162408, "grad_norm": 2.9444862989795513, "learning_rate": 1.7956610330566336e-07, "loss": 1.0133, "step": 25301 }, { "epoch": 0.9170381646188974, "grad_norm": 2.3354900499479627, "learning_rate": 1.7941025572808357e-07, "loss": 0.8617, "step": 25302 }, { "epoch": 0.9170744083215542, "grad_norm": 2.274660432824135, "learning_rate": 1.7925447457531552e-07, "loss": 0.6174, "step": 25303 }, { "epoch": 0.9171106520242108, "grad_norm": 2.5546873710154796, "learning_rate": 1.790987598495042e-07, "loss": 0.7699, "step": 25304 }, { "epoch": 0.9171468957268675, "grad_norm": 2.5818213084982067, "learning_rate": 1.789431115527973e-07, "loss": 0.8552, "step": 25305 }, { "epoch": 0.9171831394295241, "grad_norm": 2.3673194532653903, "learning_rate": 1.787875296873376e-07, "loss": 0.8942, "step": 25306 }, { "epoch": 0.9172193831321808, "grad_norm": 2.0025712858755473, "learning_rate": 1.786320142552711e-07, "loss": 0.6502, "step": 25307 }, { "epoch": 0.9172556268348374, "grad_norm": 2.419512339425493, "learning_rate": 1.7847656525873892e-07, "loss": 0.9207, "step": 25308 }, { "epoch": 0.9172918705374941, "grad_norm": 1.9959744825952395, "learning_rate": 1.783211826998843e-07, "loss": 0.7942, "step": 25309 }, { "epoch": 0.9173281142401508, "grad_norm": 2.229584987662367, "learning_rate": 1.7816586658084723e-07, "loss": 0.8291, "step": 25310 }, { "epoch": 0.9173643579428075, "grad_norm": 2.410324273503009, "learning_rate": 1.780106169037682e-07, "loss": 0.7369, "step": 25311 }, { "epoch": 0.9174006016454641, "grad_norm": 2.179835406428466, "learning_rate": 1.7785543367078771e-07, "loss": 0.7444, "step": 25312 }, { "epoch": 0.9174368453481208, "grad_norm": 2.6993012807145385, "learning_rate": 1.7770031688404187e-07, "loss": 0.9183, "step": 25313 }, { "epoch": 0.9174730890507774, "grad_norm": 2.167808305258063, "learning_rate": 1.775452665456695e-07, "loss": 0.762, "step": 25314 }, { "epoch": 0.917509332753434, "grad_norm": 2.5487846069928057, "learning_rate": 1.7739028265780668e-07, "loss": 0.8593, "step": 25315 }, { "epoch": 0.9175455764560908, "grad_norm": 2.626651463685265, "learning_rate": 1.7723536522258944e-07, "loss": 0.9064, "step": 25316 }, { "epoch": 0.9175818201587475, "grad_norm": 2.154316106897672, "learning_rate": 1.770805142421511e-07, "loss": 0.6626, "step": 25317 }, { "epoch": 0.9176180638614041, "grad_norm": 2.4039847359072426, "learning_rate": 1.769257297186283e-07, "loss": 0.9027, "step": 25318 }, { "epoch": 0.9176543075640607, "grad_norm": 2.6341379401038014, "learning_rate": 1.76771011654151e-07, "loss": 0.9517, "step": 25319 }, { "epoch": 0.9176905512667174, "grad_norm": 2.280057750533099, "learning_rate": 1.7661636005085248e-07, "loss": 0.7911, "step": 25320 }, { "epoch": 0.917726794969374, "grad_norm": 2.220956773868631, "learning_rate": 1.7646177491086324e-07, "loss": 0.8505, "step": 25321 }, { "epoch": 0.9177630386720307, "grad_norm": 2.664308322353571, "learning_rate": 1.763072562363133e-07, "loss": 0.8289, "step": 25322 }, { "epoch": 0.9177992823746874, "grad_norm": 2.304145139539909, "learning_rate": 1.7615280402933255e-07, "loss": 0.6955, "step": 25323 }, { "epoch": 0.9178355260773441, "grad_norm": 2.222260562712127, "learning_rate": 1.7599841829204877e-07, "loss": 0.8133, "step": 25324 }, { "epoch": 0.9178717697800007, "grad_norm": 2.492442189700093, "learning_rate": 1.7584409902658917e-07, "loss": 1.0497, "step": 25325 }, { "epoch": 0.9179080134826574, "grad_norm": 1.8530977255022758, "learning_rate": 1.7568984623507978e-07, "loss": 0.6163, "step": 25326 }, { "epoch": 0.917944257185314, "grad_norm": 2.4893326458327807, "learning_rate": 1.7553565991964784e-07, "loss": 0.8667, "step": 25327 }, { "epoch": 0.9179805008879707, "grad_norm": 2.469714831936921, "learning_rate": 1.7538154008241604e-07, "loss": 0.8756, "step": 25328 }, { "epoch": 0.9180167445906274, "grad_norm": 2.1660799704929032, "learning_rate": 1.7522748672550881e-07, "loss": 0.8704, "step": 25329 }, { "epoch": 0.9180529882932841, "grad_norm": 2.445446010307901, "learning_rate": 1.7507349985104892e-07, "loss": 0.8114, "step": 25330 }, { "epoch": 0.9180892319959407, "grad_norm": 2.465719916685753, "learning_rate": 1.74919579461158e-07, "loss": 0.9619, "step": 25331 }, { "epoch": 0.9181254756985974, "grad_norm": 2.5418238365745767, "learning_rate": 1.7476572555795823e-07, "loss": 0.8491, "step": 25332 }, { "epoch": 0.918161719401254, "grad_norm": 2.2115332006939026, "learning_rate": 1.746119381435668e-07, "loss": 0.7726, "step": 25333 }, { "epoch": 0.9181979631039107, "grad_norm": 2.2564073327375014, "learning_rate": 1.744582172201059e-07, "loss": 0.7101, "step": 25334 }, { "epoch": 0.9182342068065673, "grad_norm": 2.400898452383116, "learning_rate": 1.7430456278969165e-07, "loss": 0.8837, "step": 25335 }, { "epoch": 0.9182704505092241, "grad_norm": 2.391005726310379, "learning_rate": 1.741509748544429e-07, "loss": 0.8512, "step": 25336 }, { "epoch": 0.9183066942118807, "grad_norm": 2.490117319575027, "learning_rate": 1.7399745341647344e-07, "loss": 0.9189, "step": 25337 }, { "epoch": 0.9183429379145374, "grad_norm": 2.570012116093738, "learning_rate": 1.7384399847790223e-07, "loss": 1.04, "step": 25338 }, { "epoch": 0.918379181617194, "grad_norm": 2.4303742362251657, "learning_rate": 1.7369061004084086e-07, "loss": 0.9297, "step": 25339 }, { "epoch": 0.9184154253198507, "grad_norm": 2.4486497566569647, "learning_rate": 1.7353728810740488e-07, "loss": 0.8804, "step": 25340 }, { "epoch": 0.9184516690225073, "grad_norm": 2.4258788997290543, "learning_rate": 1.7338403267970537e-07, "loss": 0.7989, "step": 25341 }, { "epoch": 0.9184879127251641, "grad_norm": 2.2773894293546273, "learning_rate": 1.732308437598551e-07, "loss": 0.9316, "step": 25342 }, { "epoch": 0.9185241564278207, "grad_norm": 2.2460186157325035, "learning_rate": 1.7307772134996514e-07, "loss": 0.9074, "step": 25343 }, { "epoch": 0.9185604001304774, "grad_norm": 2.391173702286801, "learning_rate": 1.7292466545214436e-07, "loss": 0.703, "step": 25344 }, { "epoch": 0.918596643833134, "grad_norm": 2.678026635210218, "learning_rate": 1.727716760685022e-07, "loss": 0.9564, "step": 25345 }, { "epoch": 0.9186328875357906, "grad_norm": 2.3265409769564016, "learning_rate": 1.7261875320114695e-07, "loss": 1.0529, "step": 25346 }, { "epoch": 0.9186691312384473, "grad_norm": 2.4629248190948734, "learning_rate": 1.724658968521864e-07, "loss": 0.8094, "step": 25347 }, { "epoch": 0.9187053749411039, "grad_norm": 2.297415412057546, "learning_rate": 1.7231310702372606e-07, "loss": 0.7055, "step": 25348 }, { "epoch": 0.9187416186437607, "grad_norm": 2.4093640239390233, "learning_rate": 1.7216038371787092e-07, "loss": 0.8905, "step": 25349 }, { "epoch": 0.9187778623464173, "grad_norm": 2.208220306536792, "learning_rate": 1.720077269367265e-07, "loss": 0.8155, "step": 25350 }, { "epoch": 0.918814106049074, "grad_norm": 2.2386655538621887, "learning_rate": 1.7185513668239506e-07, "loss": 1.0201, "step": 25351 }, { "epoch": 0.9188503497517306, "grad_norm": 2.297455250339685, "learning_rate": 1.7170261295698098e-07, "loss": 0.8291, "step": 25352 }, { "epoch": 0.9188865934543873, "grad_norm": 2.3496499674348446, "learning_rate": 1.7155015576258316e-07, "loss": 0.9304, "step": 25353 }, { "epoch": 0.9189228371570439, "grad_norm": 2.4466178050005465, "learning_rate": 1.7139776510130546e-07, "loss": 0.8524, "step": 25354 }, { "epoch": 0.9189590808597006, "grad_norm": 2.221724137707948, "learning_rate": 1.7124544097524565e-07, "loss": 1.0118, "step": 25355 }, { "epoch": 0.9189953245623573, "grad_norm": 2.518723768032899, "learning_rate": 1.710931833865037e-07, "loss": 0.7746, "step": 25356 }, { "epoch": 0.919031568265014, "grad_norm": 2.625709148608872, "learning_rate": 1.7094099233717686e-07, "loss": 1.0185, "step": 25357 }, { "epoch": 0.9190678119676706, "grad_norm": 2.7788086048260414, "learning_rate": 1.707888678293629e-07, "loss": 0.9351, "step": 25358 }, { "epoch": 0.9191040556703273, "grad_norm": 2.6133848394748145, "learning_rate": 1.7063680986515785e-07, "loss": 1.0484, "step": 25359 }, { "epoch": 0.9191402993729839, "grad_norm": 2.249458276755411, "learning_rate": 1.7048481844665676e-07, "loss": 0.7577, "step": 25360 }, { "epoch": 0.9191765430756406, "grad_norm": 2.063033778036452, "learning_rate": 1.703328935759535e-07, "loss": 0.6844, "step": 25361 }, { "epoch": 0.9192127867782973, "grad_norm": 2.7527862633548565, "learning_rate": 1.7018103525514252e-07, "loss": 0.8909, "step": 25362 }, { "epoch": 0.919249030480954, "grad_norm": 2.6132717501111586, "learning_rate": 1.7002924348631656e-07, "loss": 0.6955, "step": 25363 }, { "epoch": 0.9192852741836106, "grad_norm": 2.1949942427191944, "learning_rate": 1.6987751827156562e-07, "loss": 0.7574, "step": 25364 }, { "epoch": 0.9193215178862673, "grad_norm": 2.606450318890923, "learning_rate": 1.6972585961298137e-07, "loss": 0.8911, "step": 25365 }, { "epoch": 0.9193577615889239, "grad_norm": 2.4498149859580223, "learning_rate": 1.6957426751265327e-07, "loss": 0.8103, "step": 25366 }, { "epoch": 0.9193940052915806, "grad_norm": 2.446593075665665, "learning_rate": 1.694227419726713e-07, "loss": 0.9364, "step": 25367 }, { "epoch": 0.9194302489942372, "grad_norm": 2.2325575914169136, "learning_rate": 1.6927128299512207e-07, "loss": 0.7891, "step": 25368 }, { "epoch": 0.919466492696894, "grad_norm": 2.286870009410956, "learning_rate": 1.691198905820929e-07, "loss": 1.0025, "step": 25369 }, { "epoch": 0.9195027363995506, "grad_norm": 2.5690256011325934, "learning_rate": 1.6896856473567036e-07, "loss": 0.9114, "step": 25370 }, { "epoch": 0.9195389801022072, "grad_norm": 2.8013418830730776, "learning_rate": 1.6881730545793896e-07, "loss": 0.7645, "step": 25371 }, { "epoch": 0.9195752238048639, "grad_norm": 2.4266767965348084, "learning_rate": 1.6866611275098422e-07, "loss": 0.8618, "step": 25372 }, { "epoch": 0.9196114675075205, "grad_norm": 2.5001866602290375, "learning_rate": 1.685149866168867e-07, "loss": 0.9103, "step": 25373 }, { "epoch": 0.9196477112101772, "grad_norm": 2.4567097120569215, "learning_rate": 1.6836392705773198e-07, "loss": 0.8279, "step": 25374 }, { "epoch": 0.919683954912834, "grad_norm": 2.597890374973522, "learning_rate": 1.6821293407560002e-07, "loss": 0.8509, "step": 25375 }, { "epoch": 0.9197201986154906, "grad_norm": 2.1680135090205335, "learning_rate": 1.6806200767257197e-07, "loss": 0.76, "step": 25376 }, { "epoch": 0.9197564423181472, "grad_norm": 2.561695373208434, "learning_rate": 1.679111478507267e-07, "loss": 0.8817, "step": 25377 }, { "epoch": 0.9197926860208039, "grad_norm": 2.1294723882879834, "learning_rate": 1.6776035461214367e-07, "loss": 0.9474, "step": 25378 }, { "epoch": 0.9198289297234605, "grad_norm": 2.5277490105623093, "learning_rate": 1.6760962795890067e-07, "loss": 0.8941, "step": 25379 }, { "epoch": 0.9198651734261172, "grad_norm": 2.4470432962932205, "learning_rate": 1.6745896789307435e-07, "loss": 0.9821, "step": 25380 }, { "epoch": 0.9199014171287738, "grad_norm": 2.4515539942496045, "learning_rate": 1.673083744167403e-07, "loss": 1.1846, "step": 25381 }, { "epoch": 0.9199376608314306, "grad_norm": 2.3321872745744225, "learning_rate": 1.6715784753197462e-07, "loss": 0.8958, "step": 25382 }, { "epoch": 0.9199739045340872, "grad_norm": 2.528807496204927, "learning_rate": 1.6700738724085118e-07, "loss": 0.96, "step": 25383 }, { "epoch": 0.9200101482367439, "grad_norm": 2.4745450156242317, "learning_rate": 1.6685699354544281e-07, "loss": 0.9497, "step": 25384 }, { "epoch": 0.9200463919394005, "grad_norm": 2.2713124256189565, "learning_rate": 1.6670666644782173e-07, "loss": 0.8174, "step": 25385 }, { "epoch": 0.9200826356420572, "grad_norm": 2.38928213891349, "learning_rate": 1.6655640595005962e-07, "loss": 0.9343, "step": 25386 }, { "epoch": 0.9201188793447138, "grad_norm": 2.4469212446236117, "learning_rate": 1.6640621205422812e-07, "loss": 0.7299, "step": 25387 }, { "epoch": 0.9201551230473706, "grad_norm": 2.3771287577936153, "learning_rate": 1.6625608476239508e-07, "loss": 0.8475, "step": 25388 }, { "epoch": 0.9201913667500272, "grad_norm": 2.582245793231994, "learning_rate": 1.6610602407662935e-07, "loss": 0.887, "step": 25389 }, { "epoch": 0.9202276104526839, "grad_norm": 2.4975368244089977, "learning_rate": 1.659560299990004e-07, "loss": 0.9471, "step": 25390 }, { "epoch": 0.9202638541553405, "grad_norm": 2.288693811564779, "learning_rate": 1.658061025315727e-07, "loss": 0.9485, "step": 25391 }, { "epoch": 0.9203000978579972, "grad_norm": 2.178299489782886, "learning_rate": 1.6565624167641403e-07, "loss": 0.8951, "step": 25392 }, { "epoch": 0.9203363415606538, "grad_norm": 2.332444330675837, "learning_rate": 1.6550644743558776e-07, "loss": 0.8283, "step": 25393 }, { "epoch": 0.9203725852633104, "grad_norm": 2.272743259069064, "learning_rate": 1.6535671981115997e-07, "loss": 0.8907, "step": 25394 }, { "epoch": 0.9204088289659672, "grad_norm": 2.1582599664939046, "learning_rate": 1.6520705880519183e-07, "loss": 0.8021, "step": 25395 }, { "epoch": 0.9204450726686239, "grad_norm": 2.322708524723627, "learning_rate": 1.6505746441974613e-07, "loss": 0.8013, "step": 25396 }, { "epoch": 0.9204813163712805, "grad_norm": 2.2499635693780697, "learning_rate": 1.6490793665688566e-07, "loss": 0.9845, "step": 25397 }, { "epoch": 0.9205175600739371, "grad_norm": 2.490984322933039, "learning_rate": 1.6475847551866874e-07, "loss": 0.8182, "step": 25398 }, { "epoch": 0.9205538037765938, "grad_norm": 2.433449990643625, "learning_rate": 1.6460908100715655e-07, "loss": 0.7756, "step": 25399 }, { "epoch": 0.9205900474792504, "grad_norm": 2.381022096244587, "learning_rate": 1.6445975312440578e-07, "loss": 0.9612, "step": 25400 }, { "epoch": 0.9206262911819072, "grad_norm": 2.5869247082619777, "learning_rate": 1.6431049187247583e-07, "loss": 1.0468, "step": 25401 }, { "epoch": 0.9206625348845638, "grad_norm": 2.3541260705000644, "learning_rate": 1.641612972534229e-07, "loss": 0.8999, "step": 25402 }, { "epoch": 0.9206987785872205, "grad_norm": 2.194978087491508, "learning_rate": 1.6401216926930254e-07, "loss": 0.7811, "step": 25403 }, { "epoch": 0.9207350222898771, "grad_norm": 2.354109346386563, "learning_rate": 1.638631079221692e-07, "loss": 0.9328, "step": 25404 }, { "epoch": 0.9207712659925338, "grad_norm": 2.4290582178858813, "learning_rate": 1.637141132140785e-07, "loss": 0.8036, "step": 25405 }, { "epoch": 0.9208075096951904, "grad_norm": 2.453250873696424, "learning_rate": 1.6356518514708153e-07, "loss": 0.8526, "step": 25406 }, { "epoch": 0.9208437533978471, "grad_norm": 2.488143387823675, "learning_rate": 1.6341632372323225e-07, "loss": 0.91, "step": 25407 }, { "epoch": 0.9208799971005038, "grad_norm": 2.380808247542163, "learning_rate": 1.6326752894458008e-07, "loss": 0.8832, "step": 25408 }, { "epoch": 0.9209162408031605, "grad_norm": 2.4856179290582023, "learning_rate": 1.631188008131762e-07, "loss": 0.8421, "step": 25409 }, { "epoch": 0.9209524845058171, "grad_norm": 2.274929315087805, "learning_rate": 1.6297013933107065e-07, "loss": 0.6909, "step": 25410 }, { "epoch": 0.9209887282084738, "grad_norm": 2.7078616022572612, "learning_rate": 1.628215445003106e-07, "loss": 0.6905, "step": 25411 }, { "epoch": 0.9210249719111304, "grad_norm": 2.5400008717008076, "learning_rate": 1.6267301632294451e-07, "loss": 1.0136, "step": 25412 }, { "epoch": 0.9210612156137871, "grad_norm": 2.1932699276275605, "learning_rate": 1.6252455480101847e-07, "loss": 0.7914, "step": 25413 }, { "epoch": 0.9210974593164438, "grad_norm": 2.2379161024146925, "learning_rate": 1.6237615993657917e-07, "loss": 0.9475, "step": 25414 }, { "epoch": 0.9211337030191005, "grad_norm": 2.2379957235886745, "learning_rate": 1.6222783173167e-07, "loss": 0.7469, "step": 25415 }, { "epoch": 0.9211699467217571, "grad_norm": 2.4811872556844556, "learning_rate": 1.6207957018833598e-07, "loss": 0.9809, "step": 25416 }, { "epoch": 0.9212061904244138, "grad_norm": 2.219234028078067, "learning_rate": 1.6193137530861935e-07, "loss": 0.6809, "step": 25417 }, { "epoch": 0.9212424341270704, "grad_norm": 2.5277644800041115, "learning_rate": 1.617832470945624e-07, "loss": 1.0372, "step": 25418 }, { "epoch": 0.921278677829727, "grad_norm": 2.5004517077775605, "learning_rate": 1.6163518554820678e-07, "loss": 0.8978, "step": 25419 }, { "epoch": 0.9213149215323837, "grad_norm": 2.3670739589658725, "learning_rate": 1.614871906715909e-07, "loss": 0.8154, "step": 25420 }, { "epoch": 0.9213511652350405, "grad_norm": 2.3095139450317967, "learning_rate": 1.6133926246675646e-07, "loss": 0.8957, "step": 25421 }, { "epoch": 0.9213874089376971, "grad_norm": 2.3677782942952503, "learning_rate": 1.6119140093574014e-07, "loss": 0.8221, "step": 25422 }, { "epoch": 0.9214236526403538, "grad_norm": 2.880721367085307, "learning_rate": 1.6104360608058033e-07, "loss": 1.0385, "step": 25423 }, { "epoch": 0.9214598963430104, "grad_norm": 2.4380867104257096, "learning_rate": 1.6089587790331206e-07, "loss": 0.7613, "step": 25424 }, { "epoch": 0.921496140045667, "grad_norm": 2.467990698191839, "learning_rate": 1.6074821640597314e-07, "loss": 0.9177, "step": 25425 }, { "epoch": 0.9215323837483237, "grad_norm": 2.3481670786099587, "learning_rate": 1.6060062159059642e-07, "loss": 0.9077, "step": 25426 }, { "epoch": 0.9215686274509803, "grad_norm": 2.6845095007299578, "learning_rate": 1.6045309345921688e-07, "loss": 0.9272, "step": 25427 }, { "epoch": 0.9216048711536371, "grad_norm": 2.5684922179613427, "learning_rate": 1.6030563201386685e-07, "loss": 0.8462, "step": 25428 }, { "epoch": 0.9216411148562937, "grad_norm": 2.2016226382339856, "learning_rate": 1.601582372565774e-07, "loss": 0.809, "step": 25429 }, { "epoch": 0.9216773585589504, "grad_norm": 2.4429960252390175, "learning_rate": 1.6001090918938146e-07, "loss": 0.8385, "step": 25430 }, { "epoch": 0.921713602261607, "grad_norm": 2.4125282999076054, "learning_rate": 1.5986364781430785e-07, "loss": 0.8962, "step": 25431 }, { "epoch": 0.9217498459642637, "grad_norm": 2.0041213752959726, "learning_rate": 1.5971645313338556e-07, "loss": 0.8288, "step": 25432 }, { "epoch": 0.9217860896669203, "grad_norm": 2.2633969831554897, "learning_rate": 1.5956932514864298e-07, "loss": 0.9671, "step": 25433 }, { "epoch": 0.9218223333695771, "grad_norm": 2.5090706878084634, "learning_rate": 1.5942226386210845e-07, "loss": 0.9951, "step": 25434 }, { "epoch": 0.9218585770722337, "grad_norm": 2.3492338141045623, "learning_rate": 1.5927526927580705e-07, "loss": 0.8885, "step": 25435 }, { "epoch": 0.9218948207748904, "grad_norm": 2.54778842174468, "learning_rate": 1.5912834139176493e-07, "loss": 1.0429, "step": 25436 }, { "epoch": 0.921931064477547, "grad_norm": 2.381029709022749, "learning_rate": 1.589814802120071e-07, "loss": 0.8163, "step": 25437 }, { "epoch": 0.9219673081802037, "grad_norm": 2.4138178255623255, "learning_rate": 1.5883468573855643e-07, "loss": 0.8608, "step": 25438 }, { "epoch": 0.9220035518828603, "grad_norm": 2.5204408177772635, "learning_rate": 1.5868795797343628e-07, "loss": 0.88, "step": 25439 }, { "epoch": 0.922039795585517, "grad_norm": 2.578090904192826, "learning_rate": 1.5854129691866727e-07, "loss": 0.8514, "step": 25440 }, { "epoch": 0.9220760392881737, "grad_norm": 1.9468911364484733, "learning_rate": 1.5839470257627165e-07, "loss": 0.6921, "step": 25441 }, { "epoch": 0.9221122829908304, "grad_norm": 2.295462133267261, "learning_rate": 1.5824817494826895e-07, "loss": 0.8113, "step": 25442 }, { "epoch": 0.922148526693487, "grad_norm": 2.300808780440381, "learning_rate": 1.5810171403667863e-07, "loss": 0.9063, "step": 25443 }, { "epoch": 0.9221847703961437, "grad_norm": 2.204977829598913, "learning_rate": 1.5795531984351798e-07, "loss": 0.7569, "step": 25444 }, { "epoch": 0.9222210140988003, "grad_norm": 2.387849147521375, "learning_rate": 1.5780899237080427e-07, "loss": 0.9324, "step": 25445 }, { "epoch": 0.922257257801457, "grad_norm": 2.33544773467211, "learning_rate": 1.576627316205548e-07, "loss": 1.0562, "step": 25446 }, { "epoch": 0.9222935015041137, "grad_norm": 2.30731853231162, "learning_rate": 1.5751653759478403e-07, "loss": 0.9258, "step": 25447 }, { "epoch": 0.9223297452067704, "grad_norm": 1.9950274051228496, "learning_rate": 1.573704102955065e-07, "loss": 0.7162, "step": 25448 }, { "epoch": 0.922365988909427, "grad_norm": 2.4299470629874134, "learning_rate": 1.572243497247361e-07, "loss": 0.8745, "step": 25449 }, { "epoch": 0.9224022326120837, "grad_norm": 2.0409297845492955, "learning_rate": 1.5707835588448572e-07, "loss": 0.7908, "step": 25450 }, { "epoch": 0.9224384763147403, "grad_norm": 2.488353999198547, "learning_rate": 1.5693242877676652e-07, "loss": 0.7783, "step": 25451 }, { "epoch": 0.9224747200173969, "grad_norm": 1.9654386569976294, "learning_rate": 1.5678656840358908e-07, "loss": 0.8085, "step": 25452 }, { "epoch": 0.9225109637200536, "grad_norm": 2.185689115817013, "learning_rate": 1.5664077476696404e-07, "loss": 0.7229, "step": 25453 }, { "epoch": 0.9225472074227103, "grad_norm": 2.6429065589675393, "learning_rate": 1.5649504786890036e-07, "loss": 1.0752, "step": 25454 }, { "epoch": 0.922583451125367, "grad_norm": 2.3636665873585287, "learning_rate": 1.5634938771140473e-07, "loss": 0.85, "step": 25455 }, { "epoch": 0.9226196948280236, "grad_norm": 2.5963583359797933, "learning_rate": 1.5620379429648557e-07, "loss": 0.8952, "step": 25456 }, { "epoch": 0.9226559385306803, "grad_norm": 2.4099429070791443, "learning_rate": 1.5605826762614962e-07, "loss": 0.8682, "step": 25457 }, { "epoch": 0.9226921822333369, "grad_norm": 2.1415119169087258, "learning_rate": 1.559128077023997e-07, "loss": 1.0247, "step": 25458 }, { "epoch": 0.9227284259359936, "grad_norm": 2.5934658864517917, "learning_rate": 1.5576741452724308e-07, "loss": 0.9366, "step": 25459 }, { "epoch": 0.9227646696386503, "grad_norm": 2.6358517356748914, "learning_rate": 1.5562208810268042e-07, "loss": 0.9312, "step": 25460 }, { "epoch": 0.922800913341307, "grad_norm": 2.467015858692954, "learning_rate": 1.5547682843071622e-07, "loss": 0.8635, "step": 25461 }, { "epoch": 0.9228371570439636, "grad_norm": 2.332803564689027, "learning_rate": 1.5533163551335163e-07, "loss": 0.8804, "step": 25462 }, { "epoch": 0.9228734007466203, "grad_norm": 2.6612785144281434, "learning_rate": 1.5518650935258728e-07, "loss": 0.7757, "step": 25463 }, { "epoch": 0.9229096444492769, "grad_norm": 2.6433833288146933, "learning_rate": 1.5504144995042214e-07, "loss": 0.8559, "step": 25464 }, { "epoch": 0.9229458881519336, "grad_norm": 2.460662805373572, "learning_rate": 1.5489645730885627e-07, "loss": 0.9164, "step": 25465 }, { "epoch": 0.9229821318545902, "grad_norm": 2.3552787748936455, "learning_rate": 1.5475153142988698e-07, "loss": 0.9312, "step": 25466 }, { "epoch": 0.923018375557247, "grad_norm": 2.1765186398254786, "learning_rate": 1.5460667231551096e-07, "loss": 0.7839, "step": 25467 }, { "epoch": 0.9230546192599036, "grad_norm": 2.6535125764766465, "learning_rate": 1.5446187996772443e-07, "loss": 0.8956, "step": 25468 }, { "epoch": 0.9230908629625603, "grad_norm": 2.2771871099263983, "learning_rate": 1.5431715438852302e-07, "loss": 0.7009, "step": 25469 }, { "epoch": 0.9231271066652169, "grad_norm": 2.4874242198072714, "learning_rate": 1.5417249557990065e-07, "loss": 0.8813, "step": 25470 }, { "epoch": 0.9231633503678736, "grad_norm": 2.4708929248703595, "learning_rate": 1.5402790354385077e-07, "loss": 0.839, "step": 25471 }, { "epoch": 0.9231995940705302, "grad_norm": 2.540740487356062, "learning_rate": 1.538833782823651e-07, "loss": 0.9289, "step": 25472 }, { "epoch": 0.923235837773187, "grad_norm": 2.4602491238201343, "learning_rate": 1.5373891979743593e-07, "loss": 0.9355, "step": 25473 }, { "epoch": 0.9232720814758436, "grad_norm": 2.165365587120274, "learning_rate": 1.535945280910539e-07, "loss": 1.0618, "step": 25474 }, { "epoch": 0.9233083251785003, "grad_norm": 2.412101034100303, "learning_rate": 1.5345020316520743e-07, "loss": 0.8977, "step": 25475 }, { "epoch": 0.9233445688811569, "grad_norm": 2.108205865302022, "learning_rate": 1.5330594502188656e-07, "loss": 0.7124, "step": 25476 }, { "epoch": 0.9233808125838135, "grad_norm": 2.3074032526640993, "learning_rate": 1.5316175366307916e-07, "loss": 0.8092, "step": 25477 }, { "epoch": 0.9234170562864702, "grad_norm": 2.228858172314239, "learning_rate": 1.530176290907709e-07, "loss": 0.9455, "step": 25478 }, { "epoch": 0.9234532999891268, "grad_norm": 2.0161336433134944, "learning_rate": 1.528735713069479e-07, "loss": 0.8581, "step": 25479 }, { "epoch": 0.9234895436917836, "grad_norm": 1.9514401731008764, "learning_rate": 1.5272958031359585e-07, "loss": 0.945, "step": 25480 }, { "epoch": 0.9235257873944402, "grad_norm": 2.3200949261061807, "learning_rate": 1.5258565611269925e-07, "loss": 0.6653, "step": 25481 }, { "epoch": 0.9235620310970969, "grad_norm": 2.312719459908048, "learning_rate": 1.5244179870624042e-07, "loss": 1.0133, "step": 25482 }, { "epoch": 0.9235982747997535, "grad_norm": 2.681059477863606, "learning_rate": 1.5229800809620164e-07, "loss": 0.7582, "step": 25483 }, { "epoch": 0.9236345185024102, "grad_norm": 2.339522475546108, "learning_rate": 1.5215428428456524e-07, "loss": 0.9585, "step": 25484 }, { "epoch": 0.9236707622050668, "grad_norm": 2.308009364869885, "learning_rate": 1.5201062727331018e-07, "loss": 0.8403, "step": 25485 }, { "epoch": 0.9237070059077236, "grad_norm": 2.240094613184174, "learning_rate": 1.518670370644171e-07, "loss": 0.7707, "step": 25486 }, { "epoch": 0.9237432496103802, "grad_norm": 2.435720433184007, "learning_rate": 1.5172351365986325e-07, "loss": 0.7273, "step": 25487 }, { "epoch": 0.9237794933130369, "grad_norm": 2.3755764080386763, "learning_rate": 1.5158005706162883e-07, "loss": 0.9959, "step": 25488 }, { "epoch": 0.9238157370156935, "grad_norm": 2.5086022142060393, "learning_rate": 1.5143666727168771e-07, "loss": 0.7561, "step": 25489 }, { "epoch": 0.9238519807183502, "grad_norm": 2.4880390504789625, "learning_rate": 1.5129334429201782e-07, "loss": 0.9664, "step": 25490 }, { "epoch": 0.9238882244210068, "grad_norm": 2.231280106855196, "learning_rate": 1.5115008812459198e-07, "loss": 0.9269, "step": 25491 }, { "epoch": 0.9239244681236635, "grad_norm": 2.669734797218974, "learning_rate": 1.5100689877138697e-07, "loss": 0.9984, "step": 25492 }, { "epoch": 0.9239607118263202, "grad_norm": 2.1999929306304367, "learning_rate": 1.5086377623437344e-07, "loss": 0.8155, "step": 25493 }, { "epoch": 0.9239969555289769, "grad_norm": 2.630809761193559, "learning_rate": 1.5072072051552477e-07, "loss": 1.0806, "step": 25494 }, { "epoch": 0.9240331992316335, "grad_norm": 2.589889000072402, "learning_rate": 1.5057773161681166e-07, "loss": 0.9082, "step": 25495 }, { "epoch": 0.9240694429342902, "grad_norm": 2.1988954001835723, "learning_rate": 1.5043480954020472e-07, "loss": 0.8045, "step": 25496 }, { "epoch": 0.9241056866369468, "grad_norm": 2.6125335862934147, "learning_rate": 1.502919542876735e-07, "loss": 1.1002, "step": 25497 }, { "epoch": 0.9241419303396035, "grad_norm": 2.5311152996873565, "learning_rate": 1.5014916586118533e-07, "loss": 0.9781, "step": 25498 }, { "epoch": 0.9241781740422601, "grad_norm": 2.6104940898938707, "learning_rate": 1.5000644426270861e-07, "loss": 0.864, "step": 25499 }, { "epoch": 0.9242144177449169, "grad_norm": 2.2933443242361418, "learning_rate": 1.498637894942101e-07, "loss": 0.8235, "step": 25500 }, { "epoch": 0.9242506614475735, "grad_norm": 2.5889956704384516, "learning_rate": 1.49721201557656e-07, "loss": 0.8509, "step": 25501 }, { "epoch": 0.9242869051502302, "grad_norm": 2.236561817321966, "learning_rate": 1.495786804550098e-07, "loss": 0.7946, "step": 25502 }, { "epoch": 0.9243231488528868, "grad_norm": 2.3008773361878156, "learning_rate": 1.4943622618823538e-07, "loss": 1.054, "step": 25503 }, { "epoch": 0.9243593925555434, "grad_norm": 2.6032632702653653, "learning_rate": 1.4929383875929736e-07, "loss": 0.8736, "step": 25504 }, { "epoch": 0.9243956362582001, "grad_norm": 2.6878661923406804, "learning_rate": 1.4915151817015583e-07, "loss": 0.9109, "step": 25505 }, { "epoch": 0.9244318799608569, "grad_norm": 2.399691084995295, "learning_rate": 1.4900926442277364e-07, "loss": 0.879, "step": 25506 }, { "epoch": 0.9244681236635135, "grad_norm": 2.4015615921644478, "learning_rate": 1.4886707751910866e-07, "loss": 0.869, "step": 25507 }, { "epoch": 0.9245043673661701, "grad_norm": 2.2999567328480253, "learning_rate": 1.4872495746112215e-07, "loss": 0.9584, "step": 25508 }, { "epoch": 0.9245406110688268, "grad_norm": 2.337740603278446, "learning_rate": 1.4858290425077194e-07, "loss": 0.8865, "step": 25509 }, { "epoch": 0.9245768547714834, "grad_norm": 2.0993959329164475, "learning_rate": 1.4844091789001536e-07, "loss": 0.9654, "step": 25510 }, { "epoch": 0.9246130984741401, "grad_norm": 2.831202256458956, "learning_rate": 1.4829899838080753e-07, "loss": 0.9411, "step": 25511 }, { "epoch": 0.9246493421767967, "grad_norm": 2.412421180471871, "learning_rate": 1.4815714572510686e-07, "loss": 0.9879, "step": 25512 }, { "epoch": 0.9246855858794535, "grad_norm": 2.6288389513546435, "learning_rate": 1.4801535992486572e-07, "loss": 0.8677, "step": 25513 }, { "epoch": 0.9247218295821101, "grad_norm": 2.257418670452288, "learning_rate": 1.478736409820386e-07, "loss": 0.8235, "step": 25514 }, { "epoch": 0.9247580732847668, "grad_norm": 2.425943684693588, "learning_rate": 1.4773198889857842e-07, "loss": 1.0229, "step": 25515 }, { "epoch": 0.9247943169874234, "grad_norm": 2.5807008757559387, "learning_rate": 1.475904036764364e-07, "loss": 0.8462, "step": 25516 }, { "epoch": 0.9248305606900801, "grad_norm": 2.3660910862320925, "learning_rate": 1.4744888531756485e-07, "loss": 0.9137, "step": 25517 }, { "epoch": 0.9248668043927367, "grad_norm": 2.5714698795957203, "learning_rate": 1.4730743382391222e-07, "loss": 0.9788, "step": 25518 }, { "epoch": 0.9249030480953935, "grad_norm": 2.413001579150658, "learning_rate": 1.4716604919742806e-07, "loss": 0.7513, "step": 25519 }, { "epoch": 0.9249392917980501, "grad_norm": 2.3093569346232385, "learning_rate": 1.4702473144006134e-07, "loss": 0.741, "step": 25520 }, { "epoch": 0.9249755355007068, "grad_norm": 2.46308252355327, "learning_rate": 1.468834805537589e-07, "loss": 0.945, "step": 25521 }, { "epoch": 0.9250117792033634, "grad_norm": 2.599912201238743, "learning_rate": 1.467422965404669e-07, "loss": 0.9093, "step": 25522 }, { "epoch": 0.9250480229060201, "grad_norm": 2.6752506688720534, "learning_rate": 1.4660117940213103e-07, "loss": 0.9979, "step": 25523 }, { "epoch": 0.9250842666086767, "grad_norm": 2.0438594707167623, "learning_rate": 1.4646012914069585e-07, "loss": 0.7372, "step": 25524 }, { "epoch": 0.9251205103113334, "grad_norm": 2.1568977889178425, "learning_rate": 1.4631914575810425e-07, "loss": 1.1234, "step": 25525 }, { "epoch": 0.9251567540139901, "grad_norm": 2.3396843939352765, "learning_rate": 1.4617822925629966e-07, "loss": 0.686, "step": 25526 }, { "epoch": 0.9251929977166468, "grad_norm": 2.459243783462481, "learning_rate": 1.460373796372233e-07, "loss": 0.8798, "step": 25527 }, { "epoch": 0.9252292414193034, "grad_norm": 2.3757566055312207, "learning_rate": 1.4589659690281644e-07, "loss": 0.9854, "step": 25528 }, { "epoch": 0.92526548512196, "grad_norm": 2.888241000939638, "learning_rate": 1.4575588105501915e-07, "loss": 0.8295, "step": 25529 }, { "epoch": 0.9253017288246167, "grad_norm": 2.5536664599483525, "learning_rate": 1.4561523209576988e-07, "loss": 0.8366, "step": 25530 }, { "epoch": 0.9253379725272733, "grad_norm": 2.282996800317659, "learning_rate": 1.4547465002700656e-07, "loss": 0.7271, "step": 25531 }, { "epoch": 0.9253742162299301, "grad_norm": 2.3789968527341614, "learning_rate": 1.4533413485066649e-07, "loss": 0.961, "step": 25532 }, { "epoch": 0.9254104599325867, "grad_norm": 2.0927674672218024, "learning_rate": 1.4519368656868648e-07, "loss": 0.8097, "step": 25533 }, { "epoch": 0.9254467036352434, "grad_norm": 2.389063701345348, "learning_rate": 1.4505330518300164e-07, "loss": 0.9735, "step": 25534 }, { "epoch": 0.9254829473379, "grad_norm": 2.576909884108149, "learning_rate": 1.4491299069554543e-07, "loss": 0.8207, "step": 25535 }, { "epoch": 0.9255191910405567, "grad_norm": 2.6469444745303803, "learning_rate": 1.447727431082524e-07, "loss": 0.8858, "step": 25536 }, { "epoch": 0.9255554347432133, "grad_norm": 2.5128856525508105, "learning_rate": 1.446325624230549e-07, "loss": 0.9529, "step": 25537 }, { "epoch": 0.92559167844587, "grad_norm": 1.9898646827540347, "learning_rate": 1.444924486418836e-07, "loss": 0.8375, "step": 25538 }, { "epoch": 0.9256279221485267, "grad_norm": 2.2465042105952806, "learning_rate": 1.4435240176666976e-07, "loss": 0.8356, "step": 25539 }, { "epoch": 0.9256641658511834, "grad_norm": 2.4797062557202083, "learning_rate": 1.4421242179934347e-07, "loss": 0.8939, "step": 25540 }, { "epoch": 0.92570040955384, "grad_norm": 2.22405576965164, "learning_rate": 1.4407250874183377e-07, "loss": 0.975, "step": 25541 }, { "epoch": 0.9257366532564967, "grad_norm": 2.0617516166590253, "learning_rate": 1.4393266259606742e-07, "loss": 0.7935, "step": 25542 }, { "epoch": 0.9257728969591533, "grad_norm": 2.37669762317311, "learning_rate": 1.4379288336397236e-07, "loss": 0.917, "step": 25543 }, { "epoch": 0.92580914066181, "grad_norm": 2.2774811231446894, "learning_rate": 1.4365317104747477e-07, "loss": 0.7601, "step": 25544 }, { "epoch": 0.9258453843644667, "grad_norm": 2.2603031688272903, "learning_rate": 1.435135256484993e-07, "loss": 0.8457, "step": 25545 }, { "epoch": 0.9258816280671234, "grad_norm": 2.598726084128141, "learning_rate": 1.4337394716897046e-07, "loss": 0.8901, "step": 25546 }, { "epoch": 0.92591787176978, "grad_norm": 2.256980716755136, "learning_rate": 1.4323443561081064e-07, "loss": 0.8323, "step": 25547 }, { "epoch": 0.9259541154724367, "grad_norm": 2.286958395833735, "learning_rate": 1.430949909759438e-07, "loss": 0.8547, "step": 25548 }, { "epoch": 0.9259903591750933, "grad_norm": 2.4681182154361005, "learning_rate": 1.4295561326629014e-07, "loss": 0.8332, "step": 25549 }, { "epoch": 0.92602660287775, "grad_norm": 2.3463416721381978, "learning_rate": 1.4281630248377142e-07, "loss": 0.8779, "step": 25550 }, { "epoch": 0.9260628465804066, "grad_norm": 2.2837988341450983, "learning_rate": 1.4267705863030612e-07, "loss": 0.9013, "step": 25551 }, { "epoch": 0.9260990902830634, "grad_norm": 2.291978156559211, "learning_rate": 1.4253788170781267e-07, "loss": 0.838, "step": 25552 }, { "epoch": 0.92613533398572, "grad_norm": 2.60324575650437, "learning_rate": 1.4239877171821072e-07, "loss": 0.9037, "step": 25553 }, { "epoch": 0.9261715776883767, "grad_norm": 2.061533658688066, "learning_rate": 1.422597286634153e-07, "loss": 0.7214, "step": 25554 }, { "epoch": 0.9262078213910333, "grad_norm": 2.122265005626649, "learning_rate": 1.4212075254534274e-07, "loss": 0.9721, "step": 25555 }, { "epoch": 0.92624406509369, "grad_norm": 2.058890321587214, "learning_rate": 1.4198184336590814e-07, "loss": 0.7491, "step": 25556 }, { "epoch": 0.9262803087963466, "grad_norm": 2.4076553455934886, "learning_rate": 1.4184300112702666e-07, "loss": 0.9204, "step": 25557 }, { "epoch": 0.9263165524990032, "grad_norm": 2.210161046634318, "learning_rate": 1.4170422583060893e-07, "loss": 0.7861, "step": 25558 }, { "epoch": 0.92635279620166, "grad_norm": 2.3273411188427455, "learning_rate": 1.4156551747857018e-07, "loss": 0.9222, "step": 25559 }, { "epoch": 0.9263890399043166, "grad_norm": 2.4312782874119656, "learning_rate": 1.414268760728199e-07, "loss": 0.8309, "step": 25560 }, { "epoch": 0.9264252836069733, "grad_norm": 2.4963597397307273, "learning_rate": 1.4128830161526885e-07, "loss": 0.8137, "step": 25561 }, { "epoch": 0.9264615273096299, "grad_norm": 2.1933540418104926, "learning_rate": 1.411497941078266e-07, "loss": 0.798, "step": 25562 }, { "epoch": 0.9264977710122866, "grad_norm": 2.4753837101065255, "learning_rate": 1.410113535524016e-07, "loss": 1.0551, "step": 25563 }, { "epoch": 0.9265340147149432, "grad_norm": 2.288092242172234, "learning_rate": 1.4087297995090176e-07, "loss": 1.085, "step": 25564 }, { "epoch": 0.9265702584176, "grad_norm": 2.3370090982182425, "learning_rate": 1.4073467330523282e-07, "loss": 0.8296, "step": 25565 }, { "epoch": 0.9266065021202566, "grad_norm": 2.52571589004228, "learning_rate": 1.4059643361730213e-07, "loss": 0.9069, "step": 25566 }, { "epoch": 0.9266427458229133, "grad_norm": 3.716110612421207, "learning_rate": 1.4045826088901316e-07, "loss": 0.8297, "step": 25567 }, { "epoch": 0.9266789895255699, "grad_norm": 2.6031479341677097, "learning_rate": 1.4032015512227104e-07, "loss": 0.9614, "step": 25568 }, { "epoch": 0.9267152332282266, "grad_norm": 2.3001498654992605, "learning_rate": 1.4018211631897705e-07, "loss": 0.7892, "step": 25569 }, { "epoch": 0.9267514769308832, "grad_norm": 2.520850738508493, "learning_rate": 1.400441444810352e-07, "loss": 0.7818, "step": 25570 }, { "epoch": 0.9267877206335399, "grad_norm": 2.4582886297543554, "learning_rate": 1.3990623961034566e-07, "loss": 0.9043, "step": 25571 }, { "epoch": 0.9268239643361966, "grad_norm": 2.2732356773816296, "learning_rate": 1.3976840170880912e-07, "loss": 0.766, "step": 25572 }, { "epoch": 0.9268602080388533, "grad_norm": 2.612975579205113, "learning_rate": 1.3963063077832462e-07, "loss": 0.8309, "step": 25573 }, { "epoch": 0.9268964517415099, "grad_norm": 2.3019164026084065, "learning_rate": 1.3949292682078952e-07, "loss": 0.9293, "step": 25574 }, { "epoch": 0.9269326954441666, "grad_norm": 2.25080969288165, "learning_rate": 1.3935528983810343e-07, "loss": 0.8266, "step": 25575 }, { "epoch": 0.9269689391468232, "grad_norm": 2.37243125644553, "learning_rate": 1.3921771983216147e-07, "loss": 0.8123, "step": 25576 }, { "epoch": 0.9270051828494799, "grad_norm": 2.7904860545472863, "learning_rate": 1.3908021680485996e-07, "loss": 0.8232, "step": 25577 }, { "epoch": 0.9270414265521366, "grad_norm": 2.566814613938796, "learning_rate": 1.3894278075809232e-07, "loss": 0.9298, "step": 25578 }, { "epoch": 0.9270776702547933, "grad_norm": 2.6293757406873333, "learning_rate": 1.3880541169375428e-07, "loss": 1.0136, "step": 25579 }, { "epoch": 0.9271139139574499, "grad_norm": 2.3410249276444604, "learning_rate": 1.3866810961373766e-07, "loss": 0.7599, "step": 25580 }, { "epoch": 0.9271501576601066, "grad_norm": 2.7520234122578393, "learning_rate": 1.385308745199343e-07, "loss": 0.7861, "step": 25581 }, { "epoch": 0.9271864013627632, "grad_norm": 2.277589208771046, "learning_rate": 1.3839370641423488e-07, "loss": 0.8954, "step": 25582 }, { "epoch": 0.9272226450654198, "grad_norm": 2.4023387686604916, "learning_rate": 1.382566052985307e-07, "loss": 0.9235, "step": 25583 }, { "epoch": 0.9272588887680765, "grad_norm": 2.523230852752417, "learning_rate": 1.381195711747102e-07, "loss": 0.8574, "step": 25584 }, { "epoch": 0.9272951324707333, "grad_norm": 2.3155315865158417, "learning_rate": 1.3798260404466078e-07, "loss": 0.8766, "step": 25585 }, { "epoch": 0.9273313761733899, "grad_norm": 2.211821206279703, "learning_rate": 1.3784570391027095e-07, "loss": 0.829, "step": 25586 }, { "epoch": 0.9273676198760465, "grad_norm": 2.6474702652616506, "learning_rate": 1.3770887077342699e-07, "loss": 0.7972, "step": 25587 }, { "epoch": 0.9274038635787032, "grad_norm": 2.091539729591932, "learning_rate": 1.3757210463601456e-07, "loss": 0.8666, "step": 25588 }, { "epoch": 0.9274401072813598, "grad_norm": 2.5112552042657486, "learning_rate": 1.3743540549991718e-07, "loss": 0.879, "step": 25589 }, { "epoch": 0.9274763509840165, "grad_norm": 2.2894448996346823, "learning_rate": 1.3729877336701947e-07, "loss": 0.9549, "step": 25590 }, { "epoch": 0.9275125946866732, "grad_norm": 2.746481197691225, "learning_rate": 1.3716220823920378e-07, "loss": 0.7655, "step": 25591 }, { "epoch": 0.9275488383893299, "grad_norm": 2.197673701596495, "learning_rate": 1.3702571011835198e-07, "loss": 0.7879, "step": 25592 }, { "epoch": 0.9275850820919865, "grad_norm": 2.5535080106041277, "learning_rate": 1.3688927900634474e-07, "loss": 0.9854, "step": 25593 }, { "epoch": 0.9276213257946432, "grad_norm": 2.094909852739648, "learning_rate": 1.367529149050617e-07, "loss": 1.087, "step": 25594 }, { "epoch": 0.9276575694972998, "grad_norm": 2.708495081218319, "learning_rate": 1.3661661781638303e-07, "loss": 0.9463, "step": 25595 }, { "epoch": 0.9276938131999565, "grad_norm": 2.1944254454616243, "learning_rate": 1.3648038774218553e-07, "loss": 1.036, "step": 25596 }, { "epoch": 0.9277300569026131, "grad_norm": 2.1497584421066023, "learning_rate": 1.3634422468434715e-07, "loss": 0.8498, "step": 25597 }, { "epoch": 0.9277663006052699, "grad_norm": 2.5533295182147184, "learning_rate": 1.3620812864474365e-07, "loss": 0.8114, "step": 25598 }, { "epoch": 0.9278025443079265, "grad_norm": 2.6665930232387867, "learning_rate": 1.3607209962525126e-07, "loss": 0.9155, "step": 25599 }, { "epoch": 0.9278387880105832, "grad_norm": 2.065265250423041, "learning_rate": 1.3593613762774295e-07, "loss": 0.8504, "step": 25600 }, { "epoch": 0.9278750317132398, "grad_norm": 2.2935549254418097, "learning_rate": 1.3580024265409387e-07, "loss": 0.737, "step": 25601 }, { "epoch": 0.9279112754158965, "grad_norm": 2.2167682045781016, "learning_rate": 1.3566441470617474e-07, "loss": 1.0029, "step": 25602 }, { "epoch": 0.9279475191185531, "grad_norm": 2.1821467859008115, "learning_rate": 1.3552865378585857e-07, "loss": 0.98, "step": 25603 }, { "epoch": 0.9279837628212099, "grad_norm": 2.594776470832779, "learning_rate": 1.35392959895016e-07, "loss": 0.8831, "step": 25604 }, { "epoch": 0.9280200065238665, "grad_norm": 2.3936377482175066, "learning_rate": 1.3525733303551558e-07, "loss": 1.0292, "step": 25605 }, { "epoch": 0.9280562502265232, "grad_norm": 2.3085907946285302, "learning_rate": 1.3512177320922747e-07, "loss": 0.8532, "step": 25606 }, { "epoch": 0.9280924939291798, "grad_norm": 2.433338447430897, "learning_rate": 1.3498628041801854e-07, "loss": 0.7184, "step": 25607 }, { "epoch": 0.9281287376318365, "grad_norm": 2.8022701376621844, "learning_rate": 1.348508546637578e-07, "loss": 0.8053, "step": 25608 }, { "epoch": 0.9281649813344931, "grad_norm": 2.246844513249817, "learning_rate": 1.347154959483088e-07, "loss": 0.6829, "step": 25609 }, { "epoch": 0.9282012250371497, "grad_norm": 2.3133994928337827, "learning_rate": 1.345802042735378e-07, "loss": 0.8898, "step": 25610 }, { "epoch": 0.9282374687398065, "grad_norm": 2.1141353371846776, "learning_rate": 1.3444497964131e-07, "loss": 0.7745, "step": 25611 }, { "epoch": 0.9282737124424632, "grad_norm": 2.4260173191988352, "learning_rate": 1.3430982205348664e-07, "loss": 0.8131, "step": 25612 }, { "epoch": 0.9283099561451198, "grad_norm": 2.477619230293013, "learning_rate": 1.341747315119324e-07, "loss": 0.8956, "step": 25613 }, { "epoch": 0.9283461998477764, "grad_norm": 2.7398328220347468, "learning_rate": 1.340397080185063e-07, "loss": 0.9604, "step": 25614 }, { "epoch": 0.9283824435504331, "grad_norm": 2.700671973777116, "learning_rate": 1.3390475157507132e-07, "loss": 0.858, "step": 25615 }, { "epoch": 0.9284186872530897, "grad_norm": 2.287153081340539, "learning_rate": 1.3376986218348543e-07, "loss": 0.8059, "step": 25616 }, { "epoch": 0.9284549309557465, "grad_norm": 2.269300687789264, "learning_rate": 1.336350398456082e-07, "loss": 0.8342, "step": 25617 }, { "epoch": 0.9284911746584031, "grad_norm": 2.006225088170305, "learning_rate": 1.3350028456329655e-07, "loss": 0.8002, "step": 25618 }, { "epoch": 0.9285274183610598, "grad_norm": 2.7585927963526675, "learning_rate": 1.3336559633840784e-07, "loss": 0.9173, "step": 25619 }, { "epoch": 0.9285636620637164, "grad_norm": 2.2764098487945037, "learning_rate": 1.3323097517279782e-07, "loss": 0.7557, "step": 25620 }, { "epoch": 0.9285999057663731, "grad_norm": 2.2958064636196225, "learning_rate": 1.3309642106832221e-07, "loss": 0.7579, "step": 25621 }, { "epoch": 0.9286361494690297, "grad_norm": 2.273207657632635, "learning_rate": 1.3296193402683455e-07, "loss": 0.7799, "step": 25622 }, { "epoch": 0.9286723931716864, "grad_norm": 2.2887337880362777, "learning_rate": 1.3282751405018725e-07, "loss": 0.9328, "step": 25623 }, { "epoch": 0.9287086368743431, "grad_norm": 2.25631259956436, "learning_rate": 1.3269316114023434e-07, "loss": 0.7937, "step": 25624 }, { "epoch": 0.9287448805769998, "grad_norm": 2.1538175827027475, "learning_rate": 1.3255887529882495e-07, "loss": 0.9077, "step": 25625 }, { "epoch": 0.9287811242796564, "grad_norm": 2.2414792867399536, "learning_rate": 1.324246565278109e-07, "loss": 0.9049, "step": 25626 }, { "epoch": 0.9288173679823131, "grad_norm": 2.196475120438612, "learning_rate": 1.322905048290407e-07, "loss": 0.88, "step": 25627 }, { "epoch": 0.9288536116849697, "grad_norm": 2.558497208088861, "learning_rate": 1.321564202043646e-07, "loss": 0.9124, "step": 25628 }, { "epoch": 0.9288898553876264, "grad_norm": 2.321199384635739, "learning_rate": 1.3202240265562827e-07, "loss": 0.6917, "step": 25629 }, { "epoch": 0.928926099090283, "grad_norm": 2.6368699458236944, "learning_rate": 1.3188845218467917e-07, "loss": 0.8972, "step": 25630 }, { "epoch": 0.9289623427929398, "grad_norm": 2.5138033662004404, "learning_rate": 1.3175456879336356e-07, "loss": 0.7828, "step": 25631 }, { "epoch": 0.9289985864955964, "grad_norm": 2.364571130536393, "learning_rate": 1.3162075248352557e-07, "loss": 0.8557, "step": 25632 }, { "epoch": 0.9290348301982531, "grad_norm": 2.1805612387529054, "learning_rate": 1.3148700325700925e-07, "loss": 0.8622, "step": 25633 }, { "epoch": 0.9290710739009097, "grad_norm": 2.1169295245297, "learning_rate": 1.3135332111565703e-07, "loss": 0.7391, "step": 25634 }, { "epoch": 0.9291073176035664, "grad_norm": 2.012840049408254, "learning_rate": 1.312197060613124e-07, "loss": 0.8547, "step": 25635 }, { "epoch": 0.929143561306223, "grad_norm": 2.2718107858303958, "learning_rate": 1.310861580958156e-07, "loss": 0.72, "step": 25636 }, { "epoch": 0.9291798050088798, "grad_norm": 2.3332117241463095, "learning_rate": 1.309526772210068e-07, "loss": 0.7765, "step": 25637 }, { "epoch": 0.9292160487115364, "grad_norm": 2.481454084532404, "learning_rate": 1.3081926343872509e-07, "loss": 0.7573, "step": 25638 }, { "epoch": 0.929252292414193, "grad_norm": 2.093880938527997, "learning_rate": 1.3068591675080955e-07, "loss": 0.7031, "step": 25639 }, { "epoch": 0.9292885361168497, "grad_norm": 2.4572397740616054, "learning_rate": 1.3055263715909761e-07, "loss": 0.9195, "step": 25640 }, { "epoch": 0.9293247798195063, "grad_norm": 2.39347790919491, "learning_rate": 1.304194246654239e-07, "loss": 0.9288, "step": 25641 }, { "epoch": 0.929361023522163, "grad_norm": 2.315349903540338, "learning_rate": 1.3028627927162696e-07, "loss": 0.7722, "step": 25642 }, { "epoch": 0.9293972672248196, "grad_norm": 2.246123265728428, "learning_rate": 1.301532009795392e-07, "loss": 0.8401, "step": 25643 }, { "epoch": 0.9294335109274764, "grad_norm": 2.4772602813189484, "learning_rate": 1.3002018979099583e-07, "loss": 0.8508, "step": 25644 }, { "epoch": 0.929469754630133, "grad_norm": 2.323840226935041, "learning_rate": 1.2988724570782763e-07, "loss": 0.9527, "step": 25645 }, { "epoch": 0.9295059983327897, "grad_norm": 2.640765408411233, "learning_rate": 1.297543687318692e-07, "loss": 0.9538, "step": 25646 }, { "epoch": 0.9295422420354463, "grad_norm": 2.5475011305803488, "learning_rate": 1.2962155886494966e-07, "loss": 0.8218, "step": 25647 }, { "epoch": 0.929578485738103, "grad_norm": 2.277508891364459, "learning_rate": 1.2948881610889974e-07, "loss": 0.9249, "step": 25648 }, { "epoch": 0.9296147294407596, "grad_norm": 2.3730929796645595, "learning_rate": 1.29356140465548e-07, "loss": 0.916, "step": 25649 }, { "epoch": 0.9296509731434164, "grad_norm": 2.4027579573154307, "learning_rate": 1.29223531936723e-07, "loss": 0.8545, "step": 25650 }, { "epoch": 0.929687216846073, "grad_norm": 2.4024150123574683, "learning_rate": 1.290909905242521e-07, "loss": 0.9489, "step": 25651 }, { "epoch": 0.9297234605487297, "grad_norm": 2.447650053337542, "learning_rate": 1.289585162299617e-07, "loss": 0.8965, "step": 25652 }, { "epoch": 0.9297597042513863, "grad_norm": 2.5095349163525777, "learning_rate": 1.288261090556764e-07, "loss": 0.9488, "step": 25653 }, { "epoch": 0.929795947954043, "grad_norm": 2.540971084646585, "learning_rate": 1.2869376900322194e-07, "loss": 0.8662, "step": 25654 }, { "epoch": 0.9298321916566996, "grad_norm": 2.3800716022370443, "learning_rate": 1.2856149607442136e-07, "loss": 0.8252, "step": 25655 }, { "epoch": 0.9298684353593563, "grad_norm": 2.1588671664213157, "learning_rate": 1.2842929027109706e-07, "loss": 0.9606, "step": 25656 }, { "epoch": 0.929904679062013, "grad_norm": 2.4945662740686654, "learning_rate": 1.282971515950704e-07, "loss": 0.7721, "step": 25657 }, { "epoch": 0.9299409227646697, "grad_norm": 2.2192808265762314, "learning_rate": 1.281650800481632e-07, "loss": 0.8209, "step": 25658 }, { "epoch": 0.9299771664673263, "grad_norm": 2.5213319746258755, "learning_rate": 1.2803307563219458e-07, "loss": 0.8829, "step": 25659 }, { "epoch": 0.930013410169983, "grad_norm": 2.3472304617478073, "learning_rate": 1.2790113834898422e-07, "loss": 0.8425, "step": 25660 }, { "epoch": 0.9300496538726396, "grad_norm": 2.5373356099857465, "learning_rate": 1.2776926820034897e-07, "loss": 0.6976, "step": 25661 }, { "epoch": 0.9300858975752962, "grad_norm": 2.3045955757346297, "learning_rate": 1.276374651881068e-07, "loss": 0.7615, "step": 25662 }, { "epoch": 0.930122141277953, "grad_norm": 2.2363828248287785, "learning_rate": 1.2750572931407357e-07, "loss": 0.7624, "step": 25663 }, { "epoch": 0.9301583849806097, "grad_norm": 2.102286238354508, "learning_rate": 1.2737406058006496e-07, "loss": 0.7966, "step": 25664 }, { "epoch": 0.9301946286832663, "grad_norm": 2.311128534679291, "learning_rate": 1.27242458987894e-07, "loss": 0.7828, "step": 25665 }, { "epoch": 0.930230872385923, "grad_norm": 2.369253372882817, "learning_rate": 1.2711092453937647e-07, "loss": 0.7307, "step": 25666 }, { "epoch": 0.9302671160885796, "grad_norm": 2.306334624761482, "learning_rate": 1.2697945723632199e-07, "loss": 0.8177, "step": 25667 }, { "epoch": 0.9303033597912362, "grad_norm": 2.445213897549657, "learning_rate": 1.268480570805447e-07, "loss": 0.9458, "step": 25668 }, { "epoch": 0.9303396034938929, "grad_norm": 2.3941558716360603, "learning_rate": 1.2671672407385316e-07, "loss": 0.8307, "step": 25669 }, { "epoch": 0.9303758471965496, "grad_norm": 2.4787131977773145, "learning_rate": 1.265854582180581e-07, "loss": 0.7826, "step": 25670 }, { "epoch": 0.9304120908992063, "grad_norm": 2.633958364057866, "learning_rate": 1.2645425951496815e-07, "loss": 0.888, "step": 25671 }, { "epoch": 0.9304483346018629, "grad_norm": 2.6488043618625063, "learning_rate": 1.2632312796639124e-07, "loss": 0.8952, "step": 25672 }, { "epoch": 0.9304845783045196, "grad_norm": 2.3004041183451847, "learning_rate": 1.2619206357413371e-07, "loss": 1.019, "step": 25673 }, { "epoch": 0.9305208220071762, "grad_norm": 2.6424476607225635, "learning_rate": 1.2606106634000192e-07, "loss": 1.0069, "step": 25674 }, { "epoch": 0.9305570657098329, "grad_norm": 2.126309378956042, "learning_rate": 1.2593013626580165e-07, "loss": 0.8867, "step": 25675 }, { "epoch": 0.9305933094124896, "grad_norm": 2.3907382078595534, "learning_rate": 1.257992733533353e-07, "loss": 0.9176, "step": 25676 }, { "epoch": 0.9306295531151463, "grad_norm": 2.1718755239837435, "learning_rate": 1.2566847760440758e-07, "loss": 0.6674, "step": 25677 }, { "epoch": 0.9306657968178029, "grad_norm": 2.5306063022203755, "learning_rate": 1.2553774902082095e-07, "loss": 0.8351, "step": 25678 }, { "epoch": 0.9307020405204596, "grad_norm": 2.3251560472433828, "learning_rate": 1.254070876043756e-07, "loss": 0.8963, "step": 25679 }, { "epoch": 0.9307382842231162, "grad_norm": 2.382711839749718, "learning_rate": 1.2527649335687285e-07, "loss": 0.8642, "step": 25680 }, { "epoch": 0.9307745279257729, "grad_norm": 2.701275813883222, "learning_rate": 1.2514596628011132e-07, "loss": 0.8909, "step": 25681 }, { "epoch": 0.9308107716284295, "grad_norm": 2.430551494111882, "learning_rate": 1.2501550637589066e-07, "loss": 0.9158, "step": 25682 }, { "epoch": 0.9308470153310863, "grad_norm": 2.4740243653498486, "learning_rate": 1.2488511364600774e-07, "loss": 1.015, "step": 25683 }, { "epoch": 0.9308832590337429, "grad_norm": 2.0853528028616015, "learning_rate": 1.2475478809226005e-07, "loss": 0.8839, "step": 25684 }, { "epoch": 0.9309195027363996, "grad_norm": 2.1870563083784784, "learning_rate": 1.2462452971644223e-07, "loss": 0.7962, "step": 25685 }, { "epoch": 0.9309557464390562, "grad_norm": 2.1693319727014266, "learning_rate": 1.2449433852035009e-07, "loss": 0.9564, "step": 25686 }, { "epoch": 0.9309919901417129, "grad_norm": 2.8547892275213975, "learning_rate": 1.2436421450577774e-07, "loss": 0.9506, "step": 25687 }, { "epoch": 0.9310282338443695, "grad_norm": 2.2777690212362627, "learning_rate": 1.2423415767451763e-07, "loss": 0.9931, "step": 25688 }, { "epoch": 0.9310644775470263, "grad_norm": 2.2758225024322782, "learning_rate": 1.2410416802836167e-07, "loss": 0.7928, "step": 25689 }, { "epoch": 0.9311007212496829, "grad_norm": 2.2120805929460388, "learning_rate": 1.2397424556910175e-07, "loss": 0.9673, "step": 25690 }, { "epoch": 0.9311369649523396, "grad_norm": 2.098931021460826, "learning_rate": 1.2384439029852813e-07, "loss": 0.8697, "step": 25691 }, { "epoch": 0.9311732086549962, "grad_norm": 2.4287082719461823, "learning_rate": 1.2371460221842934e-07, "loss": 0.7791, "step": 25692 }, { "epoch": 0.9312094523576528, "grad_norm": 2.507266820882262, "learning_rate": 1.2358488133059455e-07, "loss": 0.8407, "step": 25693 }, { "epoch": 0.9312456960603095, "grad_norm": 2.513820649212217, "learning_rate": 1.2345522763681006e-07, "loss": 0.9809, "step": 25694 }, { "epoch": 0.9312819397629661, "grad_norm": 2.3831988128515817, "learning_rate": 1.2332564113886448e-07, "loss": 0.995, "step": 25695 }, { "epoch": 0.9313181834656229, "grad_norm": 2.6568248240142385, "learning_rate": 1.231961218385419e-07, "loss": 0.9327, "step": 25696 }, { "epoch": 0.9313544271682795, "grad_norm": 2.58456090568788, "learning_rate": 1.2306666973762704e-07, "loss": 0.9054, "step": 25697 }, { "epoch": 0.9313906708709362, "grad_norm": 2.272910031793819, "learning_rate": 1.2293728483790456e-07, "loss": 0.8031, "step": 25698 }, { "epoch": 0.9314269145735928, "grad_norm": 2.104396329043469, "learning_rate": 1.2280796714115583e-07, "loss": 0.9659, "step": 25699 }, { "epoch": 0.9314631582762495, "grad_norm": 2.0661472523404454, "learning_rate": 1.2267871664916498e-07, "loss": 0.8864, "step": 25700 }, { "epoch": 0.9314994019789061, "grad_norm": 2.3928378862290094, "learning_rate": 1.2254953336371001e-07, "loss": 0.9545, "step": 25701 }, { "epoch": 0.9315356456815628, "grad_norm": 2.375316015832243, "learning_rate": 1.2242041728657394e-07, "loss": 0.8197, "step": 25702 }, { "epoch": 0.9315718893842195, "grad_norm": 2.2263809300079074, "learning_rate": 1.2229136841953426e-07, "loss": 0.9191, "step": 25703 }, { "epoch": 0.9316081330868762, "grad_norm": 2.217359124137956, "learning_rate": 1.2216238676437008e-07, "loss": 0.9016, "step": 25704 }, { "epoch": 0.9316443767895328, "grad_norm": 2.1956923028335282, "learning_rate": 1.2203347232285723e-07, "loss": 0.8459, "step": 25705 }, { "epoch": 0.9316806204921895, "grad_norm": 2.2033068696849427, "learning_rate": 1.219046250967737e-07, "loss": 0.6576, "step": 25706 }, { "epoch": 0.9317168641948461, "grad_norm": 2.178352714408301, "learning_rate": 1.2177584508789419e-07, "loss": 0.8439, "step": 25707 }, { "epoch": 0.9317531078975028, "grad_norm": 2.3700127518766854, "learning_rate": 1.216471322979934e-07, "loss": 0.7369, "step": 25708 }, { "epoch": 0.9317893516001595, "grad_norm": 2.1761243908428134, "learning_rate": 1.2151848672884435e-07, "loss": 0.8949, "step": 25709 }, { "epoch": 0.9318255953028162, "grad_norm": 2.349558956636182, "learning_rate": 1.2138990838222008e-07, "loss": 0.9346, "step": 25710 }, { "epoch": 0.9318618390054728, "grad_norm": 2.488976582888265, "learning_rate": 1.2126139725989306e-07, "loss": 0.9132, "step": 25711 }, { "epoch": 0.9318980827081295, "grad_norm": 2.5816472406968827, "learning_rate": 1.2113295336363296e-07, "loss": 0.8498, "step": 25712 }, { "epoch": 0.9319343264107861, "grad_norm": 2.171192901422578, "learning_rate": 1.2100457669521003e-07, "loss": 0.8426, "step": 25713 }, { "epoch": 0.9319705701134428, "grad_norm": 2.6752809659372834, "learning_rate": 1.2087626725639345e-07, "loss": 0.9323, "step": 25714 }, { "epoch": 0.9320068138160994, "grad_norm": 2.252787801110114, "learning_rate": 1.2074802504895178e-07, "loss": 0.895, "step": 25715 }, { "epoch": 0.9320430575187562, "grad_norm": 2.4145358510495156, "learning_rate": 1.2061985007465027e-07, "loss": 1.0574, "step": 25716 }, { "epoch": 0.9320793012214128, "grad_norm": 2.7273610354897224, "learning_rate": 1.2049174233525695e-07, "loss": 0.8658, "step": 25717 }, { "epoch": 0.9321155449240695, "grad_norm": 2.7823108989942873, "learning_rate": 1.2036370183253654e-07, "loss": 0.959, "step": 25718 }, { "epoch": 0.9321517886267261, "grad_norm": 2.6796690660163294, "learning_rate": 1.202357285682526e-07, "loss": 0.9277, "step": 25719 }, { "epoch": 0.9321880323293827, "grad_norm": 2.2048536357754625, "learning_rate": 1.2010782254416985e-07, "loss": 0.9508, "step": 25720 }, { "epoch": 0.9322242760320394, "grad_norm": 2.164121808586999, "learning_rate": 1.199799837620491e-07, "loss": 0.78, "step": 25721 }, { "epoch": 0.9322605197346961, "grad_norm": 2.6420602637662602, "learning_rate": 1.1985221222365395e-07, "loss": 0.9568, "step": 25722 }, { "epoch": 0.9322967634373528, "grad_norm": 2.0741244981144984, "learning_rate": 1.1972450793074297e-07, "loss": 0.8575, "step": 25723 }, { "epoch": 0.9323330071400094, "grad_norm": 2.223923042239453, "learning_rate": 1.19596870885077e-07, "loss": 0.9352, "step": 25724 }, { "epoch": 0.9323692508426661, "grad_norm": 2.3466781280666584, "learning_rate": 1.1946930108841515e-07, "loss": 0.8552, "step": 25725 }, { "epoch": 0.9324054945453227, "grad_norm": 2.2814919848208204, "learning_rate": 1.1934179854251382e-07, "loss": 0.7847, "step": 25726 }, { "epoch": 0.9324417382479794, "grad_norm": 2.304767259773119, "learning_rate": 1.192143632491316e-07, "loss": 0.8296, "step": 25727 }, { "epoch": 0.932477981950636, "grad_norm": 2.262584432727102, "learning_rate": 1.1908699521002265e-07, "loss": 1.1037, "step": 25728 }, { "epoch": 0.9325142256532928, "grad_norm": 2.4394135069851304, "learning_rate": 1.1895969442694389e-07, "loss": 0.8732, "step": 25729 }, { "epoch": 0.9325504693559494, "grad_norm": 2.3909471151559893, "learning_rate": 1.1883246090164835e-07, "loss": 0.9964, "step": 25730 }, { "epoch": 0.9325867130586061, "grad_norm": 2.126068070469828, "learning_rate": 1.1870529463588964e-07, "loss": 0.7144, "step": 25731 }, { "epoch": 0.9326229567612627, "grad_norm": 2.6286885277927974, "learning_rate": 1.1857819563141915e-07, "loss": 0.9119, "step": 25732 }, { "epoch": 0.9326592004639194, "grad_norm": 2.4520164161894877, "learning_rate": 1.184511638899899e-07, "loss": 0.8728, "step": 25733 }, { "epoch": 0.932695444166576, "grad_norm": 2.122923108632356, "learning_rate": 1.1832419941335049e-07, "loss": 1.0165, "step": 25734 }, { "epoch": 0.9327316878692328, "grad_norm": 2.596396457773276, "learning_rate": 1.1819730220325231e-07, "loss": 0.9794, "step": 25735 }, { "epoch": 0.9327679315718894, "grad_norm": 2.470056303400231, "learning_rate": 1.1807047226144175e-07, "loss": 0.9263, "step": 25736 }, { "epoch": 0.9328041752745461, "grad_norm": 2.4384965973545505, "learning_rate": 1.1794370958966795e-07, "loss": 0.9315, "step": 25737 }, { "epoch": 0.9328404189772027, "grad_norm": 2.3766402089719496, "learning_rate": 1.1781701418967783e-07, "loss": 0.8784, "step": 25738 }, { "epoch": 0.9328766626798594, "grad_norm": 2.1784250434567762, "learning_rate": 1.1769038606321615e-07, "loss": 0.8004, "step": 25739 }, { "epoch": 0.932912906382516, "grad_norm": 2.287761434095991, "learning_rate": 1.1756382521202814e-07, "loss": 0.8139, "step": 25740 }, { "epoch": 0.9329491500851727, "grad_norm": 2.918932035684893, "learning_rate": 1.1743733163785742e-07, "loss": 0.9102, "step": 25741 }, { "epoch": 0.9329853937878294, "grad_norm": 2.4273545082792594, "learning_rate": 1.1731090534244815e-07, "loss": 0.8769, "step": 25742 }, { "epoch": 0.933021637490486, "grad_norm": 2.5574984584065734, "learning_rate": 1.1718454632754118e-07, "loss": 0.8983, "step": 25743 }, { "epoch": 0.9330578811931427, "grad_norm": 2.375311174414114, "learning_rate": 1.1705825459487841e-07, "loss": 0.7199, "step": 25744 }, { "epoch": 0.9330941248957993, "grad_norm": 2.5829855103920916, "learning_rate": 1.1693203014620013e-07, "loss": 1.0464, "step": 25745 }, { "epoch": 0.933130368598456, "grad_norm": 2.484664712741587, "learning_rate": 1.1680587298324441e-07, "loss": 0.9012, "step": 25746 }, { "epoch": 0.9331666123011126, "grad_norm": 2.2122816183354863, "learning_rate": 1.1667978310775151e-07, "loss": 0.8676, "step": 25747 }, { "epoch": 0.9332028560037694, "grad_norm": 2.209813671716807, "learning_rate": 1.1655376052145673e-07, "loss": 0.8699, "step": 25748 }, { "epoch": 0.933239099706426, "grad_norm": 2.0479644415049796, "learning_rate": 1.1642780522609865e-07, "loss": 0.7339, "step": 25749 }, { "epoch": 0.9332753434090827, "grad_norm": 2.3634818381434135, "learning_rate": 1.1630191722341145e-07, "loss": 0.6882, "step": 25750 }, { "epoch": 0.9333115871117393, "grad_norm": 2.6228662660421174, "learning_rate": 1.161760965151304e-07, "loss": 0.8584, "step": 25751 }, { "epoch": 0.933347830814396, "grad_norm": 2.844433043640878, "learning_rate": 1.1605034310298858e-07, "loss": 0.9525, "step": 25752 }, { "epoch": 0.9333840745170526, "grad_norm": 2.742061147800173, "learning_rate": 1.1592465698872013e-07, "loss": 1.0673, "step": 25753 }, { "epoch": 0.9334203182197093, "grad_norm": 2.292011665901814, "learning_rate": 1.1579903817405591e-07, "loss": 0.9398, "step": 25754 }, { "epoch": 0.933456561922366, "grad_norm": 2.2729561261090843, "learning_rate": 1.156734866607273e-07, "loss": 0.8805, "step": 25755 }, { "epoch": 0.9334928056250227, "grad_norm": 2.218658293477583, "learning_rate": 1.155480024504635e-07, "loss": 0.8691, "step": 25756 }, { "epoch": 0.9335290493276793, "grad_norm": 2.295495801528843, "learning_rate": 1.154225855449942e-07, "loss": 0.7609, "step": 25757 }, { "epoch": 0.933565293030336, "grad_norm": 2.5499485755658995, "learning_rate": 1.1529723594604802e-07, "loss": 0.7276, "step": 25758 }, { "epoch": 0.9336015367329926, "grad_norm": 2.1517729957677134, "learning_rate": 1.1517195365535139e-07, "loss": 0.9887, "step": 25759 }, { "epoch": 0.9336377804356493, "grad_norm": 2.2724289746343365, "learning_rate": 1.1504673867463124e-07, "loss": 0.8816, "step": 25760 }, { "epoch": 0.933674024138306, "grad_norm": 2.2439434660083233, "learning_rate": 1.1492159100561228e-07, "loss": 1.0312, "step": 25761 }, { "epoch": 0.9337102678409627, "grad_norm": 2.3173544078372292, "learning_rate": 1.147965106500204e-07, "loss": 0.7707, "step": 25762 }, { "epoch": 0.9337465115436193, "grad_norm": 2.2538910069052753, "learning_rate": 1.1467149760957696e-07, "loss": 0.9159, "step": 25763 }, { "epoch": 0.933782755246276, "grad_norm": 2.520334154329283, "learning_rate": 1.1454655188600616e-07, "loss": 0.8149, "step": 25764 }, { "epoch": 0.9338189989489326, "grad_norm": 2.5383630761646647, "learning_rate": 1.1442167348102995e-07, "loss": 1.0913, "step": 25765 }, { "epoch": 0.9338552426515893, "grad_norm": 2.7296980525240135, "learning_rate": 1.142968623963675e-07, "loss": 0.825, "step": 25766 }, { "epoch": 0.9338914863542459, "grad_norm": 2.3438209528140117, "learning_rate": 1.1417211863374022e-07, "loss": 0.9171, "step": 25767 }, { "epoch": 0.9339277300569027, "grad_norm": 2.149968147872131, "learning_rate": 1.1404744219486508e-07, "loss": 0.8187, "step": 25768 }, { "epoch": 0.9339639737595593, "grad_norm": 2.6204897737681176, "learning_rate": 1.1392283308146234e-07, "loss": 0.9128, "step": 25769 }, { "epoch": 0.934000217462216, "grad_norm": 2.124450080035221, "learning_rate": 1.1379829129524733e-07, "loss": 0.8062, "step": 25770 }, { "epoch": 0.9340364611648726, "grad_norm": 2.332328168246079, "learning_rate": 1.1367381683793754e-07, "loss": 0.9322, "step": 25771 }, { "epoch": 0.9340727048675292, "grad_norm": 2.4432442038486544, "learning_rate": 1.1354940971124606e-07, "loss": 0.7275, "step": 25772 }, { "epoch": 0.9341089485701859, "grad_norm": 2.8162681042144815, "learning_rate": 1.1342506991688984e-07, "loss": 0.9267, "step": 25773 }, { "epoch": 0.9341451922728425, "grad_norm": 2.276551618231291, "learning_rate": 1.133007974565803e-07, "loss": 0.8925, "step": 25774 }, { "epoch": 0.9341814359754993, "grad_norm": 2.6939051653793165, "learning_rate": 1.131765923320305e-07, "loss": 0.9603, "step": 25775 }, { "epoch": 0.9342176796781559, "grad_norm": 2.263743193216904, "learning_rate": 1.1305245454495183e-07, "loss": 0.7491, "step": 25776 }, { "epoch": 0.9342539233808126, "grad_norm": 2.4071838532814134, "learning_rate": 1.1292838409705464e-07, "loss": 0.7801, "step": 25777 }, { "epoch": 0.9342901670834692, "grad_norm": 2.342661389798021, "learning_rate": 1.128043809900492e-07, "loss": 0.8608, "step": 25778 }, { "epoch": 0.9343264107861259, "grad_norm": 2.1574790110964943, "learning_rate": 1.1268044522564303e-07, "loss": 0.6925, "step": 25779 }, { "epoch": 0.9343626544887825, "grad_norm": 2.6083222372868184, "learning_rate": 1.1255657680554477e-07, "loss": 0.8344, "step": 25780 }, { "epoch": 0.9343988981914393, "grad_norm": 2.37209631020725, "learning_rate": 1.1243277573146084e-07, "loss": 0.8855, "step": 25781 }, { "epoch": 0.9344351418940959, "grad_norm": 2.324238092944682, "learning_rate": 1.1230904200509818e-07, "loss": 0.8255, "step": 25782 }, { "epoch": 0.9344713855967526, "grad_norm": 2.3325050888267445, "learning_rate": 1.1218537562816046e-07, "loss": 0.859, "step": 25783 }, { "epoch": 0.9345076292994092, "grad_norm": 2.2921551831133717, "learning_rate": 1.1206177660235185e-07, "loss": 0.8383, "step": 25784 }, { "epoch": 0.9345438730020659, "grad_norm": 2.419135529473251, "learning_rate": 1.1193824492937655e-07, "loss": 0.8891, "step": 25785 }, { "epoch": 0.9345801167047225, "grad_norm": 2.3718589650970836, "learning_rate": 1.1181478061093543e-07, "loss": 0.8587, "step": 25786 }, { "epoch": 0.9346163604073792, "grad_norm": 2.394679211654128, "learning_rate": 1.11691383648731e-07, "loss": 0.8475, "step": 25787 }, { "epoch": 0.9346526041100359, "grad_norm": 2.0849883379709704, "learning_rate": 1.1156805404446192e-07, "loss": 0.89, "step": 25788 }, { "epoch": 0.9346888478126926, "grad_norm": 2.092055412802883, "learning_rate": 1.1144479179982903e-07, "loss": 1.0123, "step": 25789 }, { "epoch": 0.9347250915153492, "grad_norm": 2.295178649271819, "learning_rate": 1.1132159691653044e-07, "loss": 0.9278, "step": 25790 }, { "epoch": 0.9347613352180059, "grad_norm": 2.4562840214793966, "learning_rate": 1.1119846939626422e-07, "loss": 0.952, "step": 25791 }, { "epoch": 0.9347975789206625, "grad_norm": 2.3558464104716497, "learning_rate": 1.1107540924072569e-07, "loss": 0.8392, "step": 25792 }, { "epoch": 0.9348338226233192, "grad_norm": 2.3390980311608054, "learning_rate": 1.1095241645161182e-07, "loss": 0.8084, "step": 25793 }, { "epoch": 0.9348700663259759, "grad_norm": 2.503692316305367, "learning_rate": 1.1082949103061624e-07, "loss": 0.9498, "step": 25794 }, { "epoch": 0.9349063100286326, "grad_norm": 2.4656435726677968, "learning_rate": 1.107066329794343e-07, "loss": 0.8952, "step": 25795 }, { "epoch": 0.9349425537312892, "grad_norm": 2.212811304624447, "learning_rate": 1.1058384229975738e-07, "loss": 0.9338, "step": 25796 }, { "epoch": 0.9349787974339459, "grad_norm": 2.2496120047521173, "learning_rate": 1.1046111899327805e-07, "loss": 0.825, "step": 25797 }, { "epoch": 0.9350150411366025, "grad_norm": 2.4901489098619805, "learning_rate": 1.1033846306168828e-07, "loss": 0.8504, "step": 25798 }, { "epoch": 0.9350512848392591, "grad_norm": 2.2916356417954282, "learning_rate": 1.1021587450667615e-07, "loss": 0.9624, "step": 25799 }, { "epoch": 0.9350875285419158, "grad_norm": 2.1475830192006984, "learning_rate": 1.1009335332993254e-07, "loss": 0.8371, "step": 25800 }, { "epoch": 0.9351237722445725, "grad_norm": 2.150387552917984, "learning_rate": 1.0997089953314499e-07, "loss": 0.8451, "step": 25801 }, { "epoch": 0.9351600159472292, "grad_norm": 2.2875110856591863, "learning_rate": 1.098485131180016e-07, "loss": 0.8761, "step": 25802 }, { "epoch": 0.9351962596498858, "grad_norm": 2.1185548467604836, "learning_rate": 1.0972619408618768e-07, "loss": 0.7983, "step": 25803 }, { "epoch": 0.9352325033525425, "grad_norm": 2.6390572770230527, "learning_rate": 1.096039424393891e-07, "loss": 0.9993, "step": 25804 }, { "epoch": 0.9352687470551991, "grad_norm": 2.400757004271644, "learning_rate": 1.0948175817929118e-07, "loss": 0.9032, "step": 25805 }, { "epoch": 0.9353049907578558, "grad_norm": 2.166835784469842, "learning_rate": 1.0935964130757649e-07, "loss": 0.7275, "step": 25806 }, { "epoch": 0.9353412344605125, "grad_norm": 2.2809424910077842, "learning_rate": 1.0923759182592808e-07, "loss": 0.7251, "step": 25807 }, { "epoch": 0.9353774781631692, "grad_norm": 2.2856880417796694, "learning_rate": 1.0911560973602798e-07, "loss": 0.9633, "step": 25808 }, { "epoch": 0.9354137218658258, "grad_norm": 2.3113958885417674, "learning_rate": 1.0899369503955759e-07, "loss": 0.8301, "step": 25809 }, { "epoch": 0.9354499655684825, "grad_norm": 2.140053867575891, "learning_rate": 1.0887184773819503e-07, "loss": 0.7017, "step": 25810 }, { "epoch": 0.9354862092711391, "grad_norm": 2.523474461130211, "learning_rate": 1.0875006783362063e-07, "loss": 0.8023, "step": 25811 }, { "epoch": 0.9355224529737958, "grad_norm": 2.292477155226694, "learning_rate": 1.0862835532751303e-07, "loss": 0.8902, "step": 25812 }, { "epoch": 0.9355586966764524, "grad_norm": 2.6562019619032426, "learning_rate": 1.0850671022154757e-07, "loss": 0.9961, "step": 25813 }, { "epoch": 0.9355949403791092, "grad_norm": 2.205226397258105, "learning_rate": 1.083851325174018e-07, "loss": 0.9225, "step": 25814 }, { "epoch": 0.9356311840817658, "grad_norm": 2.3631730377162197, "learning_rate": 1.0826362221674991e-07, "loss": 0.8206, "step": 25815 }, { "epoch": 0.9356674277844225, "grad_norm": 2.483126800824475, "learning_rate": 1.0814217932126725e-07, "loss": 0.8137, "step": 25816 }, { "epoch": 0.9357036714870791, "grad_norm": 2.7594106194595365, "learning_rate": 1.0802080383262691e-07, "loss": 0.9158, "step": 25817 }, { "epoch": 0.9357399151897358, "grad_norm": 2.2134453000261094, "learning_rate": 1.0789949575250146e-07, "loss": 0.8517, "step": 25818 }, { "epoch": 0.9357761588923924, "grad_norm": 2.2881122654957826, "learning_rate": 1.0777825508256178e-07, "loss": 0.9099, "step": 25819 }, { "epoch": 0.9358124025950492, "grad_norm": 1.9705636117052403, "learning_rate": 1.0765708182447987e-07, "loss": 0.6845, "step": 25820 }, { "epoch": 0.9358486462977058, "grad_norm": 2.264283299318162, "learning_rate": 1.0753597597992382e-07, "loss": 1.0475, "step": 25821 }, { "epoch": 0.9358848900003625, "grad_norm": 2.3693676830888433, "learning_rate": 1.0741493755056399e-07, "loss": 0.7711, "step": 25822 }, { "epoch": 0.9359211337030191, "grad_norm": 2.248531560608593, "learning_rate": 1.0729396653806624e-07, "loss": 0.8682, "step": 25823 }, { "epoch": 0.9359573774056758, "grad_norm": 2.4300695005421615, "learning_rate": 1.0717306294409923e-07, "loss": 1.0159, "step": 25824 }, { "epoch": 0.9359936211083324, "grad_norm": 2.2330865295573474, "learning_rate": 1.070522267703289e-07, "loss": 0.8279, "step": 25825 }, { "epoch": 0.936029864810989, "grad_norm": 2.174150208254555, "learning_rate": 1.0693145801841886e-07, "loss": 1.0511, "step": 25826 }, { "epoch": 0.9360661085136458, "grad_norm": 2.6776314866934174, "learning_rate": 1.0681075669003394e-07, "loss": 0.8907, "step": 25827 }, { "epoch": 0.9361023522163024, "grad_norm": 1.9865663071962028, "learning_rate": 1.0669012278683777e-07, "loss": 0.7021, "step": 25828 }, { "epoch": 0.9361385959189591, "grad_norm": 2.2019160753352494, "learning_rate": 1.0656955631049292e-07, "loss": 0.9285, "step": 25829 }, { "epoch": 0.9361748396216157, "grad_norm": 2.536915979985534, "learning_rate": 1.0644905726265975e-07, "loss": 0.7267, "step": 25830 }, { "epoch": 0.9362110833242724, "grad_norm": 2.240661473737557, "learning_rate": 1.0632862564499857e-07, "loss": 0.8585, "step": 25831 }, { "epoch": 0.936247327026929, "grad_norm": 2.1898333505805434, "learning_rate": 1.0620826145917029e-07, "loss": 0.9014, "step": 25832 }, { "epoch": 0.9362835707295857, "grad_norm": 2.510009040194778, "learning_rate": 1.0608796470683191e-07, "loss": 0.963, "step": 25833 }, { "epoch": 0.9363198144322424, "grad_norm": 2.305659736356586, "learning_rate": 1.0596773538964211e-07, "loss": 0.9739, "step": 25834 }, { "epoch": 0.9363560581348991, "grad_norm": 2.321699262196151, "learning_rate": 1.0584757350925567e-07, "loss": 0.9409, "step": 25835 }, { "epoch": 0.9363923018375557, "grad_norm": 2.439622907536, "learning_rate": 1.0572747906733128e-07, "loss": 0.8973, "step": 25836 }, { "epoch": 0.9364285455402124, "grad_norm": 2.3318298654760774, "learning_rate": 1.0560745206552148e-07, "loss": 0.9065, "step": 25837 }, { "epoch": 0.936464789242869, "grad_norm": 2.42299982654931, "learning_rate": 1.0548749250548162e-07, "loss": 1.043, "step": 25838 }, { "epoch": 0.9365010329455257, "grad_norm": 2.5103387004140276, "learning_rate": 1.0536760038886318e-07, "loss": 0.8246, "step": 25839 }, { "epoch": 0.9365372766481824, "grad_norm": 2.0432310094739563, "learning_rate": 1.0524777571731981e-07, "loss": 0.8649, "step": 25840 }, { "epoch": 0.9365735203508391, "grad_norm": 2.4567539987799214, "learning_rate": 1.0512801849250131e-07, "loss": 0.8166, "step": 25841 }, { "epoch": 0.9366097640534957, "grad_norm": 2.1993040460019313, "learning_rate": 1.050083287160586e-07, "loss": 0.7323, "step": 25842 }, { "epoch": 0.9366460077561524, "grad_norm": 2.354877988397374, "learning_rate": 1.0488870638964089e-07, "loss": 0.7718, "step": 25843 }, { "epoch": 0.936682251458809, "grad_norm": 2.426872583538877, "learning_rate": 1.0476915151489575e-07, "loss": 0.945, "step": 25844 }, { "epoch": 0.9367184951614657, "grad_norm": 2.6948240484184045, "learning_rate": 1.0464966409347189e-07, "loss": 0.9879, "step": 25845 }, { "epoch": 0.9367547388641223, "grad_norm": 2.4822059459129013, "learning_rate": 1.0453024412701462e-07, "loss": 0.8423, "step": 25846 }, { "epoch": 0.9367909825667791, "grad_norm": 2.281322132735416, "learning_rate": 1.0441089161716988e-07, "loss": 0.85, "step": 25847 }, { "epoch": 0.9368272262694357, "grad_norm": 2.656550484502996, "learning_rate": 1.0429160656558246e-07, "loss": 0.8204, "step": 25848 }, { "epoch": 0.9368634699720924, "grad_norm": 2.2465988112030786, "learning_rate": 1.0417238897389603e-07, "loss": 0.8033, "step": 25849 }, { "epoch": 0.936899713674749, "grad_norm": 2.226278110989707, "learning_rate": 1.0405323884375263e-07, "loss": 0.905, "step": 25850 }, { "epoch": 0.9369359573774056, "grad_norm": 2.4577408370206135, "learning_rate": 1.0393415617679537e-07, "loss": 0.8645, "step": 25851 }, { "epoch": 0.9369722010800623, "grad_norm": 2.5369704937870154, "learning_rate": 1.0381514097466406e-07, "loss": 0.8387, "step": 25852 }, { "epoch": 0.937008444782719, "grad_norm": 2.3168767661884284, "learning_rate": 1.0369619323899849e-07, "loss": 0.9022, "step": 25853 }, { "epoch": 0.9370446884853757, "grad_norm": 2.4657186967724085, "learning_rate": 1.0357731297143903e-07, "loss": 0.8911, "step": 25854 }, { "epoch": 0.9370809321880323, "grad_norm": 2.3871059159231742, "learning_rate": 1.0345850017362214e-07, "loss": 0.738, "step": 25855 }, { "epoch": 0.937117175890689, "grad_norm": 2.3709089759968336, "learning_rate": 1.033397548471865e-07, "loss": 0.7616, "step": 25856 }, { "epoch": 0.9371534195933456, "grad_norm": 2.426136542064508, "learning_rate": 1.0322107699376693e-07, "loss": 1.0086, "step": 25857 }, { "epoch": 0.9371896632960023, "grad_norm": 2.1866343888566253, "learning_rate": 1.0310246661500045e-07, "loss": 0.7707, "step": 25858 }, { "epoch": 0.9372259069986589, "grad_norm": 2.642435578849811, "learning_rate": 1.0298392371251909e-07, "loss": 0.9555, "step": 25859 }, { "epoch": 0.9372621507013157, "grad_norm": 2.7860991444304286, "learning_rate": 1.0286544828795875e-07, "loss": 0.9025, "step": 25860 }, { "epoch": 0.9372983944039723, "grad_norm": 2.033178616182422, "learning_rate": 1.0274704034295035e-07, "loss": 0.7959, "step": 25861 }, { "epoch": 0.937334638106629, "grad_norm": 2.5126285584489394, "learning_rate": 1.0262869987912649e-07, "loss": 0.8352, "step": 25862 }, { "epoch": 0.9373708818092856, "grad_norm": 2.3744788419525578, "learning_rate": 1.0251042689811641e-07, "loss": 0.9564, "step": 25863 }, { "epoch": 0.9374071255119423, "grad_norm": 2.7619355573861806, "learning_rate": 1.0239222140155103e-07, "loss": 0.9449, "step": 25864 }, { "epoch": 0.9374433692145989, "grad_norm": 2.5423084429960925, "learning_rate": 1.022740833910596e-07, "loss": 0.7981, "step": 25865 }, { "epoch": 0.9374796129172557, "grad_norm": 2.8033978805000017, "learning_rate": 1.0215601286826859e-07, "loss": 0.8585, "step": 25866 }, { "epoch": 0.9375158566199123, "grad_norm": 2.1402888284449606, "learning_rate": 1.0203800983480561e-07, "loss": 0.7681, "step": 25867 }, { "epoch": 0.937552100322569, "grad_norm": 2.3811826911614773, "learning_rate": 1.0192007429229656e-07, "loss": 0.7808, "step": 25868 }, { "epoch": 0.9375883440252256, "grad_norm": 2.2873332251351006, "learning_rate": 1.0180220624236681e-07, "loss": 0.853, "step": 25869 }, { "epoch": 0.9376245877278823, "grad_norm": 2.4492254676612353, "learning_rate": 1.0168440568664061e-07, "loss": 0.8426, "step": 25870 }, { "epoch": 0.9376608314305389, "grad_norm": 2.1928631082958097, "learning_rate": 1.0156667262674002e-07, "loss": 0.8909, "step": 25871 }, { "epoch": 0.9376970751331956, "grad_norm": 2.1437742933887463, "learning_rate": 1.0144900706428928e-07, "loss": 0.8651, "step": 25872 }, { "epoch": 0.9377333188358523, "grad_norm": 2.289659066606953, "learning_rate": 1.0133140900090822e-07, "loss": 0.8971, "step": 25873 }, { "epoch": 0.937769562538509, "grad_norm": 2.2495318642319133, "learning_rate": 1.0121387843821773e-07, "loss": 0.9463, "step": 25874 }, { "epoch": 0.9378058062411656, "grad_norm": 2.510167030515723, "learning_rate": 1.010964153778371e-07, "loss": 0.9275, "step": 25875 }, { "epoch": 0.9378420499438223, "grad_norm": 2.3906328410587947, "learning_rate": 1.0097901982138558e-07, "loss": 0.8034, "step": 25876 }, { "epoch": 0.9378782936464789, "grad_norm": 2.5170571734144658, "learning_rate": 1.0086169177047967e-07, "loss": 0.873, "step": 25877 }, { "epoch": 0.9379145373491355, "grad_norm": 2.542441194043264, "learning_rate": 1.0074443122673749e-07, "loss": 0.961, "step": 25878 }, { "epoch": 0.9379507810517923, "grad_norm": 2.6074799800846464, "learning_rate": 1.0062723819177334e-07, "loss": 0.9919, "step": 25879 }, { "epoch": 0.937987024754449, "grad_norm": 2.5399492200982396, "learning_rate": 1.0051011266720312e-07, "loss": 0.9288, "step": 25880 }, { "epoch": 0.9380232684571056, "grad_norm": 2.360285683792198, "learning_rate": 1.0039305465464056e-07, "loss": 0.9253, "step": 25881 }, { "epoch": 0.9380595121597622, "grad_norm": 2.2542701863904737, "learning_rate": 1.0027606415569879e-07, "loss": 0.9139, "step": 25882 }, { "epoch": 0.9380957558624189, "grad_norm": 2.571555127647855, "learning_rate": 1.0015914117198877e-07, "loss": 1.0337, "step": 25883 }, { "epoch": 0.9381319995650755, "grad_norm": 2.320745990530547, "learning_rate": 1.0004228570512309e-07, "loss": 0.7059, "step": 25884 }, { "epoch": 0.9381682432677322, "grad_norm": 2.269565945868703, "learning_rate": 9.992549775671101e-08, "loss": 0.8763, "step": 25885 }, { "epoch": 0.9382044869703889, "grad_norm": 2.3999709396008724, "learning_rate": 9.98087773283618e-08, "loss": 0.9098, "step": 25886 }, { "epoch": 0.9382407306730456, "grad_norm": 2.402787252906483, "learning_rate": 9.969212442168474e-08, "loss": 0.8342, "step": 25887 }, { "epoch": 0.9382769743757022, "grad_norm": 2.5005759478921017, "learning_rate": 9.957553903828631e-08, "loss": 0.911, "step": 25888 }, { "epoch": 0.9383132180783589, "grad_norm": 2.6366474079668367, "learning_rate": 9.945902117977357e-08, "loss": 0.9271, "step": 25889 }, { "epoch": 0.9383494617810155, "grad_norm": 2.700985014023873, "learning_rate": 9.934257084775134e-08, "loss": 0.8516, "step": 25890 }, { "epoch": 0.9383857054836722, "grad_norm": 1.9919899390658757, "learning_rate": 9.9226188043825e-08, "loss": 1.0038, "step": 25891 }, { "epoch": 0.9384219491863289, "grad_norm": 2.3156943281759275, "learning_rate": 9.910987276959827e-08, "loss": 0.7643, "step": 25892 }, { "epoch": 0.9384581928889856, "grad_norm": 2.293929164693239, "learning_rate": 9.899362502667264e-08, "loss": 1.0126, "step": 25893 }, { "epoch": 0.9384944365916422, "grad_norm": 2.5162222179794327, "learning_rate": 9.88774448166513e-08, "loss": 0.8702, "step": 25894 }, { "epoch": 0.9385306802942989, "grad_norm": 2.6739724661757682, "learning_rate": 9.876133214113404e-08, "loss": 0.9003, "step": 25895 }, { "epoch": 0.9385669239969555, "grad_norm": 2.4081896558873996, "learning_rate": 9.864528700172238e-08, "loss": 0.7625, "step": 25896 }, { "epoch": 0.9386031676996122, "grad_norm": 2.1984467478929126, "learning_rate": 9.852930940001393e-08, "loss": 0.8182, "step": 25897 }, { "epoch": 0.9386394114022688, "grad_norm": 2.2871668057986385, "learning_rate": 9.84133993376074e-08, "loss": 0.7892, "step": 25898 }, { "epoch": 0.9386756551049256, "grad_norm": 2.4737342388982113, "learning_rate": 9.829755681609988e-08, "loss": 0.7846, "step": 25899 }, { "epoch": 0.9387118988075822, "grad_norm": 2.2324451567029366, "learning_rate": 9.818178183708727e-08, "loss": 0.874, "step": 25900 }, { "epoch": 0.9387481425102389, "grad_norm": 2.2251535823181015, "learning_rate": 9.806607440216608e-08, "loss": 0.8694, "step": 25901 }, { "epoch": 0.9387843862128955, "grad_norm": 2.3711533423774886, "learning_rate": 9.795043451292841e-08, "loss": 0.8139, "step": 25902 }, { "epoch": 0.9388206299155522, "grad_norm": 2.847736841494996, "learning_rate": 9.783486217097016e-08, "loss": 0.835, "step": 25903 }, { "epoch": 0.9388568736182088, "grad_norm": 2.518947678785157, "learning_rate": 9.77193573778823e-08, "loss": 0.8378, "step": 25904 }, { "epoch": 0.9388931173208654, "grad_norm": 2.240399748521385, "learning_rate": 9.760392013525687e-08, "loss": 0.6468, "step": 25905 }, { "epoch": 0.9389293610235222, "grad_norm": 2.3317697020364907, "learning_rate": 9.74885504446843e-08, "loss": 0.8178, "step": 25906 }, { "epoch": 0.9389656047261788, "grad_norm": 2.6721646102268366, "learning_rate": 9.737324830775552e-08, "loss": 0.8903, "step": 25907 }, { "epoch": 0.9390018484288355, "grad_norm": 2.234258918875488, "learning_rate": 9.725801372605759e-08, "loss": 0.9763, "step": 25908 }, { "epoch": 0.9390380921314921, "grad_norm": 2.2408138142744076, "learning_rate": 9.71428467011798e-08, "loss": 0.945, "step": 25909 }, { "epoch": 0.9390743358341488, "grad_norm": 2.130491469716822, "learning_rate": 9.702774723470753e-08, "loss": 0.7759, "step": 25910 }, { "epoch": 0.9391105795368054, "grad_norm": 2.2287082321776093, "learning_rate": 9.691271532822788e-08, "loss": 0.8135, "step": 25911 }, { "epoch": 0.9391468232394622, "grad_norm": 2.235996698621434, "learning_rate": 9.679775098332622e-08, "loss": 0.9243, "step": 25912 }, { "epoch": 0.9391830669421188, "grad_norm": 2.29197219988147, "learning_rate": 9.668285420158573e-08, "loss": 0.9458, "step": 25913 }, { "epoch": 0.9392193106447755, "grad_norm": 2.439805184794689, "learning_rate": 9.656802498459017e-08, "loss": 1.0059, "step": 25914 }, { "epoch": 0.9392555543474321, "grad_norm": 2.3135950446238107, "learning_rate": 9.645326333392213e-08, "loss": 0.8492, "step": 25915 }, { "epoch": 0.9392917980500888, "grad_norm": 2.3651371794838427, "learning_rate": 9.633856925116258e-08, "loss": 0.8356, "step": 25916 }, { "epoch": 0.9393280417527454, "grad_norm": 2.608651981027153, "learning_rate": 9.622394273789138e-08, "loss": 0.8181, "step": 25917 }, { "epoch": 0.9393642854554021, "grad_norm": 2.561483266512963, "learning_rate": 9.610938379568835e-08, "loss": 0.911, "step": 25918 }, { "epoch": 0.9394005291580588, "grad_norm": 2.576795823135263, "learning_rate": 9.59948924261328e-08, "loss": 0.816, "step": 25919 }, { "epoch": 0.9394367728607155, "grad_norm": 2.4980351066781767, "learning_rate": 9.588046863080181e-08, "loss": 0.9386, "step": 25920 }, { "epoch": 0.9394730165633721, "grad_norm": 2.6476271171882484, "learning_rate": 9.576611241127187e-08, "loss": 1.116, "step": 25921 }, { "epoch": 0.9395092602660288, "grad_norm": 2.3142903218201334, "learning_rate": 9.565182376911842e-08, "loss": 0.7383, "step": 25922 }, { "epoch": 0.9395455039686854, "grad_norm": 2.6637705309281348, "learning_rate": 9.553760270591794e-08, "loss": 0.8817, "step": 25923 }, { "epoch": 0.9395817476713421, "grad_norm": 2.396570307788694, "learning_rate": 9.542344922324199e-08, "loss": 0.9278, "step": 25924 }, { "epoch": 0.9396179913739988, "grad_norm": 2.183372266349185, "learning_rate": 9.530936332266539e-08, "loss": 0.8342, "step": 25925 }, { "epoch": 0.9396542350766555, "grad_norm": 2.2940442214402723, "learning_rate": 9.519534500575912e-08, "loss": 0.8177, "step": 25926 }, { "epoch": 0.9396904787793121, "grad_norm": 2.561763941221598, "learning_rate": 9.508139427409525e-08, "loss": 0.8348, "step": 25927 }, { "epoch": 0.9397267224819688, "grad_norm": 2.370065345801017, "learning_rate": 9.496751112924252e-08, "loss": 0.8603, "step": 25928 }, { "epoch": 0.9397629661846254, "grad_norm": 2.0179189653915084, "learning_rate": 9.485369557277191e-08, "loss": 0.6524, "step": 25929 }, { "epoch": 0.939799209887282, "grad_norm": 2.28618072186795, "learning_rate": 9.47399476062505e-08, "loss": 0.9044, "step": 25930 }, { "epoch": 0.9398354535899387, "grad_norm": 2.5444640049022027, "learning_rate": 9.462626723124535e-08, "loss": 0.7662, "step": 25931 }, { "epoch": 0.9398716972925955, "grad_norm": 2.4247924975813775, "learning_rate": 9.451265444932466e-08, "loss": 0.8297, "step": 25932 }, { "epoch": 0.9399079409952521, "grad_norm": 2.497583821763382, "learning_rate": 9.43991092620522e-08, "loss": 0.8292, "step": 25933 }, { "epoch": 0.9399441846979087, "grad_norm": 2.737009690947826, "learning_rate": 9.428563167099336e-08, "loss": 0.9191, "step": 25934 }, { "epoch": 0.9399804284005654, "grad_norm": 2.3436406459220116, "learning_rate": 9.417222167771134e-08, "loss": 0.8825, "step": 25935 }, { "epoch": 0.940016672103222, "grad_norm": 2.4173008223474963, "learning_rate": 9.405887928376989e-08, "loss": 0.8294, "step": 25936 }, { "epoch": 0.9400529158058787, "grad_norm": 2.4365124620801444, "learning_rate": 9.394560449072943e-08, "loss": 0.918, "step": 25937 }, { "epoch": 0.9400891595085354, "grad_norm": 2.722697642640976, "learning_rate": 9.383239730015149e-08, "loss": 0.936, "step": 25938 }, { "epoch": 0.9401254032111921, "grad_norm": 2.292405554736708, "learning_rate": 9.371925771359592e-08, "loss": 0.8317, "step": 25939 }, { "epoch": 0.9401616469138487, "grad_norm": 2.1781535873245583, "learning_rate": 9.360618573262203e-08, "loss": 0.8826, "step": 25940 }, { "epoch": 0.9401978906165054, "grad_norm": 2.373696599245717, "learning_rate": 9.349318135878804e-08, "loss": 0.9026, "step": 25941 }, { "epoch": 0.940234134319162, "grad_norm": 2.5026389397256508, "learning_rate": 9.338024459364936e-08, "loss": 0.9532, "step": 25942 }, { "epoch": 0.9402703780218187, "grad_norm": 2.3237623277577537, "learning_rate": 9.326737543876474e-08, "loss": 1.041, "step": 25943 }, { "epoch": 0.9403066217244753, "grad_norm": 2.3713453975705368, "learning_rate": 9.315457389568738e-08, "loss": 0.9532, "step": 25944 }, { "epoch": 0.9403428654271321, "grad_norm": 3.54943471533027, "learning_rate": 9.304183996597271e-08, "loss": 0.7855, "step": 25945 }, { "epoch": 0.9403791091297887, "grad_norm": 2.4543655540364777, "learning_rate": 9.292917365117338e-08, "loss": 0.8485, "step": 25946 }, { "epoch": 0.9404153528324454, "grad_norm": 2.142600107136148, "learning_rate": 9.281657495284313e-08, "loss": 0.6361, "step": 25947 }, { "epoch": 0.940451596535102, "grad_norm": 2.2808623101670977, "learning_rate": 9.27040438725324e-08, "loss": 0.9092, "step": 25948 }, { "epoch": 0.9404878402377587, "grad_norm": 2.289133784152619, "learning_rate": 9.259158041179217e-08, "loss": 0.9076, "step": 25949 }, { "epoch": 0.9405240839404153, "grad_norm": 2.247576890773492, "learning_rate": 9.247918457217231e-08, "loss": 0.8949, "step": 25950 }, { "epoch": 0.9405603276430721, "grad_norm": 2.4783400653085503, "learning_rate": 9.236685635522047e-08, "loss": 0.8973, "step": 25951 }, { "epoch": 0.9405965713457287, "grad_norm": 2.0031358263088266, "learning_rate": 9.225459576248653e-08, "loss": 0.7105, "step": 25952 }, { "epoch": 0.9406328150483854, "grad_norm": 2.146783411975346, "learning_rate": 9.214240279551534e-08, "loss": 0.8228, "step": 25953 }, { "epoch": 0.940669058751042, "grad_norm": 2.0528029965649823, "learning_rate": 9.203027745585346e-08, "loss": 0.8509, "step": 25954 }, { "epoch": 0.9407053024536987, "grad_norm": 2.2435546426023456, "learning_rate": 9.191821974504633e-08, "loss": 0.928, "step": 25955 }, { "epoch": 0.9407415461563553, "grad_norm": 2.5408536559009964, "learning_rate": 9.180622966463826e-08, "loss": 1.022, "step": 25956 }, { "epoch": 0.940777789859012, "grad_norm": 2.4197060872094145, "learning_rate": 9.169430721617135e-08, "loss": 1.0008, "step": 25957 }, { "epoch": 0.9408140335616687, "grad_norm": 2.47994974006994, "learning_rate": 9.15824524011888e-08, "loss": 0.8272, "step": 25958 }, { "epoch": 0.9408502772643254, "grad_norm": 2.2902992203326606, "learning_rate": 9.14706652212316e-08, "loss": 0.8682, "step": 25959 }, { "epoch": 0.940886520966982, "grad_norm": 1.9798019363060164, "learning_rate": 9.135894567783965e-08, "loss": 0.7643, "step": 25960 }, { "epoch": 0.9409227646696386, "grad_norm": 2.225398356053987, "learning_rate": 9.124729377255337e-08, "loss": 0.8288, "step": 25961 }, { "epoch": 0.9409590083722953, "grad_norm": 2.4276463929804564, "learning_rate": 9.113570950690986e-08, "loss": 0.8229, "step": 25962 }, { "epoch": 0.9409952520749519, "grad_norm": 2.3574814693167743, "learning_rate": 9.102419288244846e-08, "loss": 0.8327, "step": 25963 }, { "epoch": 0.9410314957776087, "grad_norm": 2.4061322578763797, "learning_rate": 9.091274390070403e-08, "loss": 0.8801, "step": 25964 }, { "epoch": 0.9410677394802653, "grad_norm": 2.5743240108738643, "learning_rate": 9.080136256321314e-08, "loss": 0.7919, "step": 25965 }, { "epoch": 0.941103983182922, "grad_norm": 2.2956748127853372, "learning_rate": 9.069004887151067e-08, "loss": 0.7854, "step": 25966 }, { "epoch": 0.9411402268855786, "grad_norm": 2.6002473367487897, "learning_rate": 9.057880282713039e-08, "loss": 0.9151, "step": 25967 }, { "epoch": 0.9411764705882353, "grad_norm": 2.287851783396954, "learning_rate": 9.046762443160495e-08, "loss": 0.8575, "step": 25968 }, { "epoch": 0.9412127142908919, "grad_norm": 2.182515067839516, "learning_rate": 9.035651368646647e-08, "loss": 0.7896, "step": 25969 }, { "epoch": 0.9412489579935486, "grad_norm": 2.155414084793646, "learning_rate": 9.024547059324595e-08, "loss": 0.9616, "step": 25970 }, { "epoch": 0.9412852016962053, "grad_norm": 2.309754207397261, "learning_rate": 9.013449515347383e-08, "loss": 0.9207, "step": 25971 }, { "epoch": 0.941321445398862, "grad_norm": 2.560260464708862, "learning_rate": 9.002358736867888e-08, "loss": 0.766, "step": 25972 }, { "epoch": 0.9413576891015186, "grad_norm": 2.400876184232099, "learning_rate": 8.991274724038879e-08, "loss": 0.8321, "step": 25973 }, { "epoch": 0.9413939328041753, "grad_norm": 2.284527801777362, "learning_rate": 8.98019747701323e-08, "loss": 0.847, "step": 25974 }, { "epoch": 0.9414301765068319, "grad_norm": 1.9356198964646032, "learning_rate": 8.969126995943434e-08, "loss": 0.7493, "step": 25975 }, { "epoch": 0.9414664202094886, "grad_norm": 2.5098975534320886, "learning_rate": 8.958063280982198e-08, "loss": 0.9091, "step": 25976 }, { "epoch": 0.9415026639121452, "grad_norm": 2.323091576297304, "learning_rate": 8.947006332281794e-08, "loss": 0.9571, "step": 25977 }, { "epoch": 0.941538907614802, "grad_norm": 2.229559229869694, "learning_rate": 8.935956149994652e-08, "loss": 0.8612, "step": 25978 }, { "epoch": 0.9415751513174586, "grad_norm": 2.2905738276493373, "learning_rate": 8.92491273427315e-08, "loss": 0.8434, "step": 25979 }, { "epoch": 0.9416113950201153, "grad_norm": 2.492800328659311, "learning_rate": 8.91387608526928e-08, "loss": 0.896, "step": 25980 }, { "epoch": 0.9416476387227719, "grad_norm": 2.388360438951143, "learning_rate": 8.902846203135196e-08, "loss": 0.7573, "step": 25981 }, { "epoch": 0.9416838824254286, "grad_norm": 2.4557337294235047, "learning_rate": 8.891823088022889e-08, "loss": 0.7924, "step": 25982 }, { "epoch": 0.9417201261280852, "grad_norm": 2.495039674709313, "learning_rate": 8.880806740084291e-08, "loss": 0.9614, "step": 25983 }, { "epoch": 0.941756369830742, "grad_norm": 2.4509138322879735, "learning_rate": 8.869797159471117e-08, "loss": 0.9046, "step": 25984 }, { "epoch": 0.9417926135333986, "grad_norm": 2.5411052821112516, "learning_rate": 8.858794346335076e-08, "loss": 0.7965, "step": 25985 }, { "epoch": 0.9418288572360553, "grad_norm": 2.130569381339019, "learning_rate": 8.84779830082788e-08, "loss": 0.7444, "step": 25986 }, { "epoch": 0.9418651009387119, "grad_norm": 2.5737070015504453, "learning_rate": 8.836809023100968e-08, "loss": 0.813, "step": 25987 }, { "epoch": 0.9419013446413685, "grad_norm": 2.7060554749575245, "learning_rate": 8.825826513305824e-08, "loss": 0.8744, "step": 25988 }, { "epoch": 0.9419375883440252, "grad_norm": 2.376592820816202, "learning_rate": 8.814850771593609e-08, "loss": 0.803, "step": 25989 }, { "epoch": 0.9419738320466818, "grad_norm": 2.414829424001492, "learning_rate": 8.80388179811581e-08, "loss": 0.8646, "step": 25990 }, { "epoch": 0.9420100757493386, "grad_norm": 2.2804357151977075, "learning_rate": 8.792919593023419e-08, "loss": 0.8473, "step": 25991 }, { "epoch": 0.9420463194519952, "grad_norm": 2.330919529389074, "learning_rate": 8.781964156467537e-08, "loss": 0.8113, "step": 25992 }, { "epoch": 0.9420825631546519, "grad_norm": 2.402626924334765, "learning_rate": 8.771015488599044e-08, "loss": 0.8725, "step": 25993 }, { "epoch": 0.9421188068573085, "grad_norm": 2.322158481139131, "learning_rate": 8.76007358956893e-08, "loss": 1.0111, "step": 25994 }, { "epoch": 0.9421550505599652, "grad_norm": 2.8204159312355874, "learning_rate": 8.749138459527906e-08, "loss": 1.0682, "step": 25995 }, { "epoch": 0.9421912942626218, "grad_norm": 2.3370332898144768, "learning_rate": 8.738210098626687e-08, "loss": 0.9868, "step": 25996 }, { "epoch": 0.9422275379652786, "grad_norm": 2.082365417383167, "learning_rate": 8.727288507015763e-08, "loss": 0.7502, "step": 25997 }, { "epoch": 0.9422637816679352, "grad_norm": 1.9942530863898673, "learning_rate": 8.71637368484568e-08, "loss": 0.6999, "step": 25998 }, { "epoch": 0.9423000253705919, "grad_norm": 2.4075427876515687, "learning_rate": 8.705465632266929e-08, "loss": 0.8529, "step": 25999 }, { "epoch": 0.9423362690732485, "grad_norm": 2.4478538641980268, "learning_rate": 8.694564349429668e-08, "loss": 0.9872, "step": 26000 }, { "epoch": 0.9423725127759052, "grad_norm": 2.12422662532318, "learning_rate": 8.683669836484221e-08, "loss": 0.8962, "step": 26001 }, { "epoch": 0.9424087564785618, "grad_norm": 2.2835962340681855, "learning_rate": 8.672782093580634e-08, "loss": 0.7833, "step": 26002 }, { "epoch": 0.9424450001812185, "grad_norm": 2.407075628413422, "learning_rate": 8.66190112086901e-08, "loss": 0.8916, "step": 26003 }, { "epoch": 0.9424812438838752, "grad_norm": 2.5415494581445497, "learning_rate": 8.651026918499173e-08, "loss": 0.6255, "step": 26004 }, { "epoch": 0.9425174875865319, "grad_norm": 2.464607418914226, "learning_rate": 8.640159486621059e-08, "loss": 0.8773, "step": 26005 }, { "epoch": 0.9425537312891885, "grad_norm": 2.477780908562554, "learning_rate": 8.629298825384435e-08, "loss": 0.8535, "step": 26006 }, { "epoch": 0.9425899749918452, "grad_norm": 2.358074782190373, "learning_rate": 8.618444934938796e-08, "loss": 0.7127, "step": 26007 }, { "epoch": 0.9426262186945018, "grad_norm": 2.1417072182267742, "learning_rate": 8.60759781543391e-08, "loss": 0.8984, "step": 26008 }, { "epoch": 0.9426624623971585, "grad_norm": 2.565331670749272, "learning_rate": 8.596757467019046e-08, "loss": 0.7507, "step": 26009 }, { "epoch": 0.9426987060998152, "grad_norm": 2.488251263351999, "learning_rate": 8.58592388984375e-08, "loss": 0.9918, "step": 26010 }, { "epoch": 0.9427349498024719, "grad_norm": 2.583342850656977, "learning_rate": 8.575097084057183e-08, "loss": 0.9509, "step": 26011 }, { "epoch": 0.9427711935051285, "grad_norm": 2.5356889219410528, "learning_rate": 8.564277049808611e-08, "loss": 0.851, "step": 26012 }, { "epoch": 0.9428074372077851, "grad_norm": 2.3340343528486924, "learning_rate": 8.55346378724703e-08, "loss": 0.9591, "step": 26013 }, { "epoch": 0.9428436809104418, "grad_norm": 2.4414973037208663, "learning_rate": 8.542657296521539e-08, "loss": 0.9481, "step": 26014 }, { "epoch": 0.9428799246130984, "grad_norm": 2.3002835539588187, "learning_rate": 8.531857577781022e-08, "loss": 0.9273, "step": 26015 }, { "epoch": 0.9429161683157551, "grad_norm": 2.699870998046932, "learning_rate": 8.521064631174248e-08, "loss": 0.8896, "step": 26016 }, { "epoch": 0.9429524120184118, "grad_norm": 2.215960166208135, "learning_rate": 8.510278456849985e-08, "loss": 0.8186, "step": 26017 }, { "epoch": 0.9429886557210685, "grad_norm": 2.505859658458363, "learning_rate": 8.499499054956839e-08, "loss": 0.8689, "step": 26018 }, { "epoch": 0.9430248994237251, "grad_norm": 2.0113020782871858, "learning_rate": 8.488726425643356e-08, "loss": 0.8444, "step": 26019 }, { "epoch": 0.9430611431263818, "grad_norm": 2.3868705655593323, "learning_rate": 8.477960569057919e-08, "loss": 0.8281, "step": 26020 }, { "epoch": 0.9430973868290384, "grad_norm": 2.209191513297588, "learning_rate": 8.467201485348963e-08, "loss": 0.8793, "step": 26021 }, { "epoch": 0.9431336305316951, "grad_norm": 2.3215219444187727, "learning_rate": 8.456449174664649e-08, "loss": 0.9188, "step": 26022 }, { "epoch": 0.9431698742343518, "grad_norm": 2.170068147233526, "learning_rate": 8.445703637153301e-08, "loss": 0.8317, "step": 26023 }, { "epoch": 0.9432061179370085, "grad_norm": 2.577098822778011, "learning_rate": 8.434964872962803e-08, "loss": 0.8952, "step": 26024 }, { "epoch": 0.9432423616396651, "grad_norm": 2.470426067474472, "learning_rate": 8.424232882241145e-08, "loss": 0.9075, "step": 26025 }, { "epoch": 0.9432786053423218, "grad_norm": 2.5783505265393654, "learning_rate": 8.413507665136377e-08, "loss": 0.7959, "step": 26026 }, { "epoch": 0.9433148490449784, "grad_norm": 2.422713930061963, "learning_rate": 8.402789221796104e-08, "loss": 0.7632, "step": 26027 }, { "epoch": 0.9433510927476351, "grad_norm": 2.2108627428978056, "learning_rate": 8.392077552368094e-08, "loss": 0.8504, "step": 26028 }, { "epoch": 0.9433873364502917, "grad_norm": 2.289266950198634, "learning_rate": 8.381372656999897e-08, "loss": 0.805, "step": 26029 }, { "epoch": 0.9434235801529485, "grad_norm": 2.5650044168104214, "learning_rate": 8.370674535839173e-08, "loss": 0.776, "step": 26030 }, { "epoch": 0.9434598238556051, "grad_norm": 2.3437010821971436, "learning_rate": 8.359983189033138e-08, "loss": 0.8223, "step": 26031 }, { "epoch": 0.9434960675582618, "grad_norm": 2.243221822473117, "learning_rate": 8.349298616729228e-08, "loss": 0.8355, "step": 26032 }, { "epoch": 0.9435323112609184, "grad_norm": 2.56744242148899, "learning_rate": 8.338620819074605e-08, "loss": 1.1295, "step": 26033 }, { "epoch": 0.9435685549635751, "grad_norm": 2.297735740187689, "learning_rate": 8.327949796216539e-08, "loss": 0.8591, "step": 26034 }, { "epoch": 0.9436047986662317, "grad_norm": 2.527484522509197, "learning_rate": 8.317285548301857e-08, "loss": 0.7797, "step": 26035 }, { "epoch": 0.9436410423688885, "grad_norm": 2.4861240192679737, "learning_rate": 8.306628075477718e-08, "loss": 0.7443, "step": 26036 }, { "epoch": 0.9436772860715451, "grad_norm": 2.529797855396221, "learning_rate": 8.295977377890785e-08, "loss": 0.9723, "step": 26037 }, { "epoch": 0.9437135297742018, "grad_norm": 2.612366416120529, "learning_rate": 8.285333455687993e-08, "loss": 0.9048, "step": 26038 }, { "epoch": 0.9437497734768584, "grad_norm": 2.340274006188841, "learning_rate": 8.274696309015895e-08, "loss": 0.9457, "step": 26039 }, { "epoch": 0.943786017179515, "grad_norm": 2.254186760377444, "learning_rate": 8.264065938021093e-08, "loss": 0.7837, "step": 26040 }, { "epoch": 0.9438222608821717, "grad_norm": 2.2764832600399343, "learning_rate": 8.253442342850026e-08, "loss": 0.8404, "step": 26041 }, { "epoch": 0.9438585045848283, "grad_norm": 2.207471047991242, "learning_rate": 8.242825523649189e-08, "loss": 0.9475, "step": 26042 }, { "epoch": 0.9438947482874851, "grad_norm": 2.295732318398749, "learning_rate": 8.232215480564798e-08, "loss": 0.9903, "step": 26043 }, { "epoch": 0.9439309919901417, "grad_norm": 2.0960792687222174, "learning_rate": 8.221612213743014e-08, "loss": 0.7698, "step": 26044 }, { "epoch": 0.9439672356927984, "grad_norm": 2.4306281184352403, "learning_rate": 8.211015723330051e-08, "loss": 0.7289, "step": 26045 }, { "epoch": 0.944003479395455, "grad_norm": 2.2230868445194387, "learning_rate": 8.200426009471851e-08, "loss": 0.9151, "step": 26046 }, { "epoch": 0.9440397230981117, "grad_norm": 2.5582314633232826, "learning_rate": 8.189843072314352e-08, "loss": 0.9221, "step": 26047 }, { "epoch": 0.9440759668007683, "grad_norm": 2.561493247645448, "learning_rate": 8.179266912003381e-08, "loss": 0.8852, "step": 26048 }, { "epoch": 0.944112210503425, "grad_norm": 2.387701980463259, "learning_rate": 8.168697528684655e-08, "loss": 0.8659, "step": 26049 }, { "epoch": 0.9441484542060817, "grad_norm": 2.3891777236543277, "learning_rate": 8.158134922503835e-08, "loss": 0.8578, "step": 26050 }, { "epoch": 0.9441846979087384, "grad_norm": 2.36859208197304, "learning_rate": 8.147579093606473e-08, "loss": 0.986, "step": 26051 }, { "epoch": 0.944220941611395, "grad_norm": 2.5475504950946237, "learning_rate": 8.137030042138005e-08, "loss": 0.8478, "step": 26052 }, { "epoch": 0.9442571853140517, "grad_norm": 2.3532995829895764, "learning_rate": 8.126487768243762e-08, "loss": 0.8199, "step": 26053 }, { "epoch": 0.9442934290167083, "grad_norm": 2.1926059871178736, "learning_rate": 8.115952272069072e-08, "loss": 0.6596, "step": 26054 }, { "epoch": 0.944329672719365, "grad_norm": 2.221811207980375, "learning_rate": 8.105423553759095e-08, "loss": 0.8165, "step": 26055 }, { "epoch": 0.9443659164220217, "grad_norm": 2.3199401247556737, "learning_rate": 8.094901613458828e-08, "loss": 0.8758, "step": 26056 }, { "epoch": 0.9444021601246784, "grad_norm": 2.1929962636039266, "learning_rate": 8.084386451313431e-08, "loss": 0.9245, "step": 26057 }, { "epoch": 0.944438403827335, "grad_norm": 2.333059770321602, "learning_rate": 8.073878067467567e-08, "loss": 1.0422, "step": 26058 }, { "epoch": 0.9444746475299917, "grad_norm": 2.4503074783351075, "learning_rate": 8.06337646206623e-08, "loss": 0.7824, "step": 26059 }, { "epoch": 0.9445108912326483, "grad_norm": 2.0827369431401372, "learning_rate": 8.052881635254029e-08, "loss": 0.774, "step": 26060 }, { "epoch": 0.944547134935305, "grad_norm": 2.434205358972688, "learning_rate": 8.042393587175623e-08, "loss": 0.8495, "step": 26061 }, { "epoch": 0.9445833786379616, "grad_norm": 2.331256886473246, "learning_rate": 8.031912317975454e-08, "loss": 0.8983, "step": 26062 }, { "epoch": 0.9446196223406184, "grad_norm": 2.170458770434799, "learning_rate": 8.021437827798073e-08, "loss": 0.8876, "step": 26063 }, { "epoch": 0.944655866043275, "grad_norm": 2.1967161859153177, "learning_rate": 8.010970116787642e-08, "loss": 0.8383, "step": 26064 }, { "epoch": 0.9446921097459317, "grad_norm": 2.2405764911525883, "learning_rate": 8.000509185088546e-08, "loss": 0.8777, "step": 26065 }, { "epoch": 0.9447283534485883, "grad_norm": 2.330352003649428, "learning_rate": 7.990055032844946e-08, "loss": 0.8041, "step": 26066 }, { "epoch": 0.944764597151245, "grad_norm": 2.3681993195212825, "learning_rate": 7.979607660200727e-08, "loss": 0.7257, "step": 26067 }, { "epoch": 0.9448008408539016, "grad_norm": 2.2295884534202357, "learning_rate": 7.969167067299999e-08, "loss": 1.0621, "step": 26068 }, { "epoch": 0.9448370845565583, "grad_norm": 2.1922795592961415, "learning_rate": 7.958733254286587e-08, "loss": 1.0085, "step": 26069 }, { "epoch": 0.944873328259215, "grad_norm": 2.121636452402516, "learning_rate": 7.94830622130427e-08, "loss": 0.6879, "step": 26070 }, { "epoch": 0.9449095719618716, "grad_norm": 2.560823399137262, "learning_rate": 7.93788596849665e-08, "loss": 0.8758, "step": 26071 }, { "epoch": 0.9449458156645283, "grad_norm": 2.4621722478696833, "learning_rate": 7.927472496007393e-08, "loss": 0.9049, "step": 26072 }, { "epoch": 0.9449820593671849, "grad_norm": 2.2447809987216365, "learning_rate": 7.917065803979995e-08, "loss": 0.822, "step": 26073 }, { "epoch": 0.9450183030698416, "grad_norm": 2.5406134202691475, "learning_rate": 7.906665892557841e-08, "loss": 0.821, "step": 26074 }, { "epoch": 0.9450545467724982, "grad_norm": 2.3424606795679526, "learning_rate": 7.896272761884205e-08, "loss": 0.7395, "step": 26075 }, { "epoch": 0.945090790475155, "grad_norm": 2.486200958290886, "learning_rate": 7.885886412102251e-08, "loss": 0.8458, "step": 26076 }, { "epoch": 0.9451270341778116, "grad_norm": 2.2880298693061154, "learning_rate": 7.875506843355252e-08, "loss": 0.8209, "step": 26077 }, { "epoch": 0.9451632778804683, "grad_norm": 2.315148946761429, "learning_rate": 7.865134055786094e-08, "loss": 0.9101, "step": 26078 }, { "epoch": 0.9451995215831249, "grad_norm": 2.562358553374558, "learning_rate": 7.854768049537776e-08, "loss": 0.9825, "step": 26079 }, { "epoch": 0.9452357652857816, "grad_norm": 2.2928365822535874, "learning_rate": 7.844408824753069e-08, "loss": 0.8615, "step": 26080 }, { "epoch": 0.9452720089884382, "grad_norm": 2.0604015788464234, "learning_rate": 7.834056381574807e-08, "loss": 0.8474, "step": 26081 }, { "epoch": 0.945308252691095, "grad_norm": 2.3663350520732105, "learning_rate": 7.823710720145594e-08, "loss": 0.9697, "step": 26082 }, { "epoch": 0.9453444963937516, "grad_norm": 2.2700078379566087, "learning_rate": 7.813371840607986e-08, "loss": 0.8224, "step": 26083 }, { "epoch": 0.9453807400964083, "grad_norm": 2.4343177442671764, "learning_rate": 7.803039743104423e-08, "loss": 0.854, "step": 26084 }, { "epoch": 0.9454169837990649, "grad_norm": 2.3348400828678475, "learning_rate": 7.792714427777293e-08, "loss": 1.0417, "step": 26085 }, { "epoch": 0.9454532275017216, "grad_norm": 2.7423780708509633, "learning_rate": 7.782395894768979e-08, "loss": 0.8749, "step": 26086 }, { "epoch": 0.9454894712043782, "grad_norm": 2.4556534457017465, "learning_rate": 7.772084144221481e-08, "loss": 1.0961, "step": 26087 }, { "epoch": 0.9455257149070349, "grad_norm": 2.4099509862094393, "learning_rate": 7.761779176276962e-08, "loss": 0.8116, "step": 26088 }, { "epoch": 0.9455619586096916, "grad_norm": 2.2346248025640487, "learning_rate": 7.75148099107742e-08, "loss": 0.7178, "step": 26089 }, { "epoch": 0.9455982023123483, "grad_norm": 2.641361234376826, "learning_rate": 7.741189588764853e-08, "loss": 1.0523, "step": 26090 }, { "epoch": 0.9456344460150049, "grad_norm": 2.680374935379343, "learning_rate": 7.730904969480923e-08, "loss": 0.8205, "step": 26091 }, { "epoch": 0.9456706897176616, "grad_norm": 2.5848363126028633, "learning_rate": 7.720627133367408e-08, "loss": 1.0, "step": 26092 }, { "epoch": 0.9457069334203182, "grad_norm": 2.4749566925398954, "learning_rate": 7.710356080565972e-08, "loss": 0.9229, "step": 26093 }, { "epoch": 0.9457431771229748, "grad_norm": 2.203222601014397, "learning_rate": 7.700091811218058e-08, "loss": 0.783, "step": 26094 }, { "epoch": 0.9457794208256316, "grad_norm": 2.4027310343880965, "learning_rate": 7.689834325465162e-08, "loss": 0.8393, "step": 26095 }, { "epoch": 0.9458156645282882, "grad_norm": 2.183023743354374, "learning_rate": 7.679583623448561e-08, "loss": 0.942, "step": 26096 }, { "epoch": 0.9458519082309449, "grad_norm": 2.8768558392082233, "learning_rate": 7.669339705309641e-08, "loss": 0.774, "step": 26097 }, { "epoch": 0.9458881519336015, "grad_norm": 2.437620053572236, "learning_rate": 7.659102571189403e-08, "loss": 0.7347, "step": 26098 }, { "epoch": 0.9459243956362582, "grad_norm": 2.200677179702988, "learning_rate": 7.64887222122901e-08, "loss": 0.9362, "step": 26099 }, { "epoch": 0.9459606393389148, "grad_norm": 2.3448728440966846, "learning_rate": 7.638648655569347e-08, "loss": 0.7466, "step": 26100 }, { "epoch": 0.9459968830415715, "grad_norm": 2.5866280631288987, "learning_rate": 7.628431874351416e-08, "loss": 0.818, "step": 26101 }, { "epoch": 0.9460331267442282, "grad_norm": 2.2549822526737713, "learning_rate": 7.618221877715826e-08, "loss": 0.8439, "step": 26102 }, { "epoch": 0.9460693704468849, "grad_norm": 2.350627557912551, "learning_rate": 7.608018665803462e-08, "loss": 1.0264, "step": 26103 }, { "epoch": 0.9461056141495415, "grad_norm": 2.318170981245871, "learning_rate": 7.597822238754715e-08, "loss": 0.9219, "step": 26104 }, { "epoch": 0.9461418578521982, "grad_norm": 2.4832102883702007, "learning_rate": 7.587632596710193e-08, "loss": 0.6473, "step": 26105 }, { "epoch": 0.9461781015548548, "grad_norm": 2.5193323454533503, "learning_rate": 7.577449739810338e-08, "loss": 0.824, "step": 26106 }, { "epoch": 0.9462143452575115, "grad_norm": 2.116658741994858, "learning_rate": 7.567273668195374e-08, "loss": 0.7152, "step": 26107 }, { "epoch": 0.9462505889601681, "grad_norm": 2.4190786151559607, "learning_rate": 7.557104382005576e-08, "loss": 0.7959, "step": 26108 }, { "epoch": 0.9462868326628249, "grad_norm": 2.346726069629173, "learning_rate": 7.546941881381053e-08, "loss": 0.7277, "step": 26109 }, { "epoch": 0.9463230763654815, "grad_norm": 2.053057349801408, "learning_rate": 7.536786166461918e-08, "loss": 0.9047, "step": 26110 }, { "epoch": 0.9463593200681382, "grad_norm": 2.419639508000999, "learning_rate": 7.526637237387946e-08, "loss": 0.9221, "step": 26111 }, { "epoch": 0.9463955637707948, "grad_norm": 2.5054014634791186, "learning_rate": 7.516495094299081e-08, "loss": 0.804, "step": 26112 }, { "epoch": 0.9464318074734515, "grad_norm": 2.3110663375582674, "learning_rate": 7.506359737335157e-08, "loss": 0.8839, "step": 26113 }, { "epoch": 0.9464680511761081, "grad_norm": 2.466933602330185, "learning_rate": 7.49623116663567e-08, "loss": 0.7539, "step": 26114 }, { "epoch": 0.9465042948787649, "grad_norm": 2.352516347858581, "learning_rate": 7.48610938234029e-08, "loss": 0.907, "step": 26115 }, { "epoch": 0.9465405385814215, "grad_norm": 2.359235994918294, "learning_rate": 7.475994384588403e-08, "loss": 1.0466, "step": 26116 }, { "epoch": 0.9465767822840782, "grad_norm": 2.1595165937513205, "learning_rate": 7.46588617351951e-08, "loss": 0.672, "step": 26117 }, { "epoch": 0.9466130259867348, "grad_norm": 2.6673209020262525, "learning_rate": 7.455784749272776e-08, "loss": 0.9199, "step": 26118 }, { "epoch": 0.9466492696893914, "grad_norm": 2.250682127679292, "learning_rate": 7.445690111987536e-08, "loss": 0.83, "step": 26119 }, { "epoch": 0.9466855133920481, "grad_norm": 2.232208330162756, "learning_rate": 7.435602261802677e-08, "loss": 1.0088, "step": 26120 }, { "epoch": 0.9467217570947047, "grad_norm": 2.2425777785104275, "learning_rate": 7.425521198857421e-08, "loss": 0.8076, "step": 26121 }, { "epoch": 0.9467580007973615, "grad_norm": 2.5175424317678012, "learning_rate": 7.415446923290548e-08, "loss": 0.9688, "step": 26122 }, { "epoch": 0.9467942445000181, "grad_norm": 2.5109086454090734, "learning_rate": 7.40537943524089e-08, "loss": 0.8441, "step": 26123 }, { "epoch": 0.9468304882026748, "grad_norm": 2.42281565832347, "learning_rate": 7.395318734847223e-08, "loss": 0.7522, "step": 26124 }, { "epoch": 0.9468667319053314, "grad_norm": 2.2975157185841657, "learning_rate": 7.385264822248162e-08, "loss": 0.8832, "step": 26125 }, { "epoch": 0.9469029756079881, "grad_norm": 1.9587209358441264, "learning_rate": 7.375217697582204e-08, "loss": 0.8002, "step": 26126 }, { "epoch": 0.9469392193106447, "grad_norm": 2.3443538993087043, "learning_rate": 7.365177360987741e-08, "loss": 0.7766, "step": 26127 }, { "epoch": 0.9469754630133015, "grad_norm": 2.309817558672194, "learning_rate": 7.355143812603327e-08, "loss": 0.8007, "step": 26128 }, { "epoch": 0.9470117067159581, "grad_norm": 2.3077546693444395, "learning_rate": 7.345117052567019e-08, "loss": 0.9619, "step": 26129 }, { "epoch": 0.9470479504186148, "grad_norm": 2.3147938984077805, "learning_rate": 7.335097081017095e-08, "loss": 0.8816, "step": 26130 }, { "epoch": 0.9470841941212714, "grad_norm": 2.472182641140537, "learning_rate": 7.325083898091556e-08, "loss": 0.8907, "step": 26131 }, { "epoch": 0.9471204378239281, "grad_norm": 2.321304913243727, "learning_rate": 7.315077503928348e-08, "loss": 0.9291, "step": 26132 }, { "epoch": 0.9471566815265847, "grad_norm": 2.2237808550781994, "learning_rate": 7.305077898665525e-08, "loss": 0.9239, "step": 26133 }, { "epoch": 0.9471929252292414, "grad_norm": 2.3767446956028078, "learning_rate": 7.295085082440645e-08, "loss": 0.8811, "step": 26134 }, { "epoch": 0.9472291689318981, "grad_norm": 2.45078983554236, "learning_rate": 7.285099055391542e-08, "loss": 0.8924, "step": 26135 }, { "epoch": 0.9472654126345548, "grad_norm": 2.19487404061226, "learning_rate": 7.275119817655774e-08, "loss": 0.7369, "step": 26136 }, { "epoch": 0.9473016563372114, "grad_norm": 2.32501992591386, "learning_rate": 7.26514736937095e-08, "loss": 0.8514, "step": 26137 }, { "epoch": 0.9473379000398681, "grad_norm": 2.126509985419721, "learning_rate": 7.255181710674297e-08, "loss": 0.9792, "step": 26138 }, { "epoch": 0.9473741437425247, "grad_norm": 2.4376289048714623, "learning_rate": 7.245222841703315e-08, "loss": 0.9961, "step": 26139 }, { "epoch": 0.9474103874451814, "grad_norm": 2.3713111780762337, "learning_rate": 7.23527076259506e-08, "loss": 0.9649, "step": 26140 }, { "epoch": 0.9474466311478381, "grad_norm": 2.6111862887375765, "learning_rate": 7.225325473486866e-08, "loss": 0.8315, "step": 26141 }, { "epoch": 0.9474828748504948, "grad_norm": 2.457568839861352, "learning_rate": 7.215386974515626e-08, "loss": 0.9909, "step": 26142 }, { "epoch": 0.9475191185531514, "grad_norm": 2.2122500735694968, "learning_rate": 7.205455265818284e-08, "loss": 0.7969, "step": 26143 }, { "epoch": 0.947555362255808, "grad_norm": 2.6912048106006377, "learning_rate": 7.195530347531787e-08, "loss": 0.8381, "step": 26144 }, { "epoch": 0.9475916059584647, "grad_norm": 2.403133897558018, "learning_rate": 7.18561221979286e-08, "loss": 0.9564, "step": 26145 }, { "epoch": 0.9476278496611213, "grad_norm": 2.371708098601713, "learning_rate": 7.175700882738112e-08, "loss": 0.8904, "step": 26146 }, { "epoch": 0.947664093363778, "grad_norm": 2.3303332133049297, "learning_rate": 7.165796336504105e-08, "loss": 0.8105, "step": 26147 }, { "epoch": 0.9477003370664348, "grad_norm": 2.3372856597980176, "learning_rate": 7.15589858122745e-08, "loss": 0.8574, "step": 26148 }, { "epoch": 0.9477365807690914, "grad_norm": 2.483161693813152, "learning_rate": 7.146007617044426e-08, "loss": 0.872, "step": 26149 }, { "epoch": 0.947772824471748, "grad_norm": 2.6654312581299155, "learning_rate": 7.13612344409137e-08, "loss": 0.9578, "step": 26150 }, { "epoch": 0.9478090681744047, "grad_norm": 2.843112943858543, "learning_rate": 7.126246062504449e-08, "loss": 0.9181, "step": 26151 }, { "epoch": 0.9478453118770613, "grad_norm": 2.187906655314846, "learning_rate": 7.116375472419723e-08, "loss": 0.958, "step": 26152 }, { "epoch": 0.947881555579718, "grad_norm": 2.1404064255071824, "learning_rate": 7.10651167397336e-08, "loss": 0.7865, "step": 26153 }, { "epoch": 0.9479177992823747, "grad_norm": 2.487007271637009, "learning_rate": 7.096654667301083e-08, "loss": 0.8435, "step": 26154 }, { "epoch": 0.9479540429850314, "grad_norm": 2.973303482445401, "learning_rate": 7.086804452538842e-08, "loss": 1.1813, "step": 26155 }, { "epoch": 0.947990286687688, "grad_norm": 2.648686972346612, "learning_rate": 7.076961029822305e-08, "loss": 0.825, "step": 26156 }, { "epoch": 0.9480265303903447, "grad_norm": 2.224908203255355, "learning_rate": 7.067124399287195e-08, "loss": 0.8688, "step": 26157 }, { "epoch": 0.9480627740930013, "grad_norm": 2.6280046474602656, "learning_rate": 7.057294561068905e-08, "loss": 0.9512, "step": 26158 }, { "epoch": 0.948099017795658, "grad_norm": 1.989339208343616, "learning_rate": 7.04747151530305e-08, "loss": 0.7307, "step": 26159 }, { "epoch": 0.9481352614983146, "grad_norm": 2.5913622239606715, "learning_rate": 7.03765526212491e-08, "loss": 0.8586, "step": 26160 }, { "epoch": 0.9481715052009714, "grad_norm": 2.0037587037208797, "learning_rate": 7.027845801669708e-08, "loss": 0.6746, "step": 26161 }, { "epoch": 0.948207748903628, "grad_norm": 2.421779335234671, "learning_rate": 7.018043134072616e-08, "loss": 0.8314, "step": 26162 }, { "epoch": 0.9482439926062847, "grad_norm": 2.409743137213216, "learning_rate": 7.008247259468804e-08, "loss": 0.9763, "step": 26163 }, { "epoch": 0.9482802363089413, "grad_norm": 2.4917668859228344, "learning_rate": 6.998458177993216e-08, "loss": 0.8007, "step": 26164 }, { "epoch": 0.948316480011598, "grad_norm": 2.4926440304907103, "learning_rate": 6.988675889780638e-08, "loss": 0.9037, "step": 26165 }, { "epoch": 0.9483527237142546, "grad_norm": 2.4538262824796377, "learning_rate": 6.978900394966015e-08, "loss": 0.8926, "step": 26166 }, { "epoch": 0.9483889674169114, "grad_norm": 2.206107681681055, "learning_rate": 6.969131693683907e-08, "loss": 0.7398, "step": 26167 }, { "epoch": 0.948425211119568, "grad_norm": 2.241982622236857, "learning_rate": 6.959369786069037e-08, "loss": 1.1329, "step": 26168 }, { "epoch": 0.9484614548222247, "grad_norm": 2.4552111743733747, "learning_rate": 6.949614672255856e-08, "loss": 0.7822, "step": 26169 }, { "epoch": 0.9484976985248813, "grad_norm": 2.586663296046457, "learning_rate": 6.93986635237881e-08, "loss": 1.0315, "step": 26170 }, { "epoch": 0.948533942227538, "grad_norm": 2.5304564120753845, "learning_rate": 6.930124826572183e-08, "loss": 0.9562, "step": 26171 }, { "epoch": 0.9485701859301946, "grad_norm": 2.197831206070218, "learning_rate": 6.920390094970254e-08, "loss": 0.8241, "step": 26172 }, { "epoch": 0.9486064296328512, "grad_norm": 2.464071232480596, "learning_rate": 6.910662157707193e-08, "loss": 0.8505, "step": 26173 }, { "epoch": 0.948642673335508, "grad_norm": 2.3141286616974366, "learning_rate": 6.900941014916896e-08, "loss": 0.9361, "step": 26174 }, { "epoch": 0.9486789170381646, "grad_norm": 2.3821075090714516, "learning_rate": 6.891226666733475e-08, "loss": 1.0047, "step": 26175 }, { "epoch": 0.9487151607408213, "grad_norm": 2.38345896918201, "learning_rate": 6.881519113290713e-08, "loss": 0.954, "step": 26176 }, { "epoch": 0.9487514044434779, "grad_norm": 2.287780640659971, "learning_rate": 6.871818354722448e-08, "loss": 0.7796, "step": 26177 }, { "epoch": 0.9487876481461346, "grad_norm": 2.3195509437950883, "learning_rate": 6.86212439116224e-08, "loss": 1.1209, "step": 26178 }, { "epoch": 0.9488238918487912, "grad_norm": 2.2709898883392405, "learning_rate": 6.852437222743701e-08, "loss": 0.9954, "step": 26179 }, { "epoch": 0.9488601355514479, "grad_norm": 2.503109089909217, "learning_rate": 6.84275684960034e-08, "loss": 0.9, "step": 26180 }, { "epoch": 0.9488963792541046, "grad_norm": 2.2340601450352757, "learning_rate": 6.833083271865549e-08, "loss": 0.8204, "step": 26181 }, { "epoch": 0.9489326229567613, "grad_norm": 2.315268363875752, "learning_rate": 6.823416489672662e-08, "loss": 0.9208, "step": 26182 }, { "epoch": 0.9489688666594179, "grad_norm": 2.4762884129843683, "learning_rate": 6.813756503154689e-08, "loss": 0.9663, "step": 26183 }, { "epoch": 0.9490051103620746, "grad_norm": 2.3806103822666103, "learning_rate": 6.80410331244502e-08, "loss": 0.8247, "step": 26184 }, { "epoch": 0.9490413540647312, "grad_norm": 2.500623213241089, "learning_rate": 6.794456917676495e-08, "loss": 0.9139, "step": 26185 }, { "epoch": 0.9490775977673879, "grad_norm": 2.517211115991273, "learning_rate": 6.784817318982062e-08, "loss": 0.7302, "step": 26186 }, { "epoch": 0.9491138414700446, "grad_norm": 2.345330644399418, "learning_rate": 6.775184516494505e-08, "loss": 0.9156, "step": 26187 }, { "epoch": 0.9491500851727013, "grad_norm": 2.0937254427438465, "learning_rate": 6.765558510346715e-08, "loss": 0.9297, "step": 26188 }, { "epoch": 0.9491863288753579, "grad_norm": 2.1616901078711623, "learning_rate": 6.755939300671144e-08, "loss": 0.9271, "step": 26189 }, { "epoch": 0.9492225725780146, "grad_norm": 2.2754919272510015, "learning_rate": 6.746326887600519e-08, "loss": 0.9177, "step": 26190 }, { "epoch": 0.9492588162806712, "grad_norm": 2.255965951152951, "learning_rate": 6.736721271267122e-08, "loss": 0.9375, "step": 26191 }, { "epoch": 0.9492950599833279, "grad_norm": 2.7911282124740917, "learning_rate": 6.727122451803403e-08, "loss": 0.7891, "step": 26192 }, { "epoch": 0.9493313036859845, "grad_norm": 2.4188585948894112, "learning_rate": 6.717530429341646e-08, "loss": 0.888, "step": 26193 }, { "epoch": 0.9493675473886413, "grad_norm": 2.5774292472938054, "learning_rate": 6.707945204013966e-08, "loss": 0.8378, "step": 26194 }, { "epoch": 0.9494037910912979, "grad_norm": 2.5611515176298036, "learning_rate": 6.698366775952425e-08, "loss": 0.9983, "step": 26195 }, { "epoch": 0.9494400347939546, "grad_norm": 2.2681130158610787, "learning_rate": 6.68879514528914e-08, "loss": 1.0201, "step": 26196 }, { "epoch": 0.9494762784966112, "grad_norm": 2.3940116620126335, "learning_rate": 6.679230312155894e-08, "loss": 0.7553, "step": 26197 }, { "epoch": 0.9495125221992678, "grad_norm": 2.7759757603384765, "learning_rate": 6.669672276684468e-08, "loss": 0.8967, "step": 26198 }, { "epoch": 0.9495487659019245, "grad_norm": 2.4567252180977244, "learning_rate": 6.660121039006595e-08, "loss": 0.8289, "step": 26199 }, { "epoch": 0.9495850096045813, "grad_norm": 2.420901303993206, "learning_rate": 6.650576599253888e-08, "loss": 0.8961, "step": 26200 }, { "epoch": 0.9496212533072379, "grad_norm": 2.1524461904076757, "learning_rate": 6.64103895755791e-08, "loss": 0.8804, "step": 26201 }, { "epoch": 0.9496574970098945, "grad_norm": 1.9451985015284137, "learning_rate": 6.631508114050056e-08, "loss": 0.6856, "step": 26202 }, { "epoch": 0.9496937407125512, "grad_norm": 2.4316319451181467, "learning_rate": 6.621984068861553e-08, "loss": 1.0344, "step": 26203 }, { "epoch": 0.9497299844152078, "grad_norm": 2.3296864734432075, "learning_rate": 6.612466822123797e-08, "loss": 0.8631, "step": 26204 }, { "epoch": 0.9497662281178645, "grad_norm": 2.579879958403534, "learning_rate": 6.602956373967795e-08, "loss": 0.8346, "step": 26205 }, { "epoch": 0.9498024718205211, "grad_norm": 2.13580658351631, "learning_rate": 6.593452724524719e-08, "loss": 0.9366, "step": 26206 }, { "epoch": 0.9498387155231779, "grad_norm": 2.1717699137899, "learning_rate": 6.583955873925408e-08, "loss": 0.8199, "step": 26207 }, { "epoch": 0.9498749592258345, "grad_norm": 2.3931863520093515, "learning_rate": 6.574465822300869e-08, "loss": 0.9237, "step": 26208 }, { "epoch": 0.9499112029284912, "grad_norm": 2.056694380251226, "learning_rate": 6.564982569781664e-08, "loss": 0.8226, "step": 26209 }, { "epoch": 0.9499474466311478, "grad_norm": 1.9887928215430242, "learning_rate": 6.555506116498634e-08, "loss": 0.8451, "step": 26210 }, { "epoch": 0.9499836903338045, "grad_norm": 2.4356935084708136, "learning_rate": 6.546036462582339e-08, "loss": 0.7711, "step": 26211 }, { "epoch": 0.9500199340364611, "grad_norm": 2.3800300114114687, "learning_rate": 6.536573608163177e-08, "loss": 0.8137, "step": 26212 }, { "epoch": 0.9500561777391179, "grad_norm": 1.9123831465740704, "learning_rate": 6.527117553371653e-08, "loss": 0.6207, "step": 26213 }, { "epoch": 0.9500924214417745, "grad_norm": 2.4762248300720517, "learning_rate": 6.517668298337887e-08, "loss": 0.8103, "step": 26214 }, { "epoch": 0.9501286651444312, "grad_norm": 2.153872470615635, "learning_rate": 6.508225843192329e-08, "loss": 0.8781, "step": 26215 }, { "epoch": 0.9501649088470878, "grad_norm": 2.204363072486209, "learning_rate": 6.498790188064874e-08, "loss": 0.8725, "step": 26216 }, { "epoch": 0.9502011525497445, "grad_norm": 2.470269301662805, "learning_rate": 6.489361333085697e-08, "loss": 0.8904, "step": 26217 }, { "epoch": 0.9502373962524011, "grad_norm": 2.5348405131614364, "learning_rate": 6.47993927838464e-08, "loss": 0.9025, "step": 26218 }, { "epoch": 0.9502736399550578, "grad_norm": 2.4859331331232277, "learning_rate": 6.470524024091541e-08, "loss": 1.1169, "step": 26219 }, { "epoch": 0.9503098836577145, "grad_norm": 2.4180910497746795, "learning_rate": 6.46111557033613e-08, "loss": 0.8769, "step": 26220 }, { "epoch": 0.9503461273603712, "grad_norm": 2.548773883651317, "learning_rate": 6.451713917248081e-08, "loss": 0.7601, "step": 26221 }, { "epoch": 0.9503823710630278, "grad_norm": 2.3935916600241978, "learning_rate": 6.442319064956903e-08, "loss": 1.034, "step": 26222 }, { "epoch": 0.9504186147656845, "grad_norm": 2.3896516731057273, "learning_rate": 6.432931013592103e-08, "loss": 0.9376, "step": 26223 }, { "epoch": 0.9504548584683411, "grad_norm": 2.0593611768141895, "learning_rate": 6.423549763283021e-08, "loss": 0.7669, "step": 26224 }, { "epoch": 0.9504911021709977, "grad_norm": 2.5117160684709297, "learning_rate": 6.414175314158889e-08, "loss": 0.967, "step": 26225 }, { "epoch": 0.9505273458736545, "grad_norm": 2.505786860356704, "learning_rate": 6.404807666348877e-08, "loss": 0.9328, "step": 26226 }, { "epoch": 0.9505635895763112, "grad_norm": 2.619043299773429, "learning_rate": 6.39544681998211e-08, "loss": 1.0818, "step": 26227 }, { "epoch": 0.9505998332789678, "grad_norm": 2.448997047319532, "learning_rate": 6.386092775187646e-08, "loss": 1.014, "step": 26228 }, { "epoch": 0.9506360769816244, "grad_norm": 2.1330298087038893, "learning_rate": 6.37674553209422e-08, "loss": 0.8365, "step": 26229 }, { "epoch": 0.9506723206842811, "grad_norm": 2.255229574281426, "learning_rate": 6.367405090830725e-08, "loss": 0.9292, "step": 26230 }, { "epoch": 0.9507085643869377, "grad_norm": 2.1104430501930436, "learning_rate": 6.358071451525893e-08, "loss": 0.6416, "step": 26231 }, { "epoch": 0.9507448080895944, "grad_norm": 2.460154591494719, "learning_rate": 6.348744614308234e-08, "loss": 0.8743, "step": 26232 }, { "epoch": 0.9507810517922511, "grad_norm": 2.322044386303597, "learning_rate": 6.339424579306364e-08, "loss": 0.9309, "step": 26233 }, { "epoch": 0.9508172954949078, "grad_norm": 2.476766658493145, "learning_rate": 6.33011134664857e-08, "loss": 0.9471, "step": 26234 }, { "epoch": 0.9508535391975644, "grad_norm": 2.0462535727073017, "learning_rate": 6.320804916463419e-08, "loss": 0.729, "step": 26235 }, { "epoch": 0.9508897829002211, "grad_norm": 2.358497376189456, "learning_rate": 6.311505288878917e-08, "loss": 0.8901, "step": 26236 }, { "epoch": 0.9509260266028777, "grad_norm": 2.8608539766897194, "learning_rate": 6.30221246402335e-08, "loss": 0.7469, "step": 26237 }, { "epoch": 0.9509622703055344, "grad_norm": 2.633598339000168, "learning_rate": 6.292926442024671e-08, "loss": 0.8726, "step": 26238 }, { "epoch": 0.9509985140081911, "grad_norm": 2.304707056484527, "learning_rate": 6.283647223010891e-08, "loss": 0.7922, "step": 26239 }, { "epoch": 0.9510347577108478, "grad_norm": 2.461876826316001, "learning_rate": 6.274374807109907e-08, "loss": 0.9397, "step": 26240 }, { "epoch": 0.9510710014135044, "grad_norm": 2.3735023916176523, "learning_rate": 6.265109194449393e-08, "loss": 0.8348, "step": 26241 }, { "epoch": 0.9511072451161611, "grad_norm": 2.361241733101237, "learning_rate": 6.25585038515708e-08, "loss": 0.9822, "step": 26242 }, { "epoch": 0.9511434888188177, "grad_norm": 2.3742254164882977, "learning_rate": 6.246598379360535e-08, "loss": 0.9425, "step": 26243 }, { "epoch": 0.9511797325214744, "grad_norm": 2.4544609438321046, "learning_rate": 6.237353177187322e-08, "loss": 0.8405, "step": 26244 }, { "epoch": 0.951215976224131, "grad_norm": 2.120724727938871, "learning_rate": 6.228114778764672e-08, "loss": 0.8875, "step": 26245 }, { "epoch": 0.9512522199267878, "grad_norm": 2.563606911403872, "learning_rate": 6.218883184219981e-08, "loss": 0.7827, "step": 26246 }, { "epoch": 0.9512884636294444, "grad_norm": 2.218566715299839, "learning_rate": 6.20965839368054e-08, "loss": 0.9088, "step": 26247 }, { "epoch": 0.9513247073321011, "grad_norm": 2.449695044949179, "learning_rate": 6.2004404072733e-08, "loss": 0.9313, "step": 26248 }, { "epoch": 0.9513609510347577, "grad_norm": 2.354533650762698, "learning_rate": 6.191229225125328e-08, "loss": 1.0213, "step": 26249 }, { "epoch": 0.9513971947374144, "grad_norm": 2.541102957009124, "learning_rate": 6.182024847363632e-08, "loss": 1.1136, "step": 26250 }, { "epoch": 0.951433438440071, "grad_norm": 2.4521890237679007, "learning_rate": 6.172827274115e-08, "loss": 0.7969, "step": 26251 }, { "epoch": 0.9514696821427276, "grad_norm": 2.205885408811856, "learning_rate": 6.163636505506054e-08, "loss": 0.8686, "step": 26252 }, { "epoch": 0.9515059258453844, "grad_norm": 2.212239351657702, "learning_rate": 6.154452541663635e-08, "loss": 0.7444, "step": 26253 }, { "epoch": 0.951542169548041, "grad_norm": 2.285967299287067, "learning_rate": 6.145275382714089e-08, "loss": 0.8764, "step": 26254 }, { "epoch": 0.9515784132506977, "grad_norm": 2.1289453329825494, "learning_rate": 6.136105028784089e-08, "loss": 0.6906, "step": 26255 }, { "epoch": 0.9516146569533543, "grad_norm": 2.3378415150521064, "learning_rate": 6.126941479999814e-08, "loss": 0.8771, "step": 26256 }, { "epoch": 0.951650900656011, "grad_norm": 2.460002102720499, "learning_rate": 6.117784736487609e-08, "loss": 0.9222, "step": 26257 }, { "epoch": 0.9516871443586676, "grad_norm": 2.3815805379436665, "learning_rate": 6.108634798373648e-08, "loss": 0.7621, "step": 26258 }, { "epoch": 0.9517233880613244, "grad_norm": 2.018547748218151, "learning_rate": 6.099491665783997e-08, "loss": 0.7204, "step": 26259 }, { "epoch": 0.951759631763981, "grad_norm": 2.3955350707566603, "learning_rate": 6.090355338844667e-08, "loss": 0.778, "step": 26260 }, { "epoch": 0.9517958754666377, "grad_norm": 2.1633017542307558, "learning_rate": 6.081225817681502e-08, "loss": 0.8264, "step": 26261 }, { "epoch": 0.9518321191692943, "grad_norm": 2.27305834554891, "learning_rate": 6.072103102420346e-08, "loss": 1.0015, "step": 26262 }, { "epoch": 0.951868362871951, "grad_norm": 2.18710628069102, "learning_rate": 6.062987193186875e-08, "loss": 0.9841, "step": 26263 }, { "epoch": 0.9519046065746076, "grad_norm": 2.456012742241485, "learning_rate": 6.053878090106769e-08, "loss": 0.8217, "step": 26264 }, { "epoch": 0.9519408502772643, "grad_norm": 2.4043949165189393, "learning_rate": 6.044775793305424e-08, "loss": 0.9017, "step": 26265 }, { "epoch": 0.951977093979921, "grad_norm": 2.272458886196737, "learning_rate": 6.035680302908353e-08, "loss": 1.0159, "step": 26266 }, { "epoch": 0.9520133376825777, "grad_norm": 2.3576063852841216, "learning_rate": 6.026591619040956e-08, "loss": 1.0296, "step": 26267 }, { "epoch": 0.9520495813852343, "grad_norm": 2.513408578990975, "learning_rate": 6.017509741828298e-08, "loss": 0.8255, "step": 26268 }, { "epoch": 0.952085825087891, "grad_norm": 2.4100030112861184, "learning_rate": 6.008434671395614e-08, "loss": 0.8432, "step": 26269 }, { "epoch": 0.9521220687905476, "grad_norm": 2.568635924926524, "learning_rate": 5.999366407867913e-08, "loss": 0.7967, "step": 26270 }, { "epoch": 0.9521583124932043, "grad_norm": 2.5646492102340583, "learning_rate": 5.990304951370207e-08, "loss": 0.7897, "step": 26271 }, { "epoch": 0.952194556195861, "grad_norm": 2.4501899167970205, "learning_rate": 5.981250302027342e-08, "loss": 0.8783, "step": 26272 }, { "epoch": 0.9522307998985177, "grad_norm": 2.2290037515776406, "learning_rate": 5.972202459964104e-08, "loss": 0.7274, "step": 26273 }, { "epoch": 0.9522670436011743, "grad_norm": 2.3622174309186525, "learning_rate": 5.963161425305065e-08, "loss": 0.8854, "step": 26274 }, { "epoch": 0.952303287303831, "grad_norm": 2.385118078961308, "learning_rate": 5.954127198174953e-08, "loss": 0.7135, "step": 26275 }, { "epoch": 0.9523395310064876, "grad_norm": 2.43148428420998, "learning_rate": 5.945099778698116e-08, "loss": 0.7164, "step": 26276 }, { "epoch": 0.9523757747091443, "grad_norm": 2.326218078540509, "learning_rate": 5.936079166999065e-08, "loss": 0.8049, "step": 26277 }, { "epoch": 0.9524120184118009, "grad_norm": 2.271827795844041, "learning_rate": 5.927065363201978e-08, "loss": 1.0475, "step": 26278 }, { "epoch": 0.9524482621144577, "grad_norm": 2.5154874083486103, "learning_rate": 5.9180583674311456e-08, "loss": 0.9522, "step": 26279 }, { "epoch": 0.9524845058171143, "grad_norm": 2.459184718620206, "learning_rate": 5.909058179810745e-08, "loss": 0.9658, "step": 26280 }, { "epoch": 0.952520749519771, "grad_norm": 2.429960429552792, "learning_rate": 5.900064800464622e-08, "loss": 0.7735, "step": 26281 }, { "epoch": 0.9525569932224276, "grad_norm": 2.6808005372359704, "learning_rate": 5.891078229516789e-08, "loss": 0.9984, "step": 26282 }, { "epoch": 0.9525932369250842, "grad_norm": 2.8693587774190847, "learning_rate": 5.8820984670910355e-08, "loss": 1.0435, "step": 26283 }, { "epoch": 0.9526294806277409, "grad_norm": 2.724665650543821, "learning_rate": 5.8731255133112065e-08, "loss": 0.8602, "step": 26284 }, { "epoch": 0.9526657243303976, "grad_norm": 2.35336090674204, "learning_rate": 5.864159368300815e-08, "loss": 0.8414, "step": 26285 }, { "epoch": 0.9527019680330543, "grad_norm": 1.9733285687217665, "learning_rate": 5.8552000321834834e-08, "loss": 0.7426, "step": 26286 }, { "epoch": 0.9527382117357109, "grad_norm": 2.7557643602487305, "learning_rate": 5.8462475050826695e-08, "loss": 0.9908, "step": 26287 }, { "epoch": 0.9527744554383676, "grad_norm": 2.683396354656255, "learning_rate": 5.837301787121663e-08, "loss": 0.8865, "step": 26288 }, { "epoch": 0.9528106991410242, "grad_norm": 2.4080211028050154, "learning_rate": 5.828362878423866e-08, "loss": 0.8015, "step": 26289 }, { "epoch": 0.9528469428436809, "grad_norm": 2.2385836246316333, "learning_rate": 5.819430779112234e-08, "loss": 0.6854, "step": 26290 }, { "epoch": 0.9528831865463375, "grad_norm": 2.643161794064095, "learning_rate": 5.810505489310059e-08, "loss": 0.8917, "step": 26291 }, { "epoch": 0.9529194302489943, "grad_norm": 2.566533054793757, "learning_rate": 5.801587009140242e-08, "loss": 0.7194, "step": 26292 }, { "epoch": 0.9529556739516509, "grad_norm": 2.791808857707776, "learning_rate": 5.792675338725629e-08, "loss": 0.8896, "step": 26293 }, { "epoch": 0.9529919176543076, "grad_norm": 2.1357161394820654, "learning_rate": 5.783770478189066e-08, "loss": 0.8247, "step": 26294 }, { "epoch": 0.9530281613569642, "grad_norm": 2.5793545987639317, "learning_rate": 5.774872427653344e-08, "loss": 0.8428, "step": 26295 }, { "epoch": 0.9530644050596209, "grad_norm": 2.0719576671626867, "learning_rate": 5.7659811872408656e-08, "loss": 0.8927, "step": 26296 }, { "epoch": 0.9531006487622775, "grad_norm": 2.241036618800335, "learning_rate": 5.7570967570743094e-08, "loss": 0.7568, "step": 26297 }, { "epoch": 0.9531368924649343, "grad_norm": 2.4208148726820395, "learning_rate": 5.7482191372760787e-08, "loss": 0.8033, "step": 26298 }, { "epoch": 0.9531731361675909, "grad_norm": 2.4945549096253257, "learning_rate": 5.7393483279684636e-08, "loss": 0.92, "step": 26299 }, { "epoch": 0.9532093798702476, "grad_norm": 2.0650110920118947, "learning_rate": 5.7304843292737e-08, "loss": 0.7146, "step": 26300 }, { "epoch": 0.9532456235729042, "grad_norm": 2.4696962948452956, "learning_rate": 5.721627141313913e-08, "loss": 0.8018, "step": 26301 }, { "epoch": 0.9532818672755609, "grad_norm": 2.605844342604927, "learning_rate": 5.7127767642112254e-08, "loss": 0.7966, "step": 26302 }, { "epoch": 0.9533181109782175, "grad_norm": 2.3656111562543676, "learning_rate": 5.7039331980874855e-08, "loss": 1.0144, "step": 26303 }, { "epoch": 0.9533543546808741, "grad_norm": 2.6214873591502403, "learning_rate": 5.695096443064652e-08, "loss": 1.0686, "step": 26304 }, { "epoch": 0.9533905983835309, "grad_norm": 2.0034243826618776, "learning_rate": 5.6862664992644035e-08, "loss": 0.9116, "step": 26305 }, { "epoch": 0.9534268420861876, "grad_norm": 2.5494395225913755, "learning_rate": 5.6774433668084775e-08, "loss": 0.9864, "step": 26306 }, { "epoch": 0.9534630857888442, "grad_norm": 2.479712085291246, "learning_rate": 5.668627045818442e-08, "loss": 0.9276, "step": 26307 }, { "epoch": 0.9534993294915008, "grad_norm": 2.40104146910795, "learning_rate": 5.659817536415757e-08, "loss": 0.9282, "step": 26308 }, { "epoch": 0.9535355731941575, "grad_norm": 2.3515426062168507, "learning_rate": 5.6510148387217686e-08, "loss": 0.7669, "step": 26309 }, { "epoch": 0.9535718168968141, "grad_norm": 2.3557305928034986, "learning_rate": 5.642218952857825e-08, "loss": 0.8156, "step": 26310 }, { "epoch": 0.9536080605994709, "grad_norm": 2.2411169492807046, "learning_rate": 5.633429878945218e-08, "loss": 0.784, "step": 26311 }, { "epoch": 0.9536443043021275, "grad_norm": 2.2297596112802034, "learning_rate": 5.62464761710485e-08, "loss": 0.8885, "step": 26312 }, { "epoch": 0.9536805480047842, "grad_norm": 2.6760482090406095, "learning_rate": 5.615872167457903e-08, "loss": 0.9086, "step": 26313 }, { "epoch": 0.9537167917074408, "grad_norm": 2.4838542596794597, "learning_rate": 5.607103530125224e-08, "loss": 0.7707, "step": 26314 }, { "epoch": 0.9537530354100975, "grad_norm": 2.168685460665507, "learning_rate": 5.598341705227717e-08, "loss": 0.8873, "step": 26315 }, { "epoch": 0.9537892791127541, "grad_norm": 2.6150582802941407, "learning_rate": 5.5895866928860066e-08, "loss": 0.9553, "step": 26316 }, { "epoch": 0.9538255228154108, "grad_norm": 2.584433038981099, "learning_rate": 5.580838493220775e-08, "loss": 1.082, "step": 26317 }, { "epoch": 0.9538617665180675, "grad_norm": 2.2189182430780687, "learning_rate": 5.572097106352647e-08, "loss": 1.0262, "step": 26318 }, { "epoch": 0.9538980102207242, "grad_norm": 2.0308194096095344, "learning_rate": 5.563362532401917e-08, "loss": 0.8661, "step": 26319 }, { "epoch": 0.9539342539233808, "grad_norm": 2.014889213205099, "learning_rate": 5.554634771489098e-08, "loss": 0.9616, "step": 26320 }, { "epoch": 0.9539704976260375, "grad_norm": 2.452333637574765, "learning_rate": 5.5459138237342614e-08, "loss": 0.806, "step": 26321 }, { "epoch": 0.9540067413286941, "grad_norm": 2.304309537261099, "learning_rate": 5.537199689257811e-08, "loss": 0.8371, "step": 26322 }, { "epoch": 0.9540429850313508, "grad_norm": 2.3475604901912424, "learning_rate": 5.52849236817965e-08, "loss": 0.953, "step": 26323 }, { "epoch": 0.9540792287340074, "grad_norm": 2.3732520021288774, "learning_rate": 5.5197918606198494e-08, "loss": 0.8414, "step": 26324 }, { "epoch": 0.9541154724366642, "grad_norm": 2.2116630846816756, "learning_rate": 5.511098166698259e-08, "loss": 0.8841, "step": 26325 }, { "epoch": 0.9541517161393208, "grad_norm": 2.399608275649764, "learning_rate": 5.50241128653467e-08, "loss": 0.863, "step": 26326 }, { "epoch": 0.9541879598419775, "grad_norm": 2.548473314477093, "learning_rate": 5.493731220248766e-08, "loss": 1.0914, "step": 26327 }, { "epoch": 0.9542242035446341, "grad_norm": 2.303022895234482, "learning_rate": 5.485057967960228e-08, "loss": 0.7988, "step": 26328 }, { "epoch": 0.9542604472472908, "grad_norm": 2.747669021647981, "learning_rate": 5.476391529788461e-08, "loss": 0.8993, "step": 26329 }, { "epoch": 0.9542966909499474, "grad_norm": 2.369175446217935, "learning_rate": 5.46773190585298e-08, "loss": 1.0256, "step": 26330 }, { "epoch": 0.9543329346526042, "grad_norm": 2.403450865060939, "learning_rate": 5.459079096273079e-08, "loss": 1.0475, "step": 26331 }, { "epoch": 0.9543691783552608, "grad_norm": 2.4374885327583122, "learning_rate": 5.45043310116794e-08, "loss": 0.7058, "step": 26332 }, { "epoch": 0.9544054220579175, "grad_norm": 2.5209021044592506, "learning_rate": 5.441793920656746e-08, "loss": 0.9875, "step": 26333 }, { "epoch": 0.9544416657605741, "grad_norm": 2.5614852718666, "learning_rate": 5.433161554858568e-08, "loss": 0.7662, "step": 26334 }, { "epoch": 0.9544779094632307, "grad_norm": 2.1660238700386754, "learning_rate": 5.424536003892311e-08, "loss": 0.9624, "step": 26335 }, { "epoch": 0.9545141531658874, "grad_norm": 2.413750225113798, "learning_rate": 5.415917267876825e-08, "loss": 0.9581, "step": 26336 }, { "epoch": 0.954550396868544, "grad_norm": 2.3974536402329623, "learning_rate": 5.407305346930847e-08, "loss": 0.9434, "step": 26337 }, { "epoch": 0.9545866405712008, "grad_norm": 2.342976594296491, "learning_rate": 5.398700241173172e-08, "loss": 0.8334, "step": 26338 }, { "epoch": 0.9546228842738574, "grad_norm": 2.547567165073644, "learning_rate": 5.390101950722204e-08, "loss": 0.9126, "step": 26339 }, { "epoch": 0.9546591279765141, "grad_norm": 2.4302403710427587, "learning_rate": 5.381510475696572e-08, "loss": 0.8994, "step": 26340 }, { "epoch": 0.9546953716791707, "grad_norm": 2.3498669991100503, "learning_rate": 5.372925816214458e-08, "loss": 0.8884, "step": 26341 }, { "epoch": 0.9547316153818274, "grad_norm": 2.3108561750552683, "learning_rate": 5.3643479723944346e-08, "loss": 0.8518, "step": 26342 }, { "epoch": 0.954767859084484, "grad_norm": 2.670556220665632, "learning_rate": 5.3557769443544625e-08, "loss": 1.0752, "step": 26343 }, { "epoch": 0.9548041027871408, "grad_norm": 2.5272221690231986, "learning_rate": 5.347212732212781e-08, "loss": 0.9399, "step": 26344 }, { "epoch": 0.9548403464897974, "grad_norm": 2.6863929668896613, "learning_rate": 5.338655336087295e-08, "loss": 0.8566, "step": 26345 }, { "epoch": 0.9548765901924541, "grad_norm": 2.239504328327832, "learning_rate": 5.3301047560960216e-08, "loss": 1.0606, "step": 26346 }, { "epoch": 0.9549128338951107, "grad_norm": 2.393841115555679, "learning_rate": 5.321560992356756e-08, "loss": 0.876, "step": 26347 }, { "epoch": 0.9549490775977674, "grad_norm": 2.589552636609584, "learning_rate": 5.313024044987181e-08, "loss": 0.9587, "step": 26348 }, { "epoch": 0.954985321300424, "grad_norm": 2.309988744472505, "learning_rate": 5.304493914104924e-08, "loss": 0.8617, "step": 26349 }, { "epoch": 0.9550215650030807, "grad_norm": 2.130244818264365, "learning_rate": 5.2959705998275604e-08, "loss": 0.6788, "step": 26350 }, { "epoch": 0.9550578087057374, "grad_norm": 2.413262812939218, "learning_rate": 5.2874541022726044e-08, "loss": 0.7219, "step": 26351 }, { "epoch": 0.9550940524083941, "grad_norm": 2.564599915387739, "learning_rate": 5.278944421557297e-08, "loss": 0.6734, "step": 26352 }, { "epoch": 0.9551302961110507, "grad_norm": 2.098890662626263, "learning_rate": 5.270441557798933e-08, "loss": 0.753, "step": 26353 }, { "epoch": 0.9551665398137074, "grad_norm": 2.1403954880824414, "learning_rate": 5.261945511114697e-08, "loss": 0.8599, "step": 26354 }, { "epoch": 0.955202783516364, "grad_norm": 2.5664529364411974, "learning_rate": 5.2534562816216604e-08, "loss": 0.9526, "step": 26355 }, { "epoch": 0.9552390272190207, "grad_norm": 2.4118178562412638, "learning_rate": 5.2449738694367315e-08, "loss": 0.7743, "step": 26356 }, { "epoch": 0.9552752709216774, "grad_norm": 2.0262460906081055, "learning_rate": 5.236498274676871e-08, "loss": 0.8299, "step": 26357 }, { "epoch": 0.9553115146243341, "grad_norm": 2.5987253347219696, "learning_rate": 5.22802949745882e-08, "loss": 0.9333, "step": 26358 }, { "epoch": 0.9553477583269907, "grad_norm": 2.3449171462325142, "learning_rate": 5.219567537899317e-08, "loss": 1.0022, "step": 26359 }, { "epoch": 0.9553840020296474, "grad_norm": 2.321766864682684, "learning_rate": 5.211112396114937e-08, "loss": 0.7982, "step": 26360 }, { "epoch": 0.955420245732304, "grad_norm": 2.1984028052539877, "learning_rate": 5.2026640722221965e-08, "loss": 0.8108, "step": 26361 }, { "epoch": 0.9554564894349606, "grad_norm": 2.260410378163907, "learning_rate": 5.1942225663375034e-08, "loss": 0.8146, "step": 26362 }, { "epoch": 0.9554927331376173, "grad_norm": 2.815864086457419, "learning_rate": 5.185787878577153e-08, "loss": 0.8194, "step": 26363 }, { "epoch": 0.955528976840274, "grad_norm": 2.16473148084083, "learning_rate": 5.1773600090574413e-08, "loss": 0.7721, "step": 26364 }, { "epoch": 0.9555652205429307, "grad_norm": 2.4087573247760634, "learning_rate": 5.168938957894387e-08, "loss": 0.8979, "step": 26365 }, { "epoch": 0.9556014642455873, "grad_norm": 2.3383152027373435, "learning_rate": 5.160524725204119e-08, "loss": 0.8373, "step": 26366 }, { "epoch": 0.955637707948244, "grad_norm": 2.2186060324024295, "learning_rate": 5.1521173111025447e-08, "loss": 0.8521, "step": 26367 }, { "epoch": 0.9556739516509006, "grad_norm": 2.5056076809958387, "learning_rate": 5.143716715705516e-08, "loss": 0.841, "step": 26368 }, { "epoch": 0.9557101953535573, "grad_norm": 2.3574734937761086, "learning_rate": 5.135322939128773e-08, "loss": 0.9753, "step": 26369 }, { "epoch": 0.955746439056214, "grad_norm": 1.8590570422216355, "learning_rate": 5.126935981488001e-08, "loss": 0.9306, "step": 26370 }, { "epoch": 0.9557826827588707, "grad_norm": 2.253202931269461, "learning_rate": 5.118555842898831e-08, "loss": 0.7518, "step": 26371 }, { "epoch": 0.9558189264615273, "grad_norm": 2.4423620629395817, "learning_rate": 5.1101825234766144e-08, "loss": 0.9096, "step": 26372 }, { "epoch": 0.955855170164184, "grad_norm": 2.4842935332544824, "learning_rate": 5.101816023336759e-08, "loss": 0.9979, "step": 26373 }, { "epoch": 0.9558914138668406, "grad_norm": 2.253706783792172, "learning_rate": 5.0934563425946157e-08, "loss": 0.8005, "step": 26374 }, { "epoch": 0.9559276575694973, "grad_norm": 2.596688669816957, "learning_rate": 5.0851034813653166e-08, "loss": 0.9623, "step": 26375 }, { "epoch": 0.9559639012721539, "grad_norm": 2.260706579708962, "learning_rate": 5.076757439763935e-08, "loss": 0.8507, "step": 26376 }, { "epoch": 0.9560001449748107, "grad_norm": 2.616904012100083, "learning_rate": 5.0684182179055464e-08, "loss": 0.8335, "step": 26377 }, { "epoch": 0.9560363886774673, "grad_norm": 2.483445217689951, "learning_rate": 5.060085815905058e-08, "loss": 0.8424, "step": 26378 }, { "epoch": 0.956072632380124, "grad_norm": 2.4315577961693937, "learning_rate": 5.0517602338772675e-08, "loss": 0.936, "step": 26379 }, { "epoch": 0.9561088760827806, "grad_norm": 2.2216736133922397, "learning_rate": 5.0434414719368604e-08, "loss": 0.7863, "step": 26380 }, { "epoch": 0.9561451197854373, "grad_norm": 2.851774075901104, "learning_rate": 5.035129530198468e-08, "loss": 0.8525, "step": 26381 }, { "epoch": 0.9561813634880939, "grad_norm": 2.8475178223315925, "learning_rate": 5.026824408776665e-08, "loss": 0.9751, "step": 26382 }, { "epoch": 0.9562176071907507, "grad_norm": 2.5037195116645456, "learning_rate": 5.0185261077858596e-08, "loss": 1.0326, "step": 26383 }, { "epoch": 0.9562538508934073, "grad_norm": 2.336504388274578, "learning_rate": 5.010234627340404e-08, "loss": 0.8135, "step": 26384 }, { "epoch": 0.956290094596064, "grad_norm": 2.2924091248639638, "learning_rate": 5.001949967554598e-08, "loss": 0.7182, "step": 26385 }, { "epoch": 0.9563263382987206, "grad_norm": 2.457094377753854, "learning_rate": 4.993672128542515e-08, "loss": 0.9217, "step": 26386 }, { "epoch": 0.9563625820013772, "grad_norm": 2.681150274507418, "learning_rate": 4.9854011104182867e-08, "loss": 0.9114, "step": 26387 }, { "epoch": 0.9563988257040339, "grad_norm": 2.483896298248026, "learning_rate": 4.977136913295822e-08, "loss": 0.9066, "step": 26388 }, { "epoch": 0.9564350694066905, "grad_norm": 2.3572673918884584, "learning_rate": 4.968879537289084e-08, "loss": 0.9848, "step": 26389 }, { "epoch": 0.9564713131093473, "grad_norm": 2.460991763889116, "learning_rate": 4.9606289825117615e-08, "loss": 0.9144, "step": 26390 }, { "epoch": 0.956507556812004, "grad_norm": 2.7296954561186144, "learning_rate": 4.952385249077596e-08, "loss": 0.9798, "step": 26391 }, { "epoch": 0.9565438005146606, "grad_norm": 2.439609049959849, "learning_rate": 4.944148337100163e-08, "loss": 0.8569, "step": 26392 }, { "epoch": 0.9565800442173172, "grad_norm": 2.419119516800401, "learning_rate": 4.935918246692928e-08, "loss": 0.7771, "step": 26393 }, { "epoch": 0.9566162879199739, "grad_norm": 2.2509250292381986, "learning_rate": 4.9276949779694105e-08, "loss": 0.8432, "step": 26394 }, { "epoch": 0.9566525316226305, "grad_norm": 2.3642630994177987, "learning_rate": 4.9194785310427986e-08, "loss": 0.7566, "step": 26395 }, { "epoch": 0.9566887753252872, "grad_norm": 2.5076477577239396, "learning_rate": 4.911268906026334e-08, "loss": 0.9065, "step": 26396 }, { "epoch": 0.9567250190279439, "grad_norm": 2.51917825970747, "learning_rate": 4.903066103033205e-08, "loss": 0.7727, "step": 26397 }, { "epoch": 0.9567612627306006, "grad_norm": 2.5349766023952127, "learning_rate": 4.8948701221764314e-08, "loss": 1.0324, "step": 26398 }, { "epoch": 0.9567975064332572, "grad_norm": 2.342727840059405, "learning_rate": 4.8866809635688683e-08, "loss": 0.8204, "step": 26399 }, { "epoch": 0.9568337501359139, "grad_norm": 2.3811618751820856, "learning_rate": 4.878498627323369e-08, "loss": 0.9565, "step": 26400 }, { "epoch": 0.9568699938385705, "grad_norm": 2.314639347054076, "learning_rate": 4.8703231135527886e-08, "loss": 0.8357, "step": 26401 }, { "epoch": 0.9569062375412272, "grad_norm": 2.6643316471398686, "learning_rate": 4.8621544223697024e-08, "loss": 0.9458, "step": 26402 }, { "epoch": 0.9569424812438839, "grad_norm": 2.099126849863569, "learning_rate": 4.853992553886633e-08, "loss": 0.9512, "step": 26403 }, { "epoch": 0.9569787249465406, "grad_norm": 2.4088409135337248, "learning_rate": 4.8458375082161005e-08, "loss": 0.8657, "step": 26404 }, { "epoch": 0.9570149686491972, "grad_norm": 2.409163458736882, "learning_rate": 4.837689285470515e-08, "loss": 1.0132, "step": 26405 }, { "epoch": 0.9570512123518539, "grad_norm": 2.4744023981024608, "learning_rate": 4.829547885762065e-08, "loss": 0.9738, "step": 26406 }, { "epoch": 0.9570874560545105, "grad_norm": 2.631228926512959, "learning_rate": 4.8214133092029935e-08, "loss": 0.95, "step": 26407 }, { "epoch": 0.9571236997571672, "grad_norm": 2.340635399366575, "learning_rate": 4.813285555905323e-08, "loss": 0.8727, "step": 26408 }, { "epoch": 0.9571599434598238, "grad_norm": 2.2506224721526267, "learning_rate": 4.8051646259811844e-08, "loss": 0.8353, "step": 26409 }, { "epoch": 0.9571961871624806, "grad_norm": 2.4189135336471477, "learning_rate": 4.797050519542323e-08, "loss": 0.7856, "step": 26410 }, { "epoch": 0.9572324308651372, "grad_norm": 2.273007391751323, "learning_rate": 4.7889432367007046e-08, "loss": 0.8767, "step": 26411 }, { "epoch": 0.9572686745677939, "grad_norm": 2.49826985168571, "learning_rate": 4.780842777567851e-08, "loss": 0.9868, "step": 26412 }, { "epoch": 0.9573049182704505, "grad_norm": 2.3884697767177667, "learning_rate": 4.772749142255506e-08, "loss": 0.9671, "step": 26413 }, { "epoch": 0.9573411619731071, "grad_norm": 2.3610158822340277, "learning_rate": 4.764662330875247e-08, "loss": 0.8987, "step": 26414 }, { "epoch": 0.9573774056757638, "grad_norm": 2.159508791262334, "learning_rate": 4.756582343538374e-08, "loss": 0.7615, "step": 26415 }, { "epoch": 0.9574136493784206, "grad_norm": 2.6860320589181255, "learning_rate": 4.7485091803562975e-08, "loss": 0.9405, "step": 26416 }, { "epoch": 0.9574498930810772, "grad_norm": 2.327262331955182, "learning_rate": 4.740442841440207e-08, "loss": 0.9358, "step": 26417 }, { "epoch": 0.9574861367837338, "grad_norm": 2.4263136431452277, "learning_rate": 4.7323833269013466e-08, "loss": 0.7786, "step": 26418 }, { "epoch": 0.9575223804863905, "grad_norm": 2.213949047106708, "learning_rate": 4.7243306368506825e-08, "loss": 0.9101, "step": 26419 }, { "epoch": 0.9575586241890471, "grad_norm": 2.2900134238012857, "learning_rate": 4.716284771399182e-08, "loss": 1.0106, "step": 26420 }, { "epoch": 0.9575948678917038, "grad_norm": 2.4525118999855, "learning_rate": 4.7082457306578124e-08, "loss": 0.9985, "step": 26421 }, { "epoch": 0.9576311115943604, "grad_norm": 2.6802215699565064, "learning_rate": 4.7002135147372066e-08, "loss": 0.9827, "step": 26422 }, { "epoch": 0.9576673552970172, "grad_norm": 2.5161007434235296, "learning_rate": 4.692188123748165e-08, "loss": 0.9826, "step": 26423 }, { "epoch": 0.9577035989996738, "grad_norm": 2.518548442792286, "learning_rate": 4.6841695578011547e-08, "loss": 0.994, "step": 26424 }, { "epoch": 0.9577398427023305, "grad_norm": 2.299643549533865, "learning_rate": 4.67615781700681e-08, "loss": 0.8215, "step": 26425 }, { "epoch": 0.9577760864049871, "grad_norm": 2.421902750726656, "learning_rate": 4.668152901475376e-08, "loss": 0.8514, "step": 26426 }, { "epoch": 0.9578123301076438, "grad_norm": 2.0926360710736778, "learning_rate": 4.660154811317319e-08, "loss": 0.9331, "step": 26427 }, { "epoch": 0.9578485738103004, "grad_norm": 2.7398129185755176, "learning_rate": 4.652163546642663e-08, "loss": 0.9221, "step": 26428 }, { "epoch": 0.9578848175129572, "grad_norm": 2.442657895766827, "learning_rate": 4.6441791075616527e-08, "loss": 0.911, "step": 26429 }, { "epoch": 0.9579210612156138, "grad_norm": 2.7405746768490777, "learning_rate": 4.636201494184256e-08, "loss": 0.8847, "step": 26430 }, { "epoch": 0.9579573049182705, "grad_norm": 2.406834025229573, "learning_rate": 4.62823070662044e-08, "loss": 0.9745, "step": 26431 }, { "epoch": 0.9579935486209271, "grad_norm": 2.1436068290652974, "learning_rate": 4.620266744980007e-08, "loss": 0.8417, "step": 26432 }, { "epoch": 0.9580297923235838, "grad_norm": 2.5470235493492535, "learning_rate": 4.612309609372701e-08, "loss": 1.2295, "step": 26433 }, { "epoch": 0.9580660360262404, "grad_norm": 2.480268017308249, "learning_rate": 4.604359299908212e-08, "loss": 0.852, "step": 26434 }, { "epoch": 0.958102279728897, "grad_norm": 2.279973465540015, "learning_rate": 4.5964158166959536e-08, "loss": 0.796, "step": 26435 }, { "epoch": 0.9581385234315538, "grad_norm": 2.5868755356812536, "learning_rate": 4.588479159845505e-08, "loss": 0.9156, "step": 26436 }, { "epoch": 0.9581747671342105, "grad_norm": 2.328689524171496, "learning_rate": 4.5805493294662216e-08, "loss": 0.7652, "step": 26437 }, { "epoch": 0.9582110108368671, "grad_norm": 2.20346484784534, "learning_rate": 4.572626325667351e-08, "loss": 0.8816, "step": 26438 }, { "epoch": 0.9582472545395238, "grad_norm": 2.08444239906445, "learning_rate": 4.564710148558082e-08, "loss": 0.7661, "step": 26439 }, { "epoch": 0.9582834982421804, "grad_norm": 2.576981153027471, "learning_rate": 4.55680079824744e-08, "loss": 0.987, "step": 26440 }, { "epoch": 0.958319741944837, "grad_norm": 2.2671689935193298, "learning_rate": 4.5488982748444485e-08, "loss": 1.0115, "step": 26441 }, { "epoch": 0.9583559856474938, "grad_norm": 2.1821705156414035, "learning_rate": 4.54100257845802e-08, "loss": 0.8726, "step": 26442 }, { "epoch": 0.9583922293501504, "grad_norm": 2.0651226201513864, "learning_rate": 4.5331137091969565e-08, "loss": 0.848, "step": 26443 }, { "epoch": 0.9584284730528071, "grad_norm": 2.2626068823762675, "learning_rate": 4.525231667169838e-08, "loss": 0.7999, "step": 26444 }, { "epoch": 0.9584647167554637, "grad_norm": 2.6312040627831874, "learning_rate": 4.517356452485466e-08, "loss": 0.9027, "step": 26445 }, { "epoch": 0.9585009604581204, "grad_norm": 2.406126068554807, "learning_rate": 4.509488065252254e-08, "loss": 0.8527, "step": 26446 }, { "epoch": 0.958537204160777, "grad_norm": 2.750108232318411, "learning_rate": 4.501626505578616e-08, "loss": 0.8967, "step": 26447 }, { "epoch": 0.9585734478634337, "grad_norm": 2.3396022173078457, "learning_rate": 4.4937717735728524e-08, "loss": 0.9346, "step": 26448 }, { "epoch": 0.9586096915660904, "grad_norm": 2.51959906920321, "learning_rate": 4.4859238693433225e-08, "loss": 0.9184, "step": 26449 }, { "epoch": 0.9586459352687471, "grad_norm": 2.4234352490929165, "learning_rate": 4.4780827929980505e-08, "loss": 0.8839, "step": 26450 }, { "epoch": 0.9586821789714037, "grad_norm": 2.1554452372570467, "learning_rate": 4.470248544645117e-08, "loss": 1.0134, "step": 26451 }, { "epoch": 0.9587184226740604, "grad_norm": 2.2595584999435645, "learning_rate": 4.462421124392491e-08, "loss": 0.8591, "step": 26452 }, { "epoch": 0.958754666376717, "grad_norm": 2.020979371850917, "learning_rate": 4.4546005323479744e-08, "loss": 0.9643, "step": 26453 }, { "epoch": 0.9587909100793737, "grad_norm": 2.4212282998419243, "learning_rate": 4.446786768619371e-08, "loss": 1.1354, "step": 26454 }, { "epoch": 0.9588271537820303, "grad_norm": 2.2509565511663125, "learning_rate": 4.4389798333143164e-08, "loss": 0.9478, "step": 26455 }, { "epoch": 0.9588633974846871, "grad_norm": 2.1418192737897654, "learning_rate": 4.431179726540502e-08, "loss": 0.8782, "step": 26456 }, { "epoch": 0.9588996411873437, "grad_norm": 2.4515152583649864, "learning_rate": 4.423386448405231e-08, "loss": 1.0073, "step": 26457 }, { "epoch": 0.9589358848900004, "grad_norm": 2.3569740347947175, "learning_rate": 4.415599999016029e-08, "loss": 0.9329, "step": 26458 }, { "epoch": 0.958972128592657, "grad_norm": 2.151587099013085, "learning_rate": 4.407820378480143e-08, "loss": 0.9084, "step": 26459 }, { "epoch": 0.9590083722953137, "grad_norm": 2.4111370482573684, "learning_rate": 4.400047586904766e-08, "loss": 0.8304, "step": 26460 }, { "epoch": 0.9590446159979703, "grad_norm": 2.526323559458668, "learning_rate": 4.3922816243969765e-08, "loss": 0.9549, "step": 26461 }, { "epoch": 0.9590808597006271, "grad_norm": 2.489989841350431, "learning_rate": 4.384522491063858e-08, "loss": 0.7453, "step": 26462 }, { "epoch": 0.9591171034032837, "grad_norm": 2.2267641201091752, "learning_rate": 4.376770187012214e-08, "loss": 0.846, "step": 26463 }, { "epoch": 0.9591533471059404, "grad_norm": 2.100107876917235, "learning_rate": 4.3690247123489574e-08, "loss": 0.8129, "step": 26464 }, { "epoch": 0.959189590808597, "grad_norm": 2.5008435254396497, "learning_rate": 4.361286067180836e-08, "loss": 0.9312, "step": 26465 }, { "epoch": 0.9592258345112536, "grad_norm": 2.6599626658821256, "learning_rate": 4.353554251614378e-08, "loss": 0.83, "step": 26466 }, { "epoch": 0.9592620782139103, "grad_norm": 2.429538601470283, "learning_rate": 4.3458292657562184e-08, "loss": 0.9593, "step": 26467 }, { "epoch": 0.9592983219165669, "grad_norm": 2.1459017189604968, "learning_rate": 4.338111109712773e-08, "loss": 0.8569, "step": 26468 }, { "epoch": 0.9593345656192237, "grad_norm": 2.1540077727869305, "learning_rate": 4.3303997835904e-08, "loss": 0.8694, "step": 26469 }, { "epoch": 0.9593708093218803, "grad_norm": 2.262944936921736, "learning_rate": 4.322695287495293e-08, "loss": 0.8406, "step": 26470 }, { "epoch": 0.959407053024537, "grad_norm": 2.3525789912860944, "learning_rate": 4.314997621533701e-08, "loss": 0.9501, "step": 26471 }, { "epoch": 0.9594432967271936, "grad_norm": 2.469175525701675, "learning_rate": 4.3073067858117045e-08, "loss": 0.7369, "step": 26472 }, { "epoch": 0.9594795404298503, "grad_norm": 2.3278659963618455, "learning_rate": 4.2996227804351645e-08, "loss": 0.8997, "step": 26473 }, { "epoch": 0.9595157841325069, "grad_norm": 1.9608506080708241, "learning_rate": 4.2919456055101063e-08, "loss": 0.8648, "step": 26474 }, { "epoch": 0.9595520278351637, "grad_norm": 2.5065260385378103, "learning_rate": 4.2842752611421676e-08, "loss": 0.8069, "step": 26475 }, { "epoch": 0.9595882715378203, "grad_norm": 2.7289251713232563, "learning_rate": 4.276611747437154e-08, "loss": 1.0038, "step": 26476 }, { "epoch": 0.959624515240477, "grad_norm": 2.408056891650742, "learning_rate": 4.268955064500646e-08, "loss": 1.0108, "step": 26477 }, { "epoch": 0.9596607589431336, "grad_norm": 2.577000562718151, "learning_rate": 4.261305212438116e-08, "loss": 0.941, "step": 26478 }, { "epoch": 0.9596970026457903, "grad_norm": 2.4418640668711853, "learning_rate": 4.253662191354979e-08, "loss": 0.9715, "step": 26479 }, { "epoch": 0.9597332463484469, "grad_norm": 2.4149347039211095, "learning_rate": 4.24602600135654e-08, "loss": 0.9537, "step": 26480 }, { "epoch": 0.9597694900511036, "grad_norm": 2.367120509701253, "learning_rate": 4.2383966425480485e-08, "loss": 1.0936, "step": 26481 }, { "epoch": 0.9598057337537603, "grad_norm": 2.4808296401522747, "learning_rate": 4.2307741150346413e-08, "loss": 0.8554, "step": 26482 }, { "epoch": 0.959841977456417, "grad_norm": 2.4343670666137682, "learning_rate": 4.2231584189212917e-08, "loss": 0.9354, "step": 26483 }, { "epoch": 0.9598782211590736, "grad_norm": 2.593417630334014, "learning_rate": 4.2155495543130256e-08, "loss": 0.7602, "step": 26484 }, { "epoch": 0.9599144648617303, "grad_norm": 2.4887479474129606, "learning_rate": 4.207947521314648e-08, "loss": 0.9536, "step": 26485 }, { "epoch": 0.9599507085643869, "grad_norm": 2.6131164553730044, "learning_rate": 4.200352320030909e-08, "loss": 0.9545, "step": 26486 }, { "epoch": 0.9599869522670436, "grad_norm": 2.413166218876543, "learning_rate": 4.192763950566447e-08, "loss": 0.8958, "step": 26487 }, { "epoch": 0.9600231959697003, "grad_norm": 2.435323527721502, "learning_rate": 4.1851824130258454e-08, "loss": 0.8876, "step": 26488 }, { "epoch": 0.960059439672357, "grad_norm": 2.5226507874460165, "learning_rate": 4.177607707513576e-08, "loss": 1.1038, "step": 26489 }, { "epoch": 0.9600956833750136, "grad_norm": 2.3797808982066875, "learning_rate": 4.170039834133999e-08, "loss": 0.7225, "step": 26490 }, { "epoch": 0.9601319270776703, "grad_norm": 2.277447891741563, "learning_rate": 4.16247879299142e-08, "loss": 0.8154, "step": 26491 }, { "epoch": 0.9601681707803269, "grad_norm": 2.490389914933125, "learning_rate": 4.1549245841900344e-08, "loss": 0.8247, "step": 26492 }, { "epoch": 0.9602044144829835, "grad_norm": 2.2030269809283474, "learning_rate": 4.1473772078338694e-08, "loss": 0.8509, "step": 26493 }, { "epoch": 0.9602406581856402, "grad_norm": 2.26216576347029, "learning_rate": 4.139836664027008e-08, "loss": 1.095, "step": 26494 }, { "epoch": 0.960276901888297, "grad_norm": 2.429150769917975, "learning_rate": 4.132302952873202e-08, "loss": 0.877, "step": 26495 }, { "epoch": 0.9603131455909536, "grad_norm": 2.339576180631812, "learning_rate": 4.124776074476477e-08, "loss": 1.058, "step": 26496 }, { "epoch": 0.9603493892936102, "grad_norm": 2.521586227732553, "learning_rate": 4.11725602894042e-08, "loss": 0.8664, "step": 26497 }, { "epoch": 0.9603856329962669, "grad_norm": 2.349474178001387, "learning_rate": 4.1097428163686673e-08, "loss": 0.8453, "step": 26498 }, { "epoch": 0.9604218766989235, "grad_norm": 2.5833442979488397, "learning_rate": 4.1022364368647484e-08, "loss": 1.0085, "step": 26499 }, { "epoch": 0.9604581204015802, "grad_norm": 2.575104612020811, "learning_rate": 4.0947368905320806e-08, "loss": 0.7385, "step": 26500 }, { "epoch": 0.9604943641042369, "grad_norm": 2.425699853844671, "learning_rate": 4.087244177474081e-08, "loss": 0.9747, "step": 26501 }, { "epoch": 0.9605306078068936, "grad_norm": 2.5207889007567643, "learning_rate": 4.0797582977938896e-08, "loss": 0.9454, "step": 26502 }, { "epoch": 0.9605668515095502, "grad_norm": 2.2364030071889265, "learning_rate": 4.072279251594702e-08, "loss": 0.7644, "step": 26503 }, { "epoch": 0.9606030952122069, "grad_norm": 2.153418796340074, "learning_rate": 4.064807038979601e-08, "loss": 0.9756, "step": 26504 }, { "epoch": 0.9606393389148635, "grad_norm": 2.0368474562661207, "learning_rate": 4.057341660051561e-08, "loss": 0.6152, "step": 26505 }, { "epoch": 0.9606755826175202, "grad_norm": 2.16670010240844, "learning_rate": 4.049883114913389e-08, "loss": 0.762, "step": 26506 }, { "epoch": 0.9607118263201768, "grad_norm": 2.369597315661204, "learning_rate": 4.0424314036678346e-08, "loss": 0.9043, "step": 26507 }, { "epoch": 0.9607480700228336, "grad_norm": 2.1588730581700024, "learning_rate": 4.0349865264177057e-08, "loss": 0.877, "step": 26508 }, { "epoch": 0.9607843137254902, "grad_norm": 2.7326388876195633, "learning_rate": 4.027548483265475e-08, "loss": 1.0789, "step": 26509 }, { "epoch": 0.9608205574281469, "grad_norm": 2.138120291550801, "learning_rate": 4.020117274313673e-08, "loss": 0.8281, "step": 26510 }, { "epoch": 0.9608568011308035, "grad_norm": 2.4458243608101156, "learning_rate": 4.0126928996647165e-08, "loss": 0.9324, "step": 26511 }, { "epoch": 0.9608930448334602, "grad_norm": 2.381248165319397, "learning_rate": 4.005275359420857e-08, "loss": 0.8129, "step": 26512 }, { "epoch": 0.9609292885361168, "grad_norm": 2.016417042040905, "learning_rate": 3.997864653684347e-08, "loss": 0.8709, "step": 26513 }, { "epoch": 0.9609655322387736, "grad_norm": 2.5693295737871855, "learning_rate": 3.990460782557326e-08, "loss": 0.8212, "step": 26514 }, { "epoch": 0.9610017759414302, "grad_norm": 2.496530063488234, "learning_rate": 3.983063746141713e-08, "loss": 0.8042, "step": 26515 }, { "epoch": 0.9610380196440869, "grad_norm": 2.501075875614237, "learning_rate": 3.9756735445395376e-08, "loss": 0.8487, "step": 26516 }, { "epoch": 0.9610742633467435, "grad_norm": 2.4213942155232613, "learning_rate": 3.968290177852607e-08, "loss": 0.9975, "step": 26517 }, { "epoch": 0.9611105070494002, "grad_norm": 2.629646411537965, "learning_rate": 3.9609136461826184e-08, "loss": 0.8971, "step": 26518 }, { "epoch": 0.9611467507520568, "grad_norm": 2.093641178735236, "learning_rate": 3.953543949631267e-08, "loss": 0.8082, "step": 26519 }, { "epoch": 0.9611829944547134, "grad_norm": 2.590331600632079, "learning_rate": 3.946181088300083e-08, "loss": 0.8708, "step": 26520 }, { "epoch": 0.9612192381573702, "grad_norm": 1.9243684939233907, "learning_rate": 3.9388250622904855e-08, "loss": 0.7896, "step": 26521 }, { "epoch": 0.9612554818600269, "grad_norm": 2.082530338719125, "learning_rate": 3.9314758717038934e-08, "loss": 1.055, "step": 26522 }, { "epoch": 0.9612917255626835, "grad_norm": 2.0254285693250442, "learning_rate": 3.9241335166415595e-08, "loss": 0.6794, "step": 26523 }, { "epoch": 0.9613279692653401, "grad_norm": 2.2820267043106646, "learning_rate": 3.916797997204624e-08, "loss": 0.7533, "step": 26524 }, { "epoch": 0.9613642129679968, "grad_norm": 2.5653901445141205, "learning_rate": 3.90946931349423e-08, "loss": 0.9369, "step": 26525 }, { "epoch": 0.9614004566706534, "grad_norm": 2.771693581834975, "learning_rate": 3.9021474656112947e-08, "loss": 0.8757, "step": 26526 }, { "epoch": 0.9614367003733101, "grad_norm": 2.4528821225552067, "learning_rate": 3.8948324536567383e-08, "loss": 0.7948, "step": 26527 }, { "epoch": 0.9614729440759668, "grad_norm": 2.1318249144361676, "learning_rate": 3.887524277731369e-08, "loss": 0.8371, "step": 26528 }, { "epoch": 0.9615091877786235, "grad_norm": 2.5302802070248656, "learning_rate": 3.880222937935885e-08, "loss": 0.9354, "step": 26529 }, { "epoch": 0.9615454314812801, "grad_norm": 2.46555554681473, "learning_rate": 3.8729284343708705e-08, "loss": 0.8894, "step": 26530 }, { "epoch": 0.9615816751839368, "grad_norm": 2.640960375254759, "learning_rate": 3.865640767136802e-08, "loss": 0.9087, "step": 26531 }, { "epoch": 0.9616179188865934, "grad_norm": 2.0901766738484393, "learning_rate": 3.8583599363342106e-08, "loss": 0.7704, "step": 26532 }, { "epoch": 0.9616541625892501, "grad_norm": 2.253016039090002, "learning_rate": 3.851085942063348e-08, "loss": 0.7744, "step": 26533 }, { "epoch": 0.9616904062919068, "grad_norm": 2.5222963701470613, "learning_rate": 3.843818784424469e-08, "loss": 0.834, "step": 26534 }, { "epoch": 0.9617266499945635, "grad_norm": 2.4539715590295734, "learning_rate": 3.836558463517659e-08, "loss": 0.8258, "step": 26535 }, { "epoch": 0.9617628936972201, "grad_norm": 2.5681932149218922, "learning_rate": 3.82930497944306e-08, "loss": 0.9108, "step": 26536 }, { "epoch": 0.9617991373998768, "grad_norm": 2.604335378448105, "learning_rate": 3.822058332300538e-08, "loss": 0.8382, "step": 26537 }, { "epoch": 0.9618353811025334, "grad_norm": 2.216076501482332, "learning_rate": 3.8148185221900114e-08, "loss": 0.7303, "step": 26538 }, { "epoch": 0.9618716248051901, "grad_norm": 2.4099420812160424, "learning_rate": 3.807585549211179e-08, "loss": 0.7611, "step": 26539 }, { "epoch": 0.9619078685078467, "grad_norm": 2.355019247871098, "learning_rate": 3.800359413463684e-08, "loss": 1.1169, "step": 26540 }, { "epoch": 0.9619441122105035, "grad_norm": 2.4253444887722213, "learning_rate": 3.793140115047222e-08, "loss": 0.9764, "step": 26541 }, { "epoch": 0.9619803559131601, "grad_norm": 2.3851978023523817, "learning_rate": 3.785927654061105e-08, "loss": 0.8796, "step": 26542 }, { "epoch": 0.9620165996158168, "grad_norm": 2.512308859363397, "learning_rate": 3.7787220306048644e-08, "loss": 0.8738, "step": 26543 }, { "epoch": 0.9620528433184734, "grad_norm": 2.3419873180947306, "learning_rate": 3.771523244777697e-08, "loss": 1.0066, "step": 26544 }, { "epoch": 0.96208908702113, "grad_norm": 2.3751462851132437, "learning_rate": 3.7643312966789134e-08, "loss": 0.941, "step": 26545 }, { "epoch": 0.9621253307237867, "grad_norm": 2.3177035631741054, "learning_rate": 3.757146186407434e-08, "loss": 1.1542, "step": 26546 }, { "epoch": 0.9621615744264435, "grad_norm": 2.4254118910779163, "learning_rate": 3.7499679140624025e-08, "loss": 0.9568, "step": 26547 }, { "epoch": 0.9621978181291001, "grad_norm": 2.264910475939865, "learning_rate": 3.742796479742683e-08, "loss": 0.9258, "step": 26548 }, { "epoch": 0.9622340618317567, "grad_norm": 2.0859186830036553, "learning_rate": 3.7356318835470864e-08, "loss": 1.0466, "step": 26549 }, { "epoch": 0.9622703055344134, "grad_norm": 2.3688165277713047, "learning_rate": 3.7284741255743665e-08, "loss": 0.7933, "step": 26550 }, { "epoch": 0.96230654923707, "grad_norm": 2.6139300008123625, "learning_rate": 3.7213232059231106e-08, "loss": 0.9372, "step": 26551 }, { "epoch": 0.9623427929397267, "grad_norm": 2.8565260293627373, "learning_rate": 3.714179124691963e-08, "loss": 0.8623, "step": 26552 }, { "epoch": 0.9623790366423833, "grad_norm": 2.119429365968085, "learning_rate": 3.707041881979234e-08, "loss": 0.787, "step": 26553 }, { "epoch": 0.9624152803450401, "grad_norm": 2.3023749414352808, "learning_rate": 3.699911477883289e-08, "loss": 1.0258, "step": 26554 }, { "epoch": 0.9624515240476967, "grad_norm": 2.2508027969389492, "learning_rate": 3.692787912502438e-08, "loss": 0.8678, "step": 26555 }, { "epoch": 0.9624877677503534, "grad_norm": 2.2767712708701433, "learning_rate": 3.685671185934825e-08, "loss": 0.9259, "step": 26556 }, { "epoch": 0.96252401145301, "grad_norm": 2.2883672748009487, "learning_rate": 3.6785612982784825e-08, "loss": 0.7982, "step": 26557 }, { "epoch": 0.9625602551556667, "grad_norm": 2.477385045065331, "learning_rate": 3.671458249631388e-08, "loss": 0.8601, "step": 26558 }, { "epoch": 0.9625964988583233, "grad_norm": 2.2272817551044333, "learning_rate": 3.664362040091463e-08, "loss": 0.8035, "step": 26559 }, { "epoch": 0.9626327425609801, "grad_norm": 2.15146523882658, "learning_rate": 3.657272669756406e-08, "loss": 0.6038, "step": 26560 }, { "epoch": 0.9626689862636367, "grad_norm": 2.2237891173822337, "learning_rate": 3.65019013872403e-08, "loss": 0.8682, "step": 26561 }, { "epoch": 0.9627052299662934, "grad_norm": 2.659307959102763, "learning_rate": 3.643114447091756e-08, "loss": 0.8722, "step": 26562 }, { "epoch": 0.96274147366895, "grad_norm": 2.0724796935866374, "learning_rate": 3.636045594957227e-08, "loss": 0.7837, "step": 26563 }, { "epoch": 0.9627777173716067, "grad_norm": 2.0451834906966524, "learning_rate": 3.628983582417811e-08, "loss": 0.8123, "step": 26564 }, { "epoch": 0.9628139610742633, "grad_norm": 2.26167503433374, "learning_rate": 3.6219284095708184e-08, "loss": 0.8401, "step": 26565 }, { "epoch": 0.96285020477692, "grad_norm": 2.4192240405440835, "learning_rate": 3.614880076513394e-08, "loss": 0.8264, "step": 26566 }, { "epoch": 0.9628864484795767, "grad_norm": 2.455121335368945, "learning_rate": 3.607838583342793e-08, "loss": 0.9551, "step": 26567 }, { "epoch": 0.9629226921822334, "grad_norm": 2.432167584697736, "learning_rate": 3.600803930155938e-08, "loss": 0.7767, "step": 26568 }, { "epoch": 0.96295893588489, "grad_norm": 2.39468917837481, "learning_rate": 3.593776117049752e-08, "loss": 0.914, "step": 26569 }, { "epoch": 0.9629951795875467, "grad_norm": 2.327492286107591, "learning_rate": 3.586755144121157e-08, "loss": 0.7922, "step": 26570 }, { "epoch": 0.9630314232902033, "grad_norm": 2.6428787033116787, "learning_rate": 3.5797410114667976e-08, "loss": 0.8717, "step": 26571 }, { "epoch": 0.96306766699286, "grad_norm": 2.973232914137338, "learning_rate": 3.5727337191834855e-08, "loss": 0.903, "step": 26572 }, { "epoch": 0.9631039106955167, "grad_norm": 2.406452111860706, "learning_rate": 3.565733267367588e-08, "loss": 0.9144, "step": 26573 }, { "epoch": 0.9631401543981734, "grad_norm": 2.5987192227255567, "learning_rate": 3.5587396561156396e-08, "loss": 0.9865, "step": 26574 }, { "epoch": 0.96317639810083, "grad_norm": 2.0238217283201827, "learning_rate": 3.5517528855240626e-08, "loss": 0.8284, "step": 26575 }, { "epoch": 0.9632126418034866, "grad_norm": 2.097663652729627, "learning_rate": 3.544772955689113e-08, "loss": 0.7453, "step": 26576 }, { "epoch": 0.9632488855061433, "grad_norm": 2.4720431436668555, "learning_rate": 3.537799866706937e-08, "loss": 0.8398, "step": 26577 }, { "epoch": 0.9632851292087999, "grad_norm": 2.4340446585429425, "learning_rate": 3.530833618673568e-08, "loss": 0.8297, "step": 26578 }, { "epoch": 0.9633213729114566, "grad_norm": 2.443887957630709, "learning_rate": 3.523874211685097e-08, "loss": 0.685, "step": 26579 }, { "epoch": 0.9633576166141133, "grad_norm": 2.6304572168556564, "learning_rate": 3.51692164583739e-08, "loss": 0.9341, "step": 26580 }, { "epoch": 0.96339386031677, "grad_norm": 2.628695666743898, "learning_rate": 3.509975921226261e-08, "loss": 0.8646, "step": 26581 }, { "epoch": 0.9634301040194266, "grad_norm": 2.476453298313948, "learning_rate": 3.5030370379472986e-08, "loss": 0.7121, "step": 26582 }, { "epoch": 0.9634663477220833, "grad_norm": 2.8063202005557297, "learning_rate": 3.496104996096317e-08, "loss": 0.9163, "step": 26583 }, { "epoch": 0.9635025914247399, "grad_norm": 2.257591553976527, "learning_rate": 3.489179795768682e-08, "loss": 1.0184, "step": 26584 }, { "epoch": 0.9635388351273966, "grad_norm": 2.267971071378228, "learning_rate": 3.4822614370599304e-08, "loss": 0.7285, "step": 26585 }, { "epoch": 0.9635750788300533, "grad_norm": 2.594192393709548, "learning_rate": 3.4753499200652626e-08, "loss": 1.0819, "step": 26586 }, { "epoch": 0.96361132253271, "grad_norm": 2.780580584840558, "learning_rate": 3.4684452448799924e-08, "loss": 0.814, "step": 26587 }, { "epoch": 0.9636475662353666, "grad_norm": 2.9146893635474065, "learning_rate": 3.461547411599264e-08, "loss": 0.8174, "step": 26588 }, { "epoch": 0.9636838099380233, "grad_norm": 2.4580659113885175, "learning_rate": 3.4546564203181146e-08, "loss": 0.773, "step": 26589 }, { "epoch": 0.9637200536406799, "grad_norm": 2.2267968594386787, "learning_rate": 3.447772271131522e-08, "loss": 0.6315, "step": 26590 }, { "epoch": 0.9637562973433366, "grad_norm": 2.4408870494567134, "learning_rate": 3.4408949641343e-08, "loss": 0.9165, "step": 26591 }, { "epoch": 0.9637925410459932, "grad_norm": 2.421089686535989, "learning_rate": 3.4340244994212625e-08, "loss": 0.9738, "step": 26592 }, { "epoch": 0.96382878474865, "grad_norm": 2.4376511481867955, "learning_rate": 3.427160877086999e-08, "loss": 0.8643, "step": 26593 }, { "epoch": 0.9638650284513066, "grad_norm": 2.2290473304108085, "learning_rate": 3.420304097226157e-08, "loss": 0.8404, "step": 26594 }, { "epoch": 0.9639012721539633, "grad_norm": 2.415546307553827, "learning_rate": 3.413454159933216e-08, "loss": 0.8982, "step": 26595 }, { "epoch": 0.9639375158566199, "grad_norm": 2.658773089916395, "learning_rate": 3.406611065302601e-08, "loss": 0.9462, "step": 26596 }, { "epoch": 0.9639737595592766, "grad_norm": 2.1939001668790694, "learning_rate": 3.399774813428513e-08, "loss": 0.9242, "step": 26597 }, { "epoch": 0.9640100032619332, "grad_norm": 2.1996122513075584, "learning_rate": 3.392945404405157e-08, "loss": 0.7242, "step": 26598 }, { "epoch": 0.9640462469645898, "grad_norm": 2.6460318263879015, "learning_rate": 3.386122838326733e-08, "loss": 0.7552, "step": 26599 }, { "epoch": 0.9640824906672466, "grad_norm": 2.4385763296547163, "learning_rate": 3.379307115287167e-08, "loss": 0.705, "step": 26600 }, { "epoch": 0.9641187343699033, "grad_norm": 2.3677531883700036, "learning_rate": 3.3724982353803836e-08, "loss": 0.942, "step": 26601 }, { "epoch": 0.9641549780725599, "grad_norm": 2.4000829646237545, "learning_rate": 3.365696198700252e-08, "loss": 0.8868, "step": 26602 }, { "epoch": 0.9641912217752165, "grad_norm": 4.468136149389627, "learning_rate": 3.358901005340476e-08, "loss": 0.8005, "step": 26603 }, { "epoch": 0.9642274654778732, "grad_norm": 2.0680254256949695, "learning_rate": 3.352112655394646e-08, "loss": 0.8428, "step": 26604 }, { "epoch": 0.9642637091805298, "grad_norm": 2.390408888908285, "learning_rate": 3.34533114895641e-08, "loss": 1.1214, "step": 26605 }, { "epoch": 0.9642999528831866, "grad_norm": 2.3182478859453095, "learning_rate": 3.3385564861190824e-08, "loss": 0.8338, "step": 26606 }, { "epoch": 0.9643361965858432, "grad_norm": 2.3272838705880843, "learning_rate": 3.331788666976088e-08, "loss": 0.8239, "step": 26607 }, { "epoch": 0.9643724402884999, "grad_norm": 2.3687446602582094, "learning_rate": 3.325027691620686e-08, "loss": 1.0202, "step": 26608 }, { "epoch": 0.9644086839911565, "grad_norm": 2.3240286578143383, "learning_rate": 3.3182735601460234e-08, "loss": 0.8889, "step": 26609 }, { "epoch": 0.9644449276938132, "grad_norm": 2.535055407165666, "learning_rate": 3.311526272645138e-08, "loss": 0.8857, "step": 26610 }, { "epoch": 0.9644811713964698, "grad_norm": 2.410359318645343, "learning_rate": 3.3047858292110105e-08, "loss": 1.0866, "step": 26611 }, { "epoch": 0.9645174150991265, "grad_norm": 2.2047379844901873, "learning_rate": 3.2980522299366215e-08, "loss": 0.8116, "step": 26612 }, { "epoch": 0.9645536588017832, "grad_norm": 2.354156146448434, "learning_rate": 3.2913254749146193e-08, "loss": 1.0656, "step": 26613 }, { "epoch": 0.9645899025044399, "grad_norm": 2.292576002682311, "learning_rate": 3.284605564237764e-08, "loss": 0.876, "step": 26614 }, { "epoch": 0.9646261462070965, "grad_norm": 2.575691825161927, "learning_rate": 3.2778924979987025e-08, "loss": 1.0345, "step": 26615 }, { "epoch": 0.9646623899097532, "grad_norm": 2.4502598618421723, "learning_rate": 3.271186276289806e-08, "loss": 0.8576, "step": 26616 }, { "epoch": 0.9646986336124098, "grad_norm": 2.521521763029864, "learning_rate": 3.264486899203556e-08, "loss": 0.8048, "step": 26617 }, { "epoch": 0.9647348773150665, "grad_norm": 2.285224077905037, "learning_rate": 3.2577943668322674e-08, "loss": 0.8565, "step": 26618 }, { "epoch": 0.9647711210177232, "grad_norm": 2.551923867476529, "learning_rate": 3.2511086792681444e-08, "loss": 0.9288, "step": 26619 }, { "epoch": 0.9648073647203799, "grad_norm": 2.424213323532481, "learning_rate": 3.2444298366033355e-08, "loss": 0.9892, "step": 26620 }, { "epoch": 0.9648436084230365, "grad_norm": 2.45738528104715, "learning_rate": 3.237757838929878e-08, "loss": 0.9159, "step": 26621 }, { "epoch": 0.9648798521256932, "grad_norm": 2.6931174488562637, "learning_rate": 3.231092686339643e-08, "loss": 0.8955, "step": 26622 }, { "epoch": 0.9649160958283498, "grad_norm": 2.342642541897177, "learning_rate": 3.224434378924501e-08, "loss": 0.9091, "step": 26623 }, { "epoch": 0.9649523395310065, "grad_norm": 2.3442418102345344, "learning_rate": 3.2177829167762684e-08, "loss": 0.8027, "step": 26624 }, { "epoch": 0.9649885832336631, "grad_norm": 2.244329963616829, "learning_rate": 3.211138299986483e-08, "loss": 0.9094, "step": 26625 }, { "epoch": 0.9650248269363199, "grad_norm": 2.091816555459343, "learning_rate": 3.204500528646848e-08, "loss": 0.7254, "step": 26626 }, { "epoch": 0.9650610706389765, "grad_norm": 2.4206078483112825, "learning_rate": 3.19786960284868e-08, "loss": 0.9291, "step": 26627 }, { "epoch": 0.9650973143416332, "grad_norm": 2.2594724527383994, "learning_rate": 3.191245522683406e-08, "loss": 0.8774, "step": 26628 }, { "epoch": 0.9651335580442898, "grad_norm": 2.6632065846511916, "learning_rate": 3.1846282882423416e-08, "loss": 0.8285, "step": 26629 }, { "epoch": 0.9651698017469464, "grad_norm": 2.490690918715305, "learning_rate": 3.178017899616637e-08, "loss": 0.7637, "step": 26630 }, { "epoch": 0.9652060454496031, "grad_norm": 2.419437145357505, "learning_rate": 3.171414356897329e-08, "loss": 1.0104, "step": 26631 }, { "epoch": 0.9652422891522598, "grad_norm": 2.459079564686741, "learning_rate": 3.164817660175512e-08, "loss": 0.9215, "step": 26632 }, { "epoch": 0.9652785328549165, "grad_norm": 2.9307131613510085, "learning_rate": 3.158227809542003e-08, "loss": 1.0385, "step": 26633 }, { "epoch": 0.9653147765575731, "grad_norm": 2.481522070922683, "learning_rate": 3.151644805087617e-08, "loss": 0.8868, "step": 26634 }, { "epoch": 0.9653510202602298, "grad_norm": 2.494711894429075, "learning_rate": 3.1450686469031154e-08, "loss": 1.0823, "step": 26635 }, { "epoch": 0.9653872639628864, "grad_norm": 2.179983421734403, "learning_rate": 3.138499335079093e-08, "loss": 0.7905, "step": 26636 }, { "epoch": 0.9654235076655431, "grad_norm": 2.425982532787963, "learning_rate": 3.1319368697060313e-08, "loss": 0.7069, "step": 26637 }, { "epoch": 0.9654597513681997, "grad_norm": 2.4302972905169833, "learning_rate": 3.1253812508743595e-08, "loss": 0.7917, "step": 26638 }, { "epoch": 0.9654959950708565, "grad_norm": 2.666720238125125, "learning_rate": 3.118832478674505e-08, "loss": 0.8408, "step": 26639 }, { "epoch": 0.9655322387735131, "grad_norm": 2.356079875003741, "learning_rate": 3.112290553196562e-08, "loss": 1.0112, "step": 26640 }, { "epoch": 0.9655684824761698, "grad_norm": 2.573413505528904, "learning_rate": 3.105755474530792e-08, "loss": 0.8574, "step": 26641 }, { "epoch": 0.9656047261788264, "grad_norm": 2.218966557494663, "learning_rate": 3.099227242767178e-08, "loss": 0.8548, "step": 26642 }, { "epoch": 0.9656409698814831, "grad_norm": 2.414402729371422, "learning_rate": 3.0927058579957035e-08, "loss": 1.0147, "step": 26643 }, { "epoch": 0.9656772135841397, "grad_norm": 2.1717369591462097, "learning_rate": 3.0861913203061866e-08, "loss": 0.8297, "step": 26644 }, { "epoch": 0.9657134572867965, "grad_norm": 2.216550409921294, "learning_rate": 3.079683629788499e-08, "loss": 0.8145, "step": 26645 }, { "epoch": 0.9657497009894531, "grad_norm": 2.5471532498236753, "learning_rate": 3.0731827865321806e-08, "loss": 0.979, "step": 26646 }, { "epoch": 0.9657859446921098, "grad_norm": 2.308318192906909, "learning_rate": 3.066688790626937e-08, "loss": 0.7798, "step": 26647 }, { "epoch": 0.9658221883947664, "grad_norm": 2.698698964632446, "learning_rate": 3.060201642162142e-08, "loss": 0.8722, "step": 26648 }, { "epoch": 0.9658584320974231, "grad_norm": 2.2475888789768357, "learning_rate": 3.0537213412272226e-08, "loss": 0.7132, "step": 26649 }, { "epoch": 0.9658946758000797, "grad_norm": 2.2581630109476984, "learning_rate": 3.047247887911497e-08, "loss": 0.972, "step": 26650 }, { "epoch": 0.9659309195027364, "grad_norm": 2.3700014507231693, "learning_rate": 3.040781282304117e-08, "loss": 0.8573, "step": 26651 }, { "epoch": 0.9659671632053931, "grad_norm": 2.5015209969977423, "learning_rate": 3.034321524494288e-08, "loss": 0.9188, "step": 26652 }, { "epoch": 0.9660034069080498, "grad_norm": 2.385469977982654, "learning_rate": 3.027868614570883e-08, "loss": 0.8809, "step": 26653 }, { "epoch": 0.9660396506107064, "grad_norm": 2.2513902741266927, "learning_rate": 3.021422552622943e-08, "loss": 0.791, "step": 26654 }, { "epoch": 0.966075894313363, "grad_norm": 2.4104586403628137, "learning_rate": 3.0149833387392304e-08, "loss": 0.9133, "step": 26655 }, { "epoch": 0.9661121380160197, "grad_norm": 1.9816471880104636, "learning_rate": 3.0085509730084506e-08, "loss": 0.8292, "step": 26656 }, { "epoch": 0.9661483817186763, "grad_norm": 2.247609323125149, "learning_rate": 3.002125455519256e-08, "loss": 0.8884, "step": 26657 }, { "epoch": 0.9661846254213331, "grad_norm": 2.08131757328039, "learning_rate": 2.9957067863601864e-08, "loss": 0.8436, "step": 26658 }, { "epoch": 0.9662208691239897, "grad_norm": 2.4950779702675403, "learning_rate": 2.989294965619782e-08, "loss": 0.8041, "step": 26659 }, { "epoch": 0.9662571128266464, "grad_norm": 2.5695723210623647, "learning_rate": 2.982889993386251e-08, "loss": 1.0365, "step": 26660 }, { "epoch": 0.966293356529303, "grad_norm": 2.600050850317327, "learning_rate": 2.9764918697479105e-08, "loss": 0.8576, "step": 26661 }, { "epoch": 0.9663296002319597, "grad_norm": 2.2572079200400035, "learning_rate": 2.970100594792913e-08, "loss": 0.7702, "step": 26662 }, { "epoch": 0.9663658439346163, "grad_norm": 2.3119066445173635, "learning_rate": 2.9637161686094094e-08, "loss": 0.9185, "step": 26663 }, { "epoch": 0.966402087637273, "grad_norm": 2.3909253672787263, "learning_rate": 2.957338591285219e-08, "loss": 0.8869, "step": 26664 }, { "epoch": 0.9664383313399297, "grad_norm": 2.479568596868105, "learning_rate": 2.950967862908327e-08, "loss": 0.9052, "step": 26665 }, { "epoch": 0.9664745750425864, "grad_norm": 2.4976867495700747, "learning_rate": 2.9446039835664965e-08, "loss": 0.8958, "step": 26666 }, { "epoch": 0.966510818745243, "grad_norm": 2.3875732421536235, "learning_rate": 2.938246953347379e-08, "loss": 0.8464, "step": 26667 }, { "epoch": 0.9665470624478997, "grad_norm": 2.4926477381464447, "learning_rate": 2.9318967723386825e-08, "loss": 0.7474, "step": 26668 }, { "epoch": 0.9665833061505563, "grad_norm": 2.467249664130816, "learning_rate": 2.9255534406277263e-08, "loss": 0.9987, "step": 26669 }, { "epoch": 0.966619549853213, "grad_norm": 1.8756595848124784, "learning_rate": 2.9192169583021068e-08, "loss": 0.7138, "step": 26670 }, { "epoch": 0.9666557935558696, "grad_norm": 2.3028742006272034, "learning_rate": 2.912887325449032e-08, "loss": 0.864, "step": 26671 }, { "epoch": 0.9666920372585264, "grad_norm": 2.3841309429792292, "learning_rate": 2.9065645421557097e-08, "loss": 0.9714, "step": 26672 }, { "epoch": 0.966728280961183, "grad_norm": 2.596828824068824, "learning_rate": 2.9002486085093486e-08, "loss": 0.8145, "step": 26673 }, { "epoch": 0.9667645246638397, "grad_norm": 2.4003823861095315, "learning_rate": 2.893939524596878e-08, "loss": 0.7565, "step": 26674 }, { "epoch": 0.9668007683664963, "grad_norm": 2.418528704686214, "learning_rate": 2.8876372905053407e-08, "loss": 0.8902, "step": 26675 }, { "epoch": 0.966837012069153, "grad_norm": 2.527665840680461, "learning_rate": 2.8813419063214442e-08, "loss": 0.9223, "step": 26676 }, { "epoch": 0.9668732557718096, "grad_norm": 2.4436507189421044, "learning_rate": 2.875053372132064e-08, "loss": 0.9095, "step": 26677 }, { "epoch": 0.9669094994744664, "grad_norm": 2.312785010239062, "learning_rate": 2.8687716880237414e-08, "loss": 0.7333, "step": 26678 }, { "epoch": 0.966945743177123, "grad_norm": 2.2604496796498106, "learning_rate": 2.86249685408313e-08, "loss": 0.9721, "step": 26679 }, { "epoch": 0.9669819868797797, "grad_norm": 2.7331323465706236, "learning_rate": 2.85622887039666e-08, "loss": 0.9065, "step": 26680 }, { "epoch": 0.9670182305824363, "grad_norm": 2.5011398445757855, "learning_rate": 2.8499677370506518e-08, "loss": 0.7502, "step": 26681 }, { "epoch": 0.967054474285093, "grad_norm": 2.3099434583919516, "learning_rate": 2.8437134541314247e-08, "loss": 0.7181, "step": 26682 }, { "epoch": 0.9670907179877496, "grad_norm": 2.690461348807175, "learning_rate": 2.8374660217252435e-08, "loss": 0.893, "step": 26683 }, { "epoch": 0.9671269616904062, "grad_norm": 2.1334751443590045, "learning_rate": 2.8312254399179838e-08, "loss": 0.9881, "step": 26684 }, { "epoch": 0.967163205393063, "grad_norm": 2.486430097535689, "learning_rate": 2.8249917087958546e-08, "loss": 0.8223, "step": 26685 }, { "epoch": 0.9671994490957196, "grad_norm": 2.550695640459295, "learning_rate": 2.81876482844462e-08, "loss": 0.9164, "step": 26686 }, { "epoch": 0.9672356927983763, "grad_norm": 2.522092511560327, "learning_rate": 2.812544798950101e-08, "loss": 0.8227, "step": 26687 }, { "epoch": 0.9672719365010329, "grad_norm": 3.797098086373866, "learning_rate": 2.8063316203980618e-08, "loss": 1.0023, "step": 26688 }, { "epoch": 0.9673081802036896, "grad_norm": 2.2622336278514372, "learning_rate": 2.8001252928740453e-08, "loss": 0.7641, "step": 26689 }, { "epoch": 0.9673444239063462, "grad_norm": 2.4564496162077467, "learning_rate": 2.7939258164636496e-08, "loss": 0.7266, "step": 26690 }, { "epoch": 0.967380667609003, "grad_norm": 2.5165306809020342, "learning_rate": 2.7877331912521953e-08, "loss": 0.9145, "step": 26691 }, { "epoch": 0.9674169113116596, "grad_norm": 2.3184437604296235, "learning_rate": 2.7815474173251143e-08, "loss": 0.8696, "step": 26692 }, { "epoch": 0.9674531550143163, "grad_norm": 2.4389961069807247, "learning_rate": 2.775368494767616e-08, "loss": 0.8203, "step": 26693 }, { "epoch": 0.9674893987169729, "grad_norm": 2.2164801948444675, "learning_rate": 2.7691964236647995e-08, "loss": 0.8809, "step": 26694 }, { "epoch": 0.9675256424196296, "grad_norm": 2.549235597616041, "learning_rate": 2.7630312041017627e-08, "loss": 0.8258, "step": 26695 }, { "epoch": 0.9675618861222862, "grad_norm": 2.309542335278134, "learning_rate": 2.7568728361634377e-08, "loss": 0.6782, "step": 26696 }, { "epoch": 0.9675981298249429, "grad_norm": 2.773917906026145, "learning_rate": 2.7507213199346462e-08, "loss": 1.0347, "step": 26697 }, { "epoch": 0.9676343735275996, "grad_norm": 2.2589563101558605, "learning_rate": 2.7445766555002084e-08, "loss": 0.8803, "step": 26698 }, { "epoch": 0.9676706172302563, "grad_norm": 2.150603179348984, "learning_rate": 2.7384388429447796e-08, "loss": 0.8686, "step": 26699 }, { "epoch": 0.9677068609329129, "grad_norm": 2.2985126342925115, "learning_rate": 2.7323078823529025e-08, "loss": 1.0017, "step": 26700 }, { "epoch": 0.9677431046355696, "grad_norm": 2.1976187496056676, "learning_rate": 2.7261837738091213e-08, "loss": 0.7713, "step": 26701 }, { "epoch": 0.9677793483382262, "grad_norm": 2.551630569052256, "learning_rate": 2.720066517397757e-08, "loss": 1.1359, "step": 26702 }, { "epoch": 0.9678155920408829, "grad_norm": 2.413801130250364, "learning_rate": 2.7139561132031313e-08, "loss": 0.9181, "step": 26703 }, { "epoch": 0.9678518357435396, "grad_norm": 2.6618710146860023, "learning_rate": 2.7078525613094542e-08, "loss": 0.7837, "step": 26704 }, { "epoch": 0.9678880794461963, "grad_norm": 2.466835307239114, "learning_rate": 2.7017558618008254e-08, "loss": 0.8494, "step": 26705 }, { "epoch": 0.9679243231488529, "grad_norm": 2.409552394419739, "learning_rate": 2.6956660147612334e-08, "loss": 0.8404, "step": 26706 }, { "epoch": 0.9679605668515096, "grad_norm": 2.409769220132201, "learning_rate": 2.689583020274611e-08, "loss": 0.8578, "step": 26707 }, { "epoch": 0.9679968105541662, "grad_norm": 2.2813095238244028, "learning_rate": 2.6835068784247798e-08, "loss": 0.9571, "step": 26708 }, { "epoch": 0.9680330542568228, "grad_norm": 2.2192615048144106, "learning_rate": 2.677437589295395e-08, "loss": 0.9666, "step": 26709 }, { "epoch": 0.9680692979594795, "grad_norm": 2.1959418926186833, "learning_rate": 2.6713751529702238e-08, "loss": 0.9017, "step": 26710 }, { "epoch": 0.9681055416621362, "grad_norm": 2.4648507468894976, "learning_rate": 2.6653195695326984e-08, "loss": 0.9225, "step": 26711 }, { "epoch": 0.9681417853647929, "grad_norm": 2.1738219523606093, "learning_rate": 2.659270839066308e-08, "loss": 0.9449, "step": 26712 }, { "epoch": 0.9681780290674495, "grad_norm": 2.258641089963211, "learning_rate": 2.653228961654375e-08, "loss": 0.7821, "step": 26713 }, { "epoch": 0.9682142727701062, "grad_norm": 2.4071628314642797, "learning_rate": 2.647193937380166e-08, "loss": 0.8217, "step": 26714 }, { "epoch": 0.9682505164727628, "grad_norm": 2.2751922738351986, "learning_rate": 2.6411657663268364e-08, "loss": 0.9517, "step": 26715 }, { "epoch": 0.9682867601754195, "grad_norm": 2.378406799611099, "learning_rate": 2.6351444485773757e-08, "loss": 0.9464, "step": 26716 }, { "epoch": 0.9683230038780762, "grad_norm": 2.5288875120783385, "learning_rate": 2.6291299842149398e-08, "loss": 0.8327, "step": 26717 }, { "epoch": 0.9683592475807329, "grad_norm": 2.577507718080269, "learning_rate": 2.62312237332224e-08, "loss": 0.7966, "step": 26718 }, { "epoch": 0.9683954912833895, "grad_norm": 2.3155507547069507, "learning_rate": 2.6171216159820988e-08, "loss": 0.8162, "step": 26719 }, { "epoch": 0.9684317349860462, "grad_norm": 2.411767324567162, "learning_rate": 2.6111277122772284e-08, "loss": 0.886, "step": 26720 }, { "epoch": 0.9684679786887028, "grad_norm": 2.5758331402976014, "learning_rate": 2.6051406622901733e-08, "loss": 0.8403, "step": 26721 }, { "epoch": 0.9685042223913595, "grad_norm": 2.394741742915782, "learning_rate": 2.599160466103534e-08, "loss": 0.8022, "step": 26722 }, { "epoch": 0.9685404660940161, "grad_norm": 2.4522219866422255, "learning_rate": 2.5931871237996342e-08, "loss": 0.9102, "step": 26723 }, { "epoch": 0.9685767097966729, "grad_norm": 2.1958356142189164, "learning_rate": 2.587220635460741e-08, "loss": 0.7052, "step": 26724 }, { "epoch": 0.9686129534993295, "grad_norm": 2.6033322301761372, "learning_rate": 2.581261001169122e-08, "loss": 0.8967, "step": 26725 }, { "epoch": 0.9686491972019862, "grad_norm": 2.7035129187216587, "learning_rate": 2.5753082210069335e-08, "loss": 0.9529, "step": 26726 }, { "epoch": 0.9686854409046428, "grad_norm": 2.4278744123970215, "learning_rate": 2.569362295056166e-08, "loss": 0.9245, "step": 26727 }, { "epoch": 0.9687216846072995, "grad_norm": 2.342251342958067, "learning_rate": 2.5634232233987532e-08, "loss": 0.9399, "step": 26728 }, { "epoch": 0.9687579283099561, "grad_norm": 2.381880150440012, "learning_rate": 2.5574910061165192e-08, "loss": 0.8905, "step": 26729 }, { "epoch": 0.9687941720126128, "grad_norm": 2.730723203949451, "learning_rate": 2.551565643291176e-08, "loss": 1.0736, "step": 26730 }, { "epoch": 0.9688304157152695, "grad_norm": 2.373479911483664, "learning_rate": 2.545647135004492e-08, "loss": 0.792, "step": 26731 }, { "epoch": 0.9688666594179262, "grad_norm": 2.4679042151443316, "learning_rate": 2.539735481337846e-08, "loss": 0.8811, "step": 26732 }, { "epoch": 0.9689029031205828, "grad_norm": 2.456031626080923, "learning_rate": 2.5338306823728953e-08, "loss": 0.9985, "step": 26733 }, { "epoch": 0.9689391468232394, "grad_norm": 2.321310776651317, "learning_rate": 2.527932738190797e-08, "loss": 0.8782, "step": 26734 }, { "epoch": 0.9689753905258961, "grad_norm": 2.304439701994012, "learning_rate": 2.5220416488729858e-08, "loss": 0.8363, "step": 26735 }, { "epoch": 0.9690116342285527, "grad_norm": 2.333768937871579, "learning_rate": 2.5161574145005642e-08, "loss": 0.7828, "step": 26736 }, { "epoch": 0.9690478779312095, "grad_norm": 2.3414609727815754, "learning_rate": 2.510280035154633e-08, "loss": 1.066, "step": 26737 }, { "epoch": 0.9690841216338661, "grad_norm": 2.3838771286393805, "learning_rate": 2.504409510916128e-08, "loss": 1.0363, "step": 26738 }, { "epoch": 0.9691203653365228, "grad_norm": 2.4310236967121153, "learning_rate": 2.4985458418660403e-08, "loss": 0.8287, "step": 26739 }, { "epoch": 0.9691566090391794, "grad_norm": 2.3092718789150313, "learning_rate": 2.4926890280850824e-08, "loss": 0.9028, "step": 26740 }, { "epoch": 0.9691928527418361, "grad_norm": 2.211910736782376, "learning_rate": 2.486839069653968e-08, "loss": 0.8803, "step": 26741 }, { "epoch": 0.9692290964444927, "grad_norm": 2.4692604147110773, "learning_rate": 2.4809959666533543e-08, "loss": 0.9105, "step": 26742 }, { "epoch": 0.9692653401471494, "grad_norm": 2.31010577172117, "learning_rate": 2.4751597191637333e-08, "loss": 0.7337, "step": 26743 }, { "epoch": 0.9693015838498061, "grad_norm": 2.5380851260161617, "learning_rate": 2.4693303272654845e-08, "loss": 0.9773, "step": 26744 }, { "epoch": 0.9693378275524628, "grad_norm": 2.4255467340898536, "learning_rate": 2.4635077910389883e-08, "loss": 0.7371, "step": 26745 }, { "epoch": 0.9693740712551194, "grad_norm": 2.5095419961995487, "learning_rate": 2.457692110564458e-08, "loss": 0.9736, "step": 26746 }, { "epoch": 0.9694103149577761, "grad_norm": 2.5917306043660098, "learning_rate": 2.451883285922052e-08, "loss": 0.7555, "step": 26747 }, { "epoch": 0.9694465586604327, "grad_norm": 2.3913922470344042, "learning_rate": 2.446081317191762e-08, "loss": 0.95, "step": 26748 }, { "epoch": 0.9694828023630894, "grad_norm": 2.108545406295391, "learning_rate": 2.4402862044535235e-08, "loss": 0.8008, "step": 26749 }, { "epoch": 0.9695190460657461, "grad_norm": 2.291113358357015, "learning_rate": 2.4344979477872732e-08, "loss": 0.6829, "step": 26750 }, { "epoch": 0.9695552897684028, "grad_norm": 2.1648153869316578, "learning_rate": 2.428716547272725e-08, "loss": 0.9478, "step": 26751 }, { "epoch": 0.9695915334710594, "grad_norm": 2.266124660429909, "learning_rate": 2.4229420029894813e-08, "loss": 0.7852, "step": 26752 }, { "epoch": 0.9696277771737161, "grad_norm": 2.379134479065015, "learning_rate": 2.4171743150172566e-08, "loss": 0.8017, "step": 26753 }, { "epoch": 0.9696640208763727, "grad_norm": 2.4383027484908313, "learning_rate": 2.4114134834353764e-08, "loss": 1.0452, "step": 26754 }, { "epoch": 0.9697002645790294, "grad_norm": 1.9949328983556724, "learning_rate": 2.4056595083233324e-08, "loss": 0.7227, "step": 26755 }, { "epoch": 0.969736508281686, "grad_norm": 2.5659964813310494, "learning_rate": 2.3999123897602837e-08, "loss": 0.9968, "step": 26756 }, { "epoch": 0.9697727519843428, "grad_norm": 2.6335871542858, "learning_rate": 2.3941721278256114e-08, "loss": 0.8432, "step": 26757 }, { "epoch": 0.9698089956869994, "grad_norm": 2.6884856891288176, "learning_rate": 2.388438722598252e-08, "loss": 0.8808, "step": 26758 }, { "epoch": 0.969845239389656, "grad_norm": 2.569681444163195, "learning_rate": 2.3827121741572536e-08, "loss": 0.9229, "step": 26759 }, { "epoch": 0.9698814830923127, "grad_norm": 2.28873690518335, "learning_rate": 2.3769924825815526e-08, "loss": 0.8668, "step": 26760 }, { "epoch": 0.9699177267949693, "grad_norm": 2.7605177691692644, "learning_rate": 2.37127964794992e-08, "loss": 0.744, "step": 26761 }, { "epoch": 0.969953970497626, "grad_norm": 2.490615655774322, "learning_rate": 2.3655736703411257e-08, "loss": 0.9612, "step": 26762 }, { "epoch": 0.9699902142002828, "grad_norm": 2.498392494571374, "learning_rate": 2.359874549833774e-08, "loss": 0.9186, "step": 26763 }, { "epoch": 0.9700264579029394, "grad_norm": 2.183247553021875, "learning_rate": 2.3541822865063568e-08, "loss": 0.7056, "step": 26764 }, { "epoch": 0.970062701605596, "grad_norm": 2.3545465259145293, "learning_rate": 2.348496880437312e-08, "loss": 0.931, "step": 26765 }, { "epoch": 0.9700989453082527, "grad_norm": 2.5162323086868086, "learning_rate": 2.3428183317050767e-08, "loss": 0.8858, "step": 26766 }, { "epoch": 0.9701351890109093, "grad_norm": 2.4312236234427966, "learning_rate": 2.3371466403878108e-08, "loss": 0.7291, "step": 26767 }, { "epoch": 0.970171432713566, "grad_norm": 2.198431401142583, "learning_rate": 2.3314818065636734e-08, "loss": 1.0093, "step": 26768 }, { "epoch": 0.9702076764162226, "grad_norm": 2.784015893431531, "learning_rate": 2.325823830310714e-08, "loss": 1.0886, "step": 26769 }, { "epoch": 0.9702439201188794, "grad_norm": 2.416820428652327, "learning_rate": 2.3201727117069804e-08, "loss": 0.8882, "step": 26770 }, { "epoch": 0.970280163821536, "grad_norm": 2.2842983705251467, "learning_rate": 2.3145284508302444e-08, "loss": 0.7627, "step": 26771 }, { "epoch": 0.9703164075241927, "grad_norm": 2.536731992634616, "learning_rate": 2.3088910477582772e-08, "loss": 0.8226, "step": 26772 }, { "epoch": 0.9703526512268493, "grad_norm": 2.389415451580003, "learning_rate": 2.3032605025688493e-08, "loss": 0.8521, "step": 26773 }, { "epoch": 0.970388894929506, "grad_norm": 2.3133808270147433, "learning_rate": 2.29763681533951e-08, "loss": 0.8817, "step": 26774 }, { "epoch": 0.9704251386321626, "grad_norm": 2.5039532764371546, "learning_rate": 2.2920199861476977e-08, "loss": 0.8414, "step": 26775 }, { "epoch": 0.9704613823348194, "grad_norm": 2.3662507837943623, "learning_rate": 2.28641001507085e-08, "loss": 0.746, "step": 26776 }, { "epoch": 0.970497626037476, "grad_norm": 2.3526103801584695, "learning_rate": 2.2808069021862943e-08, "loss": 0.8963, "step": 26777 }, { "epoch": 0.9705338697401327, "grad_norm": 2.3242974972339843, "learning_rate": 2.2752106475711357e-08, "loss": 1.0289, "step": 26778 }, { "epoch": 0.9705701134427893, "grad_norm": 2.2113768846622506, "learning_rate": 2.269621251302645e-08, "loss": 0.9494, "step": 26779 }, { "epoch": 0.970606357145446, "grad_norm": 2.2689238578548263, "learning_rate": 2.264038713457706e-08, "loss": 0.8713, "step": 26780 }, { "epoch": 0.9706426008481026, "grad_norm": 2.2075855192855416, "learning_rate": 2.2584630341133117e-08, "loss": 0.9012, "step": 26781 }, { "epoch": 0.9706788445507593, "grad_norm": 2.496893746914051, "learning_rate": 2.2528942133462906e-08, "loss": 0.9507, "step": 26782 }, { "epoch": 0.970715088253416, "grad_norm": 2.5454641268078757, "learning_rate": 2.247332251233303e-08, "loss": 1.0488, "step": 26783 }, { "epoch": 0.9707513319560727, "grad_norm": 2.2777874660014654, "learning_rate": 2.2417771478510653e-08, "loss": 0.884, "step": 26784 }, { "epoch": 0.9707875756587293, "grad_norm": 2.458809643868807, "learning_rate": 2.2362289032761274e-08, "loss": 0.7427, "step": 26785 }, { "epoch": 0.970823819361386, "grad_norm": 2.2947874873772025, "learning_rate": 2.2306875175849287e-08, "loss": 0.803, "step": 26786 }, { "epoch": 0.9708600630640426, "grad_norm": 2.5578456086200654, "learning_rate": 2.225152990853796e-08, "loss": 0.9644, "step": 26787 }, { "epoch": 0.9708963067666992, "grad_norm": 2.0635456713872826, "learning_rate": 2.2196253231590025e-08, "loss": 0.8606, "step": 26788 }, { "epoch": 0.970932550469356, "grad_norm": 2.5115228851882927, "learning_rate": 2.2141045145767092e-08, "loss": 0.7922, "step": 26789 }, { "epoch": 0.9709687941720127, "grad_norm": 2.1730423588819523, "learning_rate": 2.208590565183022e-08, "loss": 0.9346, "step": 26790 }, { "epoch": 0.9710050378746693, "grad_norm": 2.2854118135291355, "learning_rate": 2.2030834750539355e-08, "loss": 0.8822, "step": 26791 }, { "epoch": 0.9710412815773259, "grad_norm": 2.004737315871997, "learning_rate": 2.1975832442652222e-08, "loss": 0.9603, "step": 26792 }, { "epoch": 0.9710775252799826, "grad_norm": 2.4447532112160895, "learning_rate": 2.192089872892822e-08, "loss": 0.8022, "step": 26793 }, { "epoch": 0.9711137689826392, "grad_norm": 2.4725258728741832, "learning_rate": 2.1866033610123406e-08, "loss": 0.9488, "step": 26794 }, { "epoch": 0.9711500126852959, "grad_norm": 2.4001247728897357, "learning_rate": 2.1811237086993843e-08, "loss": 0.9019, "step": 26795 }, { "epoch": 0.9711862563879526, "grad_norm": 2.167218217476562, "learning_rate": 2.175650916029448e-08, "loss": 0.6784, "step": 26796 }, { "epoch": 0.9712225000906093, "grad_norm": 2.2675797519646497, "learning_rate": 2.1701849830780274e-08, "loss": 0.6687, "step": 26797 }, { "epoch": 0.9712587437932659, "grad_norm": 2.61570537276518, "learning_rate": 2.16472590992034e-08, "loss": 0.8606, "step": 26798 }, { "epoch": 0.9712949874959226, "grad_norm": 2.2572229713631313, "learning_rate": 2.1592736966316586e-08, "loss": 0.9285, "step": 26799 }, { "epoch": 0.9713312311985792, "grad_norm": 2.53824983504255, "learning_rate": 2.1538283432871454e-08, "loss": 0.7631, "step": 26800 }, { "epoch": 0.9713674749012359, "grad_norm": 2.672883261743204, "learning_rate": 2.1483898499617407e-08, "loss": 0.9812, "step": 26801 }, { "epoch": 0.9714037186038925, "grad_norm": 2.2725307250705087, "learning_rate": 2.1429582167304396e-08, "loss": 0.8832, "step": 26802 }, { "epoch": 0.9714399623065493, "grad_norm": 2.274242734053884, "learning_rate": 2.137533443668016e-08, "loss": 0.808, "step": 26803 }, { "epoch": 0.9714762060092059, "grad_norm": 2.2431973586038763, "learning_rate": 2.13211553084941e-08, "loss": 0.9314, "step": 26804 }, { "epoch": 0.9715124497118626, "grad_norm": 2.580881923706948, "learning_rate": 2.126704478349062e-08, "loss": 1.1022, "step": 26805 }, { "epoch": 0.9715486934145192, "grad_norm": 2.461112547874835, "learning_rate": 2.1213002862416343e-08, "loss": 0.7769, "step": 26806 }, { "epoch": 0.9715849371171759, "grad_norm": 2.169390812214771, "learning_rate": 2.115902954601623e-08, "loss": 0.6805, "step": 26807 }, { "epoch": 0.9716211808198325, "grad_norm": 2.4427285414738105, "learning_rate": 2.110512483503302e-08, "loss": 0.719, "step": 26808 }, { "epoch": 0.9716574245224893, "grad_norm": 2.5434544484949324, "learning_rate": 2.105128873021056e-08, "loss": 0.9879, "step": 26809 }, { "epoch": 0.9716936682251459, "grad_norm": 2.4549979163455022, "learning_rate": 2.099752123228993e-08, "loss": 0.8218, "step": 26810 }, { "epoch": 0.9717299119278026, "grad_norm": 2.1344877293549853, "learning_rate": 2.09438223420122e-08, "loss": 0.7174, "step": 26811 }, { "epoch": 0.9717661556304592, "grad_norm": 2.590797710520532, "learning_rate": 2.0890192060117332e-08, "loss": 0.6664, "step": 26812 }, { "epoch": 0.9718023993331159, "grad_norm": 2.6253196139530837, "learning_rate": 2.0836630387344737e-08, "loss": 0.9678, "step": 26813 }, { "epoch": 0.9718386430357725, "grad_norm": 2.3867305032687867, "learning_rate": 2.07831373244316e-08, "loss": 0.9856, "step": 26814 }, { "epoch": 0.9718748867384291, "grad_norm": 2.2724092797703346, "learning_rate": 2.072971287211567e-08, "loss": 0.8163, "step": 26815 }, { "epoch": 0.9719111304410859, "grad_norm": 2.4004967863446303, "learning_rate": 2.067635703113302e-08, "loss": 0.8422, "step": 26816 }, { "epoch": 0.9719473741437425, "grad_norm": 2.4078572761091084, "learning_rate": 2.0623069802218622e-08, "loss": 0.8742, "step": 26817 }, { "epoch": 0.9719836178463992, "grad_norm": 2.681413738419261, "learning_rate": 2.0569851186106883e-08, "loss": 0.9865, "step": 26818 }, { "epoch": 0.9720198615490558, "grad_norm": 2.4931999399188682, "learning_rate": 2.0516701183531107e-08, "loss": 0.9535, "step": 26819 }, { "epoch": 0.9720561052517125, "grad_norm": 2.4147699850936775, "learning_rate": 2.0463619795224043e-08, "loss": 0.9216, "step": 26820 }, { "epoch": 0.9720923489543691, "grad_norm": 2.526566733418284, "learning_rate": 2.0410607021916773e-08, "loss": 0.8745, "step": 26821 }, { "epoch": 0.9721285926570259, "grad_norm": 2.795666514603105, "learning_rate": 2.0357662864339267e-08, "loss": 0.8342, "step": 26822 }, { "epoch": 0.9721648363596825, "grad_norm": 2.2260464768137695, "learning_rate": 2.0304787323221496e-08, "loss": 0.8911, "step": 26823 }, { "epoch": 0.9722010800623392, "grad_norm": 2.112926351443375, "learning_rate": 2.0251980399292882e-08, "loss": 0.7523, "step": 26824 }, { "epoch": 0.9722373237649958, "grad_norm": 2.4619161375339695, "learning_rate": 2.0199242093280058e-08, "loss": 0.74, "step": 26825 }, { "epoch": 0.9722735674676525, "grad_norm": 2.558769964518255, "learning_rate": 2.014657240590967e-08, "loss": 0.8858, "step": 26826 }, { "epoch": 0.9723098111703091, "grad_norm": 2.335645968417271, "learning_rate": 2.0093971337907802e-08, "loss": 0.7741, "step": 26827 }, { "epoch": 0.9723460548729658, "grad_norm": 2.277918698234499, "learning_rate": 2.0041438889999433e-08, "loss": 0.8207, "step": 26828 }, { "epoch": 0.9723822985756225, "grad_norm": 2.2066879588613166, "learning_rate": 1.9988975062908422e-08, "loss": 0.8174, "step": 26829 }, { "epoch": 0.9724185422782792, "grad_norm": 2.558618772358114, "learning_rate": 1.993657985735753e-08, "loss": 0.9124, "step": 26830 }, { "epoch": 0.9724547859809358, "grad_norm": 2.2780289388258352, "learning_rate": 1.9884253274068398e-08, "loss": 0.9002, "step": 26831 }, { "epoch": 0.9724910296835925, "grad_norm": 2.379631458369924, "learning_rate": 1.9831995313762676e-08, "loss": 0.8772, "step": 26832 }, { "epoch": 0.9725272733862491, "grad_norm": 2.1722259564762667, "learning_rate": 1.9779805977159783e-08, "loss": 0.9404, "step": 26833 }, { "epoch": 0.9725635170889058, "grad_norm": 2.70033083180968, "learning_rate": 1.97276852649797e-08, "loss": 0.8418, "step": 26834 }, { "epoch": 0.9725997607915625, "grad_norm": 2.3782996525942064, "learning_rate": 1.9675633177939635e-08, "loss": 0.9204, "step": 26835 }, { "epoch": 0.9726360044942192, "grad_norm": 2.085209203051201, "learning_rate": 1.9623649716757343e-08, "loss": 0.7018, "step": 26836 }, { "epoch": 0.9726722481968758, "grad_norm": 2.52034747076191, "learning_rate": 1.9571734882149474e-08, "loss": 0.777, "step": 26837 }, { "epoch": 0.9727084918995325, "grad_norm": 2.5373970193744086, "learning_rate": 1.9519888674831013e-08, "loss": 0.9641, "step": 26838 }, { "epoch": 0.9727447356021891, "grad_norm": 2.035422568210206, "learning_rate": 1.9468111095516383e-08, "loss": 0.734, "step": 26839 }, { "epoch": 0.9727809793048457, "grad_norm": 1.9808672470477835, "learning_rate": 1.9416402144918912e-08, "loss": 0.7173, "step": 26840 }, { "epoch": 0.9728172230075024, "grad_norm": 2.5342597168296197, "learning_rate": 1.9364761823751355e-08, "loss": 0.8041, "step": 26841 }, { "epoch": 0.9728534667101592, "grad_norm": 2.2661377629163613, "learning_rate": 1.9313190132725368e-08, "loss": 0.8088, "step": 26842 }, { "epoch": 0.9728897104128158, "grad_norm": 2.029719508008864, "learning_rate": 1.926168707255094e-08, "loss": 0.7049, "step": 26843 }, { "epoch": 0.9729259541154724, "grad_norm": 2.3127941849698184, "learning_rate": 1.921025264393861e-08, "loss": 0.8693, "step": 26844 }, { "epoch": 0.9729621978181291, "grad_norm": 2.6675300230386387, "learning_rate": 1.9158886847596148e-08, "loss": 0.9064, "step": 26845 }, { "epoch": 0.9729984415207857, "grad_norm": 2.338711065226723, "learning_rate": 1.9107589684232986e-08, "loss": 0.7663, "step": 26846 }, { "epoch": 0.9730346852234424, "grad_norm": 2.4122328013593135, "learning_rate": 1.9056361154554116e-08, "loss": 0.8792, "step": 26847 }, { "epoch": 0.9730709289260991, "grad_norm": 2.465204770702677, "learning_rate": 1.9005201259266192e-08, "loss": 0.9147, "step": 26848 }, { "epoch": 0.9731071726287558, "grad_norm": 2.2952066219794203, "learning_rate": 1.895410999907421e-08, "loss": 0.9758, "step": 26849 }, { "epoch": 0.9731434163314124, "grad_norm": 2.503533492160827, "learning_rate": 1.8903087374682048e-08, "loss": 0.9058, "step": 26850 }, { "epoch": 0.9731796600340691, "grad_norm": 2.3453013270066823, "learning_rate": 1.8852133386793038e-08, "loss": 0.713, "step": 26851 }, { "epoch": 0.9732159037367257, "grad_norm": 2.3439012504398122, "learning_rate": 1.880124803610939e-08, "loss": 0.7207, "step": 26852 }, { "epoch": 0.9732521474393824, "grad_norm": 2.400151187949425, "learning_rate": 1.8750431323331654e-08, "loss": 0.9963, "step": 26853 }, { "epoch": 0.973288391142039, "grad_norm": 2.3072390221850787, "learning_rate": 1.869968324916094e-08, "loss": 0.9006, "step": 26854 }, { "epoch": 0.9733246348446958, "grad_norm": 2.537166490626268, "learning_rate": 1.864900381429502e-08, "loss": 0.8078, "step": 26855 }, { "epoch": 0.9733608785473524, "grad_norm": 2.419845044515899, "learning_rate": 1.8598393019433892e-08, "loss": 0.6327, "step": 26856 }, { "epoch": 0.9733971222500091, "grad_norm": 2.3616030656498355, "learning_rate": 1.854785086527422e-08, "loss": 0.7942, "step": 26857 }, { "epoch": 0.9734333659526657, "grad_norm": 2.4116678218073226, "learning_rate": 1.8497377352512667e-08, "loss": 1.1901, "step": 26858 }, { "epoch": 0.9734696096553224, "grad_norm": 2.5796808598892627, "learning_rate": 1.844697248184424e-08, "loss": 1.0063, "step": 26859 }, { "epoch": 0.973505853357979, "grad_norm": 2.506418548179301, "learning_rate": 1.839663625396393e-08, "loss": 0.9097, "step": 26860 }, { "epoch": 0.9735420970606358, "grad_norm": 2.5137155601992225, "learning_rate": 1.8346368669565075e-08, "loss": 1.0146, "step": 26861 }, { "epoch": 0.9735783407632924, "grad_norm": 2.309163067161272, "learning_rate": 1.8296169729339898e-08, "loss": 0.8679, "step": 26862 }, { "epoch": 0.9736145844659491, "grad_norm": 2.2615910288015657, "learning_rate": 1.8246039433981178e-08, "loss": 0.8664, "step": 26863 }, { "epoch": 0.9736508281686057, "grad_norm": 2.6078934900635535, "learning_rate": 1.8195977784179475e-08, "loss": 0.9593, "step": 26864 }, { "epoch": 0.9736870718712624, "grad_norm": 2.2293288118476755, "learning_rate": 1.8145984780624238e-08, "loss": 1.02, "step": 26865 }, { "epoch": 0.973723315573919, "grad_norm": 2.198758249133059, "learning_rate": 1.8096060424003803e-08, "loss": 1.0135, "step": 26866 }, { "epoch": 0.9737595592765756, "grad_norm": 2.4970983511641847, "learning_rate": 1.804620471500762e-08, "loss": 1.1065, "step": 26867 }, { "epoch": 0.9737958029792324, "grad_norm": 2.243727226159782, "learning_rate": 1.799641765432125e-08, "loss": 0.7502, "step": 26868 }, { "epoch": 0.973832046681889, "grad_norm": 2.3844485702451887, "learning_rate": 1.7946699242630816e-08, "loss": 1.0228, "step": 26869 }, { "epoch": 0.9738682903845457, "grad_norm": 2.333272996915078, "learning_rate": 1.789704948062243e-08, "loss": 0.9094, "step": 26870 }, { "epoch": 0.9739045340872023, "grad_norm": 2.3115315354051797, "learning_rate": 1.7847468368979438e-08, "loss": 0.7351, "step": 26871 }, { "epoch": 0.973940777789859, "grad_norm": 2.3809356639874264, "learning_rate": 1.779795590838518e-08, "loss": 0.8004, "step": 26872 }, { "epoch": 0.9739770214925156, "grad_norm": 2.513146544923686, "learning_rate": 1.7748512099521887e-08, "loss": 0.7467, "step": 26873 }, { "epoch": 0.9740132651951723, "grad_norm": 2.279072343797211, "learning_rate": 1.7699136943070683e-08, "loss": 0.8967, "step": 26874 }, { "epoch": 0.974049508897829, "grad_norm": 2.291161936273691, "learning_rate": 1.764983043971269e-08, "loss": 0.8594, "step": 26875 }, { "epoch": 0.9740857526004857, "grad_norm": 2.233703155035795, "learning_rate": 1.7600592590126808e-08, "loss": 0.7221, "step": 26876 }, { "epoch": 0.9741219963031423, "grad_norm": 2.5424308032421945, "learning_rate": 1.7551423394991384e-08, "loss": 0.9706, "step": 26877 }, { "epoch": 0.974158240005799, "grad_norm": 2.4181655994905187, "learning_rate": 1.7502322854983655e-08, "loss": 1.1392, "step": 26878 }, { "epoch": 0.9741944837084556, "grad_norm": 2.468792318052397, "learning_rate": 1.7453290970780855e-08, "loss": 0.8081, "step": 26879 }, { "epoch": 0.9742307274111123, "grad_norm": 2.520739947903441, "learning_rate": 1.7404327743058558e-08, "loss": 0.8995, "step": 26880 }, { "epoch": 0.974266971113769, "grad_norm": 2.4399624856066184, "learning_rate": 1.735543317249122e-08, "loss": 0.9696, "step": 26881 }, { "epoch": 0.9743032148164257, "grad_norm": 2.431949130713081, "learning_rate": 1.7306607259752193e-08, "loss": 0.9067, "step": 26882 }, { "epoch": 0.9743394585190823, "grad_norm": 2.19140190720512, "learning_rate": 1.725785000551483e-08, "loss": 0.7435, "step": 26883 }, { "epoch": 0.974375702221739, "grad_norm": 2.4934771554768216, "learning_rate": 1.7209161410451368e-08, "loss": 0.8012, "step": 26884 }, { "epoch": 0.9744119459243956, "grad_norm": 2.451840497839571, "learning_rate": 1.7160541475231273e-08, "loss": 0.9392, "step": 26885 }, { "epoch": 0.9744481896270523, "grad_norm": 2.48821891186431, "learning_rate": 1.711199020052623e-08, "loss": 0.8957, "step": 26886 }, { "epoch": 0.9744844333297089, "grad_norm": 2.351232517955249, "learning_rate": 1.7063507587004035e-08, "loss": 0.9952, "step": 26887 }, { "epoch": 0.9745206770323657, "grad_norm": 2.218244478031286, "learning_rate": 1.7015093635333047e-08, "loss": 0.7535, "step": 26888 }, { "epoch": 0.9745569207350223, "grad_norm": 2.3759327849004985, "learning_rate": 1.6966748346180507e-08, "loss": 0.9906, "step": 26889 }, { "epoch": 0.974593164437679, "grad_norm": 2.4471487980686666, "learning_rate": 1.6918471720212548e-08, "loss": 0.8399, "step": 26890 }, { "epoch": 0.9746294081403356, "grad_norm": 2.154996223110518, "learning_rate": 1.6870263758094752e-08, "loss": 0.7133, "step": 26891 }, { "epoch": 0.9746656518429923, "grad_norm": 2.528878955691326, "learning_rate": 1.6822124460490474e-08, "loss": 0.7592, "step": 26892 }, { "epoch": 0.9747018955456489, "grad_norm": 2.609286989827796, "learning_rate": 1.6774053828064186e-08, "loss": 1.0277, "step": 26893 }, { "epoch": 0.9747381392483057, "grad_norm": 2.20319023012144, "learning_rate": 1.6726051861477023e-08, "loss": 0.7452, "step": 26894 }, { "epoch": 0.9747743829509623, "grad_norm": 2.7393355700969333, "learning_rate": 1.6678118561391233e-08, "loss": 0.9531, "step": 26895 }, { "epoch": 0.974810626653619, "grad_norm": 2.2530271745226216, "learning_rate": 1.6630253928467398e-08, "loss": 0.7778, "step": 26896 }, { "epoch": 0.9748468703562756, "grad_norm": 1.8454910077121915, "learning_rate": 1.658245796336444e-08, "loss": 0.7227, "step": 26897 }, { "epoch": 0.9748831140589322, "grad_norm": 2.287816896578023, "learning_rate": 1.6534730666741828e-08, "loss": 0.8546, "step": 26898 }, { "epoch": 0.9749193577615889, "grad_norm": 2.219067133385226, "learning_rate": 1.648707203925626e-08, "loss": 0.7834, "step": 26899 }, { "epoch": 0.9749556014642455, "grad_norm": 2.6237157235392305, "learning_rate": 1.6439482081565538e-08, "loss": 0.8979, "step": 26900 }, { "epoch": 0.9749918451669023, "grad_norm": 2.402470487054079, "learning_rate": 1.6391960794324148e-08, "loss": 0.9697, "step": 26901 }, { "epoch": 0.9750280888695589, "grad_norm": 2.553888883798052, "learning_rate": 1.6344508178187668e-08, "loss": 0.7328, "step": 26902 }, { "epoch": 0.9750643325722156, "grad_norm": 2.3835065782771485, "learning_rate": 1.6297124233810025e-08, "loss": 0.8056, "step": 26903 }, { "epoch": 0.9751005762748722, "grad_norm": 2.365281566614305, "learning_rate": 1.6249808961844026e-08, "loss": 0.8796, "step": 26904 }, { "epoch": 0.9751368199775289, "grad_norm": 2.152729163694319, "learning_rate": 1.6202562362941378e-08, "loss": 0.7919, "step": 26905 }, { "epoch": 0.9751730636801855, "grad_norm": 2.490035697443991, "learning_rate": 1.615538443775322e-08, "loss": 1.0611, "step": 26906 }, { "epoch": 0.9752093073828423, "grad_norm": 2.4266285902496056, "learning_rate": 1.6108275186930145e-08, "loss": 0.8483, "step": 26907 }, { "epoch": 0.9752455510854989, "grad_norm": 2.611323016939622, "learning_rate": 1.6061234611120523e-08, "loss": 0.8707, "step": 26908 }, { "epoch": 0.9752817947881556, "grad_norm": 2.4581741554901497, "learning_rate": 1.6014262710973282e-08, "loss": 0.9306, "step": 26909 }, { "epoch": 0.9753180384908122, "grad_norm": 2.274446096377958, "learning_rate": 1.596735948713457e-08, "loss": 0.9987, "step": 26910 }, { "epoch": 0.9753542821934689, "grad_norm": 2.523262858191956, "learning_rate": 1.5920524940252203e-08, "loss": 0.9375, "step": 26911 }, { "epoch": 0.9753905258961255, "grad_norm": 2.327272817012774, "learning_rate": 1.5873759070970108e-08, "loss": 0.8849, "step": 26912 }, { "epoch": 0.9754267695987822, "grad_norm": 2.4434223425068353, "learning_rate": 1.582706187993388e-08, "loss": 0.8468, "step": 26913 }, { "epoch": 0.9754630133014389, "grad_norm": 2.338629165636888, "learning_rate": 1.5780433367785784e-08, "loss": 1.02, "step": 26914 }, { "epoch": 0.9754992570040956, "grad_norm": 2.516159589984912, "learning_rate": 1.5733873535169197e-08, "loss": 0.9702, "step": 26915 }, { "epoch": 0.9755355007067522, "grad_norm": 2.3112637172698527, "learning_rate": 1.5687382382725825e-08, "loss": 0.8957, "step": 26916 }, { "epoch": 0.9755717444094089, "grad_norm": 2.4147180099908407, "learning_rate": 1.564095991109571e-08, "loss": 0.7473, "step": 26917 }, { "epoch": 0.9756079881120655, "grad_norm": 2.300090556811081, "learning_rate": 1.5594606120918342e-08, "loss": 0.9964, "step": 26918 }, { "epoch": 0.9756442318147222, "grad_norm": 2.3869234393365804, "learning_rate": 1.5548321012833212e-08, "loss": 0.7143, "step": 26919 }, { "epoch": 0.9756804755173789, "grad_norm": 2.434267530068133, "learning_rate": 1.5502104587477583e-08, "loss": 0.8475, "step": 26920 }, { "epoch": 0.9757167192200356, "grad_norm": 2.34862044923966, "learning_rate": 1.5455956845488173e-08, "loss": 0.9898, "step": 26921 }, { "epoch": 0.9757529629226922, "grad_norm": 2.2263460161483564, "learning_rate": 1.5409877787501692e-08, "loss": 0.8788, "step": 26922 }, { "epoch": 0.9757892066253488, "grad_norm": 2.6209998694992445, "learning_rate": 1.536386741415208e-08, "loss": 0.7854, "step": 26923 }, { "epoch": 0.9758254503280055, "grad_norm": 2.096116988345453, "learning_rate": 1.5317925726073825e-08, "loss": 0.9219, "step": 26924 }, { "epoch": 0.9758616940306621, "grad_norm": 2.4257668808481134, "learning_rate": 1.527205272390031e-08, "loss": 0.9209, "step": 26925 }, { "epoch": 0.9758979377333188, "grad_norm": 2.0190666156110333, "learning_rate": 1.52262484082627e-08, "loss": 0.9, "step": 26926 }, { "epoch": 0.9759341814359755, "grad_norm": 2.571746589931008, "learning_rate": 1.5180512779793267e-08, "loss": 0.9634, "step": 26927 }, { "epoch": 0.9759704251386322, "grad_norm": 2.5667982069236768, "learning_rate": 1.5134845839120948e-08, "loss": 0.9135, "step": 26928 }, { "epoch": 0.9760066688412888, "grad_norm": 2.421951078080432, "learning_rate": 1.5089247586876356e-08, "loss": 0.8934, "step": 26929 }, { "epoch": 0.9760429125439455, "grad_norm": 2.5317275128757855, "learning_rate": 1.504371802368676e-08, "loss": 0.9218, "step": 26930 }, { "epoch": 0.9760791562466021, "grad_norm": 2.305403403220665, "learning_rate": 1.4998257150179996e-08, "loss": 0.792, "step": 26931 }, { "epoch": 0.9761153999492588, "grad_norm": 2.0856722904417246, "learning_rate": 1.4952864966982227e-08, "loss": 0.8387, "step": 26932 }, { "epoch": 0.9761516436519155, "grad_norm": 2.55465767503323, "learning_rate": 1.490754147471962e-08, "loss": 0.9486, "step": 26933 }, { "epoch": 0.9761878873545722, "grad_norm": 2.7128854560373252, "learning_rate": 1.4862286674015568e-08, "loss": 0.6604, "step": 26934 }, { "epoch": 0.9762241310572288, "grad_norm": 2.231124802430366, "learning_rate": 1.4817100565494568e-08, "loss": 0.9433, "step": 26935 }, { "epoch": 0.9762603747598855, "grad_norm": 2.1193024416674016, "learning_rate": 1.4771983149779456e-08, "loss": 0.9748, "step": 26936 }, { "epoch": 0.9762966184625421, "grad_norm": 2.8560503570481126, "learning_rate": 1.4726934427490847e-08, "loss": 0.9077, "step": 26937 }, { "epoch": 0.9763328621651988, "grad_norm": 1.997502076290987, "learning_rate": 1.4681954399250464e-08, "loss": 0.8181, "step": 26938 }, { "epoch": 0.9763691058678554, "grad_norm": 2.2593805368813342, "learning_rate": 1.4637043065677258e-08, "loss": 0.7933, "step": 26939 }, { "epoch": 0.9764053495705122, "grad_norm": 2.4200519290124523, "learning_rate": 1.4592200427390735e-08, "loss": 0.8525, "step": 26940 }, { "epoch": 0.9764415932731688, "grad_norm": 2.6730428228888403, "learning_rate": 1.4547426485008731e-08, "loss": 1.0521, "step": 26941 }, { "epoch": 0.9764778369758255, "grad_norm": 2.182117272443869, "learning_rate": 1.4502721239147977e-08, "loss": 0.8823, "step": 26942 }, { "epoch": 0.9765140806784821, "grad_norm": 2.344501091243367, "learning_rate": 1.4458084690424645e-08, "loss": 0.9232, "step": 26943 }, { "epoch": 0.9765503243811388, "grad_norm": 2.6159325102454436, "learning_rate": 1.4413516839453801e-08, "loss": 0.8785, "step": 26944 }, { "epoch": 0.9765865680837954, "grad_norm": 2.518700400255219, "learning_rate": 1.4369017686849951e-08, "loss": 0.9234, "step": 26945 }, { "epoch": 0.976622811786452, "grad_norm": 2.5408524617075434, "learning_rate": 1.432458723322483e-08, "loss": 0.7503, "step": 26946 }, { "epoch": 0.9766590554891088, "grad_norm": 2.511228109031489, "learning_rate": 1.428022547919239e-08, "loss": 0.7599, "step": 26947 }, { "epoch": 0.9766952991917655, "grad_norm": 2.537339086056514, "learning_rate": 1.4235932425363253e-08, "loss": 1.021, "step": 26948 }, { "epoch": 0.9767315428944221, "grad_norm": 2.0845041754980635, "learning_rate": 1.4191708072346932e-08, "loss": 0.7353, "step": 26949 }, { "epoch": 0.9767677865970787, "grad_norm": 2.3472276936213894, "learning_rate": 1.4147552420754051e-08, "loss": 0.9257, "step": 26950 }, { "epoch": 0.9768040302997354, "grad_norm": 2.628864588272872, "learning_rate": 1.4103465471192458e-08, "loss": 0.8779, "step": 26951 }, { "epoch": 0.976840274002392, "grad_norm": 2.2712852075363887, "learning_rate": 1.4059447224269996e-08, "loss": 1.0288, "step": 26952 }, { "epoch": 0.9768765177050488, "grad_norm": 2.01702435185726, "learning_rate": 1.4015497680592849e-08, "loss": 0.7437, "step": 26953 }, { "epoch": 0.9769127614077054, "grad_norm": 2.3592897355851483, "learning_rate": 1.3971616840766645e-08, "loss": 0.8425, "step": 26954 }, { "epoch": 0.9769490051103621, "grad_norm": 2.410082641985463, "learning_rate": 1.3927804705395897e-08, "loss": 0.838, "step": 26955 }, { "epoch": 0.9769852488130187, "grad_norm": 2.518117525602738, "learning_rate": 1.3884061275085125e-08, "loss": 0.8531, "step": 26956 }, { "epoch": 0.9770214925156754, "grad_norm": 2.0733757195060853, "learning_rate": 1.3840386550435514e-08, "loss": 0.8231, "step": 26957 }, { "epoch": 0.977057736218332, "grad_norm": 2.519790286121611, "learning_rate": 1.3796780532050468e-08, "loss": 0.8076, "step": 26958 }, { "epoch": 0.9770939799209887, "grad_norm": 2.0068031628625422, "learning_rate": 1.3753243220530066e-08, "loss": 0.6676, "step": 26959 }, { "epoch": 0.9771302236236454, "grad_norm": 1.9844987742520668, "learning_rate": 1.3709774616474936e-08, "loss": 0.772, "step": 26960 }, { "epoch": 0.9771664673263021, "grad_norm": 2.1350847005881923, "learning_rate": 1.3666374720482933e-08, "loss": 0.9627, "step": 26961 }, { "epoch": 0.9772027110289587, "grad_norm": 2.3020090721719977, "learning_rate": 1.3623043533152469e-08, "loss": 0.9463, "step": 26962 }, { "epoch": 0.9772389547316154, "grad_norm": 2.25881972714993, "learning_rate": 1.3579781055080843e-08, "loss": 0.8597, "step": 26963 }, { "epoch": 0.977275198434272, "grad_norm": 2.2190098956840507, "learning_rate": 1.35365872868648e-08, "loss": 0.9647, "step": 26964 }, { "epoch": 0.9773114421369287, "grad_norm": 2.456481247103349, "learning_rate": 1.3493462229098309e-08, "loss": 0.9329, "step": 26965 }, { "epoch": 0.9773476858395854, "grad_norm": 2.3106627997350713, "learning_rate": 1.3450405882376449e-08, "loss": 0.8743, "step": 26966 }, { "epoch": 0.9773839295422421, "grad_norm": 2.226244115759113, "learning_rate": 1.3407418247292081e-08, "loss": 0.9375, "step": 26967 }, { "epoch": 0.9774201732448987, "grad_norm": 2.4085737275960724, "learning_rate": 1.3364499324437508e-08, "loss": 0.7847, "step": 26968 }, { "epoch": 0.9774564169475554, "grad_norm": 2.5039083601960583, "learning_rate": 1.3321649114404478e-08, "loss": 0.874, "step": 26969 }, { "epoch": 0.977492660650212, "grad_norm": 2.222205282623347, "learning_rate": 1.3278867617783076e-08, "loss": 0.8802, "step": 26970 }, { "epoch": 0.9775289043528687, "grad_norm": 2.3437673709293354, "learning_rate": 1.3236154835163383e-08, "loss": 0.8923, "step": 26971 }, { "epoch": 0.9775651480555253, "grad_norm": 2.1506556852405083, "learning_rate": 1.3193510767133821e-08, "loss": 0.7377, "step": 26972 }, { "epoch": 0.9776013917581821, "grad_norm": 2.347146872891755, "learning_rate": 1.315093541428114e-08, "loss": 0.8111, "step": 26973 }, { "epoch": 0.9776376354608387, "grad_norm": 2.4748399736907216, "learning_rate": 1.310842877719265e-08, "loss": 1.0803, "step": 26974 }, { "epoch": 0.9776738791634954, "grad_norm": 2.6138230648759904, "learning_rate": 1.3065990856454547e-08, "loss": 0.9309, "step": 26975 }, { "epoch": 0.977710122866152, "grad_norm": 2.250784206341617, "learning_rate": 1.3023621652650808e-08, "loss": 0.9413, "step": 26976 }, { "epoch": 0.9777463665688086, "grad_norm": 2.1356534797535347, "learning_rate": 1.2981321166365412e-08, "loss": 0.7792, "step": 26977 }, { "epoch": 0.9777826102714653, "grad_norm": 2.3836080296306106, "learning_rate": 1.2939089398181226e-08, "loss": 1.0188, "step": 26978 }, { "epoch": 0.977818853974122, "grad_norm": 2.72715796917778, "learning_rate": 1.2896926348680561e-08, "loss": 0.9592, "step": 26979 }, { "epoch": 0.9778550976767787, "grad_norm": 2.0720390799439925, "learning_rate": 1.2854832018444064e-08, "loss": 0.964, "step": 26980 }, { "epoch": 0.9778913413794353, "grad_norm": 2.3308752071744556, "learning_rate": 1.2812806408051826e-08, "loss": 0.9231, "step": 26981 }, { "epoch": 0.977927585082092, "grad_norm": 2.2252173523202075, "learning_rate": 1.2770849518083383e-08, "loss": 0.9111, "step": 26982 }, { "epoch": 0.9779638287847486, "grad_norm": 2.444732722464025, "learning_rate": 1.2728961349116053e-08, "loss": 0.9119, "step": 26983 }, { "epoch": 0.9780000724874053, "grad_norm": 2.372167680279559, "learning_rate": 1.2687141901727706e-08, "loss": 1.0782, "step": 26984 }, { "epoch": 0.9780363161900619, "grad_norm": 2.8243689154369447, "learning_rate": 1.2645391176493993e-08, "loss": 0.9182, "step": 26985 }, { "epoch": 0.9780725598927187, "grad_norm": 2.197690679187989, "learning_rate": 1.2603709173990564e-08, "loss": 0.8547, "step": 26986 }, { "epoch": 0.9781088035953753, "grad_norm": 2.3351165402527516, "learning_rate": 1.256209589479196e-08, "loss": 0.9166, "step": 26987 }, { "epoch": 0.978145047298032, "grad_norm": 2.440019502699165, "learning_rate": 1.2520551339471055e-08, "loss": 0.9072, "step": 26988 }, { "epoch": 0.9781812910006886, "grad_norm": 2.3667211420775374, "learning_rate": 1.2479075508600724e-08, "loss": 0.9517, "step": 26989 }, { "epoch": 0.9782175347033453, "grad_norm": 2.488460366463051, "learning_rate": 1.243766840275218e-08, "loss": 0.8621, "step": 26990 }, { "epoch": 0.9782537784060019, "grad_norm": 2.350976971079511, "learning_rate": 1.2396330022496628e-08, "loss": 0.7618, "step": 26991 }, { "epoch": 0.9782900221086587, "grad_norm": 2.38640760585383, "learning_rate": 1.235506036840306e-08, "loss": 0.8633, "step": 26992 }, { "epoch": 0.9783262658113153, "grad_norm": 2.4493704952225936, "learning_rate": 1.2313859441040465e-08, "loss": 1.0255, "step": 26993 }, { "epoch": 0.978362509513972, "grad_norm": 2.451459212066336, "learning_rate": 1.2272727240976168e-08, "loss": 0.8783, "step": 26994 }, { "epoch": 0.9783987532166286, "grad_norm": 2.1039986906355144, "learning_rate": 1.223166376877749e-08, "loss": 0.6894, "step": 26995 }, { "epoch": 0.9784349969192853, "grad_norm": 2.279907249920557, "learning_rate": 1.2190669025009538e-08, "loss": 0.9661, "step": 26996 }, { "epoch": 0.9784712406219419, "grad_norm": 2.6725498900710654, "learning_rate": 1.2149743010237969e-08, "loss": 0.9679, "step": 26997 }, { "epoch": 0.9785074843245986, "grad_norm": 2.3575637768500415, "learning_rate": 1.210888572502622e-08, "loss": 0.7721, "step": 26998 }, { "epoch": 0.9785437280272553, "grad_norm": 2.168707586212212, "learning_rate": 1.206809716993773e-08, "loss": 0.9633, "step": 26999 }, { "epoch": 0.978579971729912, "grad_norm": 2.4399856734269867, "learning_rate": 1.2027377345534275e-08, "loss": 0.9402, "step": 27000 }, { "epoch": 0.9786162154325686, "grad_norm": 2.427453272098403, "learning_rate": 1.1986726252376513e-08, "loss": 1.009, "step": 27001 }, { "epoch": 0.9786524591352253, "grad_norm": 2.763494850576783, "learning_rate": 1.1946143891025108e-08, "loss": 0.8729, "step": 27002 }, { "epoch": 0.9786887028378819, "grad_norm": 2.3249026463477835, "learning_rate": 1.1905630262039613e-08, "loss": 0.9406, "step": 27003 }, { "epoch": 0.9787249465405385, "grad_norm": 2.3861701142625282, "learning_rate": 1.186518536597736e-08, "loss": 0.8798, "step": 27004 }, { "epoch": 0.9787611902431952, "grad_norm": 2.2659564500799276, "learning_rate": 1.1824809203396237e-08, "loss": 0.8754, "step": 27005 }, { "epoch": 0.978797433945852, "grad_norm": 2.5212440570899766, "learning_rate": 1.1784501774852463e-08, "loss": 0.7999, "step": 27006 }, { "epoch": 0.9788336776485086, "grad_norm": 2.195551780552908, "learning_rate": 1.1744263080901709e-08, "loss": 0.738, "step": 27007 }, { "epoch": 0.9788699213511652, "grad_norm": 2.203448118439866, "learning_rate": 1.170409312209797e-08, "loss": 0.8723, "step": 27008 }, { "epoch": 0.9789061650538219, "grad_norm": 2.2655036221085982, "learning_rate": 1.1663991898995252e-08, "loss": 0.7889, "step": 27009 }, { "epoch": 0.9789424087564785, "grad_norm": 2.430539841057522, "learning_rate": 1.1623959412145336e-08, "loss": 0.6675, "step": 27010 }, { "epoch": 0.9789786524591352, "grad_norm": 2.64008733582824, "learning_rate": 1.158399566210111e-08, "loss": 0.9266, "step": 27011 }, { "epoch": 0.9790148961617919, "grad_norm": 2.138640642766302, "learning_rate": 1.1544100649412138e-08, "loss": 0.8511, "step": 27012 }, { "epoch": 0.9790511398644486, "grad_norm": 2.3128289126252466, "learning_rate": 1.1504274374628532e-08, "loss": 0.7929, "step": 27013 }, { "epoch": 0.9790873835671052, "grad_norm": 2.3802835921523147, "learning_rate": 1.14645168382993e-08, "loss": 0.8571, "step": 27014 }, { "epoch": 0.9791236272697619, "grad_norm": 2.2971295612635987, "learning_rate": 1.142482804097178e-08, "loss": 0.731, "step": 27015 }, { "epoch": 0.9791598709724185, "grad_norm": 2.2344835015569915, "learning_rate": 1.1385207983193314e-08, "loss": 0.9236, "step": 27016 }, { "epoch": 0.9791961146750752, "grad_norm": 2.2335335663969005, "learning_rate": 1.1345656665509575e-08, "loss": 0.9326, "step": 27017 }, { "epoch": 0.9792323583777318, "grad_norm": 2.380368607069477, "learning_rate": 1.1306174088465682e-08, "loss": 0.84, "step": 27018 }, { "epoch": 0.9792686020803886, "grad_norm": 2.4937282723208676, "learning_rate": 1.1266760252605647e-08, "loss": 0.8734, "step": 27019 }, { "epoch": 0.9793048457830452, "grad_norm": 2.3997412837321925, "learning_rate": 1.1227415158472365e-08, "loss": 1.0378, "step": 27020 }, { "epoch": 0.9793410894857019, "grad_norm": 2.0655504614841225, "learning_rate": 1.1188138806608184e-08, "loss": 1.0257, "step": 27021 }, { "epoch": 0.9793773331883585, "grad_norm": 2.2925807738965456, "learning_rate": 1.1148931197554891e-08, "loss": 0.7909, "step": 27022 }, { "epoch": 0.9794135768910152, "grad_norm": 2.158427621442338, "learning_rate": 1.11097923318515e-08, "loss": 0.7849, "step": 27023 }, { "epoch": 0.9794498205936718, "grad_norm": 2.3386390933414063, "learning_rate": 1.1070722210038132e-08, "loss": 0.8814, "step": 27024 }, { "epoch": 0.9794860642963286, "grad_norm": 2.5128469933805704, "learning_rate": 1.1031720832653247e-08, "loss": 0.7279, "step": 27025 }, { "epoch": 0.9795223079989852, "grad_norm": 1.935973236011096, "learning_rate": 1.0992788200234195e-08, "loss": 0.6595, "step": 27026 }, { "epoch": 0.9795585517016419, "grad_norm": 2.402140348586146, "learning_rate": 1.0953924313316656e-08, "loss": 0.9525, "step": 27027 }, { "epoch": 0.9795947954042985, "grad_norm": 2.3793174198401985, "learning_rate": 1.0915129172436867e-08, "loss": 0.8825, "step": 27028 }, { "epoch": 0.9796310391069551, "grad_norm": 2.762634731777451, "learning_rate": 1.0876402778129957e-08, "loss": 0.9507, "step": 27029 }, { "epoch": 0.9796672828096118, "grad_norm": 2.3954342193059808, "learning_rate": 1.0837745130928279e-08, "loss": 0.8477, "step": 27030 }, { "epoch": 0.9797035265122684, "grad_norm": 2.3996397580706628, "learning_rate": 1.0799156231365292e-08, "loss": 0.9751, "step": 27031 }, { "epoch": 0.9797397702149252, "grad_norm": 2.2627241835687295, "learning_rate": 1.0760636079972797e-08, "loss": 0.7662, "step": 27032 }, { "epoch": 0.9797760139175818, "grad_norm": 2.501351299479606, "learning_rate": 1.0722184677280923e-08, "loss": 0.9549, "step": 27033 }, { "epoch": 0.9798122576202385, "grad_norm": 2.4761780923473795, "learning_rate": 1.06838020238198e-08, "loss": 0.7679, "step": 27034 }, { "epoch": 0.9798485013228951, "grad_norm": 2.310380948400895, "learning_rate": 1.0645488120119008e-08, "loss": 0.7984, "step": 27035 }, { "epoch": 0.9798847450255518, "grad_norm": 2.4569174136134375, "learning_rate": 1.0607242966705345e-08, "loss": 0.7773, "step": 27036 }, { "epoch": 0.9799209887282084, "grad_norm": 2.3334454530721245, "learning_rate": 1.0569066564106722e-08, "loss": 0.8493, "step": 27037 }, { "epoch": 0.9799572324308652, "grad_norm": 2.220439330826295, "learning_rate": 1.0530958912848832e-08, "loss": 0.8359, "step": 27038 }, { "epoch": 0.9799934761335218, "grad_norm": 2.4384261485631793, "learning_rate": 1.0492920013456808e-08, "loss": 0.7782, "step": 27039 }, { "epoch": 0.9800297198361785, "grad_norm": 2.966977128516652, "learning_rate": 1.0454949866454678e-08, "loss": 0.8089, "step": 27040 }, { "epoch": 0.9800659635388351, "grad_norm": 2.548678266846551, "learning_rate": 1.0417048472366464e-08, "loss": 0.8438, "step": 27041 }, { "epoch": 0.9801022072414918, "grad_norm": 2.5613427947649083, "learning_rate": 1.0379215831712863e-08, "loss": 0.9112, "step": 27042 }, { "epoch": 0.9801384509441484, "grad_norm": 2.5767964565814103, "learning_rate": 1.0341451945016234e-08, "loss": 0.9097, "step": 27043 }, { "epoch": 0.9801746946468051, "grad_norm": 2.14946218307354, "learning_rate": 1.0303756812796717e-08, "loss": 0.6919, "step": 27044 }, { "epoch": 0.9802109383494618, "grad_norm": 2.371020694257801, "learning_rate": 1.0266130435573896e-08, "loss": 0.8097, "step": 27045 }, { "epoch": 0.9802471820521185, "grad_norm": 2.2296676305414564, "learning_rate": 1.0228572813866245e-08, "loss": 0.8979, "step": 27046 }, { "epoch": 0.9802834257547751, "grad_norm": 2.374650261876308, "learning_rate": 1.0191083948190572e-08, "loss": 0.981, "step": 27047 }, { "epoch": 0.9803196694574318, "grad_norm": 2.384670837931495, "learning_rate": 1.0153663839064243e-08, "loss": 1.2707, "step": 27048 }, { "epoch": 0.9803559131600884, "grad_norm": 2.4829819542136113, "learning_rate": 1.0116312487002956e-08, "loss": 0.9703, "step": 27049 }, { "epoch": 0.9803921568627451, "grad_norm": 2.50254006987801, "learning_rate": 1.0079029892521296e-08, "loss": 0.8213, "step": 27050 }, { "epoch": 0.9804284005654018, "grad_norm": 2.1937205223328764, "learning_rate": 1.0041816056132193e-08, "loss": 0.8137, "step": 27051 }, { "epoch": 0.9804646442680585, "grad_norm": 2.644274464291447, "learning_rate": 1.0004670978349673e-08, "loss": 0.9952, "step": 27052 }, { "epoch": 0.9805008879707151, "grad_norm": 2.4886438166488, "learning_rate": 9.967594659684443e-09, "loss": 0.8741, "step": 27053 }, { "epoch": 0.9805371316733718, "grad_norm": 2.3026830008756884, "learning_rate": 9.930587100647759e-09, "loss": 0.9083, "step": 27054 }, { "epoch": 0.9805733753760284, "grad_norm": 2.2951232551426775, "learning_rate": 9.893648301750325e-09, "loss": 0.8686, "step": 27055 }, { "epoch": 0.980609619078685, "grad_norm": 2.3933384873704346, "learning_rate": 9.856778263500067e-09, "loss": 0.9048, "step": 27056 }, { "epoch": 0.9806458627813417, "grad_norm": 2.340858661520491, "learning_rate": 9.819976986405466e-09, "loss": 0.8863, "step": 27057 }, { "epoch": 0.9806821064839985, "grad_norm": 2.6060076847655402, "learning_rate": 9.783244470973341e-09, "loss": 0.7843, "step": 27058 }, { "epoch": 0.9807183501866551, "grad_norm": 2.4233672473578323, "learning_rate": 9.746580717710508e-09, "loss": 0.8373, "step": 27059 }, { "epoch": 0.9807545938893117, "grad_norm": 2.1820107138353553, "learning_rate": 9.709985727121563e-09, "loss": 0.7668, "step": 27060 }, { "epoch": 0.9807908375919684, "grad_norm": 2.5566821818927923, "learning_rate": 9.673459499711101e-09, "loss": 0.865, "step": 27061 }, { "epoch": 0.980827081294625, "grad_norm": 2.5737233959317924, "learning_rate": 9.637002035982613e-09, "loss": 0.7632, "step": 27062 }, { "epoch": 0.9808633249972817, "grad_norm": 2.3136039391874634, "learning_rate": 9.600613336437913e-09, "loss": 0.7559, "step": 27063 }, { "epoch": 0.9808995686999384, "grad_norm": 2.5697499013544287, "learning_rate": 9.564293401578274e-09, "loss": 0.8959, "step": 27064 }, { "epoch": 0.9809358124025951, "grad_norm": 2.4631219298025573, "learning_rate": 9.528042231904955e-09, "loss": 0.9653, "step": 27065 }, { "epoch": 0.9809720561052517, "grad_norm": 2.2908177111437213, "learning_rate": 9.491859827917005e-09, "loss": 0.9833, "step": 27066 }, { "epoch": 0.9810082998079084, "grad_norm": 2.1729963610372014, "learning_rate": 9.455746190113468e-09, "loss": 0.7957, "step": 27067 }, { "epoch": 0.981044543510565, "grad_norm": 2.158926580297243, "learning_rate": 9.41970131899117e-09, "loss": 0.7962, "step": 27068 }, { "epoch": 0.9810807872132217, "grad_norm": 2.146586852275077, "learning_rate": 9.383725215046935e-09, "loss": 0.9182, "step": 27069 }, { "epoch": 0.9811170309158783, "grad_norm": 2.7271633041420524, "learning_rate": 9.347817878777588e-09, "loss": 0.9818, "step": 27070 }, { "epoch": 0.9811532746185351, "grad_norm": 2.370273054569788, "learning_rate": 9.311979310676067e-09, "loss": 0.9714, "step": 27071 }, { "epoch": 0.9811895183211917, "grad_norm": 2.146055683922078, "learning_rate": 9.276209511237534e-09, "loss": 0.5842, "step": 27072 }, { "epoch": 0.9812257620238484, "grad_norm": 2.3496761091759746, "learning_rate": 9.240508480954924e-09, "loss": 0.8208, "step": 27073 }, { "epoch": 0.981262005726505, "grad_norm": 2.2045006383551833, "learning_rate": 9.204876220319514e-09, "loss": 0.6963, "step": 27074 }, { "epoch": 0.9812982494291617, "grad_norm": 2.52212159083175, "learning_rate": 9.169312729822577e-09, "loss": 0.9767, "step": 27075 }, { "epoch": 0.9813344931318183, "grad_norm": 2.431789891035873, "learning_rate": 9.133818009953722e-09, "loss": 0.7392, "step": 27076 }, { "epoch": 0.981370736834475, "grad_norm": 2.4373959726761085, "learning_rate": 9.098392061203109e-09, "loss": 0.9314, "step": 27077 }, { "epoch": 0.9814069805371317, "grad_norm": 2.2229735819943426, "learning_rate": 9.063034884058131e-09, "loss": 0.8236, "step": 27078 }, { "epoch": 0.9814432242397884, "grad_norm": 2.4130910706475013, "learning_rate": 9.027746479005617e-09, "loss": 0.7436, "step": 27079 }, { "epoch": 0.981479467942445, "grad_norm": 2.5163243256816865, "learning_rate": 8.992526846532956e-09, "loss": 0.8222, "step": 27080 }, { "epoch": 0.9815157116451017, "grad_norm": 2.509517789627575, "learning_rate": 8.957375987124761e-09, "loss": 0.9027, "step": 27081 }, { "epoch": 0.9815519553477583, "grad_norm": 2.469202815113817, "learning_rate": 8.922293901265088e-09, "loss": 0.9059, "step": 27082 }, { "epoch": 0.981588199050415, "grad_norm": 2.2015859396367117, "learning_rate": 8.887280589437996e-09, "loss": 0.8594, "step": 27083 }, { "epoch": 0.9816244427530717, "grad_norm": 2.5348198173852157, "learning_rate": 8.85233605212532e-09, "loss": 0.9068, "step": 27084 }, { "epoch": 0.9816606864557283, "grad_norm": 2.34144802978872, "learning_rate": 8.817460289808898e-09, "loss": 0.8214, "step": 27085 }, { "epoch": 0.981696930158385, "grad_norm": 2.3952106813675336, "learning_rate": 8.782653302969458e-09, "loss": 1.027, "step": 27086 }, { "epoch": 0.9817331738610416, "grad_norm": 2.5200483731273526, "learning_rate": 8.747915092086612e-09, "loss": 0.827, "step": 27087 }, { "epoch": 0.9817694175636983, "grad_norm": 2.2900411418880253, "learning_rate": 8.713245657638869e-09, "loss": 1.0029, "step": 27088 }, { "epoch": 0.9818056612663549, "grad_norm": 2.0998371066968637, "learning_rate": 8.678645000103625e-09, "loss": 0.8166, "step": 27089 }, { "epoch": 0.9818419049690116, "grad_norm": 2.4362331712034924, "learning_rate": 8.644113119958276e-09, "loss": 0.8224, "step": 27090 }, { "epoch": 0.9818781486716683, "grad_norm": 2.224819365404092, "learning_rate": 8.60965001767855e-09, "loss": 0.8733, "step": 27091 }, { "epoch": 0.981914392374325, "grad_norm": 2.394562146804405, "learning_rate": 8.575255693739071e-09, "loss": 0.8323, "step": 27092 }, { "epoch": 0.9819506360769816, "grad_norm": 2.308674592704872, "learning_rate": 8.540930148613346e-09, "loss": 0.8657, "step": 27093 }, { "epoch": 0.9819868797796383, "grad_norm": 2.3035098104081655, "learning_rate": 8.506673382775443e-09, "loss": 0.8547, "step": 27094 }, { "epoch": 0.9820231234822949, "grad_norm": 2.288964339054107, "learning_rate": 8.47248539669665e-09, "loss": 0.9078, "step": 27095 }, { "epoch": 0.9820593671849516, "grad_norm": 2.1817039312283675, "learning_rate": 8.438366190848257e-09, "loss": 0.8591, "step": 27096 }, { "epoch": 0.9820956108876083, "grad_norm": 2.187784746402622, "learning_rate": 8.404315765699889e-09, "loss": 0.8142, "step": 27097 }, { "epoch": 0.982131854590265, "grad_norm": 2.1590498456866034, "learning_rate": 8.370334121721724e-09, "loss": 0.7559, "step": 27098 }, { "epoch": 0.9821680982929216, "grad_norm": 2.479256583064742, "learning_rate": 8.336421259381722e-09, "loss": 1.0723, "step": 27099 }, { "epoch": 0.9822043419955783, "grad_norm": 2.2139561069028324, "learning_rate": 8.302577179146731e-09, "loss": 0.7979, "step": 27100 }, { "epoch": 0.9822405856982349, "grad_norm": 2.1086492503743752, "learning_rate": 8.268801881483047e-09, "loss": 0.8338, "step": 27101 }, { "epoch": 0.9822768294008916, "grad_norm": 2.84912877071222, "learning_rate": 8.23509536685696e-09, "loss": 0.9234, "step": 27102 }, { "epoch": 0.9823130731035482, "grad_norm": 2.4229142876828567, "learning_rate": 8.201457635731991e-09, "loss": 1.0088, "step": 27103 }, { "epoch": 0.982349316806205, "grad_norm": 2.16303997971198, "learning_rate": 8.167888688571656e-09, "loss": 0.7723, "step": 27104 }, { "epoch": 0.9823855605088616, "grad_norm": 2.5268186090644713, "learning_rate": 8.134388525839476e-09, "loss": 0.7608, "step": 27105 }, { "epoch": 0.9824218042115183, "grad_norm": 2.332841741491645, "learning_rate": 8.100957147996191e-09, "loss": 0.9492, "step": 27106 }, { "epoch": 0.9824580479141749, "grad_norm": 2.8244935266021582, "learning_rate": 8.067594555502545e-09, "loss": 0.8564, "step": 27107 }, { "epoch": 0.9824942916168315, "grad_norm": 2.4489983765390675, "learning_rate": 8.034300748818723e-09, "loss": 0.8533, "step": 27108 }, { "epoch": 0.9825305353194882, "grad_norm": 2.266256972808945, "learning_rate": 8.001075728402697e-09, "loss": 0.8612, "step": 27109 }, { "epoch": 0.982566779022145, "grad_norm": 2.3638693985872044, "learning_rate": 7.967919494713538e-09, "loss": 0.8201, "step": 27110 }, { "epoch": 0.9826030227248016, "grad_norm": 2.095890360164262, "learning_rate": 7.934832048206442e-09, "loss": 0.7418, "step": 27111 }, { "epoch": 0.9826392664274582, "grad_norm": 2.4539944358030654, "learning_rate": 7.90181338933882e-09, "loss": 0.8379, "step": 27112 }, { "epoch": 0.9826755101301149, "grad_norm": 2.384643851143405, "learning_rate": 7.868863518565307e-09, "loss": 0.9325, "step": 27113 }, { "epoch": 0.9827117538327715, "grad_norm": 2.634293792114545, "learning_rate": 7.835982436339428e-09, "loss": 0.8038, "step": 27114 }, { "epoch": 0.9827479975354282, "grad_norm": 2.3911100782079937, "learning_rate": 7.803170143114157e-09, "loss": 0.8085, "step": 27115 }, { "epoch": 0.9827842412380848, "grad_norm": 2.1769509706999095, "learning_rate": 7.770426639342465e-09, "loss": 0.8897, "step": 27116 }, { "epoch": 0.9828204849407416, "grad_norm": 2.2311731537063455, "learning_rate": 7.737751925475101e-09, "loss": 0.8083, "step": 27117 }, { "epoch": 0.9828567286433982, "grad_norm": 2.3673220620007847, "learning_rate": 7.70514600196226e-09, "loss": 0.776, "step": 27118 }, { "epoch": 0.9828929723460549, "grad_norm": 2.2737372745616256, "learning_rate": 7.672608869253584e-09, "loss": 0.781, "step": 27119 }, { "epoch": 0.9829292160487115, "grad_norm": 2.2834776408772823, "learning_rate": 7.64014052779649e-09, "loss": 0.771, "step": 27120 }, { "epoch": 0.9829654597513682, "grad_norm": 2.316329826494842, "learning_rate": 7.607740978039512e-09, "loss": 0.9279, "step": 27121 }, { "epoch": 0.9830017034540248, "grad_norm": 2.8947718517035876, "learning_rate": 7.575410220427847e-09, "loss": 0.8712, "step": 27122 }, { "epoch": 0.9830379471566816, "grad_norm": 2.298699672319747, "learning_rate": 7.543148255408362e-09, "loss": 0.8969, "step": 27123 }, { "epoch": 0.9830741908593382, "grad_norm": 2.5055892984862527, "learning_rate": 7.510955083424587e-09, "loss": 0.8694, "step": 27124 }, { "epoch": 0.9831104345619949, "grad_norm": 2.6652340362586826, "learning_rate": 7.478830704920614e-09, "loss": 0.9227, "step": 27125 }, { "epoch": 0.9831466782646515, "grad_norm": 2.484251344124277, "learning_rate": 7.446775120338867e-09, "loss": 0.9642, "step": 27126 }, { "epoch": 0.9831829219673082, "grad_norm": 2.2605479476707133, "learning_rate": 7.414788330121215e-09, "loss": 0.786, "step": 27127 }, { "epoch": 0.9832191656699648, "grad_norm": 2.034226410632997, "learning_rate": 7.382870334708969e-09, "loss": 0.8983, "step": 27128 }, { "epoch": 0.9832554093726215, "grad_norm": 1.962249100490395, "learning_rate": 7.35102113454067e-09, "loss": 0.714, "step": 27129 }, { "epoch": 0.9832916530752782, "grad_norm": 2.1197254246336783, "learning_rate": 7.3192407300559656e-09, "loss": 0.7668, "step": 27130 }, { "epoch": 0.9833278967779349, "grad_norm": 2.450468760541445, "learning_rate": 7.287529121692283e-09, "loss": 0.7911, "step": 27131 }, { "epoch": 0.9833641404805915, "grad_norm": 2.2444662537393745, "learning_rate": 7.255886309887605e-09, "loss": 1.0026, "step": 27132 }, { "epoch": 0.9834003841832482, "grad_norm": 2.600841027799082, "learning_rate": 7.224312295077141e-09, "loss": 0.9842, "step": 27133 }, { "epoch": 0.9834366278859048, "grad_norm": 2.8152188859954093, "learning_rate": 7.1928070776960955e-09, "loss": 0.9288, "step": 27134 }, { "epoch": 0.9834728715885614, "grad_norm": 2.713119675395692, "learning_rate": 7.161370658178013e-09, "loss": 1.0251, "step": 27135 }, { "epoch": 0.9835091152912182, "grad_norm": 2.321882444091402, "learning_rate": 7.130003036957545e-09, "loss": 0.7075, "step": 27136 }, { "epoch": 0.9835453589938749, "grad_norm": 2.451749316087343, "learning_rate": 7.098704214465457e-09, "loss": 0.8663, "step": 27137 }, { "epoch": 0.9835816026965315, "grad_norm": 2.4987042355503033, "learning_rate": 7.0674741911336275e-09, "loss": 1.1025, "step": 27138 }, { "epoch": 0.9836178463991881, "grad_norm": 2.2975345300859917, "learning_rate": 7.036312967392822e-09, "loss": 0.9251, "step": 27139 }, { "epoch": 0.9836540901018448, "grad_norm": 2.2306533922216047, "learning_rate": 7.0052205436715866e-09, "loss": 0.8567, "step": 27140 }, { "epoch": 0.9836903338045014, "grad_norm": 2.344001036603848, "learning_rate": 6.974196920399023e-09, "loss": 1.087, "step": 27141 }, { "epoch": 0.9837265775071581, "grad_norm": 2.6969367647298728, "learning_rate": 6.943242098002012e-09, "loss": 0.8323, "step": 27142 }, { "epoch": 0.9837628212098148, "grad_norm": 2.284139416228649, "learning_rate": 6.912356076907989e-09, "loss": 0.8041, "step": 27143 }, { "epoch": 0.9837990649124715, "grad_norm": 2.380669889017554, "learning_rate": 6.881538857541059e-09, "loss": 0.998, "step": 27144 }, { "epoch": 0.9838353086151281, "grad_norm": 2.0856375397100417, "learning_rate": 6.850790440327549e-09, "loss": 0.9468, "step": 27145 }, { "epoch": 0.9838715523177848, "grad_norm": 2.3006890714024633, "learning_rate": 6.820110825690451e-09, "loss": 0.7885, "step": 27146 }, { "epoch": 0.9839077960204414, "grad_norm": 2.175665967109636, "learning_rate": 6.789500014051653e-09, "loss": 0.8928, "step": 27147 }, { "epoch": 0.9839440397230981, "grad_norm": 2.2670920918517115, "learning_rate": 6.758958005834704e-09, "loss": 0.8531, "step": 27148 }, { "epoch": 0.9839802834257547, "grad_norm": 2.5353974805562967, "learning_rate": 6.728484801458712e-09, "loss": 1.0877, "step": 27149 }, { "epoch": 0.9840165271284115, "grad_norm": 2.2000169544981194, "learning_rate": 6.698080401345009e-09, "loss": 0.8147, "step": 27150 }, { "epoch": 0.9840527708310681, "grad_norm": 2.4638994763881583, "learning_rate": 6.667744805911591e-09, "loss": 1.0363, "step": 27151 }, { "epoch": 0.9840890145337248, "grad_norm": 2.345016010754244, "learning_rate": 6.63747801557646e-09, "loss": 0.9365, "step": 27152 }, { "epoch": 0.9841252582363814, "grad_norm": 2.150537418995592, "learning_rate": 6.6072800307576125e-09, "loss": 0.8405, "step": 27153 }, { "epoch": 0.9841615019390381, "grad_norm": 2.436692166900018, "learning_rate": 6.577150851870273e-09, "loss": 0.8963, "step": 27154 }, { "epoch": 0.9841977456416947, "grad_norm": 2.2206315531313905, "learning_rate": 6.547090479329665e-09, "loss": 0.7872, "step": 27155 }, { "epoch": 0.9842339893443515, "grad_norm": 2.251341523994701, "learning_rate": 6.517098913550457e-09, "loss": 0.9269, "step": 27156 }, { "epoch": 0.9842702330470081, "grad_norm": 2.6621645226642263, "learning_rate": 6.487176154945651e-09, "loss": 0.9302, "step": 27157 }, { "epoch": 0.9843064767496648, "grad_norm": 2.3900204097537094, "learning_rate": 6.457322203927696e-09, "loss": 0.7699, "step": 27158 }, { "epoch": 0.9843427204523214, "grad_norm": 2.254179948472216, "learning_rate": 6.427537060907374e-09, "loss": 0.8358, "step": 27159 }, { "epoch": 0.984378964154978, "grad_norm": 2.501021352911626, "learning_rate": 6.397820726296022e-09, "loss": 0.8995, "step": 27160 }, { "epoch": 0.9844152078576347, "grad_norm": 2.2292817003897896, "learning_rate": 6.368173200502204e-09, "loss": 0.9233, "step": 27161 }, { "epoch": 0.9844514515602913, "grad_norm": 2.5525183406717735, "learning_rate": 6.3385944839355895e-09, "loss": 0.8602, "step": 27162 }, { "epoch": 0.9844876952629481, "grad_norm": 2.241952568859759, "learning_rate": 6.309084577002522e-09, "loss": 0.7988, "step": 27163 }, { "epoch": 0.9845239389656048, "grad_norm": 2.329453999041306, "learning_rate": 6.279643480110453e-09, "loss": 0.923, "step": 27164 }, { "epoch": 0.9845601826682614, "grad_norm": 2.608956481666754, "learning_rate": 6.250271193664615e-09, "loss": 0.9257, "step": 27165 }, { "epoch": 0.984596426370918, "grad_norm": 2.3376270030001756, "learning_rate": 6.2209677180696814e-09, "loss": 0.9099, "step": 27166 }, { "epoch": 0.9846326700735747, "grad_norm": 2.5580185956293664, "learning_rate": 6.191733053730331e-09, "loss": 0.9101, "step": 27167 }, { "epoch": 0.9846689137762313, "grad_norm": 2.3923866301161434, "learning_rate": 6.1625672010484635e-09, "loss": 0.9388, "step": 27168 }, { "epoch": 0.9847051574788881, "grad_norm": 2.297936292703622, "learning_rate": 6.133470160425981e-09, "loss": 0.9792, "step": 27169 }, { "epoch": 0.9847414011815447, "grad_norm": 2.259756669359482, "learning_rate": 6.104441932264227e-09, "loss": 0.844, "step": 27170 }, { "epoch": 0.9847776448842014, "grad_norm": 2.0115759002066325, "learning_rate": 6.0754825169628826e-09, "loss": 0.6249, "step": 27171 }, { "epoch": 0.984813888586858, "grad_norm": 2.23755810275128, "learning_rate": 6.046591914921629e-09, "loss": 0.8507, "step": 27172 }, { "epoch": 0.9848501322895147, "grad_norm": 2.2149176613812465, "learning_rate": 6.017770126537925e-09, "loss": 0.8862, "step": 27173 }, { "epoch": 0.9848863759921713, "grad_norm": 2.3800351528657346, "learning_rate": 5.989017152209231e-09, "loss": 0.7994, "step": 27174 }, { "epoch": 0.984922619694828, "grad_norm": 2.415500324025337, "learning_rate": 5.960332992331341e-09, "loss": 0.8611, "step": 27175 }, { "epoch": 0.9849588633974847, "grad_norm": 2.5713883526350996, "learning_rate": 5.931717647299495e-09, "loss": 0.8896, "step": 27176 }, { "epoch": 0.9849951071001414, "grad_norm": 2.6345952536577886, "learning_rate": 5.903171117508932e-09, "loss": 0.8705, "step": 27177 }, { "epoch": 0.985031350802798, "grad_norm": 2.141457143823081, "learning_rate": 5.874693403351561e-09, "loss": 0.8132, "step": 27178 }, { "epoch": 0.9850675945054547, "grad_norm": 2.6368941396705097, "learning_rate": 5.846284505220956e-09, "loss": 0.8275, "step": 27179 }, { "epoch": 0.9851038382081113, "grad_norm": 2.1827708924842164, "learning_rate": 5.8179444235084706e-09, "loss": 0.9984, "step": 27180 }, { "epoch": 0.985140081910768, "grad_norm": 2.357726940665655, "learning_rate": 5.789673158603793e-09, "loss": 0.8407, "step": 27181 }, { "epoch": 0.9851763256134247, "grad_norm": 2.35866177990812, "learning_rate": 5.7614707108971655e-09, "loss": 0.8968, "step": 27182 }, { "epoch": 0.9852125693160814, "grad_norm": 2.21136813392235, "learning_rate": 5.733337080777168e-09, "loss": 0.8819, "step": 27183 }, { "epoch": 0.985248813018738, "grad_norm": 2.335899250062486, "learning_rate": 5.705272268631823e-09, "loss": 0.8677, "step": 27184 }, { "epoch": 0.9852850567213947, "grad_norm": 2.3019914572780302, "learning_rate": 5.677276274846932e-09, "loss": 0.762, "step": 27185 }, { "epoch": 0.9853213004240513, "grad_norm": 2.311902492865915, "learning_rate": 5.649349099808299e-09, "loss": 0.7817, "step": 27186 }, { "epoch": 0.985357544126708, "grad_norm": 2.4482570998648585, "learning_rate": 5.6214907439017255e-09, "loss": 0.9099, "step": 27187 }, { "epoch": 0.9853937878293646, "grad_norm": 2.441695503038015, "learning_rate": 5.5937012075102385e-09, "loss": 0.9366, "step": 27188 }, { "epoch": 0.9854300315320214, "grad_norm": 2.124337522824402, "learning_rate": 5.5659804910168645e-09, "loss": 0.8651, "step": 27189 }, { "epoch": 0.985466275234678, "grad_norm": 2.286918103265974, "learning_rate": 5.538328594804077e-09, "loss": 0.9139, "step": 27190 }, { "epoch": 0.9855025189373346, "grad_norm": 2.3954266034091893, "learning_rate": 5.5107455192526805e-09, "loss": 0.9631, "step": 27191 }, { "epoch": 0.9855387626399913, "grad_norm": 2.1368013929639775, "learning_rate": 5.483231264742372e-09, "loss": 0.9742, "step": 27192 }, { "epoch": 0.9855750063426479, "grad_norm": 2.130473314710875, "learning_rate": 5.455785831652294e-09, "loss": 0.9206, "step": 27193 }, { "epoch": 0.9856112500453046, "grad_norm": 2.393356499677918, "learning_rate": 5.4284092203615856e-09, "loss": 0.8655, "step": 27194 }, { "epoch": 0.9856474937479613, "grad_norm": 2.38815584184877, "learning_rate": 5.4011014312460585e-09, "loss": 0.7474, "step": 27195 }, { "epoch": 0.985683737450618, "grad_norm": 2.5353483025330217, "learning_rate": 5.373862464683189e-09, "loss": 0.8705, "step": 27196 }, { "epoch": 0.9857199811532746, "grad_norm": 2.348315049022983, "learning_rate": 5.346692321047675e-09, "loss": 0.8198, "step": 27197 }, { "epoch": 0.9857562248559313, "grad_norm": 2.248140421375514, "learning_rate": 5.319591000714219e-09, "loss": 0.8794, "step": 27198 }, { "epoch": 0.9857924685585879, "grad_norm": 2.46313292233538, "learning_rate": 5.292558504055856e-09, "loss": 0.9005, "step": 27199 }, { "epoch": 0.9858287122612446, "grad_norm": 2.2192921871963778, "learning_rate": 5.26559483144562e-09, "loss": 1.0809, "step": 27200 }, { "epoch": 0.9858649559639012, "grad_norm": 2.458290478030316, "learning_rate": 5.238699983254325e-09, "loss": 0.9183, "step": 27201 }, { "epoch": 0.985901199666558, "grad_norm": 2.491628924748589, "learning_rate": 5.211873959853342e-09, "loss": 0.8124, "step": 27202 }, { "epoch": 0.9859374433692146, "grad_norm": 2.5124801641718686, "learning_rate": 5.185116761611819e-09, "loss": 0.9545, "step": 27203 }, { "epoch": 0.9859736870718713, "grad_norm": 2.4543705208087747, "learning_rate": 5.158428388898906e-09, "loss": 0.8877, "step": 27204 }, { "epoch": 0.9860099307745279, "grad_norm": 2.5425145802676954, "learning_rate": 5.131808842082087e-09, "loss": 0.6986, "step": 27205 }, { "epoch": 0.9860461744771846, "grad_norm": 2.155315496414346, "learning_rate": 5.105258121527734e-09, "loss": 0.7513, "step": 27206 }, { "epoch": 0.9860824181798412, "grad_norm": 2.5880552278156626, "learning_rate": 5.078776227602222e-09, "loss": 0.9349, "step": 27207 }, { "epoch": 0.986118661882498, "grad_norm": 2.4722283674168546, "learning_rate": 5.052363160670815e-09, "loss": 1.0011, "step": 27208 }, { "epoch": 0.9861549055851546, "grad_norm": 2.2995852185577856, "learning_rate": 5.026018921096553e-09, "loss": 0.8343, "step": 27209 }, { "epoch": 0.9861911492878113, "grad_norm": 2.279952744140477, "learning_rate": 4.999743509243038e-09, "loss": 0.9452, "step": 27210 }, { "epoch": 0.9862273929904679, "grad_norm": 2.155950810209917, "learning_rate": 4.9735369254722e-09, "loss": 0.8284, "step": 27211 }, { "epoch": 0.9862636366931246, "grad_norm": 2.4676638232194725, "learning_rate": 4.947399170144862e-09, "loss": 0.9931, "step": 27212 }, { "epoch": 0.9862998803957812, "grad_norm": 2.157716047163709, "learning_rate": 4.921330243621847e-09, "loss": 0.8455, "step": 27213 }, { "epoch": 0.9863361240984378, "grad_norm": 2.441262026316574, "learning_rate": 4.895330146261756e-09, "loss": 0.8485, "step": 27214 }, { "epoch": 0.9863723678010946, "grad_norm": 2.4367196068994583, "learning_rate": 4.8693988784226374e-09, "loss": 0.9196, "step": 27215 }, { "epoch": 0.9864086115037513, "grad_norm": 2.3162934710906034, "learning_rate": 4.843536440463093e-09, "loss": 0.8803, "step": 27216 }, { "epoch": 0.9864448552064079, "grad_norm": 2.1688822344095824, "learning_rate": 4.817742832737837e-09, "loss": 0.743, "step": 27217 }, { "epoch": 0.9864810989090645, "grad_norm": 2.561457011654579, "learning_rate": 4.792018055603809e-09, "loss": 0.773, "step": 27218 }, { "epoch": 0.9865173426117212, "grad_norm": 2.620016237922411, "learning_rate": 4.766362109414058e-09, "loss": 0.9486, "step": 27219 }, { "epoch": 0.9865535863143778, "grad_norm": 2.7707816174299573, "learning_rate": 4.7407749945233e-09, "loss": 1.0314, "step": 27220 }, { "epoch": 0.9865898300170345, "grad_norm": 2.2187755885849265, "learning_rate": 4.7152567112834776e-09, "loss": 0.7743, "step": 27221 }, { "epoch": 0.9866260737196912, "grad_norm": 2.48016155251585, "learning_rate": 4.68980726004653e-09, "loss": 0.8705, "step": 27222 }, { "epoch": 0.9866623174223479, "grad_norm": 2.7029828028855087, "learning_rate": 4.664426641162734e-09, "loss": 0.8071, "step": 27223 }, { "epoch": 0.9866985611250045, "grad_norm": 2.4401107411448364, "learning_rate": 4.639114854982363e-09, "loss": 0.8903, "step": 27224 }, { "epoch": 0.9867348048276612, "grad_norm": 2.18573211427401, "learning_rate": 4.613871901853472e-09, "loss": 0.9117, "step": 27225 }, { "epoch": 0.9867710485303178, "grad_norm": 2.5599047425733272, "learning_rate": 4.5886977821246735e-09, "loss": 0.8283, "step": 27226 }, { "epoch": 0.9868072922329745, "grad_norm": 2.5939617876568994, "learning_rate": 4.563592496142355e-09, "loss": 0.7122, "step": 27227 }, { "epoch": 0.9868435359356312, "grad_norm": 2.3903928670480292, "learning_rate": 4.538556044252906e-09, "loss": 0.8754, "step": 27228 }, { "epoch": 0.9868797796382879, "grad_norm": 2.167240188186426, "learning_rate": 4.513588426800497e-09, "loss": 0.7172, "step": 27229 }, { "epoch": 0.9869160233409445, "grad_norm": 2.4560673887475897, "learning_rate": 4.488689644129851e-09, "loss": 0.8745, "step": 27230 }, { "epoch": 0.9869522670436012, "grad_norm": 2.4781956136688628, "learning_rate": 4.463859696584027e-09, "loss": 0.905, "step": 27231 }, { "epoch": 0.9869885107462578, "grad_norm": 2.1065608900653525, "learning_rate": 4.439098584504975e-09, "loss": 0.6894, "step": 27232 }, { "epoch": 0.9870247544489145, "grad_norm": 2.206315673786684, "learning_rate": 4.414406308233532e-09, "loss": 0.8377, "step": 27233 }, { "epoch": 0.9870609981515711, "grad_norm": 2.6612720200081594, "learning_rate": 4.389782868110537e-09, "loss": 0.9141, "step": 27234 }, { "epoch": 0.9870972418542279, "grad_norm": 2.4050988817940757, "learning_rate": 4.365228264475163e-09, "loss": 0.8193, "step": 27235 }, { "epoch": 0.9871334855568845, "grad_norm": 2.3950755536301296, "learning_rate": 4.3407424976660284e-09, "loss": 0.8494, "step": 27236 }, { "epoch": 0.9871697292595412, "grad_norm": 2.4695290779683323, "learning_rate": 4.316325568019531e-09, "loss": 0.8634, "step": 27237 }, { "epoch": 0.9872059729621978, "grad_norm": 2.331975836830496, "learning_rate": 4.291977475873177e-09, "loss": 0.9266, "step": 27238 }, { "epoch": 0.9872422166648545, "grad_norm": 2.825996424509275, "learning_rate": 4.2676982215622555e-09, "loss": 0.8527, "step": 27239 }, { "epoch": 0.9872784603675111, "grad_norm": 2.2885758440966253, "learning_rate": 4.2434878054203875e-09, "loss": 0.8242, "step": 27240 }, { "epoch": 0.9873147040701679, "grad_norm": 2.3501202507012824, "learning_rate": 4.219346227782861e-09, "loss": 0.9631, "step": 27241 }, { "epoch": 0.9873509477728245, "grad_norm": 2.8083354262400446, "learning_rate": 4.195273488980522e-09, "loss": 0.8382, "step": 27242 }, { "epoch": 0.9873871914754812, "grad_norm": 2.6310362353974357, "learning_rate": 4.171269589346439e-09, "loss": 0.8481, "step": 27243 }, { "epoch": 0.9874234351781378, "grad_norm": 2.35367268801307, "learning_rate": 4.1473345292109e-09, "loss": 0.8298, "step": 27244 }, { "epoch": 0.9874596788807944, "grad_norm": 2.57627597209804, "learning_rate": 4.123468308903644e-09, "loss": 1.0686, "step": 27245 }, { "epoch": 0.9874959225834511, "grad_norm": 2.3949588495389347, "learning_rate": 4.0996709287532965e-09, "loss": 0.9668, "step": 27246 }, { "epoch": 0.9875321662861077, "grad_norm": 2.4368334071599724, "learning_rate": 4.075942389088483e-09, "loss": 0.9513, "step": 27247 }, { "epoch": 0.9875684099887645, "grad_norm": 2.2947540205615087, "learning_rate": 4.05228269023561e-09, "loss": 0.7504, "step": 27248 }, { "epoch": 0.9876046536914211, "grad_norm": 2.2154133540964955, "learning_rate": 4.028691832520526e-09, "loss": 0.7286, "step": 27249 }, { "epoch": 0.9876408973940778, "grad_norm": 2.1375513529423302, "learning_rate": 4.005169816269083e-09, "loss": 1.0272, "step": 27250 }, { "epoch": 0.9876771410967344, "grad_norm": 2.298732344791394, "learning_rate": 3.981716641804356e-09, "loss": 1.0442, "step": 27251 }, { "epoch": 0.9877133847993911, "grad_norm": 2.6314833847472445, "learning_rate": 3.95833230945053e-09, "loss": 0.9402, "step": 27252 }, { "epoch": 0.9877496285020477, "grad_norm": 2.5518848147691733, "learning_rate": 3.935016819529569e-09, "loss": 0.969, "step": 27253 }, { "epoch": 0.9877858722047045, "grad_norm": 2.487707470799133, "learning_rate": 3.911770172361773e-09, "loss": 0.785, "step": 27254 }, { "epoch": 0.9878221159073611, "grad_norm": 2.407951011262497, "learning_rate": 3.888592368269106e-09, "loss": 0.9187, "step": 27255 }, { "epoch": 0.9878583596100178, "grad_norm": 2.7074953726917297, "learning_rate": 3.865483407569648e-09, "loss": 0.9297, "step": 27256 }, { "epoch": 0.9878946033126744, "grad_norm": 2.4533719002163705, "learning_rate": 3.842443290582032e-09, "loss": 0.8056, "step": 27257 }, { "epoch": 0.9879308470153311, "grad_norm": 2.671782956592897, "learning_rate": 3.819472017624337e-09, "loss": 0.8863, "step": 27258 }, { "epoch": 0.9879670907179877, "grad_norm": 2.187355973903963, "learning_rate": 3.796569589012422e-09, "loss": 0.6937, "step": 27259 }, { "epoch": 0.9880033344206444, "grad_norm": 2.2672121628745727, "learning_rate": 3.7737360050627e-09, "loss": 1.0212, "step": 27260 }, { "epoch": 0.9880395781233011, "grad_norm": 2.3498068579569176, "learning_rate": 3.75097126608881e-09, "loss": 0.8545, "step": 27261 }, { "epoch": 0.9880758218259578, "grad_norm": 2.5005728215470664, "learning_rate": 3.728275372404943e-09, "loss": 0.9138, "step": 27262 }, { "epoch": 0.9881120655286144, "grad_norm": 2.5105267193304, "learning_rate": 3.7056483243241846e-09, "loss": 0.6477, "step": 27263 }, { "epoch": 0.9881483092312711, "grad_norm": 2.3921299594721543, "learning_rate": 3.6830901221573955e-09, "loss": 0.8518, "step": 27264 }, { "epoch": 0.9881845529339277, "grad_norm": 2.2685227647997896, "learning_rate": 3.660600766216549e-09, "loss": 1.1314, "step": 27265 }, { "epoch": 0.9882207966365844, "grad_norm": 2.416952780965497, "learning_rate": 3.6381802568102864e-09, "loss": 0.7045, "step": 27266 }, { "epoch": 0.9882570403392411, "grad_norm": 2.034601885472288, "learning_rate": 3.6158285942483608e-09, "loss": 0.9032, "step": 27267 }, { "epoch": 0.9882932840418978, "grad_norm": 2.2556746106917593, "learning_rate": 3.5935457788388585e-09, "loss": 0.7691, "step": 27268 }, { "epoch": 0.9883295277445544, "grad_norm": 2.2810070981116515, "learning_rate": 3.5713318108882012e-09, "loss": 0.8003, "step": 27269 }, { "epoch": 0.988365771447211, "grad_norm": 2.0613726121600457, "learning_rate": 3.5491866907033655e-09, "loss": 0.7257, "step": 27270 }, { "epoch": 0.9884020151498677, "grad_norm": 2.1912735303999535, "learning_rate": 3.527110418588553e-09, "loss": 0.767, "step": 27271 }, { "epoch": 0.9884382588525243, "grad_norm": 2.468448003456686, "learning_rate": 3.5051029948485193e-09, "loss": 0.8655, "step": 27272 }, { "epoch": 0.988474502555181, "grad_norm": 2.6395145527649513, "learning_rate": 3.483164419786356e-09, "loss": 0.869, "step": 27273 }, { "epoch": 0.9885107462578377, "grad_norm": 2.5504029333428457, "learning_rate": 3.4612946937045976e-09, "loss": 0.9945, "step": 27274 }, { "epoch": 0.9885469899604944, "grad_norm": 2.2586087829290467, "learning_rate": 3.439493816904116e-09, "loss": 0.9012, "step": 27275 }, { "epoch": 0.988583233663151, "grad_norm": 2.202053524213306, "learning_rate": 3.4177617896857805e-09, "loss": 0.8928, "step": 27276 }, { "epoch": 0.9886194773658077, "grad_norm": 2.47736003287269, "learning_rate": 3.3960986123487972e-09, "loss": 0.936, "step": 27277 }, { "epoch": 0.9886557210684643, "grad_norm": 2.2688116830912812, "learning_rate": 3.374504285191815e-09, "loss": 0.8653, "step": 27278 }, { "epoch": 0.988691964771121, "grad_norm": 2.3041053856497724, "learning_rate": 3.3529788085118198e-09, "loss": 0.8222, "step": 27279 }, { "epoch": 0.9887282084737777, "grad_norm": 2.497104017168215, "learning_rate": 3.3315221826063504e-09, "loss": 0.8189, "step": 27280 }, { "epoch": 0.9887644521764344, "grad_norm": 2.2051732700377453, "learning_rate": 3.3101344077707264e-09, "loss": 0.8009, "step": 27281 }, { "epoch": 0.988800695879091, "grad_norm": 2.6215650172188654, "learning_rate": 3.288815484299157e-09, "loss": 0.7944, "step": 27282 }, { "epoch": 0.9888369395817477, "grad_norm": 2.2827106982918877, "learning_rate": 3.267565412485851e-09, "loss": 0.7835, "step": 27283 }, { "epoch": 0.9888731832844043, "grad_norm": 2.4402036766933093, "learning_rate": 3.2463841926239083e-09, "loss": 0.9974, "step": 27284 }, { "epoch": 0.988909426987061, "grad_norm": 2.0208611236466005, "learning_rate": 3.225271825004761e-09, "loss": 0.9763, "step": 27285 }, { "epoch": 0.9889456706897176, "grad_norm": 2.3626121809004608, "learning_rate": 3.204228309919288e-09, "loss": 0.7574, "step": 27286 }, { "epoch": 0.9889819143923744, "grad_norm": 2.585751596452476, "learning_rate": 3.1832536476572583e-09, "loss": 0.991, "step": 27287 }, { "epoch": 0.989018158095031, "grad_norm": 2.45107057149207, "learning_rate": 3.1623478385084393e-09, "loss": 0.9222, "step": 27288 }, { "epoch": 0.9890544017976877, "grad_norm": 2.2185553973921928, "learning_rate": 3.141510882760379e-09, "loss": 0.7134, "step": 27289 }, { "epoch": 0.9890906455003443, "grad_norm": 2.3110559427818536, "learning_rate": 3.1207427807006254e-09, "loss": 0.9857, "step": 27290 }, { "epoch": 0.989126889203001, "grad_norm": 2.6057502373051156, "learning_rate": 3.100043532614505e-09, "loss": 0.8799, "step": 27291 }, { "epoch": 0.9891631329056576, "grad_norm": 2.290021163560224, "learning_rate": 3.079413138787901e-09, "loss": 0.8949, "step": 27292 }, { "epoch": 0.9891993766083143, "grad_norm": 2.4393431940322756, "learning_rate": 3.0588515995050306e-09, "loss": 0.9936, "step": 27293 }, { "epoch": 0.989235620310971, "grad_norm": 2.234064109776484, "learning_rate": 3.038358915049e-09, "loss": 0.9273, "step": 27294 }, { "epoch": 0.9892718640136277, "grad_norm": 2.3386048101775634, "learning_rate": 3.0179350857029165e-09, "loss": 1.1971, "step": 27295 }, { "epoch": 0.9893081077162843, "grad_norm": 2.1746675074281097, "learning_rate": 2.9975801117465565e-09, "loss": 0.8376, "step": 27296 }, { "epoch": 0.989344351418941, "grad_norm": 2.31294127016231, "learning_rate": 2.9772939934624713e-09, "loss": 0.7603, "step": 27297 }, { "epoch": 0.9893805951215976, "grad_norm": 2.4444497267740997, "learning_rate": 2.957076731128217e-09, "loss": 0.8912, "step": 27298 }, { "epoch": 0.9894168388242542, "grad_norm": 2.4271924671576315, "learning_rate": 2.9369283250241243e-09, "loss": 0.8277, "step": 27299 }, { "epoch": 0.989453082526911, "grad_norm": 2.2967184591701972, "learning_rate": 2.9168487754266394e-09, "loss": 0.8813, "step": 27300 }, { "epoch": 0.9894893262295676, "grad_norm": 2.4231862584665302, "learning_rate": 2.8968380826127627e-09, "loss": 0.8332, "step": 27301 }, { "epoch": 0.9895255699322243, "grad_norm": 2.2837296971169536, "learning_rate": 2.87689624685894e-09, "loss": 0.8716, "step": 27302 }, { "epoch": 0.9895618136348809, "grad_norm": 2.363003539329173, "learning_rate": 2.857023268438841e-09, "loss": 0.7318, "step": 27303 }, { "epoch": 0.9895980573375376, "grad_norm": 2.2169903549272956, "learning_rate": 2.837219147626691e-09, "loss": 0.8186, "step": 27304 }, { "epoch": 0.9896343010401942, "grad_norm": 2.3402802828352094, "learning_rate": 2.8174838846956045e-09, "loss": 0.8255, "step": 27305 }, { "epoch": 0.9896705447428509, "grad_norm": 2.5110214865713885, "learning_rate": 2.797817479917031e-09, "loss": 0.8335, "step": 27306 }, { "epoch": 0.9897067884455076, "grad_norm": 2.864553565322187, "learning_rate": 2.7782199335629757e-09, "loss": 0.8578, "step": 27307 }, { "epoch": 0.9897430321481643, "grad_norm": 2.1150733573480855, "learning_rate": 2.7586912459026672e-09, "loss": 0.6912, "step": 27308 }, { "epoch": 0.9897792758508209, "grad_norm": 2.4545123203292487, "learning_rate": 2.7392314172047797e-09, "loss": 0.7807, "step": 27309 }, { "epoch": 0.9898155195534776, "grad_norm": 2.5384686418160403, "learning_rate": 2.719840447738542e-09, "loss": 0.9281, "step": 27310 }, { "epoch": 0.9898517632561342, "grad_norm": 2.595836295503162, "learning_rate": 2.700518337770408e-09, "loss": 0.7737, "step": 27311 }, { "epoch": 0.9898880069587909, "grad_norm": 2.308001583420447, "learning_rate": 2.6812650875673863e-09, "loss": 0.8773, "step": 27312 }, { "epoch": 0.9899242506614476, "grad_norm": 2.0568924811021514, "learning_rate": 2.6620806973937096e-09, "loss": 0.6953, "step": 27313 }, { "epoch": 0.9899604943641043, "grad_norm": 2.458342514547041, "learning_rate": 2.6429651675141666e-09, "loss": 0.8719, "step": 27314 }, { "epoch": 0.9899967380667609, "grad_norm": 2.502411165937185, "learning_rate": 2.6239184981929897e-09, "loss": 0.9768, "step": 27315 }, { "epoch": 0.9900329817694176, "grad_norm": 2.830148442487658, "learning_rate": 2.6049406896910822e-09, "loss": 0.8926, "step": 27316 }, { "epoch": 0.9900692254720742, "grad_norm": 2.57429566683946, "learning_rate": 2.5860317422710115e-09, "loss": 0.9199, "step": 27317 }, { "epoch": 0.9901054691747309, "grad_norm": 2.3820060485095915, "learning_rate": 2.5671916561936793e-09, "loss": 0.7207, "step": 27318 }, { "epoch": 0.9901417128773875, "grad_norm": 2.299709504835753, "learning_rate": 2.5484204317172135e-09, "loss": 0.6561, "step": 27319 }, { "epoch": 0.9901779565800443, "grad_norm": 2.3552054883381013, "learning_rate": 2.5297180691019607e-09, "loss": 0.8341, "step": 27320 }, { "epoch": 0.9902142002827009, "grad_norm": 2.3493832531530128, "learning_rate": 2.511084568604383e-09, "loss": 0.5925, "step": 27321 }, { "epoch": 0.9902504439853576, "grad_norm": 2.4530577416151975, "learning_rate": 2.4925199304814963e-09, "loss": 0.9096, "step": 27322 }, { "epoch": 0.9902866876880142, "grad_norm": 2.3517920082409574, "learning_rate": 2.4740241549892075e-09, "loss": 0.7159, "step": 27323 }, { "epoch": 0.9903229313906708, "grad_norm": 2.643629615644934, "learning_rate": 2.4555972423828677e-09, "loss": 0.7197, "step": 27324 }, { "epoch": 0.9903591750933275, "grad_norm": 2.3538517467320514, "learning_rate": 2.4372391929161633e-09, "loss": 0.9452, "step": 27325 }, { "epoch": 0.9903954187959843, "grad_norm": 2.3585031663314466, "learning_rate": 2.4189500068416694e-09, "loss": 0.893, "step": 27326 }, { "epoch": 0.9904316624986409, "grad_norm": 2.3492258195505307, "learning_rate": 2.400729684411407e-09, "loss": 0.6613, "step": 27327 }, { "epoch": 0.9904679062012975, "grad_norm": 2.2520596110476614, "learning_rate": 2.3825782258768413e-09, "loss": 0.94, "step": 27328 }, { "epoch": 0.9905041499039542, "grad_norm": 2.6270405814215665, "learning_rate": 2.3644956314877733e-09, "loss": 0.8146, "step": 27329 }, { "epoch": 0.9905403936066108, "grad_norm": 2.4093876251108575, "learning_rate": 2.346481901493447e-09, "loss": 0.8244, "step": 27330 }, { "epoch": 0.9905766373092675, "grad_norm": 2.7204158281973125, "learning_rate": 2.3285370361419977e-09, "loss": 0.8238, "step": 27331 }, { "epoch": 0.9906128810119241, "grad_norm": 2.3728422942856073, "learning_rate": 2.3106610356810055e-09, "loss": 0.8982, "step": 27332 }, { "epoch": 0.9906491247145809, "grad_norm": 2.4495186404785048, "learning_rate": 2.292853900356384e-09, "loss": 0.8956, "step": 27333 }, { "epoch": 0.9906853684172375, "grad_norm": 2.4794523862123445, "learning_rate": 2.2751156304140486e-09, "loss": 0.884, "step": 27334 }, { "epoch": 0.9907216121198942, "grad_norm": 2.354937323951898, "learning_rate": 2.2574462260976926e-09, "loss": 0.8191, "step": 27335 }, { "epoch": 0.9907578558225508, "grad_norm": 2.3277489002747536, "learning_rate": 2.239845687651565e-09, "loss": 0.8876, "step": 27336 }, { "epoch": 0.9907940995252075, "grad_norm": 2.462027974064841, "learning_rate": 2.2223140153176946e-09, "loss": 1.1749, "step": 27337 }, { "epoch": 0.9908303432278641, "grad_norm": 2.522508332476839, "learning_rate": 2.204851209337555e-09, "loss": 0.8517, "step": 27338 }, { "epoch": 0.9908665869305209, "grad_norm": 2.309479265936391, "learning_rate": 2.1874572699526198e-09, "loss": 0.8948, "step": 27339 }, { "epoch": 0.9909028306331775, "grad_norm": 2.428744718721809, "learning_rate": 2.1701321974015863e-09, "loss": 0.67, "step": 27340 }, { "epoch": 0.9909390743358342, "grad_norm": 2.313223349227192, "learning_rate": 2.1528759919231534e-09, "loss": 1.0159, "step": 27341 }, { "epoch": 0.9909753180384908, "grad_norm": 2.4499808639531144, "learning_rate": 2.1356886537560184e-09, "loss": 0.9768, "step": 27342 }, { "epoch": 0.9910115617411475, "grad_norm": 2.1949363902291137, "learning_rate": 2.118570183136104e-09, "loss": 1.0115, "step": 27343 }, { "epoch": 0.9910478054438041, "grad_norm": 2.3038283033944422, "learning_rate": 2.1015205802998874e-09, "loss": 0.791, "step": 27344 }, { "epoch": 0.9910840491464608, "grad_norm": 2.4834544164311256, "learning_rate": 2.084539845482181e-09, "loss": 1.0484, "step": 27345 }, { "epoch": 0.9911202928491175, "grad_norm": 2.2959767164733837, "learning_rate": 2.0676279789166866e-09, "loss": 0.8518, "step": 27346 }, { "epoch": 0.9911565365517742, "grad_norm": 2.45428980929188, "learning_rate": 2.050784980836551e-09, "loss": 0.903, "step": 27347 }, { "epoch": 0.9911927802544308, "grad_norm": 2.4543066739117467, "learning_rate": 2.0340108514743663e-09, "loss": 0.9445, "step": 27348 }, { "epoch": 0.9912290239570875, "grad_norm": 2.4114816681461724, "learning_rate": 2.0173055910605034e-09, "loss": 0.9502, "step": 27349 }, { "epoch": 0.9912652676597441, "grad_norm": 1.9907677663616192, "learning_rate": 2.0006691998253334e-09, "loss": 0.8993, "step": 27350 }, { "epoch": 0.9913015113624007, "grad_norm": 2.484634955979756, "learning_rate": 1.984101677998673e-09, "loss": 0.781, "step": 27351 }, { "epoch": 0.9913377550650574, "grad_norm": 2.654307246567949, "learning_rate": 1.967603025808118e-09, "loss": 0.9407, "step": 27352 }, { "epoch": 0.9913739987677141, "grad_norm": 2.230206165968961, "learning_rate": 1.9511732434812636e-09, "loss": 0.6296, "step": 27353 }, { "epoch": 0.9914102424703708, "grad_norm": 2.444756854758777, "learning_rate": 1.9348123312445955e-09, "loss": 0.6995, "step": 27354 }, { "epoch": 0.9914464861730274, "grad_norm": 2.366154307620676, "learning_rate": 1.918520289323489e-09, "loss": 0.8783, "step": 27355 }, { "epoch": 0.9914827298756841, "grad_norm": 2.475326313788575, "learning_rate": 1.902297117942209e-09, "loss": 0.8741, "step": 27356 }, { "epoch": 0.9915189735783407, "grad_norm": 2.6635779908028523, "learning_rate": 1.8861428173250207e-09, "loss": 1.0139, "step": 27357 }, { "epoch": 0.9915552172809974, "grad_norm": 2.6693959748962537, "learning_rate": 1.8700573876934137e-09, "loss": 0.906, "step": 27358 }, { "epoch": 0.9915914609836541, "grad_norm": 2.167232619465893, "learning_rate": 1.8540408292699874e-09, "loss": 0.9242, "step": 27359 }, { "epoch": 0.9916277046863108, "grad_norm": 2.3912352905617236, "learning_rate": 1.8380931422751213e-09, "loss": 0.9148, "step": 27360 }, { "epoch": 0.9916639483889674, "grad_norm": 2.4516848530094313, "learning_rate": 1.8222143269286396e-09, "loss": 0.7754, "step": 27361 }, { "epoch": 0.9917001920916241, "grad_norm": 2.4953854397227233, "learning_rate": 1.806404383448701e-09, "loss": 0.8832, "step": 27362 }, { "epoch": 0.9917364357942807, "grad_norm": 2.524605504387129, "learning_rate": 1.7906633120540195e-09, "loss": 0.9225, "step": 27363 }, { "epoch": 0.9917726794969374, "grad_norm": 2.5854780117909453, "learning_rate": 1.7749911129616437e-09, "loss": 0.8413, "step": 27364 }, { "epoch": 0.991808923199594, "grad_norm": 2.490726300195544, "learning_rate": 1.7593877863864017e-09, "loss": 0.7416, "step": 27365 }, { "epoch": 0.9918451669022508, "grad_norm": 2.301564027410254, "learning_rate": 1.7438533325442319e-09, "loss": 0.6589, "step": 27366 }, { "epoch": 0.9918814106049074, "grad_norm": 2.4608300261761626, "learning_rate": 1.7283877516488524e-09, "loss": 0.9126, "step": 27367 }, { "epoch": 0.9919176543075641, "grad_norm": 2.151998942009448, "learning_rate": 1.7129910439134256e-09, "loss": 0.7766, "step": 27368 }, { "epoch": 0.9919538980102207, "grad_norm": 2.3172705502345305, "learning_rate": 1.6976632095505596e-09, "loss": 0.843, "step": 27369 }, { "epoch": 0.9919901417128774, "grad_norm": 2.765186874825296, "learning_rate": 1.6824042487700865e-09, "loss": 0.9679, "step": 27370 }, { "epoch": 0.992026385415534, "grad_norm": 2.592624140864522, "learning_rate": 1.6672141617840588e-09, "loss": 1.0918, "step": 27371 }, { "epoch": 0.9920626291181908, "grad_norm": 2.2092229351638277, "learning_rate": 1.6520929488006433e-09, "loss": 0.6975, "step": 27372 }, { "epoch": 0.9920988728208474, "grad_norm": 2.507134822535017, "learning_rate": 1.637040610028562e-09, "loss": 1.0554, "step": 27373 }, { "epoch": 0.9921351165235041, "grad_norm": 2.3355002680526, "learning_rate": 1.6220571456748713e-09, "loss": 0.8618, "step": 27374 }, { "epoch": 0.9921713602261607, "grad_norm": 2.421327942306995, "learning_rate": 1.607142555946628e-09, "loss": 0.86, "step": 27375 }, { "epoch": 0.9922076039288173, "grad_norm": 2.3587352471461704, "learning_rate": 1.5922968410486683e-09, "loss": 0.7615, "step": 27376 }, { "epoch": 0.992243847631474, "grad_norm": 2.306844352224924, "learning_rate": 1.5775200011858283e-09, "loss": 0.747, "step": 27377 }, { "epoch": 0.9922800913341306, "grad_norm": 2.250937471096398, "learning_rate": 1.5628120365623888e-09, "loss": 0.8171, "step": 27378 }, { "epoch": 0.9923163350367874, "grad_norm": 2.065690455868502, "learning_rate": 1.5481729473798557e-09, "loss": 0.9056, "step": 27379 }, { "epoch": 0.992352578739444, "grad_norm": 2.9474711845868335, "learning_rate": 1.5336027338408443e-09, "loss": 1.1086, "step": 27380 }, { "epoch": 0.9923888224421007, "grad_norm": 2.2450054794465837, "learning_rate": 1.5191013961457502e-09, "loss": 0.7376, "step": 27381 }, { "epoch": 0.9924250661447573, "grad_norm": 2.0948801720630046, "learning_rate": 1.5046689344949682e-09, "loss": 0.9078, "step": 27382 }, { "epoch": 0.992461309847414, "grad_norm": 2.5470564822019175, "learning_rate": 1.4903053490861186e-09, "loss": 0.7817, "step": 27383 }, { "epoch": 0.9924975535500706, "grad_norm": 2.4547771877492925, "learning_rate": 1.4760106401179308e-09, "loss": 0.862, "step": 27384 }, { "epoch": 0.9925337972527274, "grad_norm": 2.3862675697081097, "learning_rate": 1.4617848077874697e-09, "loss": 0.8781, "step": 27385 }, { "epoch": 0.992570040955384, "grad_norm": 2.275327891065778, "learning_rate": 1.4476278522906894e-09, "loss": 1.044, "step": 27386 }, { "epoch": 0.9926062846580407, "grad_norm": 2.5652809218309907, "learning_rate": 1.4335397738224343e-09, "loss": 0.7933, "step": 27387 }, { "epoch": 0.9926425283606973, "grad_norm": 2.468454100072889, "learning_rate": 1.4195205725769934e-09, "loss": 0.8151, "step": 27388 }, { "epoch": 0.992678772063354, "grad_norm": 2.3017753546303252, "learning_rate": 1.4055702487481004e-09, "loss": 0.8471, "step": 27389 }, { "epoch": 0.9927150157660106, "grad_norm": 2.6327185188841056, "learning_rate": 1.391688802526714e-09, "loss": 0.6724, "step": 27390 }, { "epoch": 0.9927512594686673, "grad_norm": 2.066291774331764, "learning_rate": 1.3778762341049024e-09, "loss": 1.0022, "step": 27391 }, { "epoch": 0.992787503171324, "grad_norm": 2.448465836644454, "learning_rate": 1.3641325436725139e-09, "loss": 0.8098, "step": 27392 }, { "epoch": 0.9928237468739807, "grad_norm": 2.4494401531207455, "learning_rate": 1.3504577314199518e-09, "loss": 0.9287, "step": 27393 }, { "epoch": 0.9928599905766373, "grad_norm": 2.5007360307148514, "learning_rate": 1.336851797534844e-09, "loss": 1.0228, "step": 27394 }, { "epoch": 0.992896234279294, "grad_norm": 2.696985359920509, "learning_rate": 1.3233147422048175e-09, "loss": 0.7938, "step": 27395 }, { "epoch": 0.9929324779819506, "grad_norm": 2.5400066761710693, "learning_rate": 1.3098465656163905e-09, "loss": 0.9481, "step": 27396 }, { "epoch": 0.9929687216846073, "grad_norm": 2.2816281641664107, "learning_rate": 1.29644726795497e-09, "loss": 0.954, "step": 27397 }, { "epoch": 0.993004965387264, "grad_norm": 2.3677068794872858, "learning_rate": 1.2831168494054081e-09, "loss": 1.0927, "step": 27398 }, { "epoch": 0.9930412090899207, "grad_norm": 2.4260064321433745, "learning_rate": 1.2698553101514466e-09, "loss": 0.925, "step": 27399 }, { "epoch": 0.9930774527925773, "grad_norm": 2.3017553214306172, "learning_rate": 1.2566626503757173e-09, "loss": 0.8765, "step": 27400 }, { "epoch": 0.993113696495234, "grad_norm": 2.371967685351936, "learning_rate": 1.243538870260297e-09, "loss": 0.8986, "step": 27401 }, { "epoch": 0.9931499401978906, "grad_norm": 2.410998148070616, "learning_rate": 1.2304839699855963e-09, "loss": 0.8405, "step": 27402 }, { "epoch": 0.9931861839005472, "grad_norm": 2.2534543539171774, "learning_rate": 1.217497949731472e-09, "loss": 1.0491, "step": 27403 }, { "epoch": 0.9932224276032039, "grad_norm": 2.4978205802600693, "learning_rate": 1.204580809677225e-09, "loss": 0.8461, "step": 27404 }, { "epoch": 0.9932586713058607, "grad_norm": 2.064490934972765, "learning_rate": 1.1917325500004907e-09, "loss": 0.9106, "step": 27405 }, { "epoch": 0.9932949150085173, "grad_norm": 2.647266368925937, "learning_rate": 1.1789531708789048e-09, "loss": 0.8577, "step": 27406 }, { "epoch": 0.993331158711174, "grad_norm": 2.449482391715236, "learning_rate": 1.1662426724878828e-09, "loss": 1.0389, "step": 27407 }, { "epoch": 0.9933674024138306, "grad_norm": 2.337687868408649, "learning_rate": 1.1536010550028398e-09, "loss": 0.8626, "step": 27408 }, { "epoch": 0.9934036461164872, "grad_norm": 2.1707360979300963, "learning_rate": 1.1410283185980808e-09, "loss": 0.8353, "step": 27409 }, { "epoch": 0.9934398898191439, "grad_norm": 2.1741897945503577, "learning_rate": 1.1285244634468006e-09, "loss": 0.9268, "step": 27410 }, { "epoch": 0.9934761335218006, "grad_norm": 2.2686682997103738, "learning_rate": 1.1160894897210838e-09, "loss": 0.8777, "step": 27411 }, { "epoch": 0.9935123772244573, "grad_norm": 2.745465239560602, "learning_rate": 1.1037233975924598e-09, "loss": 0.8558, "step": 27412 }, { "epoch": 0.9935486209271139, "grad_norm": 2.2272880808273996, "learning_rate": 1.0914261872307929e-09, "loss": 0.831, "step": 27413 }, { "epoch": 0.9935848646297706, "grad_norm": 2.3170595078835095, "learning_rate": 1.0791978588065022e-09, "loss": 0.8645, "step": 27414 }, { "epoch": 0.9936211083324272, "grad_norm": 2.4723737654029003, "learning_rate": 1.0670384124877864e-09, "loss": 0.8739, "step": 27415 }, { "epoch": 0.9936573520350839, "grad_norm": 2.1431344857275896, "learning_rate": 1.054947848441734e-09, "loss": 0.9058, "step": 27416 }, { "epoch": 0.9936935957377405, "grad_norm": 2.1276215763596387, "learning_rate": 1.042926166835434e-09, "loss": 0.6872, "step": 27417 }, { "epoch": 0.9937298394403973, "grad_norm": 2.269082860442491, "learning_rate": 1.0309733678343091e-09, "loss": 0.8144, "step": 27418 }, { "epoch": 0.9937660831430539, "grad_norm": 2.4319975199478785, "learning_rate": 1.0190894516032279e-09, "loss": 1.0376, "step": 27419 }, { "epoch": 0.9938023268457106, "grad_norm": 2.4170509887167486, "learning_rate": 1.007274418305393e-09, "loss": 0.8939, "step": 27420 }, { "epoch": 0.9938385705483672, "grad_norm": 2.3079280349200686, "learning_rate": 9.955282681045619e-10, "loss": 0.99, "step": 27421 }, { "epoch": 0.9938748142510239, "grad_norm": 2.274347053384478, "learning_rate": 9.838510011617175e-10, "loss": 1.0159, "step": 27422 }, { "epoch": 0.9939110579536805, "grad_norm": 2.462119062220214, "learning_rate": 9.722426176378419e-10, "loss": 1.0153, "step": 27423 }, { "epoch": 0.9939473016563372, "grad_norm": 2.5132842858600006, "learning_rate": 9.607031176933622e-10, "loss": 0.935, "step": 27424 }, { "epoch": 0.9939835453589939, "grad_norm": 2.4249784953902176, "learning_rate": 9.492325014875958e-10, "loss": 0.9826, "step": 27425 }, { "epoch": 0.9940197890616506, "grad_norm": 2.4628264215502167, "learning_rate": 9.37830769177639e-10, "loss": 0.941, "step": 27426 }, { "epoch": 0.9940560327643072, "grad_norm": 2.1301785518687075, "learning_rate": 9.264979209205882e-10, "loss": 0.6914, "step": 27427 }, { "epoch": 0.9940922764669639, "grad_norm": 2.477205118227555, "learning_rate": 9.152339568735402e-10, "loss": 0.8479, "step": 27428 }, { "epoch": 0.9941285201696205, "grad_norm": 2.330206292456086, "learning_rate": 9.040388771913711e-10, "loss": 0.7039, "step": 27429 }, { "epoch": 0.9941647638722771, "grad_norm": 2.390991213691351, "learning_rate": 8.929126820278466e-10, "loss": 0.8235, "step": 27430 }, { "epoch": 0.9942010075749339, "grad_norm": 2.2491239356806734, "learning_rate": 8.818553715367329e-10, "loss": 0.8228, "step": 27431 }, { "epoch": 0.9942372512775906, "grad_norm": 2.3076945339245243, "learning_rate": 8.708669458701303e-10, "loss": 1.0604, "step": 27432 }, { "epoch": 0.9942734949802472, "grad_norm": 2.3607541761714415, "learning_rate": 8.599474051801393e-10, "loss": 0.7777, "step": 27433 }, { "epoch": 0.9943097386829038, "grad_norm": 2.4450304409180212, "learning_rate": 8.490967496160851e-10, "loss": 0.9283, "step": 27434 }, { "epoch": 0.9943459823855605, "grad_norm": 2.5835770882582243, "learning_rate": 8.383149793284029e-10, "loss": 0.9709, "step": 27435 }, { "epoch": 0.9943822260882171, "grad_norm": 2.478718832151696, "learning_rate": 8.276020944653074e-10, "loss": 0.7, "step": 27436 }, { "epoch": 0.9944184697908738, "grad_norm": 2.447267799909442, "learning_rate": 8.169580951744582e-10, "loss": 0.8959, "step": 27437 }, { "epoch": 0.9944547134935305, "grad_norm": 2.7456410787165213, "learning_rate": 8.063829816024049e-10, "loss": 0.9803, "step": 27438 }, { "epoch": 0.9944909571961872, "grad_norm": 2.828345330258746, "learning_rate": 7.958767538951417e-10, "loss": 0.9573, "step": 27439 }, { "epoch": 0.9945272008988438, "grad_norm": 2.408671896016146, "learning_rate": 7.854394121969977e-10, "loss": 0.8125, "step": 27440 }, { "epoch": 0.9945634446015005, "grad_norm": 2.2799028583975733, "learning_rate": 7.750709566523018e-10, "loss": 0.769, "step": 27441 }, { "epoch": 0.9945996883041571, "grad_norm": 2.1670148700862804, "learning_rate": 7.647713874037177e-10, "loss": 0.951, "step": 27442 }, { "epoch": 0.9946359320068138, "grad_norm": 2.316159254313585, "learning_rate": 7.54540704593354e-10, "loss": 0.9247, "step": 27443 }, { "epoch": 0.9946721757094705, "grad_norm": 2.429938538011542, "learning_rate": 7.443789083616537e-10, "loss": 0.8192, "step": 27444 }, { "epoch": 0.9947084194121272, "grad_norm": 2.065699966268272, "learning_rate": 7.342859988490603e-10, "loss": 0.9672, "step": 27445 }, { "epoch": 0.9947446631147838, "grad_norm": 2.1214923577834464, "learning_rate": 7.242619761943515e-10, "loss": 0.8012, "step": 27446 }, { "epoch": 0.9947809068174405, "grad_norm": 2.4310065442142443, "learning_rate": 7.143068405357501e-10, "loss": 0.8548, "step": 27447 }, { "epoch": 0.9948171505200971, "grad_norm": 2.5266764812299636, "learning_rate": 7.044205920109237e-10, "loss": 0.904, "step": 27448 }, { "epoch": 0.9948533942227538, "grad_norm": 2.6996802138764244, "learning_rate": 6.946032307553197e-10, "loss": 0.8649, "step": 27449 }, { "epoch": 0.9948896379254104, "grad_norm": 2.67291668271009, "learning_rate": 6.848547569049401e-10, "loss": 0.7731, "step": 27450 }, { "epoch": 0.9949258816280672, "grad_norm": 2.4861839229845466, "learning_rate": 6.751751705935672e-10, "loss": 0.7647, "step": 27451 }, { "epoch": 0.9949621253307238, "grad_norm": 2.5578392780065746, "learning_rate": 6.655644719549825e-10, "loss": 0.8974, "step": 27452 }, { "epoch": 0.9949983690333805, "grad_norm": 2.223273232018475, "learning_rate": 6.560226611213027e-10, "loss": 0.7013, "step": 27453 }, { "epoch": 0.9950346127360371, "grad_norm": 2.3170150118356636, "learning_rate": 6.465497382240893e-10, "loss": 1.0099, "step": 27454 }, { "epoch": 0.9950708564386938, "grad_norm": 2.087105930138889, "learning_rate": 6.371457033943485e-10, "loss": 0.9108, "step": 27455 }, { "epoch": 0.9951071001413504, "grad_norm": 2.1108083975784515, "learning_rate": 6.27810556760311e-10, "loss": 0.6618, "step": 27456 }, { "epoch": 0.9951433438440072, "grad_norm": 2.2639181661425973, "learning_rate": 6.185442984524281e-10, "loss": 0.7075, "step": 27457 }, { "epoch": 0.9951795875466638, "grad_norm": 2.2782647443907695, "learning_rate": 6.093469285967102e-10, "loss": 0.9535, "step": 27458 }, { "epoch": 0.9952158312493204, "grad_norm": 2.6754806199848185, "learning_rate": 6.002184473213879e-10, "loss": 0.9967, "step": 27459 }, { "epoch": 0.9952520749519771, "grad_norm": 2.5915908606885316, "learning_rate": 5.911588547508063e-10, "loss": 0.8995, "step": 27460 }, { "epoch": 0.9952883186546337, "grad_norm": 2.288983266033238, "learning_rate": 5.821681510109756e-10, "loss": 0.9278, "step": 27461 }, { "epoch": 0.9953245623572904, "grad_norm": 2.182371394834092, "learning_rate": 5.732463362251306e-10, "loss": 0.9557, "step": 27462 }, { "epoch": 0.995360806059947, "grad_norm": 3.327731571278099, "learning_rate": 5.643934105165061e-10, "loss": 0.9725, "step": 27463 }, { "epoch": 0.9953970497626038, "grad_norm": 2.3253430942555133, "learning_rate": 5.556093740072266e-10, "loss": 0.9817, "step": 27464 }, { "epoch": 0.9954332934652604, "grad_norm": 2.4260859382105955, "learning_rate": 5.468942268177513e-10, "loss": 0.9213, "step": 27465 }, { "epoch": 0.9954695371679171, "grad_norm": 2.8860980421282996, "learning_rate": 5.382479690685393e-10, "loss": 0.8447, "step": 27466 }, { "epoch": 0.9955057808705737, "grad_norm": 2.4133263436953443, "learning_rate": 5.296706008783847e-10, "loss": 0.8607, "step": 27467 }, { "epoch": 0.9955420245732304, "grad_norm": 2.6172662787491943, "learning_rate": 5.211621223660812e-10, "loss": 0.8608, "step": 27468 }, { "epoch": 0.995578268275887, "grad_norm": 2.44547151604168, "learning_rate": 5.127225336487573e-10, "loss": 0.8474, "step": 27469 }, { "epoch": 0.9956145119785438, "grad_norm": 2.16693543864759, "learning_rate": 5.043518348424314e-10, "loss": 0.9309, "step": 27470 }, { "epoch": 0.9956507556812004, "grad_norm": 1.955541150011923, "learning_rate": 4.960500260625667e-10, "loss": 0.7384, "step": 27471 }, { "epoch": 0.9956869993838571, "grad_norm": 2.2986299942099606, "learning_rate": 4.878171074235161e-10, "loss": 0.8297, "step": 27472 }, { "epoch": 0.9957232430865137, "grad_norm": 2.397470086740329, "learning_rate": 4.796530790385223e-10, "loss": 0.7856, "step": 27473 }, { "epoch": 0.9957594867891704, "grad_norm": 2.321178506779192, "learning_rate": 4.715579410202731e-10, "loss": 1.0213, "step": 27474 }, { "epoch": 0.995795730491827, "grad_norm": 2.2331763380247542, "learning_rate": 4.6353169348034576e-10, "loss": 0.8724, "step": 27475 }, { "epoch": 0.9958319741944837, "grad_norm": 2.2779088966845955, "learning_rate": 4.555743365297627e-10, "loss": 0.8054, "step": 27476 }, { "epoch": 0.9958682178971404, "grad_norm": 2.35008669165003, "learning_rate": 4.476858702773257e-10, "loss": 0.8532, "step": 27477 }, { "epoch": 0.9959044615997971, "grad_norm": 2.6096298891656935, "learning_rate": 4.3986629483239174e-10, "loss": 0.9929, "step": 27478 }, { "epoch": 0.9959407053024537, "grad_norm": 2.2746944696987335, "learning_rate": 4.3211561030209736e-10, "loss": 0.8873, "step": 27479 }, { "epoch": 0.9959769490051104, "grad_norm": 2.373524830473526, "learning_rate": 4.244338167941342e-10, "loss": 1.0272, "step": 27480 }, { "epoch": 0.996013192707767, "grad_norm": 2.10375130926108, "learning_rate": 4.168209144134183e-10, "loss": 0.8436, "step": 27481 }, { "epoch": 0.9960494364104236, "grad_norm": 1.9925016004895133, "learning_rate": 4.092769032654209e-10, "loss": 0.8627, "step": 27482 }, { "epoch": 0.9960856801130804, "grad_norm": 2.3811068969442757, "learning_rate": 4.0180178345394783e-10, "loss": 0.6351, "step": 27483 }, { "epoch": 0.996121923815737, "grad_norm": 2.600728116337233, "learning_rate": 3.943955550816947e-10, "loss": 0.9222, "step": 27484 }, { "epoch": 0.9961581675183937, "grad_norm": 2.598230087006041, "learning_rate": 3.8705821825135713e-10, "loss": 0.8951, "step": 27485 }, { "epoch": 0.9961944112210503, "grad_norm": 2.469239759237168, "learning_rate": 3.7978977306341035e-10, "loss": 0.8773, "step": 27486 }, { "epoch": 0.996230654923707, "grad_norm": 2.280508395277989, "learning_rate": 3.7259021961888465e-10, "loss": 1.0291, "step": 27487 }, { "epoch": 0.9962668986263636, "grad_norm": 2.41421465126606, "learning_rate": 3.654595580160347e-10, "loss": 0.9355, "step": 27488 }, { "epoch": 0.9963031423290203, "grad_norm": 2.1120957194845014, "learning_rate": 3.5839778835311535e-10, "loss": 0.7658, "step": 27489 }, { "epoch": 0.996339386031677, "grad_norm": 2.371928563878254, "learning_rate": 3.5140491072782614e-10, "loss": 0.994, "step": 27490 }, { "epoch": 0.9963756297343337, "grad_norm": 2.575781203048203, "learning_rate": 3.444809252367565e-10, "loss": 1.0584, "step": 27491 }, { "epoch": 0.9964118734369903, "grad_norm": 2.5534215935749685, "learning_rate": 3.3762583197538556e-10, "loss": 1.0252, "step": 27492 }, { "epoch": 0.996448117139647, "grad_norm": 2.4395428828507297, "learning_rate": 3.3083963103752724e-10, "loss": 0.9223, "step": 27493 }, { "epoch": 0.9964843608423036, "grad_norm": 2.2566277469714064, "learning_rate": 3.241223225169954e-10, "loss": 0.977, "step": 27494 }, { "epoch": 0.9965206045449603, "grad_norm": 1.8129705607172857, "learning_rate": 3.1747390650593847e-10, "loss": 0.8831, "step": 27495 }, { "epoch": 0.9965568482476169, "grad_norm": 2.431468634289718, "learning_rate": 3.108943830970601e-10, "loss": 0.8502, "step": 27496 }, { "epoch": 0.9965930919502737, "grad_norm": 2.002093986233504, "learning_rate": 3.0438375237973327e-10, "loss": 0.9055, "step": 27497 }, { "epoch": 0.9966293356529303, "grad_norm": 2.4239447467823996, "learning_rate": 2.9794201444444114e-10, "loss": 0.9639, "step": 27498 }, { "epoch": 0.996665579355587, "grad_norm": 2.638176385978719, "learning_rate": 2.915691693800016e-10, "loss": 1.0051, "step": 27499 }, { "epoch": 0.9967018230582436, "grad_norm": 2.2965593324557485, "learning_rate": 2.8526521727412214e-10, "loss": 0.8176, "step": 27500 }, { "epoch": 0.9967380667609003, "grad_norm": 2.39922420238013, "learning_rate": 2.790301582134003e-10, "loss": 0.8453, "step": 27501 }, { "epoch": 0.9967743104635569, "grad_norm": 2.357034453033707, "learning_rate": 2.7286399228387826e-10, "loss": 0.8833, "step": 27502 }, { "epoch": 0.9968105541662137, "grad_norm": 2.341325532586755, "learning_rate": 2.6676671957048814e-10, "loss": 0.826, "step": 27503 }, { "epoch": 0.9968467978688703, "grad_norm": 2.281861116488067, "learning_rate": 2.607383401570518e-10, "loss": 0.6934, "step": 27504 }, { "epoch": 0.996883041571527, "grad_norm": 2.541342274596252, "learning_rate": 2.5477885412739103e-10, "loss": 0.865, "step": 27505 }, { "epoch": 0.9969192852741836, "grad_norm": 2.5372041994343055, "learning_rate": 2.488882615625521e-10, "loss": 0.9442, "step": 27506 }, { "epoch": 0.9969555289768403, "grad_norm": 2.4497523381815633, "learning_rate": 2.4306656254469154e-10, "loss": 0.9208, "step": 27507 }, { "epoch": 0.9969917726794969, "grad_norm": 2.467710438675686, "learning_rate": 2.3731375715374537e-10, "loss": 0.7824, "step": 27508 }, { "epoch": 0.9970280163821535, "grad_norm": 2.3593654195579243, "learning_rate": 2.3162984546853951e-10, "loss": 0.9013, "step": 27509 }, { "epoch": 0.9970642600848103, "grad_norm": 2.5022265616712867, "learning_rate": 2.2601482756789972e-10, "loss": 0.8629, "step": 27510 }, { "epoch": 0.997100503787467, "grad_norm": 2.5459630071555925, "learning_rate": 2.2046870352898652e-10, "loss": 0.9425, "step": 27511 }, { "epoch": 0.9971367474901236, "grad_norm": 2.0562605592267813, "learning_rate": 2.149914734284053e-10, "loss": 0.7503, "step": 27512 }, { "epoch": 0.9971729911927802, "grad_norm": 2.2634334694185734, "learning_rate": 2.0958313734109616e-10, "loss": 1.0133, "step": 27513 }, { "epoch": 0.9972092348954369, "grad_norm": 2.110636444843734, "learning_rate": 2.0424369534255416e-10, "loss": 0.8602, "step": 27514 }, { "epoch": 0.9972454785980935, "grad_norm": 2.1931570353735665, "learning_rate": 1.989731475049439e-10, "loss": 0.8311, "step": 27515 }, { "epoch": 0.9972817223007503, "grad_norm": 2.1962659364701618, "learning_rate": 1.9377149390209515e-10, "loss": 0.903, "step": 27516 }, { "epoch": 0.9973179660034069, "grad_norm": 2.6608353850075734, "learning_rate": 1.8863873460561733e-10, "loss": 0.9172, "step": 27517 }, { "epoch": 0.9973542097060636, "grad_norm": 2.328627024217788, "learning_rate": 1.8357486968545446e-10, "loss": 0.9339, "step": 27518 }, { "epoch": 0.9973904534087202, "grad_norm": 2.5085754562278813, "learning_rate": 1.7857989921210572e-10, "loss": 0.9876, "step": 27519 }, { "epoch": 0.9974266971113769, "grad_norm": 2.195262885116739, "learning_rate": 1.7365382325384982e-10, "loss": 0.9922, "step": 27520 }, { "epoch": 0.9974629408140335, "grad_norm": 2.763229622358229, "learning_rate": 1.687966418789655e-10, "loss": 0.8883, "step": 27521 }, { "epoch": 0.9974991845166902, "grad_norm": 2.2950567632072425, "learning_rate": 1.6400835515462123e-10, "loss": 0.8158, "step": 27522 }, { "epoch": 0.9975354282193469, "grad_norm": 2.3897509984513454, "learning_rate": 1.5928896314576502e-10, "loss": 0.7415, "step": 27523 }, { "epoch": 0.9975716719220036, "grad_norm": 2.3546496234135508, "learning_rate": 1.5463846591845522e-10, "loss": 0.836, "step": 27524 }, { "epoch": 0.9976079156246602, "grad_norm": 2.4687727961847337, "learning_rate": 1.500568635365296e-10, "loss": 0.8517, "step": 27525 }, { "epoch": 0.9976441593273169, "grad_norm": 2.2184755533218166, "learning_rate": 1.4554415606271577e-10, "loss": 0.8726, "step": 27526 }, { "epoch": 0.9976804030299735, "grad_norm": 2.580411799539454, "learning_rate": 1.4110034355974134e-10, "loss": 0.9991, "step": 27527 }, { "epoch": 0.9977166467326302, "grad_norm": 2.4651495475262934, "learning_rate": 1.3672542608866858e-10, "loss": 0.9626, "step": 27528 }, { "epoch": 0.9977528904352869, "grad_norm": 2.11662274128275, "learning_rate": 1.324194037094495e-10, "loss": 0.9672, "step": 27529 }, { "epoch": 0.9977891341379436, "grad_norm": 2.3434877445063815, "learning_rate": 1.2818227648203618e-10, "loss": 1.0129, "step": 27530 }, { "epoch": 0.9978253778406002, "grad_norm": 2.8663300961591935, "learning_rate": 1.240140444641602e-10, "loss": 1.0774, "step": 27531 }, { "epoch": 0.9978616215432569, "grad_norm": 2.248467799866872, "learning_rate": 1.1991470771355318e-10, "loss": 0.7909, "step": 27532 }, { "epoch": 0.9978978652459135, "grad_norm": 2.5473634820394526, "learning_rate": 1.1588426628628135e-10, "loss": 0.8329, "step": 27533 }, { "epoch": 0.9979341089485702, "grad_norm": 2.2195985423587485, "learning_rate": 1.1192272023896611e-10, "loss": 0.7614, "step": 27534 }, { "epoch": 0.9979703526512268, "grad_norm": 2.7749047161263154, "learning_rate": 1.0803006962489815e-10, "loss": 0.7288, "step": 27535 }, { "epoch": 0.9980065963538836, "grad_norm": 2.278346980235685, "learning_rate": 1.0420631449903351e-10, "loss": 0.8653, "step": 27536 }, { "epoch": 0.9980428400565402, "grad_norm": 1.981812064630048, "learning_rate": 1.0045145491299758e-10, "loss": 0.8346, "step": 27537 }, { "epoch": 0.9980790837591969, "grad_norm": 2.5625604253297722, "learning_rate": 9.67654909189708e-11, "loss": 0.8217, "step": 27538 }, { "epoch": 0.9981153274618535, "grad_norm": 2.4234633127600915, "learning_rate": 9.314842256746837e-11, "loss": 0.9016, "step": 27539 }, { "epoch": 0.9981515711645101, "grad_norm": 2.1526346103979503, "learning_rate": 8.96002499090054e-11, "loss": 0.6798, "step": 27540 }, { "epoch": 0.9981878148671668, "grad_norm": 2.3403832820102215, "learning_rate": 8.61209729913215e-11, "loss": 0.8048, "step": 27541 }, { "epoch": 0.9982240585698235, "grad_norm": 2.7699120096554024, "learning_rate": 8.271059186326646e-11, "loss": 0.9446, "step": 27542 }, { "epoch": 0.9982603022724802, "grad_norm": 2.34064024237613, "learning_rate": 7.936910657202479e-11, "loss": 0.9898, "step": 27543 }, { "epoch": 0.9982965459751368, "grad_norm": 2.22489409114156, "learning_rate": 7.609651716311561e-11, "loss": 0.9415, "step": 27544 }, { "epoch": 0.9983327896777935, "grad_norm": 2.1787964597523692, "learning_rate": 7.289282368150297e-11, "loss": 0.809, "step": 27545 }, { "epoch": 0.9983690333804501, "grad_norm": 2.3122317379301567, "learning_rate": 6.975802617159577e-11, "loss": 0.8925, "step": 27546 }, { "epoch": 0.9984052770831068, "grad_norm": 2.2966003063177944, "learning_rate": 6.669212467669273e-11, "loss": 0.8707, "step": 27547 }, { "epoch": 0.9984415207857634, "grad_norm": 2.5843499234950142, "learning_rate": 6.369511923842719e-11, "loss": 0.9009, "step": 27548 }, { "epoch": 0.9984777644884202, "grad_norm": 2.4368809694409896, "learning_rate": 6.076700989898764e-11, "loss": 1.0631, "step": 27549 }, { "epoch": 0.9985140081910768, "grad_norm": 2.5709810590112405, "learning_rate": 5.7907796698342125e-11, "loss": 0.8474, "step": 27550 }, { "epoch": 0.9985502518937335, "grad_norm": 2.6143888377424713, "learning_rate": 5.5117479675903526e-11, "loss": 0.766, "step": 27551 }, { "epoch": 0.9985864955963901, "grad_norm": 2.2628282423714303, "learning_rate": 5.239605886997456e-11, "loss": 0.8101, "step": 27552 }, { "epoch": 0.9986227392990468, "grad_norm": 2.4070563082542256, "learning_rate": 4.974353431830281e-11, "loss": 1.0178, "step": 27553 }, { "epoch": 0.9986589830017034, "grad_norm": 2.339538234451575, "learning_rate": 4.715990605697052e-11, "loss": 0.974, "step": 27554 }, { "epoch": 0.9986952267043602, "grad_norm": 2.4999758706321304, "learning_rate": 4.464517412205993e-11, "loss": 0.6938, "step": 27555 }, { "epoch": 0.9987314704070168, "grad_norm": 2.3166608841038805, "learning_rate": 4.219933854798797e-11, "loss": 0.8462, "step": 27556 }, { "epoch": 0.9987677141096735, "grad_norm": 2.3587097499255827, "learning_rate": 3.9822399368616425e-11, "loss": 0.8811, "step": 27557 }, { "epoch": 0.9988039578123301, "grad_norm": 2.652950316730645, "learning_rate": 3.751435661669689e-11, "loss": 0.9255, "step": 27558 }, { "epoch": 0.9988402015149868, "grad_norm": 2.4085117125415976, "learning_rate": 3.527521032387071e-11, "loss": 0.8617, "step": 27559 }, { "epoch": 0.9988764452176434, "grad_norm": 2.1913139848879477, "learning_rate": 3.310496052122414e-11, "loss": 0.8839, "step": 27560 }, { "epoch": 0.9989126889203, "grad_norm": 2.353769224393216, "learning_rate": 3.100360723817808e-11, "loss": 0.824, "step": 27561 }, { "epoch": 0.9989489326229568, "grad_norm": 2.4101100790333763, "learning_rate": 2.8971150504153445e-11, "loss": 1.1155, "step": 27562 }, { "epoch": 0.9989851763256135, "grad_norm": 2.489804079681875, "learning_rate": 2.7007590347460923e-11, "loss": 0.8088, "step": 27563 }, { "epoch": 0.9990214200282701, "grad_norm": 2.1647456080362693, "learning_rate": 2.511292679419075e-11, "loss": 0.7989, "step": 27564 }, { "epoch": 0.9990576637309267, "grad_norm": 2.413254520557553, "learning_rate": 2.3287159870988285e-11, "loss": 1.0356, "step": 27565 }, { "epoch": 0.9990939074335834, "grad_norm": 2.106597283696622, "learning_rate": 2.1530289603388653e-11, "loss": 0.8468, "step": 27566 }, { "epoch": 0.99913015113624, "grad_norm": 2.2406863967239983, "learning_rate": 1.9842316014706543e-11, "loss": 0.9583, "step": 27567 }, { "epoch": 0.9991663948388967, "grad_norm": 2.586860846841675, "learning_rate": 1.8223239128811743e-11, "loss": 1.1993, "step": 27568 }, { "epoch": 0.9992026385415534, "grad_norm": 2.301200309263739, "learning_rate": 1.667305896846383e-11, "loss": 0.9822, "step": 27569 }, { "epoch": 0.9992388822442101, "grad_norm": 2.3959713888913297, "learning_rate": 1.5191775553646814e-11, "loss": 0.7358, "step": 27570 }, { "epoch": 0.9992751259468667, "grad_norm": 2.171760711506529, "learning_rate": 1.377938890601005e-11, "loss": 0.7965, "step": 27571 }, { "epoch": 0.9993113696495234, "grad_norm": 2.442201529195446, "learning_rate": 1.2435899044982436e-11, "loss": 0.8856, "step": 27572 }, { "epoch": 0.99934761335218, "grad_norm": 2.273634504594952, "learning_rate": 1.116130598777243e-11, "loss": 0.8163, "step": 27573 }, { "epoch": 0.9993838570548367, "grad_norm": 2.1766744699469425, "learning_rate": 9.955609753808937e-12, "loss": 0.914, "step": 27574 }, { "epoch": 0.9994201007574934, "grad_norm": 2.2288118893809474, "learning_rate": 8.818810358079966e-12, "loss": 0.9528, "step": 27575 }, { "epoch": 0.9994563444601501, "grad_norm": 2.5129805601822097, "learning_rate": 7.750907817238862e-12, "loss": 0.9, "step": 27576 }, { "epoch": 0.9994925881628067, "grad_norm": 2.389784292012372, "learning_rate": 6.751902145718525e-12, "loss": 0.8806, "step": 27577 }, { "epoch": 0.9995288318654634, "grad_norm": 2.5037190099402618, "learning_rate": 5.821793356841632e-12, "loss": 0.8648, "step": 27578 }, { "epoch": 0.99956507556812, "grad_norm": 2.3785959680795656, "learning_rate": 4.960581464485969e-12, "loss": 0.8767, "step": 27579 }, { "epoch": 0.9996013192707767, "grad_norm": 2.27038853730424, "learning_rate": 4.1682664797537686e-12, "loss": 0.8661, "step": 27580 }, { "epoch": 0.9996375629734333, "grad_norm": 1.9304030677503914, "learning_rate": 3.444848413192148e-12, "loss": 0.7548, "step": 27581 }, { "epoch": 0.9996738066760901, "grad_norm": 2.6576327373350597, "learning_rate": 2.7903272753482256e-12, "loss": 0.9124, "step": 27582 }, { "epoch": 0.9997100503787467, "grad_norm": 2.448072383858567, "learning_rate": 2.2047030756588985e-12, "loss": 0.9266, "step": 27583 }, { "epoch": 0.9997462940814034, "grad_norm": 2.9925385089577405, "learning_rate": 1.687975821340615e-12, "loss": 0.9211, "step": 27584 }, { "epoch": 0.99978253778406, "grad_norm": 2.4670993827156935, "learning_rate": 1.240145520164937e-12, "loss": 0.8071, "step": 27585 }, { "epoch": 0.9998187814867167, "grad_norm": 2.744199563946858, "learning_rate": 8.612121776829796e-13, "loss": 1.0384, "step": 27586 }, { "epoch": 0.9998550251893733, "grad_norm": 2.225480896481343, "learning_rate": 5.511757994458577e-13, "loss": 0.8293, "step": 27587 }, { "epoch": 0.9998912688920301, "grad_norm": 2.2899985891617205, "learning_rate": 3.100363893393521e-13, "loss": 0.7671, "step": 27588 }, { "epoch": 0.9999275125946867, "grad_norm": 2.502344492752972, "learning_rate": 1.3779395180435473e-13, "loss": 0.9667, "step": 27589 }, { "epoch": 0.9999637562973434, "grad_norm": 2.317588792788768, "learning_rate": 3.444848795108868e-14, "loss": 0.9189, "step": 27590 }, { "epoch": 1.0, "grad_norm": 2.286576293122536, "learning_rate": 0.0, "loss": 0.9514, "step": 27591 }, { "epoch": 1.0, "step": 27591, "total_flos": 2.625495242847027e+16, "train_loss": 0.9322727440897679, "train_runtime": 109197.9812, "train_samples_per_second": 32.343, "train_steps_per_second": 0.253 } ], "logging_steps": 1.0, "max_steps": 27591, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.625495242847027e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }