{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 23049, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003904724716907458, "grad_norm": 22.37471580505371, "learning_rate": 6.000000000000001e-08, "loss": 1.5848, "step": 3 }, { "epoch": 0.0007809449433814916, "grad_norm": 24.825096130371094, "learning_rate": 1.2000000000000002e-07, "loss": 1.4111, "step": 6 }, { "epoch": 0.0011714174150722373, "grad_norm": 22.30377769470215, "learning_rate": 1.8e-07, "loss": 1.4883, "step": 9 }, { "epoch": 0.0015618898867629833, "grad_norm": 22.412364959716797, "learning_rate": 2.4000000000000003e-07, "loss": 1.5305, "step": 12 }, { "epoch": 0.001952362358453729, "grad_norm": 22.382173538208008, "learning_rate": 3.0000000000000004e-07, "loss": 1.4476, "step": 15 }, { "epoch": 0.0023428348301444747, "grad_norm": 18.971206665039062, "learning_rate": 3.6e-07, "loss": 1.3828, "step": 18 }, { "epoch": 0.002733307301835221, "grad_norm": 17.42239761352539, "learning_rate": 4.2000000000000006e-07, "loss": 1.4491, "step": 21 }, { "epoch": 0.0031237797735259665, "grad_norm": 16.123905181884766, "learning_rate": 4.800000000000001e-07, "loss": 1.418, "step": 24 }, { "epoch": 0.003514252245216712, "grad_norm": 15.892184257507324, "learning_rate": 5.4e-07, "loss": 1.4415, "step": 27 }, { "epoch": 0.003904724716907458, "grad_norm": 12.987159729003906, "learning_rate": 6.000000000000001e-07, "loss": 1.3907, "step": 30 }, { "epoch": 0.004295197188598204, "grad_norm": 11.818194389343262, "learning_rate": 6.6e-07, "loss": 1.421, "step": 33 }, { "epoch": 0.004685669660288949, "grad_norm": 11.758996963500977, "learning_rate": 7.2e-07, "loss": 1.2863, "step": 36 }, { "epoch": 0.005076142131979695, "grad_norm": 11.128711700439453, "learning_rate": 7.8e-07, "loss": 1.2689, "step": 39 }, { "epoch": 0.005466614603670442, "grad_norm": 10.793634414672852, "learning_rate": 8.400000000000001e-07, "loss": 1.3767, "step": 42 }, { "epoch": 0.005857087075361187, "grad_norm": 10.585275650024414, "learning_rate": 9.000000000000001e-07, "loss": 1.2601, "step": 45 }, { "epoch": 0.006247559547051933, "grad_norm": 9.969756126403809, "learning_rate": 9.600000000000001e-07, "loss": 1.1707, "step": 48 }, { "epoch": 0.006638032018742679, "grad_norm": 9.342447280883789, "learning_rate": 1.02e-06, "loss": 1.1647, "step": 51 }, { "epoch": 0.007028504490433424, "grad_norm": 9.255729675292969, "learning_rate": 1.08e-06, "loss": 1.1294, "step": 54 }, { "epoch": 0.00741897696212417, "grad_norm": 8.989304542541504, "learning_rate": 1.14e-06, "loss": 1.1128, "step": 57 }, { "epoch": 0.007809449433814916, "grad_norm": 8.631978034973145, "learning_rate": 1.2000000000000002e-06, "loss": 1.2147, "step": 60 }, { "epoch": 0.008199921905505662, "grad_norm": 9.135863304138184, "learning_rate": 1.26e-06, "loss": 1.052, "step": 63 }, { "epoch": 0.008590394377196407, "grad_norm": 7.280301570892334, "learning_rate": 1.32e-06, "loss": 0.9579, "step": 66 }, { "epoch": 0.008980866848887154, "grad_norm": 6.385473728179932, "learning_rate": 1.3800000000000001e-06, "loss": 1.1031, "step": 69 }, { "epoch": 0.009371339320577899, "grad_norm": 4.986073017120361, "learning_rate": 1.44e-06, "loss": 0.9746, "step": 72 }, { "epoch": 0.009761811792268645, "grad_norm": 4.2078423500061035, "learning_rate": 1.5e-06, "loss": 1.0148, "step": 75 }, { "epoch": 0.01015228426395939, "grad_norm": 4.854979038238525, "learning_rate": 1.56e-06, "loss": 1.005, "step": 78 }, { "epoch": 0.010542756735650137, "grad_norm": 3.9257068634033203, "learning_rate": 1.6200000000000002e-06, "loss": 0.8129, "step": 81 }, { "epoch": 0.010933229207340883, "grad_norm": 4.077581882476807, "learning_rate": 1.6800000000000002e-06, "loss": 1.0132, "step": 84 }, { "epoch": 0.011323701679031628, "grad_norm": 4.484386444091797, "learning_rate": 1.74e-06, "loss": 0.9022, "step": 87 }, { "epoch": 0.011714174150722375, "grad_norm": 3.997361898422241, "learning_rate": 1.8000000000000001e-06, "loss": 0.8801, "step": 90 }, { "epoch": 0.01210464662241312, "grad_norm": 3.9567580223083496, "learning_rate": 1.8600000000000002e-06, "loss": 0.9124, "step": 93 }, { "epoch": 0.012495119094103866, "grad_norm": 4.3611297607421875, "learning_rate": 1.9200000000000003e-06, "loss": 0.939, "step": 96 }, { "epoch": 0.012885591565794611, "grad_norm": 7.97072172164917, "learning_rate": 1.98e-06, "loss": 0.9517, "step": 99 }, { "epoch": 0.013276064037485357, "grad_norm": 3.360591173171997, "learning_rate": 2.04e-06, "loss": 0.7842, "step": 102 }, { "epoch": 0.013666536509176102, "grad_norm": 8.188989639282227, "learning_rate": 2.1000000000000002e-06, "loss": 0.9067, "step": 105 }, { "epoch": 0.014057008980866849, "grad_norm": 5.126707077026367, "learning_rate": 2.16e-06, "loss": 0.8402, "step": 108 }, { "epoch": 0.014447481452557595, "grad_norm": 3.1572999954223633, "learning_rate": 2.2200000000000003e-06, "loss": 0.7993, "step": 111 }, { "epoch": 0.01483795392424834, "grad_norm": 3.5879690647125244, "learning_rate": 2.28e-06, "loss": 0.8593, "step": 114 }, { "epoch": 0.015228426395939087, "grad_norm": 3.5023837089538574, "learning_rate": 2.3400000000000005e-06, "loss": 0.8434, "step": 117 }, { "epoch": 0.015618898867629832, "grad_norm": 3.2916157245635986, "learning_rate": 2.4000000000000003e-06, "loss": 0.8904, "step": 120 }, { "epoch": 0.016009371339320577, "grad_norm": 4.094881534576416, "learning_rate": 2.46e-06, "loss": 0.8106, "step": 123 }, { "epoch": 0.016399843811011325, "grad_norm": 7.149028301239014, "learning_rate": 2.52e-06, "loss": 0.8551, "step": 126 }, { "epoch": 0.01679031628270207, "grad_norm": 3.3029215335845947, "learning_rate": 2.5800000000000003e-06, "loss": 0.8387, "step": 129 }, { "epoch": 0.017180788754392814, "grad_norm": 3.166794538497925, "learning_rate": 2.64e-06, "loss": 0.819, "step": 132 }, { "epoch": 0.017571261226083563, "grad_norm": 4.197257995605469, "learning_rate": 2.7000000000000004e-06, "loss": 0.7449, "step": 135 }, { "epoch": 0.017961733697774308, "grad_norm": 3.171112060546875, "learning_rate": 2.7600000000000003e-06, "loss": 0.7152, "step": 138 }, { "epoch": 0.018352206169465052, "grad_norm": 3.2386510372161865, "learning_rate": 2.82e-06, "loss": 0.7647, "step": 141 }, { "epoch": 0.018742678641155797, "grad_norm": 3.2060446739196777, "learning_rate": 2.88e-06, "loss": 0.737, "step": 144 }, { "epoch": 0.019133151112846546, "grad_norm": 3.846658706665039, "learning_rate": 2.9400000000000002e-06, "loss": 0.7577, "step": 147 }, { "epoch": 0.01952362358453729, "grad_norm": 3.3511500358581543, "learning_rate": 3e-06, "loss": 0.7303, "step": 150 }, { "epoch": 0.019914096056228035, "grad_norm": 3.5826847553253174, "learning_rate": 3.0600000000000003e-06, "loss": 0.8347, "step": 153 }, { "epoch": 0.02030456852791878, "grad_norm": 3.3204636573791504, "learning_rate": 3.12e-06, "loss": 0.7353, "step": 156 }, { "epoch": 0.02069504099960953, "grad_norm": 3.548901319503784, "learning_rate": 3.1800000000000005e-06, "loss": 0.8361, "step": 159 }, { "epoch": 0.021085513471300273, "grad_norm": 3.251668691635132, "learning_rate": 3.2400000000000003e-06, "loss": 0.7772, "step": 162 }, { "epoch": 0.021475985942991018, "grad_norm": 3.0531423091888428, "learning_rate": 3.3000000000000006e-06, "loss": 0.724, "step": 165 }, { "epoch": 0.021866458414681766, "grad_norm": 3.8247272968292236, "learning_rate": 3.3600000000000004e-06, "loss": 0.6697, "step": 168 }, { "epoch": 0.02225693088637251, "grad_norm": 6.110504627227783, "learning_rate": 3.4200000000000007e-06, "loss": 0.8024, "step": 171 }, { "epoch": 0.022647403358063256, "grad_norm": 3.497457504272461, "learning_rate": 3.48e-06, "loss": 0.7605, "step": 174 }, { "epoch": 0.023037875829754, "grad_norm": 3.874572515487671, "learning_rate": 3.54e-06, "loss": 0.8033, "step": 177 }, { "epoch": 0.02342834830144475, "grad_norm": 3.286510467529297, "learning_rate": 3.6000000000000003e-06, "loss": 0.7136, "step": 180 }, { "epoch": 0.023818820773135494, "grad_norm": 3.194321870803833, "learning_rate": 3.66e-06, "loss": 0.804, "step": 183 }, { "epoch": 0.02420929324482624, "grad_norm": 3.3385202884674072, "learning_rate": 3.7200000000000004e-06, "loss": 0.7735, "step": 186 }, { "epoch": 0.024599765716516987, "grad_norm": 3.5652225017547607, "learning_rate": 3.7800000000000002e-06, "loss": 0.8019, "step": 189 }, { "epoch": 0.024990238188207732, "grad_norm": 3.887373447418213, "learning_rate": 3.8400000000000005e-06, "loss": 0.732, "step": 192 }, { "epoch": 0.025380710659898477, "grad_norm": 3.134536027908325, "learning_rate": 3.900000000000001e-06, "loss": 0.7466, "step": 195 }, { "epoch": 0.025771183131589222, "grad_norm": 3.26898455619812, "learning_rate": 3.96e-06, "loss": 0.7262, "step": 198 }, { "epoch": 0.02616165560327997, "grad_norm": 3.5477030277252197, "learning_rate": 4.0200000000000005e-06, "loss": 0.8538, "step": 201 }, { "epoch": 0.026552128074970715, "grad_norm": 3.780203104019165, "learning_rate": 4.08e-06, "loss": 0.6764, "step": 204 }, { "epoch": 0.02694260054666146, "grad_norm": 3.250094413757324, "learning_rate": 4.14e-06, "loss": 0.8611, "step": 207 }, { "epoch": 0.027333073018352205, "grad_norm": 5.036427021026611, "learning_rate": 4.2000000000000004e-06, "loss": 0.8276, "step": 210 }, { "epoch": 0.027723545490042953, "grad_norm": 2.8822755813598633, "learning_rate": 4.26e-06, "loss": 0.8168, "step": 213 }, { "epoch": 0.028114017961733698, "grad_norm": 3.374403953552246, "learning_rate": 4.32e-06, "loss": 0.8104, "step": 216 }, { "epoch": 0.028504490433424443, "grad_norm": 4.382564544677734, "learning_rate": 4.38e-06, "loss": 0.769, "step": 219 }, { "epoch": 0.02889496290511519, "grad_norm": 4.98586893081665, "learning_rate": 4.440000000000001e-06, "loss": 0.8022, "step": 222 }, { "epoch": 0.029285435376805936, "grad_norm": 3.1688528060913086, "learning_rate": 4.5e-06, "loss": 0.7269, "step": 225 }, { "epoch": 0.02967590784849668, "grad_norm": 3.384666681289673, "learning_rate": 4.56e-06, "loss": 0.7402, "step": 228 }, { "epoch": 0.030066380320187425, "grad_norm": 3.3539681434631348, "learning_rate": 4.620000000000001e-06, "loss": 0.8786, "step": 231 }, { "epoch": 0.030456852791878174, "grad_norm": 3.280231237411499, "learning_rate": 4.680000000000001e-06, "loss": 0.6507, "step": 234 }, { "epoch": 0.03084732526356892, "grad_norm": 2.9844179153442383, "learning_rate": 4.74e-06, "loss": 0.7314, "step": 237 }, { "epoch": 0.031237797735259663, "grad_norm": 5.182901382446289, "learning_rate": 4.800000000000001e-06, "loss": 0.7851, "step": 240 }, { "epoch": 0.03162827020695041, "grad_norm": 4.9192023277282715, "learning_rate": 4.86e-06, "loss": 0.7062, "step": 243 }, { "epoch": 0.03201874267864115, "grad_norm": 3.2586448192596436, "learning_rate": 4.92e-06, "loss": 0.778, "step": 246 }, { "epoch": 0.032409215150331905, "grad_norm": 3.3434247970581055, "learning_rate": 4.980000000000001e-06, "loss": 0.7214, "step": 249 }, { "epoch": 0.03279968762202265, "grad_norm": 3.2206249237060547, "learning_rate": 5.04e-06, "loss": 0.75, "step": 252 }, { "epoch": 0.033190160093713394, "grad_norm": 4.057199954986572, "learning_rate": 5.1e-06, "loss": 0.8155, "step": 255 }, { "epoch": 0.03358063256540414, "grad_norm": 3.0813217163085938, "learning_rate": 5.1600000000000006e-06, "loss": 0.7603, "step": 258 }, { "epoch": 0.033971105037094884, "grad_norm": 3.3523905277252197, "learning_rate": 5.220000000000001e-06, "loss": 0.738, "step": 261 }, { "epoch": 0.03436157750878563, "grad_norm": 5.289391994476318, "learning_rate": 5.28e-06, "loss": 0.6952, "step": 264 }, { "epoch": 0.034752049980476374, "grad_norm": 3.419605016708374, "learning_rate": 5.3400000000000005e-06, "loss": 0.7415, "step": 267 }, { "epoch": 0.035142522452167126, "grad_norm": 2.9998910427093506, "learning_rate": 5.400000000000001e-06, "loss": 0.813, "step": 270 }, { "epoch": 0.03553299492385787, "grad_norm": 3.2090444564819336, "learning_rate": 5.460000000000001e-06, "loss": 0.7232, "step": 273 }, { "epoch": 0.035923467395548615, "grad_norm": 4.806464672088623, "learning_rate": 5.5200000000000005e-06, "loss": 0.865, "step": 276 }, { "epoch": 0.03631393986723936, "grad_norm": 3.2608954906463623, "learning_rate": 5.580000000000001e-06, "loss": 0.7011, "step": 279 }, { "epoch": 0.036704412338930105, "grad_norm": 3.387782096862793, "learning_rate": 5.64e-06, "loss": 0.7094, "step": 282 }, { "epoch": 0.03709488481062085, "grad_norm": 3.7122697830200195, "learning_rate": 5.7e-06, "loss": 0.7027, "step": 285 }, { "epoch": 0.037485357282311595, "grad_norm": 2.901911497116089, "learning_rate": 5.76e-06, "loss": 0.6686, "step": 288 }, { "epoch": 0.03787582975400234, "grad_norm": 4.1798996925354, "learning_rate": 5.82e-06, "loss": 0.7313, "step": 291 }, { "epoch": 0.03826630222569309, "grad_norm": 3.4261515140533447, "learning_rate": 5.8800000000000005e-06, "loss": 0.7415, "step": 294 }, { "epoch": 0.038656774697383836, "grad_norm": 3.0005760192871094, "learning_rate": 5.94e-06, "loss": 0.7753, "step": 297 }, { "epoch": 0.03904724716907458, "grad_norm": 3.1258742809295654, "learning_rate": 6e-06, "loss": 0.6959, "step": 300 }, { "epoch": 0.039437719640765326, "grad_norm": 4.299694538116455, "learning_rate": 6.0600000000000004e-06, "loss": 0.6857, "step": 303 }, { "epoch": 0.03982819211245607, "grad_norm": 3.4530158042907715, "learning_rate": 6.120000000000001e-06, "loss": 0.7072, "step": 306 }, { "epoch": 0.040218664584146815, "grad_norm": 3.939462900161743, "learning_rate": 6.18e-06, "loss": 0.7783, "step": 309 }, { "epoch": 0.04060913705583756, "grad_norm": 3.1602437496185303, "learning_rate": 6.24e-06, "loss": 0.7953, "step": 312 }, { "epoch": 0.04099960952752831, "grad_norm": 2.8542044162750244, "learning_rate": 6.300000000000001e-06, "loss": 0.779, "step": 315 }, { "epoch": 0.04139008199921906, "grad_norm": 3.5136380195617676, "learning_rate": 6.360000000000001e-06, "loss": 0.755, "step": 318 }, { "epoch": 0.0417805544709098, "grad_norm": 3.060673713684082, "learning_rate": 6.42e-06, "loss": 0.7327, "step": 321 }, { "epoch": 0.04217102694260055, "grad_norm": 3.174912691116333, "learning_rate": 6.480000000000001e-06, "loss": 0.7553, "step": 324 }, { "epoch": 0.04256149941429129, "grad_norm": 3.189807653427124, "learning_rate": 6.540000000000001e-06, "loss": 0.815, "step": 327 }, { "epoch": 0.042951971885982036, "grad_norm": 2.7848079204559326, "learning_rate": 6.600000000000001e-06, "loss": 0.6674, "step": 330 }, { "epoch": 0.04334244435767278, "grad_norm": 3.377772331237793, "learning_rate": 6.660000000000001e-06, "loss": 0.6562, "step": 333 }, { "epoch": 0.04373291682936353, "grad_norm": 3.255612373352051, "learning_rate": 6.720000000000001e-06, "loss": 0.71, "step": 336 }, { "epoch": 0.04412338930105428, "grad_norm": 3.997131824493408, "learning_rate": 6.780000000000001e-06, "loss": 0.7364, "step": 339 }, { "epoch": 0.04451386177274502, "grad_norm": 3.373277425765991, "learning_rate": 6.8400000000000014e-06, "loss": 0.7041, "step": 342 }, { "epoch": 0.04490433424443577, "grad_norm": 3.901888608932495, "learning_rate": 6.9e-06, "loss": 0.6684, "step": 345 }, { "epoch": 0.04529480671612651, "grad_norm": 3.7340526580810547, "learning_rate": 6.96e-06, "loss": 0.7079, "step": 348 }, { "epoch": 0.04568527918781726, "grad_norm": 3.2724201679229736, "learning_rate": 7.0200000000000006e-06, "loss": 0.7308, "step": 351 }, { "epoch": 0.046075751659508, "grad_norm": 2.8971149921417236, "learning_rate": 7.08e-06, "loss": 0.7238, "step": 354 }, { "epoch": 0.046466224131198754, "grad_norm": 2.782917022705078, "learning_rate": 7.14e-06, "loss": 0.637, "step": 357 }, { "epoch": 0.0468566966028895, "grad_norm": 4.860075950622559, "learning_rate": 7.2000000000000005e-06, "loss": 0.6668, "step": 360 }, { "epoch": 0.04724716907458024, "grad_norm": 3.7025034427642822, "learning_rate": 7.260000000000001e-06, "loss": 0.7802, "step": 363 }, { "epoch": 0.04763764154627099, "grad_norm": 3.697002410888672, "learning_rate": 7.32e-06, "loss": 0.7448, "step": 366 }, { "epoch": 0.04802811401796173, "grad_norm": 4.358737468719482, "learning_rate": 7.3800000000000005e-06, "loss": 0.7448, "step": 369 }, { "epoch": 0.04841858648965248, "grad_norm": 3.1445984840393066, "learning_rate": 7.440000000000001e-06, "loss": 0.7037, "step": 372 }, { "epoch": 0.04880905896134322, "grad_norm": 5.41956090927124, "learning_rate": 7.500000000000001e-06, "loss": 0.7322, "step": 375 }, { "epoch": 0.049199531433033974, "grad_norm": 3.0740654468536377, "learning_rate": 7.5600000000000005e-06, "loss": 0.773, "step": 378 }, { "epoch": 0.04959000390472472, "grad_norm": 3.2865071296691895, "learning_rate": 7.620000000000001e-06, "loss": 0.7685, "step": 381 }, { "epoch": 0.049980476376415464, "grad_norm": 3.2605721950531006, "learning_rate": 7.680000000000001e-06, "loss": 0.7097, "step": 384 }, { "epoch": 0.05037094884810621, "grad_norm": 2.969805955886841, "learning_rate": 7.74e-06, "loss": 0.5873, "step": 387 }, { "epoch": 0.050761421319796954, "grad_norm": 3.413188934326172, "learning_rate": 7.800000000000002e-06, "loss": 0.7874, "step": 390 }, { "epoch": 0.0511518937914877, "grad_norm": 4.274326324462891, "learning_rate": 7.860000000000001e-06, "loss": 0.7592, "step": 393 }, { "epoch": 0.051542366263178443, "grad_norm": 2.9528450965881348, "learning_rate": 7.92e-06, "loss": 0.6821, "step": 396 }, { "epoch": 0.05193283873486919, "grad_norm": 2.8550336360931396, "learning_rate": 7.980000000000002e-06, "loss": 0.6741, "step": 399 }, { "epoch": 0.05232331120655994, "grad_norm": 2.7226080894470215, "learning_rate": 8.040000000000001e-06, "loss": 0.5926, "step": 402 }, { "epoch": 0.052713783678250685, "grad_norm": 2.704489231109619, "learning_rate": 8.1e-06, "loss": 0.7503, "step": 405 }, { "epoch": 0.05310425614994143, "grad_norm": 3.636996269226074, "learning_rate": 8.16e-06, "loss": 0.7294, "step": 408 }, { "epoch": 0.053494728621632175, "grad_norm": 3.1197781562805176, "learning_rate": 8.220000000000001e-06, "loss": 0.7368, "step": 411 }, { "epoch": 0.05388520109332292, "grad_norm": 3.0387392044067383, "learning_rate": 8.28e-06, "loss": 0.752, "step": 414 }, { "epoch": 0.054275673565013664, "grad_norm": 3.60607647895813, "learning_rate": 8.34e-06, "loss": 0.7767, "step": 417 }, { "epoch": 0.05466614603670441, "grad_norm": 3.62844181060791, "learning_rate": 8.400000000000001e-06, "loss": 0.6498, "step": 420 }, { "epoch": 0.05505661850839516, "grad_norm": 3.531270980834961, "learning_rate": 8.46e-06, "loss": 0.7256, "step": 423 }, { "epoch": 0.055447090980085906, "grad_norm": 3.4062726497650146, "learning_rate": 8.52e-06, "loss": 0.7756, "step": 426 }, { "epoch": 0.05583756345177665, "grad_norm": 3.633138656616211, "learning_rate": 8.580000000000001e-06, "loss": 0.7159, "step": 429 }, { "epoch": 0.056228035923467395, "grad_norm": 4.716714859008789, "learning_rate": 8.64e-06, "loss": 0.7503, "step": 432 }, { "epoch": 0.05661850839515814, "grad_norm": 3.8363606929779053, "learning_rate": 8.700000000000001e-06, "loss": 0.708, "step": 435 }, { "epoch": 0.057008980866848885, "grad_norm": 3.476318597793579, "learning_rate": 8.76e-06, "loss": 0.6928, "step": 438 }, { "epoch": 0.05739945333853963, "grad_norm": 4.243283748626709, "learning_rate": 8.82e-06, "loss": 0.5729, "step": 441 }, { "epoch": 0.05778992581023038, "grad_norm": 18.63896942138672, "learning_rate": 8.880000000000001e-06, "loss": 0.893, "step": 444 }, { "epoch": 0.05818039828192113, "grad_norm": 3.8758761882781982, "learning_rate": 8.94e-06, "loss": 0.655, "step": 447 }, { "epoch": 0.05857087075361187, "grad_norm": 2.645306348800659, "learning_rate": 9e-06, "loss": 0.6287, "step": 450 }, { "epoch": 0.058961343225302616, "grad_norm": 2.911987066268921, "learning_rate": 9.060000000000001e-06, "loss": 0.7451, "step": 453 }, { "epoch": 0.05935181569699336, "grad_norm": 3.4977638721466064, "learning_rate": 9.12e-06, "loss": 0.6821, "step": 456 }, { "epoch": 0.059742288168684106, "grad_norm": 3.464571714401245, "learning_rate": 9.180000000000002e-06, "loss": 0.6449, "step": 459 }, { "epoch": 0.06013276064037485, "grad_norm": 3.0574846267700195, "learning_rate": 9.240000000000001e-06, "loss": 0.7833, "step": 462 }, { "epoch": 0.0605232331120656, "grad_norm": 3.181323766708374, "learning_rate": 9.3e-06, "loss": 0.759, "step": 465 }, { "epoch": 0.06091370558375635, "grad_norm": 2.9368088245391846, "learning_rate": 9.360000000000002e-06, "loss": 0.681, "step": 468 }, { "epoch": 0.06130417805544709, "grad_norm": 2.9919021129608154, "learning_rate": 9.42e-06, "loss": 0.6649, "step": 471 }, { "epoch": 0.06169465052713784, "grad_norm": 3.4639129638671875, "learning_rate": 9.48e-06, "loss": 0.6816, "step": 474 }, { "epoch": 0.06208512299882858, "grad_norm": 4.285403251647949, "learning_rate": 9.54e-06, "loss": 0.8209, "step": 477 }, { "epoch": 0.06247559547051933, "grad_norm": 2.8827459812164307, "learning_rate": 9.600000000000001e-06, "loss": 0.7042, "step": 480 }, { "epoch": 0.06286606794221007, "grad_norm": 3.176536798477173, "learning_rate": 9.66e-06, "loss": 0.7312, "step": 483 }, { "epoch": 0.06325654041390082, "grad_norm": 4.091725826263428, "learning_rate": 9.72e-06, "loss": 0.6797, "step": 486 }, { "epoch": 0.06364701288559156, "grad_norm": 3.53378963470459, "learning_rate": 9.780000000000001e-06, "loss": 0.6557, "step": 489 }, { "epoch": 0.0640374853572823, "grad_norm": 3.440068006515503, "learning_rate": 9.84e-06, "loss": 0.6972, "step": 492 }, { "epoch": 0.06442795782897305, "grad_norm": 3.1737592220306396, "learning_rate": 9.9e-06, "loss": 0.6832, "step": 495 }, { "epoch": 0.06481843030066381, "grad_norm": 2.9271981716156006, "learning_rate": 9.960000000000001e-06, "loss": 0.7136, "step": 498 }, { "epoch": 0.06520890277235455, "grad_norm": 4.281307697296143, "learning_rate": 9.999999951472807e-06, "loss": 0.7126, "step": 501 }, { "epoch": 0.0655993752440453, "grad_norm": 4.331837177276611, "learning_rate": 9.999999223564915e-06, "loss": 0.6736, "step": 504 }, { "epoch": 0.06598984771573604, "grad_norm": 3.0359110832214355, "learning_rate": 9.999997622167676e-06, "loss": 0.7754, "step": 507 }, { "epoch": 0.06638032018742679, "grad_norm": 2.992680072784424, "learning_rate": 9.999995147281374e-06, "loss": 0.8101, "step": 510 }, { "epoch": 0.06677079265911753, "grad_norm": 2.822829484939575, "learning_rate": 9.999991798906435e-06, "loss": 0.6705, "step": 513 }, { "epoch": 0.06716126513080828, "grad_norm": 3.342939615249634, "learning_rate": 9.999987577043449e-06, "loss": 0.7045, "step": 516 }, { "epoch": 0.06755173760249902, "grad_norm": 4.064047813415527, "learning_rate": 9.999982481693151e-06, "loss": 0.7007, "step": 519 }, { "epoch": 0.06794221007418977, "grad_norm": 2.783782720565796, "learning_rate": 9.999976512856434e-06, "loss": 0.5655, "step": 522 }, { "epoch": 0.06833268254588051, "grad_norm": 2.931049346923828, "learning_rate": 9.999969670534335e-06, "loss": 0.677, "step": 525 }, { "epoch": 0.06872315501757126, "grad_norm": 3.2958552837371826, "learning_rate": 9.999961954728054e-06, "loss": 0.7741, "step": 528 }, { "epoch": 0.069113627489262, "grad_norm": 2.8830935955047607, "learning_rate": 9.999953365438939e-06, "loss": 0.6607, "step": 531 }, { "epoch": 0.06950409996095275, "grad_norm": 3.2814905643463135, "learning_rate": 9.99994390266849e-06, "loss": 0.7587, "step": 534 }, { "epoch": 0.06989457243264349, "grad_norm": 4.126666069030762, "learning_rate": 9.999933566418358e-06, "loss": 0.7806, "step": 537 }, { "epoch": 0.07028504490433425, "grad_norm": 3.1992428302764893, "learning_rate": 9.99992235669035e-06, "loss": 0.7045, "step": 540 }, { "epoch": 0.070675517376025, "grad_norm": 3.3059449195861816, "learning_rate": 9.999910273486427e-06, "loss": 0.8061, "step": 543 }, { "epoch": 0.07106598984771574, "grad_norm": 5.540506839752197, "learning_rate": 9.999897316808695e-06, "loss": 0.7123, "step": 546 }, { "epoch": 0.07145646231940649, "grad_norm": 2.840941905975342, "learning_rate": 9.999883486659421e-06, "loss": 0.6211, "step": 549 }, { "epoch": 0.07184693479109723, "grad_norm": 2.806445837020874, "learning_rate": 9.99986878304102e-06, "loss": 0.7114, "step": 552 }, { "epoch": 0.07223740726278798, "grad_norm": 4.31725549697876, "learning_rate": 9.99985320595606e-06, "loss": 0.6363, "step": 555 }, { "epoch": 0.07262787973447872, "grad_norm": 3.1201353073120117, "learning_rate": 9.999836755407264e-06, "loss": 0.7572, "step": 558 }, { "epoch": 0.07301835220616947, "grad_norm": 4.234785079956055, "learning_rate": 9.999819431397506e-06, "loss": 0.6565, "step": 561 }, { "epoch": 0.07340882467786021, "grad_norm": 3.2616043090820312, "learning_rate": 9.999801233929808e-06, "loss": 0.7319, "step": 564 }, { "epoch": 0.07379929714955095, "grad_norm": 2.796783924102783, "learning_rate": 9.999782163007357e-06, "loss": 0.7293, "step": 567 }, { "epoch": 0.0741897696212417, "grad_norm": 3.4508984088897705, "learning_rate": 9.999762218633478e-06, "loss": 0.8077, "step": 570 }, { "epoch": 0.07458024209293244, "grad_norm": 2.6913444995880127, "learning_rate": 9.999741400811656e-06, "loss": 0.6777, "step": 573 }, { "epoch": 0.07497071456462319, "grad_norm": 3.599764108657837, "learning_rate": 9.99971970954553e-06, "loss": 0.7533, "step": 576 }, { "epoch": 0.07536118703631393, "grad_norm": 3.3088502883911133, "learning_rate": 9.999697144838889e-06, "loss": 0.605, "step": 579 }, { "epoch": 0.07575165950800468, "grad_norm": 3.051250457763672, "learning_rate": 9.999673706695676e-06, "loss": 0.736, "step": 582 }, { "epoch": 0.07614213197969544, "grad_norm": 2.775729179382324, "learning_rate": 9.999649395119983e-06, "loss": 0.7246, "step": 585 }, { "epoch": 0.07653260445138618, "grad_norm": 3.2636523246765137, "learning_rate": 9.999624210116057e-06, "loss": 0.8448, "step": 588 }, { "epoch": 0.07692307692307693, "grad_norm": 2.841562032699585, "learning_rate": 9.999598151688301e-06, "loss": 0.6813, "step": 591 }, { "epoch": 0.07731354939476767, "grad_norm": 3.2505574226379395, "learning_rate": 9.999571219841264e-06, "loss": 0.6421, "step": 594 }, { "epoch": 0.07770402186645842, "grad_norm": 4.958602428436279, "learning_rate": 9.999543414579655e-06, "loss": 0.7551, "step": 597 }, { "epoch": 0.07809449433814916, "grad_norm": 3.9999706745147705, "learning_rate": 9.999514735908326e-06, "loss": 0.6695, "step": 600 }, { "epoch": 0.0784849668098399, "grad_norm": 4.219988822937012, "learning_rate": 9.999485183832291e-06, "loss": 0.7231, "step": 603 }, { "epoch": 0.07887543928153065, "grad_norm": 2.954352855682373, "learning_rate": 9.999454758356713e-06, "loss": 0.6714, "step": 606 }, { "epoch": 0.0792659117532214, "grad_norm": 4.073672294616699, "learning_rate": 9.999423459486906e-06, "loss": 0.7234, "step": 609 }, { "epoch": 0.07965638422491214, "grad_norm": 3.4436042308807373, "learning_rate": 9.999391287228337e-06, "loss": 0.8006, "step": 612 }, { "epoch": 0.08004685669660289, "grad_norm": 3.66202974319458, "learning_rate": 9.999358241586627e-06, "loss": 0.6682, "step": 615 }, { "epoch": 0.08043732916829363, "grad_norm": 4.19435977935791, "learning_rate": 9.99932432256755e-06, "loss": 0.7799, "step": 618 }, { "epoch": 0.08082780163998438, "grad_norm": 3.3048112392425537, "learning_rate": 9.99928953017703e-06, "loss": 0.7087, "step": 621 }, { "epoch": 0.08121827411167512, "grad_norm": 2.822140693664551, "learning_rate": 9.999253864421147e-06, "loss": 0.6894, "step": 624 }, { "epoch": 0.08160874658336588, "grad_norm": 2.9324841499328613, "learning_rate": 9.99921732530613e-06, "loss": 0.6938, "step": 627 }, { "epoch": 0.08199921905505662, "grad_norm": 3.84236478805542, "learning_rate": 9.999179912838364e-06, "loss": 0.783, "step": 630 }, { "epoch": 0.08238969152674737, "grad_norm": 3.0340070724487305, "learning_rate": 9.999141627024384e-06, "loss": 0.7381, "step": 633 }, { "epoch": 0.08278016399843811, "grad_norm": 4.351922512054443, "learning_rate": 9.999102467870877e-06, "loss": 0.7803, "step": 636 }, { "epoch": 0.08317063647012886, "grad_norm": 2.6362740993499756, "learning_rate": 9.999062435384686e-06, "loss": 0.598, "step": 639 }, { "epoch": 0.0835611089418196, "grad_norm": 3.186671495437622, "learning_rate": 9.999021529572806e-06, "loss": 0.7747, "step": 642 }, { "epoch": 0.08395158141351035, "grad_norm": 3.305826187133789, "learning_rate": 9.99897975044238e-06, "loss": 0.7638, "step": 645 }, { "epoch": 0.0843420538852011, "grad_norm": 3.8169655799865723, "learning_rate": 9.998937098000705e-06, "loss": 0.7015, "step": 648 }, { "epoch": 0.08473252635689184, "grad_norm": 3.1275665760040283, "learning_rate": 9.99889357225524e-06, "loss": 0.6977, "step": 651 }, { "epoch": 0.08512299882858258, "grad_norm": 3.6872782707214355, "learning_rate": 9.998849173213581e-06, "loss": 0.7307, "step": 654 }, { "epoch": 0.08551347130027333, "grad_norm": 3.6949827671051025, "learning_rate": 9.998803900883487e-06, "loss": 0.6681, "step": 657 }, { "epoch": 0.08590394377196407, "grad_norm": 3.2044758796691895, "learning_rate": 9.99875775527287e-06, "loss": 0.6699, "step": 660 }, { "epoch": 0.08629441624365482, "grad_norm": 3.4927258491516113, "learning_rate": 9.998710736389787e-06, "loss": 0.6393, "step": 663 }, { "epoch": 0.08668488871534556, "grad_norm": 3.450648784637451, "learning_rate": 9.998662844242456e-06, "loss": 0.6751, "step": 666 }, { "epoch": 0.08707536118703631, "grad_norm": 3.036738157272339, "learning_rate": 9.99861407883924e-06, "loss": 0.7606, "step": 669 }, { "epoch": 0.08746583365872707, "grad_norm": 3.3343772888183594, "learning_rate": 9.998564440188661e-06, "loss": 0.7669, "step": 672 }, { "epoch": 0.08785630613041781, "grad_norm": 3.513052225112915, "learning_rate": 9.998513928299389e-06, "loss": 0.6073, "step": 675 }, { "epoch": 0.08824677860210856, "grad_norm": 2.9392402172088623, "learning_rate": 9.998462543180249e-06, "loss": 0.6474, "step": 678 }, { "epoch": 0.0886372510737993, "grad_norm": 3.289640188217163, "learning_rate": 9.998410284840217e-06, "loss": 0.6522, "step": 681 }, { "epoch": 0.08902772354549005, "grad_norm": 5.727564811706543, "learning_rate": 9.998357153288425e-06, "loss": 0.6354, "step": 684 }, { "epoch": 0.08941819601718079, "grad_norm": 2.87939715385437, "learning_rate": 9.998303148534153e-06, "loss": 0.6943, "step": 687 }, { "epoch": 0.08980866848887153, "grad_norm": 5.636757850646973, "learning_rate": 9.998248270586837e-06, "loss": 0.651, "step": 690 }, { "epoch": 0.09019914096056228, "grad_norm": 4.383195400238037, "learning_rate": 9.99819251945606e-06, "loss": 0.7694, "step": 693 }, { "epoch": 0.09058961343225302, "grad_norm": 3.0773353576660156, "learning_rate": 9.998135895151567e-06, "loss": 0.6747, "step": 696 }, { "epoch": 0.09098008590394377, "grad_norm": 3.2680556774139404, "learning_rate": 9.998078397683246e-06, "loss": 0.7254, "step": 699 }, { "epoch": 0.09137055837563451, "grad_norm": 7.307571887969971, "learning_rate": 9.998020027061145e-06, "loss": 0.671, "step": 702 }, { "epoch": 0.09176103084732526, "grad_norm": 2.9415576457977295, "learning_rate": 9.99796078329546e-06, "loss": 0.7709, "step": 705 }, { "epoch": 0.092151503319016, "grad_norm": 2.814495086669922, "learning_rate": 9.997900666396539e-06, "loss": 0.6947, "step": 708 }, { "epoch": 0.09254197579070675, "grad_norm": 5.5829081535339355, "learning_rate": 9.997839676374885e-06, "loss": 0.73, "step": 711 }, { "epoch": 0.09293244826239751, "grad_norm": 5.697535037994385, "learning_rate": 9.997777813241154e-06, "loss": 0.6227, "step": 714 }, { "epoch": 0.09332292073408825, "grad_norm": 3.0062732696533203, "learning_rate": 9.997715077006152e-06, "loss": 0.6035, "step": 717 }, { "epoch": 0.093713393205779, "grad_norm": 2.8236165046691895, "learning_rate": 9.997651467680843e-06, "loss": 0.7009, "step": 720 }, { "epoch": 0.09410386567746974, "grad_norm": 2.7727997303009033, "learning_rate": 9.997586985276333e-06, "loss": 0.6508, "step": 723 }, { "epoch": 0.09449433814916049, "grad_norm": 3.65282940864563, "learning_rate": 9.99752162980389e-06, "loss": 0.7025, "step": 726 }, { "epoch": 0.09488481062085123, "grad_norm": 3.1343119144439697, "learning_rate": 9.997455401274932e-06, "loss": 0.7112, "step": 729 }, { "epoch": 0.09527528309254198, "grad_norm": 3.504101037979126, "learning_rate": 9.99738829970103e-06, "loss": 0.7705, "step": 732 }, { "epoch": 0.09566575556423272, "grad_norm": 2.7946465015411377, "learning_rate": 9.997320325093903e-06, "loss": 0.5736, "step": 735 }, { "epoch": 0.09605622803592347, "grad_norm": 3.221226215362549, "learning_rate": 9.99725147746543e-06, "loss": 0.647, "step": 738 }, { "epoch": 0.09644670050761421, "grad_norm": 5.345529079437256, "learning_rate": 9.997181756827634e-06, "loss": 0.5896, "step": 741 }, { "epoch": 0.09683717297930496, "grad_norm": 3.8715436458587646, "learning_rate": 9.9971111631927e-06, "loss": 0.7666, "step": 744 }, { "epoch": 0.0972276454509957, "grad_norm": 2.9973673820495605, "learning_rate": 9.997039696572956e-06, "loss": 0.6106, "step": 747 }, { "epoch": 0.09761811792268645, "grad_norm": 5.8889851570129395, "learning_rate": 9.996967356980891e-06, "loss": 0.6416, "step": 750 }, { "epoch": 0.09800859039437719, "grad_norm": 3.953415632247925, "learning_rate": 9.99689414442914e-06, "loss": 0.744, "step": 753 }, { "epoch": 0.09839906286606795, "grad_norm": 4.9584760665893555, "learning_rate": 9.996820058930495e-06, "loss": 0.6435, "step": 756 }, { "epoch": 0.0987895353377587, "grad_norm": 2.774200916290283, "learning_rate": 9.996745100497898e-06, "loss": 0.6054, "step": 759 }, { "epoch": 0.09918000780944944, "grad_norm": 4.4339118003845215, "learning_rate": 9.996669269144442e-06, "loss": 0.6135, "step": 762 }, { "epoch": 0.09957048028114018, "grad_norm": 3.3275766372680664, "learning_rate": 9.996592564883376e-06, "loss": 0.7489, "step": 765 }, { "epoch": 0.09996095275283093, "grad_norm": 3.1275625228881836, "learning_rate": 9.996514987728101e-06, "loss": 0.825, "step": 768 }, { "epoch": 0.10035142522452167, "grad_norm": 3.67669415473938, "learning_rate": 9.99643653769217e-06, "loss": 0.8093, "step": 771 }, { "epoch": 0.10074189769621242, "grad_norm": 3.200958013534546, "learning_rate": 9.996357214789284e-06, "loss": 0.7373, "step": 774 }, { "epoch": 0.10113237016790316, "grad_norm": 2.847730875015259, "learning_rate": 9.996277019033305e-06, "loss": 0.7335, "step": 777 }, { "epoch": 0.10152284263959391, "grad_norm": 4.33748722076416, "learning_rate": 9.99619595043824e-06, "loss": 0.6688, "step": 780 }, { "epoch": 0.10191331511128465, "grad_norm": 3.9746432304382324, "learning_rate": 9.996114009018254e-06, "loss": 0.7776, "step": 783 }, { "epoch": 0.1023037875829754, "grad_norm": 3.600278854370117, "learning_rate": 9.996031194787661e-06, "loss": 0.7403, "step": 786 }, { "epoch": 0.10269426005466614, "grad_norm": 2.957000494003296, "learning_rate": 9.995947507760928e-06, "loss": 0.7485, "step": 789 }, { "epoch": 0.10308473252635689, "grad_norm": 4.908221244812012, "learning_rate": 9.995862947952676e-06, "loss": 0.748, "step": 792 }, { "epoch": 0.10347520499804763, "grad_norm": 3.816086769104004, "learning_rate": 9.995777515377677e-06, "loss": 0.6787, "step": 795 }, { "epoch": 0.10386567746973838, "grad_norm": 2.948234796524048, "learning_rate": 9.995691210050854e-06, "loss": 0.7267, "step": 798 }, { "epoch": 0.10425614994142914, "grad_norm": 2.818983793258667, "learning_rate": 9.995604031987287e-06, "loss": 0.6914, "step": 801 }, { "epoch": 0.10464662241311988, "grad_norm": 3.0057449340820312, "learning_rate": 9.995515981202206e-06, "loss": 0.7294, "step": 804 }, { "epoch": 0.10503709488481063, "grad_norm": 3.676535129547119, "learning_rate": 9.99542705771099e-06, "loss": 0.6236, "step": 807 }, { "epoch": 0.10542756735650137, "grad_norm": 3.7774033546447754, "learning_rate": 9.995337261529176e-06, "loss": 0.6749, "step": 810 }, { "epoch": 0.10581803982819211, "grad_norm": 3.2406165599823, "learning_rate": 9.995246592672451e-06, "loss": 0.729, "step": 813 }, { "epoch": 0.10620851229988286, "grad_norm": 5.026638984680176, "learning_rate": 9.995155051156657e-06, "loss": 0.7358, "step": 816 }, { "epoch": 0.1065989847715736, "grad_norm": 3.2842941284179688, "learning_rate": 9.995062636997783e-06, "loss": 0.6659, "step": 819 }, { "epoch": 0.10698945724326435, "grad_norm": 2.9089906215667725, "learning_rate": 9.994969350211974e-06, "loss": 0.6672, "step": 822 }, { "epoch": 0.1073799297149551, "grad_norm": 2.6798436641693115, "learning_rate": 9.994875190815527e-06, "loss": 0.6423, "step": 825 }, { "epoch": 0.10777040218664584, "grad_norm": 2.918283462524414, "learning_rate": 9.994780158824891e-06, "loss": 0.7637, "step": 828 }, { "epoch": 0.10816087465833658, "grad_norm": 3.470376491546631, "learning_rate": 9.99468425425667e-06, "loss": 0.7711, "step": 831 }, { "epoch": 0.10855134713002733, "grad_norm": 3.4783475399017334, "learning_rate": 9.994587477127617e-06, "loss": 0.8156, "step": 834 }, { "epoch": 0.10894181960171807, "grad_norm": 2.7268190383911133, "learning_rate": 9.994489827454638e-06, "loss": 0.6253, "step": 837 }, { "epoch": 0.10933229207340882, "grad_norm": 3.936289072036743, "learning_rate": 9.994391305254795e-06, "loss": 0.6787, "step": 840 }, { "epoch": 0.10972276454509958, "grad_norm": 3.084895610809326, "learning_rate": 9.994291910545296e-06, "loss": 0.6393, "step": 843 }, { "epoch": 0.11011323701679032, "grad_norm": 2.9369547367095947, "learning_rate": 9.994191643343508e-06, "loss": 0.6388, "step": 846 }, { "epoch": 0.11050370948848107, "grad_norm": 2.8913955688476562, "learning_rate": 9.994090503666945e-06, "loss": 0.6447, "step": 849 }, { "epoch": 0.11089418196017181, "grad_norm": 2.990058183670044, "learning_rate": 9.993988491533277e-06, "loss": 0.6725, "step": 852 }, { "epoch": 0.11128465443186256, "grad_norm": 3.0597927570343018, "learning_rate": 9.993885606960325e-06, "loss": 0.7052, "step": 855 }, { "epoch": 0.1116751269035533, "grad_norm": 4.286864280700684, "learning_rate": 9.993781849966064e-06, "loss": 0.6557, "step": 858 }, { "epoch": 0.11206559937524405, "grad_norm": 2.70944881439209, "learning_rate": 9.993677220568619e-06, "loss": 0.7614, "step": 861 }, { "epoch": 0.11245607184693479, "grad_norm": 2.6077942848205566, "learning_rate": 9.993571718786268e-06, "loss": 0.713, "step": 864 }, { "epoch": 0.11284654431862554, "grad_norm": 2.833029270172119, "learning_rate": 9.993465344637443e-06, "loss": 0.6649, "step": 867 }, { "epoch": 0.11323701679031628, "grad_norm": 2.6848433017730713, "learning_rate": 9.993358098140727e-06, "loss": 0.7228, "step": 870 }, { "epoch": 0.11362748926200703, "grad_norm": 3.404714822769165, "learning_rate": 9.993249979314857e-06, "loss": 0.6024, "step": 873 }, { "epoch": 0.11401796173369777, "grad_norm": 4.099708557128906, "learning_rate": 9.993140988178718e-06, "loss": 0.664, "step": 876 }, { "epoch": 0.11440843420538852, "grad_norm": 2.908158302307129, "learning_rate": 9.993031124751353e-06, "loss": 0.7663, "step": 879 }, { "epoch": 0.11479890667707926, "grad_norm": 2.5075185298919678, "learning_rate": 9.992920389051955e-06, "loss": 0.7454, "step": 882 }, { "epoch": 0.11518937914877002, "grad_norm": 3.0805585384368896, "learning_rate": 9.992808781099868e-06, "loss": 0.7226, "step": 885 }, { "epoch": 0.11557985162046076, "grad_norm": 3.1145894527435303, "learning_rate": 9.992696300914591e-06, "loss": 0.732, "step": 888 }, { "epoch": 0.11597032409215151, "grad_norm": 3.146247386932373, "learning_rate": 9.992582948515772e-06, "loss": 0.785, "step": 891 }, { "epoch": 0.11636079656384225, "grad_norm": 3.7035303115844727, "learning_rate": 9.992468723923216e-06, "loss": 0.7226, "step": 894 }, { "epoch": 0.116751269035533, "grad_norm": 3.59032940864563, "learning_rate": 9.992353627156876e-06, "loss": 0.6496, "step": 897 }, { "epoch": 0.11714174150722374, "grad_norm": 3.1689419746398926, "learning_rate": 9.992237658236859e-06, "loss": 0.6467, "step": 900 }, { "epoch": 0.11753221397891449, "grad_norm": 2.9618992805480957, "learning_rate": 9.992120817183427e-06, "loss": 0.6797, "step": 903 }, { "epoch": 0.11792268645060523, "grad_norm": 2.657771587371826, "learning_rate": 9.992003104016988e-06, "loss": 0.6924, "step": 906 }, { "epoch": 0.11831315892229598, "grad_norm": 2.938518524169922, "learning_rate": 9.99188451875811e-06, "loss": 0.722, "step": 909 }, { "epoch": 0.11870363139398672, "grad_norm": 2.878551959991455, "learning_rate": 9.991765061427508e-06, "loss": 0.6749, "step": 912 }, { "epoch": 0.11909410386567747, "grad_norm": 3.0917224884033203, "learning_rate": 9.99164473204605e-06, "loss": 0.634, "step": 915 }, { "epoch": 0.11948457633736821, "grad_norm": 3.893941879272461, "learning_rate": 9.991523530634758e-06, "loss": 0.7202, "step": 918 }, { "epoch": 0.11987504880905896, "grad_norm": 12.49660873413086, "learning_rate": 9.991401457214807e-06, "loss": 0.7348, "step": 921 }, { "epoch": 0.1202655212807497, "grad_norm": 2.8306703567504883, "learning_rate": 9.99127851180752e-06, "loss": 0.7267, "step": 924 }, { "epoch": 0.12065599375244045, "grad_norm": 3.2142062187194824, "learning_rate": 9.99115469443438e-06, "loss": 0.6601, "step": 927 }, { "epoch": 0.1210464662241312, "grad_norm": 2.776210308074951, "learning_rate": 9.991030005117013e-06, "loss": 0.671, "step": 930 }, { "epoch": 0.12143693869582195, "grad_norm": 3.657951593399048, "learning_rate": 9.990904443877203e-06, "loss": 0.6504, "step": 933 }, { "epoch": 0.1218274111675127, "grad_norm": 2.87703800201416, "learning_rate": 9.990778010736885e-06, "loss": 0.7212, "step": 936 }, { "epoch": 0.12221788363920344, "grad_norm": 4.6955718994140625, "learning_rate": 9.99065070571815e-06, "loss": 0.6704, "step": 939 }, { "epoch": 0.12260835611089418, "grad_norm": 4.890464782714844, "learning_rate": 9.990522528843236e-06, "loss": 0.587, "step": 942 }, { "epoch": 0.12299882858258493, "grad_norm": 3.7932112216949463, "learning_rate": 9.990393480134532e-06, "loss": 0.6647, "step": 945 }, { "epoch": 0.12338930105427567, "grad_norm": 2.9928741455078125, "learning_rate": 9.990263559614589e-06, "loss": 0.7145, "step": 948 }, { "epoch": 0.12377977352596642, "grad_norm": 2.880676031112671, "learning_rate": 9.990132767306097e-06, "loss": 0.6792, "step": 951 }, { "epoch": 0.12417024599765716, "grad_norm": 2.795924186706543, "learning_rate": 9.990001103231909e-06, "loss": 0.6022, "step": 954 }, { "epoch": 0.12456071846934791, "grad_norm": 2.9928035736083984, "learning_rate": 9.989868567415027e-06, "loss": 0.6714, "step": 957 }, { "epoch": 0.12495119094103865, "grad_norm": 2.6535706520080566, "learning_rate": 9.989735159878601e-06, "loss": 0.6269, "step": 960 }, { "epoch": 0.1253416634127294, "grad_norm": 2.8510921001434326, "learning_rate": 9.98960088064594e-06, "loss": 0.7881, "step": 963 }, { "epoch": 0.12573213588442014, "grad_norm": 2.8703200817108154, "learning_rate": 9.989465729740504e-06, "loss": 0.641, "step": 966 }, { "epoch": 0.1261226083561109, "grad_norm": 2.8146934509277344, "learning_rate": 9.989329707185899e-06, "loss": 0.5499, "step": 969 }, { "epoch": 0.12651308082780163, "grad_norm": 2.848714590072632, "learning_rate": 9.989192813005891e-06, "loss": 0.6357, "step": 972 }, { "epoch": 0.12690355329949238, "grad_norm": 3.125911235809326, "learning_rate": 9.989055047224393e-06, "loss": 0.8084, "step": 975 }, { "epoch": 0.12729402577118312, "grad_norm": 3.852508306503296, "learning_rate": 9.988916409865476e-06, "loss": 0.6652, "step": 978 }, { "epoch": 0.12768449824287387, "grad_norm": 3.4313900470733643, "learning_rate": 9.988776900953356e-06, "loss": 0.6909, "step": 981 }, { "epoch": 0.1280749707145646, "grad_norm": 2.9528424739837646, "learning_rate": 9.988636520512407e-06, "loss": 0.6335, "step": 984 }, { "epoch": 0.12846544318625536, "grad_norm": 3.1923022270202637, "learning_rate": 9.988495268567152e-06, "loss": 0.7599, "step": 987 }, { "epoch": 0.1288559156579461, "grad_norm": 2.8430709838867188, "learning_rate": 9.988353145142267e-06, "loss": 0.659, "step": 990 }, { "epoch": 0.12924638812963687, "grad_norm": 4.609011173248291, "learning_rate": 9.988210150262582e-06, "loss": 0.7288, "step": 993 }, { "epoch": 0.12963686060132762, "grad_norm": 3.1884047985076904, "learning_rate": 9.98806628395308e-06, "loss": 0.8857, "step": 996 }, { "epoch": 0.13002733307301836, "grad_norm": 4.605678081512451, "learning_rate": 9.987921546238888e-06, "loss": 0.6314, "step": 999 }, { "epoch": 0.1304178055447091, "grad_norm": 2.8281877040863037, "learning_rate": 9.987775937145297e-06, "loss": 0.6079, "step": 1002 }, { "epoch": 0.13080827801639985, "grad_norm": 7.240689277648926, "learning_rate": 9.987629456697741e-06, "loss": 0.6683, "step": 1005 }, { "epoch": 0.1311987504880906, "grad_norm": 3.1431357860565186, "learning_rate": 9.987482104921813e-06, "loss": 0.7741, "step": 1008 }, { "epoch": 0.13158922295978134, "grad_norm": 3.569730043411255, "learning_rate": 9.98733388184325e-06, "loss": 0.83, "step": 1011 }, { "epoch": 0.1319796954314721, "grad_norm": 2.689365863800049, "learning_rate": 9.987184787487953e-06, "loss": 0.7279, "step": 1014 }, { "epoch": 0.13237016790316283, "grad_norm": 2.8707399368286133, "learning_rate": 9.987034821881965e-06, "loss": 0.7021, "step": 1017 }, { "epoch": 0.13276064037485358, "grad_norm": 2.8549368381500244, "learning_rate": 9.986883985051485e-06, "loss": 0.6305, "step": 1020 }, { "epoch": 0.13315111284654432, "grad_norm": 2.8607091903686523, "learning_rate": 9.986732277022862e-06, "loss": 0.647, "step": 1023 }, { "epoch": 0.13354158531823507, "grad_norm": 3.3489599227905273, "learning_rate": 9.986579697822601e-06, "loss": 0.624, "step": 1026 }, { "epoch": 0.1339320577899258, "grad_norm": 3.7060370445251465, "learning_rate": 9.986426247477358e-06, "loss": 0.7278, "step": 1029 }, { "epoch": 0.13432253026161656, "grad_norm": 3.295768976211548, "learning_rate": 9.98627192601394e-06, "loss": 0.6298, "step": 1032 }, { "epoch": 0.1347130027333073, "grad_norm": 4.761738300323486, "learning_rate": 9.986116733459303e-06, "loss": 0.8255, "step": 1035 }, { "epoch": 0.13510347520499805, "grad_norm": 2.9362332820892334, "learning_rate": 9.985960669840564e-06, "loss": 0.6209, "step": 1038 }, { "epoch": 0.1354939476766888, "grad_norm": 2.879631519317627, "learning_rate": 9.985803735184986e-06, "loss": 0.6995, "step": 1041 }, { "epoch": 0.13588442014837954, "grad_norm": 2.8256990909576416, "learning_rate": 9.985645929519983e-06, "loss": 0.6906, "step": 1044 }, { "epoch": 0.13627489262007028, "grad_norm": 2.7403573989868164, "learning_rate": 9.985487252873125e-06, "loss": 0.5904, "step": 1047 }, { "epoch": 0.13666536509176103, "grad_norm": 2.701652765274048, "learning_rate": 9.98532770527213e-06, "loss": 0.6527, "step": 1050 }, { "epoch": 0.13705583756345177, "grad_norm": 2.5606539249420166, "learning_rate": 9.985167286744875e-06, "loss": 0.7061, "step": 1053 }, { "epoch": 0.13744631003514252, "grad_norm": 2.821444511413574, "learning_rate": 9.98500599731938e-06, "loss": 0.671, "step": 1056 }, { "epoch": 0.13783678250683326, "grad_norm": 3.4483652114868164, "learning_rate": 9.984843837023826e-06, "loss": 0.5973, "step": 1059 }, { "epoch": 0.138227254978524, "grad_norm": 2.7346315383911133, "learning_rate": 9.984680805886538e-06, "loss": 0.6508, "step": 1062 }, { "epoch": 0.13861772745021475, "grad_norm": 3.0941390991210938, "learning_rate": 9.984516903936002e-06, "loss": 0.6686, "step": 1065 }, { "epoch": 0.1390081999219055, "grad_norm": 2.95706844329834, "learning_rate": 9.984352131200847e-06, "loss": 0.7592, "step": 1068 }, { "epoch": 0.13939867239359624, "grad_norm": 2.627673864364624, "learning_rate": 9.984186487709862e-06, "loss": 0.6032, "step": 1071 }, { "epoch": 0.13978914486528699, "grad_norm": 3.3096377849578857, "learning_rate": 9.984019973491981e-06, "loss": 0.7385, "step": 1074 }, { "epoch": 0.14017961733697773, "grad_norm": 2.916146993637085, "learning_rate": 9.983852588576296e-06, "loss": 0.6303, "step": 1077 }, { "epoch": 0.1405700898086685, "grad_norm": 6.1797614097595215, "learning_rate": 9.983684332992049e-06, "loss": 0.6442, "step": 1080 }, { "epoch": 0.14096056228035925, "grad_norm": 4.810859203338623, "learning_rate": 9.983515206768633e-06, "loss": 0.7992, "step": 1083 }, { "epoch": 0.14135103475205, "grad_norm": 2.918363094329834, "learning_rate": 9.983345209935593e-06, "loss": 0.6568, "step": 1086 }, { "epoch": 0.14174150722374074, "grad_norm": 2.725175380706787, "learning_rate": 9.983174342522628e-06, "loss": 0.6201, "step": 1089 }, { "epoch": 0.14213197969543148, "grad_norm": 3.9198555946350098, "learning_rate": 9.983002604559591e-06, "loss": 0.6546, "step": 1092 }, { "epoch": 0.14252245216712223, "grad_norm": 2.7483971118927, "learning_rate": 9.98282999607648e-06, "loss": 0.7721, "step": 1095 }, { "epoch": 0.14291292463881297, "grad_norm": 3.3657147884368896, "learning_rate": 9.982656517103451e-06, "loss": 0.7828, "step": 1098 }, { "epoch": 0.14330339711050372, "grad_norm": 2.881145715713501, "learning_rate": 9.982482167670811e-06, "loss": 0.6587, "step": 1101 }, { "epoch": 0.14369386958219446, "grad_norm": 3.116957187652588, "learning_rate": 9.982306947809016e-06, "loss": 0.8168, "step": 1104 }, { "epoch": 0.1440843420538852, "grad_norm": 3.0553340911865234, "learning_rate": 9.98213085754868e-06, "loss": 0.6254, "step": 1107 }, { "epoch": 0.14447481452557595, "grad_norm": 2.685007095336914, "learning_rate": 9.981953896920564e-06, "loss": 0.6368, "step": 1110 }, { "epoch": 0.1448652869972667, "grad_norm": 3.4758572578430176, "learning_rate": 9.981776065955583e-06, "loss": 0.6683, "step": 1113 }, { "epoch": 0.14525575946895744, "grad_norm": 3.288086414337158, "learning_rate": 9.981597364684804e-06, "loss": 0.6544, "step": 1116 }, { "epoch": 0.14564623194064819, "grad_norm": 2.9728589057922363, "learning_rate": 9.981417793139443e-06, "loss": 0.7256, "step": 1119 }, { "epoch": 0.14603670441233893, "grad_norm": 4.481566429138184, "learning_rate": 9.981237351350874e-06, "loss": 0.6933, "step": 1122 }, { "epoch": 0.14642717688402968, "grad_norm": 3.00492000579834, "learning_rate": 9.98105603935062e-06, "loss": 0.7278, "step": 1125 }, { "epoch": 0.14681764935572042, "grad_norm": 3.2771825790405273, "learning_rate": 9.980873857170352e-06, "loss": 0.7472, "step": 1128 }, { "epoch": 0.14720812182741116, "grad_norm": 4.5184550285339355, "learning_rate": 9.980690804841901e-06, "loss": 0.6744, "step": 1131 }, { "epoch": 0.1475985942991019, "grad_norm": 3.881673574447632, "learning_rate": 9.980506882397246e-06, "loss": 0.5923, "step": 1134 }, { "epoch": 0.14798906677079265, "grad_norm": 3.5104172229766846, "learning_rate": 9.980322089868512e-06, "loss": 0.6073, "step": 1137 }, { "epoch": 0.1483795392424834, "grad_norm": 4.261860370635986, "learning_rate": 9.980136427287989e-06, "loss": 0.6716, "step": 1140 }, { "epoch": 0.14877001171417414, "grad_norm": 3.353560209274292, "learning_rate": 9.979949894688108e-06, "loss": 0.7559, "step": 1143 }, { "epoch": 0.1491604841858649, "grad_norm": 3.719203233718872, "learning_rate": 9.979762492101456e-06, "loss": 0.7475, "step": 1146 }, { "epoch": 0.14955095665755563, "grad_norm": 3.833672523498535, "learning_rate": 9.979574219560773e-06, "loss": 0.7024, "step": 1149 }, { "epoch": 0.14994142912924638, "grad_norm": 3.349752426147461, "learning_rate": 9.97938507709895e-06, "loss": 0.681, "step": 1152 }, { "epoch": 0.15033190160093712, "grad_norm": 3.8509340286254883, "learning_rate": 9.979195064749029e-06, "loss": 0.7446, "step": 1155 }, { "epoch": 0.15072237407262787, "grad_norm": 3.9146859645843506, "learning_rate": 9.979004182544204e-06, "loss": 0.6898, "step": 1158 }, { "epoch": 0.1511128465443186, "grad_norm": 3.239340305328369, "learning_rate": 9.978812430517824e-06, "loss": 0.7282, "step": 1161 }, { "epoch": 0.15150331901600936, "grad_norm": 4.0539727210998535, "learning_rate": 9.978619808703385e-06, "loss": 0.6493, "step": 1164 }, { "epoch": 0.15189379148770013, "grad_norm": 3.1297805309295654, "learning_rate": 9.978426317134538e-06, "loss": 0.7316, "step": 1167 }, { "epoch": 0.15228426395939088, "grad_norm": 3.2664248943328857, "learning_rate": 9.978231955845089e-06, "loss": 0.6631, "step": 1170 }, { "epoch": 0.15267473643108162, "grad_norm": 3.597419023513794, "learning_rate": 9.978036724868989e-06, "loss": 0.661, "step": 1173 }, { "epoch": 0.15306520890277237, "grad_norm": 2.828190565109253, "learning_rate": 9.977840624240345e-06, "loss": 0.6447, "step": 1176 }, { "epoch": 0.1534556813744631, "grad_norm": 2.887118339538574, "learning_rate": 9.977643653993415e-06, "loss": 0.7544, "step": 1179 }, { "epoch": 0.15384615384615385, "grad_norm": 4.281754016876221, "learning_rate": 9.977445814162612e-06, "loss": 0.7907, "step": 1182 }, { "epoch": 0.1542366263178446, "grad_norm": 2.87988018989563, "learning_rate": 9.977247104782496e-06, "loss": 0.6872, "step": 1185 }, { "epoch": 0.15462709878953534, "grad_norm": 3.0348126888275146, "learning_rate": 9.97704752588778e-06, "loss": 0.6695, "step": 1188 }, { "epoch": 0.1550175712612261, "grad_norm": 2.9127049446105957, "learning_rate": 9.976847077513331e-06, "loss": 0.6395, "step": 1191 }, { "epoch": 0.15540804373291683, "grad_norm": 2.811537981033325, "learning_rate": 9.976645759694167e-06, "loss": 0.6382, "step": 1194 }, { "epoch": 0.15579851620460758, "grad_norm": 3.1080002784729004, "learning_rate": 9.976443572465462e-06, "loss": 0.6556, "step": 1197 }, { "epoch": 0.15618898867629832, "grad_norm": 3.4639227390289307, "learning_rate": 9.97624051586253e-06, "loss": 0.6893, "step": 1200 }, { "epoch": 0.15657946114798907, "grad_norm": 3.166987895965576, "learning_rate": 9.97603658992085e-06, "loss": 0.669, "step": 1203 }, { "epoch": 0.1569699336196798, "grad_norm": 3.154684543609619, "learning_rate": 9.975831794676045e-06, "loss": 0.7224, "step": 1206 }, { "epoch": 0.15736040609137056, "grad_norm": 4.444057464599609, "learning_rate": 9.975626130163893e-06, "loss": 0.6632, "step": 1209 }, { "epoch": 0.1577508785630613, "grad_norm": 2.8680520057678223, "learning_rate": 9.975419596420325e-06, "loss": 0.7838, "step": 1212 }, { "epoch": 0.15814135103475205, "grad_norm": 2.656010866165161, "learning_rate": 9.975212193481419e-06, "loss": 0.6376, "step": 1215 }, { "epoch": 0.1585318235064428, "grad_norm": 2.9363276958465576, "learning_rate": 9.975003921383409e-06, "loss": 0.7675, "step": 1218 }, { "epoch": 0.15892229597813354, "grad_norm": 2.935682535171509, "learning_rate": 9.97479478016268e-06, "loss": 0.749, "step": 1221 }, { "epoch": 0.15931276844982428, "grad_norm": 4.171043395996094, "learning_rate": 9.974584769855768e-06, "loss": 0.64, "step": 1224 }, { "epoch": 0.15970324092151503, "grad_norm": 3.761404037475586, "learning_rate": 9.974373890499363e-06, "loss": 0.6779, "step": 1227 }, { "epoch": 0.16009371339320577, "grad_norm": 3.7097084522247314, "learning_rate": 9.974162142130302e-06, "loss": 0.6745, "step": 1230 }, { "epoch": 0.16048418586489652, "grad_norm": 2.918391704559326, "learning_rate": 9.97394952478558e-06, "loss": 0.6632, "step": 1233 }, { "epoch": 0.16087465833658726, "grad_norm": 2.916435956954956, "learning_rate": 9.97373603850234e-06, "loss": 0.7137, "step": 1236 }, { "epoch": 0.161265130808278, "grad_norm": 4.109777450561523, "learning_rate": 9.973521683317877e-06, "loss": 0.6859, "step": 1239 }, { "epoch": 0.16165560327996875, "grad_norm": 3.2378978729248047, "learning_rate": 9.973306459269639e-06, "loss": 0.6296, "step": 1242 }, { "epoch": 0.1620460757516595, "grad_norm": 3.034580707550049, "learning_rate": 9.973090366395223e-06, "loss": 0.6648, "step": 1245 }, { "epoch": 0.16243654822335024, "grad_norm": 2.901724100112915, "learning_rate": 9.972873404732383e-06, "loss": 0.6546, "step": 1248 }, { "epoch": 0.16282702069504099, "grad_norm": 4.608283519744873, "learning_rate": 9.972655574319022e-06, "loss": 0.6832, "step": 1251 }, { "epoch": 0.16321749316673176, "grad_norm": 2.890130043029785, "learning_rate": 9.972436875193191e-06, "loss": 0.7566, "step": 1254 }, { "epoch": 0.1636079656384225, "grad_norm": 2.8199384212493896, "learning_rate": 9.972217307393099e-06, "loss": 0.654, "step": 1257 }, { "epoch": 0.16399843811011325, "grad_norm": 3.453725576400757, "learning_rate": 9.971996870957104e-06, "loss": 0.6728, "step": 1260 }, { "epoch": 0.164388910581804, "grad_norm": 2.9947872161865234, "learning_rate": 9.971775565923715e-06, "loss": 0.8324, "step": 1263 }, { "epoch": 0.16477938305349474, "grad_norm": 4.272884368896484, "learning_rate": 9.971553392331593e-06, "loss": 0.6809, "step": 1266 }, { "epoch": 0.16516985552518548, "grad_norm": 3.4897780418395996, "learning_rate": 9.971330350219553e-06, "loss": 0.6167, "step": 1269 }, { "epoch": 0.16556032799687623, "grad_norm": 3.0097784996032715, "learning_rate": 9.971106439626559e-06, "loss": 0.5921, "step": 1272 }, { "epoch": 0.16595080046856697, "grad_norm": 5.600336074829102, "learning_rate": 9.970881660591727e-06, "loss": 0.6879, "step": 1275 }, { "epoch": 0.16634127294025772, "grad_norm": 2.6105566024780273, "learning_rate": 9.970656013154326e-06, "loss": 0.651, "step": 1278 }, { "epoch": 0.16673174541194846, "grad_norm": 2.573819875717163, "learning_rate": 9.970429497353777e-06, "loss": 0.6906, "step": 1281 }, { "epoch": 0.1671222178836392, "grad_norm": 2.971766233444214, "learning_rate": 9.97020211322965e-06, "loss": 0.6863, "step": 1284 }, { "epoch": 0.16751269035532995, "grad_norm": 2.8079757690429688, "learning_rate": 9.96997386082167e-06, "loss": 0.6304, "step": 1287 }, { "epoch": 0.1679031628270207, "grad_norm": 3.1326887607574463, "learning_rate": 9.969744740169713e-06, "loss": 0.6407, "step": 1290 }, { "epoch": 0.16829363529871144, "grad_norm": 2.8672449588775635, "learning_rate": 9.969514751313803e-06, "loss": 0.764, "step": 1293 }, { "epoch": 0.1686841077704022, "grad_norm": 2.8906450271606445, "learning_rate": 9.969283894294121e-06, "loss": 0.8143, "step": 1296 }, { "epoch": 0.16907458024209293, "grad_norm": 2.724889039993286, "learning_rate": 9.969052169150997e-06, "loss": 0.6327, "step": 1299 }, { "epoch": 0.16946505271378368, "grad_norm": 2.980803966522217, "learning_rate": 9.968819575924911e-06, "loss": 0.6832, "step": 1302 }, { "epoch": 0.16985552518547442, "grad_norm": 2.6396119594573975, "learning_rate": 9.9685861146565e-06, "loss": 0.6276, "step": 1305 }, { "epoch": 0.17024599765716517, "grad_norm": 5.2581257820129395, "learning_rate": 9.968351785386545e-06, "loss": 0.6789, "step": 1308 }, { "epoch": 0.1706364701288559, "grad_norm": 2.702031373977661, "learning_rate": 9.968116588155986e-06, "loss": 0.7108, "step": 1311 }, { "epoch": 0.17102694260054666, "grad_norm": 2.8001067638397217, "learning_rate": 9.967880523005911e-06, "loss": 0.6184, "step": 1314 }, { "epoch": 0.1714174150722374, "grad_norm": 2.5163309574127197, "learning_rate": 9.967643589977559e-06, "loss": 0.6862, "step": 1317 }, { "epoch": 0.17180788754392814, "grad_norm": 3.189988374710083, "learning_rate": 9.967405789112322e-06, "loss": 0.6618, "step": 1320 }, { "epoch": 0.1721983600156189, "grad_norm": 3.1581954956054688, "learning_rate": 9.967167120451744e-06, "loss": 0.6224, "step": 1323 }, { "epoch": 0.17258883248730963, "grad_norm": 4.420681953430176, "learning_rate": 9.966927584037518e-06, "loss": 0.7317, "step": 1326 }, { "epoch": 0.17297930495900038, "grad_norm": 2.627751350402832, "learning_rate": 9.966687179911494e-06, "loss": 0.706, "step": 1329 }, { "epoch": 0.17336977743069112, "grad_norm": 2.905385971069336, "learning_rate": 9.966445908115668e-06, "loss": 0.6165, "step": 1332 }, { "epoch": 0.17376024990238187, "grad_norm": 2.9616243839263916, "learning_rate": 9.966203768692189e-06, "loss": 0.619, "step": 1335 }, { "epoch": 0.17415072237407261, "grad_norm": 3.9424779415130615, "learning_rate": 9.965960761683358e-06, "loss": 0.6366, "step": 1338 }, { "epoch": 0.1745411948457634, "grad_norm": 2.880329132080078, "learning_rate": 9.965716887131628e-06, "loss": 0.7437, "step": 1341 }, { "epoch": 0.17493166731745413, "grad_norm": 2.994891405105591, "learning_rate": 9.965472145079606e-06, "loss": 0.7427, "step": 1344 }, { "epoch": 0.17532213978914488, "grad_norm": 3.0199975967407227, "learning_rate": 9.965226535570047e-06, "loss": 0.6787, "step": 1347 }, { "epoch": 0.17571261226083562, "grad_norm": 4.0724873542785645, "learning_rate": 9.964980058645856e-06, "loss": 0.6798, "step": 1350 }, { "epoch": 0.17610308473252637, "grad_norm": 3.5381436347961426, "learning_rate": 9.964732714350093e-06, "loss": 0.6456, "step": 1353 }, { "epoch": 0.1764935572042171, "grad_norm": 2.9115025997161865, "learning_rate": 9.964484502725972e-06, "loss": 0.6812, "step": 1356 }, { "epoch": 0.17688402967590786, "grad_norm": 2.9412219524383545, "learning_rate": 9.964235423816851e-06, "loss": 0.6854, "step": 1359 }, { "epoch": 0.1772745021475986, "grad_norm": 3.002401351928711, "learning_rate": 9.963985477666242e-06, "loss": 0.6684, "step": 1362 }, { "epoch": 0.17766497461928935, "grad_norm": 5.3304877281188965, "learning_rate": 9.963734664317816e-06, "loss": 0.7724, "step": 1365 }, { "epoch": 0.1780554470909801, "grad_norm": 3.2181010246276855, "learning_rate": 9.963482983815385e-06, "loss": 0.6604, "step": 1368 }, { "epoch": 0.17844591956267083, "grad_norm": 3.477856397628784, "learning_rate": 9.963230436202918e-06, "loss": 0.6447, "step": 1371 }, { "epoch": 0.17883639203436158, "grad_norm": 3.450572967529297, "learning_rate": 9.962977021524535e-06, "loss": 0.5988, "step": 1374 }, { "epoch": 0.17922686450605232, "grad_norm": 4.131798267364502, "learning_rate": 9.962722739824506e-06, "loss": 0.6665, "step": 1377 }, { "epoch": 0.17961733697774307, "grad_norm": 3.188093423843384, "learning_rate": 9.962467591147256e-06, "loss": 0.6613, "step": 1380 }, { "epoch": 0.18000780944943381, "grad_norm": 3.239835023880005, "learning_rate": 9.962211575537357e-06, "loss": 0.6192, "step": 1383 }, { "epoch": 0.18039828192112456, "grad_norm": 3.1334681510925293, "learning_rate": 9.961954693039535e-06, "loss": 0.6414, "step": 1386 }, { "epoch": 0.1807887543928153, "grad_norm": 2.906036376953125, "learning_rate": 9.961696943698667e-06, "loss": 0.7304, "step": 1389 }, { "epoch": 0.18117922686450605, "grad_norm": 4.601681709289551, "learning_rate": 9.961438327559778e-06, "loss": 0.6493, "step": 1392 }, { "epoch": 0.1815696993361968, "grad_norm": 3.0899908542633057, "learning_rate": 9.961178844668054e-06, "loss": 0.6764, "step": 1395 }, { "epoch": 0.18196017180788754, "grad_norm": 3.128690242767334, "learning_rate": 9.96091849506882e-06, "loss": 0.7436, "step": 1398 }, { "epoch": 0.18235064427957828, "grad_norm": 4.097222805023193, "learning_rate": 9.960657278807562e-06, "loss": 0.6236, "step": 1401 }, { "epoch": 0.18274111675126903, "grad_norm": 2.8826756477355957, "learning_rate": 9.960395195929915e-06, "loss": 0.7283, "step": 1404 }, { "epoch": 0.18313158922295977, "grad_norm": 2.9129467010498047, "learning_rate": 9.96013224648166e-06, "loss": 0.7447, "step": 1407 }, { "epoch": 0.18352206169465052, "grad_norm": 3.429777145385742, "learning_rate": 9.959868430508737e-06, "loss": 0.5577, "step": 1410 }, { "epoch": 0.18391253416634126, "grad_norm": 6.315993785858154, "learning_rate": 9.959603748057234e-06, "loss": 0.5291, "step": 1413 }, { "epoch": 0.184303006638032, "grad_norm": 2.7502191066741943, "learning_rate": 9.959338199173387e-06, "loss": 0.7087, "step": 1416 }, { "epoch": 0.18469347910972275, "grad_norm": 3.25874662399292, "learning_rate": 9.959071783903592e-06, "loss": 0.6868, "step": 1419 }, { "epoch": 0.1850839515814135, "grad_norm": 2.6682872772216797, "learning_rate": 9.958804502294388e-06, "loss": 0.598, "step": 1422 }, { "epoch": 0.18547442405310427, "grad_norm": 4.8309221267700195, "learning_rate": 9.95853635439247e-06, "loss": 0.7511, "step": 1425 }, { "epoch": 0.18586489652479501, "grad_norm": 5.207489013671875, "learning_rate": 9.95826734024468e-06, "loss": 0.7661, "step": 1428 }, { "epoch": 0.18625536899648576, "grad_norm": 2.6510419845581055, "learning_rate": 9.95799745989802e-06, "loss": 0.6397, "step": 1431 }, { "epoch": 0.1866458414681765, "grad_norm": 3.0544328689575195, "learning_rate": 9.957726713399631e-06, "loss": 0.6224, "step": 1434 }, { "epoch": 0.18703631393986725, "grad_norm": 2.7472503185272217, "learning_rate": 9.957455100796815e-06, "loss": 0.6854, "step": 1437 }, { "epoch": 0.187426786411558, "grad_norm": 3.071814775466919, "learning_rate": 9.957182622137022e-06, "loss": 0.6508, "step": 1440 }, { "epoch": 0.18781725888324874, "grad_norm": 2.8780770301818848, "learning_rate": 9.956909277467854e-06, "loss": 0.6678, "step": 1443 }, { "epoch": 0.18820773135493948, "grad_norm": 3.1620678901672363, "learning_rate": 9.956635066837062e-06, "loss": 0.6834, "step": 1446 }, { "epoch": 0.18859820382663023, "grad_norm": 2.79729962348938, "learning_rate": 9.956359990292552e-06, "loss": 0.6528, "step": 1449 }, { "epoch": 0.18898867629832097, "grad_norm": 2.7644762992858887, "learning_rate": 9.956084047882377e-06, "loss": 0.7051, "step": 1452 }, { "epoch": 0.18937914877001172, "grad_norm": 3.1195924282073975, "learning_rate": 9.955807239654746e-06, "loss": 0.7568, "step": 1455 }, { "epoch": 0.18976962124170246, "grad_norm": 2.780245065689087, "learning_rate": 9.955529565658017e-06, "loss": 0.55, "step": 1458 }, { "epoch": 0.1901600937133932, "grad_norm": 5.350193977355957, "learning_rate": 9.955251025940696e-06, "loss": 0.6525, "step": 1461 }, { "epoch": 0.19055056618508395, "grad_norm": 2.754183530807495, "learning_rate": 9.954971620551446e-06, "loss": 0.6989, "step": 1464 }, { "epoch": 0.1909410386567747, "grad_norm": 2.848510980606079, "learning_rate": 9.954691349539076e-06, "loss": 0.7376, "step": 1467 }, { "epoch": 0.19133151112846544, "grad_norm": 4.006906509399414, "learning_rate": 9.954410212952551e-06, "loss": 0.68, "step": 1470 }, { "epoch": 0.1917219836001562, "grad_norm": 2.6759417057037354, "learning_rate": 9.954128210840985e-06, "loss": 0.6434, "step": 1473 }, { "epoch": 0.19211245607184693, "grad_norm": 2.9079430103302, "learning_rate": 9.953845343253643e-06, "loss": 0.6194, "step": 1476 }, { "epoch": 0.19250292854353768, "grad_norm": 2.6229822635650635, "learning_rate": 9.953561610239941e-06, "loss": 0.6018, "step": 1479 }, { "epoch": 0.19289340101522842, "grad_norm": 2.8164374828338623, "learning_rate": 9.953277011849444e-06, "loss": 0.6276, "step": 1482 }, { "epoch": 0.19328387348691917, "grad_norm": 3.2668967247009277, "learning_rate": 9.952991548131876e-06, "loss": 0.722, "step": 1485 }, { "epoch": 0.1936743459586099, "grad_norm": 2.878713846206665, "learning_rate": 9.952705219137102e-06, "loss": 0.7137, "step": 1488 }, { "epoch": 0.19406481843030066, "grad_norm": 2.783433437347412, "learning_rate": 9.952418024915146e-06, "loss": 0.6569, "step": 1491 }, { "epoch": 0.1944552909019914, "grad_norm": 3.105311870574951, "learning_rate": 9.95212996551618e-06, "loss": 0.6587, "step": 1494 }, { "epoch": 0.19484576337368215, "grad_norm": 6.641456604003906, "learning_rate": 9.951841040990527e-06, "loss": 0.6604, "step": 1497 }, { "epoch": 0.1952362358453729, "grad_norm": 3.2183666229248047, "learning_rate": 9.951551251388661e-06, "loss": 0.74, "step": 1500 }, { "epoch": 0.19562670831706364, "grad_norm": 2.9036731719970703, "learning_rate": 9.951260596761208e-06, "loss": 0.6216, "step": 1503 }, { "epoch": 0.19601718078875438, "grad_norm": 3.435842752456665, "learning_rate": 9.950969077158944e-06, "loss": 0.7316, "step": 1506 }, { "epoch": 0.19640765326044513, "grad_norm": 2.714198350906372, "learning_rate": 9.950676692632797e-06, "loss": 0.566, "step": 1509 }, { "epoch": 0.1967981257321359, "grad_norm": 3.2397942543029785, "learning_rate": 9.950383443233848e-06, "loss": 0.702, "step": 1512 }, { "epoch": 0.19718859820382664, "grad_norm": 2.9357926845550537, "learning_rate": 9.950089329013324e-06, "loss": 0.7299, "step": 1515 }, { "epoch": 0.1975790706755174, "grad_norm": 4.893215179443359, "learning_rate": 9.949794350022609e-06, "loss": 0.7201, "step": 1518 }, { "epoch": 0.19796954314720813, "grad_norm": 2.9395291805267334, "learning_rate": 9.949498506313232e-06, "loss": 0.7369, "step": 1521 }, { "epoch": 0.19836001561889888, "grad_norm": 2.985732078552246, "learning_rate": 9.949201797936882e-06, "loss": 0.7187, "step": 1524 }, { "epoch": 0.19875048809058962, "grad_norm": 3.205756425857544, "learning_rate": 9.948904224945386e-06, "loss": 0.6903, "step": 1527 }, { "epoch": 0.19914096056228037, "grad_norm": 3.1247010231018066, "learning_rate": 9.948605787390735e-06, "loss": 0.6934, "step": 1530 }, { "epoch": 0.1995314330339711, "grad_norm": 2.7591116428375244, "learning_rate": 9.948306485325061e-06, "loss": 0.6708, "step": 1533 }, { "epoch": 0.19992190550566186, "grad_norm": 3.138434648513794, "learning_rate": 9.948006318800657e-06, "loss": 0.7368, "step": 1536 }, { "epoch": 0.2003123779773526, "grad_norm": 6.248907089233398, "learning_rate": 9.947705287869956e-06, "loss": 0.8298, "step": 1539 }, { "epoch": 0.20070285044904335, "grad_norm": 4.321200847625732, "learning_rate": 9.947403392585548e-06, "loss": 0.6983, "step": 1542 }, { "epoch": 0.2010933229207341, "grad_norm": 2.919874429702759, "learning_rate": 9.947100633000178e-06, "loss": 0.6522, "step": 1545 }, { "epoch": 0.20148379539242484, "grad_norm": 4.117648601531982, "learning_rate": 9.946797009166732e-06, "loss": 0.5987, "step": 1548 }, { "epoch": 0.20187426786411558, "grad_norm": 3.4389774799346924, "learning_rate": 9.946492521138258e-06, "loss": 0.7094, "step": 1551 }, { "epoch": 0.20226474033580633, "grad_norm": 2.9114327430725098, "learning_rate": 9.946187168967944e-06, "loss": 0.7512, "step": 1554 }, { "epoch": 0.20265521280749707, "grad_norm": 2.9621176719665527, "learning_rate": 9.94588095270914e-06, "loss": 0.6346, "step": 1557 }, { "epoch": 0.20304568527918782, "grad_norm": 4.908132553100586, "learning_rate": 9.945573872415334e-06, "loss": 0.7007, "step": 1560 }, { "epoch": 0.20343615775087856, "grad_norm": 3.1218419075012207, "learning_rate": 9.94526592814018e-06, "loss": 0.6411, "step": 1563 }, { "epoch": 0.2038266302225693, "grad_norm": 2.8320937156677246, "learning_rate": 9.944957119937471e-06, "loss": 0.576, "step": 1566 }, { "epoch": 0.20421710269426005, "grad_norm": 2.7374327182769775, "learning_rate": 9.944647447861154e-06, "loss": 0.5977, "step": 1569 }, { "epoch": 0.2046075751659508, "grad_norm": 3.095136880874634, "learning_rate": 9.944336911965332e-06, "loss": 0.6583, "step": 1572 }, { "epoch": 0.20499804763764154, "grad_norm": 3.028735399246216, "learning_rate": 9.944025512304251e-06, "loss": 0.6727, "step": 1575 }, { "epoch": 0.20538852010933228, "grad_norm": 4.234666347503662, "learning_rate": 9.943713248932314e-06, "loss": 0.6762, "step": 1578 }, { "epoch": 0.20577899258102303, "grad_norm": 2.7858617305755615, "learning_rate": 9.943400121904074e-06, "loss": 0.7175, "step": 1581 }, { "epoch": 0.20616946505271377, "grad_norm": 2.894981861114502, "learning_rate": 9.943086131274231e-06, "loss": 0.6666, "step": 1584 }, { "epoch": 0.20655993752440452, "grad_norm": 2.6675071716308594, "learning_rate": 9.94277127709764e-06, "loss": 0.6473, "step": 1587 }, { "epoch": 0.20695040999609526, "grad_norm": 2.9758174419403076, "learning_rate": 9.942455559429304e-06, "loss": 0.6711, "step": 1590 }, { "epoch": 0.207340882467786, "grad_norm": 4.3356781005859375, "learning_rate": 9.94213897832438e-06, "loss": 0.6205, "step": 1593 }, { "epoch": 0.20773135493947675, "grad_norm": 3.1128265857696533, "learning_rate": 9.941821533838172e-06, "loss": 0.677, "step": 1596 }, { "epoch": 0.20812182741116753, "grad_norm": 3.0443339347839355, "learning_rate": 9.941503226026139e-06, "loss": 0.7659, "step": 1599 }, { "epoch": 0.20851229988285827, "grad_norm": 3.6503114700317383, "learning_rate": 9.941184054943888e-06, "loss": 0.6364, "step": 1602 }, { "epoch": 0.20890277235454902, "grad_norm": 3.119065761566162, "learning_rate": 9.940864020647178e-06, "loss": 0.6696, "step": 1605 }, { "epoch": 0.20929324482623976, "grad_norm": 2.870157241821289, "learning_rate": 9.940543123191916e-06, "loss": 0.7786, "step": 1608 }, { "epoch": 0.2096837172979305, "grad_norm": 2.8346638679504395, "learning_rate": 9.940221362634165e-06, "loss": 0.7752, "step": 1611 }, { "epoch": 0.21007418976962125, "grad_norm": 2.9750826358795166, "learning_rate": 9.939898739030135e-06, "loss": 0.6307, "step": 1614 }, { "epoch": 0.210464662241312, "grad_norm": 3.138920307159424, "learning_rate": 9.939575252436186e-06, "loss": 0.8262, "step": 1617 }, { "epoch": 0.21085513471300274, "grad_norm": 2.8036742210388184, "learning_rate": 9.939250902908832e-06, "loss": 0.801, "step": 1620 }, { "epoch": 0.21124560718469348, "grad_norm": 2.724499225616455, "learning_rate": 9.938925690504737e-06, "loss": 0.65, "step": 1623 }, { "epoch": 0.21163607965638423, "grad_norm": 3.0661230087280273, "learning_rate": 9.938599615280713e-06, "loss": 0.7127, "step": 1626 }, { "epoch": 0.21202655212807497, "grad_norm": 2.5042243003845215, "learning_rate": 9.938272677293727e-06, "loss": 0.6147, "step": 1629 }, { "epoch": 0.21241702459976572, "grad_norm": 3.2207114696502686, "learning_rate": 9.937944876600891e-06, "loss": 0.7055, "step": 1632 }, { "epoch": 0.21280749707145646, "grad_norm": 3.2957639694213867, "learning_rate": 9.937616213259474e-06, "loss": 0.7794, "step": 1635 }, { "epoch": 0.2131979695431472, "grad_norm": 3.5852959156036377, "learning_rate": 9.93728668732689e-06, "loss": 0.6852, "step": 1638 }, { "epoch": 0.21358844201483795, "grad_norm": 3.0781290531158447, "learning_rate": 9.936956298860711e-06, "loss": 0.7384, "step": 1641 }, { "epoch": 0.2139789144865287, "grad_norm": 2.6453282833099365, "learning_rate": 9.93662504791865e-06, "loss": 0.6695, "step": 1644 }, { "epoch": 0.21436938695821944, "grad_norm": 3.183838367462158, "learning_rate": 9.93629293455858e-06, "loss": 0.7207, "step": 1647 }, { "epoch": 0.2147598594299102, "grad_norm": 2.969679832458496, "learning_rate": 9.935959958838519e-06, "loss": 0.6558, "step": 1650 }, { "epoch": 0.21515033190160093, "grad_norm": 2.812744617462158, "learning_rate": 9.935626120816636e-06, "loss": 0.6737, "step": 1653 }, { "epoch": 0.21554080437329168, "grad_norm": 2.789893627166748, "learning_rate": 9.935291420551252e-06, "loss": 0.6972, "step": 1656 }, { "epoch": 0.21593127684498242, "grad_norm": 2.601032018661499, "learning_rate": 9.934955858100838e-06, "loss": 0.6458, "step": 1659 }, { "epoch": 0.21632174931667317, "grad_norm": 3.639371395111084, "learning_rate": 9.93461943352402e-06, "loss": 0.7118, "step": 1662 }, { "epoch": 0.2167122217883639, "grad_norm": 3.122035264968872, "learning_rate": 9.934282146879568e-06, "loss": 0.7283, "step": 1665 }, { "epoch": 0.21710269426005466, "grad_norm": 2.9482860565185547, "learning_rate": 9.933943998226403e-06, "loss": 0.6477, "step": 1668 }, { "epoch": 0.2174931667317454, "grad_norm": 2.530093193054199, "learning_rate": 9.933604987623603e-06, "loss": 0.6032, "step": 1671 }, { "epoch": 0.21788363920343615, "grad_norm": 2.5820152759552, "learning_rate": 9.93326511513039e-06, "loss": 0.6156, "step": 1674 }, { "epoch": 0.2182741116751269, "grad_norm": 3.059326410293579, "learning_rate": 9.93292438080614e-06, "loss": 0.6979, "step": 1677 }, { "epoch": 0.21866458414681764, "grad_norm": 3.3234145641326904, "learning_rate": 9.932582784710377e-06, "loss": 0.6619, "step": 1680 }, { "epoch": 0.21905505661850838, "grad_norm": 2.865112066268921, "learning_rate": 9.932240326902777e-06, "loss": 0.6367, "step": 1683 }, { "epoch": 0.21944552909019915, "grad_norm": 2.950484037399292, "learning_rate": 9.93189700744317e-06, "loss": 0.7528, "step": 1686 }, { "epoch": 0.2198360015618899, "grad_norm": 2.690485715866089, "learning_rate": 9.931552826391529e-06, "loss": 0.6232, "step": 1689 }, { "epoch": 0.22022647403358064, "grad_norm": 2.7388739585876465, "learning_rate": 9.931207783807984e-06, "loss": 0.7912, "step": 1692 }, { "epoch": 0.2206169465052714, "grad_norm": 2.996173620223999, "learning_rate": 9.930861879752814e-06, "loss": 0.7042, "step": 1695 }, { "epoch": 0.22100741897696213, "grad_norm": 3.6289913654327393, "learning_rate": 9.930515114286446e-06, "loss": 0.713, "step": 1698 }, { "epoch": 0.22139789144865288, "grad_norm": 2.484739065170288, "learning_rate": 9.93016748746946e-06, "loss": 0.7148, "step": 1701 }, { "epoch": 0.22178836392034362, "grad_norm": 3.885227918624878, "learning_rate": 9.929818999362585e-06, "loss": 0.5977, "step": 1704 }, { "epoch": 0.22217883639203437, "grad_norm": 3.2628114223480225, "learning_rate": 9.929469650026705e-06, "loss": 0.6511, "step": 1707 }, { "epoch": 0.2225693088637251, "grad_norm": 2.676222085952759, "learning_rate": 9.929119439522843e-06, "loss": 0.7086, "step": 1710 }, { "epoch": 0.22295978133541586, "grad_norm": 2.7704615592956543, "learning_rate": 9.928768367912186e-06, "loss": 0.6214, "step": 1713 }, { "epoch": 0.2233502538071066, "grad_norm": 2.829817295074463, "learning_rate": 9.928416435256062e-06, "loss": 0.7286, "step": 1716 }, { "epoch": 0.22374072627879735, "grad_norm": 4.687877178192139, "learning_rate": 9.928063641615958e-06, "loss": 0.6466, "step": 1719 }, { "epoch": 0.2241311987504881, "grad_norm": 2.777306079864502, "learning_rate": 9.9277099870535e-06, "loss": 0.7183, "step": 1722 }, { "epoch": 0.22452167122217884, "grad_norm": 3.198352575302124, "learning_rate": 9.927355471630475e-06, "loss": 0.6335, "step": 1725 }, { "epoch": 0.22491214369386958, "grad_norm": 3.063441276550293, "learning_rate": 9.927000095408814e-06, "loss": 0.7046, "step": 1728 }, { "epoch": 0.22530261616556033, "grad_norm": 2.7282402515411377, "learning_rate": 9.926643858450602e-06, "loss": 0.6636, "step": 1731 }, { "epoch": 0.22569308863725107, "grad_norm": 3.712812900543213, "learning_rate": 9.926286760818072e-06, "loss": 0.763, "step": 1734 }, { "epoch": 0.22608356110894182, "grad_norm": 2.7162492275238037, "learning_rate": 9.925928802573608e-06, "loss": 0.7452, "step": 1737 }, { "epoch": 0.22647403358063256, "grad_norm": 2.689542770385742, "learning_rate": 9.925569983779744e-06, "loss": 0.6457, "step": 1740 }, { "epoch": 0.2268645060523233, "grad_norm": 3.449857234954834, "learning_rate": 9.925210304499168e-06, "loss": 0.6226, "step": 1743 }, { "epoch": 0.22725497852401405, "grad_norm": 3.3526997566223145, "learning_rate": 9.92484976479471e-06, "loss": 0.6801, "step": 1746 }, { "epoch": 0.2276454509957048, "grad_norm": 3.0590097904205322, "learning_rate": 9.924488364729362e-06, "loss": 0.7149, "step": 1749 }, { "epoch": 0.22803592346739554, "grad_norm": 4.266837120056152, "learning_rate": 9.924126104366255e-06, "loss": 0.6816, "step": 1752 }, { "epoch": 0.22842639593908629, "grad_norm": 2.6770737171173096, "learning_rate": 9.923762983768674e-06, "loss": 0.7303, "step": 1755 }, { "epoch": 0.22881686841077703, "grad_norm": 2.922563314437866, "learning_rate": 9.92339900300006e-06, "loss": 0.6389, "step": 1758 }, { "epoch": 0.22920734088246777, "grad_norm": 3.8620433807373047, "learning_rate": 9.923034162123996e-06, "loss": 0.6251, "step": 1761 }, { "epoch": 0.22959781335415852, "grad_norm": 3.186556339263916, "learning_rate": 9.922668461204222e-06, "loss": 0.8476, "step": 1764 }, { "epoch": 0.22998828582584926, "grad_norm": 3.0054855346679688, "learning_rate": 9.922301900304622e-06, "loss": 0.6719, "step": 1767 }, { "epoch": 0.23037875829754004, "grad_norm": 3.260510206222534, "learning_rate": 9.921934479489236e-06, "loss": 0.6796, "step": 1770 }, { "epoch": 0.23076923076923078, "grad_norm": 3.021167755126953, "learning_rate": 9.921566198822252e-06, "loss": 0.667, "step": 1773 }, { "epoch": 0.23115970324092153, "grad_norm": 3.6485886573791504, "learning_rate": 9.921197058368005e-06, "loss": 0.7357, "step": 1776 }, { "epoch": 0.23155017571261227, "grad_norm": 2.8014872074127197, "learning_rate": 9.920827058190984e-06, "loss": 0.8078, "step": 1779 }, { "epoch": 0.23194064818430302, "grad_norm": 2.7139265537261963, "learning_rate": 9.92045619835583e-06, "loss": 0.7207, "step": 1782 }, { "epoch": 0.23233112065599376, "grad_norm": 2.5168399810791016, "learning_rate": 9.920084478927327e-06, "loss": 0.5873, "step": 1785 }, { "epoch": 0.2327215931276845, "grad_norm": 3.7160627841949463, "learning_rate": 9.919711899970417e-06, "loss": 0.7382, "step": 1788 }, { "epoch": 0.23311206559937525, "grad_norm": 2.766188383102417, "learning_rate": 9.919338461550188e-06, "loss": 0.6981, "step": 1791 }, { "epoch": 0.233502538071066, "grad_norm": 3.4832241535186768, "learning_rate": 9.918964163731878e-06, "loss": 0.6844, "step": 1794 }, { "epoch": 0.23389301054275674, "grad_norm": 2.998603582382202, "learning_rate": 9.918589006580877e-06, "loss": 0.6986, "step": 1797 }, { "epoch": 0.23428348301444749, "grad_norm": 2.618968963623047, "learning_rate": 9.918212990162724e-06, "loss": 0.6967, "step": 1800 }, { "epoch": 0.23467395548613823, "grad_norm": 3.121068000793457, "learning_rate": 9.917836114543105e-06, "loss": 0.5803, "step": 1803 }, { "epoch": 0.23506442795782898, "grad_norm": 2.8870410919189453, "learning_rate": 9.917458379787865e-06, "loss": 0.66, "step": 1806 }, { "epoch": 0.23545490042951972, "grad_norm": 3.4096627235412598, "learning_rate": 9.917079785962991e-06, "loss": 0.644, "step": 1809 }, { "epoch": 0.23584537290121046, "grad_norm": 3.3763647079467773, "learning_rate": 9.916700333134622e-06, "loss": 0.7743, "step": 1812 }, { "epoch": 0.2362358453729012, "grad_norm": 3.2882561683654785, "learning_rate": 9.916320021369049e-06, "loss": 0.7217, "step": 1815 }, { "epoch": 0.23662631784459195, "grad_norm": 2.849679946899414, "learning_rate": 9.91593885073271e-06, "loss": 0.6473, "step": 1818 }, { "epoch": 0.2370167903162827, "grad_norm": 2.5509214401245117, "learning_rate": 9.915556821292194e-06, "loss": 0.6598, "step": 1821 }, { "epoch": 0.23740726278797344, "grad_norm": 4.192689895629883, "learning_rate": 9.915173933114243e-06, "loss": 0.737, "step": 1824 }, { "epoch": 0.2377977352596642, "grad_norm": 3.805640459060669, "learning_rate": 9.914790186265747e-06, "loss": 0.6514, "step": 1827 }, { "epoch": 0.23818820773135493, "grad_norm": 2.7506203651428223, "learning_rate": 9.914405580813744e-06, "loss": 0.6088, "step": 1830 }, { "epoch": 0.23857868020304568, "grad_norm": 2.9429638385772705, "learning_rate": 9.914020116825425e-06, "loss": 0.6348, "step": 1833 }, { "epoch": 0.23896915267473642, "grad_norm": 2.8457984924316406, "learning_rate": 9.913633794368128e-06, "loss": 0.6043, "step": 1836 }, { "epoch": 0.23935962514642717, "grad_norm": 3.119004011154175, "learning_rate": 9.913246613509344e-06, "loss": 0.6326, "step": 1839 }, { "epoch": 0.2397500976181179, "grad_norm": 2.674212694168091, "learning_rate": 9.912858574316714e-06, "loss": 0.6505, "step": 1842 }, { "epoch": 0.24014057008980866, "grad_norm": 4.301478862762451, "learning_rate": 9.912469676858025e-06, "loss": 0.6864, "step": 1845 }, { "epoch": 0.2405310425614994, "grad_norm": 3.199939727783203, "learning_rate": 9.912079921201216e-06, "loss": 0.716, "step": 1848 }, { "epoch": 0.24092151503319015, "grad_norm": 3.2009308338165283, "learning_rate": 9.911689307414381e-06, "loss": 0.7442, "step": 1851 }, { "epoch": 0.2413119875048809, "grad_norm": 4.463657855987549, "learning_rate": 9.911297835565755e-06, "loss": 0.6488, "step": 1854 }, { "epoch": 0.24170245997657167, "grad_norm": 2.5389511585235596, "learning_rate": 9.91090550572373e-06, "loss": 0.6598, "step": 1857 }, { "epoch": 0.2420929324482624, "grad_norm": 2.7800350189208984, "learning_rate": 9.910512317956845e-06, "loss": 0.6538, "step": 1860 }, { "epoch": 0.24248340491995315, "grad_norm": 4.617105007171631, "learning_rate": 9.910118272333787e-06, "loss": 0.6534, "step": 1863 }, { "epoch": 0.2428738773916439, "grad_norm": 4.014925479888916, "learning_rate": 9.909723368923397e-06, "loss": 0.7108, "step": 1866 }, { "epoch": 0.24326434986333464, "grad_norm": 3.7771573066711426, "learning_rate": 9.909327607794663e-06, "loss": 0.7403, "step": 1869 }, { "epoch": 0.2436548223350254, "grad_norm": 4.131522178649902, "learning_rate": 9.908930989016723e-06, "loss": 0.6846, "step": 1872 }, { "epoch": 0.24404529480671613, "grad_norm": 2.5078864097595215, "learning_rate": 9.908533512658867e-06, "loss": 0.7045, "step": 1875 }, { "epoch": 0.24443576727840688, "grad_norm": 3.6496095657348633, "learning_rate": 9.90813517879053e-06, "loss": 0.6804, "step": 1878 }, { "epoch": 0.24482623975009762, "grad_norm": 3.153731107711792, "learning_rate": 9.907735987481306e-06, "loss": 0.77, "step": 1881 }, { "epoch": 0.24521671222178837, "grad_norm": 3.234714984893799, "learning_rate": 9.907335938800925e-06, "loss": 0.6396, "step": 1884 }, { "epoch": 0.2456071846934791, "grad_norm": 4.209356784820557, "learning_rate": 9.906935032819283e-06, "loss": 0.6937, "step": 1887 }, { "epoch": 0.24599765716516986, "grad_norm": 2.8862154483795166, "learning_rate": 9.906533269606412e-06, "loss": 0.7209, "step": 1890 }, { "epoch": 0.2463881296368606, "grad_norm": 3.8933069705963135, "learning_rate": 9.9061306492325e-06, "loss": 0.7016, "step": 1893 }, { "epoch": 0.24677860210855135, "grad_norm": 3.536663770675659, "learning_rate": 9.905727171767885e-06, "loss": 0.6479, "step": 1896 }, { "epoch": 0.2471690745802421, "grad_norm": 2.742396354675293, "learning_rate": 9.905322837283054e-06, "loss": 0.7213, "step": 1899 }, { "epoch": 0.24755954705193284, "grad_norm": 3.2880959510803223, "learning_rate": 9.904917645848642e-06, "loss": 0.7038, "step": 1902 }, { "epoch": 0.24795001952362358, "grad_norm": 2.86783504486084, "learning_rate": 9.904511597535435e-06, "loss": 0.7333, "step": 1905 }, { "epoch": 0.24834049199531433, "grad_norm": 2.66743803024292, "learning_rate": 9.904104692414372e-06, "loss": 0.6619, "step": 1908 }, { "epoch": 0.24873096446700507, "grad_norm": 2.8109350204467773, "learning_rate": 9.903696930556534e-06, "loss": 0.6984, "step": 1911 }, { "epoch": 0.24912143693869582, "grad_norm": 4.756025791168213, "learning_rate": 9.903288312033158e-06, "loss": 0.6782, "step": 1914 }, { "epoch": 0.24951190941038656, "grad_norm": 3.219858169555664, "learning_rate": 9.902878836915628e-06, "loss": 0.7427, "step": 1917 }, { "epoch": 0.2499023818820773, "grad_norm": 2.6937522888183594, "learning_rate": 9.902468505275481e-06, "loss": 0.6722, "step": 1920 }, { "epoch": 0.2502928543537681, "grad_norm": 3.728074073791504, "learning_rate": 9.9020573171844e-06, "loss": 0.667, "step": 1923 }, { "epoch": 0.2506833268254588, "grad_norm": 2.520673990249634, "learning_rate": 9.901645272714216e-06, "loss": 0.6062, "step": 1926 }, { "epoch": 0.25107379929714957, "grad_norm": 3.0606751441955566, "learning_rate": 9.901232371936916e-06, "loss": 0.6204, "step": 1929 }, { "epoch": 0.2514642717688403, "grad_norm": 3.0006275177001953, "learning_rate": 9.90081861492463e-06, "loss": 0.7612, "step": 1932 }, { "epoch": 0.25185474424053106, "grad_norm": 3.9713451862335205, "learning_rate": 9.900404001749643e-06, "loss": 0.7894, "step": 1935 }, { "epoch": 0.2522452167122218, "grad_norm": 2.761209726333618, "learning_rate": 9.899988532484386e-06, "loss": 0.6158, "step": 1938 }, { "epoch": 0.25263568918391255, "grad_norm": 2.8086750507354736, "learning_rate": 9.89957220720144e-06, "loss": 0.6952, "step": 1941 }, { "epoch": 0.25302616165560327, "grad_norm": 5.129540920257568, "learning_rate": 9.899155025973535e-06, "loss": 0.6314, "step": 1944 }, { "epoch": 0.25341663412729404, "grad_norm": 2.605057954788208, "learning_rate": 9.898736988873555e-06, "loss": 0.7729, "step": 1947 }, { "epoch": 0.25380710659898476, "grad_norm": 4.0138044357299805, "learning_rate": 9.898318095974529e-06, "loss": 0.7074, "step": 1950 }, { "epoch": 0.25419757907067553, "grad_norm": 2.733633041381836, "learning_rate": 9.897898347349635e-06, "loss": 0.6754, "step": 1953 }, { "epoch": 0.25458805154236624, "grad_norm": 2.873657703399658, "learning_rate": 9.897477743072203e-06, "loss": 0.6609, "step": 1956 }, { "epoch": 0.254978524014057, "grad_norm": 3.4984524250030518, "learning_rate": 9.897056283215713e-06, "loss": 0.6305, "step": 1959 }, { "epoch": 0.25536899648574773, "grad_norm": 2.7125892639160156, "learning_rate": 9.896633967853793e-06, "loss": 0.6463, "step": 1962 }, { "epoch": 0.2557594689574385, "grad_norm": 2.670562505722046, "learning_rate": 9.896210797060218e-06, "loss": 0.6669, "step": 1965 }, { "epoch": 0.2561499414291292, "grad_norm": 3.608865261077881, "learning_rate": 9.895786770908918e-06, "loss": 0.6898, "step": 1968 }, { "epoch": 0.25654041390082, "grad_norm": 3.5786221027374268, "learning_rate": 9.895361889473969e-06, "loss": 0.7525, "step": 1971 }, { "epoch": 0.2569308863725107, "grad_norm": 3.3023688793182373, "learning_rate": 9.894936152829595e-06, "loss": 0.7672, "step": 1974 }, { "epoch": 0.2573213588442015, "grad_norm": 2.93623948097229, "learning_rate": 9.894509561050173e-06, "loss": 0.7114, "step": 1977 }, { "epoch": 0.2577118313158922, "grad_norm": 3.6028199195861816, "learning_rate": 9.894082114210226e-06, "loss": 0.6342, "step": 1980 }, { "epoch": 0.258102303787583, "grad_norm": 3.522853374481201, "learning_rate": 9.893653812384432e-06, "loss": 0.6205, "step": 1983 }, { "epoch": 0.25849277625927375, "grad_norm": 2.625506639480591, "learning_rate": 9.893224655647609e-06, "loss": 0.6737, "step": 1986 }, { "epoch": 0.25888324873096447, "grad_norm": 3.548536777496338, "learning_rate": 9.892794644074735e-06, "loss": 0.6586, "step": 1989 }, { "epoch": 0.25927372120265524, "grad_norm": 4.54545783996582, "learning_rate": 9.892363777740928e-06, "loss": 0.7415, "step": 1992 }, { "epoch": 0.25966419367434596, "grad_norm": 2.687030792236328, "learning_rate": 9.89193205672146e-06, "loss": 0.5988, "step": 1995 }, { "epoch": 0.26005466614603673, "grad_norm": 4.913947582244873, "learning_rate": 9.891499481091755e-06, "loss": 0.707, "step": 1998 }, { "epoch": 0.26044513861772745, "grad_norm": 2.828240394592285, "learning_rate": 9.891066050927381e-06, "loss": 0.6659, "step": 2001 }, { "epoch": 0.2608356110894182, "grad_norm": 2.456120014190674, "learning_rate": 9.890631766304054e-06, "loss": 0.743, "step": 2004 }, { "epoch": 0.26122608356110893, "grad_norm": 3.4322896003723145, "learning_rate": 9.890196627297649e-06, "loss": 0.6586, "step": 2007 }, { "epoch": 0.2616165560327997, "grad_norm": 2.4019174575805664, "learning_rate": 9.88976063398418e-06, "loss": 0.6277, "step": 2010 }, { "epoch": 0.2620070285044904, "grad_norm": 2.6000640392303467, "learning_rate": 9.889323786439815e-06, "loss": 0.6874, "step": 2013 }, { "epoch": 0.2623975009761812, "grad_norm": 2.7879741191864014, "learning_rate": 9.88888608474087e-06, "loss": 0.6844, "step": 2016 }, { "epoch": 0.2627879734478719, "grad_norm": 2.620607852935791, "learning_rate": 9.888447528963809e-06, "loss": 0.6139, "step": 2019 }, { "epoch": 0.2631784459195627, "grad_norm": 3.1650354862213135, "learning_rate": 9.88800811918525e-06, "loss": 0.6203, "step": 2022 }, { "epoch": 0.2635689183912534, "grad_norm": 2.8393919467926025, "learning_rate": 9.887567855481955e-06, "loss": 0.6931, "step": 2025 }, { "epoch": 0.2639593908629442, "grad_norm": 3.6173481941223145, "learning_rate": 9.88712673793084e-06, "loss": 0.6675, "step": 2028 }, { "epoch": 0.2643498633346349, "grad_norm": 3.9958341121673584, "learning_rate": 9.88668476660896e-06, "loss": 0.6857, "step": 2031 }, { "epoch": 0.26474033580632567, "grad_norm": 2.9784352779388428, "learning_rate": 9.886241941593535e-06, "loss": 0.7343, "step": 2034 }, { "epoch": 0.2651308082780164, "grad_norm": 3.043720006942749, "learning_rate": 9.885798262961921e-06, "loss": 0.6138, "step": 2037 }, { "epoch": 0.26552128074970716, "grad_norm": 2.8800082206726074, "learning_rate": 9.88535373079163e-06, "loss": 0.7174, "step": 2040 }, { "epoch": 0.2659117532213979, "grad_norm": 2.96820330619812, "learning_rate": 9.884908345160318e-06, "loss": 0.6844, "step": 2043 }, { "epoch": 0.26630222569308865, "grad_norm": 4.006218910217285, "learning_rate": 9.884462106145794e-06, "loss": 0.5799, "step": 2046 }, { "epoch": 0.26669269816477936, "grad_norm": 4.129579544067383, "learning_rate": 9.884015013826015e-06, "loss": 0.7025, "step": 2049 }, { "epoch": 0.26708317063647014, "grad_norm": 6.496399879455566, "learning_rate": 9.88356706827909e-06, "loss": 0.6598, "step": 2052 }, { "epoch": 0.26747364310816085, "grad_norm": 10.179603576660156, "learning_rate": 9.88311826958327e-06, "loss": 0.7294, "step": 2055 }, { "epoch": 0.2678641155798516, "grad_norm": 4.363855838775635, "learning_rate": 9.882668617816962e-06, "loss": 0.6529, "step": 2058 }, { "epoch": 0.26825458805154234, "grad_norm": 2.7566702365875244, "learning_rate": 9.882218113058716e-06, "loss": 0.6574, "step": 2061 }, { "epoch": 0.2686450605232331, "grad_norm": 2.922041416168213, "learning_rate": 9.88176675538724e-06, "loss": 0.6732, "step": 2064 }, { "epoch": 0.26903553299492383, "grad_norm": 4.118600368499756, "learning_rate": 9.881314544881377e-06, "loss": 0.6912, "step": 2067 }, { "epoch": 0.2694260054666146, "grad_norm": 2.772783041000366, "learning_rate": 9.880861481620134e-06, "loss": 0.7012, "step": 2070 }, { "epoch": 0.2698164779383054, "grad_norm": 2.855739116668701, "learning_rate": 9.88040756568266e-06, "loss": 0.7134, "step": 2073 }, { "epoch": 0.2702069504099961, "grad_norm": 2.6456918716430664, "learning_rate": 9.879952797148249e-06, "loss": 0.6548, "step": 2076 }, { "epoch": 0.27059742288168687, "grad_norm": 2.943904399871826, "learning_rate": 9.87949717609635e-06, "loss": 0.6378, "step": 2079 }, { "epoch": 0.2709878953533776, "grad_norm": 2.9040253162384033, "learning_rate": 9.87904070260656e-06, "loss": 0.649, "step": 2082 }, { "epoch": 0.27137836782506836, "grad_norm": 2.9723544120788574, "learning_rate": 9.878583376758623e-06, "loss": 0.705, "step": 2085 }, { "epoch": 0.2717688402967591, "grad_norm": 3.080580949783325, "learning_rate": 9.878125198632433e-06, "loss": 0.7454, "step": 2088 }, { "epoch": 0.27215931276844985, "grad_norm": 2.676499843597412, "learning_rate": 9.877666168308034e-06, "loss": 0.7742, "step": 2091 }, { "epoch": 0.27254978524014056, "grad_norm": 2.981276035308838, "learning_rate": 9.877206285865614e-06, "loss": 0.7173, "step": 2094 }, { "epoch": 0.27294025771183134, "grad_norm": 2.9143402576446533, "learning_rate": 9.876745551385519e-06, "loss": 0.6747, "step": 2097 }, { "epoch": 0.27333073018352205, "grad_norm": 2.7891063690185547, "learning_rate": 9.876283964948232e-06, "loss": 0.5657, "step": 2100 }, { "epoch": 0.2737212026552128, "grad_norm": 2.83236026763916, "learning_rate": 9.875821526634397e-06, "loss": 0.7013, "step": 2103 }, { "epoch": 0.27411167512690354, "grad_norm": 2.9020628929138184, "learning_rate": 9.875358236524798e-06, "loss": 0.6061, "step": 2106 }, { "epoch": 0.2745021475985943, "grad_norm": 2.637514352798462, "learning_rate": 9.874894094700372e-06, "loss": 0.6832, "step": 2109 }, { "epoch": 0.27489262007028503, "grad_norm": 2.8690152168273926, "learning_rate": 9.874429101242202e-06, "loss": 0.7544, "step": 2112 }, { "epoch": 0.2752830925419758, "grad_norm": 3.029694080352783, "learning_rate": 9.873963256231522e-06, "loss": 0.7404, "step": 2115 }, { "epoch": 0.2756735650136665, "grad_norm": 3.4346494674682617, "learning_rate": 9.873496559749716e-06, "loss": 0.6089, "step": 2118 }, { "epoch": 0.2760640374853573, "grad_norm": 2.6336498260498047, "learning_rate": 9.873029011878312e-06, "loss": 0.7007, "step": 2121 }, { "epoch": 0.276454509957048, "grad_norm": 2.4632461071014404, "learning_rate": 9.872560612698992e-06, "loss": 0.6416, "step": 2124 }, { "epoch": 0.2768449824287388, "grad_norm": 2.5596401691436768, "learning_rate": 9.872091362293581e-06, "loss": 0.6107, "step": 2127 }, { "epoch": 0.2772354549004295, "grad_norm": 3.3999154567718506, "learning_rate": 9.87162126074406e-06, "loss": 0.6935, "step": 2130 }, { "epoch": 0.2776259273721203, "grad_norm": 3.2630927562713623, "learning_rate": 9.871150308132554e-06, "loss": 0.6553, "step": 2133 }, { "epoch": 0.278016399843811, "grad_norm": 3.5304367542266846, "learning_rate": 9.870678504541336e-06, "loss": 0.7273, "step": 2136 }, { "epoch": 0.27840687231550176, "grad_norm": 3.2613844871520996, "learning_rate": 9.87020585005283e-06, "loss": 0.6989, "step": 2139 }, { "epoch": 0.2787973447871925, "grad_norm": 2.5427675247192383, "learning_rate": 9.869732344749605e-06, "loss": 0.7172, "step": 2142 }, { "epoch": 0.27918781725888325, "grad_norm": 2.583522081375122, "learning_rate": 9.869257988714386e-06, "loss": 0.688, "step": 2145 }, { "epoch": 0.27957828973057397, "grad_norm": 3.7895023822784424, "learning_rate": 9.86878278203004e-06, "loss": 0.6906, "step": 2148 }, { "epoch": 0.27996876220226474, "grad_norm": 2.4927175045013428, "learning_rate": 9.868306724779584e-06, "loss": 0.661, "step": 2151 }, { "epoch": 0.28035923467395546, "grad_norm": 2.5533857345581055, "learning_rate": 9.867829817046183e-06, "loss": 0.6139, "step": 2154 }, { "epoch": 0.28074970714564623, "grad_norm": 3.4451935291290283, "learning_rate": 9.867352058913156e-06, "loss": 0.78, "step": 2157 }, { "epoch": 0.281140179617337, "grad_norm": 2.7176883220672607, "learning_rate": 9.866873450463963e-06, "loss": 0.7304, "step": 2160 }, { "epoch": 0.2815306520890277, "grad_norm": 2.8105108737945557, "learning_rate": 9.866393991782215e-06, "loss": 0.6057, "step": 2163 }, { "epoch": 0.2819211245607185, "grad_norm": 4.555068492889404, "learning_rate": 9.865913682951675e-06, "loss": 0.7168, "step": 2166 }, { "epoch": 0.2823115970324092, "grad_norm": 4.762774467468262, "learning_rate": 9.865432524056252e-06, "loss": 0.6895, "step": 2169 }, { "epoch": 0.2827020695041, "grad_norm": 2.9473941326141357, "learning_rate": 9.86495051518e-06, "loss": 0.6986, "step": 2172 }, { "epoch": 0.2830925419757907, "grad_norm": 3.883134603500366, "learning_rate": 9.86446765640713e-06, "loss": 0.6505, "step": 2175 }, { "epoch": 0.2834830144474815, "grad_norm": 2.467970132827759, "learning_rate": 9.863983947821993e-06, "loss": 0.7058, "step": 2178 }, { "epoch": 0.2838734869191722, "grad_norm": 2.5856029987335205, "learning_rate": 9.863499389509092e-06, "loss": 0.6564, "step": 2181 }, { "epoch": 0.28426395939086296, "grad_norm": 3.4759273529052734, "learning_rate": 9.86301398155308e-06, "loss": 0.644, "step": 2184 }, { "epoch": 0.2846544318625537, "grad_norm": 3.2896201610565186, "learning_rate": 9.862527724038755e-06, "loss": 0.6862, "step": 2187 }, { "epoch": 0.28504490433424445, "grad_norm": 3.064748764038086, "learning_rate": 9.862040617051065e-06, "loss": 0.6644, "step": 2190 }, { "epoch": 0.28543537680593517, "grad_norm": 2.8241119384765625, "learning_rate": 9.861552660675109e-06, "loss": 0.6167, "step": 2193 }, { "epoch": 0.28582584927762594, "grad_norm": 2.6539626121520996, "learning_rate": 9.86106385499613e-06, "loss": 0.6695, "step": 2196 }, { "epoch": 0.28621632174931666, "grad_norm": 3.7717602252960205, "learning_rate": 9.86057420009952e-06, "loss": 0.7306, "step": 2199 }, { "epoch": 0.28660679422100743, "grad_norm": 3.5839555263519287, "learning_rate": 9.860083696070825e-06, "loss": 0.6344, "step": 2202 }, { "epoch": 0.28699726669269815, "grad_norm": 2.718700408935547, "learning_rate": 9.85959234299573e-06, "loss": 0.6781, "step": 2205 }, { "epoch": 0.2873877391643889, "grad_norm": 3.010596990585327, "learning_rate": 9.859100140960079e-06, "loss": 0.7763, "step": 2208 }, { "epoch": 0.28777821163607964, "grad_norm": 2.9677681922912598, "learning_rate": 9.858607090049851e-06, "loss": 0.6675, "step": 2211 }, { "epoch": 0.2881686841077704, "grad_norm": 2.874814510345459, "learning_rate": 9.858113190351189e-06, "loss": 0.8113, "step": 2214 }, { "epoch": 0.28855915657946113, "grad_norm": 2.915309190750122, "learning_rate": 9.857618441950372e-06, "loss": 0.7219, "step": 2217 }, { "epoch": 0.2889496290511519, "grad_norm": 3.3547255992889404, "learning_rate": 9.857122844933831e-06, "loss": 0.689, "step": 2220 }, { "epoch": 0.2893401015228426, "grad_norm": 2.769690752029419, "learning_rate": 9.856626399388146e-06, "loss": 0.6282, "step": 2223 }, { "epoch": 0.2897305739945334, "grad_norm": 2.8011720180511475, "learning_rate": 9.856129105400048e-06, "loss": 0.6347, "step": 2226 }, { "epoch": 0.2901210464662241, "grad_norm": 4.004727363586426, "learning_rate": 9.855630963056411e-06, "loss": 0.6332, "step": 2229 }, { "epoch": 0.2905115189379149, "grad_norm": 3.3377795219421387, "learning_rate": 9.855131972444259e-06, "loss": 0.7273, "step": 2232 }, { "epoch": 0.2909019914096056, "grad_norm": 4.1533379554748535, "learning_rate": 9.854632133650765e-06, "loss": 0.6631, "step": 2235 }, { "epoch": 0.29129246388129637, "grad_norm": 3.070554256439209, "learning_rate": 9.85413144676325e-06, "loss": 0.6178, "step": 2238 }, { "epoch": 0.2916829363529871, "grad_norm": 4.225495338439941, "learning_rate": 9.853629911869182e-06, "loss": 0.6509, "step": 2241 }, { "epoch": 0.29207340882467786, "grad_norm": 3.233717441558838, "learning_rate": 9.853127529056182e-06, "loss": 0.7106, "step": 2244 }, { "epoch": 0.29246388129636863, "grad_norm": 2.8367974758148193, "learning_rate": 9.852624298412008e-06, "loss": 0.6732, "step": 2247 }, { "epoch": 0.29285435376805935, "grad_norm": 3.421760320663452, "learning_rate": 9.852120220024579e-06, "loss": 0.7217, "step": 2250 }, { "epoch": 0.2932448262397501, "grad_norm": 3.042901039123535, "learning_rate": 9.851615293981956e-06, "loss": 0.7075, "step": 2253 }, { "epoch": 0.29363529871144084, "grad_norm": 2.7977664470672607, "learning_rate": 9.851109520372346e-06, "loss": 0.6137, "step": 2256 }, { "epoch": 0.2940257711831316, "grad_norm": 2.4614906311035156, "learning_rate": 9.85060289928411e-06, "loss": 0.621, "step": 2259 }, { "epoch": 0.29441624365482233, "grad_norm": 2.9462318420410156, "learning_rate": 9.85009543080575e-06, "loss": 0.7196, "step": 2262 }, { "epoch": 0.2948067161265131, "grad_norm": 2.9534754753112793, "learning_rate": 9.849587115025923e-06, "loss": 0.6909, "step": 2265 }, { "epoch": 0.2951971885982038, "grad_norm": 2.725046157836914, "learning_rate": 9.849077952033427e-06, "loss": 0.676, "step": 2268 }, { "epoch": 0.2955876610698946, "grad_norm": 2.515141487121582, "learning_rate": 9.848567941917216e-06, "loss": 0.6987, "step": 2271 }, { "epoch": 0.2959781335415853, "grad_norm": 2.8634941577911377, "learning_rate": 9.848057084766382e-06, "loss": 0.6337, "step": 2274 }, { "epoch": 0.2963686060132761, "grad_norm": 3.2547008991241455, "learning_rate": 9.847545380670176e-06, "loss": 0.6618, "step": 2277 }, { "epoch": 0.2967590784849668, "grad_norm": 3.159667730331421, "learning_rate": 9.847032829717991e-06, "loss": 0.6848, "step": 2280 }, { "epoch": 0.29714955095665757, "grad_norm": 3.1204230785369873, "learning_rate": 9.846519431999366e-06, "loss": 0.6681, "step": 2283 }, { "epoch": 0.2975400234283483, "grad_norm": 2.5976922512054443, "learning_rate": 9.846005187603992e-06, "loss": 0.8029, "step": 2286 }, { "epoch": 0.29793049590003906, "grad_norm": 3.485644578933716, "learning_rate": 9.845490096621706e-06, "loss": 0.6945, "step": 2289 }, { "epoch": 0.2983209683717298, "grad_norm": 3.9062230587005615, "learning_rate": 9.844974159142494e-06, "loss": 0.6611, "step": 2292 }, { "epoch": 0.29871144084342055, "grad_norm": 2.7590322494506836, "learning_rate": 9.844457375256488e-06, "loss": 0.6508, "step": 2295 }, { "epoch": 0.29910191331511127, "grad_norm": 2.883934736251831, "learning_rate": 9.84393974505397e-06, "loss": 0.7574, "step": 2298 }, { "epoch": 0.29949238578680204, "grad_norm": 2.7723381519317627, "learning_rate": 9.843421268625371e-06, "loss": 0.604, "step": 2301 }, { "epoch": 0.29988285825849276, "grad_norm": 2.5612845420837402, "learning_rate": 9.842901946061263e-06, "loss": 0.6271, "step": 2304 }, { "epoch": 0.30027333073018353, "grad_norm": 2.4496188163757324, "learning_rate": 9.842381777452373e-06, "loss": 0.6317, "step": 2307 }, { "epoch": 0.30066380320187425, "grad_norm": 2.936002731323242, "learning_rate": 9.841860762889574e-06, "loss": 0.6229, "step": 2310 }, { "epoch": 0.301054275673565, "grad_norm": 2.9964542388916016, "learning_rate": 9.841338902463885e-06, "loss": 0.667, "step": 2313 }, { "epoch": 0.30144474814525574, "grad_norm": 3.358015775680542, "learning_rate": 9.840816196266475e-06, "loss": 0.6421, "step": 2316 }, { "epoch": 0.3018352206169465, "grad_norm": 3.8633599281311035, "learning_rate": 9.840292644388659e-06, "loss": 0.6912, "step": 2319 }, { "epoch": 0.3022256930886372, "grad_norm": 2.9916861057281494, "learning_rate": 9.839768246921901e-06, "loss": 0.6766, "step": 2322 }, { "epoch": 0.302616165560328, "grad_norm": 3.3866844177246094, "learning_rate": 9.83924300395781e-06, "loss": 0.6397, "step": 2325 }, { "epoch": 0.3030066380320187, "grad_norm": 2.5590834617614746, "learning_rate": 9.838716915588148e-06, "loss": 0.7354, "step": 2328 }, { "epoch": 0.3033971105037095, "grad_norm": 2.6998791694641113, "learning_rate": 9.83818998190482e-06, "loss": 0.7159, "step": 2331 }, { "epoch": 0.30378758297540026, "grad_norm": 2.2209694385528564, "learning_rate": 9.837662202999879e-06, "loss": 0.677, "step": 2334 }, { "epoch": 0.304178055447091, "grad_norm": 3.1843392848968506, "learning_rate": 9.83713357896553e-06, "loss": 0.6385, "step": 2337 }, { "epoch": 0.30456852791878175, "grad_norm": 2.818430185317993, "learning_rate": 9.836604109894118e-06, "loss": 0.643, "step": 2340 }, { "epoch": 0.30495900039047247, "grad_norm": 2.380825996398926, "learning_rate": 9.836073795878144e-06, "loss": 0.6909, "step": 2343 }, { "epoch": 0.30534947286216324, "grad_norm": 2.392238140106201, "learning_rate": 9.835542637010253e-06, "loss": 0.6184, "step": 2346 }, { "epoch": 0.30573994533385396, "grad_norm": 3.895524263381958, "learning_rate": 9.835010633383234e-06, "loss": 0.735, "step": 2349 }, { "epoch": 0.30613041780554473, "grad_norm": 2.8263940811157227, "learning_rate": 9.834477785090028e-06, "loss": 0.6762, "step": 2352 }, { "epoch": 0.30652089027723545, "grad_norm": 3.5129168033599854, "learning_rate": 9.833944092223725e-06, "loss": 0.746, "step": 2355 }, { "epoch": 0.3069113627489262, "grad_norm": 2.720536947250366, "learning_rate": 9.833409554877558e-06, "loss": 0.6833, "step": 2358 }, { "epoch": 0.30730183522061694, "grad_norm": 2.6948177814483643, "learning_rate": 9.83287417314491e-06, "loss": 0.6223, "step": 2361 }, { "epoch": 0.3076923076923077, "grad_norm": 2.750875234603882, "learning_rate": 9.832337947119311e-06, "loss": 0.7024, "step": 2364 }, { "epoch": 0.3080827801639984, "grad_norm": 3.7236342430114746, "learning_rate": 9.831800876894436e-06, "loss": 0.7258, "step": 2367 }, { "epoch": 0.3084732526356892, "grad_norm": 2.5590686798095703, "learning_rate": 9.831262962564114e-06, "loss": 0.6718, "step": 2370 }, { "epoch": 0.3088637251073799, "grad_norm": 3.692631483078003, "learning_rate": 9.830724204222316e-06, "loss": 0.6383, "step": 2373 }, { "epoch": 0.3092541975790707, "grad_norm": 2.6844470500946045, "learning_rate": 9.830184601963162e-06, "loss": 0.6747, "step": 2376 }, { "epoch": 0.3096446700507614, "grad_norm": 3.532243490219116, "learning_rate": 9.82964415588092e-06, "loss": 0.72, "step": 2379 }, { "epoch": 0.3100351425224522, "grad_norm": 2.9188520908355713, "learning_rate": 9.829102866070002e-06, "loss": 0.6895, "step": 2382 }, { "epoch": 0.3104256149941429, "grad_norm": 2.847670316696167, "learning_rate": 9.828560732624974e-06, "loss": 0.7294, "step": 2385 }, { "epoch": 0.31081608746583367, "grad_norm": 2.8164069652557373, "learning_rate": 9.828017755640543e-06, "loss": 0.7019, "step": 2388 }, { "epoch": 0.3112065599375244, "grad_norm": 2.716837167739868, "learning_rate": 9.827473935211567e-06, "loss": 0.7291, "step": 2391 }, { "epoch": 0.31159703240921516, "grad_norm": 2.867050886154175, "learning_rate": 9.82692927143305e-06, "loss": 0.6845, "step": 2394 }, { "epoch": 0.3119875048809059, "grad_norm": 2.4712677001953125, "learning_rate": 9.826383764400143e-06, "loss": 0.5568, "step": 2397 }, { "epoch": 0.31237797735259665, "grad_norm": 2.6688764095306396, "learning_rate": 9.825837414208147e-06, "loss": 0.6441, "step": 2400 }, { "epoch": 0.31276844982428736, "grad_norm": 2.8412606716156006, "learning_rate": 9.825290220952507e-06, "loss": 0.7189, "step": 2403 }, { "epoch": 0.31315892229597814, "grad_norm": 4.064339637756348, "learning_rate": 9.824742184728815e-06, "loss": 0.595, "step": 2406 }, { "epoch": 0.31354939476766885, "grad_norm": 2.6639394760131836, "learning_rate": 9.824193305632814e-06, "loss": 0.6398, "step": 2409 }, { "epoch": 0.3139398672393596, "grad_norm": 3.3248283863067627, "learning_rate": 9.823643583760389e-06, "loss": 0.6377, "step": 2412 }, { "epoch": 0.31433033971105034, "grad_norm": 4.568389415740967, "learning_rate": 9.823093019207578e-06, "loss": 0.687, "step": 2415 }, { "epoch": 0.3147208121827411, "grad_norm": 3.337682008743286, "learning_rate": 9.822541612070566e-06, "loss": 0.6771, "step": 2418 }, { "epoch": 0.3151112846544319, "grad_norm": 2.871934652328491, "learning_rate": 9.821989362445676e-06, "loss": 0.7428, "step": 2421 }, { "epoch": 0.3155017571261226, "grad_norm": 3.3546926975250244, "learning_rate": 9.82143627042939e-06, "loss": 0.6353, "step": 2424 }, { "epoch": 0.3158922295978134, "grad_norm": 2.4146947860717773, "learning_rate": 9.820882336118332e-06, "loss": 0.6301, "step": 2427 }, { "epoch": 0.3162827020695041, "grad_norm": 2.7984747886657715, "learning_rate": 9.820327559609268e-06, "loss": 0.5877, "step": 2430 }, { "epoch": 0.31667317454119487, "grad_norm": 4.265294551849365, "learning_rate": 9.819771940999123e-06, "loss": 0.6227, "step": 2433 }, { "epoch": 0.3170636470128856, "grad_norm": 2.921192169189453, "learning_rate": 9.819215480384956e-06, "loss": 0.6851, "step": 2436 }, { "epoch": 0.31745411948457636, "grad_norm": 2.67498517036438, "learning_rate": 9.818658177863985e-06, "loss": 0.5766, "step": 2439 }, { "epoch": 0.3178445919562671, "grad_norm": 2.93373703956604, "learning_rate": 9.818100033533567e-06, "loss": 0.7525, "step": 2442 }, { "epoch": 0.31823506442795785, "grad_norm": 2.9454703330993652, "learning_rate": 9.817541047491209e-06, "loss": 0.7152, "step": 2445 }, { "epoch": 0.31862553689964856, "grad_norm": 2.5796053409576416, "learning_rate": 9.816981219834565e-06, "loss": 0.6848, "step": 2448 }, { "epoch": 0.31901600937133934, "grad_norm": 3.1265947818756104, "learning_rate": 9.816420550661434e-06, "loss": 0.7028, "step": 2451 }, { "epoch": 0.31940648184303005, "grad_norm": 2.7802746295928955, "learning_rate": 9.815859040069766e-06, "loss": 0.6456, "step": 2454 }, { "epoch": 0.3197969543147208, "grad_norm": 3.0092685222625732, "learning_rate": 9.815296688157654e-06, "loss": 0.6739, "step": 2457 }, { "epoch": 0.32018742678641154, "grad_norm": 3.065697193145752, "learning_rate": 9.81473349502334e-06, "loss": 0.6604, "step": 2460 }, { "epoch": 0.3205778992581023, "grad_norm": 2.455078601837158, "learning_rate": 9.814169460765215e-06, "loss": 0.6433, "step": 2463 }, { "epoch": 0.32096837172979303, "grad_norm": 2.841989040374756, "learning_rate": 9.813604585481812e-06, "loss": 0.6915, "step": 2466 }, { "epoch": 0.3213588442014838, "grad_norm": 2.2834136486053467, "learning_rate": 9.813038869271814e-06, "loss": 0.634, "step": 2469 }, { "epoch": 0.3217493166731745, "grad_norm": 3.262573719024658, "learning_rate": 9.81247231223405e-06, "loss": 0.6987, "step": 2472 }, { "epoch": 0.3221397891448653, "grad_norm": 2.7389028072357178, "learning_rate": 9.811904914467498e-06, "loss": 0.6553, "step": 2475 }, { "epoch": 0.322530261616556, "grad_norm": 2.779263973236084, "learning_rate": 9.81133667607128e-06, "loss": 0.7449, "step": 2478 }, { "epoch": 0.3229207340882468, "grad_norm": 2.815535306930542, "learning_rate": 9.810767597144668e-06, "loss": 0.5643, "step": 2481 }, { "epoch": 0.3233112065599375, "grad_norm": 2.794158458709717, "learning_rate": 9.810197677787074e-06, "loss": 0.7033, "step": 2484 }, { "epoch": 0.3237016790316283, "grad_norm": 3.1413052082061768, "learning_rate": 9.809626918098066e-06, "loss": 0.6482, "step": 2487 }, { "epoch": 0.324092151503319, "grad_norm": 2.798581838607788, "learning_rate": 9.809055318177353e-06, "loss": 0.6147, "step": 2490 }, { "epoch": 0.32448262397500977, "grad_norm": 3.2870383262634277, "learning_rate": 9.808482878124794e-06, "loss": 0.6716, "step": 2493 }, { "epoch": 0.3248730964467005, "grad_norm": 2.5646088123321533, "learning_rate": 9.807909598040392e-06, "loss": 0.6022, "step": 2496 }, { "epoch": 0.32526356891839125, "grad_norm": 3.979936361312866, "learning_rate": 9.807335478024297e-06, "loss": 0.6804, "step": 2499 }, { "epoch": 0.32565404139008197, "grad_norm": 3.786860704421997, "learning_rate": 9.806760518176806e-06, "loss": 0.6807, "step": 2502 }, { "epoch": 0.32604451386177274, "grad_norm": 3.001896619796753, "learning_rate": 9.806184718598365e-06, "loss": 0.6799, "step": 2505 }, { "epoch": 0.3264349863334635, "grad_norm": 2.965797185897827, "learning_rate": 9.805608079389566e-06, "loss": 0.7123, "step": 2508 }, { "epoch": 0.32682545880515423, "grad_norm": 5.5680460929870605, "learning_rate": 9.805030600651143e-06, "loss": 0.6111, "step": 2511 }, { "epoch": 0.327215931276845, "grad_norm": 3.2845263481140137, "learning_rate": 9.804452282483983e-06, "loss": 0.614, "step": 2514 }, { "epoch": 0.3276064037485357, "grad_norm": 2.4378509521484375, "learning_rate": 9.803873124989116e-06, "loss": 0.712, "step": 2517 }, { "epoch": 0.3279968762202265, "grad_norm": 2.590458393096924, "learning_rate": 9.803293128267721e-06, "loss": 0.62, "step": 2520 }, { "epoch": 0.3283873486919172, "grad_norm": 2.5005409717559814, "learning_rate": 9.802712292421121e-06, "loss": 0.7454, "step": 2523 }, { "epoch": 0.328777821163608, "grad_norm": 3.139564037322998, "learning_rate": 9.802130617550788e-06, "loss": 0.7651, "step": 2526 }, { "epoch": 0.3291682936352987, "grad_norm": 3.2621004581451416, "learning_rate": 9.801548103758335e-06, "loss": 0.7262, "step": 2529 }, { "epoch": 0.3295587661069895, "grad_norm": 2.5339832305908203, "learning_rate": 9.800964751145533e-06, "loss": 0.6914, "step": 2532 }, { "epoch": 0.3299492385786802, "grad_norm": 3.3442342281341553, "learning_rate": 9.800380559814284e-06, "loss": 0.6403, "step": 2535 }, { "epoch": 0.33033971105037097, "grad_norm": 2.522374153137207, "learning_rate": 9.799795529866654e-06, "loss": 0.6177, "step": 2538 }, { "epoch": 0.3307301835220617, "grad_norm": 2.8395447731018066, "learning_rate": 9.79920966140484e-06, "loss": 0.6954, "step": 2541 }, { "epoch": 0.33112065599375246, "grad_norm": 2.558065891265869, "learning_rate": 9.798622954531194e-06, "loss": 0.6864, "step": 2544 }, { "epoch": 0.3315111284654432, "grad_norm": 3.8972573280334473, "learning_rate": 9.798035409348214e-06, "loss": 0.6951, "step": 2547 }, { "epoch": 0.33190160093713394, "grad_norm": 3.3120248317718506, "learning_rate": 9.797447025958542e-06, "loss": 0.5917, "step": 2550 }, { "epoch": 0.33229207340882466, "grad_norm": 2.999908685684204, "learning_rate": 9.796857804464966e-06, "loss": 0.6607, "step": 2553 }, { "epoch": 0.33268254588051543, "grad_norm": 2.7225866317749023, "learning_rate": 9.796267744970423e-06, "loss": 0.7139, "step": 2556 }, { "epoch": 0.33307301835220615, "grad_norm": 2.655848741531372, "learning_rate": 9.795676847577995e-06, "loss": 0.7146, "step": 2559 }, { "epoch": 0.3334634908238969, "grad_norm": 3.1451730728149414, "learning_rate": 9.795085112390909e-06, "loss": 0.5869, "step": 2562 }, { "epoch": 0.33385396329558764, "grad_norm": 3.549569845199585, "learning_rate": 9.794492539512544e-06, "loss": 0.6792, "step": 2565 }, { "epoch": 0.3342444357672784, "grad_norm": 2.678586959838867, "learning_rate": 9.793899129046417e-06, "loss": 0.5984, "step": 2568 }, { "epoch": 0.33463490823896913, "grad_norm": 2.842653274536133, "learning_rate": 9.793304881096195e-06, "loss": 0.6587, "step": 2571 }, { "epoch": 0.3350253807106599, "grad_norm": 2.715420722961426, "learning_rate": 9.792709795765695e-06, "loss": 0.6969, "step": 2574 }, { "epoch": 0.3354158531823506, "grad_norm": 3.2462103366851807, "learning_rate": 9.792113873158877e-06, "loss": 0.7084, "step": 2577 }, { "epoch": 0.3358063256540414, "grad_norm": 3.362494468688965, "learning_rate": 9.791517113379847e-06, "loss": 0.7104, "step": 2580 }, { "epoch": 0.3361967981257321, "grad_norm": 2.433021306991577, "learning_rate": 9.790919516532856e-06, "loss": 0.6128, "step": 2583 }, { "epoch": 0.3365872705974229, "grad_norm": 2.534708261489868, "learning_rate": 9.790321082722306e-06, "loss": 0.6569, "step": 2586 }, { "epoch": 0.3369777430691136, "grad_norm": 2.7385549545288086, "learning_rate": 9.789721812052738e-06, "loss": 0.6337, "step": 2589 }, { "epoch": 0.3373682155408044, "grad_norm": 2.9946787357330322, "learning_rate": 9.789121704628845e-06, "loss": 0.6794, "step": 2592 }, { "epoch": 0.33775868801249515, "grad_norm": 2.673137664794922, "learning_rate": 9.788520760555467e-06, "loss": 0.6952, "step": 2595 }, { "epoch": 0.33814916048418586, "grad_norm": 2.8383102416992188, "learning_rate": 9.787918979937584e-06, "loss": 0.7175, "step": 2598 }, { "epoch": 0.33853963295587663, "grad_norm": 2.792264223098755, "learning_rate": 9.78731636288033e-06, "loss": 0.6472, "step": 2601 }, { "epoch": 0.33893010542756735, "grad_norm": 3.444775342941284, "learning_rate": 9.786712909488976e-06, "loss": 0.6164, "step": 2604 }, { "epoch": 0.3393205778992581, "grad_norm": 2.6704118251800537, "learning_rate": 9.786108619868948e-06, "loss": 0.6913, "step": 2607 }, { "epoch": 0.33971105037094884, "grad_norm": 3.128438949584961, "learning_rate": 9.785503494125812e-06, "loss": 0.6493, "step": 2610 }, { "epoch": 0.3401015228426396, "grad_norm": 3.342224359512329, "learning_rate": 9.784897532365283e-06, "loss": 0.7241, "step": 2613 }, { "epoch": 0.34049199531433033, "grad_norm": 3.057779550552368, "learning_rate": 9.78429073469322e-06, "loss": 0.6189, "step": 2616 }, { "epoch": 0.3408824677860211, "grad_norm": 2.809861898422241, "learning_rate": 9.783683101215632e-06, "loss": 0.6926, "step": 2619 }, { "epoch": 0.3412729402577118, "grad_norm": 2.8063440322875977, "learning_rate": 9.783074632038669e-06, "loss": 0.7242, "step": 2622 }, { "epoch": 0.3416634127294026, "grad_norm": 2.8597209453582764, "learning_rate": 9.78246532726863e-06, "loss": 0.6391, "step": 2625 }, { "epoch": 0.3420538852010933, "grad_norm": 2.6011080741882324, "learning_rate": 9.78185518701196e-06, "loss": 0.6819, "step": 2628 }, { "epoch": 0.3424443576727841, "grad_norm": 3.5617270469665527, "learning_rate": 9.781244211375247e-06, "loss": 0.733, "step": 2631 }, { "epoch": 0.3428348301444748, "grad_norm": 3.6274831295013428, "learning_rate": 9.78063240046523e-06, "loss": 0.7171, "step": 2634 }, { "epoch": 0.3432253026161656, "grad_norm": 2.6554691791534424, "learning_rate": 9.780019754388786e-06, "loss": 0.7321, "step": 2637 }, { "epoch": 0.3436157750878563, "grad_norm": 2.574505090713501, "learning_rate": 9.779406273252949e-06, "loss": 0.7734, "step": 2640 }, { "epoch": 0.34400624755954706, "grad_norm": 4.732491970062256, "learning_rate": 9.77879195716489e-06, "loss": 0.7968, "step": 2643 }, { "epoch": 0.3443967200312378, "grad_norm": 3.508035182952881, "learning_rate": 9.778176806231931e-06, "loss": 0.6802, "step": 2646 }, { "epoch": 0.34478719250292855, "grad_norm": 2.745506763458252, "learning_rate": 9.777560820561533e-06, "loss": 0.6712, "step": 2649 }, { "epoch": 0.34517766497461927, "grad_norm": 2.6209089756011963, "learning_rate": 9.776944000261313e-06, "loss": 0.6477, "step": 2652 }, { "epoch": 0.34556813744631004, "grad_norm": 3.0945277214050293, "learning_rate": 9.776326345439024e-06, "loss": 0.6644, "step": 2655 }, { "epoch": 0.34595860991800076, "grad_norm": 3.156196117401123, "learning_rate": 9.77570785620257e-06, "loss": 0.6713, "step": 2658 }, { "epoch": 0.34634908238969153, "grad_norm": 3.839165210723877, "learning_rate": 9.77508853266e-06, "loss": 0.7557, "step": 2661 }, { "epoch": 0.34673955486138225, "grad_norm": 3.0956363677978516, "learning_rate": 9.77446837491951e-06, "loss": 0.7111, "step": 2664 }, { "epoch": 0.347130027333073, "grad_norm": 2.516638994216919, "learning_rate": 9.773847383089439e-06, "loss": 0.6385, "step": 2667 }, { "epoch": 0.34752049980476374, "grad_norm": 2.5300917625427246, "learning_rate": 9.773225557278272e-06, "loss": 0.6205, "step": 2670 }, { "epoch": 0.3479109722764545, "grad_norm": 2.5988070964813232, "learning_rate": 9.77260289759464e-06, "loss": 0.6469, "step": 2673 }, { "epoch": 0.34830144474814523, "grad_norm": 2.493129014968872, "learning_rate": 9.771979404147324e-06, "loss": 0.6266, "step": 2676 }, { "epoch": 0.348691917219836, "grad_norm": 2.7826688289642334, "learning_rate": 9.771355077045244e-06, "loss": 0.6741, "step": 2679 }, { "epoch": 0.3490823896915268, "grad_norm": 2.9177565574645996, "learning_rate": 9.77072991639747e-06, "loss": 0.764, "step": 2682 }, { "epoch": 0.3494728621632175, "grad_norm": 3.730968475341797, "learning_rate": 9.770103922313215e-06, "loss": 0.5789, "step": 2685 }, { "epoch": 0.34986333463490826, "grad_norm": 2.848114490509033, "learning_rate": 9.76947709490184e-06, "loss": 0.6656, "step": 2688 }, { "epoch": 0.350253807106599, "grad_norm": 3.0486176013946533, "learning_rate": 9.768849434272851e-06, "loss": 0.6857, "step": 2691 }, { "epoch": 0.35064427957828975, "grad_norm": 2.6924357414245605, "learning_rate": 9.768220940535897e-06, "loss": 0.6939, "step": 2694 }, { "epoch": 0.35103475204998047, "grad_norm": 3.603853225708008, "learning_rate": 9.767591613800775e-06, "loss": 0.6395, "step": 2697 }, { "epoch": 0.35142522452167124, "grad_norm": 2.700711250305176, "learning_rate": 9.76696145417743e-06, "loss": 0.6732, "step": 2700 }, { "epoch": 0.35181569699336196, "grad_norm": 2.3906078338623047, "learning_rate": 9.766330461775944e-06, "loss": 0.5556, "step": 2703 }, { "epoch": 0.35220616946505273, "grad_norm": 2.9107656478881836, "learning_rate": 9.765698636706555e-06, "loss": 0.7257, "step": 2706 }, { "epoch": 0.35259664193674345, "grad_norm": 2.9227166175842285, "learning_rate": 9.765065979079639e-06, "loss": 0.5716, "step": 2709 }, { "epoch": 0.3529871144084342, "grad_norm": 3.1230244636535645, "learning_rate": 9.764432489005722e-06, "loss": 0.6969, "step": 2712 }, { "epoch": 0.35337758688012494, "grad_norm": 2.984856128692627, "learning_rate": 9.763798166595473e-06, "loss": 0.6435, "step": 2715 }, { "epoch": 0.3537680593518157, "grad_norm": 2.4585700035095215, "learning_rate": 9.763163011959702e-06, "loss": 0.7096, "step": 2718 }, { "epoch": 0.35415853182350643, "grad_norm": 2.7415413856506348, "learning_rate": 9.762527025209377e-06, "loss": 0.7252, "step": 2721 }, { "epoch": 0.3545490042951972, "grad_norm": 3.0851871967315674, "learning_rate": 9.761890206455597e-06, "loss": 0.6221, "step": 2724 }, { "epoch": 0.3549394767668879, "grad_norm": 4.111907482147217, "learning_rate": 9.761252555809616e-06, "loss": 0.6155, "step": 2727 }, { "epoch": 0.3553299492385787, "grad_norm": 3.664571762084961, "learning_rate": 9.76061407338283e-06, "loss": 0.6705, "step": 2730 }, { "epoch": 0.3557204217102694, "grad_norm": 2.795311689376831, "learning_rate": 9.75997475928678e-06, "loss": 0.6785, "step": 2733 }, { "epoch": 0.3561108941819602, "grad_norm": 3.0778064727783203, "learning_rate": 9.759334613633154e-06, "loss": 0.7244, "step": 2736 }, { "epoch": 0.3565013666536509, "grad_norm": 3.36773943901062, "learning_rate": 9.758693636533782e-06, "loss": 0.6062, "step": 2739 }, { "epoch": 0.35689183912534167, "grad_norm": 2.6564176082611084, "learning_rate": 9.758051828100643e-06, "loss": 0.7016, "step": 2742 }, { "epoch": 0.3572823115970324, "grad_norm": 2.544980049133301, "learning_rate": 9.75740918844586e-06, "loss": 0.6046, "step": 2745 }, { "epoch": 0.35767278406872316, "grad_norm": 3.575075626373291, "learning_rate": 9.756765717681698e-06, "loss": 0.663, "step": 2748 }, { "epoch": 0.3580632565404139, "grad_norm": 2.658778429031372, "learning_rate": 9.756121415920572e-06, "loss": 0.6775, "step": 2751 }, { "epoch": 0.35845372901210465, "grad_norm": 2.7959978580474854, "learning_rate": 9.755476283275042e-06, "loss": 0.7469, "step": 2754 }, { "epoch": 0.35884420148379537, "grad_norm": 2.5027709007263184, "learning_rate": 9.754830319857809e-06, "loss": 0.6239, "step": 2757 }, { "epoch": 0.35923467395548614, "grad_norm": 2.6545188426971436, "learning_rate": 9.75418352578172e-06, "loss": 0.6756, "step": 2760 }, { "epoch": 0.3596251464271769, "grad_norm": 2.5996038913726807, "learning_rate": 9.753535901159772e-06, "loss": 0.7895, "step": 2763 }, { "epoch": 0.36001561889886763, "grad_norm": 2.747310161590576, "learning_rate": 9.752887446105101e-06, "loss": 0.6933, "step": 2766 }, { "epoch": 0.3604060913705584, "grad_norm": 2.4019601345062256, "learning_rate": 9.752238160730994e-06, "loss": 0.5939, "step": 2769 }, { "epoch": 0.3607965638422491, "grad_norm": 3.0222368240356445, "learning_rate": 9.751588045150875e-06, "loss": 0.7407, "step": 2772 }, { "epoch": 0.3611870363139399, "grad_norm": 3.187483310699463, "learning_rate": 9.750937099478322e-06, "loss": 0.6874, "step": 2775 }, { "epoch": 0.3615775087856306, "grad_norm": 2.897294282913208, "learning_rate": 9.750285323827051e-06, "loss": 0.7485, "step": 2778 }, { "epoch": 0.3619679812573214, "grad_norm": 2.5609171390533447, "learning_rate": 9.749632718310927e-06, "loss": 0.6426, "step": 2781 }, { "epoch": 0.3623584537290121, "grad_norm": 2.4874250888824463, "learning_rate": 9.74897928304396e-06, "loss": 0.6134, "step": 2784 }, { "epoch": 0.36274892620070287, "grad_norm": 3.737605333328247, "learning_rate": 9.748325018140301e-06, "loss": 0.661, "step": 2787 }, { "epoch": 0.3631393986723936, "grad_norm": 2.8321633338928223, "learning_rate": 9.747669923714252e-06, "loss": 0.7641, "step": 2790 }, { "epoch": 0.36352987114408436, "grad_norm": 2.5597903728485107, "learning_rate": 9.747013999880255e-06, "loss": 0.6625, "step": 2793 }, { "epoch": 0.3639203436157751, "grad_norm": 2.5689401626586914, "learning_rate": 9.746357246752898e-06, "loss": 0.6729, "step": 2796 }, { "epoch": 0.36431081608746585, "grad_norm": 2.8747706413269043, "learning_rate": 9.745699664446914e-06, "loss": 0.6406, "step": 2799 }, { "epoch": 0.36470128855915657, "grad_norm": 2.724287271499634, "learning_rate": 9.745041253077183e-06, "loss": 0.6439, "step": 2802 }, { "epoch": 0.36509176103084734, "grad_norm": 3.9166243076324463, "learning_rate": 9.744382012758727e-06, "loss": 0.6298, "step": 2805 }, { "epoch": 0.36548223350253806, "grad_norm": 3.197875499725342, "learning_rate": 9.743721943606715e-06, "loss": 0.6032, "step": 2808 }, { "epoch": 0.36587270597422883, "grad_norm": 2.6559009552001953, "learning_rate": 9.743061045736457e-06, "loss": 0.6065, "step": 2811 }, { "epoch": 0.36626317844591955, "grad_norm": 2.4632251262664795, "learning_rate": 9.742399319263414e-06, "loss": 0.6398, "step": 2814 }, { "epoch": 0.3666536509176103, "grad_norm": 3.159674882888794, "learning_rate": 9.741736764303185e-06, "loss": 0.5769, "step": 2817 }, { "epoch": 0.36704412338930104, "grad_norm": 2.6667957305908203, "learning_rate": 9.74107338097152e-06, "loss": 0.7085, "step": 2820 }, { "epoch": 0.3674345958609918, "grad_norm": 2.641826629638672, "learning_rate": 9.740409169384308e-06, "loss": 0.6944, "step": 2823 }, { "epoch": 0.3678250683326825, "grad_norm": 3.9662795066833496, "learning_rate": 9.739744129657586e-06, "loss": 0.6217, "step": 2826 }, { "epoch": 0.3682155408043733, "grad_norm": 3.6376278400421143, "learning_rate": 9.739078261907537e-06, "loss": 0.7337, "step": 2829 }, { "epoch": 0.368606013276064, "grad_norm": 3.675191640853882, "learning_rate": 9.738411566250485e-06, "loss": 0.6678, "step": 2832 }, { "epoch": 0.3689964857477548, "grad_norm": 2.5543577671051025, "learning_rate": 9.7377440428029e-06, "loss": 0.577, "step": 2835 }, { "epoch": 0.3693869582194455, "grad_norm": 3.843170166015625, "learning_rate": 9.737075691681398e-06, "loss": 0.7626, "step": 2838 }, { "epoch": 0.3697774306911363, "grad_norm": 2.62917423248291, "learning_rate": 9.736406513002737e-06, "loss": 0.6669, "step": 2841 }, { "epoch": 0.370167903162827, "grad_norm": 2.891164541244507, "learning_rate": 9.735736506883822e-06, "loss": 0.6709, "step": 2844 }, { "epoch": 0.37055837563451777, "grad_norm": 2.4671757221221924, "learning_rate": 9.735065673441702e-06, "loss": 0.5747, "step": 2847 }, { "epoch": 0.37094884810620854, "grad_norm": 3.6108286380767822, "learning_rate": 9.73439401279357e-06, "loss": 0.6485, "step": 2850 }, { "epoch": 0.37133932057789926, "grad_norm": 2.4093971252441406, "learning_rate": 9.733721525056764e-06, "loss": 0.6907, "step": 2853 }, { "epoch": 0.37172979304959003, "grad_norm": 4.289933681488037, "learning_rate": 9.733048210348767e-06, "loss": 0.6965, "step": 2856 }, { "epoch": 0.37212026552128075, "grad_norm": 2.8142073154449463, "learning_rate": 9.732374068787202e-06, "loss": 0.7055, "step": 2859 }, { "epoch": 0.3725107379929715, "grad_norm": 2.733341693878174, "learning_rate": 9.731699100489845e-06, "loss": 0.7459, "step": 2862 }, { "epoch": 0.37290121046466224, "grad_norm": 3.4828221797943115, "learning_rate": 9.731023305574608e-06, "loss": 0.6431, "step": 2865 }, { "epoch": 0.373291682936353, "grad_norm": 2.436321258544922, "learning_rate": 9.730346684159553e-06, "loss": 0.6435, "step": 2868 }, { "epoch": 0.3736821554080437, "grad_norm": 2.3857946395874023, "learning_rate": 9.729669236362882e-06, "loss": 0.6081, "step": 2871 }, { "epoch": 0.3740726278797345, "grad_norm": 3.848266839981079, "learning_rate": 9.728990962302946e-06, "loss": 0.6367, "step": 2874 }, { "epoch": 0.3744631003514252, "grad_norm": 2.9143803119659424, "learning_rate": 9.72831186209824e-06, "loss": 0.6555, "step": 2877 }, { "epoch": 0.374853572823116, "grad_norm": 2.645496129989624, "learning_rate": 9.727631935867394e-06, "loss": 0.6999, "step": 2880 }, { "epoch": 0.3752440452948067, "grad_norm": 2.874675750732422, "learning_rate": 9.726951183729196e-06, "loss": 0.6976, "step": 2883 }, { "epoch": 0.3756345177664975, "grad_norm": 2.555006742477417, "learning_rate": 9.726269605802569e-06, "loss": 0.6693, "step": 2886 }, { "epoch": 0.3760249902381882, "grad_norm": 2.870241641998291, "learning_rate": 9.725587202206588e-06, "loss": 0.6434, "step": 2889 }, { "epoch": 0.37641546270987897, "grad_norm": 4.20858097076416, "learning_rate": 9.724903973060461e-06, "loss": 0.7726, "step": 2892 }, { "epoch": 0.3768059351815697, "grad_norm": 2.6733803749084473, "learning_rate": 9.72421991848355e-06, "loss": 0.7146, "step": 2895 }, { "epoch": 0.37719640765326046, "grad_norm": 2.6210622787475586, "learning_rate": 9.723535038595358e-06, "loss": 0.7285, "step": 2898 }, { "epoch": 0.3775868801249512, "grad_norm": 3.7322444915771484, "learning_rate": 9.722849333515532e-06, "loss": 0.7671, "step": 2901 }, { "epoch": 0.37797735259664195, "grad_norm": 3.78657603263855, "learning_rate": 9.722162803363863e-06, "loss": 0.6344, "step": 2904 }, { "epoch": 0.37836782506833266, "grad_norm": 3.663477659225464, "learning_rate": 9.721475448260286e-06, "loss": 0.6887, "step": 2907 }, { "epoch": 0.37875829754002344, "grad_norm": 2.8419249057769775, "learning_rate": 9.72078726832488e-06, "loss": 0.7098, "step": 2910 }, { "epoch": 0.37914877001171415, "grad_norm": 5.145716190338135, "learning_rate": 9.72009826367787e-06, "loss": 0.6945, "step": 2913 }, { "epoch": 0.3795392424834049, "grad_norm": 3.3549532890319824, "learning_rate": 9.719408434439623e-06, "loss": 0.6757, "step": 2916 }, { "epoch": 0.37992971495509564, "grad_norm": 2.720118284225464, "learning_rate": 9.71871778073065e-06, "loss": 0.7617, "step": 2919 }, { "epoch": 0.3803201874267864, "grad_norm": 2.3909714221954346, "learning_rate": 9.718026302671608e-06, "loss": 0.6084, "step": 2922 }, { "epoch": 0.38071065989847713, "grad_norm": 2.6086878776550293, "learning_rate": 9.717334000383297e-06, "loss": 0.6376, "step": 2925 }, { "epoch": 0.3811011323701679, "grad_norm": 2.780679702758789, "learning_rate": 9.716640873986658e-06, "loss": 0.712, "step": 2928 }, { "epoch": 0.3814916048418586, "grad_norm": 2.720557451248169, "learning_rate": 9.715946923602781e-06, "loss": 0.643, "step": 2931 }, { "epoch": 0.3818820773135494, "grad_norm": 5.16661262512207, "learning_rate": 9.715252149352898e-06, "loss": 0.705, "step": 2934 }, { "epoch": 0.38227254978524017, "grad_norm": 3.2445664405822754, "learning_rate": 9.714556551358385e-06, "loss": 0.6784, "step": 2937 }, { "epoch": 0.3826630222569309, "grad_norm": 3.182467460632324, "learning_rate": 9.713860129740759e-06, "loss": 0.714, "step": 2940 }, { "epoch": 0.38305349472862166, "grad_norm": 2.596245288848877, "learning_rate": 9.713162884621686e-06, "loss": 0.5894, "step": 2943 }, { "epoch": 0.3834439672003124, "grad_norm": 2.317925453186035, "learning_rate": 9.71246481612297e-06, "loss": 0.5575, "step": 2946 }, { "epoch": 0.38383443967200315, "grad_norm": 2.5028085708618164, "learning_rate": 9.711765924366567e-06, "loss": 0.6765, "step": 2949 }, { "epoch": 0.38422491214369386, "grad_norm": 2.46987247467041, "learning_rate": 9.711066209474568e-06, "loss": 0.7498, "step": 2952 }, { "epoch": 0.38461538461538464, "grad_norm": 2.608255386352539, "learning_rate": 9.710365671569214e-06, "loss": 0.718, "step": 2955 }, { "epoch": 0.38500585708707535, "grad_norm": 3.6582038402557373, "learning_rate": 9.709664310772884e-06, "loss": 0.7416, "step": 2958 }, { "epoch": 0.3853963295587661, "grad_norm": 3.149799108505249, "learning_rate": 9.708962127208105e-06, "loss": 0.746, "step": 2961 }, { "epoch": 0.38578680203045684, "grad_norm": 2.3268182277679443, "learning_rate": 9.708259120997551e-06, "loss": 0.5427, "step": 2964 }, { "epoch": 0.3861772745021476, "grad_norm": 2.4898290634155273, "learning_rate": 9.707555292264035e-06, "loss": 0.6849, "step": 2967 }, { "epoch": 0.38656774697383833, "grad_norm": 2.5410478115081787, "learning_rate": 9.706850641130512e-06, "loss": 0.6692, "step": 2970 }, { "epoch": 0.3869582194455291, "grad_norm": 2.798302173614502, "learning_rate": 9.706145167720082e-06, "loss": 0.5719, "step": 2973 }, { "epoch": 0.3873486919172198, "grad_norm": 3.7535929679870605, "learning_rate": 9.705438872155993e-06, "loss": 0.7151, "step": 2976 }, { "epoch": 0.3877391643889106, "grad_norm": 2.450899124145508, "learning_rate": 9.70473175456163e-06, "loss": 0.7171, "step": 2979 }, { "epoch": 0.3881296368606013, "grad_norm": 3.063359260559082, "learning_rate": 9.704023815060528e-06, "loss": 0.7145, "step": 2982 }, { "epoch": 0.3885201093322921, "grad_norm": 2.760772228240967, "learning_rate": 9.703315053776362e-06, "loss": 0.7893, "step": 2985 }, { "epoch": 0.3889105818039828, "grad_norm": 2.4443037509918213, "learning_rate": 9.702605470832948e-06, "loss": 0.6993, "step": 2988 }, { "epoch": 0.3893010542756736, "grad_norm": 2.651404619216919, "learning_rate": 9.701895066354255e-06, "loss": 0.6193, "step": 2991 }, { "epoch": 0.3896915267473643, "grad_norm": 3.2075984477996826, "learning_rate": 9.701183840464383e-06, "loss": 0.6428, "step": 2994 }, { "epoch": 0.39008199921905506, "grad_norm": 2.8586585521698, "learning_rate": 9.700471793287582e-06, "loss": 0.7017, "step": 2997 }, { "epoch": 0.3904724716907458, "grad_norm": 2.386969804763794, "learning_rate": 9.69975892494825e-06, "loss": 0.5862, "step": 3000 }, { "epoch": 0.39086294416243655, "grad_norm": 2.74922776222229, "learning_rate": 9.69904523557092e-06, "loss": 0.6879, "step": 3003 }, { "epoch": 0.39125341663412727, "grad_norm": 2.8284764289855957, "learning_rate": 9.698330725280271e-06, "loss": 0.6019, "step": 3006 }, { "epoch": 0.39164388910581804, "grad_norm": 4.191535472869873, "learning_rate": 9.69761539420113e-06, "loss": 0.6605, "step": 3009 }, { "epoch": 0.39203436157750876, "grad_norm": 2.883096218109131, "learning_rate": 9.696899242458462e-06, "loss": 0.7188, "step": 3012 }, { "epoch": 0.39242483404919953, "grad_norm": 4.879593372344971, "learning_rate": 9.696182270177377e-06, "loss": 0.6858, "step": 3015 }, { "epoch": 0.39281530652089025, "grad_norm": 3.190108060836792, "learning_rate": 9.695464477483127e-06, "loss": 0.7377, "step": 3018 }, { "epoch": 0.393205778992581, "grad_norm": 2.7659029960632324, "learning_rate": 9.694745864501113e-06, "loss": 0.6033, "step": 3021 }, { "epoch": 0.3935962514642718, "grad_norm": 2.5670714378356934, "learning_rate": 9.694026431356872e-06, "loss": 0.6231, "step": 3024 }, { "epoch": 0.3939867239359625, "grad_norm": 2.614166498184204, "learning_rate": 9.693306178176086e-06, "loss": 0.696, "step": 3027 }, { "epoch": 0.3943771964076533, "grad_norm": 2.583684206008911, "learning_rate": 9.692585105084588e-06, "loss": 0.6598, "step": 3030 }, { "epoch": 0.394767668879344, "grad_norm": 2.6008903980255127, "learning_rate": 9.691863212208342e-06, "loss": 0.6207, "step": 3033 }, { "epoch": 0.3951581413510348, "grad_norm": 2.974407196044922, "learning_rate": 9.691140499673462e-06, "loss": 0.6982, "step": 3036 }, { "epoch": 0.3955486138227255, "grad_norm": 3.109865665435791, "learning_rate": 9.690416967606207e-06, "loss": 0.8394, "step": 3039 }, { "epoch": 0.39593908629441626, "grad_norm": 2.6200222969055176, "learning_rate": 9.689692616132975e-06, "loss": 0.6467, "step": 3042 }, { "epoch": 0.396329558766107, "grad_norm": 2.555178642272949, "learning_rate": 9.688967445380306e-06, "loss": 0.7058, "step": 3045 }, { "epoch": 0.39672003123779775, "grad_norm": 2.755859375, "learning_rate": 9.688241455474892e-06, "loss": 0.637, "step": 3048 }, { "epoch": 0.39711050370948847, "grad_norm": 2.5034401416778564, "learning_rate": 9.687514646543557e-06, "loss": 0.7212, "step": 3051 }, { "epoch": 0.39750097618117924, "grad_norm": 3.507744550704956, "learning_rate": 9.686787018713273e-06, "loss": 0.6072, "step": 3054 }, { "epoch": 0.39789144865286996, "grad_norm": 3.1820497512817383, "learning_rate": 9.686058572111157e-06, "loss": 0.7151, "step": 3057 }, { "epoch": 0.39828192112456073, "grad_norm": 2.979086399078369, "learning_rate": 9.685329306864468e-06, "loss": 0.8117, "step": 3060 }, { "epoch": 0.39867239359625145, "grad_norm": 2.7010393142700195, "learning_rate": 9.684599223100604e-06, "loss": 0.6791, "step": 3063 }, { "epoch": 0.3990628660679422, "grad_norm": 2.8512115478515625, "learning_rate": 9.68386832094711e-06, "loss": 0.6964, "step": 3066 }, { "epoch": 0.39945333853963294, "grad_norm": 2.7704756259918213, "learning_rate": 9.683136600531674e-06, "loss": 0.7149, "step": 3069 }, { "epoch": 0.3998438110113237, "grad_norm": 2.513730764389038, "learning_rate": 9.682404061982129e-06, "loss": 0.6607, "step": 3072 }, { "epoch": 0.40023428348301443, "grad_norm": 2.799468517303467, "learning_rate": 9.681670705426442e-06, "loss": 0.6806, "step": 3075 }, { "epoch": 0.4006247559547052, "grad_norm": 4.188741683959961, "learning_rate": 9.680936530992731e-06, "loss": 0.5816, "step": 3078 }, { "epoch": 0.4010152284263959, "grad_norm": 2.830998659133911, "learning_rate": 9.680201538809257e-06, "loss": 0.6774, "step": 3081 }, { "epoch": 0.4014057008980867, "grad_norm": 2.5481221675872803, "learning_rate": 9.679465729004419e-06, "loss": 0.5939, "step": 3084 }, { "epoch": 0.4017961733697774, "grad_norm": 2.98976469039917, "learning_rate": 9.678729101706763e-06, "loss": 0.6127, "step": 3087 }, { "epoch": 0.4021866458414682, "grad_norm": 3.0015671253204346, "learning_rate": 9.677991657044973e-06, "loss": 0.6858, "step": 3090 }, { "epoch": 0.4025771183131589, "grad_norm": 3.737126350402832, "learning_rate": 9.677253395147886e-06, "loss": 0.7177, "step": 3093 }, { "epoch": 0.40296759078484967, "grad_norm": 2.3965842723846436, "learning_rate": 9.676514316144468e-06, "loss": 0.6822, "step": 3096 }, { "epoch": 0.4033580632565404, "grad_norm": 2.6666600704193115, "learning_rate": 9.675774420163835e-06, "loss": 0.6958, "step": 3099 }, { "epoch": 0.40374853572823116, "grad_norm": 3.0011026859283447, "learning_rate": 9.675033707335249e-06, "loss": 0.7197, "step": 3102 }, { "epoch": 0.4041390081999219, "grad_norm": 2.5443384647369385, "learning_rate": 9.674292177788109e-06, "loss": 0.6865, "step": 3105 }, { "epoch": 0.40452948067161265, "grad_norm": 2.5568745136260986, "learning_rate": 9.67354983165196e-06, "loss": 0.624, "step": 3108 }, { "epoch": 0.4049199531433034, "grad_norm": 4.514736652374268, "learning_rate": 9.672806669056486e-06, "loss": 0.6752, "step": 3111 }, { "epoch": 0.40531042561499414, "grad_norm": 2.690507650375366, "learning_rate": 9.672062690131516e-06, "loss": 0.7393, "step": 3114 }, { "epoch": 0.4057008980866849, "grad_norm": 3.8276305198669434, "learning_rate": 9.671317895007025e-06, "loss": 0.6249, "step": 3117 }, { "epoch": 0.40609137055837563, "grad_norm": 2.517906427383423, "learning_rate": 9.670572283813123e-06, "loss": 0.5731, "step": 3120 }, { "epoch": 0.4064818430300664, "grad_norm": 2.6286120414733887, "learning_rate": 9.669825856680068e-06, "loss": 0.6554, "step": 3123 }, { "epoch": 0.4068723155017571, "grad_norm": 2.6419596672058105, "learning_rate": 9.669078613738263e-06, "loss": 0.6567, "step": 3126 }, { "epoch": 0.4072627879734479, "grad_norm": 2.7058355808258057, "learning_rate": 9.668330555118243e-06, "loss": 0.8195, "step": 3129 }, { "epoch": 0.4076532604451386, "grad_norm": 2.805713176727295, "learning_rate": 9.667581680950698e-06, "loss": 0.7018, "step": 3132 }, { "epoch": 0.4080437329168294, "grad_norm": 2.345257520675659, "learning_rate": 9.66683199136645e-06, "loss": 0.6237, "step": 3135 }, { "epoch": 0.4084342053885201, "grad_norm": 2.4524874687194824, "learning_rate": 9.666081486496472e-06, "loss": 0.6889, "step": 3138 }, { "epoch": 0.40882467786021087, "grad_norm": 2.8738114833831787, "learning_rate": 9.665330166471875e-06, "loss": 0.6038, "step": 3141 }, { "epoch": 0.4092151503319016, "grad_norm": 3.0100152492523193, "learning_rate": 9.664578031423913e-06, "loss": 0.6482, "step": 3144 }, { "epoch": 0.40960562280359236, "grad_norm": 2.748427391052246, "learning_rate": 9.663825081483979e-06, "loss": 0.6699, "step": 3147 }, { "epoch": 0.4099960952752831, "grad_norm": 2.4630000591278076, "learning_rate": 9.663071316783617e-06, "loss": 0.6064, "step": 3150 }, { "epoch": 0.41038656774697385, "grad_norm": 3.131740093231201, "learning_rate": 9.662316737454505e-06, "loss": 0.6362, "step": 3153 }, { "epoch": 0.41077704021866457, "grad_norm": 2.545337200164795, "learning_rate": 9.661561343628465e-06, "loss": 0.6804, "step": 3156 }, { "epoch": 0.41116751269035534, "grad_norm": 2.7401156425476074, "learning_rate": 9.660805135437468e-06, "loss": 0.679, "step": 3159 }, { "epoch": 0.41155798516204606, "grad_norm": 2.6069376468658447, "learning_rate": 9.660048113013616e-06, "loss": 0.6729, "step": 3162 }, { "epoch": 0.41194845763373683, "grad_norm": 2.470309257507324, "learning_rate": 9.659290276489164e-06, "loss": 0.6852, "step": 3165 }, { "epoch": 0.41233893010542755, "grad_norm": 2.4761130809783936, "learning_rate": 9.658531625996502e-06, "loss": 0.6221, "step": 3168 }, { "epoch": 0.4127294025771183, "grad_norm": 2.603297472000122, "learning_rate": 9.657772161668164e-06, "loss": 0.645, "step": 3171 }, { "epoch": 0.41311987504880904, "grad_norm": 5.437543869018555, "learning_rate": 9.657011883636828e-06, "loss": 0.7422, "step": 3174 }, { "epoch": 0.4135103475204998, "grad_norm": 2.724668025970459, "learning_rate": 9.656250792035314e-06, "loss": 0.6882, "step": 3177 }, { "epoch": 0.4139008199921905, "grad_norm": 2.5859644412994385, "learning_rate": 9.655488886996582e-06, "loss": 0.5831, "step": 3180 }, { "epoch": 0.4142912924638813, "grad_norm": 2.3966543674468994, "learning_rate": 9.654726168653733e-06, "loss": 0.5881, "step": 3183 }, { "epoch": 0.414681764935572, "grad_norm": 2.52030611038208, "learning_rate": 9.653962637140016e-06, "loss": 0.6998, "step": 3186 }, { "epoch": 0.4150722374072628, "grad_norm": 2.5317649841308594, "learning_rate": 9.653198292588816e-06, "loss": 0.7423, "step": 3189 }, { "epoch": 0.4154627098789535, "grad_norm": 2.806925058364868, "learning_rate": 9.652433135133666e-06, "loss": 0.7737, "step": 3192 }, { "epoch": 0.4158531823506443, "grad_norm": 2.7214372158050537, "learning_rate": 9.651667164908232e-06, "loss": 0.6412, "step": 3195 }, { "epoch": 0.41624365482233505, "grad_norm": 3.378222942352295, "learning_rate": 9.65090038204633e-06, "loss": 0.5841, "step": 3198 }, { "epoch": 0.41663412729402577, "grad_norm": 5.052935600280762, "learning_rate": 9.650132786681916e-06, "loss": 0.6651, "step": 3201 }, { "epoch": 0.41702459976571654, "grad_norm": 3.4297244548797607, "learning_rate": 9.649364378949087e-06, "loss": 0.6839, "step": 3204 }, { "epoch": 0.41741507223740726, "grad_norm": 3.513136148452759, "learning_rate": 9.64859515898208e-06, "loss": 0.727, "step": 3207 }, { "epoch": 0.41780554470909803, "grad_norm": 3.3228838443756104, "learning_rate": 9.64782512691528e-06, "loss": 0.7235, "step": 3210 }, { "epoch": 0.41819601718078875, "grad_norm": 3.5547192096710205, "learning_rate": 9.647054282883207e-06, "loss": 0.7343, "step": 3213 }, { "epoch": 0.4185864896524795, "grad_norm": 2.559657096862793, "learning_rate": 9.646282627020527e-06, "loss": 0.5984, "step": 3216 }, { "epoch": 0.41897696212417024, "grad_norm": 2.8096330165863037, "learning_rate": 9.645510159462047e-06, "loss": 0.7274, "step": 3219 }, { "epoch": 0.419367434595861, "grad_norm": 2.950134038925171, "learning_rate": 9.644736880342714e-06, "loss": 0.6025, "step": 3222 }, { "epoch": 0.4197579070675517, "grad_norm": 2.600390911102295, "learning_rate": 9.643962789797619e-06, "loss": 0.7445, "step": 3225 }, { "epoch": 0.4201483795392425, "grad_norm": 2.848917245864868, "learning_rate": 9.643187887961993e-06, "loss": 0.6197, "step": 3228 }, { "epoch": 0.4205388520109332, "grad_norm": 2.5057928562164307, "learning_rate": 9.642412174971214e-06, "loss": 0.7009, "step": 3231 }, { "epoch": 0.420929324482624, "grad_norm": 3.8111495971679688, "learning_rate": 9.641635650960792e-06, "loss": 0.589, "step": 3234 }, { "epoch": 0.4213197969543147, "grad_norm": 4.075084686279297, "learning_rate": 9.640858316066387e-06, "loss": 0.6813, "step": 3237 }, { "epoch": 0.4217102694260055, "grad_norm": 3.2589192390441895, "learning_rate": 9.640080170423796e-06, "loss": 0.6352, "step": 3240 }, { "epoch": 0.4221007418976962, "grad_norm": 3.0924248695373535, "learning_rate": 9.63930121416896e-06, "loss": 0.581, "step": 3243 }, { "epoch": 0.42249121436938697, "grad_norm": 3.0321390628814697, "learning_rate": 9.638521447437965e-06, "loss": 0.7448, "step": 3246 }, { "epoch": 0.4228816868410777, "grad_norm": 2.8482680320739746, "learning_rate": 9.637740870367028e-06, "loss": 0.7065, "step": 3249 }, { "epoch": 0.42327215931276846, "grad_norm": 2.815291404724121, "learning_rate": 9.636959483092518e-06, "loss": 0.5734, "step": 3252 }, { "epoch": 0.4236626317844592, "grad_norm": 2.5750129222869873, "learning_rate": 9.636177285750942e-06, "loss": 0.6645, "step": 3255 }, { "epoch": 0.42405310425614995, "grad_norm": 2.154196262359619, "learning_rate": 9.635394278478947e-06, "loss": 0.6641, "step": 3258 }, { "epoch": 0.42444357672784067, "grad_norm": 2.7113664150238037, "learning_rate": 9.634610461413322e-06, "loss": 0.6695, "step": 3261 }, { "epoch": 0.42483404919953144, "grad_norm": 2.7851827144622803, "learning_rate": 9.633825834691e-06, "loss": 0.6974, "step": 3264 }, { "epoch": 0.42522452167122216, "grad_norm": 2.6589601039886475, "learning_rate": 9.633040398449052e-06, "loss": 0.7072, "step": 3267 }, { "epoch": 0.42561499414291293, "grad_norm": 2.8155033588409424, "learning_rate": 9.632254152824693e-06, "loss": 0.652, "step": 3270 }, { "epoch": 0.42600546661460365, "grad_norm": 3.5873613357543945, "learning_rate": 9.63146709795528e-06, "loss": 0.6695, "step": 3273 }, { "epoch": 0.4263959390862944, "grad_norm": 2.7894959449768066, "learning_rate": 9.630679233978308e-06, "loss": 0.6908, "step": 3276 }, { "epoch": 0.42678641155798513, "grad_norm": 2.545112371444702, "learning_rate": 9.629890561031414e-06, "loss": 0.6758, "step": 3279 }, { "epoch": 0.4271768840296759, "grad_norm": 2.591367483139038, "learning_rate": 9.629101079252379e-06, "loss": 0.6303, "step": 3282 }, { "epoch": 0.4275673565013667, "grad_norm": 2.6541688442230225, "learning_rate": 9.628310788779125e-06, "loss": 0.728, "step": 3285 }, { "epoch": 0.4279578289730574, "grad_norm": 2.391240358352661, "learning_rate": 9.627519689749711e-06, "loss": 0.6469, "step": 3288 }, { "epoch": 0.42834830144474817, "grad_norm": 2.4447021484375, "learning_rate": 9.626727782302343e-06, "loss": 0.6447, "step": 3291 }, { "epoch": 0.4287387739164389, "grad_norm": 2.9437851905822754, "learning_rate": 9.625935066575364e-06, "loss": 0.7597, "step": 3294 }, { "epoch": 0.42912924638812966, "grad_norm": 2.9596848487854004, "learning_rate": 9.625141542707261e-06, "loss": 0.7102, "step": 3297 }, { "epoch": 0.4295197188598204, "grad_norm": 3.0737996101379395, "learning_rate": 9.62434721083666e-06, "loss": 0.597, "step": 3300 }, { "epoch": 0.42991019133151115, "grad_norm": 2.6365230083465576, "learning_rate": 9.62355207110233e-06, "loss": 0.6877, "step": 3303 }, { "epoch": 0.43030066380320187, "grad_norm": 3.259915351867676, "learning_rate": 9.62275612364318e-06, "loss": 0.6127, "step": 3306 }, { "epoch": 0.43069113627489264, "grad_norm": 2.5275659561157227, "learning_rate": 9.621959368598259e-06, "loss": 0.5959, "step": 3309 }, { "epoch": 0.43108160874658336, "grad_norm": 2.8045878410339355, "learning_rate": 9.62116180610676e-06, "loss": 0.582, "step": 3312 }, { "epoch": 0.43147208121827413, "grad_norm": 2.7796387672424316, "learning_rate": 9.620363436308015e-06, "loss": 0.714, "step": 3315 }, { "epoch": 0.43186255368996485, "grad_norm": 2.583375930786133, "learning_rate": 9.619564259341497e-06, "loss": 0.7435, "step": 3318 }, { "epoch": 0.4322530261616556, "grad_norm": 2.662533760070801, "learning_rate": 9.618764275346821e-06, "loss": 0.6005, "step": 3321 }, { "epoch": 0.43264349863334634, "grad_norm": 2.9329593181610107, "learning_rate": 9.617963484463744e-06, "loss": 0.6643, "step": 3324 }, { "epoch": 0.4330339711050371, "grad_norm": 2.6769914627075195, "learning_rate": 9.61716188683216e-06, "loss": 0.745, "step": 3327 }, { "epoch": 0.4334244435767278, "grad_norm": 2.9171302318573, "learning_rate": 9.616359482592108e-06, "loss": 0.7305, "step": 3330 }, { "epoch": 0.4338149160484186, "grad_norm": 3.290548086166382, "learning_rate": 9.615556271883766e-06, "loss": 0.7274, "step": 3333 }, { "epoch": 0.4342053885201093, "grad_norm": 2.999380588531494, "learning_rate": 9.61475225484745e-06, "loss": 0.628, "step": 3336 }, { "epoch": 0.4345958609918001, "grad_norm": 2.652371644973755, "learning_rate": 9.613947431623627e-06, "loss": 0.6668, "step": 3339 }, { "epoch": 0.4349863334634908, "grad_norm": 2.5840189456939697, "learning_rate": 9.613141802352893e-06, "loss": 0.664, "step": 3342 }, { "epoch": 0.4353768059351816, "grad_norm": 2.5839924812316895, "learning_rate": 9.61233536717599e-06, "loss": 0.7491, "step": 3345 }, { "epoch": 0.4357672784068723, "grad_norm": 2.6329665184020996, "learning_rate": 9.6115281262338e-06, "loss": 0.6435, "step": 3348 }, { "epoch": 0.43615775087856307, "grad_norm": 2.31581974029541, "learning_rate": 9.61072007966735e-06, "loss": 0.6642, "step": 3351 }, { "epoch": 0.4365482233502538, "grad_norm": 3.6810152530670166, "learning_rate": 9.609911227617802e-06, "loss": 0.677, "step": 3354 }, { "epoch": 0.43693869582194456, "grad_norm": 2.4327340126037598, "learning_rate": 9.609101570226458e-06, "loss": 0.6408, "step": 3357 }, { "epoch": 0.4373291682936353, "grad_norm": 2.6379623413085938, "learning_rate": 9.608291107634767e-06, "loss": 0.6083, "step": 3360 }, { "epoch": 0.43771964076532605, "grad_norm": 2.7450592517852783, "learning_rate": 9.607479839984313e-06, "loss": 0.6663, "step": 3363 }, { "epoch": 0.43811011323701676, "grad_norm": 2.3480405807495117, "learning_rate": 9.606667767416824e-06, "loss": 0.5952, "step": 3366 }, { "epoch": 0.43850058570870754, "grad_norm": 2.8158650398254395, "learning_rate": 9.605854890074169e-06, "loss": 0.745, "step": 3369 }, { "epoch": 0.4388910581803983, "grad_norm": 2.310330390930176, "learning_rate": 9.60504120809835e-06, "loss": 0.5277, "step": 3372 }, { "epoch": 0.439281530652089, "grad_norm": 2.6338181495666504, "learning_rate": 9.604226721631525e-06, "loss": 0.6792, "step": 3375 }, { "epoch": 0.4396720031237798, "grad_norm": 2.5001606941223145, "learning_rate": 9.603411430815974e-06, "loss": 0.61, "step": 3378 }, { "epoch": 0.4400624755954705, "grad_norm": 2.8085408210754395, "learning_rate": 9.60259533579413e-06, "loss": 0.6936, "step": 3381 }, { "epoch": 0.4404529480671613, "grad_norm": 2.701272964477539, "learning_rate": 9.601778436708564e-06, "loss": 0.584, "step": 3384 }, { "epoch": 0.440843420538852, "grad_norm": 2.5402166843414307, "learning_rate": 9.600960733701988e-06, "loss": 0.6306, "step": 3387 }, { "epoch": 0.4412338930105428, "grad_norm": 2.4810187816619873, "learning_rate": 9.600142226917248e-06, "loss": 0.6259, "step": 3390 }, { "epoch": 0.4416243654822335, "grad_norm": 2.939995765686035, "learning_rate": 9.599322916497338e-06, "loss": 0.6119, "step": 3393 }, { "epoch": 0.44201483795392427, "grad_norm": 2.663740873336792, "learning_rate": 9.598502802585392e-06, "loss": 0.6582, "step": 3396 }, { "epoch": 0.442405310425615, "grad_norm": 2.5204548835754395, "learning_rate": 9.597681885324679e-06, "loss": 0.7289, "step": 3399 }, { "epoch": 0.44279578289730576, "grad_norm": 2.44008207321167, "learning_rate": 9.596860164858612e-06, "loss": 0.5731, "step": 3402 }, { "epoch": 0.4431862553689965, "grad_norm": 2.584868907928467, "learning_rate": 9.596037641330746e-06, "loss": 0.6289, "step": 3405 }, { "epoch": 0.44357672784068725, "grad_norm": 3.060908079147339, "learning_rate": 9.595214314884773e-06, "loss": 0.7144, "step": 3408 }, { "epoch": 0.44396720031237796, "grad_norm": 2.6084563732147217, "learning_rate": 9.594390185664526e-06, "loss": 0.6185, "step": 3411 }, { "epoch": 0.44435767278406874, "grad_norm": 2.7041709423065186, "learning_rate": 9.593565253813978e-06, "loss": 0.6793, "step": 3414 }, { "epoch": 0.44474814525575945, "grad_norm": 2.9432315826416016, "learning_rate": 9.592739519477243e-06, "loss": 0.6493, "step": 3417 }, { "epoch": 0.4451386177274502, "grad_norm": 2.6161787509918213, "learning_rate": 9.591912982798576e-06, "loss": 0.6717, "step": 3420 }, { "epoch": 0.44552909019914094, "grad_norm": 3.7241930961608887, "learning_rate": 9.591085643922372e-06, "loss": 0.7395, "step": 3423 }, { "epoch": 0.4459195626708317, "grad_norm": 2.6377596855163574, "learning_rate": 9.590257502993164e-06, "loss": 0.733, "step": 3426 }, { "epoch": 0.44631003514252243, "grad_norm": 2.6171798706054688, "learning_rate": 9.589428560155627e-06, "loss": 0.6539, "step": 3429 }, { "epoch": 0.4467005076142132, "grad_norm": 3.0598702430725098, "learning_rate": 9.588598815554573e-06, "loss": 0.7271, "step": 3432 }, { "epoch": 0.4470909800859039, "grad_norm": 2.866503953933716, "learning_rate": 9.587768269334962e-06, "loss": 0.6864, "step": 3435 }, { "epoch": 0.4474814525575947, "grad_norm": 2.591628074645996, "learning_rate": 9.586936921641884e-06, "loss": 0.6335, "step": 3438 }, { "epoch": 0.4478719250292854, "grad_norm": 2.506284236907959, "learning_rate": 9.586104772620575e-06, "loss": 0.6376, "step": 3441 }, { "epoch": 0.4482623975009762, "grad_norm": 2.7341339588165283, "learning_rate": 9.585271822416412e-06, "loss": 0.7072, "step": 3444 }, { "epoch": 0.4486528699726669, "grad_norm": 2.406806230545044, "learning_rate": 9.584438071174904e-06, "loss": 0.5989, "step": 3447 }, { "epoch": 0.4490433424443577, "grad_norm": 2.4429965019226074, "learning_rate": 9.583603519041713e-06, "loss": 0.6158, "step": 3450 }, { "epoch": 0.4494338149160484, "grad_norm": 2.7472376823425293, "learning_rate": 9.582768166162628e-06, "loss": 0.6195, "step": 3453 }, { "epoch": 0.44982428738773916, "grad_norm": 2.816849946975708, "learning_rate": 9.581932012683583e-06, "loss": 0.7533, "step": 3456 }, { "epoch": 0.45021475985942994, "grad_norm": 2.6621181964874268, "learning_rate": 9.581095058750658e-06, "loss": 0.6995, "step": 3459 }, { "epoch": 0.45060523233112065, "grad_norm": 2.8741328716278076, "learning_rate": 9.580257304510062e-06, "loss": 0.6234, "step": 3462 }, { "epoch": 0.4509957048028114, "grad_norm": 2.814319372177124, "learning_rate": 9.579418750108149e-06, "loss": 0.7944, "step": 3465 }, { "epoch": 0.45138617727450214, "grad_norm": 2.745290517807007, "learning_rate": 9.578579395691417e-06, "loss": 0.6016, "step": 3468 }, { "epoch": 0.4517766497461929, "grad_norm": 4.0121541023254395, "learning_rate": 9.577739241406494e-06, "loss": 0.6709, "step": 3471 }, { "epoch": 0.45216712221788363, "grad_norm": 2.6668434143066406, "learning_rate": 9.576898287400155e-06, "loss": 0.6304, "step": 3474 }, { "epoch": 0.4525575946895744, "grad_norm": 2.569227933883667, "learning_rate": 9.576056533819316e-06, "loss": 0.6098, "step": 3477 }, { "epoch": 0.4529480671612651, "grad_norm": 2.43481183052063, "learning_rate": 9.575213980811027e-06, "loss": 0.6843, "step": 3480 }, { "epoch": 0.4533385396329559, "grad_norm": 2.772892713546753, "learning_rate": 9.57437062852248e-06, "loss": 0.6885, "step": 3483 }, { "epoch": 0.4537290121046466, "grad_norm": 2.392486333847046, "learning_rate": 9.573526477101006e-06, "loss": 0.6022, "step": 3486 }, { "epoch": 0.4541194845763374, "grad_norm": 2.679281234741211, "learning_rate": 9.572681526694079e-06, "loss": 0.6724, "step": 3489 }, { "epoch": 0.4545099570480281, "grad_norm": 2.970829725265503, "learning_rate": 9.571835777449307e-06, "loss": 0.6936, "step": 3492 }, { "epoch": 0.4549004295197189, "grad_norm": 2.3695054054260254, "learning_rate": 9.570989229514445e-06, "loss": 0.5728, "step": 3495 }, { "epoch": 0.4552909019914096, "grad_norm": 2.7799527645111084, "learning_rate": 9.57014188303738e-06, "loss": 0.6046, "step": 3498 }, { "epoch": 0.45568137446310036, "grad_norm": 2.7889013290405273, "learning_rate": 9.569293738166141e-06, "loss": 0.7424, "step": 3501 }, { "epoch": 0.4560718469347911, "grad_norm": 4.006762504577637, "learning_rate": 9.568444795048899e-06, "loss": 0.62, "step": 3504 }, { "epoch": 0.45646231940648185, "grad_norm": 2.510829448699951, "learning_rate": 9.567595053833963e-06, "loss": 0.6677, "step": 3507 }, { "epoch": 0.45685279187817257, "grad_norm": 2.6628668308258057, "learning_rate": 9.566744514669777e-06, "loss": 0.6483, "step": 3510 }, { "epoch": 0.45724326434986334, "grad_norm": 2.8696441650390625, "learning_rate": 9.565893177704934e-06, "loss": 0.6337, "step": 3513 }, { "epoch": 0.45763373682155406, "grad_norm": 2.641186237335205, "learning_rate": 9.565041043088157e-06, "loss": 0.6788, "step": 3516 }, { "epoch": 0.45802420929324483, "grad_norm": 3.316789150238037, "learning_rate": 9.564188110968314e-06, "loss": 0.6791, "step": 3519 }, { "epoch": 0.45841468176493555, "grad_norm": 2.8417608737945557, "learning_rate": 9.563334381494409e-06, "loss": 0.6841, "step": 3522 }, { "epoch": 0.4588051542366263, "grad_norm": 3.1960368156433105, "learning_rate": 9.562479854815587e-06, "loss": 0.6391, "step": 3525 }, { "epoch": 0.45919562670831704, "grad_norm": 2.585097074508667, "learning_rate": 9.561624531081132e-06, "loss": 0.7299, "step": 3528 }, { "epoch": 0.4595860991800078, "grad_norm": 2.7368311882019043, "learning_rate": 9.56076841044047e-06, "loss": 0.6428, "step": 3531 }, { "epoch": 0.45997657165169853, "grad_norm": 2.4325826168060303, "learning_rate": 9.559911493043159e-06, "loss": 0.617, "step": 3534 }, { "epoch": 0.4603670441233893, "grad_norm": 2.53668212890625, "learning_rate": 9.559053779038903e-06, "loss": 0.6855, "step": 3537 }, { "epoch": 0.4607575165950801, "grad_norm": 3.207590341567993, "learning_rate": 9.558195268577543e-06, "loss": 0.6649, "step": 3540 }, { "epoch": 0.4611479890667708, "grad_norm": 2.7838118076324463, "learning_rate": 9.557335961809059e-06, "loss": 0.6823, "step": 3543 }, { "epoch": 0.46153846153846156, "grad_norm": 2.8149774074554443, "learning_rate": 9.556475858883569e-06, "loss": 0.6601, "step": 3546 }, { "epoch": 0.4619289340101523, "grad_norm": 2.708364725112915, "learning_rate": 9.555614959951333e-06, "loss": 0.7265, "step": 3549 }, { "epoch": 0.46231940648184305, "grad_norm": 3.0776867866516113, "learning_rate": 9.554753265162746e-06, "loss": 0.573, "step": 3552 }, { "epoch": 0.46270987895353377, "grad_norm": 2.5604937076568604, "learning_rate": 9.553890774668347e-06, "loss": 0.6187, "step": 3555 }, { "epoch": 0.46310035142522454, "grad_norm": 3.412839651107788, "learning_rate": 9.553027488618806e-06, "loss": 0.6175, "step": 3558 }, { "epoch": 0.46349082389691526, "grad_norm": 2.88525128364563, "learning_rate": 9.552163407164945e-06, "loss": 0.7192, "step": 3561 }, { "epoch": 0.46388129636860603, "grad_norm": 3.187467098236084, "learning_rate": 9.551298530457711e-06, "loss": 0.6381, "step": 3564 }, { "epoch": 0.46427176884029675, "grad_norm": 2.5330684185028076, "learning_rate": 9.5504328586482e-06, "loss": 0.7419, "step": 3567 }, { "epoch": 0.4646622413119875, "grad_norm": 3.115802049636841, "learning_rate": 9.54956639188764e-06, "loss": 0.6623, "step": 3570 }, { "epoch": 0.46505271378367824, "grad_norm": 2.369229555130005, "learning_rate": 9.548699130327401e-06, "loss": 0.6891, "step": 3573 }, { "epoch": 0.465443186255369, "grad_norm": 3.0128095149993896, "learning_rate": 9.547831074118995e-06, "loss": 0.6068, "step": 3576 }, { "epoch": 0.46583365872705973, "grad_norm": 2.500648021697998, "learning_rate": 9.546962223414067e-06, "loss": 0.7091, "step": 3579 }, { "epoch": 0.4662241311987505, "grad_norm": 2.3269920349121094, "learning_rate": 9.546092578364403e-06, "loss": 0.6648, "step": 3582 }, { "epoch": 0.4666146036704412, "grad_norm": 2.717773675918579, "learning_rate": 9.545222139121931e-06, "loss": 0.5747, "step": 3585 }, { "epoch": 0.467005076142132, "grad_norm": 3.3133246898651123, "learning_rate": 9.544350905838712e-06, "loss": 0.6512, "step": 3588 }, { "epoch": 0.4673955486138227, "grad_norm": 2.5333547592163086, "learning_rate": 9.54347887866695e-06, "loss": 0.5804, "step": 3591 }, { "epoch": 0.4677860210855135, "grad_norm": 2.5862746238708496, "learning_rate": 9.542606057758986e-06, "loss": 0.6361, "step": 3594 }, { "epoch": 0.4681764935572042, "grad_norm": 2.544423818588257, "learning_rate": 9.5417324432673e-06, "loss": 0.6235, "step": 3597 }, { "epoch": 0.46856696602889497, "grad_norm": 2.701850652694702, "learning_rate": 9.540858035344509e-06, "loss": 0.725, "step": 3600 }, { "epoch": 0.4689574385005857, "grad_norm": 2.778111457824707, "learning_rate": 9.539982834143373e-06, "loss": 0.5713, "step": 3603 }, { "epoch": 0.46934791097227646, "grad_norm": 4.258044719696045, "learning_rate": 9.539106839816787e-06, "loss": 0.7481, "step": 3606 }, { "epoch": 0.4697383834439672, "grad_norm": 4.864741325378418, "learning_rate": 9.538230052517786e-06, "loss": 0.7349, "step": 3609 }, { "epoch": 0.47012885591565795, "grad_norm": 2.560973882675171, "learning_rate": 9.53735247239954e-06, "loss": 0.5515, "step": 3612 }, { "epoch": 0.47051932838734867, "grad_norm": 2.455519676208496, "learning_rate": 9.536474099615362e-06, "loss": 0.6881, "step": 3615 }, { "epoch": 0.47090980085903944, "grad_norm": 2.9638755321502686, "learning_rate": 9.535594934318702e-06, "loss": 0.6421, "step": 3618 }, { "epoch": 0.47130027333073016, "grad_norm": 3.095149517059326, "learning_rate": 9.53471497666315e-06, "loss": 0.6678, "step": 3621 }, { "epoch": 0.47169074580242093, "grad_norm": 3.0638108253479004, "learning_rate": 9.533834226802431e-06, "loss": 0.6058, "step": 3624 }, { "epoch": 0.4720812182741117, "grad_norm": 2.333308696746826, "learning_rate": 9.53295268489041e-06, "loss": 0.5788, "step": 3627 }, { "epoch": 0.4724716907458024, "grad_norm": 3.2323508262634277, "learning_rate": 9.53207035108109e-06, "loss": 0.7214, "step": 3630 }, { "epoch": 0.4728621632174932, "grad_norm": 2.5078964233398438, "learning_rate": 9.531187225528615e-06, "loss": 0.7241, "step": 3633 }, { "epoch": 0.4732526356891839, "grad_norm": 2.5901105403900146, "learning_rate": 9.530303308387263e-06, "loss": 0.707, "step": 3636 }, { "epoch": 0.4736431081608747, "grad_norm": 2.5510504245758057, "learning_rate": 9.529418599811455e-06, "loss": 0.7274, "step": 3639 }, { "epoch": 0.4740335806325654, "grad_norm": 2.424675703048706, "learning_rate": 9.528533099955745e-06, "loss": 0.6885, "step": 3642 }, { "epoch": 0.47442405310425617, "grad_norm": 3.0174453258514404, "learning_rate": 9.527646808974828e-06, "loss": 0.6603, "step": 3645 }, { "epoch": 0.4748145255759469, "grad_norm": 2.4409093856811523, "learning_rate": 9.52675972702354e-06, "loss": 0.684, "step": 3648 }, { "epoch": 0.47520499804763766, "grad_norm": 2.731062889099121, "learning_rate": 9.52587185425685e-06, "loss": 0.7147, "step": 3651 }, { "epoch": 0.4755954705193284, "grad_norm": 2.9121267795562744, "learning_rate": 9.524983190829868e-06, "loss": 0.7414, "step": 3654 }, { "epoch": 0.47598594299101915, "grad_norm": 2.500781297683716, "learning_rate": 9.52409373689784e-06, "loss": 0.6857, "step": 3657 }, { "epoch": 0.47637641546270987, "grad_norm": 2.709920644760132, "learning_rate": 9.523203492616158e-06, "loss": 0.6614, "step": 3660 }, { "epoch": 0.47676688793440064, "grad_norm": 2.8489487171173096, "learning_rate": 9.522312458140338e-06, "loss": 0.5616, "step": 3663 }, { "epoch": 0.47715736040609136, "grad_norm": 3.2674407958984375, "learning_rate": 9.521420633626045e-06, "loss": 0.5813, "step": 3666 }, { "epoch": 0.47754783287778213, "grad_norm": 2.6726903915405273, "learning_rate": 9.52052801922908e-06, "loss": 0.6633, "step": 3669 }, { "epoch": 0.47793830534947285, "grad_norm": 2.6989340782165527, "learning_rate": 9.51963461510538e-06, "loss": 0.6985, "step": 3672 }, { "epoch": 0.4783287778211636, "grad_norm": 2.936947822570801, "learning_rate": 9.51874042141102e-06, "loss": 0.6472, "step": 3675 }, { "epoch": 0.47871925029285434, "grad_norm": 2.9338979721069336, "learning_rate": 9.517845438302213e-06, "loss": 0.6153, "step": 3678 }, { "epoch": 0.4791097227645451, "grad_norm": 2.5126919746398926, "learning_rate": 9.516949665935314e-06, "loss": 0.6328, "step": 3681 }, { "epoch": 0.4795001952362358, "grad_norm": 2.4342703819274902, "learning_rate": 9.516053104466811e-06, "loss": 0.6888, "step": 3684 }, { "epoch": 0.4798906677079266, "grad_norm": 3.6857855319976807, "learning_rate": 9.51515575405333e-06, "loss": 0.614, "step": 3687 }, { "epoch": 0.4802811401796173, "grad_norm": 2.602360963821411, "learning_rate": 9.514257614851639e-06, "loss": 0.7064, "step": 3690 }, { "epoch": 0.4806716126513081, "grad_norm": 3.5609123706817627, "learning_rate": 9.513358687018636e-06, "loss": 0.78, "step": 3693 }, { "epoch": 0.4810620851229988, "grad_norm": 2.5246832370758057, "learning_rate": 9.512458970711366e-06, "loss": 0.6885, "step": 3696 }, { "epoch": 0.4814525575946896, "grad_norm": 2.706555128097534, "learning_rate": 9.51155846608701e-06, "loss": 0.659, "step": 3699 }, { "epoch": 0.4818430300663803, "grad_norm": 3.1037871837615967, "learning_rate": 9.510657173302878e-06, "loss": 0.6932, "step": 3702 }, { "epoch": 0.48223350253807107, "grad_norm": 2.7866525650024414, "learning_rate": 9.509755092516427e-06, "loss": 0.6329, "step": 3705 }, { "epoch": 0.4826239750097618, "grad_norm": 2.528336524963379, "learning_rate": 9.508852223885248e-06, "loss": 0.5991, "step": 3708 }, { "epoch": 0.48301444748145256, "grad_norm": 2.683349609375, "learning_rate": 9.507948567567073e-06, "loss": 0.6348, "step": 3711 }, { "epoch": 0.48340491995314333, "grad_norm": 3.169064998626709, "learning_rate": 9.507044123719764e-06, "loss": 0.758, "step": 3714 }, { "epoch": 0.48379539242483405, "grad_norm": 2.669492721557617, "learning_rate": 9.50613889250133e-06, "loss": 0.6458, "step": 3717 }, { "epoch": 0.4841858648965248, "grad_norm": 4.006582736968994, "learning_rate": 9.50523287406991e-06, "loss": 0.6128, "step": 3720 }, { "epoch": 0.48457633736821554, "grad_norm": 2.645681858062744, "learning_rate": 9.504326068583784e-06, "loss": 0.651, "step": 3723 }, { "epoch": 0.4849668098399063, "grad_norm": 2.4632298946380615, "learning_rate": 9.503418476201371e-06, "loss": 0.5938, "step": 3726 }, { "epoch": 0.485357282311597, "grad_norm": 2.407679557800293, "learning_rate": 9.502510097081223e-06, "loss": 0.7057, "step": 3729 }, { "epoch": 0.4857477547832878, "grad_norm": 2.6753084659576416, "learning_rate": 9.501600931382034e-06, "loss": 0.7167, "step": 3732 }, { "epoch": 0.4861382272549785, "grad_norm": 2.707786798477173, "learning_rate": 9.500690979262632e-06, "loss": 0.7135, "step": 3735 }, { "epoch": 0.4865286997266693, "grad_norm": 2.5371201038360596, "learning_rate": 9.499780240881981e-06, "loss": 0.7335, "step": 3738 }, { "epoch": 0.48691917219836, "grad_norm": 2.661376953125, "learning_rate": 9.498868716399192e-06, "loss": 0.6258, "step": 3741 }, { "epoch": 0.4873096446700508, "grad_norm": 2.647146463394165, "learning_rate": 9.4979564059735e-06, "loss": 0.7245, "step": 3744 }, { "epoch": 0.4877001171417415, "grad_norm": 3.4181723594665527, "learning_rate": 9.497043309764289e-06, "loss": 0.7563, "step": 3747 }, { "epoch": 0.48809058961343227, "grad_norm": 3.5437850952148438, "learning_rate": 9.496129427931069e-06, "loss": 0.7247, "step": 3750 }, { "epoch": 0.488481062085123, "grad_norm": 3.7329277992248535, "learning_rate": 9.495214760633498e-06, "loss": 0.6169, "step": 3753 }, { "epoch": 0.48887153455681376, "grad_norm": 2.8779706954956055, "learning_rate": 9.494299308031365e-06, "loss": 0.6749, "step": 3756 }, { "epoch": 0.4892620070285045, "grad_norm": 2.7706687450408936, "learning_rate": 9.493383070284597e-06, "loss": 0.6352, "step": 3759 }, { "epoch": 0.48965247950019525, "grad_norm": 2.6291885375976562, "learning_rate": 9.49246604755326e-06, "loss": 0.5958, "step": 3762 }, { "epoch": 0.49004295197188597, "grad_norm": 2.48679256439209, "learning_rate": 9.491548239997555e-06, "loss": 0.6365, "step": 3765 }, { "epoch": 0.49043342444357674, "grad_norm": 2.522580862045288, "learning_rate": 9.490629647777821e-06, "loss": 0.6723, "step": 3768 }, { "epoch": 0.49082389691526745, "grad_norm": 2.4899022579193115, "learning_rate": 9.489710271054533e-06, "loss": 0.651, "step": 3771 }, { "epoch": 0.4912143693869582, "grad_norm": 3.645275592803955, "learning_rate": 9.488790109988308e-06, "loss": 0.6087, "step": 3774 }, { "epoch": 0.49160484185864894, "grad_norm": 2.5229358673095703, "learning_rate": 9.487869164739893e-06, "loss": 0.6372, "step": 3777 }, { "epoch": 0.4919953143303397, "grad_norm": 2.5674540996551514, "learning_rate": 9.486947435470176e-06, "loss": 0.585, "step": 3780 }, { "epoch": 0.49238578680203043, "grad_norm": 2.749277353286743, "learning_rate": 9.486024922340181e-06, "loss": 0.6834, "step": 3783 }, { "epoch": 0.4927762592737212, "grad_norm": 2.4357573986053467, "learning_rate": 9.485101625511071e-06, "loss": 0.6562, "step": 3786 }, { "epoch": 0.4931667317454119, "grad_norm": 2.85579776763916, "learning_rate": 9.48417754514414e-06, "loss": 0.6401, "step": 3789 }, { "epoch": 0.4935572042171027, "grad_norm": 2.402381658554077, "learning_rate": 9.483252681400825e-06, "loss": 0.7112, "step": 3792 }, { "epoch": 0.4939476766887934, "grad_norm": 3.6302082538604736, "learning_rate": 9.4823270344427e-06, "loss": 0.71, "step": 3795 }, { "epoch": 0.4943381491604842, "grad_norm": 2.943791627883911, "learning_rate": 9.48140060443147e-06, "loss": 0.657, "step": 3798 }, { "epoch": 0.49472862163217496, "grad_norm": 3.03796648979187, "learning_rate": 9.480473391528982e-06, "loss": 0.7657, "step": 3801 }, { "epoch": 0.4951190941038657, "grad_norm": 3.2505815029144287, "learning_rate": 9.479545395897219e-06, "loss": 0.6439, "step": 3804 }, { "epoch": 0.49550956657555645, "grad_norm": 2.497419834136963, "learning_rate": 9.478616617698297e-06, "loss": 0.7109, "step": 3807 }, { "epoch": 0.49590003904724717, "grad_norm": 2.562572956085205, "learning_rate": 9.477687057094476e-06, "loss": 0.7191, "step": 3810 }, { "epoch": 0.49629051151893794, "grad_norm": 2.6476757526397705, "learning_rate": 9.476756714248142e-06, "loss": 0.6237, "step": 3813 }, { "epoch": 0.49668098399062866, "grad_norm": 2.27634859085083, "learning_rate": 9.475825589321831e-06, "loss": 0.635, "step": 3816 }, { "epoch": 0.4970714564623194, "grad_norm": 2.4250454902648926, "learning_rate": 9.474893682478205e-06, "loss": 0.6073, "step": 3819 }, { "epoch": 0.49746192893401014, "grad_norm": 2.7182376384735107, "learning_rate": 9.473960993880068e-06, "loss": 0.6665, "step": 3822 }, { "epoch": 0.4978524014057009, "grad_norm": 2.8492777347564697, "learning_rate": 9.473027523690355e-06, "loss": 0.5861, "step": 3825 }, { "epoch": 0.49824287387739163, "grad_norm": 2.5110251903533936, "learning_rate": 9.472093272072142e-06, "loss": 0.5882, "step": 3828 }, { "epoch": 0.4986333463490824, "grad_norm": 2.5196032524108887, "learning_rate": 9.471158239188645e-06, "loss": 0.7131, "step": 3831 }, { "epoch": 0.4990238188207731, "grad_norm": 2.7564871311187744, "learning_rate": 9.470222425203209e-06, "loss": 0.7012, "step": 3834 }, { "epoch": 0.4994142912924639, "grad_norm": 2.5324935913085938, "learning_rate": 9.469285830279318e-06, "loss": 0.6357, "step": 3837 }, { "epoch": 0.4998047637641546, "grad_norm": 2.3627541065216064, "learning_rate": 9.468348454580596e-06, "loss": 0.6458, "step": 3840 }, { "epoch": 0.5001952362358454, "grad_norm": 3.514322519302368, "learning_rate": 9.467410298270798e-06, "loss": 0.5593, "step": 3843 }, { "epoch": 0.5005857087075362, "grad_norm": 2.6262550354003906, "learning_rate": 9.46647136151382e-06, "loss": 0.6549, "step": 3846 }, { "epoch": 0.5009761811792268, "grad_norm": 3.208850860595703, "learning_rate": 9.46553164447369e-06, "loss": 0.6028, "step": 3849 }, { "epoch": 0.5013666536509176, "grad_norm": 2.4970576763153076, "learning_rate": 9.464591147314577e-06, "loss": 0.712, "step": 3852 }, { "epoch": 0.5017571261226084, "grad_norm": 3.3862645626068115, "learning_rate": 9.463649870200782e-06, "loss": 0.5893, "step": 3855 }, { "epoch": 0.5021475985942991, "grad_norm": 5.486196041107178, "learning_rate": 9.462707813296746e-06, "loss": 0.7407, "step": 3858 }, { "epoch": 0.5025380710659898, "grad_norm": 2.571362018585205, "learning_rate": 9.46176497676704e-06, "loss": 0.5659, "step": 3861 }, { "epoch": 0.5029285435376806, "grad_norm": 2.665663957595825, "learning_rate": 9.460821360776382e-06, "loss": 0.7724, "step": 3864 }, { "epoch": 0.5033190160093713, "grad_norm": 3.4792850017547607, "learning_rate": 9.459876965489614e-06, "loss": 0.6739, "step": 3867 }, { "epoch": 0.5037094884810621, "grad_norm": 3.114149332046509, "learning_rate": 9.458931791071723e-06, "loss": 0.6073, "step": 3870 }, { "epoch": 0.5040999609527528, "grad_norm": 4.276490211486816, "learning_rate": 9.457985837687832e-06, "loss": 0.7193, "step": 3873 }, { "epoch": 0.5044904334244436, "grad_norm": 2.727604389190674, "learning_rate": 9.457039105503188e-06, "loss": 0.6752, "step": 3876 }, { "epoch": 0.5048809058961343, "grad_norm": 2.9666244983673096, "learning_rate": 9.456091594683192e-06, "loss": 0.7623, "step": 3879 }, { "epoch": 0.5052713783678251, "grad_norm": 2.6261138916015625, "learning_rate": 9.455143305393367e-06, "loss": 0.7193, "step": 3882 }, { "epoch": 0.5056618508395159, "grad_norm": 3.4070770740509033, "learning_rate": 9.454194237799379e-06, "loss": 0.6537, "step": 3885 }, { "epoch": 0.5060523233112065, "grad_norm": 2.4243500232696533, "learning_rate": 9.453244392067028e-06, "loss": 0.6781, "step": 3888 }, { "epoch": 0.5064427957828973, "grad_norm": 3.1091771125793457, "learning_rate": 9.45229376836225e-06, "loss": 0.6486, "step": 3891 }, { "epoch": 0.5068332682545881, "grad_norm": 2.519580841064453, "learning_rate": 9.451342366851117e-06, "loss": 0.6303, "step": 3894 }, { "epoch": 0.5072237407262788, "grad_norm": 2.6116557121276855, "learning_rate": 9.450390187699838e-06, "loss": 0.6344, "step": 3897 }, { "epoch": 0.5076142131979695, "grad_norm": 2.69616961479187, "learning_rate": 9.449437231074755e-06, "loss": 0.6245, "step": 3900 }, { "epoch": 0.5080046856696603, "grad_norm": 2.675161600112915, "learning_rate": 9.448483497142347e-06, "loss": 0.6087, "step": 3903 }, { "epoch": 0.5083951581413511, "grad_norm": 2.4945101737976074, "learning_rate": 9.44752898606923e-06, "loss": 0.6146, "step": 3906 }, { "epoch": 0.5087856306130418, "grad_norm": 2.615118980407715, "learning_rate": 9.446573698022155e-06, "loss": 0.8205, "step": 3909 }, { "epoch": 0.5091761030847325, "grad_norm": 4.189955234527588, "learning_rate": 9.445617633168012e-06, "loss": 0.6343, "step": 3912 }, { "epoch": 0.5095665755564233, "grad_norm": 2.6967740058898926, "learning_rate": 9.444660791673818e-06, "loss": 0.58, "step": 3915 }, { "epoch": 0.509957048028114, "grad_norm": 2.989888906478882, "learning_rate": 9.443703173706734e-06, "loss": 0.6818, "step": 3918 }, { "epoch": 0.5103475204998048, "grad_norm": 3.485555410385132, "learning_rate": 9.442744779434054e-06, "loss": 0.612, "step": 3921 }, { "epoch": 0.5107379929714955, "grad_norm": 2.7845234870910645, "learning_rate": 9.441785609023208e-06, "loss": 0.6826, "step": 3924 }, { "epoch": 0.5111284654431862, "grad_norm": 2.6713454723358154, "learning_rate": 9.44082566264176e-06, "loss": 0.6219, "step": 3927 }, { "epoch": 0.511518937914877, "grad_norm": 3.361051321029663, "learning_rate": 9.43986494045741e-06, "loss": 0.669, "step": 3930 }, { "epoch": 0.5119094103865678, "grad_norm": 3.308380365371704, "learning_rate": 9.438903442637997e-06, "loss": 0.6721, "step": 3933 }, { "epoch": 0.5122998828582584, "grad_norm": 2.7254140377044678, "learning_rate": 9.43794116935149e-06, "loss": 0.6372, "step": 3936 }, { "epoch": 0.5126903553299492, "grad_norm": 2.9412028789520264, "learning_rate": 9.436978120765996e-06, "loss": 0.7481, "step": 3939 }, { "epoch": 0.51308082780164, "grad_norm": 2.711512804031372, "learning_rate": 9.436014297049758e-06, "loss": 0.7198, "step": 3942 }, { "epoch": 0.5134713002733308, "grad_norm": 2.667794942855835, "learning_rate": 9.435049698371155e-06, "loss": 0.681, "step": 3945 }, { "epoch": 0.5138617727450214, "grad_norm": 3.5074334144592285, "learning_rate": 9.4340843248987e-06, "loss": 0.6905, "step": 3948 }, { "epoch": 0.5142522452167122, "grad_norm": 3.1205289363861084, "learning_rate": 9.43311817680104e-06, "loss": 0.7021, "step": 3951 }, { "epoch": 0.514642717688403, "grad_norm": 2.667708396911621, "learning_rate": 9.432151254246961e-06, "loss": 0.7106, "step": 3954 }, { "epoch": 0.5150331901600937, "grad_norm": 2.348116874694824, "learning_rate": 9.431183557405383e-06, "loss": 0.5503, "step": 3957 }, { "epoch": 0.5154236626317844, "grad_norm": 3.321404457092285, "learning_rate": 9.430215086445358e-06, "loss": 0.6023, "step": 3960 }, { "epoch": 0.5158141351034752, "grad_norm": 2.7746543884277344, "learning_rate": 9.429245841536079e-06, "loss": 0.678, "step": 3963 }, { "epoch": 0.516204607575166, "grad_norm": 2.634472131729126, "learning_rate": 9.428275822846868e-06, "loss": 0.6451, "step": 3966 }, { "epoch": 0.5165950800468567, "grad_norm": 2.5320868492126465, "learning_rate": 9.427305030547185e-06, "loss": 0.6206, "step": 3969 }, { "epoch": 0.5169855525185475, "grad_norm": 2.5869338512420654, "learning_rate": 9.42633346480663e-06, "loss": 0.6766, "step": 3972 }, { "epoch": 0.5173760249902382, "grad_norm": 2.589397430419922, "learning_rate": 9.425361125794928e-06, "loss": 0.7054, "step": 3975 }, { "epoch": 0.5177664974619289, "grad_norm": 2.9746224880218506, "learning_rate": 9.424388013681947e-06, "loss": 0.7086, "step": 3978 }, { "epoch": 0.5181569699336197, "grad_norm": 2.565460205078125, "learning_rate": 9.42341412863769e-06, "loss": 0.6858, "step": 3981 }, { "epoch": 0.5185474424053105, "grad_norm": 2.5512335300445557, "learning_rate": 9.422439470832288e-06, "loss": 0.6332, "step": 3984 }, { "epoch": 0.5189379148770011, "grad_norm": 3.2780306339263916, "learning_rate": 9.421464040436012e-06, "loss": 0.664, "step": 3987 }, { "epoch": 0.5193283873486919, "grad_norm": 2.374025821685791, "learning_rate": 9.420487837619272e-06, "loss": 0.5543, "step": 3990 }, { "epoch": 0.5197188598203827, "grad_norm": 2.818821907043457, "learning_rate": 9.419510862552608e-06, "loss": 0.6181, "step": 3993 }, { "epoch": 0.5201093322920735, "grad_norm": 2.3839192390441895, "learning_rate": 9.41853311540669e-06, "loss": 0.6783, "step": 3996 }, { "epoch": 0.5204998047637641, "grad_norm": 2.5750620365142822, "learning_rate": 9.417554596352334e-06, "loss": 0.6101, "step": 3999 }, { "epoch": 0.5208902772354549, "grad_norm": 2.7210500240325928, "learning_rate": 9.416575305560482e-06, "loss": 0.749, "step": 4002 }, { "epoch": 0.5212807497071457, "grad_norm": 2.856265068054199, "learning_rate": 9.415595243202217e-06, "loss": 0.6108, "step": 4005 }, { "epoch": 0.5216712221788364, "grad_norm": 2.2772696018218994, "learning_rate": 9.41461440944875e-06, "loss": 0.7071, "step": 4008 }, { "epoch": 0.5220616946505271, "grad_norm": 2.629115104675293, "learning_rate": 9.413632804471434e-06, "loss": 0.6378, "step": 4011 }, { "epoch": 0.5224521671222179, "grad_norm": 2.853085517883301, "learning_rate": 9.412650428441752e-06, "loss": 0.6871, "step": 4014 }, { "epoch": 0.5228426395939086, "grad_norm": 2.5856900215148926, "learning_rate": 9.411667281531322e-06, "loss": 0.6206, "step": 4017 }, { "epoch": 0.5232331120655994, "grad_norm": 2.705491065979004, "learning_rate": 9.4106833639119e-06, "loss": 0.5966, "step": 4020 }, { "epoch": 0.5236235845372901, "grad_norm": 2.5959479808807373, "learning_rate": 9.409698675755372e-06, "loss": 0.6265, "step": 4023 }, { "epoch": 0.5240140570089808, "grad_norm": 2.661630630493164, "learning_rate": 9.408713217233762e-06, "loss": 0.6688, "step": 4026 }, { "epoch": 0.5244045294806716, "grad_norm": 2.437685966491699, "learning_rate": 9.40772698851923e-06, "loss": 0.6566, "step": 4029 }, { "epoch": 0.5247950019523624, "grad_norm": 2.658416986465454, "learning_rate": 9.406739989784061e-06, "loss": 0.679, "step": 4032 }, { "epoch": 0.5251854744240531, "grad_norm": 3.007324695587158, "learning_rate": 9.40575222120069e-06, "loss": 0.6227, "step": 4035 }, { "epoch": 0.5255759468957438, "grad_norm": 2.6084835529327393, "learning_rate": 9.404763682941673e-06, "loss": 0.6149, "step": 4038 }, { "epoch": 0.5259664193674346, "grad_norm": 3.3151960372924805, "learning_rate": 9.403774375179707e-06, "loss": 0.6682, "step": 4041 }, { "epoch": 0.5263568918391254, "grad_norm": 2.847337007522583, "learning_rate": 9.402784298087622e-06, "loss": 0.596, "step": 4044 }, { "epoch": 0.526747364310816, "grad_norm": 3.080592393875122, "learning_rate": 9.401793451838382e-06, "loss": 0.7028, "step": 4047 }, { "epoch": 0.5271378367825068, "grad_norm": 2.5581657886505127, "learning_rate": 9.400801836605085e-06, "loss": 0.6566, "step": 4050 }, { "epoch": 0.5275283092541976, "grad_norm": 2.55700421333313, "learning_rate": 9.399809452560966e-06, "loss": 0.6734, "step": 4053 }, { "epoch": 0.5279187817258884, "grad_norm": 2.44401478767395, "learning_rate": 9.398816299879392e-06, "loss": 0.5712, "step": 4056 }, { "epoch": 0.5283092541975791, "grad_norm": 2.4714529514312744, "learning_rate": 9.397822378733864e-06, "loss": 0.6925, "step": 4059 }, { "epoch": 0.5286997266692698, "grad_norm": 3.4595723152160645, "learning_rate": 9.396827689298018e-06, "loss": 0.6653, "step": 4062 }, { "epoch": 0.5290901991409606, "grad_norm": 2.528628349304199, "learning_rate": 9.395832231745624e-06, "loss": 0.6362, "step": 4065 }, { "epoch": 0.5294806716126513, "grad_norm": 2.4336354732513428, "learning_rate": 9.394836006250587e-06, "loss": 0.6348, "step": 4068 }, { "epoch": 0.5298711440843421, "grad_norm": 2.7131993770599365, "learning_rate": 9.393839012986944e-06, "loss": 0.675, "step": 4071 }, { "epoch": 0.5302616165560328, "grad_norm": 2.5495808124542236, "learning_rate": 9.39284125212887e-06, "loss": 0.6013, "step": 4074 }, { "epoch": 0.5306520890277235, "grad_norm": 2.3943541049957275, "learning_rate": 9.39184272385067e-06, "loss": 0.653, "step": 4077 }, { "epoch": 0.5310425614994143, "grad_norm": 2.781402111053467, "learning_rate": 9.390843428326785e-06, "loss": 0.7808, "step": 4080 }, { "epoch": 0.5314330339711051, "grad_norm": 3.896851062774658, "learning_rate": 9.38984336573179e-06, "loss": 0.6671, "step": 4083 }, { "epoch": 0.5318235064427957, "grad_norm": 2.4791414737701416, "learning_rate": 9.388842536240395e-06, "loss": 0.7199, "step": 4086 }, { "epoch": 0.5322139789144865, "grad_norm": 2.551300048828125, "learning_rate": 9.387840940027439e-06, "loss": 0.5786, "step": 4089 }, { "epoch": 0.5326044513861773, "grad_norm": 2.9271490573883057, "learning_rate": 9.386838577267906e-06, "loss": 0.755, "step": 4092 }, { "epoch": 0.5329949238578681, "grad_norm": 2.4005637168884277, "learning_rate": 9.385835448136897e-06, "loss": 0.662, "step": 4095 }, { "epoch": 0.5333853963295587, "grad_norm": 2.360596179962158, "learning_rate": 9.384831552809665e-06, "loss": 0.6733, "step": 4098 }, { "epoch": 0.5337758688012495, "grad_norm": 2.809666395187378, "learning_rate": 9.383826891461583e-06, "loss": 0.5828, "step": 4101 }, { "epoch": 0.5341663412729403, "grad_norm": 2.80281138420105, "learning_rate": 9.382821464268166e-06, "loss": 0.6344, "step": 4104 }, { "epoch": 0.534556813744631, "grad_norm": 2.5210893154144287, "learning_rate": 9.38181527140506e-06, "loss": 0.6828, "step": 4107 }, { "epoch": 0.5349472862163217, "grad_norm": 3.6144542694091797, "learning_rate": 9.380808313048045e-06, "loss": 0.7309, "step": 4110 }, { "epoch": 0.5353377586880125, "grad_norm": 2.6584270000457764, "learning_rate": 9.379800589373032e-06, "loss": 0.6842, "step": 4113 }, { "epoch": 0.5357282311597032, "grad_norm": 2.685014247894287, "learning_rate": 9.378792100556069e-06, "loss": 0.64, "step": 4116 }, { "epoch": 0.536118703631394, "grad_norm": 2.667910099029541, "learning_rate": 9.37778284677334e-06, "loss": 0.7603, "step": 4119 }, { "epoch": 0.5365091761030847, "grad_norm": 2.820080041885376, "learning_rate": 9.376772828201155e-06, "loss": 0.6083, "step": 4122 }, { "epoch": 0.5368996485747755, "grad_norm": 2.422607898712158, "learning_rate": 9.375762045015966e-06, "loss": 0.6028, "step": 4125 }, { "epoch": 0.5372901210464662, "grad_norm": 3.0713932514190674, "learning_rate": 9.374750497394352e-06, "loss": 0.7543, "step": 4128 }, { "epoch": 0.537680593518157, "grad_norm": 2.311880111694336, "learning_rate": 9.373738185513028e-06, "loss": 0.6959, "step": 4131 }, { "epoch": 0.5380710659898477, "grad_norm": 2.619349241256714, "learning_rate": 9.372725109548846e-06, "loss": 0.7416, "step": 4134 }, { "epoch": 0.5384615384615384, "grad_norm": 2.9388935565948486, "learning_rate": 9.371711269678786e-06, "loss": 0.737, "step": 4137 }, { "epoch": 0.5388520109332292, "grad_norm": 2.5478711128234863, "learning_rate": 9.370696666079964e-06, "loss": 0.7358, "step": 4140 }, { "epoch": 0.53924248340492, "grad_norm": 4.10902214050293, "learning_rate": 9.369681298929629e-06, "loss": 0.6521, "step": 4143 }, { "epoch": 0.5396329558766108, "grad_norm": 2.6128830909729004, "learning_rate": 9.368665168405163e-06, "loss": 0.663, "step": 4146 }, { "epoch": 0.5400234283483014, "grad_norm": 3.5728492736816406, "learning_rate": 9.367648274684082e-06, "loss": 0.6235, "step": 4149 }, { "epoch": 0.5404139008199922, "grad_norm": 3.0014986991882324, "learning_rate": 9.366630617944037e-06, "loss": 0.607, "step": 4152 }, { "epoch": 0.540804373291683, "grad_norm": 2.6722731590270996, "learning_rate": 9.365612198362807e-06, "loss": 0.6592, "step": 4155 }, { "epoch": 0.5411948457633737, "grad_norm": 3.075171947479248, "learning_rate": 9.364593016118311e-06, "loss": 0.7503, "step": 4158 }, { "epoch": 0.5415853182350644, "grad_norm": 2.6275832653045654, "learning_rate": 9.363573071388598e-06, "loss": 0.6354, "step": 4161 }, { "epoch": 0.5419757907067552, "grad_norm": 2.640997886657715, "learning_rate": 9.362552364351849e-06, "loss": 0.6437, "step": 4164 }, { "epoch": 0.5423662631784459, "grad_norm": 4.610156059265137, "learning_rate": 9.361530895186378e-06, "loss": 0.7229, "step": 4167 }, { "epoch": 0.5427567356501367, "grad_norm": 2.4628376960754395, "learning_rate": 9.360508664070634e-06, "loss": 0.7439, "step": 4170 }, { "epoch": 0.5431472081218274, "grad_norm": 3.297962188720703, "learning_rate": 9.359485671183202e-06, "loss": 0.7502, "step": 4173 }, { "epoch": 0.5435376805935181, "grad_norm": 2.7493808269500732, "learning_rate": 9.358461916702793e-06, "loss": 0.5738, "step": 4176 }, { "epoch": 0.5439281530652089, "grad_norm": 2.37622332572937, "learning_rate": 9.357437400808256e-06, "loss": 0.6252, "step": 4179 }, { "epoch": 0.5443186255368997, "grad_norm": 2.4607114791870117, "learning_rate": 9.356412123678572e-06, "loss": 0.6813, "step": 4182 }, { "epoch": 0.5447090980085904, "grad_norm": 3.2492547035217285, "learning_rate": 9.355386085492855e-06, "loss": 0.6283, "step": 4185 }, { "epoch": 0.5450995704802811, "grad_norm": 2.3244526386260986, "learning_rate": 9.35435928643035e-06, "loss": 0.5893, "step": 4188 }, { "epoch": 0.5454900429519719, "grad_norm": 3.75888729095459, "learning_rate": 9.353331726670438e-06, "loss": 0.7102, "step": 4191 }, { "epoch": 0.5458805154236627, "grad_norm": 2.8710289001464844, "learning_rate": 9.352303406392634e-06, "loss": 0.6786, "step": 4194 }, { "epoch": 0.5462709878953533, "grad_norm": 2.649484634399414, "learning_rate": 9.351274325776578e-06, "loss": 0.6359, "step": 4197 }, { "epoch": 0.5466614603670441, "grad_norm": 2.539466381072998, "learning_rate": 9.350244485002051e-06, "loss": 0.7931, "step": 4200 }, { "epoch": 0.5470519328387349, "grad_norm": 2.964155912399292, "learning_rate": 9.349213884248967e-06, "loss": 0.6971, "step": 4203 }, { "epoch": 0.5474424053104257, "grad_norm": 2.4749128818511963, "learning_rate": 9.348182523697365e-06, "loss": 0.6917, "step": 4206 }, { "epoch": 0.5478328777821163, "grad_norm": 2.6950347423553467, "learning_rate": 9.347150403527422e-06, "loss": 0.6089, "step": 4209 }, { "epoch": 0.5482233502538071, "grad_norm": 3.4852073192596436, "learning_rate": 9.34611752391945e-06, "loss": 0.6896, "step": 4212 }, { "epoch": 0.5486138227254979, "grad_norm": 2.6837730407714844, "learning_rate": 9.34508388505389e-06, "loss": 0.6311, "step": 4215 }, { "epoch": 0.5490042951971886, "grad_norm": 3.1561684608459473, "learning_rate": 9.344049487111316e-06, "loss": 0.6248, "step": 4218 }, { "epoch": 0.5493947676688793, "grad_norm": 3.7130353450775146, "learning_rate": 9.343014330272432e-06, "loss": 0.6496, "step": 4221 }, { "epoch": 0.5497852401405701, "grad_norm": 3.125425338745117, "learning_rate": 9.341978414718084e-06, "loss": 0.7285, "step": 4224 }, { "epoch": 0.5501757126122608, "grad_norm": 2.448007583618164, "learning_rate": 9.34094174062924e-06, "loss": 0.7151, "step": 4227 }, { "epoch": 0.5505661850839516, "grad_norm": 2.704099655151367, "learning_rate": 9.339904308187006e-06, "loss": 0.7632, "step": 4230 }, { "epoch": 0.5509566575556424, "grad_norm": 2.6874077320098877, "learning_rate": 9.33886611757262e-06, "loss": 0.6707, "step": 4233 }, { "epoch": 0.551347130027333, "grad_norm": 2.497955799102783, "learning_rate": 9.33782716896745e-06, "loss": 0.6996, "step": 4236 }, { "epoch": 0.5517376024990238, "grad_norm": 2.5674867630004883, "learning_rate": 9.336787462553001e-06, "loss": 0.6761, "step": 4239 }, { "epoch": 0.5521280749707146, "grad_norm": 2.8246846199035645, "learning_rate": 9.335746998510902e-06, "loss": 0.7313, "step": 4242 }, { "epoch": 0.5525185474424054, "grad_norm": 3.6085305213928223, "learning_rate": 9.334705777022926e-06, "loss": 0.7069, "step": 4245 }, { "epoch": 0.552909019914096, "grad_norm": 2.782156229019165, "learning_rate": 9.333663798270969e-06, "loss": 0.6735, "step": 4248 }, { "epoch": 0.5532994923857868, "grad_norm": 2.7994630336761475, "learning_rate": 9.332621062437064e-06, "loss": 0.6201, "step": 4251 }, { "epoch": 0.5536899648574776, "grad_norm": 2.498595952987671, "learning_rate": 9.331577569703374e-06, "loss": 0.7614, "step": 4254 }, { "epoch": 0.5540804373291683, "grad_norm": 2.729330062866211, "learning_rate": 9.330533320252193e-06, "loss": 0.6992, "step": 4257 }, { "epoch": 0.554470909800859, "grad_norm": 2.681333303451538, "learning_rate": 9.32948831426595e-06, "loss": 0.7586, "step": 4260 }, { "epoch": 0.5548613822725498, "grad_norm": 2.6096370220184326, "learning_rate": 9.32844255192721e-06, "loss": 0.6834, "step": 4263 }, { "epoch": 0.5552518547442405, "grad_norm": 2.6331887245178223, "learning_rate": 9.32739603341866e-06, "loss": 0.6116, "step": 4266 }, { "epoch": 0.5556423272159313, "grad_norm": 2.9159796237945557, "learning_rate": 9.326348758923127e-06, "loss": 0.6944, "step": 4269 }, { "epoch": 0.556032799687622, "grad_norm": 2.7244224548339844, "learning_rate": 9.325300728623567e-06, "loss": 0.7559, "step": 4272 }, { "epoch": 0.5564232721593128, "grad_norm": 2.5639004707336426, "learning_rate": 9.324251942703068e-06, "loss": 0.7335, "step": 4275 }, { "epoch": 0.5568137446310035, "grad_norm": 2.4377241134643555, "learning_rate": 9.323202401344852e-06, "loss": 0.667, "step": 4278 }, { "epoch": 0.5572042171026943, "grad_norm": 2.6609203815460205, "learning_rate": 9.322152104732272e-06, "loss": 0.6463, "step": 4281 }, { "epoch": 0.557594689574385, "grad_norm": 3.7862682342529297, "learning_rate": 9.321101053048812e-06, "loss": 0.6735, "step": 4284 }, { "epoch": 0.5579851620460757, "grad_norm": 3.0916101932525635, "learning_rate": 9.320049246478086e-06, "loss": 0.6867, "step": 4287 }, { "epoch": 0.5583756345177665, "grad_norm": 2.4427428245544434, "learning_rate": 9.318996685203848e-06, "loss": 0.6473, "step": 4290 }, { "epoch": 0.5587661069894573, "grad_norm": 2.9090192317962646, "learning_rate": 9.317943369409973e-06, "loss": 0.6461, "step": 4293 }, { "epoch": 0.5591565794611479, "grad_norm": 5.3703742027282715, "learning_rate": 9.316889299280475e-06, "loss": 0.716, "step": 4296 }, { "epoch": 0.5595470519328387, "grad_norm": 4.018325328826904, "learning_rate": 9.315834474999498e-06, "loss": 0.716, "step": 4299 }, { "epoch": 0.5599375244045295, "grad_norm": 2.85209059715271, "learning_rate": 9.314778896751317e-06, "loss": 0.6133, "step": 4302 }, { "epoch": 0.5603279968762203, "grad_norm": 2.487459182739258, "learning_rate": 9.31372256472034e-06, "loss": 0.6144, "step": 4305 }, { "epoch": 0.5607184693479109, "grad_norm": 2.43287992477417, "learning_rate": 9.312665479091106e-06, "loss": 0.5989, "step": 4308 }, { "epoch": 0.5611089418196017, "grad_norm": 3.1222102642059326, "learning_rate": 9.311607640048286e-06, "loss": 0.5524, "step": 4311 }, { "epoch": 0.5614994142912925, "grad_norm": 2.515286445617676, "learning_rate": 9.31054904777668e-06, "loss": 0.6501, "step": 4314 }, { "epoch": 0.5618898867629832, "grad_norm": 2.938828706741333, "learning_rate": 9.309489702461223e-06, "loss": 0.6829, "step": 4317 }, { "epoch": 0.562280359234674, "grad_norm": 2.4656574726104736, "learning_rate": 9.308429604286982e-06, "loss": 0.6517, "step": 4320 }, { "epoch": 0.5626708317063647, "grad_norm": 3.7046689987182617, "learning_rate": 9.307368753439153e-06, "loss": 0.6415, "step": 4323 }, { "epoch": 0.5630613041780554, "grad_norm": 3.0883255004882812, "learning_rate": 9.306307150103064e-06, "loss": 0.7135, "step": 4326 }, { "epoch": 0.5634517766497462, "grad_norm": 2.469595432281494, "learning_rate": 9.305244794464174e-06, "loss": 0.5753, "step": 4329 }, { "epoch": 0.563842249121437, "grad_norm": 2.6837077140808105, "learning_rate": 9.304181686708077e-06, "loss": 0.6382, "step": 4332 }, { "epoch": 0.5642327215931277, "grad_norm": 2.8714263439178467, "learning_rate": 9.303117827020493e-06, "loss": 0.7308, "step": 4335 }, { "epoch": 0.5646231940648184, "grad_norm": 2.2070488929748535, "learning_rate": 9.302053215587276e-06, "loss": 0.7811, "step": 4338 }, { "epoch": 0.5650136665365092, "grad_norm": 2.573568105697632, "learning_rate": 9.300987852594414e-06, "loss": 0.7276, "step": 4341 }, { "epoch": 0.5654041390082, "grad_norm": 2.5360209941864014, "learning_rate": 9.299921738228023e-06, "loss": 0.6433, "step": 4344 }, { "epoch": 0.5657946114798906, "grad_norm": 3.1989572048187256, "learning_rate": 9.298854872674348e-06, "loss": 0.6491, "step": 4347 }, { "epoch": 0.5661850839515814, "grad_norm": 2.791049003601074, "learning_rate": 9.297787256119772e-06, "loss": 0.6953, "step": 4350 }, { "epoch": 0.5665755564232722, "grad_norm": 3.882077693939209, "learning_rate": 9.296718888750802e-06, "loss": 0.697, "step": 4353 }, { "epoch": 0.566966028894963, "grad_norm": 2.5977842807769775, "learning_rate": 9.295649770754082e-06, "loss": 0.6448, "step": 4356 }, { "epoch": 0.5673565013666536, "grad_norm": 2.584838628768921, "learning_rate": 9.294579902316382e-06, "loss": 0.6445, "step": 4359 }, { "epoch": 0.5677469738383444, "grad_norm": 3.0195114612579346, "learning_rate": 9.293509283624611e-06, "loss": 0.7209, "step": 4362 }, { "epoch": 0.5681374463100352, "grad_norm": 3.4873313903808594, "learning_rate": 9.292437914865798e-06, "loss": 0.7565, "step": 4365 }, { "epoch": 0.5685279187817259, "grad_norm": 2.704925298690796, "learning_rate": 9.291365796227111e-06, "loss": 0.8317, "step": 4368 }, { "epoch": 0.5689183912534166, "grad_norm": 2.3734493255615234, "learning_rate": 9.290292927895848e-06, "loss": 0.6708, "step": 4371 }, { "epoch": 0.5693088637251074, "grad_norm": 2.2318577766418457, "learning_rate": 9.289219310059437e-06, "loss": 0.6154, "step": 4374 }, { "epoch": 0.5696993361967981, "grad_norm": 3.829906702041626, "learning_rate": 9.288144942905432e-06, "loss": 0.7834, "step": 4377 }, { "epoch": 0.5700898086684889, "grad_norm": 2.0775136947631836, "learning_rate": 9.28706982662153e-06, "loss": 0.5858, "step": 4380 }, { "epoch": 0.5704802811401796, "grad_norm": 2.4510486125946045, "learning_rate": 9.285993961395548e-06, "loss": 0.5897, "step": 4383 }, { "epoch": 0.5708707536118703, "grad_norm": 2.438112735748291, "learning_rate": 9.284917347415435e-06, "loss": 0.6553, "step": 4386 }, { "epoch": 0.5712612260835611, "grad_norm": 3.0546696186065674, "learning_rate": 9.283839984869276e-06, "loss": 0.6921, "step": 4389 }, { "epoch": 0.5716516985552519, "grad_norm": 2.234543561935425, "learning_rate": 9.282761873945285e-06, "loss": 0.5942, "step": 4392 }, { "epoch": 0.5720421710269425, "grad_norm": 2.697658061981201, "learning_rate": 9.281683014831804e-06, "loss": 0.6324, "step": 4395 }, { "epoch": 0.5724326434986333, "grad_norm": 2.6841681003570557, "learning_rate": 9.280603407717306e-06, "loss": 0.5907, "step": 4398 }, { "epoch": 0.5728231159703241, "grad_norm": 2.358556032180786, "learning_rate": 9.2795230527904e-06, "loss": 0.6638, "step": 4401 }, { "epoch": 0.5732135884420149, "grad_norm": 4.04594087600708, "learning_rate": 9.278441950239819e-06, "loss": 0.6402, "step": 4404 }, { "epoch": 0.5736040609137056, "grad_norm": 2.5940234661102295, "learning_rate": 9.277360100254428e-06, "loss": 0.6327, "step": 4407 }, { "epoch": 0.5739945333853963, "grad_norm": 2.4827663898468018, "learning_rate": 9.276277503023226e-06, "loss": 0.6645, "step": 4410 }, { "epoch": 0.5743850058570871, "grad_norm": 2.8493151664733887, "learning_rate": 9.27519415873534e-06, "loss": 0.6459, "step": 4413 }, { "epoch": 0.5747754783287778, "grad_norm": 2.4425408840179443, "learning_rate": 9.274110067580031e-06, "loss": 0.5905, "step": 4416 }, { "epoch": 0.5751659508004686, "grad_norm": 2.5381839275360107, "learning_rate": 9.273025229746683e-06, "loss": 0.7284, "step": 4419 }, { "epoch": 0.5755564232721593, "grad_norm": 3.9269378185272217, "learning_rate": 9.271939645424816e-06, "loss": 0.6981, "step": 4422 }, { "epoch": 0.57594689574385, "grad_norm": 2.5297110080718994, "learning_rate": 9.27085331480408e-06, "loss": 0.7223, "step": 4425 }, { "epoch": 0.5763373682155408, "grad_norm": 2.864983320236206, "learning_rate": 9.269766238074255e-06, "loss": 0.7924, "step": 4428 }, { "epoch": 0.5767278406872316, "grad_norm": 2.4377658367156982, "learning_rate": 9.26867841542525e-06, "loss": 0.6703, "step": 4431 }, { "epoch": 0.5771183131589223, "grad_norm": 2.9342570304870605, "learning_rate": 9.267589847047105e-06, "loss": 0.6622, "step": 4434 }, { "epoch": 0.577508785630613, "grad_norm": 2.2394070625305176, "learning_rate": 9.266500533129994e-06, "loss": 0.7219, "step": 4437 }, { "epoch": 0.5778992581023038, "grad_norm": 3.120809316635132, "learning_rate": 9.265410473864214e-06, "loss": 0.6276, "step": 4440 }, { "epoch": 0.5782897305739946, "grad_norm": 4.084946155548096, "learning_rate": 9.264319669440197e-06, "loss": 0.6136, "step": 4443 }, { "epoch": 0.5786802030456852, "grad_norm": 3.8687705993652344, "learning_rate": 9.263228120048504e-06, "loss": 0.698, "step": 4446 }, { "epoch": 0.579070675517376, "grad_norm": 2.582995891571045, "learning_rate": 9.262135825879827e-06, "loss": 0.6726, "step": 4449 }, { "epoch": 0.5794611479890668, "grad_norm": 3.0313069820404053, "learning_rate": 9.261042787124987e-06, "loss": 0.6088, "step": 4452 }, { "epoch": 0.5798516204607576, "grad_norm": 2.4391238689422607, "learning_rate": 9.259949003974938e-06, "loss": 0.6239, "step": 4455 }, { "epoch": 0.5802420929324482, "grad_norm": 2.8740012645721436, "learning_rate": 9.258854476620758e-06, "loss": 0.6486, "step": 4458 }, { "epoch": 0.580632565404139, "grad_norm": 2.8367395401000977, "learning_rate": 9.257759205253662e-06, "loss": 0.7247, "step": 4461 }, { "epoch": 0.5810230378758298, "grad_norm": 2.558208703994751, "learning_rate": 9.256663190064987e-06, "loss": 0.6522, "step": 4464 }, { "epoch": 0.5814135103475205, "grad_norm": 2.159445285797119, "learning_rate": 9.25556643124621e-06, "loss": 0.5987, "step": 4467 }, { "epoch": 0.5818039828192112, "grad_norm": 2.406489849090576, "learning_rate": 9.25446892898893e-06, "loss": 0.7873, "step": 4470 }, { "epoch": 0.582194455290902, "grad_norm": 2.4776813983917236, "learning_rate": 9.253370683484876e-06, "loss": 0.6753, "step": 4473 }, { "epoch": 0.5825849277625927, "grad_norm": 2.6619131565093994, "learning_rate": 9.252271694925913e-06, "loss": 0.7201, "step": 4476 }, { "epoch": 0.5829754002342835, "grad_norm": 2.598264455795288, "learning_rate": 9.25117196350403e-06, "loss": 0.679, "step": 4479 }, { "epoch": 0.5833658727059742, "grad_norm": 3.2932846546173096, "learning_rate": 9.250071489411348e-06, "loss": 0.6224, "step": 4482 }, { "epoch": 0.583756345177665, "grad_norm": 2.7969069480895996, "learning_rate": 9.248970272840116e-06, "loss": 0.5868, "step": 4485 }, { "epoch": 0.5841468176493557, "grad_norm": 2.904038190841675, "learning_rate": 9.247868313982719e-06, "loss": 0.6435, "step": 4488 }, { "epoch": 0.5845372901210465, "grad_norm": 2.6889734268188477, "learning_rate": 9.246765613031661e-06, "loss": 0.6655, "step": 4491 }, { "epoch": 0.5849277625927373, "grad_norm": 2.590397834777832, "learning_rate": 9.245662170179586e-06, "loss": 0.6866, "step": 4494 }, { "epoch": 0.5853182350644279, "grad_norm": 2.5878326892852783, "learning_rate": 9.24455798561926e-06, "loss": 0.5923, "step": 4497 }, { "epoch": 0.5857087075361187, "grad_norm": 3.6354682445526123, "learning_rate": 9.243453059543586e-06, "loss": 0.7215, "step": 4500 }, { "epoch": 0.5860991800078095, "grad_norm": 2.499769687652588, "learning_rate": 9.242347392145587e-06, "loss": 0.7189, "step": 4503 }, { "epoch": 0.5864896524795002, "grad_norm": 2.806671619415283, "learning_rate": 9.241240983618423e-06, "loss": 0.8219, "step": 4506 }, { "epoch": 0.5868801249511909, "grad_norm": 2.4375195503234863, "learning_rate": 9.240133834155382e-06, "loss": 0.7025, "step": 4509 }, { "epoch": 0.5872705974228817, "grad_norm": 3.987379789352417, "learning_rate": 9.239025943949882e-06, "loss": 0.6628, "step": 4512 }, { "epoch": 0.5876610698945725, "grad_norm": 2.5904123783111572, "learning_rate": 9.237917313195465e-06, "loss": 0.6761, "step": 4515 }, { "epoch": 0.5880515423662632, "grad_norm": 2.4467389583587646, "learning_rate": 9.236807942085809e-06, "loss": 0.5364, "step": 4518 }, { "epoch": 0.5884420148379539, "grad_norm": 3.339595317840576, "learning_rate": 9.235697830814718e-06, "loss": 0.6519, "step": 4521 }, { "epoch": 0.5888324873096447, "grad_norm": 4.496737003326416, "learning_rate": 9.234586979576127e-06, "loss": 0.6588, "step": 4524 }, { "epoch": 0.5892229597813354, "grad_norm": 2.4723331928253174, "learning_rate": 9.2334753885641e-06, "loss": 0.6806, "step": 4527 }, { "epoch": 0.5896134322530262, "grad_norm": 2.589547872543335, "learning_rate": 9.232363057972828e-06, "loss": 0.5903, "step": 4530 }, { "epoch": 0.5900039047247169, "grad_norm": 3.4298384189605713, "learning_rate": 9.231249987996632e-06, "loss": 0.6546, "step": 4533 }, { "epoch": 0.5903943771964076, "grad_norm": 2.480419158935547, "learning_rate": 9.230136178829967e-06, "loss": 0.7137, "step": 4536 }, { "epoch": 0.5907848496680984, "grad_norm": 2.6856725215911865, "learning_rate": 9.229021630667407e-06, "loss": 0.6591, "step": 4539 }, { "epoch": 0.5911753221397892, "grad_norm": 4.8847975730896, "learning_rate": 9.227906343703668e-06, "loss": 0.756, "step": 4542 }, { "epoch": 0.5915657946114798, "grad_norm": 2.5259408950805664, "learning_rate": 9.226790318133583e-06, "loss": 0.592, "step": 4545 }, { "epoch": 0.5919562670831706, "grad_norm": 3.381965160369873, "learning_rate": 9.225673554152122e-06, "loss": 0.7512, "step": 4548 }, { "epoch": 0.5923467395548614, "grad_norm": 2.3359241485595703, "learning_rate": 9.224556051954381e-06, "loss": 0.6277, "step": 4551 }, { "epoch": 0.5927372120265522, "grad_norm": 2.8028383255004883, "learning_rate": 9.223437811735583e-06, "loss": 0.7311, "step": 4554 }, { "epoch": 0.5931276844982428, "grad_norm": 2.1525771617889404, "learning_rate": 9.222318833691085e-06, "loss": 0.6426, "step": 4557 }, { "epoch": 0.5935181569699336, "grad_norm": 2.6036198139190674, "learning_rate": 9.22119911801637e-06, "loss": 0.6085, "step": 4560 }, { "epoch": 0.5939086294416244, "grad_norm": 2.456698417663574, "learning_rate": 9.220078664907048e-06, "loss": 0.6745, "step": 4563 }, { "epoch": 0.5942991019133151, "grad_norm": 3.952479600906372, "learning_rate": 9.218957474558862e-06, "loss": 0.6955, "step": 4566 }, { "epoch": 0.5946895743850058, "grad_norm": 3.0322844982147217, "learning_rate": 9.217835547167682e-06, "loss": 0.7331, "step": 4569 }, { "epoch": 0.5950800468566966, "grad_norm": 2.9084672927856445, "learning_rate": 9.216712882929503e-06, "loss": 0.6295, "step": 4572 }, { "epoch": 0.5954705193283873, "grad_norm": 2.524294376373291, "learning_rate": 9.215589482040455e-06, "loss": 0.6615, "step": 4575 }, { "epoch": 0.5958609918000781, "grad_norm": 2.533430576324463, "learning_rate": 9.21446534469679e-06, "loss": 0.6768, "step": 4578 }, { "epoch": 0.5962514642717689, "grad_norm": 2.4197540283203125, "learning_rate": 9.213340471094899e-06, "loss": 0.688, "step": 4581 }, { "epoch": 0.5966419367434596, "grad_norm": 2.344994068145752, "learning_rate": 9.212214861431289e-06, "loss": 0.7125, "step": 4584 }, { "epoch": 0.5970324092151503, "grad_norm": 2.436912775039673, "learning_rate": 9.211088515902604e-06, "loss": 0.6781, "step": 4587 }, { "epoch": 0.5974228816868411, "grad_norm": 2.6664328575134277, "learning_rate": 9.209961434705614e-06, "loss": 0.6564, "step": 4590 }, { "epoch": 0.5978133541585319, "grad_norm": 3.217984437942505, "learning_rate": 9.20883361803722e-06, "loss": 0.6577, "step": 4593 }, { "epoch": 0.5982038266302225, "grad_norm": 2.4378035068511963, "learning_rate": 9.207705066094445e-06, "loss": 0.5963, "step": 4596 }, { "epoch": 0.5985942991019133, "grad_norm": 2.4753284454345703, "learning_rate": 9.206575779074448e-06, "loss": 0.6725, "step": 4599 }, { "epoch": 0.5989847715736041, "grad_norm": 2.7560603618621826, "learning_rate": 9.20544575717451e-06, "loss": 0.6058, "step": 4602 }, { "epoch": 0.5993752440452949, "grad_norm": 2.4613637924194336, "learning_rate": 9.204315000592046e-06, "loss": 0.7212, "step": 4605 }, { "epoch": 0.5997657165169855, "grad_norm": 2.4374942779541016, "learning_rate": 9.203183509524596e-06, "loss": 0.5989, "step": 4608 }, { "epoch": 0.6001561889886763, "grad_norm": 2.783355236053467, "learning_rate": 9.202051284169829e-06, "loss": 0.6998, "step": 4611 }, { "epoch": 0.6005466614603671, "grad_norm": 2.803696870803833, "learning_rate": 9.200918324725543e-06, "loss": 0.6278, "step": 4614 }, { "epoch": 0.6009371339320578, "grad_norm": 3.1450366973876953, "learning_rate": 9.199784631389663e-06, "loss": 0.7003, "step": 4617 }, { "epoch": 0.6013276064037485, "grad_norm": 3.2626290321350098, "learning_rate": 9.198650204360241e-06, "loss": 0.5592, "step": 4620 }, { "epoch": 0.6017180788754393, "grad_norm": 2.694031238555908, "learning_rate": 9.197515043835463e-06, "loss": 0.6437, "step": 4623 }, { "epoch": 0.60210855134713, "grad_norm": 3.2066242694854736, "learning_rate": 9.196379150013638e-06, "loss": 0.6952, "step": 4626 }, { "epoch": 0.6024990238188208, "grad_norm": 3.181091547012329, "learning_rate": 9.195242523093202e-06, "loss": 0.6107, "step": 4629 }, { "epoch": 0.6028894962905115, "grad_norm": 3.754946231842041, "learning_rate": 9.194105163272722e-06, "loss": 0.7689, "step": 4632 }, { "epoch": 0.6032799687622022, "grad_norm": 3.5265393257141113, "learning_rate": 9.192967070750895e-06, "loss": 0.6393, "step": 4635 }, { "epoch": 0.603670441233893, "grad_norm": 3.310450315475464, "learning_rate": 9.191828245726539e-06, "loss": 0.718, "step": 4638 }, { "epoch": 0.6040609137055838, "grad_norm": 2.7861340045928955, "learning_rate": 9.19068868839861e-06, "loss": 0.6306, "step": 4641 }, { "epoch": 0.6044513861772745, "grad_norm": 2.8099071979522705, "learning_rate": 9.189548398966181e-06, "loss": 0.7781, "step": 4644 }, { "epoch": 0.6048418586489652, "grad_norm": 2.284885883331299, "learning_rate": 9.18840737762846e-06, "loss": 0.5758, "step": 4647 }, { "epoch": 0.605232331120656, "grad_norm": 2.39620304107666, "learning_rate": 9.187265624584782e-06, "loss": 0.5665, "step": 4650 }, { "epoch": 0.6056228035923468, "grad_norm": 2.680431604385376, "learning_rate": 9.186123140034607e-06, "loss": 0.7102, "step": 4653 }, { "epoch": 0.6060132760640374, "grad_norm": 2.5489349365234375, "learning_rate": 9.184979924177527e-06, "loss": 0.61, "step": 4656 }, { "epoch": 0.6064037485357282, "grad_norm": 2.9585001468658447, "learning_rate": 9.183835977213257e-06, "loss": 0.6092, "step": 4659 }, { "epoch": 0.606794221007419, "grad_norm": 2.7119574546813965, "learning_rate": 9.182691299341643e-06, "loss": 0.6834, "step": 4662 }, { "epoch": 0.6071846934791097, "grad_norm": 2.6506659984588623, "learning_rate": 9.181545890762661e-06, "loss": 0.6116, "step": 4665 }, { "epoch": 0.6075751659508005, "grad_norm": 2.5166819095611572, "learning_rate": 9.180399751676407e-06, "loss": 0.6865, "step": 4668 }, { "epoch": 0.6079656384224912, "grad_norm": 3.43406343460083, "learning_rate": 9.17925288228311e-06, "loss": 0.6894, "step": 4671 }, { "epoch": 0.608356110894182, "grad_norm": 2.9682881832122803, "learning_rate": 9.178105282783127e-06, "loss": 0.6195, "step": 4674 }, { "epoch": 0.6087465833658727, "grad_norm": 4.136783123016357, "learning_rate": 9.17695695337694e-06, "loss": 0.5971, "step": 4677 }, { "epoch": 0.6091370558375635, "grad_norm": 2.4573512077331543, "learning_rate": 9.175807894265161e-06, "loss": 0.7037, "step": 4680 }, { "epoch": 0.6095275283092542, "grad_norm": 2.2731971740722656, "learning_rate": 9.174658105648526e-06, "loss": 0.6454, "step": 4683 }, { "epoch": 0.6099180007809449, "grad_norm": 3.480694532394409, "learning_rate": 9.173507587727904e-06, "loss": 0.6724, "step": 4686 }, { "epoch": 0.6103084732526357, "grad_norm": 2.483057975769043, "learning_rate": 9.172356340704285e-06, "loss": 0.7026, "step": 4689 }, { "epoch": 0.6106989457243265, "grad_norm": 2.4843204021453857, "learning_rate": 9.171204364778791e-06, "loss": 0.6642, "step": 4692 }, { "epoch": 0.6110894181960171, "grad_norm": 2.6703476905822754, "learning_rate": 9.17005166015267e-06, "loss": 0.5992, "step": 4695 }, { "epoch": 0.6114798906677079, "grad_norm": 2.2671613693237305, "learning_rate": 9.168898227027296e-06, "loss": 0.6149, "step": 4698 }, { "epoch": 0.6118703631393987, "grad_norm": 2.83113169670105, "learning_rate": 9.167744065604171e-06, "loss": 0.7512, "step": 4701 }, { "epoch": 0.6122608356110895, "grad_norm": 2.2886712551116943, "learning_rate": 9.166589176084925e-06, "loss": 0.603, "step": 4704 }, { "epoch": 0.6126513080827801, "grad_norm": 2.4751598834991455, "learning_rate": 9.165433558671318e-06, "loss": 0.6055, "step": 4707 }, { "epoch": 0.6130417805544709, "grad_norm": 2.5859594345092773, "learning_rate": 9.164277213565228e-06, "loss": 0.6932, "step": 4710 }, { "epoch": 0.6134322530261617, "grad_norm": 2.6246237754821777, "learning_rate": 9.163120140968671e-06, "loss": 0.5861, "step": 4713 }, { "epoch": 0.6138227254978524, "grad_norm": 2.5987634658813477, "learning_rate": 9.161962341083784e-06, "loss": 0.729, "step": 4716 }, { "epoch": 0.6142131979695431, "grad_norm": 2.6038060188293457, "learning_rate": 9.160803814112829e-06, "loss": 0.6701, "step": 4719 }, { "epoch": 0.6146036704412339, "grad_norm": 3.065664768218994, "learning_rate": 9.159644560258201e-06, "loss": 0.7233, "step": 4722 }, { "epoch": 0.6149941429129246, "grad_norm": 2.4268383979797363, "learning_rate": 9.15848457972242e-06, "loss": 0.6326, "step": 4725 }, { "epoch": 0.6153846153846154, "grad_norm": 2.269376277923584, "learning_rate": 9.157323872708131e-06, "loss": 0.6076, "step": 4728 }, { "epoch": 0.6157750878563061, "grad_norm": 2.622344970703125, "learning_rate": 9.156162439418108e-06, "loss": 0.6793, "step": 4731 }, { "epoch": 0.6161655603279969, "grad_norm": 3.469761371612549, "learning_rate": 9.15500028005525e-06, "loss": 0.7301, "step": 4734 }, { "epoch": 0.6165560327996876, "grad_norm": 2.781172752380371, "learning_rate": 9.153837394822582e-06, "loss": 0.7908, "step": 4737 }, { "epoch": 0.6169465052713784, "grad_norm": 2.506148099899292, "learning_rate": 9.15267378392326e-06, "loss": 0.7075, "step": 4740 }, { "epoch": 0.6173369777430691, "grad_norm": 3.1186656951904297, "learning_rate": 9.151509447560566e-06, "loss": 0.5936, "step": 4743 }, { "epoch": 0.6177274502147598, "grad_norm": 2.5772218704223633, "learning_rate": 9.150344385937904e-06, "loss": 0.6029, "step": 4746 }, { "epoch": 0.6181179226864506, "grad_norm": 2.423085927963257, "learning_rate": 9.149178599258807e-06, "loss": 0.7071, "step": 4749 }, { "epoch": 0.6185083951581414, "grad_norm": 3.1963913440704346, "learning_rate": 9.14801208772694e-06, "loss": 0.6297, "step": 4752 }, { "epoch": 0.6188988676298322, "grad_norm": 2.7055928707122803, "learning_rate": 9.146844851546087e-06, "loss": 0.7194, "step": 4755 }, { "epoch": 0.6192893401015228, "grad_norm": 2.607105016708374, "learning_rate": 9.145676890920161e-06, "loss": 0.6897, "step": 4758 }, { "epoch": 0.6196798125732136, "grad_norm": 2.645681381225586, "learning_rate": 9.144508206053203e-06, "loss": 0.636, "step": 4761 }, { "epoch": 0.6200702850449044, "grad_norm": 2.521937131881714, "learning_rate": 9.143338797149381e-06, "loss": 0.6608, "step": 4764 }, { "epoch": 0.6204607575165951, "grad_norm": 2.7200021743774414, "learning_rate": 9.142168664412987e-06, "loss": 0.6228, "step": 4767 }, { "epoch": 0.6208512299882858, "grad_norm": 3.465608835220337, "learning_rate": 9.140997808048442e-06, "loss": 0.6801, "step": 4770 }, { "epoch": 0.6212417024599766, "grad_norm": 2.4828827381134033, "learning_rate": 9.139826228260292e-06, "loss": 0.6134, "step": 4773 }, { "epoch": 0.6216321749316673, "grad_norm": 2.365074634552002, "learning_rate": 9.138653925253207e-06, "loss": 0.5044, "step": 4776 }, { "epoch": 0.6220226474033581, "grad_norm": 2.630094051361084, "learning_rate": 9.137480899231987e-06, "loss": 0.5772, "step": 4779 }, { "epoch": 0.6224131198750488, "grad_norm": 2.5148115158081055, "learning_rate": 9.13630715040156e-06, "loss": 0.6968, "step": 4782 }, { "epoch": 0.6228035923467395, "grad_norm": 2.7790770530700684, "learning_rate": 9.135132678966975e-06, "loss": 0.5712, "step": 4785 }, { "epoch": 0.6231940648184303, "grad_norm": 2.780951738357544, "learning_rate": 9.13395748513341e-06, "loss": 0.6046, "step": 4788 }, { "epoch": 0.6235845372901211, "grad_norm": 2.728588819503784, "learning_rate": 9.132781569106168e-06, "loss": 0.734, "step": 4791 }, { "epoch": 0.6239750097618117, "grad_norm": 2.4009926319122314, "learning_rate": 9.131604931090681e-06, "loss": 0.62, "step": 4794 }, { "epoch": 0.6243654822335025, "grad_norm": 3.470907211303711, "learning_rate": 9.130427571292503e-06, "loss": 0.7862, "step": 4797 }, { "epoch": 0.6247559547051933, "grad_norm": 3.43316912651062, "learning_rate": 9.129249489917317e-06, "loss": 0.651, "step": 4800 }, { "epoch": 0.6251464271768841, "grad_norm": 2.8522374629974365, "learning_rate": 9.128070687170933e-06, "loss": 0.5899, "step": 4803 }, { "epoch": 0.6255368996485747, "grad_norm": 2.6155104637145996, "learning_rate": 9.126891163259286e-06, "loss": 0.6208, "step": 4806 }, { "epoch": 0.6259273721202655, "grad_norm": 2.6993188858032227, "learning_rate": 9.125710918388431e-06, "loss": 0.6091, "step": 4809 }, { "epoch": 0.6263178445919563, "grad_norm": 3.6405208110809326, "learning_rate": 9.12452995276456e-06, "loss": 0.6169, "step": 4812 }, { "epoch": 0.626708317063647, "grad_norm": 2.4731392860412598, "learning_rate": 9.123348266593983e-06, "loss": 0.6637, "step": 4815 }, { "epoch": 0.6270987895353377, "grad_norm": 2.377974271774292, "learning_rate": 9.122165860083136e-06, "loss": 0.6365, "step": 4818 }, { "epoch": 0.6274892620070285, "grad_norm": 2.286585569381714, "learning_rate": 9.120982733438587e-06, "loss": 0.6843, "step": 4821 }, { "epoch": 0.6278797344787193, "grad_norm": 2.7373149394989014, "learning_rate": 9.119798886867025e-06, "loss": 0.6896, "step": 4824 }, { "epoch": 0.62827020695041, "grad_norm": 2.6947765350341797, "learning_rate": 9.118614320575261e-06, "loss": 0.6867, "step": 4827 }, { "epoch": 0.6286606794221007, "grad_norm": 2.763946771621704, "learning_rate": 9.117429034770241e-06, "loss": 0.6705, "step": 4830 }, { "epoch": 0.6290511518937915, "grad_norm": 2.622236728668213, "learning_rate": 9.116243029659032e-06, "loss": 0.7195, "step": 4833 }, { "epoch": 0.6294416243654822, "grad_norm": 2.6941754817962646, "learning_rate": 9.115056305448822e-06, "loss": 0.7228, "step": 4836 }, { "epoch": 0.629832096837173, "grad_norm": 3.360145092010498, "learning_rate": 9.113868862346934e-06, "loss": 0.679, "step": 4839 }, { "epoch": 0.6302225693088638, "grad_norm": 2.4699013233184814, "learning_rate": 9.11268070056081e-06, "loss": 0.6504, "step": 4842 }, { "epoch": 0.6306130417805544, "grad_norm": 2.265643835067749, "learning_rate": 9.111491820298019e-06, "loss": 0.6572, "step": 4845 }, { "epoch": 0.6310035142522452, "grad_norm": 2.609482765197754, "learning_rate": 9.110302221766257e-06, "loss": 0.643, "step": 4848 }, { "epoch": 0.631393986723936, "grad_norm": 2.8599019050598145, "learning_rate": 9.109111905173342e-06, "loss": 0.7884, "step": 4851 }, { "epoch": 0.6317844591956268, "grad_norm": 2.703747272491455, "learning_rate": 9.107920870727223e-06, "loss": 0.6817, "step": 4854 }, { "epoch": 0.6321749316673174, "grad_norm": 2.2183678150177, "learning_rate": 9.106729118635968e-06, "loss": 0.6101, "step": 4857 }, { "epoch": 0.6325654041390082, "grad_norm": 2.5604469776153564, "learning_rate": 9.105536649107778e-06, "loss": 0.7131, "step": 4860 }, { "epoch": 0.632955876610699, "grad_norm": 2.3223118782043457, "learning_rate": 9.10434346235097e-06, "loss": 0.6237, "step": 4863 }, { "epoch": 0.6333463490823897, "grad_norm": 2.4612014293670654, "learning_rate": 9.103149558573995e-06, "loss": 0.695, "step": 4866 }, { "epoch": 0.6337368215540804, "grad_norm": 3.208245277404785, "learning_rate": 9.101954937985422e-06, "loss": 0.6685, "step": 4869 }, { "epoch": 0.6341272940257712, "grad_norm": 3.3025028705596924, "learning_rate": 9.100759600793951e-06, "loss": 0.6657, "step": 4872 }, { "epoch": 0.6345177664974619, "grad_norm": 2.8037331104278564, "learning_rate": 9.099563547208406e-06, "loss": 0.573, "step": 4875 }, { "epoch": 0.6349082389691527, "grad_norm": 2.62872052192688, "learning_rate": 9.098366777437733e-06, "loss": 0.7397, "step": 4878 }, { "epoch": 0.6352987114408434, "grad_norm": 2.564282178878784, "learning_rate": 9.097169291691007e-06, "loss": 0.626, "step": 4881 }, { "epoch": 0.6356891839125342, "grad_norm": 2.9302821159362793, "learning_rate": 9.095971090177421e-06, "loss": 0.7665, "step": 4884 }, { "epoch": 0.6360796563842249, "grad_norm": 2.7549326419830322, "learning_rate": 9.094772173106305e-06, "loss": 0.7412, "step": 4887 }, { "epoch": 0.6364701288559157, "grad_norm": 2.602814197540283, "learning_rate": 9.093572540687104e-06, "loss": 0.6038, "step": 4890 }, { "epoch": 0.6368606013276064, "grad_norm": 2.4310333728790283, "learning_rate": 9.09237219312939e-06, "loss": 0.6858, "step": 4893 }, { "epoch": 0.6372510737992971, "grad_norm": 2.8889222145080566, "learning_rate": 9.091171130642866e-06, "loss": 0.6245, "step": 4896 }, { "epoch": 0.6376415462709879, "grad_norm": 2.956254005432129, "learning_rate": 9.089969353437351e-06, "loss": 0.7212, "step": 4899 }, { "epoch": 0.6380320187426787, "grad_norm": 2.366194248199463, "learning_rate": 9.088766861722793e-06, "loss": 0.6385, "step": 4902 }, { "epoch": 0.6384224912143693, "grad_norm": 2.7603938579559326, "learning_rate": 9.087563655709266e-06, "loss": 0.7277, "step": 4905 }, { "epoch": 0.6388129636860601, "grad_norm": 3.687945604324341, "learning_rate": 9.086359735606969e-06, "loss": 0.6555, "step": 4908 }, { "epoch": 0.6392034361577509, "grad_norm": 3.259951591491699, "learning_rate": 9.085155101626221e-06, "loss": 0.7351, "step": 4911 }, { "epoch": 0.6395939086294417, "grad_norm": 2.4649972915649414, "learning_rate": 9.083949753977471e-06, "loss": 0.5857, "step": 4914 }, { "epoch": 0.6399843811011323, "grad_norm": 2.663567304611206, "learning_rate": 9.08274369287129e-06, "loss": 0.648, "step": 4917 }, { "epoch": 0.6403748535728231, "grad_norm": 2.2157647609710693, "learning_rate": 9.081536918518377e-06, "loss": 0.6431, "step": 4920 }, { "epoch": 0.6407653260445139, "grad_norm": 2.5896902084350586, "learning_rate": 9.080329431129548e-06, "loss": 0.6592, "step": 4923 }, { "epoch": 0.6411557985162046, "grad_norm": 3.887195348739624, "learning_rate": 9.07912123091575e-06, "loss": 0.7259, "step": 4926 }, { "epoch": 0.6415462709878954, "grad_norm": 2.6314144134521484, "learning_rate": 9.077912318088057e-06, "loss": 0.5699, "step": 4929 }, { "epoch": 0.6419367434595861, "grad_norm": 2.7441248893737793, "learning_rate": 9.076702692857661e-06, "loss": 0.6653, "step": 4932 }, { "epoch": 0.6423272159312768, "grad_norm": 3.4282479286193848, "learning_rate": 9.075492355435878e-06, "loss": 0.634, "step": 4935 }, { "epoch": 0.6427176884029676, "grad_norm": 2.2707557678222656, "learning_rate": 9.074281306034156e-06, "loss": 0.6404, "step": 4938 }, { "epoch": 0.6431081608746584, "grad_norm": 2.613741397857666, "learning_rate": 9.073069544864062e-06, "loss": 0.7175, "step": 4941 }, { "epoch": 0.643498633346349, "grad_norm": 3.953160285949707, "learning_rate": 9.071857072137284e-06, "loss": 0.6577, "step": 4944 }, { "epoch": 0.6438891058180398, "grad_norm": 2.4851486682891846, "learning_rate": 9.070643888065642e-06, "loss": 0.6288, "step": 4947 }, { "epoch": 0.6442795782897306, "grad_norm": 2.4990899562835693, "learning_rate": 9.069429992861078e-06, "loss": 0.7342, "step": 4950 }, { "epoch": 0.6446700507614214, "grad_norm": 2.3601980209350586, "learning_rate": 9.068215386735655e-06, "loss": 0.568, "step": 4953 }, { "epoch": 0.645060523233112, "grad_norm": 2.9806814193725586, "learning_rate": 9.06700006990156e-06, "loss": 0.6322, "step": 4956 }, { "epoch": 0.6454509957048028, "grad_norm": 3.1821587085723877, "learning_rate": 9.06578404257111e-06, "loss": 0.7119, "step": 4959 }, { "epoch": 0.6458414681764936, "grad_norm": 2.242640972137451, "learning_rate": 9.064567304956741e-06, "loss": 0.7047, "step": 4962 }, { "epoch": 0.6462319406481843, "grad_norm": 2.693079710006714, "learning_rate": 9.063349857271015e-06, "loss": 0.6865, "step": 4965 }, { "epoch": 0.646622413119875, "grad_norm": 3.0986106395721436, "learning_rate": 9.062131699726615e-06, "loss": 0.6241, "step": 4968 }, { "epoch": 0.6470128855915658, "grad_norm": 2.6668357849121094, "learning_rate": 9.060912832536354e-06, "loss": 0.7457, "step": 4971 }, { "epoch": 0.6474033580632566, "grad_norm": 2.3594837188720703, "learning_rate": 9.059693255913165e-06, "loss": 0.6704, "step": 4974 }, { "epoch": 0.6477938305349473, "grad_norm": 3.7347023487091064, "learning_rate": 9.058472970070102e-06, "loss": 0.6353, "step": 4977 }, { "epoch": 0.648184303006638, "grad_norm": 2.6183836460113525, "learning_rate": 9.05725197522035e-06, "loss": 0.6586, "step": 4980 }, { "epoch": 0.6485747754783288, "grad_norm": 2.691641330718994, "learning_rate": 9.056030271577213e-06, "loss": 0.6442, "step": 4983 }, { "epoch": 0.6489652479500195, "grad_norm": 2.6118125915527344, "learning_rate": 9.054807859354122e-06, "loss": 0.6896, "step": 4986 }, { "epoch": 0.6493557204217103, "grad_norm": 2.557551383972168, "learning_rate": 9.053584738764625e-06, "loss": 0.7078, "step": 4989 }, { "epoch": 0.649746192893401, "grad_norm": 2.4774162769317627, "learning_rate": 9.052360910022404e-06, "loss": 0.7401, "step": 4992 }, { "epoch": 0.6501366653650917, "grad_norm": 2.230543851852417, "learning_rate": 9.051136373341256e-06, "loss": 0.6496, "step": 4995 }, { "epoch": 0.6505271378367825, "grad_norm": 3.3752076625823975, "learning_rate": 9.049911128935104e-06, "loss": 0.5981, "step": 4998 }, { "epoch": 0.6509176103084733, "grad_norm": 2.407163619995117, "learning_rate": 9.048685177018001e-06, "loss": 0.6235, "step": 5001 }, { "epoch": 0.6513080827801639, "grad_norm": 2.324744939804077, "learning_rate": 9.047458517804113e-06, "loss": 0.6085, "step": 5004 }, { "epoch": 0.6516985552518547, "grad_norm": 2.3298988342285156, "learning_rate": 9.046231151507738e-06, "loss": 0.5624, "step": 5007 }, { "epoch": 0.6520890277235455, "grad_norm": 3.9053573608398438, "learning_rate": 9.04500307834329e-06, "loss": 0.687, "step": 5010 }, { "epoch": 0.6524795001952363, "grad_norm": 2.682884931564331, "learning_rate": 9.043774298525316e-06, "loss": 0.6969, "step": 5013 }, { "epoch": 0.652869972666927, "grad_norm": 2.7601308822631836, "learning_rate": 9.04254481226848e-06, "loss": 0.6236, "step": 5016 }, { "epoch": 0.6532604451386177, "grad_norm": 2.39310622215271, "learning_rate": 9.041314619787566e-06, "loss": 0.6217, "step": 5019 }, { "epoch": 0.6536509176103085, "grad_norm": 2.353529453277588, "learning_rate": 9.040083721297493e-06, "loss": 0.6112, "step": 5022 }, { "epoch": 0.6540413900819992, "grad_norm": 2.530123472213745, "learning_rate": 9.03885211701329e-06, "loss": 0.6053, "step": 5025 }, { "epoch": 0.65443186255369, "grad_norm": 2.888646125793457, "learning_rate": 9.037619807150121e-06, "loss": 0.6883, "step": 5028 }, { "epoch": 0.6548223350253807, "grad_norm": 2.598736524581909, "learning_rate": 9.036386791923265e-06, "loss": 0.6944, "step": 5031 }, { "epoch": 0.6552128074970714, "grad_norm": 2.7701828479766846, "learning_rate": 9.035153071548127e-06, "loss": 0.6272, "step": 5034 }, { "epoch": 0.6556032799687622, "grad_norm": 2.3290834426879883, "learning_rate": 9.033918646240236e-06, "loss": 0.6815, "step": 5037 }, { "epoch": 0.655993752440453, "grad_norm": 2.8352549076080322, "learning_rate": 9.032683516215245e-06, "loss": 0.662, "step": 5040 }, { "epoch": 0.6563842249121437, "grad_norm": 4.3119659423828125, "learning_rate": 9.031447681688926e-06, "loss": 0.6342, "step": 5043 }, { "epoch": 0.6567746973838344, "grad_norm": 2.828580379486084, "learning_rate": 9.03021114287718e-06, "loss": 0.7723, "step": 5046 }, { "epoch": 0.6571651698555252, "grad_norm": 2.502751350402832, "learning_rate": 9.028973899996022e-06, "loss": 0.6425, "step": 5049 }, { "epoch": 0.657555642327216, "grad_norm": 2.7101762294769287, "learning_rate": 9.027735953261603e-06, "loss": 0.6139, "step": 5052 }, { "epoch": 0.6579461147989066, "grad_norm": 2.368980646133423, "learning_rate": 9.026497302890184e-06, "loss": 0.6776, "step": 5055 }, { "epoch": 0.6583365872705974, "grad_norm": 2.5557427406311035, "learning_rate": 9.025257949098158e-06, "loss": 0.6162, "step": 5058 }, { "epoch": 0.6587270597422882, "grad_norm": 2.6219143867492676, "learning_rate": 9.024017892102036e-06, "loss": 0.6914, "step": 5061 }, { "epoch": 0.659117532213979, "grad_norm": 6.491698741912842, "learning_rate": 9.022777132118452e-06, "loss": 0.7507, "step": 5064 }, { "epoch": 0.6595080046856696, "grad_norm": 2.6775612831115723, "learning_rate": 9.021535669364167e-06, "loss": 0.7287, "step": 5067 }, { "epoch": 0.6598984771573604, "grad_norm": 3.4174671173095703, "learning_rate": 9.020293504056061e-06, "loss": 0.7409, "step": 5070 }, { "epoch": 0.6602889496290512, "grad_norm": 2.259105682373047, "learning_rate": 9.019050636411135e-06, "loss": 0.5771, "step": 5073 }, { "epoch": 0.6606794221007419, "grad_norm": 2.384159803390503, "learning_rate": 9.01780706664652e-06, "loss": 0.6882, "step": 5076 }, { "epoch": 0.6610698945724326, "grad_norm": 2.471353054046631, "learning_rate": 9.016562794979463e-06, "loss": 0.6606, "step": 5079 }, { "epoch": 0.6614603670441234, "grad_norm": 2.462852716445923, "learning_rate": 9.015317821627332e-06, "loss": 0.6092, "step": 5082 }, { "epoch": 0.6618508395158141, "grad_norm": 2.5678861141204834, "learning_rate": 9.01407214680763e-06, "loss": 0.7437, "step": 5085 }, { "epoch": 0.6622413119875049, "grad_norm": 2.7121429443359375, "learning_rate": 9.012825770737963e-06, "loss": 0.6702, "step": 5088 }, { "epoch": 0.6626317844591956, "grad_norm": 2.5541470050811768, "learning_rate": 9.011578693636078e-06, "loss": 0.6909, "step": 5091 }, { "epoch": 0.6630222569308863, "grad_norm": 3.0044548511505127, "learning_rate": 9.010330915719834e-06, "loss": 0.6704, "step": 5094 }, { "epoch": 0.6634127294025771, "grad_norm": 2.6041769981384277, "learning_rate": 9.009082437207215e-06, "loss": 0.6867, "step": 5097 }, { "epoch": 0.6638032018742679, "grad_norm": 2.428577423095703, "learning_rate": 9.00783325831633e-06, "loss": 0.6128, "step": 5100 }, { "epoch": 0.6641936743459587, "grad_norm": 2.460480213165283, "learning_rate": 9.006583379265405e-06, "loss": 0.6392, "step": 5103 }, { "epoch": 0.6645841468176493, "grad_norm": 2.7471256256103516, "learning_rate": 9.00533280027279e-06, "loss": 0.6698, "step": 5106 }, { "epoch": 0.6649746192893401, "grad_norm": 2.6716361045837402, "learning_rate": 9.004081521556965e-06, "loss": 0.7285, "step": 5109 }, { "epoch": 0.6653650917610309, "grad_norm": 2.468386173248291, "learning_rate": 9.002829543336518e-06, "loss": 0.6738, "step": 5112 }, { "epoch": 0.6657555642327216, "grad_norm": 2.630680561065674, "learning_rate": 9.001576865830173e-06, "loss": 0.7629, "step": 5115 }, { "epoch": 0.6661460367044123, "grad_norm": 2.4180047512054443, "learning_rate": 9.000323489256766e-06, "loss": 0.6747, "step": 5118 }, { "epoch": 0.6665365091761031, "grad_norm": 4.522125720977783, "learning_rate": 8.999069413835262e-06, "loss": 0.6331, "step": 5121 }, { "epoch": 0.6669269816477938, "grad_norm": 2.297905445098877, "learning_rate": 8.997814639784743e-06, "loss": 0.5958, "step": 5124 }, { "epoch": 0.6673174541194846, "grad_norm": 2.3950231075286865, "learning_rate": 8.996559167324417e-06, "loss": 0.6209, "step": 5127 }, { "epoch": 0.6677079265911753, "grad_norm": 2.4264347553253174, "learning_rate": 8.995302996673613e-06, "loss": 0.6492, "step": 5130 }, { "epoch": 0.668098399062866, "grad_norm": 3.127281427383423, "learning_rate": 8.99404612805178e-06, "loss": 0.6959, "step": 5133 }, { "epoch": 0.6684888715345568, "grad_norm": 2.8744657039642334, "learning_rate": 8.99278856167849e-06, "loss": 0.6416, "step": 5136 }, { "epoch": 0.6688793440062476, "grad_norm": 2.994250535964966, "learning_rate": 8.991530297773437e-06, "loss": 0.6651, "step": 5139 }, { "epoch": 0.6692698164779383, "grad_norm": 2.4786083698272705, "learning_rate": 8.99027133655644e-06, "loss": 0.6271, "step": 5142 }, { "epoch": 0.669660288949629, "grad_norm": 2.418321371078491, "learning_rate": 8.989011678247433e-06, "loss": 0.6906, "step": 5145 }, { "epoch": 0.6700507614213198, "grad_norm": 2.5830347537994385, "learning_rate": 8.987751323066479e-06, "loss": 0.7363, "step": 5148 }, { "epoch": 0.6704412338930106, "grad_norm": 2.7174127101898193, "learning_rate": 8.986490271233757e-06, "loss": 0.7439, "step": 5151 }, { "epoch": 0.6708317063647012, "grad_norm": 2.870361804962158, "learning_rate": 8.985228522969571e-06, "loss": 0.6398, "step": 5154 }, { "epoch": 0.671222178836392, "grad_norm": 2.797898292541504, "learning_rate": 8.983966078494346e-06, "loss": 0.6451, "step": 5157 }, { "epoch": 0.6716126513080828, "grad_norm": 2.4643352031707764, "learning_rate": 8.98270293802863e-06, "loss": 0.6963, "step": 5160 }, { "epoch": 0.6720031237797736, "grad_norm": 2.715022325515747, "learning_rate": 8.981439101793086e-06, "loss": 0.6041, "step": 5163 }, { "epoch": 0.6723935962514642, "grad_norm": 2.511801242828369, "learning_rate": 8.980174570008506e-06, "loss": 0.6198, "step": 5166 }, { "epoch": 0.672784068723155, "grad_norm": 2.53893780708313, "learning_rate": 8.978909342895806e-06, "loss": 0.6393, "step": 5169 }, { "epoch": 0.6731745411948458, "grad_norm": 2.6536126136779785, "learning_rate": 8.977643420676009e-06, "loss": 0.7114, "step": 5172 }, { "epoch": 0.6735650136665365, "grad_norm": 2.3529465198516846, "learning_rate": 8.976376803570278e-06, "loss": 0.6438, "step": 5175 }, { "epoch": 0.6739554861382272, "grad_norm": 2.5893778800964355, "learning_rate": 8.975109491799883e-06, "loss": 0.661, "step": 5178 }, { "epoch": 0.674345958609918, "grad_norm": 2.3905746936798096, "learning_rate": 8.973841485586224e-06, "loss": 0.5583, "step": 5181 }, { "epoch": 0.6747364310816087, "grad_norm": 2.6573519706726074, "learning_rate": 8.972572785150815e-06, "loss": 0.6492, "step": 5184 }, { "epoch": 0.6751269035532995, "grad_norm": 2.810039520263672, "learning_rate": 8.971303390715299e-06, "loss": 0.6346, "step": 5187 }, { "epoch": 0.6755173760249903, "grad_norm": 3.9205002784729004, "learning_rate": 8.970033302501433e-06, "loss": 0.7605, "step": 5190 }, { "epoch": 0.675907848496681, "grad_norm": 2.3390591144561768, "learning_rate": 8.968762520731103e-06, "loss": 0.6231, "step": 5193 }, { "epoch": 0.6762983209683717, "grad_norm": 2.8558874130249023, "learning_rate": 8.96749104562631e-06, "loss": 0.6479, "step": 5196 }, { "epoch": 0.6766887934400625, "grad_norm": 2.8038036823272705, "learning_rate": 8.966218877409173e-06, "loss": 0.7772, "step": 5199 }, { "epoch": 0.6770792659117533, "grad_norm": 2.364079475402832, "learning_rate": 8.964946016301946e-06, "loss": 0.5847, "step": 5202 }, { "epoch": 0.6774697383834439, "grad_norm": 2.699650764465332, "learning_rate": 8.963672462526991e-06, "loss": 0.667, "step": 5205 }, { "epoch": 0.6778602108551347, "grad_norm": 2.889913320541382, "learning_rate": 8.962398216306794e-06, "loss": 0.5679, "step": 5208 }, { "epoch": 0.6782506833268255, "grad_norm": 3.220335006713867, "learning_rate": 8.961123277863965e-06, "loss": 0.7115, "step": 5211 }, { "epoch": 0.6786411557985162, "grad_norm": 2.591503143310547, "learning_rate": 8.959847647421231e-06, "loss": 0.6948, "step": 5214 }, { "epoch": 0.6790316282702069, "grad_norm": 2.682509183883667, "learning_rate": 8.958571325201446e-06, "loss": 0.5903, "step": 5217 }, { "epoch": 0.6794221007418977, "grad_norm": 3.3060030937194824, "learning_rate": 8.957294311427575e-06, "loss": 0.6707, "step": 5220 }, { "epoch": 0.6798125732135885, "grad_norm": 2.3943710327148438, "learning_rate": 8.956016606322715e-06, "loss": 0.6522, "step": 5223 }, { "epoch": 0.6802030456852792, "grad_norm": 2.6341614723205566, "learning_rate": 8.954738210110075e-06, "loss": 0.5324, "step": 5226 }, { "epoch": 0.6805935181569699, "grad_norm": 3.0761325359344482, "learning_rate": 8.953459123012988e-06, "loss": 0.6379, "step": 5229 }, { "epoch": 0.6809839906286607, "grad_norm": 2.637105703353882, "learning_rate": 8.952179345254912e-06, "loss": 0.7148, "step": 5232 }, { "epoch": 0.6813744631003514, "grad_norm": 2.6795005798339844, "learning_rate": 8.950898877059417e-06, "loss": 0.6578, "step": 5235 }, { "epoch": 0.6817649355720422, "grad_norm": 2.5985288619995117, "learning_rate": 8.9496177186502e-06, "loss": 0.6964, "step": 5238 }, { "epoch": 0.6821554080437329, "grad_norm": 3.457732677459717, "learning_rate": 8.948335870251075e-06, "loss": 0.7052, "step": 5241 }, { "epoch": 0.6825458805154236, "grad_norm": 2.462507724761963, "learning_rate": 8.947053332085981e-06, "loss": 0.6498, "step": 5244 }, { "epoch": 0.6829363529871144, "grad_norm": 2.4393460750579834, "learning_rate": 8.945770104378973e-06, "loss": 0.6244, "step": 5247 }, { "epoch": 0.6833268254588052, "grad_norm": 2.7880349159240723, "learning_rate": 8.944486187354229e-06, "loss": 0.7032, "step": 5250 }, { "epoch": 0.6837172979304958, "grad_norm": 2.685492753982544, "learning_rate": 8.943201581236045e-06, "loss": 0.6221, "step": 5253 }, { "epoch": 0.6841077704021866, "grad_norm": 3.0528624057769775, "learning_rate": 8.94191628624884e-06, "loss": 0.6866, "step": 5256 }, { "epoch": 0.6844982428738774, "grad_norm": 2.485928773880005, "learning_rate": 8.940630302617153e-06, "loss": 0.6147, "step": 5259 }, { "epoch": 0.6848887153455682, "grad_norm": 2.2878990173339844, "learning_rate": 8.939343630565643e-06, "loss": 0.5663, "step": 5262 }, { "epoch": 0.6852791878172588, "grad_norm": 3.4641735553741455, "learning_rate": 8.938056270319086e-06, "loss": 0.6713, "step": 5265 }, { "epoch": 0.6856696602889496, "grad_norm": 2.376103162765503, "learning_rate": 8.936768222102382e-06, "loss": 0.6678, "step": 5268 }, { "epoch": 0.6860601327606404, "grad_norm": 2.2345285415649414, "learning_rate": 8.935479486140556e-06, "loss": 0.5991, "step": 5271 }, { "epoch": 0.6864506052323311, "grad_norm": 2.701521873474121, "learning_rate": 8.934190062658738e-06, "loss": 0.687, "step": 5274 }, { "epoch": 0.6868410777040219, "grad_norm": 2.6253838539123535, "learning_rate": 8.932899951882195e-06, "loss": 0.6385, "step": 5277 }, { "epoch": 0.6872315501757126, "grad_norm": 2.780701160430908, "learning_rate": 8.931609154036303e-06, "loss": 0.6819, "step": 5280 }, { "epoch": 0.6876220226474034, "grad_norm": 2.612517833709717, "learning_rate": 8.930317669346565e-06, "loss": 0.5871, "step": 5283 }, { "epoch": 0.6880124951190941, "grad_norm": 2.6523735523223877, "learning_rate": 8.929025498038595e-06, "loss": 0.6296, "step": 5286 }, { "epoch": 0.6884029675907849, "grad_norm": 3.8099377155303955, "learning_rate": 8.927732640338138e-06, "loss": 0.7596, "step": 5289 }, { "epoch": 0.6887934400624756, "grad_norm": 2.856313705444336, "learning_rate": 8.92643909647105e-06, "loss": 0.7508, "step": 5292 }, { "epoch": 0.6891839125341663, "grad_norm": 3.0642218589782715, "learning_rate": 8.925144866663313e-06, "loss": 0.6367, "step": 5295 }, { "epoch": 0.6895743850058571, "grad_norm": 2.504683017730713, "learning_rate": 8.923849951141025e-06, "loss": 0.5706, "step": 5298 }, { "epoch": 0.6899648574775479, "grad_norm": 2.745274543762207, "learning_rate": 8.922554350130404e-06, "loss": 0.6845, "step": 5301 }, { "epoch": 0.6903553299492385, "grad_norm": 2.6270129680633545, "learning_rate": 8.921258063857792e-06, "loss": 0.7274, "step": 5304 }, { "epoch": 0.6907458024209293, "grad_norm": 2.979092597961426, "learning_rate": 8.919961092549643e-06, "loss": 0.6765, "step": 5307 }, { "epoch": 0.6911362748926201, "grad_norm": 3.506718873977661, "learning_rate": 8.91866343643254e-06, "loss": 0.6857, "step": 5310 }, { "epoch": 0.6915267473643109, "grad_norm": 3.513763189315796, "learning_rate": 8.917365095733176e-06, "loss": 0.6291, "step": 5313 }, { "epoch": 0.6919172198360015, "grad_norm": 2.3708810806274414, "learning_rate": 8.91606607067837e-06, "loss": 0.5938, "step": 5316 }, { "epoch": 0.6923076923076923, "grad_norm": 2.5921242237091064, "learning_rate": 8.914766361495063e-06, "loss": 0.6986, "step": 5319 }, { "epoch": 0.6926981647793831, "grad_norm": 2.9242255687713623, "learning_rate": 8.913465968410307e-06, "loss": 0.7117, "step": 5322 }, { "epoch": 0.6930886372510738, "grad_norm": 2.42401123046875, "learning_rate": 8.912164891651277e-06, "loss": 0.6327, "step": 5325 }, { "epoch": 0.6934791097227645, "grad_norm": 2.31184458732605, "learning_rate": 8.910863131445273e-06, "loss": 0.6479, "step": 5328 }, { "epoch": 0.6938695821944553, "grad_norm": 2.514890193939209, "learning_rate": 8.909560688019705e-06, "loss": 0.7545, "step": 5331 }, { "epoch": 0.694260054666146, "grad_norm": 2.294757127761841, "learning_rate": 8.908257561602112e-06, "loss": 0.6594, "step": 5334 }, { "epoch": 0.6946505271378368, "grad_norm": 2.3671836853027344, "learning_rate": 8.906953752420142e-06, "loss": 0.6085, "step": 5337 }, { "epoch": 0.6950409996095275, "grad_norm": 2.5829970836639404, "learning_rate": 8.905649260701571e-06, "loss": 0.7096, "step": 5340 }, { "epoch": 0.6954314720812182, "grad_norm": 2.5177972316741943, "learning_rate": 8.904344086674292e-06, "loss": 0.6784, "step": 5343 }, { "epoch": 0.695821944552909, "grad_norm": 2.783193349838257, "learning_rate": 8.903038230566314e-06, "loss": 0.7778, "step": 5346 }, { "epoch": 0.6962124170245998, "grad_norm": 2.4683210849761963, "learning_rate": 8.901731692605767e-06, "loss": 0.6198, "step": 5349 }, { "epoch": 0.6966028894962905, "grad_norm": 2.2853734493255615, "learning_rate": 8.900424473020904e-06, "loss": 0.6472, "step": 5352 }, { "epoch": 0.6969933619679812, "grad_norm": 3.5069949626922607, "learning_rate": 8.899116572040087e-06, "loss": 0.6187, "step": 5355 }, { "epoch": 0.697383834439672, "grad_norm": 2.6812729835510254, "learning_rate": 8.897807989891809e-06, "loss": 0.6091, "step": 5358 }, { "epoch": 0.6977743069113628, "grad_norm": 2.5712661743164062, "learning_rate": 8.896498726804677e-06, "loss": 0.6686, "step": 5361 }, { "epoch": 0.6981647793830535, "grad_norm": 3.0275492668151855, "learning_rate": 8.895188783007412e-06, "loss": 0.6555, "step": 5364 }, { "epoch": 0.6985552518547442, "grad_norm": 2.5897321701049805, "learning_rate": 8.893878158728861e-06, "loss": 0.6643, "step": 5367 }, { "epoch": 0.698945724326435, "grad_norm": 2.7661631107330322, "learning_rate": 8.892566854197988e-06, "loss": 0.6861, "step": 5370 }, { "epoch": 0.6993361967981258, "grad_norm": 2.3814573287963867, "learning_rate": 8.891254869643873e-06, "loss": 0.6581, "step": 5373 }, { "epoch": 0.6997266692698165, "grad_norm": 2.452768564224243, "learning_rate": 8.88994220529572e-06, "loss": 0.6112, "step": 5376 }, { "epoch": 0.7001171417415072, "grad_norm": 2.68501615524292, "learning_rate": 8.888628861382846e-06, "loss": 0.604, "step": 5379 }, { "epoch": 0.700507614213198, "grad_norm": 2.675839424133301, "learning_rate": 8.88731483813469e-06, "loss": 0.7262, "step": 5382 }, { "epoch": 0.7008980866848887, "grad_norm": 4.175928592681885, "learning_rate": 8.88600013578081e-06, "loss": 0.6624, "step": 5385 }, { "epoch": 0.7012885591565795, "grad_norm": 2.8697991371154785, "learning_rate": 8.884684754550882e-06, "loss": 0.6547, "step": 5388 }, { "epoch": 0.7016790316282702, "grad_norm": 4.71966552734375, "learning_rate": 8.8833686946747e-06, "loss": 0.591, "step": 5391 }, { "epoch": 0.7020695040999609, "grad_norm": 2.5646259784698486, "learning_rate": 8.882051956382175e-06, "loss": 0.7158, "step": 5394 }, { "epoch": 0.7024599765716517, "grad_norm": 2.482074737548828, "learning_rate": 8.88073453990334e-06, "loss": 0.6647, "step": 5397 }, { "epoch": 0.7028504490433425, "grad_norm": 2.8584282398223877, "learning_rate": 8.879416445468344e-06, "loss": 0.5627, "step": 5400 }, { "epoch": 0.7032409215150331, "grad_norm": 2.51839280128479, "learning_rate": 8.878097673307458e-06, "loss": 0.7744, "step": 5403 }, { "epoch": 0.7036313939867239, "grad_norm": 2.4203269481658936, "learning_rate": 8.876778223651067e-06, "loss": 0.5973, "step": 5406 }, { "epoch": 0.7040218664584147, "grad_norm": 2.3395378589630127, "learning_rate": 8.875458096729674e-06, "loss": 0.5872, "step": 5409 }, { "epoch": 0.7044123389301055, "grad_norm": 2.6769402027130127, "learning_rate": 8.874137292773906e-06, "loss": 0.6168, "step": 5412 }, { "epoch": 0.7048028114017961, "grad_norm": 2.5752854347229004, "learning_rate": 8.872815812014501e-06, "loss": 0.7544, "step": 5415 }, { "epoch": 0.7051932838734869, "grad_norm": 2.6731069087982178, "learning_rate": 8.871493654682321e-06, "loss": 0.6455, "step": 5418 }, { "epoch": 0.7055837563451777, "grad_norm": 2.6608500480651855, "learning_rate": 8.870170821008346e-06, "loss": 0.6383, "step": 5421 }, { "epoch": 0.7059742288168684, "grad_norm": 3.0593783855438232, "learning_rate": 8.86884731122367e-06, "loss": 0.6723, "step": 5424 }, { "epoch": 0.7063647012885591, "grad_norm": 2.809638738632202, "learning_rate": 8.867523125559504e-06, "loss": 0.6422, "step": 5427 }, { "epoch": 0.7067551737602499, "grad_norm": 2.5025877952575684, "learning_rate": 8.866198264247187e-06, "loss": 0.687, "step": 5430 }, { "epoch": 0.7071456462319406, "grad_norm": 3.1199793815612793, "learning_rate": 8.864872727518168e-06, "loss": 0.5947, "step": 5433 }, { "epoch": 0.7075361187036314, "grad_norm": 2.5214881896972656, "learning_rate": 8.863546515604012e-06, "loss": 0.6269, "step": 5436 }, { "epoch": 0.7079265911753222, "grad_norm": 2.481205940246582, "learning_rate": 8.862219628736409e-06, "loss": 0.7641, "step": 5439 }, { "epoch": 0.7083170636470129, "grad_norm": 2.585766553878784, "learning_rate": 8.86089206714716e-06, "loss": 0.698, "step": 5442 }, { "epoch": 0.7087075361187036, "grad_norm": 3.276780843734741, "learning_rate": 8.859563831068188e-06, "loss": 0.6059, "step": 5445 }, { "epoch": 0.7090980085903944, "grad_norm": 2.3243212699890137, "learning_rate": 8.858234920731536e-06, "loss": 0.6083, "step": 5448 }, { "epoch": 0.7094884810620852, "grad_norm": 2.7607672214508057, "learning_rate": 8.856905336369359e-06, "loss": 0.7022, "step": 5451 }, { "epoch": 0.7098789535337758, "grad_norm": 2.5045323371887207, "learning_rate": 8.855575078213933e-06, "loss": 0.5898, "step": 5454 }, { "epoch": 0.7102694260054666, "grad_norm": 2.378309488296509, "learning_rate": 8.854244146497654e-06, "loss": 0.6493, "step": 5457 }, { "epoch": 0.7106598984771574, "grad_norm": 3.019075870513916, "learning_rate": 8.852912541453029e-06, "loss": 0.5836, "step": 5460 }, { "epoch": 0.7110503709488482, "grad_norm": 2.7453722953796387, "learning_rate": 8.85158026331269e-06, "loss": 0.59, "step": 5463 }, { "epoch": 0.7114408434205388, "grad_norm": 2.3514020442962646, "learning_rate": 8.85024731230938e-06, "loss": 0.5568, "step": 5466 }, { "epoch": 0.7118313158922296, "grad_norm": 2.4078609943389893, "learning_rate": 8.848913688675963e-06, "loss": 0.694, "step": 5469 }, { "epoch": 0.7122217883639204, "grad_norm": 2.3529157638549805, "learning_rate": 8.847579392645425e-06, "loss": 0.6763, "step": 5472 }, { "epoch": 0.7126122608356111, "grad_norm": 2.9866087436676025, "learning_rate": 8.846244424450858e-06, "loss": 0.6423, "step": 5475 }, { "epoch": 0.7130027333073018, "grad_norm": 2.784130334854126, "learning_rate": 8.844908784325483e-06, "loss": 0.6326, "step": 5478 }, { "epoch": 0.7133932057789926, "grad_norm": 2.18365740776062, "learning_rate": 8.84357247250263e-06, "loss": 0.5958, "step": 5481 }, { "epoch": 0.7137836782506833, "grad_norm": 2.3948042392730713, "learning_rate": 8.842235489215755e-06, "loss": 0.7437, "step": 5484 }, { "epoch": 0.7141741507223741, "grad_norm": 2.9066920280456543, "learning_rate": 8.84089783469842e-06, "loss": 0.6481, "step": 5487 }, { "epoch": 0.7145646231940648, "grad_norm": 2.770456075668335, "learning_rate": 8.839559509184317e-06, "loss": 0.6289, "step": 5490 }, { "epoch": 0.7149550956657555, "grad_norm": 2.8254899978637695, "learning_rate": 8.838220512907241e-06, "loss": 0.6341, "step": 5493 }, { "epoch": 0.7153455681374463, "grad_norm": 2.9626049995422363, "learning_rate": 8.836880846101118e-06, "loss": 0.6394, "step": 5496 }, { "epoch": 0.7157360406091371, "grad_norm": 2.709442377090454, "learning_rate": 8.835540508999982e-06, "loss": 0.7887, "step": 5499 }, { "epoch": 0.7161265130808278, "grad_norm": 2.8093841075897217, "learning_rate": 8.834199501837988e-06, "loss": 0.5902, "step": 5502 }, { "epoch": 0.7165169855525185, "grad_norm": 2.7774314880371094, "learning_rate": 8.832857824849407e-06, "loss": 0.6338, "step": 5505 }, { "epoch": 0.7169074580242093, "grad_norm": 4.112159729003906, "learning_rate": 8.831515478268627e-06, "loss": 0.6495, "step": 5508 }, { "epoch": 0.7172979304959001, "grad_norm": 3.2125117778778076, "learning_rate": 8.830172462330155e-06, "loss": 0.639, "step": 5511 }, { "epoch": 0.7176884029675907, "grad_norm": 2.521543264389038, "learning_rate": 8.828828777268609e-06, "loss": 0.6834, "step": 5514 }, { "epoch": 0.7180788754392815, "grad_norm": 2.344866991043091, "learning_rate": 8.827484423318731e-06, "loss": 0.7033, "step": 5517 }, { "epoch": 0.7184693479109723, "grad_norm": 3.047065496444702, "learning_rate": 8.826139400715377e-06, "loss": 0.6674, "step": 5520 }, { "epoch": 0.718859820382663, "grad_norm": 2.916560411453247, "learning_rate": 8.824793709693517e-06, "loss": 0.6385, "step": 5523 }, { "epoch": 0.7192502928543538, "grad_norm": 2.3283605575561523, "learning_rate": 8.823447350488243e-06, "loss": 0.6017, "step": 5526 }, { "epoch": 0.7196407653260445, "grad_norm": 3.2105703353881836, "learning_rate": 8.822100323334761e-06, "loss": 0.635, "step": 5529 }, { "epoch": 0.7200312377977353, "grad_norm": 2.4558160305023193, "learning_rate": 8.820752628468391e-06, "loss": 0.6822, "step": 5532 }, { "epoch": 0.720421710269426, "grad_norm": 2.4689717292785645, "learning_rate": 8.819404266124575e-06, "loss": 0.7866, "step": 5535 }, { "epoch": 0.7208121827411168, "grad_norm": 2.5155982971191406, "learning_rate": 8.818055236538872e-06, "loss": 0.6903, "step": 5538 }, { "epoch": 0.7212026552128075, "grad_norm": 3.5505385398864746, "learning_rate": 8.816705539946948e-06, "loss": 0.5982, "step": 5541 }, { "epoch": 0.7215931276844982, "grad_norm": 2.4933745861053467, "learning_rate": 8.815355176584595e-06, "loss": 0.6077, "step": 5544 }, { "epoch": 0.721983600156189, "grad_norm": 3.3649566173553467, "learning_rate": 8.81400414668772e-06, "loss": 0.7257, "step": 5547 }, { "epoch": 0.7223740726278798, "grad_norm": 2.446016550064087, "learning_rate": 8.812652450492345e-06, "loss": 0.5711, "step": 5550 }, { "epoch": 0.7227645450995704, "grad_norm": 2.715980052947998, "learning_rate": 8.811300088234607e-06, "loss": 0.6866, "step": 5553 }, { "epoch": 0.7231550175712612, "grad_norm": 2.724425792694092, "learning_rate": 8.80994706015076e-06, "loss": 0.6597, "step": 5556 }, { "epoch": 0.723545490042952, "grad_norm": 3.384270429611206, "learning_rate": 8.808593366477177e-06, "loss": 0.6568, "step": 5559 }, { "epoch": 0.7239359625146428, "grad_norm": 2.415548086166382, "learning_rate": 8.807239007450345e-06, "loss": 0.6081, "step": 5562 }, { "epoch": 0.7243264349863334, "grad_norm": 2.3857581615448, "learning_rate": 8.805883983306869e-06, "loss": 0.7436, "step": 5565 }, { "epoch": 0.7247169074580242, "grad_norm": 2.494887351989746, "learning_rate": 8.804528294283466e-06, "loss": 0.6451, "step": 5568 }, { "epoch": 0.725107379929715, "grad_norm": 2.5483858585357666, "learning_rate": 8.803171940616974e-06, "loss": 0.6226, "step": 5571 }, { "epoch": 0.7254978524014057, "grad_norm": 2.5574679374694824, "learning_rate": 8.801814922544345e-06, "loss": 0.6289, "step": 5574 }, { "epoch": 0.7258883248730964, "grad_norm": 2.4144797325134277, "learning_rate": 8.800457240302646e-06, "loss": 0.6128, "step": 5577 }, { "epoch": 0.7262787973447872, "grad_norm": 2.4427857398986816, "learning_rate": 8.799098894129063e-06, "loss": 0.6263, "step": 5580 }, { "epoch": 0.726669269816478, "grad_norm": 2.785493850708008, "learning_rate": 8.797739884260896e-06, "loss": 0.7186, "step": 5583 }, { "epoch": 0.7270597422881687, "grad_norm": 2.493211030960083, "learning_rate": 8.79638021093556e-06, "loss": 0.6333, "step": 5586 }, { "epoch": 0.7274502147598594, "grad_norm": 2.7701425552368164, "learning_rate": 8.795019874390587e-06, "loss": 0.7146, "step": 5589 }, { "epoch": 0.7278406872315502, "grad_norm": 2.20003604888916, "learning_rate": 8.793658874863626e-06, "loss": 0.5846, "step": 5592 }, { "epoch": 0.7282311597032409, "grad_norm": 3.9232680797576904, "learning_rate": 8.79229721259244e-06, "loss": 0.6657, "step": 5595 }, { "epoch": 0.7286216321749317, "grad_norm": 2.5265204906463623, "learning_rate": 8.790934887814908e-06, "loss": 0.6148, "step": 5598 }, { "epoch": 0.7290121046466224, "grad_norm": 2.7512199878692627, "learning_rate": 8.789571900769028e-06, "loss": 0.6684, "step": 5601 }, { "epoch": 0.7294025771183131, "grad_norm": 2.3781023025512695, "learning_rate": 8.788208251692908e-06, "loss": 0.7028, "step": 5604 }, { "epoch": 0.7297930495900039, "grad_norm": 2.4492669105529785, "learning_rate": 8.786843940824775e-06, "loss": 0.5972, "step": 5607 }, { "epoch": 0.7301835220616947, "grad_norm": 2.4138023853302, "learning_rate": 8.785478968402972e-06, "loss": 0.5792, "step": 5610 }, { "epoch": 0.7305739945333855, "grad_norm": 3.772378921508789, "learning_rate": 8.784113334665958e-06, "loss": 0.7113, "step": 5613 }, { "epoch": 0.7309644670050761, "grad_norm": 2.438852071762085, "learning_rate": 8.782747039852304e-06, "loss": 0.6087, "step": 5616 }, { "epoch": 0.7313549394767669, "grad_norm": 2.447324514389038, "learning_rate": 8.7813800842007e-06, "loss": 0.6334, "step": 5619 }, { "epoch": 0.7317454119484577, "grad_norm": 2.2790205478668213, "learning_rate": 8.78001246794995e-06, "loss": 0.6197, "step": 5622 }, { "epoch": 0.7321358844201484, "grad_norm": 2.459559917449951, "learning_rate": 8.778644191338974e-06, "loss": 0.6726, "step": 5625 }, { "epoch": 0.7325263568918391, "grad_norm": 2.6263234615325928, "learning_rate": 8.777275254606808e-06, "loss": 0.6843, "step": 5628 }, { "epoch": 0.7329168293635299, "grad_norm": 2.590893507003784, "learning_rate": 8.775905657992599e-06, "loss": 0.7584, "step": 5631 }, { "epoch": 0.7333073018352206, "grad_norm": 2.9429869651794434, "learning_rate": 8.774535401735616e-06, "loss": 0.687, "step": 5634 }, { "epoch": 0.7336977743069114, "grad_norm": 2.8409743309020996, "learning_rate": 8.773164486075238e-06, "loss": 0.7379, "step": 5637 }, { "epoch": 0.7340882467786021, "grad_norm": 2.462329864501953, "learning_rate": 8.771792911250963e-06, "loss": 0.6646, "step": 5640 }, { "epoch": 0.7344787192502928, "grad_norm": 2.554640769958496, "learning_rate": 8.770420677502401e-06, "loss": 0.7051, "step": 5643 }, { "epoch": 0.7348691917219836, "grad_norm": 2.4346702098846436, "learning_rate": 8.769047785069277e-06, "loss": 0.6723, "step": 5646 }, { "epoch": 0.7352596641936744, "grad_norm": 2.7797698974609375, "learning_rate": 8.767674234191436e-06, "loss": 0.6458, "step": 5649 }, { "epoch": 0.735650136665365, "grad_norm": 2.8324296474456787, "learning_rate": 8.76630002510883e-06, "loss": 0.7206, "step": 5652 }, { "epoch": 0.7360406091370558, "grad_norm": 2.280325412750244, "learning_rate": 8.764925158061537e-06, "loss": 0.6426, "step": 5655 }, { "epoch": 0.7364310816087466, "grad_norm": 3.168109655380249, "learning_rate": 8.763549633289737e-06, "loss": 0.6371, "step": 5658 }, { "epoch": 0.7368215540804374, "grad_norm": 3.2661867141723633, "learning_rate": 8.762173451033731e-06, "loss": 0.6164, "step": 5661 }, { "epoch": 0.737212026552128, "grad_norm": 4.669281959533691, "learning_rate": 8.760796611533939e-06, "loss": 0.6704, "step": 5664 }, { "epoch": 0.7376024990238188, "grad_norm": 2.702472686767578, "learning_rate": 8.75941911503089e-06, "loss": 0.6053, "step": 5667 }, { "epoch": 0.7379929714955096, "grad_norm": 2.3921306133270264, "learning_rate": 8.758040961765233e-06, "loss": 0.6229, "step": 5670 }, { "epoch": 0.7383834439672003, "grad_norm": 2.605787515640259, "learning_rate": 8.756662151977724e-06, "loss": 0.7242, "step": 5673 }, { "epoch": 0.738773916438891, "grad_norm": 2.5881075859069824, "learning_rate": 8.755282685909239e-06, "loss": 0.6878, "step": 5676 }, { "epoch": 0.7391643889105818, "grad_norm": 2.3970558643341064, "learning_rate": 8.753902563800769e-06, "loss": 0.6808, "step": 5679 }, { "epoch": 0.7395548613822726, "grad_norm": 2.6305997371673584, "learning_rate": 8.752521785893418e-06, "loss": 0.6692, "step": 5682 }, { "epoch": 0.7399453338539633, "grad_norm": 3.6381564140319824, "learning_rate": 8.751140352428406e-06, "loss": 0.6898, "step": 5685 }, { "epoch": 0.740335806325654, "grad_norm": 2.4845845699310303, "learning_rate": 8.749758263647066e-06, "loss": 0.6876, "step": 5688 }, { "epoch": 0.7407262787973448, "grad_norm": 2.307579517364502, "learning_rate": 8.748375519790846e-06, "loss": 0.5223, "step": 5691 }, { "epoch": 0.7411167512690355, "grad_norm": 2.4577105045318604, "learning_rate": 8.746992121101306e-06, "loss": 0.6736, "step": 5694 }, { "epoch": 0.7415072237407263, "grad_norm": 2.539513111114502, "learning_rate": 8.745608067820127e-06, "loss": 0.6554, "step": 5697 }, { "epoch": 0.7418976962124171, "grad_norm": 2.9040579795837402, "learning_rate": 8.744223360189097e-06, "loss": 0.6961, "step": 5700 }, { "epoch": 0.7422881686841077, "grad_norm": 3.254425525665283, "learning_rate": 8.742837998450122e-06, "loss": 0.6879, "step": 5703 }, { "epoch": 0.7426786411557985, "grad_norm": 3.117102861404419, "learning_rate": 8.741451982845224e-06, "loss": 0.6168, "step": 5706 }, { "epoch": 0.7430691136274893, "grad_norm": 2.4279301166534424, "learning_rate": 8.740065313616536e-06, "loss": 0.6354, "step": 5709 }, { "epoch": 0.7434595860991801, "grad_norm": 2.2769625186920166, "learning_rate": 8.738677991006304e-06, "loss": 0.6411, "step": 5712 }, { "epoch": 0.7438500585708707, "grad_norm": 2.597215175628662, "learning_rate": 8.737290015256892e-06, "loss": 0.6494, "step": 5715 }, { "epoch": 0.7442405310425615, "grad_norm": 3.5001959800720215, "learning_rate": 8.735901386610777e-06, "loss": 0.6644, "step": 5718 }, { "epoch": 0.7446310035142523, "grad_norm": 2.5007054805755615, "learning_rate": 8.73451210531055e-06, "loss": 0.643, "step": 5721 }, { "epoch": 0.745021475985943, "grad_norm": 2.652787923812866, "learning_rate": 8.733122171598914e-06, "loss": 0.7299, "step": 5724 }, { "epoch": 0.7454119484576337, "grad_norm": 2.6559813022613525, "learning_rate": 8.731731585718687e-06, "loss": 0.6918, "step": 5727 }, { "epoch": 0.7458024209293245, "grad_norm": 2.646437644958496, "learning_rate": 8.730340347912803e-06, "loss": 0.6691, "step": 5730 }, { "epoch": 0.7461928934010152, "grad_norm": 2.6184146404266357, "learning_rate": 8.728948458424307e-06, "loss": 0.6599, "step": 5733 }, { "epoch": 0.746583365872706, "grad_norm": 2.406266927719116, "learning_rate": 8.72755591749636e-06, "loss": 0.6671, "step": 5736 }, { "epoch": 0.7469738383443967, "grad_norm": 3.493818759918213, "learning_rate": 8.726162725372237e-06, "loss": 0.6482, "step": 5739 }, { "epoch": 0.7473643108160875, "grad_norm": 2.1511340141296387, "learning_rate": 8.724768882295324e-06, "loss": 0.6, "step": 5742 }, { "epoch": 0.7477547832877782, "grad_norm": 2.556877851486206, "learning_rate": 8.723374388509123e-06, "loss": 0.6613, "step": 5745 }, { "epoch": 0.748145255759469, "grad_norm": 2.497506856918335, "learning_rate": 8.721979244257247e-06, "loss": 0.6198, "step": 5748 }, { "epoch": 0.7485357282311597, "grad_norm": 2.378761053085327, "learning_rate": 8.72058344978343e-06, "loss": 0.5909, "step": 5751 }, { "epoch": 0.7489262007028504, "grad_norm": 3.043630838394165, "learning_rate": 8.71918700533151e-06, "loss": 0.6554, "step": 5754 }, { "epoch": 0.7493166731745412, "grad_norm": 2.5283005237579346, "learning_rate": 8.717789911145445e-06, "loss": 0.6827, "step": 5757 }, { "epoch": 0.749707145646232, "grad_norm": 2.631117105484009, "learning_rate": 8.716392167469303e-06, "loss": 0.6304, "step": 5760 }, { "epoch": 0.7500976181179226, "grad_norm": 2.4751923084259033, "learning_rate": 8.714993774547267e-06, "loss": 0.7159, "step": 5763 }, { "epoch": 0.7504880905896134, "grad_norm": 3.3558480739593506, "learning_rate": 8.713594732623635e-06, "loss": 0.7029, "step": 5766 }, { "epoch": 0.7508785630613042, "grad_norm": 2.266209125518799, "learning_rate": 8.712195041942814e-06, "loss": 0.7196, "step": 5769 }, { "epoch": 0.751269035532995, "grad_norm": 2.9685750007629395, "learning_rate": 8.71079470274933e-06, "loss": 0.6283, "step": 5772 }, { "epoch": 0.7516595080046856, "grad_norm": 2.5255727767944336, "learning_rate": 8.709393715287817e-06, "loss": 0.5652, "step": 5775 }, { "epoch": 0.7520499804763764, "grad_norm": 2.282809257507324, "learning_rate": 8.707992079803025e-06, "loss": 0.5898, "step": 5778 }, { "epoch": 0.7524404529480672, "grad_norm": 2.4005398750305176, "learning_rate": 8.706589796539818e-06, "loss": 0.7173, "step": 5781 }, { "epoch": 0.7528309254197579, "grad_norm": 3.0054335594177246, "learning_rate": 8.70518686574317e-06, "loss": 0.6631, "step": 5784 }, { "epoch": 0.7532213978914487, "grad_norm": 2.390732526779175, "learning_rate": 8.703783287658172e-06, "loss": 0.7253, "step": 5787 }, { "epoch": 0.7536118703631394, "grad_norm": 2.417182445526123, "learning_rate": 8.702379062530026e-06, "loss": 0.6376, "step": 5790 }, { "epoch": 0.7540023428348301, "grad_norm": 2.577763795852661, "learning_rate": 8.700974190604045e-06, "loss": 0.621, "step": 5793 }, { "epoch": 0.7543928153065209, "grad_norm": 2.8259828090667725, "learning_rate": 8.69956867212566e-06, "loss": 0.624, "step": 5796 }, { "epoch": 0.7547832877782117, "grad_norm": 3.2486746311187744, "learning_rate": 8.698162507340408e-06, "loss": 0.6333, "step": 5799 }, { "epoch": 0.7551737602499023, "grad_norm": 2.8928191661834717, "learning_rate": 8.696755696493949e-06, "loss": 0.6203, "step": 5802 }, { "epoch": 0.7555642327215931, "grad_norm": 3.078284502029419, "learning_rate": 8.695348239832045e-06, "loss": 0.6175, "step": 5805 }, { "epoch": 0.7559547051932839, "grad_norm": 2.5001373291015625, "learning_rate": 8.693940137600578e-06, "loss": 0.639, "step": 5808 }, { "epoch": 0.7563451776649747, "grad_norm": 2.3842241764068604, "learning_rate": 8.692531390045538e-06, "loss": 0.6531, "step": 5811 }, { "epoch": 0.7567356501366653, "grad_norm": 2.515578269958496, "learning_rate": 8.691121997413035e-06, "loss": 0.6603, "step": 5814 }, { "epoch": 0.7571261226083561, "grad_norm": 2.7876977920532227, "learning_rate": 8.689711959949282e-06, "loss": 0.6454, "step": 5817 }, { "epoch": 0.7575165950800469, "grad_norm": 3.4954264163970947, "learning_rate": 8.688301277900613e-06, "loss": 0.7285, "step": 5820 }, { "epoch": 0.7579070675517376, "grad_norm": 2.9758989810943604, "learning_rate": 8.686889951513468e-06, "loss": 0.6519, "step": 5823 }, { "epoch": 0.7582975400234283, "grad_norm": 2.79794979095459, "learning_rate": 8.685477981034407e-06, "loss": 0.6698, "step": 5826 }, { "epoch": 0.7586880124951191, "grad_norm": 2.7669429779052734, "learning_rate": 8.684065366710093e-06, "loss": 0.7602, "step": 5829 }, { "epoch": 0.7590784849668099, "grad_norm": 2.231877326965332, "learning_rate": 8.682652108787312e-06, "loss": 0.6112, "step": 5832 }, { "epoch": 0.7594689574385006, "grad_norm": 2.530808210372925, "learning_rate": 8.681238207512955e-06, "loss": 0.5664, "step": 5835 }, { "epoch": 0.7598594299101913, "grad_norm": 3.670186758041382, "learning_rate": 8.679823663134025e-06, "loss": 0.5716, "step": 5838 }, { "epoch": 0.7602499023818821, "grad_norm": 2.7073922157287598, "learning_rate": 8.678408475897643e-06, "loss": 0.556, "step": 5841 }, { "epoch": 0.7606403748535728, "grad_norm": 2.874410390853882, "learning_rate": 8.67699264605104e-06, "loss": 0.6784, "step": 5844 }, { "epoch": 0.7610308473252636, "grad_norm": 2.438572883605957, "learning_rate": 8.675576173841555e-06, "loss": 0.7022, "step": 5847 }, { "epoch": 0.7614213197969543, "grad_norm": 2.563101053237915, "learning_rate": 8.674159059516645e-06, "loss": 0.7107, "step": 5850 }, { "epoch": 0.761811792268645, "grad_norm": 2.388939380645752, "learning_rate": 8.672741303323877e-06, "loss": 0.6116, "step": 5853 }, { "epoch": 0.7622022647403358, "grad_norm": 2.337643623352051, "learning_rate": 8.671322905510931e-06, "loss": 0.6408, "step": 5856 }, { "epoch": 0.7625927372120266, "grad_norm": 2.49255633354187, "learning_rate": 8.669903866325594e-06, "loss": 0.6909, "step": 5859 }, { "epoch": 0.7629832096837172, "grad_norm": 2.3588013648986816, "learning_rate": 8.668484186015775e-06, "loss": 0.6309, "step": 5862 }, { "epoch": 0.763373682155408, "grad_norm": 2.3805925846099854, "learning_rate": 8.667063864829483e-06, "loss": 0.6424, "step": 5865 }, { "epoch": 0.7637641546270988, "grad_norm": 3.148681879043579, "learning_rate": 8.665642903014851e-06, "loss": 0.6926, "step": 5868 }, { "epoch": 0.7641546270987896, "grad_norm": 2.75685977935791, "learning_rate": 8.664221300820114e-06, "loss": 0.6597, "step": 5871 }, { "epoch": 0.7645450995704803, "grad_norm": 2.3916969299316406, "learning_rate": 8.662799058493625e-06, "loss": 0.5482, "step": 5874 }, { "epoch": 0.764935572042171, "grad_norm": 2.5081396102905273, "learning_rate": 8.661376176283844e-06, "loss": 0.6723, "step": 5877 }, { "epoch": 0.7653260445138618, "grad_norm": 2.1820735931396484, "learning_rate": 8.659952654439348e-06, "loss": 0.6866, "step": 5880 }, { "epoch": 0.7657165169855525, "grad_norm": 2.611650228500366, "learning_rate": 8.658528493208825e-06, "loss": 0.7078, "step": 5883 }, { "epoch": 0.7661069894572433, "grad_norm": 2.291048765182495, "learning_rate": 8.657103692841067e-06, "loss": 0.6261, "step": 5886 }, { "epoch": 0.766497461928934, "grad_norm": 2.417051076889038, "learning_rate": 8.655678253584989e-06, "loss": 0.6115, "step": 5889 }, { "epoch": 0.7668879344006247, "grad_norm": 2.867833137512207, "learning_rate": 8.65425217568961e-06, "loss": 0.7631, "step": 5892 }, { "epoch": 0.7672784068723155, "grad_norm": 2.566713571548462, "learning_rate": 8.652825459404065e-06, "loss": 0.5086, "step": 5895 }, { "epoch": 0.7676688793440063, "grad_norm": 2.6429190635681152, "learning_rate": 8.651398104977595e-06, "loss": 0.5853, "step": 5898 }, { "epoch": 0.768059351815697, "grad_norm": 2.9396679401397705, "learning_rate": 8.649970112659558e-06, "loss": 0.5673, "step": 5901 }, { "epoch": 0.7684498242873877, "grad_norm": 2.601750612258911, "learning_rate": 8.648541482699422e-06, "loss": 0.7371, "step": 5904 }, { "epoch": 0.7688402967590785, "grad_norm": 2.7551345825195312, "learning_rate": 8.647112215346763e-06, "loss": 0.6192, "step": 5907 }, { "epoch": 0.7692307692307693, "grad_norm": 2.5565147399902344, "learning_rate": 8.645682310851273e-06, "loss": 0.6002, "step": 5910 }, { "epoch": 0.7696212417024599, "grad_norm": 3.5521817207336426, "learning_rate": 8.644251769462751e-06, "loss": 0.6178, "step": 5913 }, { "epoch": 0.7700117141741507, "grad_norm": 2.3843579292297363, "learning_rate": 8.642820591431114e-06, "loss": 0.5701, "step": 5916 }, { "epoch": 0.7704021866458415, "grad_norm": 2.349097490310669, "learning_rate": 8.641388777006381e-06, "loss": 0.6391, "step": 5919 }, { "epoch": 0.7707926591175323, "grad_norm": 2.2657618522644043, "learning_rate": 8.639956326438688e-06, "loss": 0.6661, "step": 5922 }, { "epoch": 0.7711831315892229, "grad_norm": 3.2351880073547363, "learning_rate": 8.638523239978285e-06, "loss": 0.6367, "step": 5925 }, { "epoch": 0.7715736040609137, "grad_norm": 2.358475923538208, "learning_rate": 8.637089517875524e-06, "loss": 0.6163, "step": 5928 }, { "epoch": 0.7719640765326045, "grad_norm": 2.8347368240356445, "learning_rate": 8.635655160380879e-06, "loss": 0.7084, "step": 5931 }, { "epoch": 0.7723545490042952, "grad_norm": 2.7897112369537354, "learning_rate": 8.634220167744922e-06, "loss": 0.612, "step": 5934 }, { "epoch": 0.7727450214759859, "grad_norm": 2.506141424179077, "learning_rate": 8.632784540218348e-06, "loss": 0.6386, "step": 5937 }, { "epoch": 0.7731354939476767, "grad_norm": 2.703279495239258, "learning_rate": 8.631348278051956e-06, "loss": 0.6336, "step": 5940 }, { "epoch": 0.7735259664193674, "grad_norm": 2.3759536743164062, "learning_rate": 8.62991138149666e-06, "loss": 0.6074, "step": 5943 }, { "epoch": 0.7739164388910582, "grad_norm": 3.115846872329712, "learning_rate": 8.628473850803484e-06, "loss": 0.6421, "step": 5946 }, { "epoch": 0.7743069113627489, "grad_norm": 3.065495729446411, "learning_rate": 8.627035686223557e-06, "loss": 0.6935, "step": 5949 }, { "epoch": 0.7746973838344396, "grad_norm": 2.664783477783203, "learning_rate": 8.625596888008127e-06, "loss": 0.6864, "step": 5952 }, { "epoch": 0.7750878563061304, "grad_norm": 2.6792876720428467, "learning_rate": 8.624157456408547e-06, "loss": 0.7312, "step": 5955 }, { "epoch": 0.7754783287778212, "grad_norm": 2.283553123474121, "learning_rate": 8.622717391676284e-06, "loss": 0.6556, "step": 5958 }, { "epoch": 0.775868801249512, "grad_norm": 3.2782957553863525, "learning_rate": 8.621276694062915e-06, "loss": 0.6916, "step": 5961 }, { "epoch": 0.7762592737212026, "grad_norm": 2.4025683403015137, "learning_rate": 8.619835363820123e-06, "loss": 0.8082, "step": 5964 }, { "epoch": 0.7766497461928934, "grad_norm": 2.7000598907470703, "learning_rate": 8.61839340119971e-06, "loss": 0.6322, "step": 5967 }, { "epoch": 0.7770402186645842, "grad_norm": 3.0906264781951904, "learning_rate": 8.61695080645358e-06, "loss": 0.6954, "step": 5970 }, { "epoch": 0.7774306911362749, "grad_norm": 2.45308256149292, "learning_rate": 8.615507579833754e-06, "loss": 0.7327, "step": 5973 }, { "epoch": 0.7778211636079656, "grad_norm": 2.349186420440674, "learning_rate": 8.61406372159236e-06, "loss": 0.6414, "step": 5976 }, { "epoch": 0.7782116360796564, "grad_norm": 2.472166061401367, "learning_rate": 8.612619231981636e-06, "loss": 0.6639, "step": 5979 }, { "epoch": 0.7786021085513471, "grad_norm": 2.506831645965576, "learning_rate": 8.611174111253932e-06, "loss": 0.5965, "step": 5982 }, { "epoch": 0.7789925810230379, "grad_norm": 3.1906239986419678, "learning_rate": 8.609728359661709e-06, "loss": 0.6799, "step": 5985 }, { "epoch": 0.7793830534947286, "grad_norm": 3.2461135387420654, "learning_rate": 8.608281977457534e-06, "loss": 0.7414, "step": 5988 }, { "epoch": 0.7797735259664194, "grad_norm": 2.6268630027770996, "learning_rate": 8.606834964894089e-06, "loss": 0.655, "step": 5991 }, { "epoch": 0.7801639984381101, "grad_norm": 2.3125433921813965, "learning_rate": 8.605387322224162e-06, "loss": 0.6857, "step": 5994 }, { "epoch": 0.7805544709098009, "grad_norm": 3.1327126026153564, "learning_rate": 8.603939049700655e-06, "loss": 0.7118, "step": 5997 }, { "epoch": 0.7809449433814916, "grad_norm": 3.7016682624816895, "learning_rate": 8.602490147576579e-06, "loss": 0.7118, "step": 6000 }, { "epoch": 0.7813354158531823, "grad_norm": 2.4639270305633545, "learning_rate": 8.601040616105053e-06, "loss": 0.7133, "step": 6003 }, { "epoch": 0.7817258883248731, "grad_norm": 2.384366989135742, "learning_rate": 8.599590455539304e-06, "loss": 0.6218, "step": 6006 }, { "epoch": 0.7821163607965639, "grad_norm": 3.026371717453003, "learning_rate": 8.598139666132676e-06, "loss": 0.6333, "step": 6009 }, { "epoch": 0.7825068332682545, "grad_norm": 2.3918018341064453, "learning_rate": 8.596688248138618e-06, "loss": 0.5672, "step": 6012 }, { "epoch": 0.7828973057399453, "grad_norm": 2.7614986896514893, "learning_rate": 8.59523620181069e-06, "loss": 0.68, "step": 6015 }, { "epoch": 0.7832877782116361, "grad_norm": 2.838771343231201, "learning_rate": 8.59378352740256e-06, "loss": 0.7742, "step": 6018 }, { "epoch": 0.7836782506833269, "grad_norm": 2.317967176437378, "learning_rate": 8.592330225168008e-06, "loss": 0.6576, "step": 6021 }, { "epoch": 0.7840687231550175, "grad_norm": 2.4622199535369873, "learning_rate": 8.590876295360924e-06, "loss": 0.706, "step": 6024 }, { "epoch": 0.7844591956267083, "grad_norm": 2.2674942016601562, "learning_rate": 8.589421738235304e-06, "loss": 0.5825, "step": 6027 }, { "epoch": 0.7848496680983991, "grad_norm": 2.866095781326294, "learning_rate": 8.587966554045259e-06, "loss": 0.6909, "step": 6030 }, { "epoch": 0.7852401405700898, "grad_norm": 2.193502902984619, "learning_rate": 8.586510743045002e-06, "loss": 0.7236, "step": 6033 }, { "epoch": 0.7856306130417805, "grad_norm": 2.5779576301574707, "learning_rate": 8.585054305488866e-06, "loss": 0.6158, "step": 6036 }, { "epoch": 0.7860210855134713, "grad_norm": 2.6559667587280273, "learning_rate": 8.583597241631283e-06, "loss": 0.5794, "step": 6039 }, { "epoch": 0.786411557985162, "grad_norm": 2.5779333114624023, "learning_rate": 8.582139551726801e-06, "loss": 0.6935, "step": 6042 }, { "epoch": 0.7868020304568528, "grad_norm": 2.5629148483276367, "learning_rate": 8.580681236030075e-06, "loss": 0.615, "step": 6045 }, { "epoch": 0.7871925029285436, "grad_norm": 2.830688238143921, "learning_rate": 8.579222294795871e-06, "loss": 0.6147, "step": 6048 }, { "epoch": 0.7875829754002343, "grad_norm": 2.249699115753174, "learning_rate": 8.57776272827906e-06, "loss": 0.6282, "step": 6051 }, { "epoch": 0.787973447871925, "grad_norm": 2.603372097015381, "learning_rate": 8.576302536734628e-06, "loss": 0.8085, "step": 6054 }, { "epoch": 0.7883639203436158, "grad_norm": 2.5415706634521484, "learning_rate": 8.574841720417666e-06, "loss": 0.733, "step": 6057 }, { "epoch": 0.7887543928153066, "grad_norm": 2.7278621196746826, "learning_rate": 8.573380279583374e-06, "loss": 0.6182, "step": 6060 }, { "epoch": 0.7891448652869972, "grad_norm": 2.673105001449585, "learning_rate": 8.571918214487068e-06, "loss": 0.702, "step": 6063 }, { "epoch": 0.789535337758688, "grad_norm": 2.781696081161499, "learning_rate": 8.570455525384161e-06, "loss": 0.6388, "step": 6066 }, { "epoch": 0.7899258102303788, "grad_norm": 2.392241954803467, "learning_rate": 8.568992212530187e-06, "loss": 0.5784, "step": 6069 }, { "epoch": 0.7903162827020696, "grad_norm": 3.162004232406616, "learning_rate": 8.56752827618078e-06, "loss": 0.7068, "step": 6072 }, { "epoch": 0.7907067551737602, "grad_norm": 2.509340763092041, "learning_rate": 8.566063716591689e-06, "loss": 0.6832, "step": 6075 }, { "epoch": 0.791097227645451, "grad_norm": 3.036722421646118, "learning_rate": 8.56459853401877e-06, "loss": 0.7436, "step": 6078 }, { "epoch": 0.7914877001171418, "grad_norm": 2.521820306777954, "learning_rate": 8.563132728717983e-06, "loss": 0.6142, "step": 6081 }, { "epoch": 0.7918781725888325, "grad_norm": 2.626577377319336, "learning_rate": 8.561666300945406e-06, "loss": 0.6366, "step": 6084 }, { "epoch": 0.7922686450605232, "grad_norm": 2.6800007820129395, "learning_rate": 8.560199250957218e-06, "loss": 0.6728, "step": 6087 }, { "epoch": 0.792659117532214, "grad_norm": 2.4256107807159424, "learning_rate": 8.55873157900971e-06, "loss": 0.6083, "step": 6090 }, { "epoch": 0.7930495900039047, "grad_norm": 2.7973265647888184, "learning_rate": 8.557263285359282e-06, "loss": 0.6473, "step": 6093 }, { "epoch": 0.7934400624755955, "grad_norm": 4.817023277282715, "learning_rate": 8.55579437026244e-06, "loss": 0.6608, "step": 6096 }, { "epoch": 0.7938305349472862, "grad_norm": 2.4599454402923584, "learning_rate": 8.554324833975805e-06, "loss": 0.5633, "step": 6099 }, { "epoch": 0.7942210074189769, "grad_norm": 2.911656618118286, "learning_rate": 8.552854676756097e-06, "loss": 0.683, "step": 6102 }, { "epoch": 0.7946114798906677, "grad_norm": 2.656649589538574, "learning_rate": 8.551383898860152e-06, "loss": 0.7645, "step": 6105 }, { "epoch": 0.7950019523623585, "grad_norm": 2.6597232818603516, "learning_rate": 8.54991250054491e-06, "loss": 0.6028, "step": 6108 }, { "epoch": 0.7953924248340491, "grad_norm": 2.368178367614746, "learning_rate": 8.548440482067422e-06, "loss": 0.6649, "step": 6111 }, { "epoch": 0.7957828973057399, "grad_norm": 2.8005642890930176, "learning_rate": 8.546967843684846e-06, "loss": 0.6525, "step": 6114 }, { "epoch": 0.7961733697774307, "grad_norm": 2.426015853881836, "learning_rate": 8.54549458565445e-06, "loss": 0.5669, "step": 6117 }, { "epoch": 0.7965638422491215, "grad_norm": 2.4316413402557373, "learning_rate": 8.544020708233608e-06, "loss": 0.6233, "step": 6120 }, { "epoch": 0.7969543147208121, "grad_norm": 2.4489283561706543, "learning_rate": 8.542546211679806e-06, "loss": 0.7188, "step": 6123 }, { "epoch": 0.7973447871925029, "grad_norm": 3.5121216773986816, "learning_rate": 8.541071096250633e-06, "loss": 0.6304, "step": 6126 }, { "epoch": 0.7977352596641937, "grad_norm": 2.336344003677368, "learning_rate": 8.539595362203787e-06, "loss": 0.6539, "step": 6129 }, { "epoch": 0.7981257321358844, "grad_norm": 2.4872500896453857, "learning_rate": 8.538119009797079e-06, "loss": 0.6592, "step": 6132 }, { "epoch": 0.7985162046075752, "grad_norm": 2.25089955329895, "learning_rate": 8.536642039288421e-06, "loss": 0.6421, "step": 6135 }, { "epoch": 0.7989066770792659, "grad_norm": 4.216074466705322, "learning_rate": 8.53516445093584e-06, "loss": 0.6712, "step": 6138 }, { "epoch": 0.7992971495509567, "grad_norm": 3.146594285964966, "learning_rate": 8.533686244997466e-06, "loss": 0.7159, "step": 6141 }, { "epoch": 0.7996876220226474, "grad_norm": 2.209770441055298, "learning_rate": 8.53220742173154e-06, "loss": 0.6019, "step": 6144 }, { "epoch": 0.8000780944943382, "grad_norm": 3.0704033374786377, "learning_rate": 8.530727981396406e-06, "loss": 0.7287, "step": 6147 }, { "epoch": 0.8004685669660289, "grad_norm": 2.567683219909668, "learning_rate": 8.529247924250524e-06, "loss": 0.6043, "step": 6150 }, { "epoch": 0.8008590394377196, "grad_norm": 3.0786571502685547, "learning_rate": 8.527767250552452e-06, "loss": 0.6735, "step": 6153 }, { "epoch": 0.8012495119094104, "grad_norm": 2.3544533252716064, "learning_rate": 8.526285960560864e-06, "loss": 0.6422, "step": 6156 }, { "epoch": 0.8016399843811012, "grad_norm": 2.1804707050323486, "learning_rate": 8.524804054534535e-06, "loss": 0.6256, "step": 6159 }, { "epoch": 0.8020304568527918, "grad_norm": 2.7374136447906494, "learning_rate": 8.523321532732354e-06, "loss": 0.5924, "step": 6162 }, { "epoch": 0.8024209293244826, "grad_norm": 2.6108615398406982, "learning_rate": 8.521838395413312e-06, "loss": 0.7202, "step": 6165 }, { "epoch": 0.8028114017961734, "grad_norm": 2.354628086090088, "learning_rate": 8.520354642836512e-06, "loss": 0.6135, "step": 6168 }, { "epoch": 0.8032018742678642, "grad_norm": 2.340169668197632, "learning_rate": 8.518870275261161e-06, "loss": 0.6044, "step": 6171 }, { "epoch": 0.8035923467395548, "grad_norm": 2.519382953643799, "learning_rate": 8.517385292946578e-06, "loss": 0.6896, "step": 6174 }, { "epoch": 0.8039828192112456, "grad_norm": 2.6927080154418945, "learning_rate": 8.515899696152183e-06, "loss": 0.5729, "step": 6177 }, { "epoch": 0.8043732916829364, "grad_norm": 2.767089605331421, "learning_rate": 8.514413485137505e-06, "loss": 0.5569, "step": 6180 }, { "epoch": 0.8047637641546271, "grad_norm": 3.1286277770996094, "learning_rate": 8.512926660162186e-06, "loss": 0.6846, "step": 6183 }, { "epoch": 0.8051542366263178, "grad_norm": 3.36635684967041, "learning_rate": 8.511439221485971e-06, "loss": 0.6434, "step": 6186 }, { "epoch": 0.8055447090980086, "grad_norm": 2.8603038787841797, "learning_rate": 8.50995116936871e-06, "loss": 0.7117, "step": 6189 }, { "epoch": 0.8059351815696993, "grad_norm": 2.3195858001708984, "learning_rate": 8.508462504070363e-06, "loss": 0.6657, "step": 6192 }, { "epoch": 0.8063256540413901, "grad_norm": 2.69417142868042, "learning_rate": 8.506973225850996e-06, "loss": 0.6849, "step": 6195 }, { "epoch": 0.8067161265130808, "grad_norm": 2.8202831745147705, "learning_rate": 8.505483334970787e-06, "loss": 0.7059, "step": 6198 }, { "epoch": 0.8071065989847716, "grad_norm": 2.3420493602752686, "learning_rate": 8.503992831690011e-06, "loss": 0.6732, "step": 6201 }, { "epoch": 0.8074970714564623, "grad_norm": 2.349193811416626, "learning_rate": 8.502501716269061e-06, "loss": 0.6318, "step": 6204 }, { "epoch": 0.8078875439281531, "grad_norm": 2.51961350440979, "learning_rate": 8.501009988968427e-06, "loss": 0.6682, "step": 6207 }, { "epoch": 0.8082780163998438, "grad_norm": 2.5512888431549072, "learning_rate": 8.499517650048715e-06, "loss": 0.6768, "step": 6210 }, { "epoch": 0.8086684888715345, "grad_norm": 2.58780837059021, "learning_rate": 8.498024699770631e-06, "loss": 0.6196, "step": 6213 }, { "epoch": 0.8090589613432253, "grad_norm": 3.9888927936553955, "learning_rate": 8.49653113839499e-06, "loss": 0.7719, "step": 6216 }, { "epoch": 0.8094494338149161, "grad_norm": 3.109391927719116, "learning_rate": 8.495036966182716e-06, "loss": 0.7418, "step": 6219 }, { "epoch": 0.8098399062866068, "grad_norm": 2.7313342094421387, "learning_rate": 8.493542183394835e-06, "loss": 0.7156, "step": 6222 }, { "epoch": 0.8102303787582975, "grad_norm": 2.2368297576904297, "learning_rate": 8.492046790292485e-06, "loss": 0.673, "step": 6225 }, { "epoch": 0.8106208512299883, "grad_norm": 2.565585136413574, "learning_rate": 8.490550787136906e-06, "loss": 0.6379, "step": 6228 }, { "epoch": 0.811011323701679, "grad_norm": 2.4983272552490234, "learning_rate": 8.489054174189448e-06, "loss": 0.6459, "step": 6231 }, { "epoch": 0.8114017961733698, "grad_norm": 2.1908957958221436, "learning_rate": 8.487556951711567e-06, "loss": 0.6178, "step": 6234 }, { "epoch": 0.8117922686450605, "grad_norm": 2.3688669204711914, "learning_rate": 8.486059119964822e-06, "loss": 0.7055, "step": 6237 }, { "epoch": 0.8121827411167513, "grad_norm": 2.3758630752563477, "learning_rate": 8.484560679210883e-06, "loss": 0.5999, "step": 6240 }, { "epoch": 0.812573213588442, "grad_norm": 2.6127567291259766, "learning_rate": 8.483061629711522e-06, "loss": 0.7506, "step": 6243 }, { "epoch": 0.8129636860601328, "grad_norm": 3.369610548019409, "learning_rate": 8.481561971728622e-06, "loss": 0.6454, "step": 6246 }, { "epoch": 0.8133541585318235, "grad_norm": 2.357160806655884, "learning_rate": 8.480061705524173e-06, "loss": 0.621, "step": 6249 }, { "epoch": 0.8137446310035142, "grad_norm": 2.6496059894561768, "learning_rate": 8.47856083136026e-06, "loss": 0.7154, "step": 6252 }, { "epoch": 0.814135103475205, "grad_norm": 2.5359816551208496, "learning_rate": 8.47705934949909e-06, "loss": 0.7051, "step": 6255 }, { "epoch": 0.8145255759468958, "grad_norm": 2.283336877822876, "learning_rate": 8.475557260202966e-06, "loss": 0.646, "step": 6258 }, { "epoch": 0.8149160484185864, "grad_norm": 2.6512975692749023, "learning_rate": 8.474054563734303e-06, "loss": 0.6226, "step": 6261 }, { "epoch": 0.8153065208902772, "grad_norm": 2.991300582885742, "learning_rate": 8.472551260355612e-06, "loss": 0.7599, "step": 6264 }, { "epoch": 0.815696993361968, "grad_norm": 2.309941530227661, "learning_rate": 8.471047350329523e-06, "loss": 0.5883, "step": 6267 }, { "epoch": 0.8160874658336588, "grad_norm": 2.3625473976135254, "learning_rate": 8.469542833918762e-06, "loss": 0.7385, "step": 6270 }, { "epoch": 0.8164779383053494, "grad_norm": 2.453748941421509, "learning_rate": 8.46803771138617e-06, "loss": 0.5488, "step": 6273 }, { "epoch": 0.8168684107770402, "grad_norm": 3.088710069656372, "learning_rate": 8.466531982994684e-06, "loss": 0.6956, "step": 6276 }, { "epoch": 0.817258883248731, "grad_norm": 2.4059526920318604, "learning_rate": 8.465025649007352e-06, "loss": 0.5862, "step": 6279 }, { "epoch": 0.8176493557204217, "grad_norm": 3.4859941005706787, "learning_rate": 8.463518709687328e-06, "loss": 0.5537, "step": 6282 }, { "epoch": 0.8180398281921124, "grad_norm": 3.3922863006591797, "learning_rate": 8.462011165297873e-06, "loss": 0.6667, "step": 6285 }, { "epoch": 0.8184303006638032, "grad_norm": 3.5943069458007812, "learning_rate": 8.46050301610235e-06, "loss": 0.6736, "step": 6288 }, { "epoch": 0.818820773135494, "grad_norm": 2.6332778930664062, "learning_rate": 8.45899426236423e-06, "loss": 0.7386, "step": 6291 }, { "epoch": 0.8192112456071847, "grad_norm": 3.304346799850464, "learning_rate": 8.45748490434709e-06, "loss": 0.7207, "step": 6294 }, { "epoch": 0.8196017180788754, "grad_norm": 2.711082935333252, "learning_rate": 8.455974942314612e-06, "loss": 0.6716, "step": 6297 }, { "epoch": 0.8199921905505662, "grad_norm": 2.3270423412323, "learning_rate": 8.454464376530579e-06, "loss": 0.634, "step": 6300 }, { "epoch": 0.8203826630222569, "grad_norm": 2.4965059757232666, "learning_rate": 8.452953207258888e-06, "loss": 0.6086, "step": 6303 }, { "epoch": 0.8207731354939477, "grad_norm": 2.57068133354187, "learning_rate": 8.451441434763534e-06, "loss": 0.6383, "step": 6306 }, { "epoch": 0.8211636079656385, "grad_norm": 3.073572874069214, "learning_rate": 8.449929059308623e-06, "loss": 0.6085, "step": 6309 }, { "epoch": 0.8215540804373291, "grad_norm": 2.8972861766815186, "learning_rate": 8.448416081158363e-06, "loss": 0.76, "step": 6312 }, { "epoch": 0.8219445529090199, "grad_norm": 2.6822032928466797, "learning_rate": 8.446902500577067e-06, "loss": 0.6039, "step": 6315 }, { "epoch": 0.8223350253807107, "grad_norm": 2.42512845993042, "learning_rate": 8.445388317829157e-06, "loss": 0.6074, "step": 6318 }, { "epoch": 0.8227254978524015, "grad_norm": 2.6248953342437744, "learning_rate": 8.443873533179156e-06, "loss": 0.7003, "step": 6321 }, { "epoch": 0.8231159703240921, "grad_norm": 3.51706862449646, "learning_rate": 8.442358146891692e-06, "loss": 0.6427, "step": 6324 }, { "epoch": 0.8235064427957829, "grad_norm": 2.4549152851104736, "learning_rate": 8.440842159231503e-06, "loss": 0.662, "step": 6327 }, { "epoch": 0.8238969152674737, "grad_norm": 2.4990108013153076, "learning_rate": 8.439325570463426e-06, "loss": 0.7352, "step": 6330 }, { "epoch": 0.8242873877391644, "grad_norm": 3.4981563091278076, "learning_rate": 8.437808380852408e-06, "loss": 0.7033, "step": 6333 }, { "epoch": 0.8246778602108551, "grad_norm": 3.689436674118042, "learning_rate": 8.436290590663498e-06, "loss": 0.6423, "step": 6336 }, { "epoch": 0.8250683326825459, "grad_norm": 2.182603359222412, "learning_rate": 8.43477220016185e-06, "loss": 0.6086, "step": 6339 }, { "epoch": 0.8254588051542366, "grad_norm": 2.4376380443573, "learning_rate": 8.433253209612727e-06, "loss": 0.6321, "step": 6342 }, { "epoch": 0.8258492776259274, "grad_norm": 2.2977375984191895, "learning_rate": 8.431733619281486e-06, "loss": 0.6188, "step": 6345 }, { "epoch": 0.8262397500976181, "grad_norm": 2.3864758014678955, "learning_rate": 8.430213429433605e-06, "loss": 0.653, "step": 6348 }, { "epoch": 0.8266302225693088, "grad_norm": 3.494652509689331, "learning_rate": 8.42869264033465e-06, "loss": 0.6787, "step": 6351 }, { "epoch": 0.8270206950409996, "grad_norm": 2.394007682800293, "learning_rate": 8.427171252250308e-06, "loss": 0.5673, "step": 6354 }, { "epoch": 0.8274111675126904, "grad_norm": 2.576982259750366, "learning_rate": 8.425649265446356e-06, "loss": 0.7278, "step": 6357 }, { "epoch": 0.827801639984381, "grad_norm": 2.5857975482940674, "learning_rate": 8.424126680188684e-06, "loss": 0.7381, "step": 6360 }, { "epoch": 0.8281921124560718, "grad_norm": 2.553446054458618, "learning_rate": 8.422603496743285e-06, "loss": 0.5989, "step": 6363 }, { "epoch": 0.8285825849277626, "grad_norm": 2.7090554237365723, "learning_rate": 8.421079715376255e-06, "loss": 0.6736, "step": 6366 }, { "epoch": 0.8289730573994534, "grad_norm": 2.7122576236724854, "learning_rate": 8.419555336353793e-06, "loss": 0.6736, "step": 6369 }, { "epoch": 0.829363529871144, "grad_norm": 2.37516713142395, "learning_rate": 8.418030359942211e-06, "loss": 0.6084, "step": 6372 }, { "epoch": 0.8297540023428348, "grad_norm": 2.34924054145813, "learning_rate": 8.416504786407913e-06, "loss": 0.7132, "step": 6375 }, { "epoch": 0.8301444748145256, "grad_norm": 2.3876399993896484, "learning_rate": 8.414978616017418e-06, "loss": 0.7109, "step": 6378 }, { "epoch": 0.8305349472862164, "grad_norm": 2.995637893676758, "learning_rate": 8.413451849037342e-06, "loss": 0.6192, "step": 6381 }, { "epoch": 0.830925419757907, "grad_norm": 2.5374836921691895, "learning_rate": 8.41192448573441e-06, "loss": 0.6034, "step": 6384 }, { "epoch": 0.8313158922295978, "grad_norm": 2.576464891433716, "learning_rate": 8.410396526375446e-06, "loss": 0.6396, "step": 6387 }, { "epoch": 0.8317063647012886, "grad_norm": 2.4232656955718994, "learning_rate": 8.408867971227384e-06, "loss": 0.6386, "step": 6390 }, { "epoch": 0.8320968371729793, "grad_norm": 2.4770760536193848, "learning_rate": 8.40733882055726e-06, "loss": 0.6802, "step": 6393 }, { "epoch": 0.8324873096446701, "grad_norm": 2.2709827423095703, "learning_rate": 8.40580907463221e-06, "loss": 0.6144, "step": 6396 }, { "epoch": 0.8328777821163608, "grad_norm": 2.4368174076080322, "learning_rate": 8.40427873371948e-06, "loss": 0.6847, "step": 6399 }, { "epoch": 0.8332682545880515, "grad_norm": 2.510909080505371, "learning_rate": 8.402747798086417e-06, "loss": 0.7264, "step": 6402 }, { "epoch": 0.8336587270597423, "grad_norm": 3.945446491241455, "learning_rate": 8.401216268000473e-06, "loss": 0.7692, "step": 6405 }, { "epoch": 0.8340491995314331, "grad_norm": 2.4416685104370117, "learning_rate": 8.3996841437292e-06, "loss": 0.6565, "step": 6408 }, { "epoch": 0.8344396720031237, "grad_norm": 2.5184824466705322, "learning_rate": 8.39815142554026e-06, "loss": 0.6929, "step": 6411 }, { "epoch": 0.8348301444748145, "grad_norm": 2.375833511352539, "learning_rate": 8.396618113701416e-06, "loss": 0.6166, "step": 6414 }, { "epoch": 0.8352206169465053, "grad_norm": 2.406198024749756, "learning_rate": 8.395084208480531e-06, "loss": 0.5858, "step": 6417 }, { "epoch": 0.8356110894181961, "grad_norm": 3.6379899978637695, "learning_rate": 8.393549710145578e-06, "loss": 0.5477, "step": 6420 }, { "epoch": 0.8360015618898867, "grad_norm": 2.8601226806640625, "learning_rate": 8.39201461896463e-06, "loss": 0.7262, "step": 6423 }, { "epoch": 0.8363920343615775, "grad_norm": 2.3782405853271484, "learning_rate": 8.390478935205864e-06, "loss": 0.672, "step": 6426 }, { "epoch": 0.8367825068332683, "grad_norm": 2.471163749694824, "learning_rate": 8.388942659137558e-06, "loss": 0.6127, "step": 6429 }, { "epoch": 0.837172979304959, "grad_norm": 2.592690944671631, "learning_rate": 8.3874057910281e-06, "loss": 0.7282, "step": 6432 }, { "epoch": 0.8375634517766497, "grad_norm": 2.696824073791504, "learning_rate": 8.385868331145977e-06, "loss": 0.6982, "step": 6435 }, { "epoch": 0.8379539242483405, "grad_norm": 2.5590860843658447, "learning_rate": 8.38433027975978e-06, "loss": 0.6906, "step": 6438 }, { "epoch": 0.8383443967200312, "grad_norm": 2.547417163848877, "learning_rate": 8.3827916371382e-06, "loss": 0.6392, "step": 6441 }, { "epoch": 0.838734869191722, "grad_norm": 2.264625310897827, "learning_rate": 8.381252403550043e-06, "loss": 0.6396, "step": 6444 }, { "epoch": 0.8391253416634127, "grad_norm": 2.935336112976074, "learning_rate": 8.3797125792642e-06, "loss": 0.6588, "step": 6447 }, { "epoch": 0.8395158141351035, "grad_norm": 2.308274745941162, "learning_rate": 8.378172164549678e-06, "loss": 0.6231, "step": 6450 }, { "epoch": 0.8399062866067942, "grad_norm": 2.976280927658081, "learning_rate": 8.376631159675587e-06, "loss": 0.6117, "step": 6453 }, { "epoch": 0.840296759078485, "grad_norm": 2.33328914642334, "learning_rate": 8.375089564911137e-06, "loss": 0.7065, "step": 6456 }, { "epoch": 0.8406872315501757, "grad_norm": 2.5370125770568848, "learning_rate": 8.373547380525639e-06, "loss": 0.671, "step": 6459 }, { "epoch": 0.8410777040218664, "grad_norm": 3.066671133041382, "learning_rate": 8.372004606788511e-06, "loss": 0.5947, "step": 6462 }, { "epoch": 0.8414681764935572, "grad_norm": 2.562814474105835, "learning_rate": 8.370461243969272e-06, "loss": 0.5773, "step": 6465 }, { "epoch": 0.841858648965248, "grad_norm": 2.8281662464141846, "learning_rate": 8.368917292337544e-06, "loss": 0.6202, "step": 6468 }, { "epoch": 0.8422491214369386, "grad_norm": 3.3414337635040283, "learning_rate": 8.36737275216305e-06, "loss": 0.6927, "step": 6471 }, { "epoch": 0.8426395939086294, "grad_norm": 2.5855307579040527, "learning_rate": 8.365827623715624e-06, "loss": 0.6142, "step": 6474 }, { "epoch": 0.8430300663803202, "grad_norm": 2.5391762256622314, "learning_rate": 8.36428190726519e-06, "loss": 0.6912, "step": 6477 }, { "epoch": 0.843420538852011, "grad_norm": 2.4301860332489014, "learning_rate": 8.362735603081784e-06, "loss": 0.7405, "step": 6480 }, { "epoch": 0.8438110113237017, "grad_norm": 2.3643648624420166, "learning_rate": 8.361188711435543e-06, "loss": 0.5884, "step": 6483 }, { "epoch": 0.8442014837953924, "grad_norm": 2.4440126419067383, "learning_rate": 8.359641232596707e-06, "loss": 0.6901, "step": 6486 }, { "epoch": 0.8445919562670832, "grad_norm": 2.4889235496520996, "learning_rate": 8.358093166835614e-06, "loss": 0.6792, "step": 6489 }, { "epoch": 0.8449824287387739, "grad_norm": 2.9333462715148926, "learning_rate": 8.356544514422708e-06, "loss": 0.535, "step": 6492 }, { "epoch": 0.8453729012104647, "grad_norm": 3.2094194889068604, "learning_rate": 8.354995275628536e-06, "loss": 0.713, "step": 6495 }, { "epoch": 0.8457633736821554, "grad_norm": 2.7420010566711426, "learning_rate": 8.35344545072375e-06, "loss": 0.6427, "step": 6498 }, { "epoch": 0.8461538461538461, "grad_norm": 2.5620057582855225, "learning_rate": 8.351895039979096e-06, "loss": 0.7041, "step": 6501 }, { "epoch": 0.8465443186255369, "grad_norm": 3.0430588722229004, "learning_rate": 8.350344043665432e-06, "loss": 0.6237, "step": 6504 }, { "epoch": 0.8469347910972277, "grad_norm": 3.560917377471924, "learning_rate": 8.34879246205371e-06, "loss": 0.6937, "step": 6507 }, { "epoch": 0.8473252635689184, "grad_norm": 2.730560064315796, "learning_rate": 8.34724029541499e-06, "loss": 0.6022, "step": 6510 }, { "epoch": 0.8477157360406091, "grad_norm": 2.495201587677002, "learning_rate": 8.345687544020432e-06, "loss": 0.6019, "step": 6513 }, { "epoch": 0.8481062085122999, "grad_norm": 2.6979150772094727, "learning_rate": 8.344134208141298e-06, "loss": 0.696, "step": 6516 }, { "epoch": 0.8484966809839907, "grad_norm": 3.2949979305267334, "learning_rate": 8.342580288048953e-06, "loss": 0.6723, "step": 6519 }, { "epoch": 0.8488871534556813, "grad_norm": 2.2897496223449707, "learning_rate": 8.341025784014865e-06, "loss": 0.6004, "step": 6522 }, { "epoch": 0.8492776259273721, "grad_norm": 2.2679920196533203, "learning_rate": 8.3394706963106e-06, "loss": 0.6487, "step": 6525 }, { "epoch": 0.8496680983990629, "grad_norm": 2.6986122131347656, "learning_rate": 8.337915025207829e-06, "loss": 0.6389, "step": 6528 }, { "epoch": 0.8500585708707536, "grad_norm": 2.4359288215637207, "learning_rate": 8.336358770978325e-06, "loss": 0.5729, "step": 6531 }, { "epoch": 0.8504490433424443, "grad_norm": 2.355321168899536, "learning_rate": 8.334801933893963e-06, "loss": 0.6444, "step": 6534 }, { "epoch": 0.8508395158141351, "grad_norm": 2.818239212036133, "learning_rate": 8.333244514226718e-06, "loss": 0.6435, "step": 6537 }, { "epoch": 0.8512299882858259, "grad_norm": 2.3462061882019043, "learning_rate": 8.331686512248669e-06, "loss": 0.7376, "step": 6540 }, { "epoch": 0.8516204607575166, "grad_norm": 2.6086630821228027, "learning_rate": 8.330127928231994e-06, "loss": 0.6756, "step": 6543 }, { "epoch": 0.8520109332292073, "grad_norm": 2.5126383304595947, "learning_rate": 8.328568762448978e-06, "loss": 0.6713, "step": 6546 }, { "epoch": 0.8524014057008981, "grad_norm": 3.5347037315368652, "learning_rate": 8.327009015172e-06, "loss": 0.6268, "step": 6549 }, { "epoch": 0.8527918781725888, "grad_norm": 2.4854538440704346, "learning_rate": 8.325448686673545e-06, "loss": 0.6253, "step": 6552 }, { "epoch": 0.8531823506442796, "grad_norm": 2.2126731872558594, "learning_rate": 8.323887777226204e-06, "loss": 0.5835, "step": 6555 }, { "epoch": 0.8535728231159703, "grad_norm": 3.192509889602661, "learning_rate": 8.322326287102655e-06, "loss": 0.5728, "step": 6558 }, { "epoch": 0.853963295587661, "grad_norm": 2.4523346424102783, "learning_rate": 8.320764216575696e-06, "loss": 0.5734, "step": 6561 }, { "epoch": 0.8543537680593518, "grad_norm": 2.1568102836608887, "learning_rate": 8.319201565918214e-06, "loss": 0.7255, "step": 6564 }, { "epoch": 0.8547442405310426, "grad_norm": 2.56260347366333, "learning_rate": 8.317638335403203e-06, "loss": 0.6623, "step": 6567 }, { "epoch": 0.8551347130027334, "grad_norm": 2.450410842895508, "learning_rate": 8.31607452530375e-06, "loss": 0.6081, "step": 6570 }, { "epoch": 0.855525185474424, "grad_norm": 2.3223392963409424, "learning_rate": 8.314510135893057e-06, "loss": 0.6811, "step": 6573 }, { "epoch": 0.8559156579461148, "grad_norm": 2.9705634117126465, "learning_rate": 8.312945167444413e-06, "loss": 0.7399, "step": 6576 }, { "epoch": 0.8563061304178056, "grad_norm": 3.151143789291382, "learning_rate": 8.31137962023122e-06, "loss": 0.6165, "step": 6579 }, { "epoch": 0.8566966028894963, "grad_norm": 2.2744345664978027, "learning_rate": 8.309813494526973e-06, "loss": 0.5956, "step": 6582 }, { "epoch": 0.857087075361187, "grad_norm": 2.3356449604034424, "learning_rate": 8.30824679060527e-06, "loss": 0.6373, "step": 6585 }, { "epoch": 0.8574775478328778, "grad_norm": 2.322843551635742, "learning_rate": 8.306679508739813e-06, "loss": 0.6431, "step": 6588 }, { "epoch": 0.8578680203045685, "grad_norm": 2.799908399581909, "learning_rate": 8.305111649204402e-06, "loss": 0.6925, "step": 6591 }, { "epoch": 0.8582584927762593, "grad_norm": 3.1793437004089355, "learning_rate": 8.30354321227294e-06, "loss": 0.6127, "step": 6594 }, { "epoch": 0.85864896524795, "grad_norm": 2.3757338523864746, "learning_rate": 8.301974198219427e-06, "loss": 0.6473, "step": 6597 }, { "epoch": 0.8590394377196408, "grad_norm": 2.6627843379974365, "learning_rate": 8.300404607317968e-06, "loss": 0.7966, "step": 6600 }, { "epoch": 0.8594299101913315, "grad_norm": 2.2631454467773438, "learning_rate": 8.298834439842768e-06, "loss": 0.5955, "step": 6603 }, { "epoch": 0.8598203826630223, "grad_norm": 3.50620174407959, "learning_rate": 8.29726369606813e-06, "loss": 0.6787, "step": 6606 }, { "epoch": 0.860210855134713, "grad_norm": 2.180896520614624, "learning_rate": 8.295692376268462e-06, "loss": 0.561, "step": 6609 }, { "epoch": 0.8606013276064037, "grad_norm": 2.7363123893737793, "learning_rate": 8.29412048071827e-06, "loss": 0.6337, "step": 6612 }, { "epoch": 0.8609918000780945, "grad_norm": 2.419677495956421, "learning_rate": 8.292548009692156e-06, "loss": 0.6391, "step": 6615 }, { "epoch": 0.8613822725497853, "grad_norm": 2.2987353801727295, "learning_rate": 8.290974963464835e-06, "loss": 0.5479, "step": 6618 }, { "epoch": 0.8617727450214759, "grad_norm": 2.5347087383270264, "learning_rate": 8.289401342311108e-06, "loss": 0.6663, "step": 6621 }, { "epoch": 0.8621632174931667, "grad_norm": 2.4396309852600098, "learning_rate": 8.287827146505888e-06, "loss": 0.5425, "step": 6624 }, { "epoch": 0.8625536899648575, "grad_norm": 2.2343242168426514, "learning_rate": 8.286252376324181e-06, "loss": 0.5953, "step": 6627 }, { "epoch": 0.8629441624365483, "grad_norm": 2.7455759048461914, "learning_rate": 8.284677032041099e-06, "loss": 0.6759, "step": 6630 }, { "epoch": 0.8633346349082389, "grad_norm": 2.5142693519592285, "learning_rate": 8.283101113931849e-06, "loss": 0.6229, "step": 6633 }, { "epoch": 0.8637251073799297, "grad_norm": 2.3203072547912598, "learning_rate": 8.281524622271741e-06, "loss": 0.6268, "step": 6636 }, { "epoch": 0.8641155798516205, "grad_norm": 2.863619565963745, "learning_rate": 8.279947557336184e-06, "loss": 0.6373, "step": 6639 }, { "epoch": 0.8645060523233112, "grad_norm": 2.654784679412842, "learning_rate": 8.278369919400688e-06, "loss": 0.6699, "step": 6642 }, { "epoch": 0.8648965247950019, "grad_norm": 2.5628163814544678, "learning_rate": 8.276791708740865e-06, "loss": 0.7191, "step": 6645 }, { "epoch": 0.8652869972666927, "grad_norm": 2.7298977375030518, "learning_rate": 8.275212925632424e-06, "loss": 0.6759, "step": 6648 }, { "epoch": 0.8656774697383834, "grad_norm": 2.9911298751831055, "learning_rate": 8.273633570351175e-06, "loss": 0.6965, "step": 6651 }, { "epoch": 0.8660679422100742, "grad_norm": 2.741440534591675, "learning_rate": 8.272053643173028e-06, "loss": 0.5874, "step": 6654 }, { "epoch": 0.866458414681765, "grad_norm": 2.3754491806030273, "learning_rate": 8.270473144373992e-06, "loss": 0.7394, "step": 6657 }, { "epoch": 0.8668488871534556, "grad_norm": 2.6968066692352295, "learning_rate": 8.268892074230179e-06, "loss": 0.6542, "step": 6660 }, { "epoch": 0.8672393596251464, "grad_norm": 2.947608709335327, "learning_rate": 8.267310433017795e-06, "loss": 0.6674, "step": 6663 }, { "epoch": 0.8676298320968372, "grad_norm": 2.4192142486572266, "learning_rate": 8.265728221013154e-06, "loss": 0.6469, "step": 6666 }, { "epoch": 0.868020304568528, "grad_norm": 2.530045986175537, "learning_rate": 8.264145438492664e-06, "loss": 0.735, "step": 6669 }, { "epoch": 0.8684107770402186, "grad_norm": 3.38845157623291, "learning_rate": 8.26256208573283e-06, "loss": 0.6779, "step": 6672 }, { "epoch": 0.8688012495119094, "grad_norm": 2.9682278633117676, "learning_rate": 8.260978163010265e-06, "loss": 0.6009, "step": 6675 }, { "epoch": 0.8691917219836002, "grad_norm": 2.4228317737579346, "learning_rate": 8.259393670601673e-06, "loss": 0.6627, "step": 6678 }, { "epoch": 0.869582194455291, "grad_norm": 2.3679723739624023, "learning_rate": 8.257808608783864e-06, "loss": 0.6578, "step": 6681 }, { "epoch": 0.8699726669269816, "grad_norm": 2.9238123893737793, "learning_rate": 8.256222977833746e-06, "loss": 0.6094, "step": 6684 }, { "epoch": 0.8703631393986724, "grad_norm": 3.277010440826416, "learning_rate": 8.254636778028321e-06, "loss": 0.5665, "step": 6687 }, { "epoch": 0.8707536118703632, "grad_norm": 2.598862648010254, "learning_rate": 8.2530500096447e-06, "loss": 0.6638, "step": 6690 }, { "epoch": 0.8711440843420539, "grad_norm": 2.5538406372070312, "learning_rate": 8.251462672960087e-06, "loss": 0.7227, "step": 6693 }, { "epoch": 0.8715345568137446, "grad_norm": 2.3932902812957764, "learning_rate": 8.249874768251783e-06, "loss": 0.6756, "step": 6696 }, { "epoch": 0.8719250292854354, "grad_norm": 2.3504838943481445, "learning_rate": 8.248286295797194e-06, "loss": 0.671, "step": 6699 }, { "epoch": 0.8723155017571261, "grad_norm": 3.473229169845581, "learning_rate": 8.246697255873822e-06, "loss": 0.6147, "step": 6702 }, { "epoch": 0.8727059742288169, "grad_norm": 2.742168426513672, "learning_rate": 8.24510764875927e-06, "loss": 0.6286, "step": 6705 }, { "epoch": 0.8730964467005076, "grad_norm": 2.26391863822937, "learning_rate": 8.243517474731238e-06, "loss": 0.5674, "step": 6708 }, { "epoch": 0.8734869191721983, "grad_norm": 2.6377511024475098, "learning_rate": 8.241926734067528e-06, "loss": 0.731, "step": 6711 }, { "epoch": 0.8738773916438891, "grad_norm": 2.8868167400360107, "learning_rate": 8.240335427046037e-06, "loss": 0.7232, "step": 6714 }, { "epoch": 0.8742678641155799, "grad_norm": 2.5035762786865234, "learning_rate": 8.238743553944762e-06, "loss": 0.7436, "step": 6717 }, { "epoch": 0.8746583365872705, "grad_norm": 4.231232643127441, "learning_rate": 8.237151115041803e-06, "loss": 0.6408, "step": 6720 }, { "epoch": 0.8750488090589613, "grad_norm": 3.021878719329834, "learning_rate": 8.235558110615354e-06, "loss": 0.6811, "step": 6723 }, { "epoch": 0.8754392815306521, "grad_norm": 3.348742961883545, "learning_rate": 8.233964540943708e-06, "loss": 0.7516, "step": 6726 }, { "epoch": 0.8758297540023429, "grad_norm": 2.2513198852539062, "learning_rate": 8.232370406305263e-06, "loss": 0.6115, "step": 6729 }, { "epoch": 0.8762202264740335, "grad_norm": 2.705314874649048, "learning_rate": 8.230775706978507e-06, "loss": 0.6867, "step": 6732 }, { "epoch": 0.8766106989457243, "grad_norm": 3.001579523086548, "learning_rate": 8.22918044324203e-06, "loss": 0.6006, "step": 6735 }, { "epoch": 0.8770011714174151, "grad_norm": 2.306036949157715, "learning_rate": 8.227584615374524e-06, "loss": 0.5314, "step": 6738 }, { "epoch": 0.8773916438891058, "grad_norm": 2.6267998218536377, "learning_rate": 8.225988223654775e-06, "loss": 0.7156, "step": 6741 }, { "epoch": 0.8777821163607966, "grad_norm": 2.350863456726074, "learning_rate": 8.224391268361672e-06, "loss": 0.5479, "step": 6744 }, { "epoch": 0.8781725888324873, "grad_norm": 2.4865407943725586, "learning_rate": 8.222793749774194e-06, "loss": 0.5811, "step": 6747 }, { "epoch": 0.878563061304178, "grad_norm": 3.3585386276245117, "learning_rate": 8.221195668171429e-06, "loss": 0.6603, "step": 6750 }, { "epoch": 0.8789535337758688, "grad_norm": 4.48010778427124, "learning_rate": 8.219597023832558e-06, "loss": 0.6756, "step": 6753 }, { "epoch": 0.8793440062475596, "grad_norm": 2.635272979736328, "learning_rate": 8.21799781703686e-06, "loss": 0.7552, "step": 6756 }, { "epoch": 0.8797344787192503, "grad_norm": 2.7308220863342285, "learning_rate": 8.216398048063712e-06, "loss": 0.6473, "step": 6759 }, { "epoch": 0.880124951190941, "grad_norm": 3.390993118286133, "learning_rate": 8.214797717192591e-06, "loss": 0.7474, "step": 6762 }, { "epoch": 0.8805154236626318, "grad_norm": 2.1488442420959473, "learning_rate": 8.213196824703074e-06, "loss": 0.5481, "step": 6765 }, { "epoch": 0.8809058961343226, "grad_norm": 2.6741676330566406, "learning_rate": 8.21159537087483e-06, "loss": 0.6344, "step": 6768 }, { "epoch": 0.8812963686060132, "grad_norm": 2.426910877227783, "learning_rate": 8.20999335598763e-06, "loss": 0.6935, "step": 6771 }, { "epoch": 0.881686841077704, "grad_norm": 2.539985179901123, "learning_rate": 8.208390780321344e-06, "loss": 0.6417, "step": 6774 }, { "epoch": 0.8820773135493948, "grad_norm": 3.312417507171631, "learning_rate": 8.20678764415594e-06, "loss": 0.6397, "step": 6777 }, { "epoch": 0.8824677860210856, "grad_norm": 2.3949477672576904, "learning_rate": 8.205183947771478e-06, "loss": 0.6372, "step": 6780 }, { "epoch": 0.8828582584927762, "grad_norm": 2.6882550716400146, "learning_rate": 8.203579691448124e-06, "loss": 0.7021, "step": 6783 }, { "epoch": 0.883248730964467, "grad_norm": 2.244438886642456, "learning_rate": 8.201974875466138e-06, "loss": 0.6369, "step": 6786 }, { "epoch": 0.8836392034361578, "grad_norm": 2.579512357711792, "learning_rate": 8.200369500105876e-06, "loss": 0.6198, "step": 6789 }, { "epoch": 0.8840296759078485, "grad_norm": 2.45475172996521, "learning_rate": 8.198763565647796e-06, "loss": 0.6765, "step": 6792 }, { "epoch": 0.8844201483795392, "grad_norm": 2.1796491146087646, "learning_rate": 8.19715707237245e-06, "loss": 0.5684, "step": 6795 }, { "epoch": 0.88481062085123, "grad_norm": 3.6202688217163086, "learning_rate": 8.195550020560488e-06, "loss": 0.6061, "step": 6798 }, { "epoch": 0.8852010933229207, "grad_norm": 2.273062229156494, "learning_rate": 8.193942410492662e-06, "loss": 0.5358, "step": 6801 }, { "epoch": 0.8855915657946115, "grad_norm": 2.308729410171509, "learning_rate": 8.192334242449816e-06, "loss": 0.5674, "step": 6804 }, { "epoch": 0.8859820382663022, "grad_norm": 2.949068784713745, "learning_rate": 8.190725516712893e-06, "loss": 0.6682, "step": 6807 }, { "epoch": 0.886372510737993, "grad_norm": 2.661355972290039, "learning_rate": 8.189116233562933e-06, "loss": 0.7175, "step": 6810 }, { "epoch": 0.8867629832096837, "grad_norm": 3.1999619007110596, "learning_rate": 8.187506393281076e-06, "loss": 0.6816, "step": 6813 }, { "epoch": 0.8871534556813745, "grad_norm": 3.3736467361450195, "learning_rate": 8.185895996148558e-06, "loss": 0.6235, "step": 6816 }, { "epoch": 0.8875439281530652, "grad_norm": 2.3668558597564697, "learning_rate": 8.184285042446713e-06, "loss": 0.7111, "step": 6819 }, { "epoch": 0.8879344006247559, "grad_norm": 3.4571516513824463, "learning_rate": 8.18267353245697e-06, "loss": 0.6578, "step": 6822 }, { "epoch": 0.8883248730964467, "grad_norm": 2.8542165756225586, "learning_rate": 8.181061466460856e-06, "loss": 0.6717, "step": 6825 }, { "epoch": 0.8887153455681375, "grad_norm": 2.5274338722229004, "learning_rate": 8.179448844739995e-06, "loss": 0.7508, "step": 6828 }, { "epoch": 0.8891058180398282, "grad_norm": 2.284775733947754, "learning_rate": 8.177835667576108e-06, "loss": 0.6448, "step": 6831 }, { "epoch": 0.8894962905115189, "grad_norm": 2.375109910964966, "learning_rate": 8.176221935251016e-06, "loss": 0.6777, "step": 6834 }, { "epoch": 0.8898867629832097, "grad_norm": 2.5060408115386963, "learning_rate": 8.174607648046635e-06, "loss": 0.7086, "step": 6837 }, { "epoch": 0.8902772354549005, "grad_norm": 3.7444376945495605, "learning_rate": 8.172992806244976e-06, "loss": 0.6429, "step": 6840 }, { "epoch": 0.8906677079265912, "grad_norm": 2.7690014839172363, "learning_rate": 8.171377410128149e-06, "loss": 0.6279, "step": 6843 }, { "epoch": 0.8910581803982819, "grad_norm": 3.0757744312286377, "learning_rate": 8.169761459978358e-06, "loss": 0.7566, "step": 6846 }, { "epoch": 0.8914486528699727, "grad_norm": 2.608480930328369, "learning_rate": 8.16814495607791e-06, "loss": 0.6398, "step": 6849 }, { "epoch": 0.8918391253416634, "grad_norm": 2.6468796730041504, "learning_rate": 8.166527898709202e-06, "loss": 0.6788, "step": 6852 }, { "epoch": 0.8922295978133542, "grad_norm": 2.874540328979492, "learning_rate": 8.164910288154733e-06, "loss": 0.6425, "step": 6855 }, { "epoch": 0.8926200702850449, "grad_norm": 2.4776840209960938, "learning_rate": 8.163292124697094e-06, "loss": 0.7579, "step": 6858 }, { "epoch": 0.8930105427567356, "grad_norm": 2.2084155082702637, "learning_rate": 8.161673408618975e-06, "loss": 0.6798, "step": 6861 }, { "epoch": 0.8934010152284264, "grad_norm": 2.4386301040649414, "learning_rate": 8.160054140203163e-06, "loss": 0.6953, "step": 6864 }, { "epoch": 0.8937914877001172, "grad_norm": 2.9873063564300537, "learning_rate": 8.15843431973254e-06, "loss": 0.6849, "step": 6867 }, { "epoch": 0.8941819601718078, "grad_norm": 2.3077900409698486, "learning_rate": 8.156813947490086e-06, "loss": 0.5833, "step": 6870 }, { "epoch": 0.8945724326434986, "grad_norm": 3.516432046890259, "learning_rate": 8.155193023758876e-06, "loss": 0.6973, "step": 6873 }, { "epoch": 0.8949629051151894, "grad_norm": 2.4575483798980713, "learning_rate": 8.153571548822083e-06, "loss": 0.6842, "step": 6876 }, { "epoch": 0.8953533775868802, "grad_norm": 2.8039913177490234, "learning_rate": 8.151949522962975e-06, "loss": 0.6416, "step": 6879 }, { "epoch": 0.8957438500585708, "grad_norm": 3.1583502292633057, "learning_rate": 8.150326946464913e-06, "loss": 0.6764, "step": 6882 }, { "epoch": 0.8961343225302616, "grad_norm": 2.762176752090454, "learning_rate": 8.148703819611364e-06, "loss": 0.6455, "step": 6885 }, { "epoch": 0.8965247950019524, "grad_norm": 3.457850694656372, "learning_rate": 8.147080142685882e-06, "loss": 0.6589, "step": 6888 }, { "epoch": 0.8969152674736431, "grad_norm": 3.8411946296691895, "learning_rate": 8.145455915972117e-06, "loss": 0.7218, "step": 6891 }, { "epoch": 0.8973057399453338, "grad_norm": 2.2801125049591064, "learning_rate": 8.143831139753822e-06, "loss": 0.6221, "step": 6894 }, { "epoch": 0.8976962124170246, "grad_norm": 2.8186426162719727, "learning_rate": 8.14220581431484e-06, "loss": 0.7055, "step": 6897 }, { "epoch": 0.8980866848887153, "grad_norm": 2.3961057662963867, "learning_rate": 8.140579939939113e-06, "loss": 0.6705, "step": 6900 }, { "epoch": 0.8984771573604061, "grad_norm": 2.616764783859253, "learning_rate": 8.138953516910676e-06, "loss": 0.5349, "step": 6903 }, { "epoch": 0.8988676298320968, "grad_norm": 2.015042543411255, "learning_rate": 8.137326545513664e-06, "loss": 0.535, "step": 6906 }, { "epoch": 0.8992581023037876, "grad_norm": 2.598292112350464, "learning_rate": 8.135699026032305e-06, "loss": 0.7247, "step": 6909 }, { "epoch": 0.8996485747754783, "grad_norm": 2.3289687633514404, "learning_rate": 8.134070958750923e-06, "loss": 0.6876, "step": 6912 }, { "epoch": 0.9000390472471691, "grad_norm": 2.219310760498047, "learning_rate": 8.132442343953937e-06, "loss": 0.6288, "step": 6915 }, { "epoch": 0.9004295197188599, "grad_norm": 2.5258898735046387, "learning_rate": 8.130813181925862e-06, "loss": 0.6711, "step": 6918 }, { "epoch": 0.9008199921905505, "grad_norm": 2.4190750122070312, "learning_rate": 8.129183472951312e-06, "loss": 0.7272, "step": 6921 }, { "epoch": 0.9012104646622413, "grad_norm": 2.566995620727539, "learning_rate": 8.127553217314991e-06, "loss": 0.735, "step": 6924 }, { "epoch": 0.9016009371339321, "grad_norm": 2.446164608001709, "learning_rate": 8.125922415301704e-06, "loss": 0.6453, "step": 6927 }, { "epoch": 0.9019914096056229, "grad_norm": 2.655388593673706, "learning_rate": 8.124291067196347e-06, "loss": 0.5551, "step": 6930 }, { "epoch": 0.9023818820773135, "grad_norm": 2.4122257232666016, "learning_rate": 8.12265917328391e-06, "loss": 0.6901, "step": 6933 }, { "epoch": 0.9027723545490043, "grad_norm": 2.494718551635742, "learning_rate": 8.121026733849486e-06, "loss": 0.6336, "step": 6936 }, { "epoch": 0.9031628270206951, "grad_norm": 2.373518466949463, "learning_rate": 8.119393749178258e-06, "loss": 0.6479, "step": 6939 }, { "epoch": 0.9035532994923858, "grad_norm": 2.2196645736694336, "learning_rate": 8.117760219555505e-06, "loss": 0.5689, "step": 6942 }, { "epoch": 0.9039437719640765, "grad_norm": 3.160625457763672, "learning_rate": 8.116126145266599e-06, "loss": 0.627, "step": 6945 }, { "epoch": 0.9043342444357673, "grad_norm": 2.553009271621704, "learning_rate": 8.114491526597012e-06, "loss": 0.5639, "step": 6948 }, { "epoch": 0.904724716907458, "grad_norm": 2.1808202266693115, "learning_rate": 8.112856363832307e-06, "loss": 0.5542, "step": 6951 }, { "epoch": 0.9051151893791488, "grad_norm": 2.4375030994415283, "learning_rate": 8.111220657258144e-06, "loss": 0.5841, "step": 6954 }, { "epoch": 0.9055056618508395, "grad_norm": 2.3346710205078125, "learning_rate": 8.109584407160277e-06, "loss": 0.6731, "step": 6957 }, { "epoch": 0.9058961343225302, "grad_norm": 2.4284756183624268, "learning_rate": 8.107947613824554e-06, "loss": 0.6966, "step": 6960 }, { "epoch": 0.906286606794221, "grad_norm": 3.4861011505126953, "learning_rate": 8.106310277536921e-06, "loss": 0.6403, "step": 6963 }, { "epoch": 0.9066770792659118, "grad_norm": 3.8682639598846436, "learning_rate": 8.104672398583419e-06, "loss": 0.6438, "step": 6966 }, { "epoch": 0.9070675517376025, "grad_norm": 2.232022523880005, "learning_rate": 8.10303397725018e-06, "loss": 0.545, "step": 6969 }, { "epoch": 0.9074580242092932, "grad_norm": 3.7557497024536133, "learning_rate": 8.101395013823433e-06, "loss": 0.7008, "step": 6972 }, { "epoch": 0.907848496680984, "grad_norm": 2.4933996200561523, "learning_rate": 8.099755508589502e-06, "loss": 0.6323, "step": 6975 }, { "epoch": 0.9082389691526748, "grad_norm": 2.4882712364196777, "learning_rate": 8.098115461834803e-06, "loss": 0.6243, "step": 6978 }, { "epoch": 0.9086294416243654, "grad_norm": 2.3607735633850098, "learning_rate": 8.096474873845851e-06, "loss": 0.6152, "step": 6981 }, { "epoch": 0.9090199140960562, "grad_norm": 3.5572595596313477, "learning_rate": 8.094833744909252e-06, "loss": 0.7645, "step": 6984 }, { "epoch": 0.909410386567747, "grad_norm": 3.0269618034362793, "learning_rate": 8.09319207531171e-06, "loss": 0.6273, "step": 6987 }, { "epoch": 0.9098008590394377, "grad_norm": 3.1948349475860596, "learning_rate": 8.091549865340019e-06, "loss": 0.6008, "step": 6990 }, { "epoch": 0.9101913315111285, "grad_norm": 2.419280529022217, "learning_rate": 8.08990711528107e-06, "loss": 0.6304, "step": 6993 }, { "epoch": 0.9105818039828192, "grad_norm": 2.2587809562683105, "learning_rate": 8.088263825421847e-06, "loss": 0.6211, "step": 6996 }, { "epoch": 0.91097227645451, "grad_norm": 2.709536552429199, "learning_rate": 8.086619996049431e-06, "loss": 0.6343, "step": 6999 }, { "epoch": 0.9113627489262007, "grad_norm": 5.089230060577393, "learning_rate": 8.084975627450995e-06, "loss": 0.6053, "step": 7002 }, { "epoch": 0.9117532213978915, "grad_norm": 2.689100742340088, "learning_rate": 8.083330719913808e-06, "loss": 0.668, "step": 7005 }, { "epoch": 0.9121436938695822, "grad_norm": 2.3222506046295166, "learning_rate": 8.08168527372523e-06, "loss": 0.5972, "step": 7008 }, { "epoch": 0.9125341663412729, "grad_norm": 2.3201560974121094, "learning_rate": 8.080039289172717e-06, "loss": 0.6853, "step": 7011 }, { "epoch": 0.9129246388129637, "grad_norm": 2.8677849769592285, "learning_rate": 8.078392766543821e-06, "loss": 0.5319, "step": 7014 }, { "epoch": 0.9133151112846545, "grad_norm": 2.3285553455352783, "learning_rate": 8.076745706126184e-06, "loss": 0.6937, "step": 7017 }, { "epoch": 0.9137055837563451, "grad_norm": 2.447075366973877, "learning_rate": 8.075098108207544e-06, "loss": 0.7326, "step": 7020 }, { "epoch": 0.9140960562280359, "grad_norm": 3.435882091522217, "learning_rate": 8.073449973075733e-06, "loss": 0.6119, "step": 7023 }, { "epoch": 0.9144865286997267, "grad_norm": 2.431121587753296, "learning_rate": 8.071801301018678e-06, "loss": 0.6601, "step": 7026 }, { "epoch": 0.9148770011714175, "grad_norm": 3.276074171066284, "learning_rate": 8.070152092324399e-06, "loss": 0.6196, "step": 7029 }, { "epoch": 0.9152674736431081, "grad_norm": 2.349234104156494, "learning_rate": 8.068502347281006e-06, "loss": 0.5846, "step": 7032 }, { "epoch": 0.9156579461147989, "grad_norm": 2.1926095485687256, "learning_rate": 8.06685206617671e-06, "loss": 0.7039, "step": 7035 }, { "epoch": 0.9160484185864897, "grad_norm": 2.8905913829803467, "learning_rate": 8.06520124929981e-06, "loss": 0.6995, "step": 7038 }, { "epoch": 0.9164388910581804, "grad_norm": 2.275421380996704, "learning_rate": 8.063549896938698e-06, "loss": 0.5875, "step": 7041 }, { "epoch": 0.9168293635298711, "grad_norm": 2.2735865116119385, "learning_rate": 8.061898009381865e-06, "loss": 0.6204, "step": 7044 }, { "epoch": 0.9172198360015619, "grad_norm": 2.518444299697876, "learning_rate": 8.06024558691789e-06, "loss": 0.6785, "step": 7047 }, { "epoch": 0.9176103084732526, "grad_norm": 2.3565497398376465, "learning_rate": 8.05859262983545e-06, "loss": 0.6093, "step": 7050 }, { "epoch": 0.9180007809449434, "grad_norm": 2.3102149963378906, "learning_rate": 8.056939138423313e-06, "loss": 0.701, "step": 7053 }, { "epoch": 0.9183912534166341, "grad_norm": 3.3671510219573975, "learning_rate": 8.055285112970337e-06, "loss": 0.6835, "step": 7056 }, { "epoch": 0.9187817258883249, "grad_norm": 2.4449825286865234, "learning_rate": 8.05363055376548e-06, "loss": 0.6697, "step": 7059 }, { "epoch": 0.9191721983600156, "grad_norm": 2.696711540222168, "learning_rate": 8.051975461097789e-06, "loss": 0.6039, "step": 7062 }, { "epoch": 0.9195626708317064, "grad_norm": 2.4236817359924316, "learning_rate": 8.050319835256406e-06, "loss": 0.6511, "step": 7065 }, { "epoch": 0.9199531433033971, "grad_norm": 2.9492392539978027, "learning_rate": 8.048663676530563e-06, "loss": 0.6339, "step": 7068 }, { "epoch": 0.9203436157750878, "grad_norm": 2.4083123207092285, "learning_rate": 8.04700698520959e-06, "loss": 0.6088, "step": 7071 }, { "epoch": 0.9207340882467786, "grad_norm": 2.4848201274871826, "learning_rate": 8.045349761582908e-06, "loss": 0.6274, "step": 7074 }, { "epoch": 0.9211245607184694, "grad_norm": 6.317074298858643, "learning_rate": 8.043692005940029e-06, "loss": 0.6443, "step": 7077 }, { "epoch": 0.9215150331901601, "grad_norm": 2.408294916152954, "learning_rate": 8.042033718570559e-06, "loss": 0.694, "step": 7080 }, { "epoch": 0.9219055056618508, "grad_norm": 2.3996033668518066, "learning_rate": 8.040374899764198e-06, "loss": 0.6197, "step": 7083 }, { "epoch": 0.9222959781335416, "grad_norm": 2.790170907974243, "learning_rate": 8.038715549810737e-06, "loss": 0.575, "step": 7086 }, { "epoch": 0.9226864506052324, "grad_norm": 2.455018997192383, "learning_rate": 8.037055669000062e-06, "loss": 0.6055, "step": 7089 }, { "epoch": 0.9230769230769231, "grad_norm": 2.327902317047119, "learning_rate": 8.035395257622151e-06, "loss": 0.5594, "step": 7092 }, { "epoch": 0.9234673955486138, "grad_norm": 3.6916446685791016, "learning_rate": 8.033734315967074e-06, "loss": 0.6818, "step": 7095 }, { "epoch": 0.9238578680203046, "grad_norm": 2.5879135131835938, "learning_rate": 8.032072844324995e-06, "loss": 0.66, "step": 7098 }, { "epoch": 0.9242483404919953, "grad_norm": 2.4714932441711426, "learning_rate": 8.030410842986169e-06, "loss": 0.6118, "step": 7101 }, { "epoch": 0.9246388129636861, "grad_norm": 2.5215554237365723, "learning_rate": 8.028748312240942e-06, "loss": 0.7066, "step": 7104 }, { "epoch": 0.9250292854353768, "grad_norm": 2.2146830558776855, "learning_rate": 8.027085252379755e-06, "loss": 0.5609, "step": 7107 }, { "epoch": 0.9254197579070675, "grad_norm": 2.9621617794036865, "learning_rate": 8.025421663693147e-06, "loss": 0.6719, "step": 7110 }, { "epoch": 0.9258102303787583, "grad_norm": 2.4687397480010986, "learning_rate": 8.023757546471737e-06, "loss": 0.634, "step": 7113 }, { "epoch": 0.9262007028504491, "grad_norm": 2.19496488571167, "learning_rate": 8.022092901006245e-06, "loss": 0.6325, "step": 7116 }, { "epoch": 0.9265911753221397, "grad_norm": 3.014942169189453, "learning_rate": 8.020427727587479e-06, "loss": 0.6796, "step": 7119 }, { "epoch": 0.9269816477938305, "grad_norm": 2.605684995651245, "learning_rate": 8.018762026506344e-06, "loss": 0.6616, "step": 7122 }, { "epoch": 0.9273721202655213, "grad_norm": 2.285712957382202, "learning_rate": 8.017095798053834e-06, "loss": 0.5857, "step": 7125 }, { "epoch": 0.9277625927372121, "grad_norm": 2.385016918182373, "learning_rate": 8.015429042521034e-06, "loss": 0.6805, "step": 7128 }, { "epoch": 0.9281530652089027, "grad_norm": 2.6234757900238037, "learning_rate": 8.013761760199125e-06, "loss": 0.6721, "step": 7131 }, { "epoch": 0.9285435376805935, "grad_norm": 2.4633302688598633, "learning_rate": 8.012093951379376e-06, "loss": 0.6305, "step": 7134 }, { "epoch": 0.9289340101522843, "grad_norm": 2.8097805976867676, "learning_rate": 8.01042561635315e-06, "loss": 0.613, "step": 7137 }, { "epoch": 0.929324482623975, "grad_norm": 2.5998694896698, "learning_rate": 8.008756755411902e-06, "loss": 0.619, "step": 7140 }, { "epoch": 0.9297149550956657, "grad_norm": 2.280726194381714, "learning_rate": 8.007087368847178e-06, "loss": 0.6404, "step": 7143 }, { "epoch": 0.9301054275673565, "grad_norm": 2.546746015548706, "learning_rate": 8.005417456950617e-06, "loss": 0.6563, "step": 7146 }, { "epoch": 0.9304959000390473, "grad_norm": 3.274017572402954, "learning_rate": 8.003747020013948e-06, "loss": 0.6148, "step": 7149 }, { "epoch": 0.930886372510738, "grad_norm": 2.3492894172668457, "learning_rate": 8.002076058328996e-06, "loss": 0.5916, "step": 7152 }, { "epoch": 0.9312768449824287, "grad_norm": 2.176417112350464, "learning_rate": 8.00040457218767e-06, "loss": 0.5756, "step": 7155 }, { "epoch": 0.9316673174541195, "grad_norm": 2.4730896949768066, "learning_rate": 7.998732561881976e-06, "loss": 0.5959, "step": 7158 }, { "epoch": 0.9320577899258102, "grad_norm": 2.558849573135376, "learning_rate": 7.997060027704016e-06, "loss": 0.6642, "step": 7161 }, { "epoch": 0.932448262397501, "grad_norm": 2.7973668575286865, "learning_rate": 7.99538696994597e-06, "loss": 0.6965, "step": 7164 }, { "epoch": 0.9328387348691918, "grad_norm": 2.7359659671783447, "learning_rate": 7.993713388900124e-06, "loss": 0.671, "step": 7167 }, { "epoch": 0.9332292073408824, "grad_norm": 3.094665765762329, "learning_rate": 7.992039284858846e-06, "loss": 0.6593, "step": 7170 }, { "epoch": 0.9336196798125732, "grad_norm": 2.6220576763153076, "learning_rate": 7.990364658114599e-06, "loss": 0.6067, "step": 7173 }, { "epoch": 0.934010152284264, "grad_norm": 2.2661070823669434, "learning_rate": 7.988689508959936e-06, "loss": 0.6265, "step": 7176 }, { "epoch": 0.9344006247559548, "grad_norm": 3.1826601028442383, "learning_rate": 7.987013837687505e-06, "loss": 0.6483, "step": 7179 }, { "epoch": 0.9347910972276454, "grad_norm": 2.6422383785247803, "learning_rate": 7.98533764459004e-06, "loss": 0.6612, "step": 7182 }, { "epoch": 0.9351815696993362, "grad_norm": 2.5566980838775635, "learning_rate": 7.983660929960368e-06, "loss": 0.6701, "step": 7185 }, { "epoch": 0.935572042171027, "grad_norm": 2.446725368499756, "learning_rate": 7.98198369409141e-06, "loss": 0.62, "step": 7188 }, { "epoch": 0.9359625146427177, "grad_norm": 2.3360819816589355, "learning_rate": 7.980305937276172e-06, "loss": 0.598, "step": 7191 }, { "epoch": 0.9363529871144084, "grad_norm": 2.8677642345428467, "learning_rate": 7.978627659807757e-06, "loss": 0.6929, "step": 7194 }, { "epoch": 0.9367434595860992, "grad_norm": 2.7148616313934326, "learning_rate": 7.976948861979356e-06, "loss": 0.626, "step": 7197 }, { "epoch": 0.9371339320577899, "grad_norm": 2.4548072814941406, "learning_rate": 7.975269544084251e-06, "loss": 0.6934, "step": 7200 }, { "epoch": 0.9375244045294807, "grad_norm": 3.422706365585327, "learning_rate": 7.973589706415816e-06, "loss": 0.666, "step": 7203 }, { "epoch": 0.9379148770011714, "grad_norm": 2.493299722671509, "learning_rate": 7.971909349267514e-06, "loss": 0.7444, "step": 7206 }, { "epoch": 0.9383053494728621, "grad_norm": 2.5340187549591064, "learning_rate": 7.970228472932901e-06, "loss": 0.6665, "step": 7209 }, { "epoch": 0.9386958219445529, "grad_norm": 2.3306686878204346, "learning_rate": 7.968547077705624e-06, "loss": 0.6389, "step": 7212 }, { "epoch": 0.9390862944162437, "grad_norm": 2.3347361087799072, "learning_rate": 7.966865163879416e-06, "loss": 0.6495, "step": 7215 }, { "epoch": 0.9394767668879344, "grad_norm": 2.574287176132202, "learning_rate": 7.965182731748104e-06, "loss": 0.6285, "step": 7218 }, { "epoch": 0.9398672393596251, "grad_norm": 2.4919655323028564, "learning_rate": 7.96349978160561e-06, "loss": 0.6683, "step": 7221 }, { "epoch": 0.9402577118313159, "grad_norm": 2.245663642883301, "learning_rate": 7.961816313745936e-06, "loss": 0.5926, "step": 7224 }, { "epoch": 0.9406481843030067, "grad_norm": 2.493183135986328, "learning_rate": 7.960132328463184e-06, "loss": 0.6197, "step": 7227 }, { "epoch": 0.9410386567746973, "grad_norm": 2.5422475337982178, "learning_rate": 7.958447826051538e-06, "loss": 0.6305, "step": 7230 }, { "epoch": 0.9414291292463881, "grad_norm": 2.432318925857544, "learning_rate": 7.956762806805285e-06, "loss": 0.7437, "step": 7233 }, { "epoch": 0.9418196017180789, "grad_norm": 2.516645669937134, "learning_rate": 7.955077271018788e-06, "loss": 0.6369, "step": 7236 }, { "epoch": 0.9422100741897697, "grad_norm": 2.9639275074005127, "learning_rate": 7.953391218986507e-06, "loss": 0.5996, "step": 7239 }, { "epoch": 0.9426005466614603, "grad_norm": 2.503748655319214, "learning_rate": 7.951704651002994e-06, "loss": 0.6876, "step": 7242 }, { "epoch": 0.9429910191331511, "grad_norm": 2.3590073585510254, "learning_rate": 7.950017567362888e-06, "loss": 0.6315, "step": 7245 }, { "epoch": 0.9433814916048419, "grad_norm": 2.718970775604248, "learning_rate": 7.948329968360919e-06, "loss": 0.7917, "step": 7248 }, { "epoch": 0.9437719640765326, "grad_norm": 3.0263495445251465, "learning_rate": 7.946641854291908e-06, "loss": 0.62, "step": 7251 }, { "epoch": 0.9441624365482234, "grad_norm": 2.2088472843170166, "learning_rate": 7.944953225450762e-06, "loss": 0.5835, "step": 7254 }, { "epoch": 0.9445529090199141, "grad_norm": 2.943924903869629, "learning_rate": 7.943264082132484e-06, "loss": 0.6698, "step": 7257 }, { "epoch": 0.9449433814916048, "grad_norm": 3.520203113555908, "learning_rate": 7.941574424632161e-06, "loss": 0.6166, "step": 7260 }, { "epoch": 0.9453338539632956, "grad_norm": 2.5051796436309814, "learning_rate": 7.939884253244977e-06, "loss": 0.5959, "step": 7263 }, { "epoch": 0.9457243264349864, "grad_norm": 2.657858371734619, "learning_rate": 7.938193568266195e-06, "loss": 0.6384, "step": 7266 }, { "epoch": 0.946114798906677, "grad_norm": 2.747825860977173, "learning_rate": 7.93650236999118e-06, "loss": 0.6422, "step": 7269 }, { "epoch": 0.9465052713783678, "grad_norm": 2.390871524810791, "learning_rate": 7.93481065871538e-06, "loss": 0.6069, "step": 7272 }, { "epoch": 0.9468957438500586, "grad_norm": 4.321902275085449, "learning_rate": 7.933118434734329e-06, "loss": 0.6149, "step": 7275 }, { "epoch": 0.9472862163217494, "grad_norm": 2.5380969047546387, "learning_rate": 7.931425698343657e-06, "loss": 0.6597, "step": 7278 }, { "epoch": 0.94767668879344, "grad_norm": 2.3168387413024902, "learning_rate": 7.929732449839085e-06, "loss": 0.6158, "step": 7281 }, { "epoch": 0.9480671612651308, "grad_norm": 2.2340736389160156, "learning_rate": 7.928038689516417e-06, "loss": 0.546, "step": 7284 }, { "epoch": 0.9484576337368216, "grad_norm": 2.3683393001556396, "learning_rate": 7.92634441767155e-06, "loss": 0.5225, "step": 7287 }, { "epoch": 0.9488481062085123, "grad_norm": 3.3821890354156494, "learning_rate": 7.924649634600468e-06, "loss": 0.7159, "step": 7290 }, { "epoch": 0.949238578680203, "grad_norm": 2.016606569290161, "learning_rate": 7.922954340599247e-06, "loss": 0.578, "step": 7293 }, { "epoch": 0.9496290511518938, "grad_norm": 3.1906471252441406, "learning_rate": 7.921258535964051e-06, "loss": 0.7206, "step": 7296 }, { "epoch": 0.9500195236235845, "grad_norm": 2.839524507522583, "learning_rate": 7.919562220991137e-06, "loss": 0.6458, "step": 7299 }, { "epoch": 0.9504099960952753, "grad_norm": 2.620429039001465, "learning_rate": 7.917865395976844e-06, "loss": 0.6736, "step": 7302 }, { "epoch": 0.950800468566966, "grad_norm": 2.331636667251587, "learning_rate": 7.916168061217603e-06, "loss": 0.6511, "step": 7305 }, { "epoch": 0.9511909410386568, "grad_norm": 3.166801929473877, "learning_rate": 7.914470217009937e-06, "loss": 0.7378, "step": 7308 }, { "epoch": 0.9515814135103475, "grad_norm": 2.2052841186523438, "learning_rate": 7.912771863650457e-06, "loss": 0.6025, "step": 7311 }, { "epoch": 0.9519718859820383, "grad_norm": 2.397726058959961, "learning_rate": 7.911073001435859e-06, "loss": 0.6423, "step": 7314 }, { "epoch": 0.952362358453729, "grad_norm": 2.694310426712036, "learning_rate": 7.909373630662931e-06, "loss": 0.6696, "step": 7317 }, { "epoch": 0.9527528309254197, "grad_norm": 2.3516201972961426, "learning_rate": 7.90767375162855e-06, "loss": 0.6661, "step": 7320 }, { "epoch": 0.9531433033971105, "grad_norm": 3.105769395828247, "learning_rate": 7.905973364629682e-06, "loss": 0.643, "step": 7323 }, { "epoch": 0.9535337758688013, "grad_norm": 2.4792470932006836, "learning_rate": 7.904272469963381e-06, "loss": 0.6866, "step": 7326 }, { "epoch": 0.9539242483404919, "grad_norm": 2.354459285736084, "learning_rate": 7.90257106792679e-06, "loss": 0.6899, "step": 7329 }, { "epoch": 0.9543147208121827, "grad_norm": 2.265612840652466, "learning_rate": 7.900869158817137e-06, "loss": 0.6038, "step": 7332 }, { "epoch": 0.9547051932838735, "grad_norm": 2.663803815841675, "learning_rate": 7.899166742931745e-06, "loss": 0.6569, "step": 7335 }, { "epoch": 0.9550956657555643, "grad_norm": 2.5542824268341064, "learning_rate": 7.897463820568024e-06, "loss": 0.6466, "step": 7338 }, { "epoch": 0.955486138227255, "grad_norm": 2.5738067626953125, "learning_rate": 7.895760392023467e-06, "loss": 0.6287, "step": 7341 }, { "epoch": 0.9558766106989457, "grad_norm": 2.7856462001800537, "learning_rate": 7.894056457595661e-06, "loss": 0.6974, "step": 7344 }, { "epoch": 0.9562670831706365, "grad_norm": 2.3538174629211426, "learning_rate": 7.892352017582281e-06, "loss": 0.5836, "step": 7347 }, { "epoch": 0.9566575556423272, "grad_norm": 3.389510154724121, "learning_rate": 7.89064707228109e-06, "loss": 0.6142, "step": 7350 }, { "epoch": 0.957048028114018, "grad_norm": 2.4188413619995117, "learning_rate": 7.888941621989934e-06, "loss": 0.5828, "step": 7353 }, { "epoch": 0.9574385005857087, "grad_norm": 2.8886420726776123, "learning_rate": 7.887235667006754e-06, "loss": 0.5977, "step": 7356 }, { "epoch": 0.9578289730573994, "grad_norm": 3.743659019470215, "learning_rate": 7.885529207629578e-06, "loss": 0.6807, "step": 7359 }, { "epoch": 0.9582194455290902, "grad_norm": 3.0627243518829346, "learning_rate": 7.883822244156518e-06, "loss": 0.6187, "step": 7362 }, { "epoch": 0.958609918000781, "grad_norm": 2.3821604251861572, "learning_rate": 7.88211477688578e-06, "loss": 0.6304, "step": 7365 }, { "epoch": 0.9590003904724717, "grad_norm": 2.6071219444274902, "learning_rate": 7.880406806115655e-06, "loss": 0.6801, "step": 7368 }, { "epoch": 0.9593908629441624, "grad_norm": 2.3850324153900146, "learning_rate": 7.878698332144518e-06, "loss": 0.7271, "step": 7371 }, { "epoch": 0.9597813354158532, "grad_norm": 2.5399577617645264, "learning_rate": 7.87698935527084e-06, "loss": 0.6031, "step": 7374 }, { "epoch": 0.960171807887544, "grad_norm": 2.3061318397521973, "learning_rate": 7.875279875793173e-06, "loss": 0.6618, "step": 7377 }, { "epoch": 0.9605622803592346, "grad_norm": 2.355522632598877, "learning_rate": 7.87356989401016e-06, "loss": 0.5788, "step": 7380 }, { "epoch": 0.9609527528309254, "grad_norm": 2.948932409286499, "learning_rate": 7.871859410220531e-06, "loss": 0.7404, "step": 7383 }, { "epoch": 0.9613432253026162, "grad_norm": 2.565260648727417, "learning_rate": 7.870148424723107e-06, "loss": 0.6905, "step": 7386 }, { "epoch": 0.961733697774307, "grad_norm": 3.4541046619415283, "learning_rate": 7.86843693781679e-06, "loss": 0.6341, "step": 7389 }, { "epoch": 0.9621241702459976, "grad_norm": 2.6894516944885254, "learning_rate": 7.866724949800574e-06, "loss": 0.6463, "step": 7392 }, { "epoch": 0.9625146427176884, "grad_norm": 3.081102132797241, "learning_rate": 7.86501246097354e-06, "loss": 0.6437, "step": 7395 }, { "epoch": 0.9629051151893792, "grad_norm": 2.0764200687408447, "learning_rate": 7.863299471634855e-06, "loss": 0.5674, "step": 7398 }, { "epoch": 0.9632955876610699, "grad_norm": 2.589940309524536, "learning_rate": 7.861585982083777e-06, "loss": 0.6612, "step": 7401 }, { "epoch": 0.9636860601327606, "grad_norm": 2.2398934364318848, "learning_rate": 7.859871992619647e-06, "loss": 0.5479, "step": 7404 }, { "epoch": 0.9640765326044514, "grad_norm": 2.3958423137664795, "learning_rate": 7.8581575035419e-06, "loss": 0.6572, "step": 7407 }, { "epoch": 0.9644670050761421, "grad_norm": 2.667583465576172, "learning_rate": 7.856442515150044e-06, "loss": 0.6274, "step": 7410 }, { "epoch": 0.9648574775478329, "grad_norm": 2.350374460220337, "learning_rate": 7.854727027743693e-06, "loss": 0.6201, "step": 7413 }, { "epoch": 0.9652479500195236, "grad_norm": 2.8348028659820557, "learning_rate": 7.853011041622536e-06, "loss": 0.6498, "step": 7416 }, { "epoch": 0.9656384224912143, "grad_norm": 2.5437428951263428, "learning_rate": 7.85129455708635e-06, "loss": 0.6588, "step": 7419 }, { "epoch": 0.9660288949629051, "grad_norm": 3.723634958267212, "learning_rate": 7.849577574435004e-06, "loss": 0.6834, "step": 7422 }, { "epoch": 0.9664193674345959, "grad_norm": 2.6529476642608643, "learning_rate": 7.847860093968452e-06, "loss": 0.6071, "step": 7425 }, { "epoch": 0.9668098399062867, "grad_norm": 2.431433916091919, "learning_rate": 7.84614211598673e-06, "loss": 0.6335, "step": 7428 }, { "epoch": 0.9672003123779773, "grad_norm": 2.597510576248169, "learning_rate": 7.84442364078997e-06, "loss": 0.6908, "step": 7431 }, { "epoch": 0.9675907848496681, "grad_norm": 3.476114273071289, "learning_rate": 7.842704668678383e-06, "loss": 0.6247, "step": 7434 }, { "epoch": 0.9679812573213589, "grad_norm": 4.752762794494629, "learning_rate": 7.84098519995227e-06, "loss": 0.5914, "step": 7437 }, { "epoch": 0.9683717297930496, "grad_norm": 2.079385995864868, "learning_rate": 7.839265234912019e-06, "loss": 0.6201, "step": 7440 }, { "epoch": 0.9687622022647403, "grad_norm": 2.5645883083343506, "learning_rate": 7.837544773858104e-06, "loss": 0.6475, "step": 7443 }, { "epoch": 0.9691526747364311, "grad_norm": 2.471785068511963, "learning_rate": 7.835823817091088e-06, "loss": 0.5883, "step": 7446 }, { "epoch": 0.9695431472081218, "grad_norm": 2.9823968410491943, "learning_rate": 7.834102364911615e-06, "loss": 0.6943, "step": 7449 }, { "epoch": 0.9699336196798126, "grad_norm": 3.442391872406006, "learning_rate": 7.832380417620421e-06, "loss": 0.7353, "step": 7452 }, { "epoch": 0.9703240921515033, "grad_norm": 2.499734401702881, "learning_rate": 7.83065797551833e-06, "loss": 0.6728, "step": 7455 }, { "epoch": 0.970714564623194, "grad_norm": 2.1826016902923584, "learning_rate": 7.828935038906242e-06, "loss": 0.5552, "step": 7458 }, { "epoch": 0.9711050370948848, "grad_norm": 3.049651861190796, "learning_rate": 7.827211608085156e-06, "loss": 0.5896, "step": 7461 }, { "epoch": 0.9714955095665756, "grad_norm": 3.0418035984039307, "learning_rate": 7.825487683356147e-06, "loss": 0.6457, "step": 7464 }, { "epoch": 0.9718859820382663, "grad_norm": 2.458627700805664, "learning_rate": 7.823763265020385e-06, "loss": 0.5146, "step": 7467 }, { "epoch": 0.972276454509957, "grad_norm": 2.405871629714966, "learning_rate": 7.822038353379123e-06, "loss": 0.6785, "step": 7470 }, { "epoch": 0.9726669269816478, "grad_norm": 3.09692120552063, "learning_rate": 7.820312948733694e-06, "loss": 0.6946, "step": 7473 }, { "epoch": 0.9730573994533386, "grad_norm": 2.938538074493408, "learning_rate": 7.818587051385528e-06, "loss": 0.5546, "step": 7476 }, { "epoch": 0.9734478719250292, "grad_norm": 2.3744659423828125, "learning_rate": 7.816860661636133e-06, "loss": 0.6079, "step": 7479 }, { "epoch": 0.97383834439672, "grad_norm": 2.385146379470825, "learning_rate": 7.815133779787106e-06, "loss": 0.6877, "step": 7482 }, { "epoch": 0.9742288168684108, "grad_norm": 2.245079278945923, "learning_rate": 7.81340640614013e-06, "loss": 0.6019, "step": 7485 }, { "epoch": 0.9746192893401016, "grad_norm": 2.719306707382202, "learning_rate": 7.811678540996974e-06, "loss": 0.7438, "step": 7488 }, { "epoch": 0.9750097618117922, "grad_norm": 2.31829833984375, "learning_rate": 7.80995018465949e-06, "loss": 0.6019, "step": 7491 }, { "epoch": 0.975400234283483, "grad_norm": 2.4517407417297363, "learning_rate": 7.808221337429622e-06, "loss": 0.706, "step": 7494 }, { "epoch": 0.9757907067551738, "grad_norm": 2.333136558532715, "learning_rate": 7.806491999609393e-06, "loss": 0.5492, "step": 7497 }, { "epoch": 0.9761811792268645, "grad_norm": 2.3452956676483154, "learning_rate": 7.804762171500915e-06, "loss": 0.6827, "step": 7500 }, { "epoch": 0.9765716516985552, "grad_norm": 2.5150270462036133, "learning_rate": 7.803031853406389e-06, "loss": 0.6804, "step": 7503 }, { "epoch": 0.976962124170246, "grad_norm": 2.3206984996795654, "learning_rate": 7.801301045628091e-06, "loss": 0.6799, "step": 7506 }, { "epoch": 0.9773525966419367, "grad_norm": 2.463517904281616, "learning_rate": 7.799569748468395e-06, "loss": 0.6221, "step": 7509 }, { "epoch": 0.9777430691136275, "grad_norm": 2.432781457901001, "learning_rate": 7.797837962229755e-06, "loss": 0.744, "step": 7512 }, { "epoch": 0.9781335415853183, "grad_norm": 2.4999756813049316, "learning_rate": 7.796105687214705e-06, "loss": 0.6272, "step": 7515 }, { "epoch": 0.978524014057009, "grad_norm": 2.6092498302459717, "learning_rate": 7.794372923725876e-06, "loss": 0.6399, "step": 7518 }, { "epoch": 0.9789144865286997, "grad_norm": 2.057215452194214, "learning_rate": 7.792639672065978e-06, "loss": 0.5434, "step": 7521 }, { "epoch": 0.9793049590003905, "grad_norm": 2.6579465866088867, "learning_rate": 7.790905932537802e-06, "loss": 0.6238, "step": 7524 }, { "epoch": 0.9796954314720813, "grad_norm": 2.4319543838500977, "learning_rate": 7.78917170544423e-06, "loss": 0.6155, "step": 7527 }, { "epoch": 0.9800859039437719, "grad_norm": 2.190973997116089, "learning_rate": 7.787436991088228e-06, "loss": 0.605, "step": 7530 }, { "epoch": 0.9804763764154627, "grad_norm": 3.2778749465942383, "learning_rate": 7.78570178977285e-06, "loss": 0.6937, "step": 7533 }, { "epoch": 0.9808668488871535, "grad_norm": 3.431509256362915, "learning_rate": 7.783966101801228e-06, "loss": 0.6452, "step": 7536 }, { "epoch": 0.9812573213588442, "grad_norm": 2.6262214183807373, "learning_rate": 7.782229927476585e-06, "loss": 0.649, "step": 7539 }, { "epoch": 0.9816477938305349, "grad_norm": 2.3871302604675293, "learning_rate": 7.780493267102226e-06, "loss": 0.6498, "step": 7542 }, { "epoch": 0.9820382663022257, "grad_norm": 2.3250765800476074, "learning_rate": 7.778756120981544e-06, "loss": 0.608, "step": 7545 }, { "epoch": 0.9824287387739165, "grad_norm": 2.921252489089966, "learning_rate": 7.777018489418011e-06, "loss": 0.6643, "step": 7548 }, { "epoch": 0.9828192112456072, "grad_norm": 3.2305235862731934, "learning_rate": 7.775280372715193e-06, "loss": 0.6425, "step": 7551 }, { "epoch": 0.9832096837172979, "grad_norm": 2.3904078006744385, "learning_rate": 7.77354177117673e-06, "loss": 0.6353, "step": 7554 }, { "epoch": 0.9836001561889887, "grad_norm": 3.6194827556610107, "learning_rate": 7.771802685106356e-06, "loss": 0.6311, "step": 7557 }, { "epoch": 0.9839906286606794, "grad_norm": 2.5549159049987793, "learning_rate": 7.770063114807882e-06, "loss": 0.6819, "step": 7560 }, { "epoch": 0.9843811011323702, "grad_norm": 2.8810348510742188, "learning_rate": 7.76832306058521e-06, "loss": 0.6459, "step": 7563 }, { "epoch": 0.9847715736040609, "grad_norm": 2.3030362129211426, "learning_rate": 7.766582522742323e-06, "loss": 0.6363, "step": 7566 }, { "epoch": 0.9851620460757516, "grad_norm": 2.76395583152771, "learning_rate": 7.764841501583288e-06, "loss": 0.572, "step": 7569 }, { "epoch": 0.9855525185474424, "grad_norm": 2.2686634063720703, "learning_rate": 7.76309999741226e-06, "loss": 0.5821, "step": 7572 }, { "epoch": 0.9859429910191332, "grad_norm": 2.786400079727173, "learning_rate": 7.761358010533478e-06, "loss": 0.6529, "step": 7575 }, { "epoch": 0.9863334634908238, "grad_norm": 2.2984442710876465, "learning_rate": 7.759615541251257e-06, "loss": 0.6813, "step": 7578 }, { "epoch": 0.9867239359625146, "grad_norm": 2.821648359298706, "learning_rate": 7.757872589870008e-06, "loss": 0.6864, "step": 7581 }, { "epoch": 0.9871144084342054, "grad_norm": 2.334958791732788, "learning_rate": 7.756129156694219e-06, "loss": 0.6827, "step": 7584 }, { "epoch": 0.9875048809058962, "grad_norm": 2.1806464195251465, "learning_rate": 7.754385242028464e-06, "loss": 0.6831, "step": 7587 }, { "epoch": 0.9878953533775868, "grad_norm": 2.2671940326690674, "learning_rate": 7.7526408461774e-06, "loss": 0.6335, "step": 7590 }, { "epoch": 0.9882858258492776, "grad_norm": 2.351231098175049, "learning_rate": 7.750895969445773e-06, "loss": 0.6129, "step": 7593 }, { "epoch": 0.9886762983209684, "grad_norm": 2.558773994445801, "learning_rate": 7.749150612138407e-06, "loss": 0.771, "step": 7596 }, { "epoch": 0.9890667707926591, "grad_norm": 4.013977527618408, "learning_rate": 7.747404774560213e-06, "loss": 0.7569, "step": 7599 }, { "epoch": 0.9894572432643499, "grad_norm": 2.6245102882385254, "learning_rate": 7.745658457016182e-06, "loss": 0.6937, "step": 7602 }, { "epoch": 0.9898477157360406, "grad_norm": 2.6696815490722656, "learning_rate": 7.743911659811399e-06, "loss": 0.5789, "step": 7605 }, { "epoch": 0.9902381882077314, "grad_norm": 2.585771322250366, "learning_rate": 7.74216438325102e-06, "loss": 0.7235, "step": 7608 }, { "epoch": 0.9906286606794221, "grad_norm": 2.760922431945801, "learning_rate": 7.740416627640287e-06, "loss": 0.6648, "step": 7611 }, { "epoch": 0.9910191331511129, "grad_norm": 2.4276010990142822, "learning_rate": 7.738668393284538e-06, "loss": 0.5827, "step": 7614 }, { "epoch": 0.9914096056228036, "grad_norm": 2.4627444744110107, "learning_rate": 7.736919680489183e-06, "loss": 0.565, "step": 7617 }, { "epoch": 0.9918000780944943, "grad_norm": 2.5422356128692627, "learning_rate": 7.735170489559715e-06, "loss": 0.6392, "step": 7620 }, { "epoch": 0.9921905505661851, "grad_norm": 3.172285318374634, "learning_rate": 7.733420820801718e-06, "loss": 0.7205, "step": 7623 }, { "epoch": 0.9925810230378759, "grad_norm": 2.1353375911712646, "learning_rate": 7.731670674520854e-06, "loss": 0.5064, "step": 7626 }, { "epoch": 0.9929714955095665, "grad_norm": 2.693587303161621, "learning_rate": 7.72992005102287e-06, "loss": 0.6689, "step": 7629 }, { "epoch": 0.9933619679812573, "grad_norm": 2.3884193897247314, "learning_rate": 7.728168950613595e-06, "loss": 0.5864, "step": 7632 }, { "epoch": 0.9937524404529481, "grad_norm": 2.1766364574432373, "learning_rate": 7.726417373598944e-06, "loss": 0.726, "step": 7635 }, { "epoch": 0.9941429129246389, "grad_norm": 3.2286782264709473, "learning_rate": 7.724665320284913e-06, "loss": 0.6333, "step": 7638 }, { "epoch": 0.9945333853963295, "grad_norm": 2.1552257537841797, "learning_rate": 7.722912790977582e-06, "loss": 0.5859, "step": 7641 }, { "epoch": 0.9949238578680203, "grad_norm": 2.586132049560547, "learning_rate": 7.721159785983116e-06, "loss": 0.5727, "step": 7644 }, { "epoch": 0.9953143303397111, "grad_norm": 2.324312686920166, "learning_rate": 7.71940630560776e-06, "loss": 0.6321, "step": 7647 }, { "epoch": 0.9957048028114018, "grad_norm": 2.7428348064422607, "learning_rate": 7.717652350157843e-06, "loss": 0.7647, "step": 7650 }, { "epoch": 0.9960952752830925, "grad_norm": 2.904000759124756, "learning_rate": 7.715897919939775e-06, "loss": 0.6042, "step": 7653 }, { "epoch": 0.9964857477547833, "grad_norm": 2.2930192947387695, "learning_rate": 7.714143015260056e-06, "loss": 0.6164, "step": 7656 }, { "epoch": 0.996876220226474, "grad_norm": 2.6954126358032227, "learning_rate": 7.712387636425261e-06, "loss": 0.6288, "step": 7659 }, { "epoch": 0.9972666926981648, "grad_norm": 2.165433168411255, "learning_rate": 7.710631783742053e-06, "loss": 0.6372, "step": 7662 }, { "epoch": 0.9976571651698555, "grad_norm": 2.2576329708099365, "learning_rate": 7.708875457517176e-06, "loss": 0.7483, "step": 7665 }, { "epoch": 0.9980476376415462, "grad_norm": 2.5120785236358643, "learning_rate": 7.707118658057453e-06, "loss": 0.6769, "step": 7668 }, { "epoch": 0.998438110113237, "grad_norm": 2.3859639167785645, "learning_rate": 7.705361385669795e-06, "loss": 0.6798, "step": 7671 }, { "epoch": 0.9988285825849278, "grad_norm": 2.457106828689575, "learning_rate": 7.703603640661195e-06, "loss": 0.6851, "step": 7674 }, { "epoch": 0.9992190550566185, "grad_norm": 2.6297943592071533, "learning_rate": 7.701845423338727e-06, "loss": 0.6308, "step": 7677 }, { "epoch": 0.9996095275283092, "grad_norm": 2.5548126697540283, "learning_rate": 7.700086734009546e-06, "loss": 0.5697, "step": 7680 }, { "epoch": 1.0, "grad_norm": 7.604892730712891, "learning_rate": 7.698327572980893e-06, "loss": 0.5724, "step": 7683 }, { "epoch": 1.0003904724716908, "grad_norm": 2.4292237758636475, "learning_rate": 7.69656794056009e-06, "loss": 0.5014, "step": 7686 }, { "epoch": 1.0007809449433815, "grad_norm": 2.254239320755005, "learning_rate": 7.694807837054542e-06, "loss": 0.5559, "step": 7689 }, { "epoch": 1.0011714174150723, "grad_norm": 2.3204002380371094, "learning_rate": 7.693047262771734e-06, "loss": 0.4617, "step": 7692 }, { "epoch": 1.001561889886763, "grad_norm": 2.1934375762939453, "learning_rate": 7.691286218019232e-06, "loss": 0.5337, "step": 7695 }, { "epoch": 1.0019523623584536, "grad_norm": 3.9040029048919678, "learning_rate": 7.689524703104691e-06, "loss": 0.5133, "step": 7698 }, { "epoch": 1.0023428348301444, "grad_norm": 2.4869489669799805, "learning_rate": 7.687762718335841e-06, "loss": 0.478, "step": 7701 }, { "epoch": 1.0027333073018352, "grad_norm": 2.8475353717803955, "learning_rate": 7.686000264020499e-06, "loss": 0.5887, "step": 7704 }, { "epoch": 1.003123779773526, "grad_norm": 2.571333408355713, "learning_rate": 7.684237340466563e-06, "loss": 0.5476, "step": 7707 }, { "epoch": 1.0035142522452167, "grad_norm": 3.4672353267669678, "learning_rate": 7.682473947982008e-06, "loss": 0.5168, "step": 7710 }, { "epoch": 1.0039047247169075, "grad_norm": 2.6517016887664795, "learning_rate": 7.680710086874899e-06, "loss": 0.5201, "step": 7713 }, { "epoch": 1.0042951971885983, "grad_norm": 3.378917694091797, "learning_rate": 7.678945757453375e-06, "loss": 0.5947, "step": 7716 }, { "epoch": 1.004685669660289, "grad_norm": 3.3272864818573, "learning_rate": 7.677180960025665e-06, "loss": 0.5434, "step": 7719 }, { "epoch": 1.0050761421319796, "grad_norm": 2.3880414962768555, "learning_rate": 7.675415694900072e-06, "loss": 0.489, "step": 7722 }, { "epoch": 1.0054666146036704, "grad_norm": 2.6195037364959717, "learning_rate": 7.673649962384985e-06, "loss": 0.5526, "step": 7725 }, { "epoch": 1.0058570870753611, "grad_norm": 2.506817102432251, "learning_rate": 7.671883762788877e-06, "loss": 0.4631, "step": 7728 }, { "epoch": 1.006247559547052, "grad_norm": 3.4505410194396973, "learning_rate": 7.670117096420294e-06, "loss": 0.4856, "step": 7731 }, { "epoch": 1.0066380320187427, "grad_norm": 2.148554801940918, "learning_rate": 7.668349963587872e-06, "loss": 0.4429, "step": 7734 }, { "epoch": 1.0070285044904335, "grad_norm": 2.645296812057495, "learning_rate": 7.666582364600324e-06, "loss": 0.5216, "step": 7737 }, { "epoch": 1.0074189769621242, "grad_norm": 3.129601001739502, "learning_rate": 7.664814299766447e-06, "loss": 0.5421, "step": 7740 }, { "epoch": 1.007809449433815, "grad_norm": 2.280381917953491, "learning_rate": 7.66304576939512e-06, "loss": 0.5322, "step": 7743 }, { "epoch": 1.0081999219055056, "grad_norm": 2.548676013946533, "learning_rate": 7.661276773795297e-06, "loss": 0.513, "step": 7746 }, { "epoch": 1.0085903943771963, "grad_norm": 3.303908109664917, "learning_rate": 7.65950731327602e-06, "loss": 0.5974, "step": 7749 }, { "epoch": 1.008980866848887, "grad_norm": 2.7848570346832275, "learning_rate": 7.657737388146411e-06, "loss": 0.5288, "step": 7752 }, { "epoch": 1.0093713393205779, "grad_norm": 2.4875972270965576, "learning_rate": 7.65596699871567e-06, "loss": 0.5485, "step": 7755 }, { "epoch": 1.0097618117922686, "grad_norm": 2.5876026153564453, "learning_rate": 7.654196145293082e-06, "loss": 0.4749, "step": 7758 }, { "epoch": 1.0101522842639594, "grad_norm": 2.8364417552948, "learning_rate": 7.652424828188011e-06, "loss": 0.533, "step": 7761 }, { "epoch": 1.0105427567356502, "grad_norm": 2.3310322761535645, "learning_rate": 7.650653047709906e-06, "loss": 0.6102, "step": 7764 }, { "epoch": 1.010933229207341, "grad_norm": 4.061567306518555, "learning_rate": 7.648880804168287e-06, "loss": 0.4694, "step": 7767 }, { "epoch": 1.0113237016790317, "grad_norm": 2.307548999786377, "learning_rate": 7.647108097872763e-06, "loss": 0.5576, "step": 7770 }, { "epoch": 1.0117141741507223, "grad_norm": 2.360511541366577, "learning_rate": 7.645334929133024e-06, "loss": 0.6458, "step": 7773 }, { "epoch": 1.012104646622413, "grad_norm": 2.422124147415161, "learning_rate": 7.643561298258836e-06, "loss": 0.439, "step": 7776 }, { "epoch": 1.0124951190941038, "grad_norm": 2.6831116676330566, "learning_rate": 7.641787205560051e-06, "loss": 0.5588, "step": 7779 }, { "epoch": 1.0128855915657946, "grad_norm": 2.6215929985046387, "learning_rate": 7.6400126513466e-06, "loss": 0.5985, "step": 7782 }, { "epoch": 1.0132760640374854, "grad_norm": 3.8551838397979736, "learning_rate": 7.638237635928493e-06, "loss": 0.5659, "step": 7785 }, { "epoch": 1.0136665365091762, "grad_norm": 2.633751392364502, "learning_rate": 7.63646215961582e-06, "loss": 0.5141, "step": 7788 }, { "epoch": 1.014057008980867, "grad_norm": 2.5351755619049072, "learning_rate": 7.634686222718757e-06, "loss": 0.473, "step": 7791 }, { "epoch": 1.0144474814525577, "grad_norm": 2.423412799835205, "learning_rate": 7.632909825547549e-06, "loss": 0.4506, "step": 7794 }, { "epoch": 1.0148379539242482, "grad_norm": 2.8047244548797607, "learning_rate": 7.631132968412536e-06, "loss": 0.4848, "step": 7797 }, { "epoch": 1.015228426395939, "grad_norm": 2.771131753921509, "learning_rate": 7.629355651624126e-06, "loss": 0.4393, "step": 7800 }, { "epoch": 1.0156188988676298, "grad_norm": 3.0369348526000977, "learning_rate": 7.627577875492817e-06, "loss": 0.4429, "step": 7803 }, { "epoch": 1.0160093713393206, "grad_norm": 2.7859838008880615, "learning_rate": 7.625799640329181e-06, "loss": 0.4625, "step": 7806 }, { "epoch": 1.0163998438110113, "grad_norm": 2.7366514205932617, "learning_rate": 7.62402094644387e-06, "loss": 0.5831, "step": 7809 }, { "epoch": 1.0167903162827021, "grad_norm": 2.6900017261505127, "learning_rate": 7.622241794147622e-06, "loss": 0.461, "step": 7812 }, { "epoch": 1.0171807887543929, "grad_norm": 2.5590507984161377, "learning_rate": 7.6204621837512495e-06, "loss": 0.5923, "step": 7815 }, { "epoch": 1.0175712612260837, "grad_norm": 2.411809206008911, "learning_rate": 7.6186821155656435e-06, "loss": 0.4458, "step": 7818 }, { "epoch": 1.0179617336977742, "grad_norm": 3.017052412033081, "learning_rate": 7.616901589901781e-06, "loss": 0.5078, "step": 7821 }, { "epoch": 1.018352206169465, "grad_norm": 2.686730146408081, "learning_rate": 7.615120607070717e-06, "loss": 0.5053, "step": 7824 }, { "epoch": 1.0187426786411558, "grad_norm": 2.5899784564971924, "learning_rate": 7.613339167383585e-06, "loss": 0.5209, "step": 7827 }, { "epoch": 1.0191331511128465, "grad_norm": 2.8320977687835693, "learning_rate": 7.6115572711515975e-06, "loss": 0.588, "step": 7830 }, { "epoch": 1.0195236235845373, "grad_norm": 2.990689516067505, "learning_rate": 7.609774918686048e-06, "loss": 0.5531, "step": 7833 }, { "epoch": 1.019914096056228, "grad_norm": 2.3511035442352295, "learning_rate": 7.60799211029831e-06, "loss": 0.4349, "step": 7836 }, { "epoch": 1.0203045685279188, "grad_norm": 4.445713520050049, "learning_rate": 7.606208846299839e-06, "loss": 0.4621, "step": 7839 }, { "epoch": 1.0206950409996096, "grad_norm": 2.555044651031494, "learning_rate": 7.604425127002162e-06, "loss": 0.5091, "step": 7842 }, { "epoch": 1.0210855134713002, "grad_norm": 2.6413726806640625, "learning_rate": 7.602640952716897e-06, "loss": 0.5894, "step": 7845 }, { "epoch": 1.021475985942991, "grad_norm": 3.004298210144043, "learning_rate": 7.600856323755732e-06, "loss": 0.467, "step": 7848 }, { "epoch": 1.0218664584146817, "grad_norm": 3.5717835426330566, "learning_rate": 7.599071240430438e-06, "loss": 0.5908, "step": 7851 }, { "epoch": 1.0222569308863725, "grad_norm": 4.032369613647461, "learning_rate": 7.5972857030528654e-06, "loss": 0.5001, "step": 7854 }, { "epoch": 1.0226474033580633, "grad_norm": 2.679647922515869, "learning_rate": 7.595499711934946e-06, "loss": 0.549, "step": 7857 }, { "epoch": 1.023037875829754, "grad_norm": 2.6583425998687744, "learning_rate": 7.593713267388686e-06, "loss": 0.4949, "step": 7860 }, { "epoch": 1.0234283483014448, "grad_norm": 2.699441432952881, "learning_rate": 7.591926369726174e-06, "loss": 0.5405, "step": 7863 }, { "epoch": 1.0238188207731356, "grad_norm": 2.4705007076263428, "learning_rate": 7.590139019259579e-06, "loss": 0.6412, "step": 7866 }, { "epoch": 1.0242092932448263, "grad_norm": 2.6664178371429443, "learning_rate": 7.588351216301147e-06, "loss": 0.4991, "step": 7869 }, { "epoch": 1.024599765716517, "grad_norm": 2.7439897060394287, "learning_rate": 7.5865629611632005e-06, "loss": 0.5448, "step": 7872 }, { "epoch": 1.0249902381882077, "grad_norm": 2.5113844871520996, "learning_rate": 7.584774254158147e-06, "loss": 0.5012, "step": 7875 }, { "epoch": 1.0253807106598984, "grad_norm": 2.640521287918091, "learning_rate": 7.582985095598469e-06, "loss": 0.5282, "step": 7878 }, { "epoch": 1.0257711831315892, "grad_norm": 2.277463912963867, "learning_rate": 7.5811954857967285e-06, "loss": 0.4448, "step": 7881 }, { "epoch": 1.02616165560328, "grad_norm": 2.85617733001709, "learning_rate": 7.579405425065567e-06, "loss": 0.4533, "step": 7884 }, { "epoch": 1.0265521280749708, "grad_norm": 2.714686155319214, "learning_rate": 7.577614913717703e-06, "loss": 0.4766, "step": 7887 }, { "epoch": 1.0269426005466615, "grad_norm": 4.346346378326416, "learning_rate": 7.575823952065936e-06, "loss": 0.4783, "step": 7890 }, { "epoch": 1.0273330730183523, "grad_norm": 2.2822585105895996, "learning_rate": 7.574032540423145e-06, "loss": 0.4686, "step": 7893 }, { "epoch": 1.0277235454900429, "grad_norm": 2.7824783325195312, "learning_rate": 7.572240679102283e-06, "loss": 0.5298, "step": 7896 }, { "epoch": 1.0281140179617336, "grad_norm": 2.2325971126556396, "learning_rate": 7.570448368416387e-06, "loss": 0.4116, "step": 7899 }, { "epoch": 1.0285044904334244, "grad_norm": 2.75234317779541, "learning_rate": 7.568655608678566e-06, "loss": 0.5276, "step": 7902 }, { "epoch": 1.0288949629051152, "grad_norm": 2.7082951068878174, "learning_rate": 7.566862400202015e-06, "loss": 0.5465, "step": 7905 }, { "epoch": 1.029285435376806, "grad_norm": 2.646604299545288, "learning_rate": 7.5650687433000026e-06, "loss": 0.5085, "step": 7908 }, { "epoch": 1.0296759078484967, "grad_norm": 2.7950475215911865, "learning_rate": 7.563274638285876e-06, "loss": 0.4742, "step": 7911 }, { "epoch": 1.0300663803201875, "grad_norm": 2.585192918777466, "learning_rate": 7.5614800854730645e-06, "loss": 0.4821, "step": 7914 }, { "epoch": 1.0304568527918783, "grad_norm": 2.4464495182037354, "learning_rate": 7.559685085175069e-06, "loss": 0.5618, "step": 7917 }, { "epoch": 1.0308473252635688, "grad_norm": 2.5040671825408936, "learning_rate": 7.557889637705473e-06, "loss": 0.4845, "step": 7920 }, { "epoch": 1.0312377977352596, "grad_norm": 2.605333089828491, "learning_rate": 7.556093743377941e-06, "loss": 0.5349, "step": 7923 }, { "epoch": 1.0316282702069504, "grad_norm": 2.8396682739257812, "learning_rate": 7.5542974025062076e-06, "loss": 0.5243, "step": 7926 }, { "epoch": 1.0320187426786411, "grad_norm": 2.373145818710327, "learning_rate": 7.552500615404093e-06, "loss": 0.4422, "step": 7929 }, { "epoch": 1.032409215150332, "grad_norm": 2.7025516033172607, "learning_rate": 7.550703382385488e-06, "loss": 0.5432, "step": 7932 }, { "epoch": 1.0327996876220227, "grad_norm": 3.0393564701080322, "learning_rate": 7.548905703764371e-06, "loss": 0.4643, "step": 7935 }, { "epoch": 1.0331901600937134, "grad_norm": 2.475292921066284, "learning_rate": 7.5471075798547865e-06, "loss": 0.4984, "step": 7938 }, { "epoch": 1.0335806325654042, "grad_norm": 2.519763708114624, "learning_rate": 7.545309010970867e-06, "loss": 0.4986, "step": 7941 }, { "epoch": 1.0339711050370948, "grad_norm": 2.4332497119903564, "learning_rate": 7.54350999742682e-06, "loss": 0.4935, "step": 7944 }, { "epoch": 1.0343615775087855, "grad_norm": 3.0140507221221924, "learning_rate": 7.5417105395369235e-06, "loss": 0.4621, "step": 7947 }, { "epoch": 1.0347520499804763, "grad_norm": 2.577070713043213, "learning_rate": 7.539910637615546e-06, "loss": 0.4794, "step": 7950 }, { "epoch": 1.035142522452167, "grad_norm": 2.6139345169067383, "learning_rate": 7.538110291977123e-06, "loss": 0.476, "step": 7953 }, { "epoch": 1.0355329949238579, "grad_norm": 2.6360697746276855, "learning_rate": 7.53630950293617e-06, "loss": 0.4843, "step": 7956 }, { "epoch": 1.0359234673955486, "grad_norm": 2.5535480976104736, "learning_rate": 7.5345082708072836e-06, "loss": 0.4639, "step": 7959 }, { "epoch": 1.0363139398672394, "grad_norm": 2.9951012134552, "learning_rate": 7.532706595905133e-06, "loss": 0.5363, "step": 7962 }, { "epoch": 1.0367044123389302, "grad_norm": 2.2908568382263184, "learning_rate": 7.53090447854447e-06, "loss": 0.4262, "step": 7965 }, { "epoch": 1.037094884810621, "grad_norm": 2.7684085369110107, "learning_rate": 7.529101919040116e-06, "loss": 0.5007, "step": 7968 }, { "epoch": 1.0374853572823115, "grad_norm": 2.3997421264648438, "learning_rate": 7.5272989177069795e-06, "loss": 0.5509, "step": 7971 }, { "epoch": 1.0378758297540023, "grad_norm": 2.433640241622925, "learning_rate": 7.525495474860037e-06, "loss": 0.5496, "step": 7974 }, { "epoch": 1.038266302225693, "grad_norm": 3.6235389709472656, "learning_rate": 7.52369159081435e-06, "loss": 0.5144, "step": 7977 }, { "epoch": 1.0386567746973838, "grad_norm": 2.715113401412964, "learning_rate": 7.521887265885049e-06, "loss": 0.4914, "step": 7980 }, { "epoch": 1.0390472471690746, "grad_norm": 2.352916955947876, "learning_rate": 7.520082500387349e-06, "loss": 0.4605, "step": 7983 }, { "epoch": 1.0394377196407654, "grad_norm": 2.825352668762207, "learning_rate": 7.518277294636538e-06, "loss": 0.5833, "step": 7986 }, { "epoch": 1.0398281921124561, "grad_norm": 2.906801462173462, "learning_rate": 7.51647164894798e-06, "loss": 0.5857, "step": 7989 }, { "epoch": 1.040218664584147, "grad_norm": 2.4375948905944824, "learning_rate": 7.51466556363712e-06, "loss": 0.4738, "step": 7992 }, { "epoch": 1.0406091370558375, "grad_norm": 2.6101300716400146, "learning_rate": 7.512859039019476e-06, "loss": 0.4943, "step": 7995 }, { "epoch": 1.0409996095275282, "grad_norm": 7.20686149597168, "learning_rate": 7.511052075410644e-06, "loss": 0.5433, "step": 7998 }, { "epoch": 1.041390081999219, "grad_norm": 2.5502688884735107, "learning_rate": 7.509244673126298e-06, "loss": 0.5695, "step": 8001 }, { "epoch": 1.0417805544709098, "grad_norm": 3.865182399749756, "learning_rate": 7.507436832482185e-06, "loss": 0.4969, "step": 8004 }, { "epoch": 1.0421710269426006, "grad_norm": 2.659177541732788, "learning_rate": 7.5056285537941335e-06, "loss": 0.6332, "step": 8007 }, { "epoch": 1.0425614994142913, "grad_norm": 2.6241230964660645, "learning_rate": 7.503819837378042e-06, "loss": 0.4546, "step": 8010 }, { "epoch": 1.042951971885982, "grad_norm": 3.0326120853424072, "learning_rate": 7.502010683549894e-06, "loss": 0.5339, "step": 8013 }, { "epoch": 1.0433424443576729, "grad_norm": 2.694370985031128, "learning_rate": 7.500201092625743e-06, "loss": 0.4685, "step": 8016 }, { "epoch": 1.0437329168293634, "grad_norm": 2.683987855911255, "learning_rate": 7.498391064921721e-06, "loss": 0.5303, "step": 8019 }, { "epoch": 1.0441233893010542, "grad_norm": 3.791745901107788, "learning_rate": 7.496580600754036e-06, "loss": 0.4798, "step": 8022 }, { "epoch": 1.044513861772745, "grad_norm": 3.1122212409973145, "learning_rate": 7.494769700438971e-06, "loss": 0.5105, "step": 8025 }, { "epoch": 1.0449043342444357, "grad_norm": 2.536529302597046, "learning_rate": 7.492958364292888e-06, "loss": 0.4569, "step": 8028 }, { "epoch": 1.0452948067161265, "grad_norm": 2.741360902786255, "learning_rate": 7.491146592632223e-06, "loss": 0.5162, "step": 8031 }, { "epoch": 1.0456852791878173, "grad_norm": 2.675175905227661, "learning_rate": 7.48933438577349e-06, "loss": 0.4978, "step": 8034 }, { "epoch": 1.046075751659508, "grad_norm": 2.6189332008361816, "learning_rate": 7.487521744033275e-06, "loss": 0.4782, "step": 8037 }, { "epoch": 1.0464662241311988, "grad_norm": 3.0063090324401855, "learning_rate": 7.485708667728245e-06, "loss": 0.4818, "step": 8040 }, { "epoch": 1.0468566966028896, "grad_norm": 3.2774417400360107, "learning_rate": 7.483895157175141e-06, "loss": 0.4994, "step": 8043 }, { "epoch": 1.0472471690745802, "grad_norm": 2.4329867362976074, "learning_rate": 7.482081212690777e-06, "loss": 0.478, "step": 8046 }, { "epoch": 1.047637641546271, "grad_norm": 2.885873556137085, "learning_rate": 7.480266834592047e-06, "loss": 0.5012, "step": 8049 }, { "epoch": 1.0480281140179617, "grad_norm": 2.5598886013031006, "learning_rate": 7.478452023195918e-06, "loss": 0.4845, "step": 8052 }, { "epoch": 1.0484185864896525, "grad_norm": 3.7768354415893555, "learning_rate": 7.476636778819435e-06, "loss": 0.4945, "step": 8055 }, { "epoch": 1.0488090589613432, "grad_norm": 3.060943365097046, "learning_rate": 7.474821101779718e-06, "loss": 0.5462, "step": 8058 }, { "epoch": 1.049199531433034, "grad_norm": 2.9565443992614746, "learning_rate": 7.47300499239396e-06, "loss": 0.5928, "step": 8061 }, { "epoch": 1.0495900039047248, "grad_norm": 2.9404168128967285, "learning_rate": 7.471188450979432e-06, "loss": 0.6085, "step": 8064 }, { "epoch": 1.0499804763764156, "grad_norm": 2.50423264503479, "learning_rate": 7.4693714778534795e-06, "loss": 0.5282, "step": 8067 }, { "epoch": 1.0503709488481061, "grad_norm": 2.8531126976013184, "learning_rate": 7.467554073333525e-06, "loss": 0.5469, "step": 8070 }, { "epoch": 1.0507614213197969, "grad_norm": 2.6135265827178955, "learning_rate": 7.465736237737066e-06, "loss": 0.5384, "step": 8073 }, { "epoch": 1.0511518937914877, "grad_norm": 2.651714324951172, "learning_rate": 7.463917971381672e-06, "loss": 0.4965, "step": 8076 }, { "epoch": 1.0515423662631784, "grad_norm": 2.368147611618042, "learning_rate": 7.462099274584993e-06, "loss": 0.5733, "step": 8079 }, { "epoch": 1.0519328387348692, "grad_norm": 3.0812952518463135, "learning_rate": 7.460280147664749e-06, "loss": 0.4543, "step": 8082 }, { "epoch": 1.05232331120656, "grad_norm": 2.6388537883758545, "learning_rate": 7.458460590938741e-06, "loss": 0.5554, "step": 8085 }, { "epoch": 1.0527137836782507, "grad_norm": 2.5167076587677, "learning_rate": 7.4566406047248385e-06, "loss": 0.5311, "step": 8088 }, { "epoch": 1.0531042561499415, "grad_norm": 3.7936503887176514, "learning_rate": 7.454820189340989e-06, "loss": 0.5205, "step": 8091 }, { "epoch": 1.053494728621632, "grad_norm": 3.482969045639038, "learning_rate": 7.452999345105218e-06, "loss": 0.4703, "step": 8094 }, { "epoch": 1.0538852010933228, "grad_norm": 3.16825270652771, "learning_rate": 7.451178072335621e-06, "loss": 0.4716, "step": 8097 }, { "epoch": 1.0542756735650136, "grad_norm": 2.6491572856903076, "learning_rate": 7.449356371350371e-06, "loss": 0.5886, "step": 8100 }, { "epoch": 1.0546661460367044, "grad_norm": 2.7450640201568604, "learning_rate": 7.447534242467718e-06, "loss": 0.4394, "step": 8103 }, { "epoch": 1.0550566185083952, "grad_norm": 2.392484664916992, "learning_rate": 7.445711686005978e-06, "loss": 0.4185, "step": 8106 }, { "epoch": 1.055447090980086, "grad_norm": 2.569303274154663, "learning_rate": 7.443888702283555e-06, "loss": 0.4916, "step": 8109 }, { "epoch": 1.0558375634517767, "grad_norm": 2.565568447113037, "learning_rate": 7.442065291618915e-06, "loss": 0.4677, "step": 8112 }, { "epoch": 1.0562280359234675, "grad_norm": 2.54575252532959, "learning_rate": 7.440241454330606e-06, "loss": 0.4087, "step": 8115 }, { "epoch": 1.0566185083951583, "grad_norm": 3.055168390274048, "learning_rate": 7.438417190737248e-06, "loss": 0.442, "step": 8118 }, { "epoch": 1.0570089808668488, "grad_norm": 2.5168895721435547, "learning_rate": 7.436592501157538e-06, "loss": 0.4346, "step": 8121 }, { "epoch": 1.0573994533385396, "grad_norm": 2.5084915161132812, "learning_rate": 7.434767385910243e-06, "loss": 0.4508, "step": 8124 }, { "epoch": 1.0577899258102303, "grad_norm": 3.041224479675293, "learning_rate": 7.432941845314207e-06, "loss": 0.4823, "step": 8127 }, { "epoch": 1.0581803982819211, "grad_norm": 3.2237794399261475, "learning_rate": 7.431115879688351e-06, "loss": 0.5292, "step": 8130 }, { "epoch": 1.058570870753612, "grad_norm": 3.038773775100708, "learning_rate": 7.429289489351663e-06, "loss": 0.5, "step": 8133 }, { "epoch": 1.0589613432253027, "grad_norm": 3.018162488937378, "learning_rate": 7.4274626746232125e-06, "loss": 0.4816, "step": 8136 }, { "epoch": 1.0593518156969934, "grad_norm": 2.773552179336548, "learning_rate": 7.425635435822139e-06, "loss": 0.4643, "step": 8139 }, { "epoch": 1.0597422881686842, "grad_norm": 2.652228593826294, "learning_rate": 7.423807773267659e-06, "loss": 0.5507, "step": 8142 }, { "epoch": 1.0601327606403748, "grad_norm": 2.473726511001587, "learning_rate": 7.421979687279058e-06, "loss": 0.5871, "step": 8145 }, { "epoch": 1.0605232331120655, "grad_norm": 2.8862738609313965, "learning_rate": 7.420151178175702e-06, "loss": 0.5979, "step": 8148 }, { "epoch": 1.0609137055837563, "grad_norm": 3.1102709770202637, "learning_rate": 7.4183222462770266e-06, "loss": 0.4983, "step": 8151 }, { "epoch": 1.061304178055447, "grad_norm": 2.4361696243286133, "learning_rate": 7.416492891902541e-06, "loss": 0.4916, "step": 8154 }, { "epoch": 1.0616946505271379, "grad_norm": 2.7870354652404785, "learning_rate": 7.414663115371832e-06, "loss": 0.4924, "step": 8157 }, { "epoch": 1.0620851229988286, "grad_norm": 2.7369415760040283, "learning_rate": 7.412832917004556e-06, "loss": 0.5325, "step": 8160 }, { "epoch": 1.0624755954705194, "grad_norm": 2.71121883392334, "learning_rate": 7.411002297120444e-06, "loss": 0.5131, "step": 8163 }, { "epoch": 1.0628660679422102, "grad_norm": 2.594827890396118, "learning_rate": 7.409171256039305e-06, "loss": 0.468, "step": 8166 }, { "epoch": 1.0632565404139007, "grad_norm": 2.7998273372650146, "learning_rate": 7.407339794081013e-06, "loss": 0.4999, "step": 8169 }, { "epoch": 1.0636470128855915, "grad_norm": 2.753424882888794, "learning_rate": 7.405507911565526e-06, "loss": 0.517, "step": 8172 }, { "epoch": 1.0640374853572823, "grad_norm": 2.8335673809051514, "learning_rate": 7.403675608812866e-06, "loss": 0.4969, "step": 8175 }, { "epoch": 1.064427957828973, "grad_norm": 2.668989419937134, "learning_rate": 7.401842886143133e-06, "loss": 0.4657, "step": 8178 }, { "epoch": 1.0648184303006638, "grad_norm": 3.0732874870300293, "learning_rate": 7.400009743876502e-06, "loss": 0.4661, "step": 8181 }, { "epoch": 1.0652089027723546, "grad_norm": 2.5212926864624023, "learning_rate": 7.398176182333217e-06, "loss": 0.4812, "step": 8184 }, { "epoch": 1.0655993752440454, "grad_norm": 2.622633457183838, "learning_rate": 7.396342201833597e-06, "loss": 0.5319, "step": 8187 }, { "epoch": 1.0659898477157361, "grad_norm": 3.5963311195373535, "learning_rate": 7.394507802698037e-06, "loss": 0.52, "step": 8190 }, { "epoch": 1.066380320187427, "grad_norm": 2.81231951713562, "learning_rate": 7.392672985247002e-06, "loss": 0.5517, "step": 8193 }, { "epoch": 1.0667707926591175, "grad_norm": 2.674590826034546, "learning_rate": 7.390837749801027e-06, "loss": 0.5542, "step": 8196 }, { "epoch": 1.0671612651308082, "grad_norm": 2.3117876052856445, "learning_rate": 7.389002096680729e-06, "loss": 0.4823, "step": 8199 }, { "epoch": 1.067551737602499, "grad_norm": 2.682879686355591, "learning_rate": 7.387166026206789e-06, "loss": 0.4202, "step": 8202 }, { "epoch": 1.0679422100741898, "grad_norm": 3.0507800579071045, "learning_rate": 7.3853295386999665e-06, "loss": 0.5087, "step": 8205 }, { "epoch": 1.0683326825458805, "grad_norm": 3.6044976711273193, "learning_rate": 7.383492634481093e-06, "loss": 0.5968, "step": 8208 }, { "epoch": 1.0687231550175713, "grad_norm": 2.9628076553344727, "learning_rate": 7.381655313871069e-06, "loss": 0.5076, "step": 8211 }, { "epoch": 1.069113627489262, "grad_norm": 2.616960287094116, "learning_rate": 7.379817577190873e-06, "loss": 0.5251, "step": 8214 }, { "epoch": 1.0695040999609526, "grad_norm": 3.7109944820404053, "learning_rate": 7.377979424761551e-06, "loss": 0.5161, "step": 8217 }, { "epoch": 1.0698945724326434, "grad_norm": 2.695524215698242, "learning_rate": 7.376140856904227e-06, "loss": 0.525, "step": 8220 }, { "epoch": 1.0702850449043342, "grad_norm": 2.8099405765533447, "learning_rate": 7.374301873940093e-06, "loss": 0.5413, "step": 8223 }, { "epoch": 1.070675517376025, "grad_norm": 2.4617536067962646, "learning_rate": 7.372462476190417e-06, "loss": 0.5188, "step": 8226 }, { "epoch": 1.0710659898477157, "grad_norm": 2.8862340450286865, "learning_rate": 7.370622663976539e-06, "loss": 0.499, "step": 8229 }, { "epoch": 1.0714564623194065, "grad_norm": 2.755232334136963, "learning_rate": 7.3687824376198665e-06, "loss": 0.4981, "step": 8232 }, { "epoch": 1.0718469347910973, "grad_norm": 2.4629595279693604, "learning_rate": 7.3669417974418865e-06, "loss": 0.501, "step": 8235 }, { "epoch": 1.072237407262788, "grad_norm": 2.6113266944885254, "learning_rate": 7.365100743764153e-06, "loss": 0.4459, "step": 8238 }, { "epoch": 1.0726278797344788, "grad_norm": 2.6274919509887695, "learning_rate": 7.363259276908294e-06, "loss": 0.4977, "step": 8241 }, { "epoch": 1.0730183522061694, "grad_norm": 2.575099468231201, "learning_rate": 7.3614173971960134e-06, "loss": 0.5597, "step": 8244 }, { "epoch": 1.0734088246778601, "grad_norm": 2.7904398441314697, "learning_rate": 7.35957510494908e-06, "loss": 0.5409, "step": 8247 }, { "epoch": 1.073799297149551, "grad_norm": 2.6250545978546143, "learning_rate": 7.357732400489342e-06, "loss": 0.4675, "step": 8250 }, { "epoch": 1.0741897696212417, "grad_norm": 2.454293727874756, "learning_rate": 7.35588928413871e-06, "loss": 0.5445, "step": 8253 }, { "epoch": 1.0745802420929325, "grad_norm": 2.9731876850128174, "learning_rate": 7.354045756219177e-06, "loss": 0.5563, "step": 8256 }, { "epoch": 1.0749707145646232, "grad_norm": 2.4799342155456543, "learning_rate": 7.352201817052804e-06, "loss": 0.5015, "step": 8259 }, { "epoch": 1.075361187036314, "grad_norm": 2.2865350246429443, "learning_rate": 7.350357466961719e-06, "loss": 0.4247, "step": 8262 }, { "epoch": 1.0757516595080048, "grad_norm": 2.5727767944335938, "learning_rate": 7.348512706268132e-06, "loss": 0.5026, "step": 8265 }, { "epoch": 1.0761421319796955, "grad_norm": 2.5520944595336914, "learning_rate": 7.346667535294314e-06, "loss": 0.4901, "step": 8268 }, { "epoch": 1.076532604451386, "grad_norm": 2.7078912258148193, "learning_rate": 7.344821954362615e-06, "loss": 0.5435, "step": 8271 }, { "epoch": 1.0769230769230769, "grad_norm": 2.717442512512207, "learning_rate": 7.342975963795454e-06, "loss": 0.4969, "step": 8274 }, { "epoch": 1.0773135493947676, "grad_norm": 2.646892547607422, "learning_rate": 7.341129563915319e-06, "loss": 0.6138, "step": 8277 }, { "epoch": 1.0777040218664584, "grad_norm": 3.4911608695983887, "learning_rate": 7.339282755044776e-06, "loss": 0.4708, "step": 8280 }, { "epoch": 1.0780944943381492, "grad_norm": 2.682307243347168, "learning_rate": 7.337435537506456e-06, "loss": 0.4995, "step": 8283 }, { "epoch": 1.07848496680984, "grad_norm": 2.9293227195739746, "learning_rate": 7.335587911623065e-06, "loss": 0.4823, "step": 8286 }, { "epoch": 1.0788754392815307, "grad_norm": 2.717480182647705, "learning_rate": 7.33373987771738e-06, "loss": 0.4743, "step": 8289 }, { "epoch": 1.0792659117532213, "grad_norm": 2.61503005027771, "learning_rate": 7.331891436112246e-06, "loss": 0.4466, "step": 8292 }, { "epoch": 1.079656384224912, "grad_norm": 2.5344207286834717, "learning_rate": 7.330042587130586e-06, "loss": 0.4884, "step": 8295 }, { "epoch": 1.0800468566966028, "grad_norm": 2.708646774291992, "learning_rate": 7.328193331095387e-06, "loss": 0.4836, "step": 8298 }, { "epoch": 1.0804373291682936, "grad_norm": 2.689146041870117, "learning_rate": 7.326343668329711e-06, "loss": 0.5197, "step": 8301 }, { "epoch": 1.0808278016399844, "grad_norm": 2.5231661796569824, "learning_rate": 7.324493599156688e-06, "loss": 0.5055, "step": 8304 }, { "epoch": 1.0812182741116751, "grad_norm": 2.644646167755127, "learning_rate": 7.322643123899525e-06, "loss": 0.4866, "step": 8307 }, { "epoch": 1.081608746583366, "grad_norm": 3.1305809020996094, "learning_rate": 7.3207922428814935e-06, "loss": 0.5115, "step": 8310 }, { "epoch": 1.0819992190550567, "grad_norm": 2.661428928375244, "learning_rate": 7.318940956425941e-06, "loss": 0.5316, "step": 8313 }, { "epoch": 1.0823896915267475, "grad_norm": 2.929919958114624, "learning_rate": 7.317089264856281e-06, "loss": 0.5167, "step": 8316 }, { "epoch": 1.082780163998438, "grad_norm": 2.6820735931396484, "learning_rate": 7.315237168496e-06, "loss": 0.5193, "step": 8319 }, { "epoch": 1.0831706364701288, "grad_norm": 2.9934213161468506, "learning_rate": 7.313384667668657e-06, "loss": 0.4755, "step": 8322 }, { "epoch": 1.0835611089418196, "grad_norm": 2.795145273208618, "learning_rate": 7.311531762697879e-06, "loss": 0.4628, "step": 8325 }, { "epoch": 1.0839515814135103, "grad_norm": 2.4267120361328125, "learning_rate": 7.309678453907365e-06, "loss": 0.4883, "step": 8328 }, { "epoch": 1.084342053885201, "grad_norm": 2.76674485206604, "learning_rate": 7.307824741620883e-06, "loss": 0.4929, "step": 8331 }, { "epoch": 1.0847325263568919, "grad_norm": 2.8393499851226807, "learning_rate": 7.3059706261622745e-06, "loss": 0.5926, "step": 8334 }, { "epoch": 1.0851229988285827, "grad_norm": 2.8024489879608154, "learning_rate": 7.304116107855449e-06, "loss": 0.5291, "step": 8337 }, { "epoch": 1.0855134713002734, "grad_norm": 2.2711691856384277, "learning_rate": 7.302261187024386e-06, "loss": 0.5723, "step": 8340 }, { "epoch": 1.085903943771964, "grad_norm": 2.544989585876465, "learning_rate": 7.300405863993136e-06, "loss": 0.4604, "step": 8343 }, { "epoch": 1.0862944162436547, "grad_norm": 4.123943328857422, "learning_rate": 7.298550139085823e-06, "loss": 0.4671, "step": 8346 }, { "epoch": 1.0866848887153455, "grad_norm": 2.6443090438842773, "learning_rate": 7.296694012626635e-06, "loss": 0.4958, "step": 8349 }, { "epoch": 1.0870753611870363, "grad_norm": 2.406564474105835, "learning_rate": 7.294837484939835e-06, "loss": 0.4587, "step": 8352 }, { "epoch": 1.087465833658727, "grad_norm": 2.6871235370635986, "learning_rate": 7.292980556349754e-06, "loss": 0.5546, "step": 8355 }, { "epoch": 1.0878563061304178, "grad_norm": 2.515164375305176, "learning_rate": 7.291123227180793e-06, "loss": 0.46, "step": 8358 }, { "epoch": 1.0882467786021086, "grad_norm": 2.631863832473755, "learning_rate": 7.289265497757424e-06, "loss": 0.4499, "step": 8361 }, { "epoch": 1.0886372510737994, "grad_norm": 3.6877024173736572, "learning_rate": 7.287407368404189e-06, "loss": 0.519, "step": 8364 }, { "epoch": 1.08902772354549, "grad_norm": 2.77193021774292, "learning_rate": 7.2855488394456975e-06, "loss": 0.5345, "step": 8367 }, { "epoch": 1.0894181960171807, "grad_norm": 2.9497268199920654, "learning_rate": 7.283689911206633e-06, "loss": 0.4815, "step": 8370 }, { "epoch": 1.0898086684888715, "grad_norm": 2.750422477722168, "learning_rate": 7.281830584011745e-06, "loss": 0.4844, "step": 8373 }, { "epoch": 1.0901991409605623, "grad_norm": 2.9934771060943604, "learning_rate": 7.279970858185854e-06, "loss": 0.4681, "step": 8376 }, { "epoch": 1.090589613432253, "grad_norm": 2.8141846656799316, "learning_rate": 7.278110734053852e-06, "loss": 0.5763, "step": 8379 }, { "epoch": 1.0909800859039438, "grad_norm": 2.7781667709350586, "learning_rate": 7.276250211940695e-06, "loss": 0.5492, "step": 8382 }, { "epoch": 1.0913705583756346, "grad_norm": 2.797043800354004, "learning_rate": 7.274389292171416e-06, "loss": 0.4497, "step": 8385 }, { "epoch": 1.0917610308473253, "grad_norm": 2.90187406539917, "learning_rate": 7.272527975071111e-06, "loss": 0.4726, "step": 8388 }, { "epoch": 1.0921515033190161, "grad_norm": 2.9917635917663574, "learning_rate": 7.270666260964949e-06, "loss": 0.5161, "step": 8391 }, { "epoch": 1.0925419757907067, "grad_norm": 2.316232919692993, "learning_rate": 7.268804150178171e-06, "loss": 0.4295, "step": 8394 }, { "epoch": 1.0929324482623974, "grad_norm": 2.60370135307312, "learning_rate": 7.266941643036077e-06, "loss": 0.4582, "step": 8397 }, { "epoch": 1.0933229207340882, "grad_norm": 2.546489953994751, "learning_rate": 7.26507873986405e-06, "loss": 0.4716, "step": 8400 }, { "epoch": 1.093713393205779, "grad_norm": 2.750613212585449, "learning_rate": 7.26321544098753e-06, "loss": 0.5778, "step": 8403 }, { "epoch": 1.0941038656774698, "grad_norm": 2.4992191791534424, "learning_rate": 7.261351746732035e-06, "loss": 0.4807, "step": 8406 }, { "epoch": 1.0944943381491605, "grad_norm": 2.8816723823547363, "learning_rate": 7.2594876574231465e-06, "loss": 0.5364, "step": 8409 }, { "epoch": 1.0948848106208513, "grad_norm": 2.80865478515625, "learning_rate": 7.257623173386516e-06, "loss": 0.546, "step": 8412 }, { "epoch": 1.095275283092542, "grad_norm": 2.4753668308258057, "learning_rate": 7.25575829494787e-06, "loss": 0.426, "step": 8415 }, { "epoch": 1.0956657555642326, "grad_norm": 2.413982629776001, "learning_rate": 7.253893022432993e-06, "loss": 0.4686, "step": 8418 }, { "epoch": 1.0960562280359234, "grad_norm": 2.514122247695923, "learning_rate": 7.2520273561677455e-06, "loss": 0.5196, "step": 8421 }, { "epoch": 1.0964467005076142, "grad_norm": 2.6076467037200928, "learning_rate": 7.25016129647806e-06, "loss": 0.5595, "step": 8424 }, { "epoch": 1.096837172979305, "grad_norm": 3.611821174621582, "learning_rate": 7.248294843689927e-06, "loss": 0.5155, "step": 8427 }, { "epoch": 1.0972276454509957, "grad_norm": 3.5970561504364014, "learning_rate": 7.246427998129414e-06, "loss": 0.5353, "step": 8430 }, { "epoch": 1.0976181179226865, "grad_norm": 2.886139392852783, "learning_rate": 7.244560760122656e-06, "loss": 0.5134, "step": 8433 }, { "epoch": 1.0980085903943773, "grad_norm": 2.6416497230529785, "learning_rate": 7.242693129995857e-06, "loss": 0.4561, "step": 8436 }, { "epoch": 1.098399062866068, "grad_norm": 2.9228434562683105, "learning_rate": 7.2408251080752845e-06, "loss": 0.4602, "step": 8439 }, { "epoch": 1.0987895353377586, "grad_norm": 2.7232162952423096, "learning_rate": 7.2389566946872795e-06, "loss": 0.4795, "step": 8442 }, { "epoch": 1.0991800078094494, "grad_norm": 2.603848934173584, "learning_rate": 7.23708789015825e-06, "loss": 0.5097, "step": 8445 }, { "epoch": 1.0995704802811401, "grad_norm": 3.2897467613220215, "learning_rate": 7.235218694814673e-06, "loss": 0.5706, "step": 8448 }, { "epoch": 1.099960952752831, "grad_norm": 2.6159188747406006, "learning_rate": 7.233349108983091e-06, "loss": 0.5107, "step": 8451 }, { "epoch": 1.1003514252245217, "grad_norm": 2.4405086040496826, "learning_rate": 7.231479132990118e-06, "loss": 0.5245, "step": 8454 }, { "epoch": 1.1007418976962124, "grad_norm": 2.8570423126220703, "learning_rate": 7.229608767162437e-06, "loss": 0.4532, "step": 8457 }, { "epoch": 1.1011323701679032, "grad_norm": 3.2515525817871094, "learning_rate": 7.2277380118267924e-06, "loss": 0.5231, "step": 8460 }, { "epoch": 1.101522842639594, "grad_norm": 2.619053602218628, "learning_rate": 7.2258668673100055e-06, "loss": 0.4558, "step": 8463 }, { "epoch": 1.1019133151112848, "grad_norm": 3.4879095554351807, "learning_rate": 7.223995333938958e-06, "loss": 0.5, "step": 8466 }, { "epoch": 1.1023037875829753, "grad_norm": 2.678898334503174, "learning_rate": 7.222123412040605e-06, "loss": 0.5018, "step": 8469 }, { "epoch": 1.102694260054666, "grad_norm": 2.831430196762085, "learning_rate": 7.220251101941966e-06, "loss": 0.5194, "step": 8472 }, { "epoch": 1.1030847325263569, "grad_norm": 2.5068278312683105, "learning_rate": 7.21837840397013e-06, "loss": 0.5053, "step": 8475 }, { "epoch": 1.1034752049980476, "grad_norm": 4.6476898193359375, "learning_rate": 7.216505318452254e-06, "loss": 0.4749, "step": 8478 }, { "epoch": 1.1038656774697384, "grad_norm": 2.4245364665985107, "learning_rate": 7.214631845715563e-06, "loss": 0.6265, "step": 8481 }, { "epoch": 1.1042561499414292, "grad_norm": 2.79682993888855, "learning_rate": 7.212757986087346e-06, "loss": 0.5395, "step": 8484 }, { "epoch": 1.10464662241312, "grad_norm": 4.285904407501221, "learning_rate": 7.210883739894963e-06, "loss": 0.4866, "step": 8487 }, { "epoch": 1.1050370948848107, "grad_norm": 2.6022517681121826, "learning_rate": 7.209009107465843e-06, "loss": 0.4493, "step": 8490 }, { "epoch": 1.1054275673565013, "grad_norm": 2.655932664871216, "learning_rate": 7.207134089127479e-06, "loss": 0.4609, "step": 8493 }, { "epoch": 1.105818039828192, "grad_norm": 2.3036811351776123, "learning_rate": 7.205258685207433e-06, "loss": 0.5362, "step": 8496 }, { "epoch": 1.1062085122998828, "grad_norm": 3.0361623764038086, "learning_rate": 7.203382896033332e-06, "loss": 0.5391, "step": 8499 }, { "epoch": 1.1065989847715736, "grad_norm": 3.3767175674438477, "learning_rate": 7.201506721932876e-06, "loss": 0.5089, "step": 8502 }, { "epoch": 1.1069894572432644, "grad_norm": 2.5257315635681152, "learning_rate": 7.199630163233828e-06, "loss": 0.475, "step": 8505 }, { "epoch": 1.1073799297149551, "grad_norm": 2.7609949111938477, "learning_rate": 7.197753220264017e-06, "loss": 0.6277, "step": 8508 }, { "epoch": 1.107770402186646, "grad_norm": 2.56369686126709, "learning_rate": 7.1958758933513405e-06, "loss": 0.4942, "step": 8511 }, { "epoch": 1.1081608746583367, "grad_norm": 2.836712121963501, "learning_rate": 7.1939981828237646e-06, "loss": 0.4874, "step": 8514 }, { "epoch": 1.1085513471300272, "grad_norm": 2.9525368213653564, "learning_rate": 7.192120089009322e-06, "loss": 0.5098, "step": 8517 }, { "epoch": 1.108941819601718, "grad_norm": 2.7775232791900635, "learning_rate": 7.190241612236113e-06, "loss": 0.5331, "step": 8520 }, { "epoch": 1.1093322920734088, "grad_norm": 4.186012268066406, "learning_rate": 7.188362752832302e-06, "loss": 0.5345, "step": 8523 }, { "epoch": 1.1097227645450995, "grad_norm": 2.920480728149414, "learning_rate": 7.18648351112612e-06, "loss": 0.5221, "step": 8526 }, { "epoch": 1.1101132370167903, "grad_norm": 2.599139928817749, "learning_rate": 7.184603887445869e-06, "loss": 0.5322, "step": 8529 }, { "epoch": 1.110503709488481, "grad_norm": 2.4106132984161377, "learning_rate": 7.182723882119915e-06, "loss": 0.4708, "step": 8532 }, { "epoch": 1.1108941819601719, "grad_norm": 2.7855916023254395, "learning_rate": 7.18084349547669e-06, "loss": 0.5707, "step": 8535 }, { "epoch": 1.1112846544318626, "grad_norm": 2.641810655593872, "learning_rate": 7.178962727844694e-06, "loss": 0.5432, "step": 8538 }, { "epoch": 1.1116751269035534, "grad_norm": 2.4662587642669678, "learning_rate": 7.177081579552494e-06, "loss": 0.5, "step": 8541 }, { "epoch": 1.112065599375244, "grad_norm": 3.3329203128814697, "learning_rate": 7.175200050928722e-06, "loss": 0.4517, "step": 8544 }, { "epoch": 1.1124560718469347, "grad_norm": 2.6928961277008057, "learning_rate": 7.173318142302077e-06, "loss": 0.4792, "step": 8547 }, { "epoch": 1.1128465443186255, "grad_norm": 3.24125075340271, "learning_rate": 7.171435854001324e-06, "loss": 0.5087, "step": 8550 }, { "epoch": 1.1132370167903163, "grad_norm": 2.6062655448913574, "learning_rate": 7.169553186355296e-06, "loss": 0.5109, "step": 8553 }, { "epoch": 1.113627489262007, "grad_norm": 3.467337131500244, "learning_rate": 7.167670139692888e-06, "loss": 0.5299, "step": 8556 }, { "epoch": 1.1140179617336978, "grad_norm": 2.869731903076172, "learning_rate": 7.16578671434307e-06, "loss": 0.4565, "step": 8559 }, { "epoch": 1.1144084342053886, "grad_norm": 2.4524435997009277, "learning_rate": 7.163902910634868e-06, "loss": 0.4392, "step": 8562 }, { "epoch": 1.1147989066770791, "grad_norm": 4.4590067863464355, "learning_rate": 7.16201872889738e-06, "loss": 0.413, "step": 8565 }, { "epoch": 1.11518937914877, "grad_norm": 2.6730213165283203, "learning_rate": 7.160134169459768e-06, "loss": 0.4561, "step": 8568 }, { "epoch": 1.1155798516204607, "grad_norm": 2.4888358116149902, "learning_rate": 7.15824923265126e-06, "loss": 0.5105, "step": 8571 }, { "epoch": 1.1159703240921515, "grad_norm": 2.672909736633301, "learning_rate": 7.156363918801152e-06, "loss": 0.5565, "step": 8574 }, { "epoch": 1.1163607965638422, "grad_norm": 2.708988666534424, "learning_rate": 7.154478228238804e-06, "loss": 0.556, "step": 8577 }, { "epoch": 1.116751269035533, "grad_norm": 2.6112136840820312, "learning_rate": 7.152592161293642e-06, "loss": 0.5554, "step": 8580 }, { "epoch": 1.1171417415072238, "grad_norm": 2.5211217403411865, "learning_rate": 7.150705718295157e-06, "loss": 0.4915, "step": 8583 }, { "epoch": 1.1175322139789146, "grad_norm": 3.3289098739624023, "learning_rate": 7.1488188995729095e-06, "loss": 0.4083, "step": 8586 }, { "epoch": 1.1179226864506053, "grad_norm": 2.5261623859405518, "learning_rate": 7.146931705456518e-06, "loss": 0.4479, "step": 8589 }, { "epoch": 1.1183131589222959, "grad_norm": 2.5002522468566895, "learning_rate": 7.145044136275675e-06, "loss": 0.5424, "step": 8592 }, { "epoch": 1.1187036313939867, "grad_norm": 2.2631165981292725, "learning_rate": 7.143156192360135e-06, "loss": 0.4406, "step": 8595 }, { "epoch": 1.1190941038656774, "grad_norm": 2.8084895610809326, "learning_rate": 7.141267874039715e-06, "loss": 0.4845, "step": 8598 }, { "epoch": 1.1194845763373682, "grad_norm": 2.751574993133545, "learning_rate": 7.139379181644304e-06, "loss": 0.4932, "step": 8601 }, { "epoch": 1.119875048809059, "grad_norm": 2.6821987628936768, "learning_rate": 7.137490115503848e-06, "loss": 0.4911, "step": 8604 }, { "epoch": 1.1202655212807497, "grad_norm": 2.521465301513672, "learning_rate": 7.135600675948367e-06, "loss": 0.5492, "step": 8607 }, { "epoch": 1.1206559937524405, "grad_norm": 2.7268893718719482, "learning_rate": 7.133710863307941e-06, "loss": 0.5966, "step": 8610 }, { "epoch": 1.1210464662241313, "grad_norm": 3.1640634536743164, "learning_rate": 7.131820677912715e-06, "loss": 0.5761, "step": 8613 }, { "epoch": 1.121436938695822, "grad_norm": 2.8102428913116455, "learning_rate": 7.129930120092902e-06, "loss": 0.4964, "step": 8616 }, { "epoch": 1.1218274111675126, "grad_norm": 2.8150181770324707, "learning_rate": 7.128039190178776e-06, "loss": 0.5629, "step": 8619 }, { "epoch": 1.1222178836392034, "grad_norm": 2.691941976547241, "learning_rate": 7.1261478885006815e-06, "loss": 0.4933, "step": 8622 }, { "epoch": 1.1226083561108942, "grad_norm": 3.071762800216675, "learning_rate": 7.124256215389023e-06, "loss": 0.5538, "step": 8625 }, { "epoch": 1.122998828582585, "grad_norm": 3.8000946044921875, "learning_rate": 7.122364171174273e-06, "loss": 0.5261, "step": 8628 }, { "epoch": 1.1233893010542757, "grad_norm": 2.708369493484497, "learning_rate": 7.1204717561869684e-06, "loss": 0.537, "step": 8631 }, { "epoch": 1.1237797735259665, "grad_norm": 2.911895275115967, "learning_rate": 7.118578970757707e-06, "loss": 0.489, "step": 8634 }, { "epoch": 1.1241702459976572, "grad_norm": 2.5384697914123535, "learning_rate": 7.116685815217157e-06, "loss": 0.4744, "step": 8637 }, { "epoch": 1.1245607184693478, "grad_norm": 2.503662586212158, "learning_rate": 7.114792289896046e-06, "loss": 0.5045, "step": 8640 }, { "epoch": 1.1249511909410386, "grad_norm": 2.787290334701538, "learning_rate": 7.112898395125174e-06, "loss": 0.471, "step": 8643 }, { "epoch": 1.1253416634127293, "grad_norm": 2.368600368499756, "learning_rate": 7.111004131235396e-06, "loss": 0.4786, "step": 8646 }, { "epoch": 1.1257321358844201, "grad_norm": 2.707949638366699, "learning_rate": 7.109109498557636e-06, "loss": 0.5496, "step": 8649 }, { "epoch": 1.1261226083561109, "grad_norm": 2.765071392059326, "learning_rate": 7.107214497422885e-06, "loss": 0.5355, "step": 8652 }, { "epoch": 1.1265130808278017, "grad_norm": 3.1850650310516357, "learning_rate": 7.105319128162194e-06, "loss": 0.4455, "step": 8655 }, { "epoch": 1.1269035532994924, "grad_norm": 2.8872485160827637, "learning_rate": 7.10342339110668e-06, "loss": 0.5017, "step": 8658 }, { "epoch": 1.1272940257711832, "grad_norm": 3.188868761062622, "learning_rate": 7.101527286587524e-06, "loss": 0.52, "step": 8661 }, { "epoch": 1.127684498242874, "grad_norm": 2.5735905170440674, "learning_rate": 7.099630814935973e-06, "loss": 0.513, "step": 8664 }, { "epoch": 1.1280749707145645, "grad_norm": 2.725409507751465, "learning_rate": 7.097733976483335e-06, "loss": 0.5538, "step": 8667 }, { "epoch": 1.1284654431862553, "grad_norm": 2.974015235900879, "learning_rate": 7.095836771560984e-06, "loss": 0.5158, "step": 8670 }, { "epoch": 1.128855915657946, "grad_norm": 2.86106276512146, "learning_rate": 7.093939200500359e-06, "loss": 0.5367, "step": 8673 }, { "epoch": 1.1292463881296368, "grad_norm": 3.0025503635406494, "learning_rate": 7.092041263632961e-06, "loss": 0.5411, "step": 8676 }, { "epoch": 1.1296368606013276, "grad_norm": 3.5087170600891113, "learning_rate": 7.090142961290354e-06, "loss": 0.564, "step": 8679 }, { "epoch": 1.1300273330730184, "grad_norm": 2.5758073329925537, "learning_rate": 7.088244293804169e-06, "loss": 0.4342, "step": 8682 }, { "epoch": 1.1304178055447092, "grad_norm": 2.531656503677368, "learning_rate": 7.086345261506098e-06, "loss": 0.5625, "step": 8685 }, { "epoch": 1.1308082780164, "grad_norm": 2.7261338233947754, "learning_rate": 7.0844458647279e-06, "loss": 0.4574, "step": 8688 }, { "epoch": 1.1311987504880907, "grad_norm": 2.582547664642334, "learning_rate": 7.082546103801394e-06, "loss": 0.5195, "step": 8691 }, { "epoch": 1.1315892229597813, "grad_norm": 2.5053184032440186, "learning_rate": 7.080645979058466e-06, "loss": 0.4558, "step": 8694 }, { "epoch": 1.131979695431472, "grad_norm": 2.540330410003662, "learning_rate": 7.0787454908310614e-06, "loss": 0.5509, "step": 8697 }, { "epoch": 1.1323701679031628, "grad_norm": 2.711557149887085, "learning_rate": 7.076844639451193e-06, "loss": 0.5265, "step": 8700 }, { "epoch": 1.1327606403748536, "grad_norm": 2.436702013015747, "learning_rate": 7.074943425250933e-06, "loss": 0.4933, "step": 8703 }, { "epoch": 1.1331511128465444, "grad_norm": 2.6545193195343018, "learning_rate": 7.073041848562424e-06, "loss": 0.4736, "step": 8706 }, { "epoch": 1.1335415853182351, "grad_norm": 2.6401283740997314, "learning_rate": 7.071139909717865e-06, "loss": 0.5213, "step": 8709 }, { "epoch": 1.133932057789926, "grad_norm": 2.585341453552246, "learning_rate": 7.06923760904952e-06, "loss": 0.5262, "step": 8712 }, { "epoch": 1.1343225302616164, "grad_norm": 2.6922566890716553, "learning_rate": 7.067334946889718e-06, "loss": 0.5317, "step": 8715 }, { "epoch": 1.1347130027333072, "grad_norm": 2.583522081375122, "learning_rate": 7.06543192357085e-06, "loss": 0.4766, "step": 8718 }, { "epoch": 1.135103475204998, "grad_norm": 2.4560515880584717, "learning_rate": 7.06352853942537e-06, "loss": 0.5026, "step": 8721 }, { "epoch": 1.1354939476766888, "grad_norm": 2.9161624908447266, "learning_rate": 7.061624794785795e-06, "loss": 0.5901, "step": 8724 }, { "epoch": 1.1358844201483795, "grad_norm": 2.5634748935699463, "learning_rate": 7.059720689984705e-06, "loss": 0.496, "step": 8727 }, { "epoch": 1.1362748926200703, "grad_norm": 2.5747694969177246, "learning_rate": 7.0578162253547445e-06, "loss": 0.5148, "step": 8730 }, { "epoch": 1.136665365091761, "grad_norm": 2.320279121398926, "learning_rate": 7.055911401228618e-06, "loss": 0.4465, "step": 8733 }, { "epoch": 1.1370558375634519, "grad_norm": 2.7718331813812256, "learning_rate": 7.054006217939093e-06, "loss": 0.6083, "step": 8736 }, { "epoch": 1.1374463100351426, "grad_norm": 2.2558786869049072, "learning_rate": 7.052100675819006e-06, "loss": 0.4439, "step": 8739 }, { "epoch": 1.1378367825068332, "grad_norm": 2.8862807750701904, "learning_rate": 7.050194775201246e-06, "loss": 0.5045, "step": 8742 }, { "epoch": 1.138227254978524, "grad_norm": 2.867145538330078, "learning_rate": 7.048288516418772e-06, "loss": 0.5259, "step": 8745 }, { "epoch": 1.1386177274502147, "grad_norm": 2.575711965560913, "learning_rate": 7.046381899804602e-06, "loss": 0.562, "step": 8748 }, { "epoch": 1.1390081999219055, "grad_norm": 2.9559128284454346, "learning_rate": 7.044474925691821e-06, "loss": 0.5471, "step": 8751 }, { "epoch": 1.1393986723935963, "grad_norm": 2.526271343231201, "learning_rate": 7.042567594413571e-06, "loss": 0.5216, "step": 8754 }, { "epoch": 1.139789144865287, "grad_norm": 2.697111129760742, "learning_rate": 7.040659906303058e-06, "loss": 0.4498, "step": 8757 }, { "epoch": 1.1401796173369778, "grad_norm": 2.5852532386779785, "learning_rate": 7.038751861693553e-06, "loss": 0.4273, "step": 8760 }, { "epoch": 1.1405700898086686, "grad_norm": 2.4026541709899902, "learning_rate": 7.036843460918388e-06, "loss": 0.4601, "step": 8763 }, { "epoch": 1.1409605622803594, "grad_norm": 3.018265962600708, "learning_rate": 7.034934704310954e-06, "loss": 0.537, "step": 8766 }, { "epoch": 1.14135103475205, "grad_norm": 2.6514596939086914, "learning_rate": 7.0330255922047075e-06, "loss": 0.4801, "step": 8769 }, { "epoch": 1.1417415072237407, "grad_norm": 2.793213129043579, "learning_rate": 7.031116124933167e-06, "loss": 0.5271, "step": 8772 }, { "epoch": 1.1421319796954315, "grad_norm": 3.0178632736206055, "learning_rate": 7.029206302829914e-06, "loss": 0.4884, "step": 8775 }, { "epoch": 1.1425224521671222, "grad_norm": 2.857133388519287, "learning_rate": 7.027296126228586e-06, "loss": 0.5116, "step": 8778 }, { "epoch": 1.142912924638813, "grad_norm": 2.7881019115448, "learning_rate": 7.0253855954628925e-06, "loss": 0.4901, "step": 8781 }, { "epoch": 1.1433033971105038, "grad_norm": 2.7480220794677734, "learning_rate": 7.023474710866595e-06, "loss": 0.4977, "step": 8784 }, { "epoch": 1.1436938695821945, "grad_norm": 2.6863813400268555, "learning_rate": 7.021563472773522e-06, "loss": 0.476, "step": 8787 }, { "epoch": 1.144084342053885, "grad_norm": 2.9496448040008545, "learning_rate": 7.019651881517562e-06, "loss": 0.4938, "step": 8790 }, { "epoch": 1.1444748145255759, "grad_norm": 2.909560203552246, "learning_rate": 7.017739937432668e-06, "loss": 0.4599, "step": 8793 }, { "epoch": 1.1448652869972666, "grad_norm": 3.5830419063568115, "learning_rate": 7.015827640852852e-06, "loss": 0.5422, "step": 8796 }, { "epoch": 1.1452557594689574, "grad_norm": 3.0168848037719727, "learning_rate": 7.013914992112187e-06, "loss": 0.4376, "step": 8799 }, { "epoch": 1.1456462319406482, "grad_norm": 2.4287290573120117, "learning_rate": 7.0120019915448125e-06, "loss": 0.4951, "step": 8802 }, { "epoch": 1.146036704412339, "grad_norm": 2.5160810947418213, "learning_rate": 7.01008863948492e-06, "loss": 0.5767, "step": 8805 }, { "epoch": 1.1464271768840297, "grad_norm": 2.722810745239258, "learning_rate": 7.00817493626677e-06, "loss": 0.5479, "step": 8808 }, { "epoch": 1.1468176493557205, "grad_norm": 3.0669102668762207, "learning_rate": 7.006260882224684e-06, "loss": 0.6298, "step": 8811 }, { "epoch": 1.1472081218274113, "grad_norm": 2.84295392036438, "learning_rate": 7.004346477693042e-06, "loss": 0.4986, "step": 8814 }, { "epoch": 1.1475985942991018, "grad_norm": 2.5158627033233643, "learning_rate": 7.0024317230062884e-06, "loss": 0.5512, "step": 8817 }, { "epoch": 1.1479890667707926, "grad_norm": 2.525624990463257, "learning_rate": 7.0005166184989245e-06, "loss": 0.4828, "step": 8820 }, { "epoch": 1.1483795392424834, "grad_norm": 3.02347993850708, "learning_rate": 6.9986011645055175e-06, "loss": 0.483, "step": 8823 }, { "epoch": 1.1487700117141741, "grad_norm": 3.2486398220062256, "learning_rate": 6.99668536136069e-06, "loss": 0.6375, "step": 8826 }, { "epoch": 1.149160484185865, "grad_norm": 3.728907823562622, "learning_rate": 6.9947692093991295e-06, "loss": 0.5128, "step": 8829 }, { "epoch": 1.1495509566575557, "grad_norm": 3.2733185291290283, "learning_rate": 6.992852708955586e-06, "loss": 0.5249, "step": 8832 }, { "epoch": 1.1499414291292465, "grad_norm": 2.754006862640381, "learning_rate": 6.990935860364865e-06, "loss": 0.5313, "step": 8835 }, { "epoch": 1.150331901600937, "grad_norm": 2.4702494144439697, "learning_rate": 6.989018663961838e-06, "loss": 0.4722, "step": 8838 }, { "epoch": 1.1507223740726278, "grad_norm": 2.463707208633423, "learning_rate": 6.987101120081436e-06, "loss": 0.4612, "step": 8841 }, { "epoch": 1.1511128465443186, "grad_norm": 2.601174831390381, "learning_rate": 6.9851832290586465e-06, "loss": 0.4494, "step": 8844 }, { "epoch": 1.1515033190160093, "grad_norm": 2.667945623397827, "learning_rate": 6.983264991228525e-06, "loss": 0.4577, "step": 8847 }, { "epoch": 1.1518937914877, "grad_norm": 2.5175373554229736, "learning_rate": 6.981346406926179e-06, "loss": 0.5165, "step": 8850 }, { "epoch": 1.1522842639593909, "grad_norm": 2.911750078201294, "learning_rate": 6.979427476486786e-06, "loss": 0.5315, "step": 8853 }, { "epoch": 1.1526747364310816, "grad_norm": 2.7814128398895264, "learning_rate": 6.9775082002455775e-06, "loss": 0.4972, "step": 8856 }, { "epoch": 1.1530652089027724, "grad_norm": 2.6590986251831055, "learning_rate": 6.975588578537846e-06, "loss": 0.553, "step": 8859 }, { "epoch": 1.1534556813744632, "grad_norm": 2.5675268173217773, "learning_rate": 6.973668611698945e-06, "loss": 0.4934, "step": 8862 }, { "epoch": 1.1538461538461537, "grad_norm": 2.761390447616577, "learning_rate": 6.971748300064291e-06, "loss": 0.6005, "step": 8865 }, { "epoch": 1.1542366263178445, "grad_norm": 2.5827791690826416, "learning_rate": 6.969827643969356e-06, "loss": 0.4941, "step": 8868 }, { "epoch": 1.1546270987895353, "grad_norm": 2.479886293411255, "learning_rate": 6.9679066437496744e-06, "loss": 0.4605, "step": 8871 }, { "epoch": 1.155017571261226, "grad_norm": 2.5713136196136475, "learning_rate": 6.965985299740844e-06, "loss": 0.567, "step": 8874 }, { "epoch": 1.1554080437329168, "grad_norm": 2.7975733280181885, "learning_rate": 6.964063612278517e-06, "loss": 0.5209, "step": 8877 }, { "epoch": 1.1557985162046076, "grad_norm": 2.6016461849212646, "learning_rate": 6.962141581698408e-06, "loss": 0.4932, "step": 8880 }, { "epoch": 1.1561889886762984, "grad_norm": 3.229020357131958, "learning_rate": 6.960219208336293e-06, "loss": 0.5739, "step": 8883 }, { "epoch": 1.1565794611479892, "grad_norm": 2.7121598720550537, "learning_rate": 6.958296492528005e-06, "loss": 0.5604, "step": 8886 }, { "epoch": 1.15696993361968, "grad_norm": 3.215475082397461, "learning_rate": 6.956373434609441e-06, "loss": 0.5234, "step": 8889 }, { "epoch": 1.1573604060913705, "grad_norm": 2.619798421859741, "learning_rate": 6.954450034916552e-06, "loss": 0.4628, "step": 8892 }, { "epoch": 1.1577508785630612, "grad_norm": 2.3444454669952393, "learning_rate": 6.952526293785356e-06, "loss": 0.4755, "step": 8895 }, { "epoch": 1.158141351034752, "grad_norm": 2.6880340576171875, "learning_rate": 6.950602211551921e-06, "loss": 0.4455, "step": 8898 }, { "epoch": 1.1585318235064428, "grad_norm": 2.406294345855713, "learning_rate": 6.948677788552384e-06, "loss": 0.495, "step": 8901 }, { "epoch": 1.1589222959781336, "grad_norm": 3.347994327545166, "learning_rate": 6.946753025122938e-06, "loss": 0.4894, "step": 8904 }, { "epoch": 1.1593127684498243, "grad_norm": 2.817830801010132, "learning_rate": 6.944827921599832e-06, "loss": 0.5655, "step": 8907 }, { "epoch": 1.159703240921515, "grad_norm": 3.159911870956421, "learning_rate": 6.942902478319382e-06, "loss": 0.5178, "step": 8910 }, { "epoch": 1.1600937133932057, "grad_norm": 2.5966691970825195, "learning_rate": 6.940976695617954e-06, "loss": 0.5147, "step": 8913 }, { "epoch": 1.1604841858648964, "grad_norm": 2.7088546752929688, "learning_rate": 6.9390505738319815e-06, "loss": 0.472, "step": 8916 }, { "epoch": 1.1608746583365872, "grad_norm": 2.583479404449463, "learning_rate": 6.937124113297953e-06, "loss": 0.4644, "step": 8919 }, { "epoch": 1.161265130808278, "grad_norm": 3.7508015632629395, "learning_rate": 6.935197314352415e-06, "loss": 0.502, "step": 8922 }, { "epoch": 1.1616556032799688, "grad_norm": 2.6737987995147705, "learning_rate": 6.93327017733198e-06, "loss": 0.5079, "step": 8925 }, { "epoch": 1.1620460757516595, "grad_norm": 2.8131909370422363, "learning_rate": 6.93134270257331e-06, "loss": 0.5143, "step": 8928 }, { "epoch": 1.1624365482233503, "grad_norm": 3.131113290786743, "learning_rate": 6.929414890413135e-06, "loss": 0.4909, "step": 8931 }, { "epoch": 1.162827020695041, "grad_norm": 2.632091522216797, "learning_rate": 6.9274867411882355e-06, "loss": 0.4803, "step": 8934 }, { "epoch": 1.1632174931667318, "grad_norm": 3.4867727756500244, "learning_rate": 6.925558255235458e-06, "loss": 0.5363, "step": 8937 }, { "epoch": 1.1636079656384224, "grad_norm": 2.5351741313934326, "learning_rate": 6.923629432891704e-06, "loss": 0.465, "step": 8940 }, { "epoch": 1.1639984381101132, "grad_norm": 2.7857537269592285, "learning_rate": 6.921700274493935e-06, "loss": 0.5723, "step": 8943 }, { "epoch": 1.164388910581804, "grad_norm": 2.703540802001953, "learning_rate": 6.9197707803791714e-06, "loss": 0.5018, "step": 8946 }, { "epoch": 1.1647793830534947, "grad_norm": 2.7754833698272705, "learning_rate": 6.917840950884489e-06, "loss": 0.4933, "step": 8949 }, { "epoch": 1.1651698555251855, "grad_norm": 2.64084792137146, "learning_rate": 6.915910786347029e-06, "loss": 0.5401, "step": 8952 }, { "epoch": 1.1655603279968763, "grad_norm": 2.382725715637207, "learning_rate": 6.913980287103984e-06, "loss": 0.4424, "step": 8955 }, { "epoch": 1.165950800468567, "grad_norm": 2.396775245666504, "learning_rate": 6.912049453492609e-06, "loss": 0.4835, "step": 8958 }, { "epoch": 1.1663412729402578, "grad_norm": 2.7328484058380127, "learning_rate": 6.910118285850218e-06, "loss": 0.5132, "step": 8961 }, { "epoch": 1.1667317454119486, "grad_norm": 2.620654821395874, "learning_rate": 6.90818678451418e-06, "loss": 0.4648, "step": 8964 }, { "epoch": 1.1671222178836391, "grad_norm": 3.5922820568084717, "learning_rate": 6.906254949821926e-06, "loss": 0.5845, "step": 8967 }, { "epoch": 1.16751269035533, "grad_norm": 2.563399314880371, "learning_rate": 6.904322782110942e-06, "loss": 0.595, "step": 8970 }, { "epoch": 1.1679031628270207, "grad_norm": 3.2699246406555176, "learning_rate": 6.9023902817187735e-06, "loss": 0.4712, "step": 8973 }, { "epoch": 1.1682936352987114, "grad_norm": 2.6587018966674805, "learning_rate": 6.900457448983024e-06, "loss": 0.5394, "step": 8976 }, { "epoch": 1.1686841077704022, "grad_norm": 3.6553778648376465, "learning_rate": 6.898524284241357e-06, "loss": 0.5078, "step": 8979 }, { "epoch": 1.169074580242093, "grad_norm": 2.561161994934082, "learning_rate": 6.896590787831493e-06, "loss": 0.5351, "step": 8982 }, { "epoch": 1.1694650527137838, "grad_norm": 3.0311195850372314, "learning_rate": 6.894656960091206e-06, "loss": 0.5395, "step": 8985 }, { "epoch": 1.1698555251854743, "grad_norm": 2.600670337677002, "learning_rate": 6.892722801358336e-06, "loss": 0.4744, "step": 8988 }, { "epoch": 1.170245997657165, "grad_norm": 2.8165881633758545, "learning_rate": 6.890788311970773e-06, "loss": 0.5708, "step": 8991 }, { "epoch": 1.1706364701288559, "grad_norm": 3.0961506366729736, "learning_rate": 6.888853492266469e-06, "loss": 0.4359, "step": 8994 }, { "epoch": 1.1710269426005466, "grad_norm": 2.740213394165039, "learning_rate": 6.886918342583433e-06, "loss": 0.4675, "step": 8997 }, { "epoch": 1.1714174150722374, "grad_norm": 2.547696828842163, "learning_rate": 6.884982863259734e-06, "loss": 0.544, "step": 9000 }, { "epoch": 1.1718078875439282, "grad_norm": 2.924173355102539, "learning_rate": 6.883047054633494e-06, "loss": 0.4527, "step": 9003 }, { "epoch": 1.172198360015619, "grad_norm": 2.757521629333496, "learning_rate": 6.8811109170428935e-06, "loss": 0.4779, "step": 9006 }, { "epoch": 1.1725888324873097, "grad_norm": 2.3954832553863525, "learning_rate": 6.8791744508261735e-06, "loss": 0.4461, "step": 9009 }, { "epoch": 1.1729793049590005, "grad_norm": 2.38798189163208, "learning_rate": 6.877237656321631e-06, "loss": 0.5077, "step": 9012 }, { "epoch": 1.173369777430691, "grad_norm": 2.7606191635131836, "learning_rate": 6.875300533867619e-06, "loss": 0.5737, "step": 9015 }, { "epoch": 1.1737602499023818, "grad_norm": 2.400142192840576, "learning_rate": 6.873363083802547e-06, "loss": 0.5388, "step": 9018 }, { "epoch": 1.1741507223740726, "grad_norm": 3.098991632461548, "learning_rate": 6.8714253064648865e-06, "loss": 0.5075, "step": 9021 }, { "epoch": 1.1745411948457634, "grad_norm": 2.514508008956909, "learning_rate": 6.8694872021931625e-06, "loss": 0.5503, "step": 9024 }, { "epoch": 1.1749316673174541, "grad_norm": 2.7697696685791016, "learning_rate": 6.867548771325956e-06, "loss": 0.5224, "step": 9027 }, { "epoch": 1.175322139789145, "grad_norm": 2.679353952407837, "learning_rate": 6.865610014201909e-06, "loss": 0.5027, "step": 9030 }, { "epoch": 1.1757126122608357, "grad_norm": 2.770353317260742, "learning_rate": 6.863670931159716e-06, "loss": 0.4884, "step": 9033 }, { "epoch": 1.1761030847325264, "grad_norm": 2.543590784072876, "learning_rate": 6.861731522538133e-06, "loss": 0.4744, "step": 9036 }, { "epoch": 1.1764935572042172, "grad_norm": 3.0317022800445557, "learning_rate": 6.859791788675969e-06, "loss": 0.5167, "step": 9039 }, { "epoch": 1.1768840296759078, "grad_norm": 2.7118606567382812, "learning_rate": 6.8578517299120916e-06, "loss": 0.5217, "step": 9042 }, { "epoch": 1.1772745021475985, "grad_norm": 2.709425449371338, "learning_rate": 6.855911346585427e-06, "loss": 0.5527, "step": 9045 }, { "epoch": 1.1776649746192893, "grad_norm": 2.604459762573242, "learning_rate": 6.853970639034953e-06, "loss": 0.4463, "step": 9048 }, { "epoch": 1.17805544709098, "grad_norm": 2.796820878982544, "learning_rate": 6.852029607599707e-06, "loss": 0.5053, "step": 9051 }, { "epoch": 1.1784459195626709, "grad_norm": 3.0297458171844482, "learning_rate": 6.850088252618787e-06, "loss": 0.5075, "step": 9054 }, { "epoch": 1.1788363920343616, "grad_norm": 2.483165979385376, "learning_rate": 6.84814657443134e-06, "loss": 0.4528, "step": 9057 }, { "epoch": 1.1792268645060524, "grad_norm": 2.606029987335205, "learning_rate": 6.846204573376576e-06, "loss": 0.4552, "step": 9060 }, { "epoch": 1.179617336977743, "grad_norm": 2.4798717498779297, "learning_rate": 6.844262249793755e-06, "loss": 0.514, "step": 9063 }, { "epoch": 1.1800078094494337, "grad_norm": 2.535163164138794, "learning_rate": 6.842319604022201e-06, "loss": 0.5319, "step": 9066 }, { "epoch": 1.1803982819211245, "grad_norm": 2.676571846008301, "learning_rate": 6.840376636401285e-06, "loss": 0.4704, "step": 9069 }, { "epoch": 1.1807887543928153, "grad_norm": 2.842242956161499, "learning_rate": 6.838433347270444e-06, "loss": 0.496, "step": 9072 }, { "epoch": 1.181179226864506, "grad_norm": 2.5695507526397705, "learning_rate": 6.8364897369691655e-06, "loss": 0.5432, "step": 9075 }, { "epoch": 1.1815696993361968, "grad_norm": 2.565473794937134, "learning_rate": 6.834545805836992e-06, "loss": 0.4648, "step": 9078 }, { "epoch": 1.1819601718078876, "grad_norm": 2.2036924362182617, "learning_rate": 6.832601554213525e-06, "loss": 0.4596, "step": 9081 }, { "epoch": 1.1823506442795784, "grad_norm": 2.521524429321289, "learning_rate": 6.830656982438421e-06, "loss": 0.4932, "step": 9084 }, { "epoch": 1.1827411167512691, "grad_norm": 2.6289567947387695, "learning_rate": 6.828712090851395e-06, "loss": 0.5188, "step": 9087 }, { "epoch": 1.1831315892229597, "grad_norm": 2.556784152984619, "learning_rate": 6.826766879792215e-06, "loss": 0.5411, "step": 9090 }, { "epoch": 1.1835220616946505, "grad_norm": 3.470583915710449, "learning_rate": 6.824821349600702e-06, "loss": 0.5407, "step": 9093 }, { "epoch": 1.1839125341663412, "grad_norm": 2.5393197536468506, "learning_rate": 6.822875500616739e-06, "loss": 0.4989, "step": 9096 }, { "epoch": 1.184303006638032, "grad_norm": 2.5908868312835693, "learning_rate": 6.82092933318026e-06, "loss": 0.4926, "step": 9099 }, { "epoch": 1.1846934791097228, "grad_norm": 3.1211929321289062, "learning_rate": 6.818982847631258e-06, "loss": 0.517, "step": 9102 }, { "epoch": 1.1850839515814136, "grad_norm": 2.8722972869873047, "learning_rate": 6.8170360443097794e-06, "loss": 0.5064, "step": 9105 }, { "epoch": 1.1854744240531043, "grad_norm": 2.619140386581421, "learning_rate": 6.815088923555925e-06, "loss": 0.4872, "step": 9108 }, { "epoch": 1.185864896524795, "grad_norm": 2.4975740909576416, "learning_rate": 6.813141485709856e-06, "loss": 0.4782, "step": 9111 }, { "epoch": 1.1862553689964859, "grad_norm": 2.6161115169525146, "learning_rate": 6.811193731111782e-06, "loss": 0.4789, "step": 9114 }, { "epoch": 1.1866458414681764, "grad_norm": 3.224074602127075, "learning_rate": 6.809245660101974e-06, "loss": 0.5793, "step": 9117 }, { "epoch": 1.1870363139398672, "grad_norm": 2.5327675342559814, "learning_rate": 6.8072972730207555e-06, "loss": 0.4145, "step": 9120 }, { "epoch": 1.187426786411558, "grad_norm": 2.6852951049804688, "learning_rate": 6.8053485702085045e-06, "loss": 0.4999, "step": 9123 }, { "epoch": 1.1878172588832487, "grad_norm": 2.3816871643066406, "learning_rate": 6.8033995520056565e-06, "loss": 0.5238, "step": 9126 }, { "epoch": 1.1882077313549395, "grad_norm": 2.494142770767212, "learning_rate": 6.801450218752701e-06, "loss": 0.4763, "step": 9129 }, { "epoch": 1.1885982038266303, "grad_norm": 2.643019676208496, "learning_rate": 6.799500570790182e-06, "loss": 0.5655, "step": 9132 }, { "epoch": 1.188988676298321, "grad_norm": 2.7931339740753174, "learning_rate": 6.797550608458698e-06, "loss": 0.5176, "step": 9135 }, { "epoch": 1.1893791487700116, "grad_norm": 2.6419882774353027, "learning_rate": 6.795600332098905e-06, "loss": 0.4891, "step": 9138 }, { "epoch": 1.1897696212417024, "grad_norm": 2.681978940963745, "learning_rate": 6.793649742051511e-06, "loss": 0.4745, "step": 9141 }, { "epoch": 1.1901600937133932, "grad_norm": 2.605980396270752, "learning_rate": 6.7916988386572806e-06, "loss": 0.4831, "step": 9144 }, { "epoch": 1.190550566185084, "grad_norm": 2.5457866191864014, "learning_rate": 6.789747622257033e-06, "loss": 0.4513, "step": 9147 }, { "epoch": 1.1909410386567747, "grad_norm": 2.7378089427948, "learning_rate": 6.787796093191638e-06, "loss": 0.5176, "step": 9150 }, { "epoch": 1.1913315111284655, "grad_norm": 2.543762683868408, "learning_rate": 6.785844251802031e-06, "loss": 0.5133, "step": 9153 }, { "epoch": 1.1917219836001562, "grad_norm": 2.86997652053833, "learning_rate": 6.783892098429187e-06, "loss": 0.5483, "step": 9156 }, { "epoch": 1.192112456071847, "grad_norm": 3.302907943725586, "learning_rate": 6.781939633414146e-06, "loss": 0.5072, "step": 9159 }, { "epoch": 1.1925029285435378, "grad_norm": 2.7741358280181885, "learning_rate": 6.779986857098002e-06, "loss": 0.4742, "step": 9162 }, { "epoch": 1.1928934010152283, "grad_norm": 2.5811760425567627, "learning_rate": 6.778033769821896e-06, "loss": 0.5102, "step": 9165 }, { "epoch": 1.1932838734869191, "grad_norm": 4.686474800109863, "learning_rate": 6.776080371927033e-06, "loss": 0.4987, "step": 9168 }, { "epoch": 1.1936743459586099, "grad_norm": 3.189173460006714, "learning_rate": 6.774126663754663e-06, "loss": 0.464, "step": 9171 }, { "epoch": 1.1940648184303007, "grad_norm": 2.439980983734131, "learning_rate": 6.7721726456461e-06, "loss": 0.447, "step": 9174 }, { "epoch": 1.1944552909019914, "grad_norm": 2.823490858078003, "learning_rate": 6.770218317942701e-06, "loss": 0.4427, "step": 9177 }, { "epoch": 1.1948457633736822, "grad_norm": 2.5944924354553223, "learning_rate": 6.768263680985888e-06, "loss": 0.4201, "step": 9180 }, { "epoch": 1.195236235845373, "grad_norm": 3.171487808227539, "learning_rate": 6.766308735117129e-06, "loss": 0.615, "step": 9183 }, { "epoch": 1.1956267083170635, "grad_norm": 2.696826219558716, "learning_rate": 6.764353480677949e-06, "loss": 0.5554, "step": 9186 }, { "epoch": 1.1960171807887543, "grad_norm": 2.919696569442749, "learning_rate": 6.762397918009929e-06, "loss": 0.5412, "step": 9189 }, { "epoch": 1.196407653260445, "grad_norm": 2.540598154067993, "learning_rate": 6.760442047454699e-06, "loss": 0.5434, "step": 9192 }, { "epoch": 1.1967981257321358, "grad_norm": 2.6347830295562744, "learning_rate": 6.758485869353948e-06, "loss": 0.4581, "step": 9195 }, { "epoch": 1.1971885982038266, "grad_norm": 2.45042085647583, "learning_rate": 6.756529384049415e-06, "loss": 0.5198, "step": 9198 }, { "epoch": 1.1975790706755174, "grad_norm": 3.13641357421875, "learning_rate": 6.754572591882892e-06, "loss": 0.46, "step": 9201 }, { "epoch": 1.1979695431472082, "grad_norm": 3.144970417022705, "learning_rate": 6.752615493196231e-06, "loss": 0.4713, "step": 9204 }, { "epoch": 1.198360015618899, "grad_norm": 2.6827385425567627, "learning_rate": 6.750658088331326e-06, "loss": 0.5368, "step": 9207 }, { "epoch": 1.1987504880905897, "grad_norm": 2.579334259033203, "learning_rate": 6.7487003776301394e-06, "loss": 0.5117, "step": 9210 }, { "epoch": 1.1991409605622803, "grad_norm": 2.422724962234497, "learning_rate": 6.746742361434675e-06, "loss": 0.3792, "step": 9213 }, { "epoch": 1.199531433033971, "grad_norm": 2.511230230331421, "learning_rate": 6.744784040086994e-06, "loss": 0.5195, "step": 9216 }, { "epoch": 1.1999219055056618, "grad_norm": 2.661550283432007, "learning_rate": 6.742825413929213e-06, "loss": 0.4506, "step": 9219 }, { "epoch": 1.2003123779773526, "grad_norm": 2.8288450241088867, "learning_rate": 6.740866483303497e-06, "loss": 0.5301, "step": 9222 }, { "epoch": 1.2007028504490433, "grad_norm": 2.494079351425171, "learning_rate": 6.73890724855207e-06, "loss": 0.4576, "step": 9225 }, { "epoch": 1.2010933229207341, "grad_norm": 2.7465291023254395, "learning_rate": 6.736947710017202e-06, "loss": 0.4389, "step": 9228 }, { "epoch": 1.201483795392425, "grad_norm": 3.119746208190918, "learning_rate": 6.734987868041226e-06, "loss": 0.519, "step": 9231 }, { "epoch": 1.2018742678641157, "grad_norm": 2.939565896987915, "learning_rate": 6.733027722966519e-06, "loss": 0.5981, "step": 9234 }, { "epoch": 1.2022647403358064, "grad_norm": 2.5819921493530273, "learning_rate": 6.731067275135512e-06, "loss": 0.5423, "step": 9237 }, { "epoch": 1.202655212807497, "grad_norm": 2.769885301589966, "learning_rate": 6.7291065248906975e-06, "loss": 0.481, "step": 9240 }, { "epoch": 1.2030456852791878, "grad_norm": 2.8047449588775635, "learning_rate": 6.727145472574608e-06, "loss": 0.4647, "step": 9243 }, { "epoch": 1.2034361577508785, "grad_norm": 2.8334429264068604, "learning_rate": 6.725184118529839e-06, "loss": 0.525, "step": 9246 }, { "epoch": 1.2038266302225693, "grad_norm": 2.9303195476531982, "learning_rate": 6.723222463099033e-06, "loss": 0.4845, "step": 9249 }, { "epoch": 1.20421710269426, "grad_norm": 2.522890567779541, "learning_rate": 6.721260506624888e-06, "loss": 0.423, "step": 9252 }, { "epoch": 1.2046075751659509, "grad_norm": 3.1758248805999756, "learning_rate": 6.719298249450153e-06, "loss": 0.4912, "step": 9255 }, { "epoch": 1.2049980476376416, "grad_norm": 2.49548602104187, "learning_rate": 6.7173356919176315e-06, "loss": 0.4624, "step": 9258 }, { "epoch": 1.2053885201093322, "grad_norm": 2.884762763977051, "learning_rate": 6.7153728343701776e-06, "loss": 0.4791, "step": 9261 }, { "epoch": 1.205778992581023, "grad_norm": 2.4931530952453613, "learning_rate": 6.7134096771506976e-06, "loss": 0.542, "step": 9264 }, { "epoch": 1.2061694650527137, "grad_norm": 2.408496618270874, "learning_rate": 6.711446220602152e-06, "loss": 0.4824, "step": 9267 }, { "epoch": 1.2065599375244045, "grad_norm": 2.6510279178619385, "learning_rate": 6.70948246506755e-06, "loss": 0.5171, "step": 9270 }, { "epoch": 1.2069504099960953, "grad_norm": 2.6602025032043457, "learning_rate": 6.707518410889959e-06, "loss": 0.5202, "step": 9273 }, { "epoch": 1.207340882467786, "grad_norm": 3.2938411235809326, "learning_rate": 6.7055540584124955e-06, "loss": 0.4416, "step": 9276 }, { "epoch": 1.2077313549394768, "grad_norm": 2.6444809436798096, "learning_rate": 6.703589407978324e-06, "loss": 0.5285, "step": 9279 }, { "epoch": 1.2081218274111676, "grad_norm": 2.7918782234191895, "learning_rate": 6.7016244599306675e-06, "loss": 0.5213, "step": 9282 }, { "epoch": 1.2085122998828584, "grad_norm": 2.3297805786132812, "learning_rate": 6.699659214612797e-06, "loss": 0.4901, "step": 9285 }, { "epoch": 1.208902772354549, "grad_norm": 2.6607468128204346, "learning_rate": 6.697693672368038e-06, "loss": 0.518, "step": 9288 }, { "epoch": 1.2092932448262397, "grad_norm": 2.5616703033447266, "learning_rate": 6.695727833539765e-06, "loss": 0.47, "step": 9291 }, { "epoch": 1.2096837172979304, "grad_norm": 3.2793567180633545, "learning_rate": 6.693761698471406e-06, "loss": 0.552, "step": 9294 }, { "epoch": 1.2100741897696212, "grad_norm": 2.4998676776885986, "learning_rate": 6.6917952675064435e-06, "loss": 0.5091, "step": 9297 }, { "epoch": 1.210464662241312, "grad_norm": 2.6549830436706543, "learning_rate": 6.689828540988406e-06, "loss": 0.5316, "step": 9300 }, { "epoch": 1.2108551347130028, "grad_norm": 2.714661121368408, "learning_rate": 6.687861519260877e-06, "loss": 0.5968, "step": 9303 }, { "epoch": 1.2112456071846935, "grad_norm": 4.035742282867432, "learning_rate": 6.685894202667491e-06, "loss": 0.4923, "step": 9306 }, { "epoch": 1.2116360796563843, "grad_norm": 2.6630351543426514, "learning_rate": 6.683926591551934e-06, "loss": 0.5069, "step": 9309 }, { "epoch": 1.212026552128075, "grad_norm": 2.704087257385254, "learning_rate": 6.681958686257945e-06, "loss": 0.5276, "step": 9312 }, { "epoch": 1.2124170245997656, "grad_norm": 4.812976837158203, "learning_rate": 6.679990487129311e-06, "loss": 0.5277, "step": 9315 }, { "epoch": 1.2128074970714564, "grad_norm": 2.845241069793701, "learning_rate": 6.678021994509874e-06, "loss": 0.5935, "step": 9318 }, { "epoch": 1.2131979695431472, "grad_norm": 2.6971890926361084, "learning_rate": 6.676053208743525e-06, "loss": 0.5589, "step": 9321 }, { "epoch": 1.213588442014838, "grad_norm": 2.571746349334717, "learning_rate": 6.674084130174204e-06, "loss": 0.4472, "step": 9324 }, { "epoch": 1.2139789144865287, "grad_norm": 2.5498881340026855, "learning_rate": 6.67211475914591e-06, "loss": 0.496, "step": 9327 }, { "epoch": 1.2143693869582195, "grad_norm": 2.6578900814056396, "learning_rate": 6.670145096002683e-06, "loss": 0.5228, "step": 9330 }, { "epoch": 1.2147598594299103, "grad_norm": 2.7154641151428223, "learning_rate": 6.668175141088622e-06, "loss": 0.5132, "step": 9333 }, { "epoch": 1.2151503319016008, "grad_norm": 2.644035577774048, "learning_rate": 6.666204894747874e-06, "loss": 0.5248, "step": 9336 }, { "epoch": 1.2155408043732916, "grad_norm": 2.640990734100342, "learning_rate": 6.664234357324636e-06, "loss": 0.4759, "step": 9339 }, { "epoch": 1.2159312768449824, "grad_norm": 2.595244884490967, "learning_rate": 6.662263529163155e-06, "loss": 0.5262, "step": 9342 }, { "epoch": 1.2163217493166731, "grad_norm": 2.70097279548645, "learning_rate": 6.660292410607734e-06, "loss": 0.5814, "step": 9345 }, { "epoch": 1.216712221788364, "grad_norm": 3.5157313346862793, "learning_rate": 6.658321002002722e-06, "loss": 0.508, "step": 9348 }, { "epoch": 1.2171026942600547, "grad_norm": 2.607250213623047, "learning_rate": 6.656349303692519e-06, "loss": 0.501, "step": 9351 }, { "epoch": 1.2174931667317455, "grad_norm": 3.2209982872009277, "learning_rate": 6.654377316021576e-06, "loss": 0.4115, "step": 9354 }, { "epoch": 1.2178836392034362, "grad_norm": 2.4936866760253906, "learning_rate": 6.652405039334396e-06, "loss": 0.4835, "step": 9357 }, { "epoch": 1.218274111675127, "grad_norm": 2.4629316329956055, "learning_rate": 6.650432473975534e-06, "loss": 0.4565, "step": 9360 }, { "epoch": 1.2186645841468176, "grad_norm": 3.328362464904785, "learning_rate": 6.648459620289589e-06, "loss": 0.5637, "step": 9363 }, { "epoch": 1.2190550566185083, "grad_norm": 2.8752493858337402, "learning_rate": 6.646486478621217e-06, "loss": 0.4952, "step": 9366 }, { "epoch": 1.219445529090199, "grad_norm": 2.651597738265991, "learning_rate": 6.644513049315121e-06, "loss": 0.4809, "step": 9369 }, { "epoch": 1.2198360015618899, "grad_norm": 2.642346143722534, "learning_rate": 6.642539332716055e-06, "loss": 0.4713, "step": 9372 }, { "epoch": 1.2202264740335806, "grad_norm": 2.764996290206909, "learning_rate": 6.6405653291688225e-06, "loss": 0.5554, "step": 9375 }, { "epoch": 1.2206169465052714, "grad_norm": 2.660748243331909, "learning_rate": 6.638591039018277e-06, "loss": 0.475, "step": 9378 }, { "epoch": 1.2210074189769622, "grad_norm": 2.4974265098571777, "learning_rate": 6.636616462609324e-06, "loss": 0.5118, "step": 9381 }, { "epoch": 1.221397891448653, "grad_norm": 3.5743143558502197, "learning_rate": 6.634641600286921e-06, "loss": 0.5091, "step": 9384 }, { "epoch": 1.2217883639203437, "grad_norm": 2.744140625, "learning_rate": 6.632666452396067e-06, "loss": 0.4696, "step": 9387 }, { "epoch": 1.2221788363920343, "grad_norm": 2.7472984790802, "learning_rate": 6.630691019281819e-06, "loss": 0.4901, "step": 9390 }, { "epoch": 1.222569308863725, "grad_norm": 3.2367637157440186, "learning_rate": 6.6287153012892805e-06, "loss": 0.4851, "step": 9393 }, { "epoch": 1.2229597813354158, "grad_norm": 2.966864824295044, "learning_rate": 6.626739298763605e-06, "loss": 0.4722, "step": 9396 }, { "epoch": 1.2233502538071066, "grad_norm": 2.585268020629883, "learning_rate": 6.624763012049995e-06, "loss": 0.4988, "step": 9399 }, { "epoch": 1.2237407262787974, "grad_norm": 2.772038221359253, "learning_rate": 6.622786441493706e-06, "loss": 0.5594, "step": 9402 }, { "epoch": 1.2241311987504881, "grad_norm": 3.0204148292541504, "learning_rate": 6.62080958744004e-06, "loss": 0.4736, "step": 9405 }, { "epoch": 1.224521671222179, "grad_norm": 2.651444673538208, "learning_rate": 6.618832450234348e-06, "loss": 0.473, "step": 9408 }, { "epoch": 1.2249121436938695, "grad_norm": 3.069899559020996, "learning_rate": 6.6168550302220334e-06, "loss": 0.618, "step": 9411 }, { "epoch": 1.2253026161655602, "grad_norm": 2.877786874771118, "learning_rate": 6.6148773277485455e-06, "loss": 0.5673, "step": 9414 }, { "epoch": 1.225693088637251, "grad_norm": 2.4998676776885986, "learning_rate": 6.612899343159385e-06, "loss": 0.4381, "step": 9417 }, { "epoch": 1.2260835611089418, "grad_norm": 2.4126298427581787, "learning_rate": 6.610921076800103e-06, "loss": 0.5247, "step": 9420 }, { "epoch": 1.2264740335806326, "grad_norm": 2.9004220962524414, "learning_rate": 6.608942529016298e-06, "loss": 0.5114, "step": 9423 }, { "epoch": 1.2268645060523233, "grad_norm": 3.1982264518737793, "learning_rate": 6.606963700153618e-06, "loss": 0.5308, "step": 9426 }, { "epoch": 1.227254978524014, "grad_norm": 2.521232843399048, "learning_rate": 6.604984590557759e-06, "loss": 0.4787, "step": 9429 }, { "epoch": 1.2276454509957049, "grad_norm": 3.711055040359497, "learning_rate": 6.603005200574471e-06, "loss": 0.4829, "step": 9432 }, { "epoch": 1.2280359234673957, "grad_norm": 2.418956756591797, "learning_rate": 6.601025530549544e-06, "loss": 0.4596, "step": 9435 }, { "epoch": 1.2284263959390862, "grad_norm": 2.617719888687134, "learning_rate": 6.5990455808288256e-06, "loss": 0.5762, "step": 9438 }, { "epoch": 1.228816868410777, "grad_norm": 3.037282705307007, "learning_rate": 6.597065351758207e-06, "loss": 0.4857, "step": 9441 }, { "epoch": 1.2292073408824677, "grad_norm": 2.377511739730835, "learning_rate": 6.5950848436836335e-06, "loss": 0.4933, "step": 9444 }, { "epoch": 1.2295978133541585, "grad_norm": 2.6414594650268555, "learning_rate": 6.5931040569510926e-06, "loss": 0.5575, "step": 9447 }, { "epoch": 1.2299882858258493, "grad_norm": 3.2469959259033203, "learning_rate": 6.591122991906625e-06, "loss": 0.537, "step": 9450 }, { "epoch": 1.23037875829754, "grad_norm": 2.7081236839294434, "learning_rate": 6.5891416488963155e-06, "loss": 0.5333, "step": 9453 }, { "epoch": 1.2307692307692308, "grad_norm": 3.2291836738586426, "learning_rate": 6.587160028266306e-06, "loss": 0.5007, "step": 9456 }, { "epoch": 1.2311597032409216, "grad_norm": 3.501340389251709, "learning_rate": 6.585178130362776e-06, "loss": 0.4621, "step": 9459 }, { "epoch": 1.2315501757126124, "grad_norm": 2.6834263801574707, "learning_rate": 6.583195955531963e-06, "loss": 0.472, "step": 9462 }, { "epoch": 1.231940648184303, "grad_norm": 3.268996238708496, "learning_rate": 6.581213504120146e-06, "loss": 0.5295, "step": 9465 }, { "epoch": 1.2323311206559937, "grad_norm": 2.7084834575653076, "learning_rate": 6.579230776473658e-06, "loss": 0.543, "step": 9468 }, { "epoch": 1.2327215931276845, "grad_norm": 2.728600263595581, "learning_rate": 6.577247772938874e-06, "loss": 0.5473, "step": 9471 }, { "epoch": 1.2331120655993753, "grad_norm": 3.1851062774658203, "learning_rate": 6.575264493862221e-06, "loss": 0.5181, "step": 9474 }, { "epoch": 1.233502538071066, "grad_norm": 2.5818753242492676, "learning_rate": 6.573280939590178e-06, "loss": 0.433, "step": 9477 }, { "epoch": 1.2338930105427568, "grad_norm": 2.6541337966918945, "learning_rate": 6.571297110469261e-06, "loss": 0.503, "step": 9480 }, { "epoch": 1.2342834830144476, "grad_norm": 2.538254499435425, "learning_rate": 6.569313006846048e-06, "loss": 0.5565, "step": 9483 }, { "epoch": 1.2346739554861381, "grad_norm": 2.7005538940429688, "learning_rate": 6.567328629067151e-06, "loss": 0.5625, "step": 9486 }, { "epoch": 1.235064427957829, "grad_norm": 2.8296282291412354, "learning_rate": 6.565343977479241e-06, "loss": 0.5815, "step": 9489 }, { "epoch": 1.2354549004295197, "grad_norm": 2.405651092529297, "learning_rate": 6.563359052429031e-06, "loss": 0.4166, "step": 9492 }, { "epoch": 1.2358453729012104, "grad_norm": 2.3672516345977783, "learning_rate": 6.561373854263283e-06, "loss": 0.5841, "step": 9495 }, { "epoch": 1.2362358453729012, "grad_norm": 2.7860894203186035, "learning_rate": 6.559388383328808e-06, "loss": 0.4879, "step": 9498 }, { "epoch": 1.236626317844592, "grad_norm": 2.6302614212036133, "learning_rate": 6.5574026399724625e-06, "loss": 0.5354, "step": 9501 }, { "epoch": 1.2370167903162828, "grad_norm": 2.8287155628204346, "learning_rate": 6.5554166245411525e-06, "loss": 0.4181, "step": 9504 }, { "epoch": 1.2374072627879735, "grad_norm": 2.8489389419555664, "learning_rate": 6.55343033738183e-06, "loss": 0.4872, "step": 9507 }, { "epoch": 1.2377977352596643, "grad_norm": 3.101821184158325, "learning_rate": 6.551443778841495e-06, "loss": 0.5089, "step": 9510 }, { "epoch": 1.2381882077313549, "grad_norm": 3.262808084487915, "learning_rate": 6.549456949267197e-06, "loss": 0.5069, "step": 9513 }, { "epoch": 1.2385786802030456, "grad_norm": 2.664828300476074, "learning_rate": 6.547469849006027e-06, "loss": 0.4743, "step": 9516 }, { "epoch": 1.2389691526747364, "grad_norm": 2.8644232749938965, "learning_rate": 6.545482478405133e-06, "loss": 0.4491, "step": 9519 }, { "epoch": 1.2393596251464272, "grad_norm": 2.658860921859741, "learning_rate": 6.543494837811698e-06, "loss": 0.5217, "step": 9522 }, { "epoch": 1.239750097618118, "grad_norm": 2.8123228549957275, "learning_rate": 6.541506927572965e-06, "loss": 0.52, "step": 9525 }, { "epoch": 1.2401405700898087, "grad_norm": 2.8045687675476074, "learning_rate": 6.539518748036212e-06, "loss": 0.463, "step": 9528 }, { "epoch": 1.2405310425614995, "grad_norm": 2.54270076751709, "learning_rate": 6.537530299548774e-06, "loss": 0.5799, "step": 9531 }, { "epoch": 1.24092151503319, "grad_norm": 2.9174578189849854, "learning_rate": 6.535541582458027e-06, "loss": 0.4923, "step": 9534 }, { "epoch": 1.2413119875048808, "grad_norm": 3.1313931941986084, "learning_rate": 6.533552597111395e-06, "loss": 0.5614, "step": 9537 }, { "epoch": 1.2417024599765716, "grad_norm": 2.6302151679992676, "learning_rate": 6.531563343856352e-06, "loss": 0.5368, "step": 9540 }, { "epoch": 1.2420929324482624, "grad_norm": 2.6476080417633057, "learning_rate": 6.5295738230404125e-06, "loss": 0.5409, "step": 9543 }, { "epoch": 1.2424834049199531, "grad_norm": 2.8190321922302246, "learning_rate": 6.527584035011145e-06, "loss": 0.4559, "step": 9546 }, { "epoch": 1.242873877391644, "grad_norm": 3.6176981925964355, "learning_rate": 6.525593980116161e-06, "loss": 0.5597, "step": 9549 }, { "epoch": 1.2432643498633347, "grad_norm": 2.844064474105835, "learning_rate": 6.523603658703117e-06, "loss": 0.575, "step": 9552 }, { "epoch": 1.2436548223350254, "grad_norm": 3.310737371444702, "learning_rate": 6.52161307111972e-06, "loss": 0.5425, "step": 9555 }, { "epoch": 1.2440452948067162, "grad_norm": 2.583648204803467, "learning_rate": 6.519622217713719e-06, "loss": 0.4643, "step": 9558 }, { "epoch": 1.2444357672784068, "grad_norm": 2.5113677978515625, "learning_rate": 6.517631098832914e-06, "loss": 0.4568, "step": 9561 }, { "epoch": 1.2448262397500975, "grad_norm": 4.185740947723389, "learning_rate": 6.515639714825148e-06, "loss": 0.4515, "step": 9564 }, { "epoch": 1.2452167122217883, "grad_norm": 2.941195011138916, "learning_rate": 6.513648066038314e-06, "loss": 0.5402, "step": 9567 }, { "epoch": 1.245607184693479, "grad_norm": 2.2286741733551025, "learning_rate": 6.511656152820347e-06, "loss": 0.4221, "step": 9570 }, { "epoch": 1.2459976571651699, "grad_norm": 2.8112363815307617, "learning_rate": 6.509663975519228e-06, "loss": 0.5133, "step": 9573 }, { "epoch": 1.2463881296368606, "grad_norm": 2.6967437267303467, "learning_rate": 6.507671534482991e-06, "loss": 0.5319, "step": 9576 }, { "epoch": 1.2467786021085514, "grad_norm": 2.501628875732422, "learning_rate": 6.505678830059707e-06, "loss": 0.4809, "step": 9579 }, { "epoch": 1.2471690745802422, "grad_norm": 2.820998191833496, "learning_rate": 6.5036858625974986e-06, "loss": 0.5363, "step": 9582 }, { "epoch": 1.247559547051933, "grad_norm": 2.792283296585083, "learning_rate": 6.501692632444534e-06, "loss": 0.4598, "step": 9585 }, { "epoch": 1.2479500195236235, "grad_norm": 2.6301913261413574, "learning_rate": 6.499699139949025e-06, "loss": 0.4655, "step": 9588 }, { "epoch": 1.2483404919953143, "grad_norm": 2.497735023498535, "learning_rate": 6.497705385459232e-06, "loss": 0.5135, "step": 9591 }, { "epoch": 1.248730964467005, "grad_norm": 2.354994535446167, "learning_rate": 6.4957113693234586e-06, "loss": 0.4236, "step": 9594 }, { "epoch": 1.2491214369386958, "grad_norm": 3.677664041519165, "learning_rate": 6.493717091890056e-06, "loss": 0.4943, "step": 9597 }, { "epoch": 1.2495119094103866, "grad_norm": 2.9446117877960205, "learning_rate": 6.491722553507419e-06, "loss": 0.5157, "step": 9600 }, { "epoch": 1.2499023818820774, "grad_norm": 3.580937623977661, "learning_rate": 6.48972775452399e-06, "loss": 0.5131, "step": 9603 }, { "epoch": 1.2502928543537681, "grad_norm": 2.604628562927246, "learning_rate": 6.487732695288256e-06, "loss": 0.4847, "step": 9606 }, { "epoch": 1.2506833268254587, "grad_norm": 2.9998080730438232, "learning_rate": 6.4857373761487505e-06, "loss": 0.5538, "step": 9609 }, { "epoch": 1.2510737992971497, "grad_norm": 2.7411253452301025, "learning_rate": 6.4837417974540505e-06, "loss": 0.4721, "step": 9612 }, { "epoch": 1.2514642717688402, "grad_norm": 2.709904432296753, "learning_rate": 6.481745959552781e-06, "loss": 0.446, "step": 9615 }, { "epoch": 1.251854744240531, "grad_norm": 2.6614980697631836, "learning_rate": 6.479749862793609e-06, "loss": 0.5096, "step": 9618 }, { "epoch": 1.2522452167122218, "grad_norm": 2.518862724304199, "learning_rate": 6.477753507525249e-06, "loss": 0.5016, "step": 9621 }, { "epoch": 1.2526356891839125, "grad_norm": 2.293060302734375, "learning_rate": 6.475756894096458e-06, "loss": 0.5011, "step": 9624 }, { "epoch": 1.2530261616556033, "grad_norm": 2.695930004119873, "learning_rate": 6.4737600228560435e-06, "loss": 0.5012, "step": 9627 }, { "epoch": 1.253416634127294, "grad_norm": 2.469388246536255, "learning_rate": 6.471762894152853e-06, "loss": 0.489, "step": 9630 }, { "epoch": 1.2538071065989849, "grad_norm": 3.3694796562194824, "learning_rate": 6.469765508335783e-06, "loss": 0.435, "step": 9633 }, { "epoch": 1.2541975790706754, "grad_norm": 2.474310874938965, "learning_rate": 6.467767865753768e-06, "loss": 0.4652, "step": 9636 }, { "epoch": 1.2545880515423662, "grad_norm": 3.2254626750946045, "learning_rate": 6.465769966755795e-06, "loss": 0.5496, "step": 9639 }, { "epoch": 1.254978524014057, "grad_norm": 2.502992630004883, "learning_rate": 6.4637718116908945e-06, "loss": 0.4993, "step": 9642 }, { "epoch": 1.2553689964857477, "grad_norm": 2.6724181175231934, "learning_rate": 6.461773400908136e-06, "loss": 0.5079, "step": 9645 }, { "epoch": 1.2557594689574385, "grad_norm": 2.582564353942871, "learning_rate": 6.459774734756639e-06, "loss": 0.546, "step": 9648 }, { "epoch": 1.2561499414291293, "grad_norm": 2.8137545585632324, "learning_rate": 6.457775813585567e-06, "loss": 0.5178, "step": 9651 }, { "epoch": 1.25654041390082, "grad_norm": 3.133225679397583, "learning_rate": 6.4557766377441285e-06, "loss": 0.5144, "step": 9654 }, { "epoch": 1.2569308863725106, "grad_norm": 2.940943717956543, "learning_rate": 6.453777207581573e-06, "loss": 0.4967, "step": 9657 }, { "epoch": 1.2573213588442016, "grad_norm": 2.1819236278533936, "learning_rate": 6.451777523447197e-06, "loss": 0.439, "step": 9660 }, { "epoch": 1.2577118313158921, "grad_norm": 2.8423125743865967, "learning_rate": 6.449777585690344e-06, "loss": 0.5978, "step": 9663 }, { "epoch": 1.258102303787583, "grad_norm": 2.4884462356567383, "learning_rate": 6.447777394660394e-06, "loss": 0.5501, "step": 9666 }, { "epoch": 1.2584927762592737, "grad_norm": 2.5996439456939697, "learning_rate": 6.445776950706779e-06, "loss": 0.5206, "step": 9669 }, { "epoch": 1.2588832487309645, "grad_norm": 2.7258780002593994, "learning_rate": 6.4437762541789735e-06, "loss": 0.4822, "step": 9672 }, { "epoch": 1.2592737212026552, "grad_norm": 2.3117198944091797, "learning_rate": 6.441775305426494e-06, "loss": 0.4431, "step": 9675 }, { "epoch": 1.259664193674346, "grad_norm": 2.780622959136963, "learning_rate": 6.4397741047989e-06, "loss": 0.382, "step": 9678 }, { "epoch": 1.2600546661460368, "grad_norm": 2.5801424980163574, "learning_rate": 6.4377726526458e-06, "loss": 0.4101, "step": 9681 }, { "epoch": 1.2604451386177273, "grad_norm": 2.5400123596191406, "learning_rate": 6.435770949316843e-06, "loss": 0.4303, "step": 9684 }, { "epoch": 1.2608356110894183, "grad_norm": 2.6822738647460938, "learning_rate": 6.43376899516172e-06, "loss": 0.4843, "step": 9687 }, { "epoch": 1.2612260835611089, "grad_norm": 2.613121747970581, "learning_rate": 6.43176679053017e-06, "loss": 0.4722, "step": 9690 }, { "epoch": 1.2616165560327997, "grad_norm": 3.226043224334717, "learning_rate": 6.429764335771973e-06, "loss": 0.4816, "step": 9693 }, { "epoch": 1.2620070285044904, "grad_norm": 2.7644383907318115, "learning_rate": 6.427761631236955e-06, "loss": 0.4896, "step": 9696 }, { "epoch": 1.2623975009761812, "grad_norm": 2.632202625274658, "learning_rate": 6.4257586772749845e-06, "loss": 0.5031, "step": 9699 }, { "epoch": 1.262787973447872, "grad_norm": 3.154134750366211, "learning_rate": 6.423755474235972e-06, "loss": 0.5501, "step": 9702 }, { "epoch": 1.2631784459195627, "grad_norm": 2.597161054611206, "learning_rate": 6.421752022469874e-06, "loss": 0.592, "step": 9705 }, { "epoch": 1.2635689183912535, "grad_norm": 2.492748975753784, "learning_rate": 6.4197483223266865e-06, "loss": 0.5851, "step": 9708 }, { "epoch": 1.263959390862944, "grad_norm": 2.564765453338623, "learning_rate": 6.417744374156455e-06, "loss": 0.5529, "step": 9711 }, { "epoch": 1.2643498633346348, "grad_norm": 2.9283761978149414, "learning_rate": 6.4157401783092645e-06, "loss": 0.5119, "step": 9714 }, { "epoch": 1.2647403358063256, "grad_norm": 2.577751874923706, "learning_rate": 6.413735735135241e-06, "loss": 0.4277, "step": 9717 }, { "epoch": 1.2651308082780164, "grad_norm": 2.47666072845459, "learning_rate": 6.411731044984562e-06, "loss": 0.5617, "step": 9720 }, { "epoch": 1.2655212807497072, "grad_norm": 4.063993453979492, "learning_rate": 6.409726108207436e-06, "loss": 0.4548, "step": 9723 }, { "epoch": 1.265911753221398, "grad_norm": 2.558497428894043, "learning_rate": 6.407720925154126e-06, "loss": 0.5154, "step": 9726 }, { "epoch": 1.2663022256930887, "grad_norm": 2.8390650749206543, "learning_rate": 6.4057154961749324e-06, "loss": 0.5305, "step": 9729 }, { "epoch": 1.2666926981647793, "grad_norm": 2.5285894870758057, "learning_rate": 6.403709821620198e-06, "loss": 0.4643, "step": 9732 }, { "epoch": 1.2670831706364702, "grad_norm": 2.475567579269409, "learning_rate": 6.401703901840311e-06, "loss": 0.52, "step": 9735 }, { "epoch": 1.2674736431081608, "grad_norm": 2.560523271560669, "learning_rate": 6.3996977371857e-06, "loss": 0.5761, "step": 9738 }, { "epoch": 1.2678641155798516, "grad_norm": 2.965449810028076, "learning_rate": 6.397691328006839e-06, "loss": 0.481, "step": 9741 }, { "epoch": 1.2682545880515423, "grad_norm": 2.673515558242798, "learning_rate": 6.395684674654245e-06, "loss": 0.5183, "step": 9744 }, { "epoch": 1.2686450605232331, "grad_norm": 2.865999221801758, "learning_rate": 6.393677777478473e-06, "loss": 0.5216, "step": 9747 }, { "epoch": 1.2690355329949239, "grad_norm": 2.5293450355529785, "learning_rate": 6.391670636830126e-06, "loss": 0.496, "step": 9750 }, { "epoch": 1.2694260054666147, "grad_norm": 2.695828914642334, "learning_rate": 6.389663253059846e-06, "loss": 0.5161, "step": 9753 }, { "epoch": 1.2698164779383054, "grad_norm": 3.5010106563568115, "learning_rate": 6.3876556265183185e-06, "loss": 0.4878, "step": 9756 }, { "epoch": 1.270206950409996, "grad_norm": 3.126572847366333, "learning_rate": 6.3856477575562735e-06, "loss": 0.5395, "step": 9759 }, { "epoch": 1.270597422881687, "grad_norm": 3.661149024963379, "learning_rate": 6.38363964652448e-06, "loss": 0.5382, "step": 9762 }, { "epoch": 1.2709878953533775, "grad_norm": 2.6320817470550537, "learning_rate": 6.381631293773751e-06, "loss": 0.5113, "step": 9765 }, { "epoch": 1.2713783678250683, "grad_norm": 2.5496699810028076, "learning_rate": 6.3796226996549404e-06, "loss": 0.5602, "step": 9768 }, { "epoch": 1.271768840296759, "grad_norm": 2.5300614833831787, "learning_rate": 6.3776138645189475e-06, "loss": 0.5367, "step": 9771 }, { "epoch": 1.2721593127684498, "grad_norm": 2.5111279487609863, "learning_rate": 6.37560478871671e-06, "loss": 0.4835, "step": 9774 }, { "epoch": 1.2725497852401406, "grad_norm": 2.6758015155792236, "learning_rate": 6.37359547259921e-06, "loss": 0.5407, "step": 9777 }, { "epoch": 1.2729402577118314, "grad_norm": 2.8629322052001953, "learning_rate": 6.371585916517471e-06, "loss": 0.4738, "step": 9780 }, { "epoch": 1.2733307301835222, "grad_norm": 2.585297107696533, "learning_rate": 6.3695761208225585e-06, "loss": 0.496, "step": 9783 }, { "epoch": 1.2737212026552127, "grad_norm": 3.2529196739196777, "learning_rate": 6.3675660858655765e-06, "loss": 0.4647, "step": 9786 }, { "epoch": 1.2741116751269035, "grad_norm": 2.405118703842163, "learning_rate": 6.3655558119976765e-06, "loss": 0.5462, "step": 9789 }, { "epoch": 1.2745021475985943, "grad_norm": 2.579508066177368, "learning_rate": 6.363545299570051e-06, "loss": 0.4761, "step": 9792 }, { "epoch": 1.274892620070285, "grad_norm": 2.9745607376098633, "learning_rate": 6.361534548933928e-06, "loss": 0.5342, "step": 9795 }, { "epoch": 1.2752830925419758, "grad_norm": 2.931020975112915, "learning_rate": 6.359523560440585e-06, "loss": 0.4915, "step": 9798 }, { "epoch": 1.2756735650136666, "grad_norm": 2.9338982105255127, "learning_rate": 6.357512334441336e-06, "loss": 0.6454, "step": 9801 }, { "epoch": 1.2760640374853573, "grad_norm": 2.669160842895508, "learning_rate": 6.355500871287538e-06, "loss": 0.4672, "step": 9804 }, { "epoch": 1.276454509957048, "grad_norm": 2.584463119506836, "learning_rate": 6.353489171330588e-06, "loss": 0.5323, "step": 9807 }, { "epoch": 1.276844982428739, "grad_norm": 2.854555606842041, "learning_rate": 6.351477234921928e-06, "loss": 0.4001, "step": 9810 }, { "epoch": 1.2772354549004294, "grad_norm": 3.679852247238159, "learning_rate": 6.349465062413038e-06, "loss": 0.5706, "step": 9813 }, { "epoch": 1.2776259273721202, "grad_norm": 2.8256568908691406, "learning_rate": 6.34745265415544e-06, "loss": 0.4925, "step": 9816 }, { "epoch": 1.278016399843811, "grad_norm": 3.215463876724243, "learning_rate": 6.3454400105006985e-06, "loss": 0.5522, "step": 9819 }, { "epoch": 1.2784068723155018, "grad_norm": 2.2167320251464844, "learning_rate": 6.343427131800417e-06, "loss": 0.4712, "step": 9822 }, { "epoch": 1.2787973447871925, "grad_norm": 2.5765607357025146, "learning_rate": 6.341414018406242e-06, "loss": 0.5017, "step": 9825 }, { "epoch": 1.2791878172588833, "grad_norm": 2.5398025512695312, "learning_rate": 6.3394006706698615e-06, "loss": 0.4501, "step": 9828 }, { "epoch": 1.279578289730574, "grad_norm": 2.628095865249634, "learning_rate": 6.337387088943e-06, "loss": 0.5367, "step": 9831 }, { "epoch": 1.2799687622022646, "grad_norm": 2.486548662185669, "learning_rate": 6.335373273577429e-06, "loss": 0.5093, "step": 9834 }, { "epoch": 1.2803592346739554, "grad_norm": 2.8743481636047363, "learning_rate": 6.333359224924955e-06, "loss": 0.4291, "step": 9837 }, { "epoch": 1.2807497071456462, "grad_norm": 2.8362040519714355, "learning_rate": 6.331344943337428e-06, "loss": 0.4317, "step": 9840 }, { "epoch": 1.281140179617337, "grad_norm": 2.6773505210876465, "learning_rate": 6.329330429166741e-06, "loss": 0.4869, "step": 9843 }, { "epoch": 1.2815306520890277, "grad_norm": 2.4184865951538086, "learning_rate": 6.327315682764825e-06, "loss": 0.5656, "step": 9846 }, { "epoch": 1.2819211245607185, "grad_norm": 2.5900416374206543, "learning_rate": 6.325300704483653e-06, "loss": 0.4823, "step": 9849 }, { "epoch": 1.2823115970324093, "grad_norm": 2.582472085952759, "learning_rate": 6.3232854946752345e-06, "loss": 0.4939, "step": 9852 }, { "epoch": 1.2827020695041, "grad_norm": 2.6832892894744873, "learning_rate": 6.321270053691624e-06, "loss": 0.499, "step": 9855 }, { "epoch": 1.2830925419757908, "grad_norm": 2.630359649658203, "learning_rate": 6.319254381884914e-06, "loss": 0.4966, "step": 9858 }, { "epoch": 1.2834830144474814, "grad_norm": 2.7337570190429688, "learning_rate": 6.317238479607239e-06, "loss": 0.4762, "step": 9861 }, { "epoch": 1.2838734869191721, "grad_norm": 2.502153158187866, "learning_rate": 6.315222347210773e-06, "loss": 0.5532, "step": 9864 }, { "epoch": 1.284263959390863, "grad_norm": 2.532254934310913, "learning_rate": 6.31320598504773e-06, "loss": 0.5093, "step": 9867 }, { "epoch": 1.2846544318625537, "grad_norm": 2.7918901443481445, "learning_rate": 6.311189393470364e-06, "loss": 0.5242, "step": 9870 }, { "epoch": 1.2850449043342445, "grad_norm": 2.472677230834961, "learning_rate": 6.309172572830969e-06, "loss": 0.4492, "step": 9873 }, { "epoch": 1.2854353768059352, "grad_norm": 2.5617218017578125, "learning_rate": 6.30715552348188e-06, "loss": 0.4832, "step": 9876 }, { "epoch": 1.285825849277626, "grad_norm": 2.8872265815734863, "learning_rate": 6.30513824577547e-06, "loss": 0.5347, "step": 9879 }, { "epoch": 1.2862163217493165, "grad_norm": 2.543060302734375, "learning_rate": 6.3031207400641535e-06, "loss": 0.553, "step": 9882 }, { "epoch": 1.2866067942210075, "grad_norm": 2.5379858016967773, "learning_rate": 6.301103006700388e-06, "loss": 0.4914, "step": 9885 }, { "epoch": 1.286997266692698, "grad_norm": 2.7542781829833984, "learning_rate": 6.299085046036662e-06, "loss": 0.4767, "step": 9888 }, { "epoch": 1.2873877391643889, "grad_norm": 2.86118745803833, "learning_rate": 6.297066858425512e-06, "loss": 0.4647, "step": 9891 }, { "epoch": 1.2877782116360796, "grad_norm": 4.252162456512451, "learning_rate": 6.29504844421951e-06, "loss": 0.4904, "step": 9894 }, { "epoch": 1.2881686841077704, "grad_norm": 3.537123441696167, "learning_rate": 6.2930298037712704e-06, "loss": 0.5455, "step": 9897 }, { "epoch": 1.2885591565794612, "grad_norm": 3.0261664390563965, "learning_rate": 6.2910109374334434e-06, "loss": 0.5058, "step": 9900 }, { "epoch": 1.288949629051152, "grad_norm": 2.73046612739563, "learning_rate": 6.288991845558721e-06, "loss": 0.5526, "step": 9903 }, { "epoch": 1.2893401015228427, "grad_norm": 2.695645332336426, "learning_rate": 6.286972528499835e-06, "loss": 0.6358, "step": 9906 }, { "epoch": 1.2897305739945333, "grad_norm": 2.4671976566314697, "learning_rate": 6.284952986609556e-06, "loss": 0.5519, "step": 9909 }, { "epoch": 1.290121046466224, "grad_norm": 2.7098228931427, "learning_rate": 6.282933220240695e-06, "loss": 0.4461, "step": 9912 }, { "epoch": 1.2905115189379148, "grad_norm": 2.5738272666931152, "learning_rate": 6.280913229746096e-06, "loss": 0.4398, "step": 9915 }, { "epoch": 1.2909019914096056, "grad_norm": 2.765180826187134, "learning_rate": 6.278893015478652e-06, "loss": 0.6561, "step": 9918 }, { "epoch": 1.2912924638812964, "grad_norm": 2.4514658451080322, "learning_rate": 6.27687257779129e-06, "loss": 0.4876, "step": 9921 }, { "epoch": 1.2916829363529871, "grad_norm": 2.56715726852417, "learning_rate": 6.274851917036971e-06, "loss": 0.459, "step": 9924 }, { "epoch": 1.292073408824678, "grad_norm": 2.630765438079834, "learning_rate": 6.272831033568708e-06, "loss": 0.4325, "step": 9927 }, { "epoch": 1.2924638812963687, "grad_norm": 2.567124843597412, "learning_rate": 6.27080992773954e-06, "loss": 0.4577, "step": 9930 }, { "epoch": 1.2928543537680595, "grad_norm": 2.8632030487060547, "learning_rate": 6.26878859990255e-06, "loss": 0.5061, "step": 9933 }, { "epoch": 1.29324482623975, "grad_norm": 3.047332525253296, "learning_rate": 6.266767050410862e-06, "loss": 0.4394, "step": 9936 }, { "epoch": 1.2936352987114408, "grad_norm": 2.710806131362915, "learning_rate": 6.264745279617634e-06, "loss": 0.4752, "step": 9939 }, { "epoch": 1.2940257711831316, "grad_norm": 2.510077714920044, "learning_rate": 6.262723287876068e-06, "loss": 0.4945, "step": 9942 }, { "epoch": 1.2944162436548223, "grad_norm": 3.1183907985687256, "learning_rate": 6.260701075539397e-06, "loss": 0.4816, "step": 9945 }, { "epoch": 1.294806716126513, "grad_norm": 2.8329877853393555, "learning_rate": 6.258678642960902e-06, "loss": 0.4994, "step": 9948 }, { "epoch": 1.2951971885982039, "grad_norm": 2.953737735748291, "learning_rate": 6.256655990493896e-06, "loss": 0.4451, "step": 9951 }, { "epoch": 1.2955876610698946, "grad_norm": 2.770921468734741, "learning_rate": 6.254633118491732e-06, "loss": 0.5525, "step": 9954 }, { "epoch": 1.2959781335415852, "grad_norm": 2.6168859004974365, "learning_rate": 6.252610027307803e-06, "loss": 0.4346, "step": 9957 }, { "epoch": 1.2963686060132762, "grad_norm": 2.5253684520721436, "learning_rate": 6.250586717295535e-06, "loss": 0.4968, "step": 9960 }, { "epoch": 1.2967590784849667, "grad_norm": 2.5443472862243652, "learning_rate": 6.248563188808401e-06, "loss": 0.4767, "step": 9963 }, { "epoch": 1.2971495509566575, "grad_norm": 2.66750431060791, "learning_rate": 6.246539442199901e-06, "loss": 0.5305, "step": 9966 }, { "epoch": 1.2975400234283483, "grad_norm": 3.3093185424804688, "learning_rate": 6.244515477823585e-06, "loss": 0.5501, "step": 9969 }, { "epoch": 1.297930495900039, "grad_norm": 2.5666654109954834, "learning_rate": 6.242491296033033e-06, "loss": 0.5458, "step": 9972 }, { "epoch": 1.2983209683717298, "grad_norm": 2.224759340286255, "learning_rate": 6.240466897181865e-06, "loss": 0.4821, "step": 9975 }, { "epoch": 1.2987114408434206, "grad_norm": 2.936877727508545, "learning_rate": 6.23844228162374e-06, "loss": 0.4621, "step": 9978 }, { "epoch": 1.2991019133151114, "grad_norm": 2.833364248275757, "learning_rate": 6.236417449712353e-06, "loss": 0.4913, "step": 9981 }, { "epoch": 1.299492385786802, "grad_norm": 2.554704189300537, "learning_rate": 6.23439240180144e-06, "loss": 0.4368, "step": 9984 }, { "epoch": 1.2998828582584927, "grad_norm": 2.432718276977539, "learning_rate": 6.232367138244768e-06, "loss": 0.5568, "step": 9987 }, { "epoch": 1.3002733307301835, "grad_norm": 2.5396578311920166, "learning_rate": 6.230341659396152e-06, "loss": 0.5137, "step": 9990 }, { "epoch": 1.3006638032018742, "grad_norm": 2.720970392227173, "learning_rate": 6.228315965609437e-06, "loss": 0.474, "step": 9993 }, { "epoch": 1.301054275673565, "grad_norm": 3.310605525970459, "learning_rate": 6.226290057238506e-06, "loss": 0.4991, "step": 9996 }, { "epoch": 1.3014447481452558, "grad_norm": 2.8268284797668457, "learning_rate": 6.224263934637281e-06, "loss": 0.3968, "step": 9999 }, { "epoch": 1.3018352206169466, "grad_norm": 2.8904576301574707, "learning_rate": 6.222237598159723e-06, "loss": 0.5225, "step": 10002 }, { "epoch": 1.3022256930886371, "grad_norm": 2.540018081665039, "learning_rate": 6.220211048159826e-06, "loss": 0.4189, "step": 10005 }, { "epoch": 1.302616165560328, "grad_norm": 2.706920862197876, "learning_rate": 6.2181842849916284e-06, "loss": 0.5126, "step": 10008 }, { "epoch": 1.3030066380320187, "grad_norm": 3.605525016784668, "learning_rate": 6.216157309009198e-06, "loss": 0.4911, "step": 10011 }, { "epoch": 1.3033971105037094, "grad_norm": 2.4646642208099365, "learning_rate": 6.214130120566643e-06, "loss": 0.4261, "step": 10014 }, { "epoch": 1.3037875829754002, "grad_norm": 2.7980430126190186, "learning_rate": 6.212102720018112e-06, "loss": 0.483, "step": 10017 }, { "epoch": 1.304178055447091, "grad_norm": 2.9838016033172607, "learning_rate": 6.210075107717785e-06, "loss": 0.4596, "step": 10020 }, { "epoch": 1.3045685279187818, "grad_norm": 2.551286220550537, "learning_rate": 6.208047284019881e-06, "loss": 0.4226, "step": 10023 }, { "epoch": 1.3049590003904725, "grad_norm": 2.640700340270996, "learning_rate": 6.20601924927866e-06, "loss": 0.4905, "step": 10026 }, { "epoch": 1.3053494728621633, "grad_norm": 2.759453296661377, "learning_rate": 6.203991003848411e-06, "loss": 0.467, "step": 10029 }, { "epoch": 1.3057399453338538, "grad_norm": 2.907013177871704, "learning_rate": 6.201962548083468e-06, "loss": 0.487, "step": 10032 }, { "epoch": 1.3061304178055448, "grad_norm": 2.470888376235962, "learning_rate": 6.199933882338196e-06, "loss": 0.4687, "step": 10035 }, { "epoch": 1.3065208902772354, "grad_norm": 2.576772451400757, "learning_rate": 6.197905006966999e-06, "loss": 0.425, "step": 10038 }, { "epoch": 1.3069113627489262, "grad_norm": 2.713982582092285, "learning_rate": 6.195875922324318e-06, "loss": 0.4787, "step": 10041 }, { "epoch": 1.307301835220617, "grad_norm": 2.516087532043457, "learning_rate": 6.1938466287646285e-06, "loss": 0.4707, "step": 10044 }, { "epoch": 1.3076923076923077, "grad_norm": 2.569885492324829, "learning_rate": 6.191817126642444e-06, "loss": 0.458, "step": 10047 }, { "epoch": 1.3080827801639985, "grad_norm": 3.064462184906006, "learning_rate": 6.189787416312315e-06, "loss": 0.5624, "step": 10050 }, { "epoch": 1.3084732526356893, "grad_norm": 2.9090516567230225, "learning_rate": 6.187757498128827e-06, "loss": 0.5414, "step": 10053 }, { "epoch": 1.30886372510738, "grad_norm": 2.532334566116333, "learning_rate": 6.185727372446604e-06, "loss": 0.5469, "step": 10056 }, { "epoch": 1.3092541975790706, "grad_norm": 2.665684223175049, "learning_rate": 6.183697039620302e-06, "loss": 0.5266, "step": 10059 }, { "epoch": 1.3096446700507614, "grad_norm": 2.6369564533233643, "learning_rate": 6.181666500004617e-06, "loss": 0.4774, "step": 10062 }, { "epoch": 1.3100351425224521, "grad_norm": 2.548985481262207, "learning_rate": 6.179635753954283e-06, "loss": 0.4342, "step": 10065 }, { "epoch": 1.310425614994143, "grad_norm": 2.7811813354492188, "learning_rate": 6.177604801824062e-06, "loss": 0.4539, "step": 10068 }, { "epoch": 1.3108160874658337, "grad_norm": 2.4228222370147705, "learning_rate": 6.17557364396876e-06, "loss": 0.446, "step": 10071 }, { "epoch": 1.3112065599375244, "grad_norm": 2.639486789703369, "learning_rate": 6.173542280743214e-06, "loss": 0.5583, "step": 10074 }, { "epoch": 1.3115970324092152, "grad_norm": 3.5657029151916504, "learning_rate": 6.171510712502303e-06, "loss": 0.5388, "step": 10077 }, { "epoch": 1.3119875048809058, "grad_norm": 2.815169095993042, "learning_rate": 6.169478939600933e-06, "loss": 0.5293, "step": 10080 }, { "epoch": 1.3123779773525968, "grad_norm": 2.9462995529174805, "learning_rate": 6.167446962394054e-06, "loss": 0.5047, "step": 10083 }, { "epoch": 1.3127684498242873, "grad_norm": 2.7291252613067627, "learning_rate": 6.165414781236647e-06, "loss": 0.4032, "step": 10086 }, { "epoch": 1.313158922295978, "grad_norm": 2.6978507041931152, "learning_rate": 6.163382396483728e-06, "loss": 0.5027, "step": 10089 }, { "epoch": 1.3135493947676689, "grad_norm": 2.74324369430542, "learning_rate": 6.161349808490351e-06, "loss": 0.4742, "step": 10092 }, { "epoch": 1.3139398672393596, "grad_norm": 2.520897388458252, "learning_rate": 6.159317017611607e-06, "loss": 0.4695, "step": 10095 }, { "epoch": 1.3143303397110504, "grad_norm": 2.367314338684082, "learning_rate": 6.157284024202619e-06, "loss": 0.4521, "step": 10098 }, { "epoch": 1.3147208121827412, "grad_norm": 2.571027994155884, "learning_rate": 6.155250828618547e-06, "loss": 0.4327, "step": 10101 }, { "epoch": 1.315111284654432, "grad_norm": 2.6807379722595215, "learning_rate": 6.153217431214583e-06, "loss": 0.5297, "step": 10104 }, { "epoch": 1.3155017571261225, "grad_norm": 2.974998950958252, "learning_rate": 6.1511838323459624e-06, "loss": 0.5418, "step": 10107 }, { "epoch": 1.3158922295978135, "grad_norm": 2.9916090965270996, "learning_rate": 6.149150032367946e-06, "loss": 0.4774, "step": 10110 }, { "epoch": 1.316282702069504, "grad_norm": 3.1814825534820557, "learning_rate": 6.147116031635838e-06, "loss": 0.5896, "step": 10113 }, { "epoch": 1.3166731745411948, "grad_norm": 2.669076681137085, "learning_rate": 6.145081830504971e-06, "loss": 0.4869, "step": 10116 }, { "epoch": 1.3170636470128856, "grad_norm": 2.4291157722473145, "learning_rate": 6.1430474293307175e-06, "loss": 0.4442, "step": 10119 }, { "epoch": 1.3174541194845764, "grad_norm": 2.7454960346221924, "learning_rate": 6.141012828468484e-06, "loss": 0.506, "step": 10122 }, { "epoch": 1.3178445919562671, "grad_norm": 2.744112730026245, "learning_rate": 6.138978028273709e-06, "loss": 0.5368, "step": 10125 }, { "epoch": 1.318235064427958, "grad_norm": 2.635687828063965, "learning_rate": 6.13694302910187e-06, "loss": 0.4753, "step": 10128 }, { "epoch": 1.3186255368996487, "grad_norm": 2.746645212173462, "learning_rate": 6.134907831308473e-06, "loss": 0.4922, "step": 10131 }, { "epoch": 1.3190160093713392, "grad_norm": 2.9933664798736572, "learning_rate": 6.132872435249067e-06, "loss": 0.5043, "step": 10134 }, { "epoch": 1.31940648184303, "grad_norm": 2.8414406776428223, "learning_rate": 6.130836841279228e-06, "loss": 0.4989, "step": 10137 }, { "epoch": 1.3197969543147208, "grad_norm": 2.6284008026123047, "learning_rate": 6.128801049754572e-06, "loss": 0.5045, "step": 10140 }, { "epoch": 1.3201874267864115, "grad_norm": 2.5800864696502686, "learning_rate": 6.1267650610307496e-06, "loss": 0.4773, "step": 10143 }, { "epoch": 1.3205778992581023, "grad_norm": 3.2710795402526855, "learning_rate": 6.1247288754634395e-06, "loss": 0.4755, "step": 10146 }, { "epoch": 1.320968371729793, "grad_norm": 2.9163713455200195, "learning_rate": 6.122692493408362e-06, "loss": 0.5315, "step": 10149 }, { "epoch": 1.3213588442014839, "grad_norm": 3.6550145149230957, "learning_rate": 6.120655915221268e-06, "loss": 0.5406, "step": 10152 }, { "epoch": 1.3217493166731744, "grad_norm": 2.446315050125122, "learning_rate": 6.118619141257941e-06, "loss": 0.4987, "step": 10155 }, { "epoch": 1.3221397891448654, "grad_norm": 2.639047622680664, "learning_rate": 6.116582171874204e-06, "loss": 0.497, "step": 10158 }, { "epoch": 1.322530261616556, "grad_norm": 3.0936222076416016, "learning_rate": 6.11454500742591e-06, "loss": 0.451, "step": 10161 }, { "epoch": 1.3229207340882467, "grad_norm": 2.499833106994629, "learning_rate": 6.112507648268951e-06, "loss": 0.5365, "step": 10164 }, { "epoch": 1.3233112065599375, "grad_norm": 2.466047763824463, "learning_rate": 6.110470094759243e-06, "loss": 0.4519, "step": 10167 }, { "epoch": 1.3237016790316283, "grad_norm": 2.9250972270965576, "learning_rate": 6.1084323472527465e-06, "loss": 0.4602, "step": 10170 }, { "epoch": 1.324092151503319, "grad_norm": 2.504556894302368, "learning_rate": 6.106394406105451e-06, "loss": 0.4688, "step": 10173 }, { "epoch": 1.3244826239750098, "grad_norm": 3.0200345516204834, "learning_rate": 6.104356271673379e-06, "loss": 0.4318, "step": 10176 }, { "epoch": 1.3248730964467006, "grad_norm": 2.429672956466675, "learning_rate": 6.102317944312592e-06, "loss": 0.4749, "step": 10179 }, { "epoch": 1.3252635689183911, "grad_norm": 2.6296794414520264, "learning_rate": 6.1002794243791774e-06, "loss": 0.4971, "step": 10182 }, { "epoch": 1.325654041390082, "grad_norm": 2.859323024749756, "learning_rate": 6.098240712229263e-06, "loss": 0.553, "step": 10185 }, { "epoch": 1.3260445138617727, "grad_norm": 2.4780797958374023, "learning_rate": 6.096201808219005e-06, "loss": 0.4585, "step": 10188 }, { "epoch": 1.3264349863334635, "grad_norm": 2.843698263168335, "learning_rate": 6.094162712704599e-06, "loss": 0.5146, "step": 10191 }, { "epoch": 1.3268254588051542, "grad_norm": 2.588787317276001, "learning_rate": 6.0921234260422675e-06, "loss": 0.5512, "step": 10194 }, { "epoch": 1.327215931276845, "grad_norm": 2.7030463218688965, "learning_rate": 6.090083948588271e-06, "loss": 0.4705, "step": 10197 }, { "epoch": 1.3276064037485358, "grad_norm": 2.659209966659546, "learning_rate": 6.088044280698903e-06, "loss": 0.5182, "step": 10200 }, { "epoch": 1.3279968762202266, "grad_norm": 2.3089091777801514, "learning_rate": 6.086004422730487e-06, "loss": 0.5118, "step": 10203 }, { "epoch": 1.3283873486919173, "grad_norm": 3.2967066764831543, "learning_rate": 6.083964375039384e-06, "loss": 0.4932, "step": 10206 }, { "epoch": 1.3287778211636079, "grad_norm": 3.2022552490234375, "learning_rate": 6.081924137981984e-06, "loss": 0.5659, "step": 10209 }, { "epoch": 1.3291682936352986, "grad_norm": 2.5667223930358887, "learning_rate": 6.079883711914713e-06, "loss": 0.4576, "step": 10212 }, { "epoch": 1.3295587661069894, "grad_norm": 2.711458444595337, "learning_rate": 6.07784309719403e-06, "loss": 0.4464, "step": 10215 }, { "epoch": 1.3299492385786802, "grad_norm": 2.9583401679992676, "learning_rate": 6.075802294176425e-06, "loss": 0.5373, "step": 10218 }, { "epoch": 1.330339711050371, "grad_norm": 4.091579914093018, "learning_rate": 6.073761303218423e-06, "loss": 0.5137, "step": 10221 }, { "epoch": 1.3307301835220617, "grad_norm": 2.6844708919525146, "learning_rate": 6.071720124676579e-06, "loss": 0.6416, "step": 10224 }, { "epoch": 1.3311206559937525, "grad_norm": 2.5595126152038574, "learning_rate": 6.069678758907486e-06, "loss": 0.55, "step": 10227 }, { "epoch": 1.331511128465443, "grad_norm": 2.9795103073120117, "learning_rate": 6.067637206267761e-06, "loss": 0.4621, "step": 10230 }, { "epoch": 1.331901600937134, "grad_norm": 2.5117387771606445, "learning_rate": 6.065595467114064e-06, "loss": 0.4778, "step": 10233 }, { "epoch": 1.3322920734088246, "grad_norm": 2.6409218311309814, "learning_rate": 6.063553541803081e-06, "loss": 0.4728, "step": 10236 }, { "epoch": 1.3326825458805154, "grad_norm": 2.658996105194092, "learning_rate": 6.061511430691529e-06, "loss": 0.4789, "step": 10239 }, { "epoch": 1.3330730183522062, "grad_norm": 2.801865339279175, "learning_rate": 6.059469134136167e-06, "loss": 0.4593, "step": 10242 }, { "epoch": 1.333463490823897, "grad_norm": 2.8045008182525635, "learning_rate": 6.057426652493773e-06, "loss": 0.5459, "step": 10245 }, { "epoch": 1.3338539632955877, "grad_norm": 2.777100086212158, "learning_rate": 6.055383986121169e-06, "loss": 0.578, "step": 10248 }, { "epoch": 1.3342444357672785, "grad_norm": 2.5679752826690674, "learning_rate": 6.053341135375202e-06, "loss": 0.4892, "step": 10251 }, { "epoch": 1.3346349082389692, "grad_norm": 2.5535635948181152, "learning_rate": 6.051298100612755e-06, "loss": 0.5193, "step": 10254 }, { "epoch": 1.3350253807106598, "grad_norm": 2.5816335678100586, "learning_rate": 6.049254882190742e-06, "loss": 0.5621, "step": 10257 }, { "epoch": 1.3354158531823506, "grad_norm": 2.74711275100708, "learning_rate": 6.047211480466105e-06, "loss": 0.5361, "step": 10260 }, { "epoch": 1.3358063256540413, "grad_norm": 2.556450605392456, "learning_rate": 6.045167895795829e-06, "loss": 0.4354, "step": 10263 }, { "epoch": 1.336196798125732, "grad_norm": 2.905630588531494, "learning_rate": 6.043124128536919e-06, "loss": 0.4784, "step": 10266 }, { "epoch": 1.3365872705974229, "grad_norm": 3.139017343521118, "learning_rate": 6.041080179046418e-06, "loss": 0.5801, "step": 10269 }, { "epoch": 1.3369777430691137, "grad_norm": 2.844486713409424, "learning_rate": 6.039036047681402e-06, "loss": 0.5141, "step": 10272 }, { "epoch": 1.3373682155408044, "grad_norm": 2.940715789794922, "learning_rate": 6.036991734798971e-06, "loss": 0.5246, "step": 10275 }, { "epoch": 1.3377586880124952, "grad_norm": 2.516834020614624, "learning_rate": 6.034947240756268e-06, "loss": 0.4542, "step": 10278 }, { "epoch": 1.338149160484186, "grad_norm": 2.7946300506591797, "learning_rate": 6.032902565910456e-06, "loss": 0.5008, "step": 10281 }, { "epoch": 1.3385396329558765, "grad_norm": 2.5001931190490723, "learning_rate": 6.030857710618743e-06, "loss": 0.4844, "step": 10284 }, { "epoch": 1.3389301054275673, "grad_norm": 2.5661776065826416, "learning_rate": 6.0288126752383535e-06, "loss": 0.4938, "step": 10287 }, { "epoch": 1.339320577899258, "grad_norm": 2.576228618621826, "learning_rate": 6.026767460126555e-06, "loss": 0.5228, "step": 10290 }, { "epoch": 1.3397110503709488, "grad_norm": 2.4661436080932617, "learning_rate": 6.0247220656406415e-06, "loss": 0.4381, "step": 10293 }, { "epoch": 1.3401015228426396, "grad_norm": 2.5111136436462402, "learning_rate": 6.022676492137939e-06, "loss": 0.5015, "step": 10296 }, { "epoch": 1.3404919953143304, "grad_norm": 2.6229360103607178, "learning_rate": 6.020630739975803e-06, "loss": 0.4764, "step": 10299 }, { "epoch": 1.3408824677860212, "grad_norm": 2.8185629844665527, "learning_rate": 6.018584809511625e-06, "loss": 0.5273, "step": 10302 }, { "epoch": 1.3412729402577117, "grad_norm": 2.5472960472106934, "learning_rate": 6.0165387011028235e-06, "loss": 0.4626, "step": 10305 }, { "epoch": 1.3416634127294027, "grad_norm": 2.914748430252075, "learning_rate": 6.01449241510685e-06, "loss": 0.5434, "step": 10308 }, { "epoch": 1.3420538852010933, "grad_norm": 2.704376220703125, "learning_rate": 6.012445951881185e-06, "loss": 0.5164, "step": 10311 }, { "epoch": 1.342444357672784, "grad_norm": 2.3576114177703857, "learning_rate": 6.010399311783343e-06, "loss": 0.4096, "step": 10314 }, { "epoch": 1.3428348301444748, "grad_norm": 2.456395387649536, "learning_rate": 6.008352495170866e-06, "loss": 0.4971, "step": 10317 }, { "epoch": 1.3432253026161656, "grad_norm": 2.8089687824249268, "learning_rate": 6.006305502401329e-06, "loss": 0.4943, "step": 10320 }, { "epoch": 1.3436157750878563, "grad_norm": 2.7782235145568848, "learning_rate": 6.0042583338323376e-06, "loss": 0.5548, "step": 10323 }, { "epoch": 1.3440062475595471, "grad_norm": 2.7142183780670166, "learning_rate": 6.002210989821528e-06, "loss": 0.5179, "step": 10326 }, { "epoch": 1.344396720031238, "grad_norm": 2.6159982681274414, "learning_rate": 6.0001634707265675e-06, "loss": 0.4984, "step": 10329 }, { "epoch": 1.3447871925029284, "grad_norm": 2.9001638889312744, "learning_rate": 5.998115776905152e-06, "loss": 0.5373, "step": 10332 }, { "epoch": 1.3451776649746192, "grad_norm": 2.7461464405059814, "learning_rate": 5.996067908715012e-06, "loss": 0.5562, "step": 10335 }, { "epoch": 1.34556813744631, "grad_norm": 2.5092601776123047, "learning_rate": 5.994019866513901e-06, "loss": 0.5171, "step": 10338 }, { "epoch": 1.3459586099180008, "grad_norm": 2.727618455886841, "learning_rate": 5.991971650659612e-06, "loss": 0.4846, "step": 10341 }, { "epoch": 1.3463490823896915, "grad_norm": 2.589345932006836, "learning_rate": 5.9899232615099626e-06, "loss": 0.5663, "step": 10344 }, { "epoch": 1.3467395548613823, "grad_norm": 3.144259452819824, "learning_rate": 5.9878746994227996e-06, "loss": 0.6201, "step": 10347 }, { "epoch": 1.347130027333073, "grad_norm": 2.4682085514068604, "learning_rate": 5.985825964756008e-06, "loss": 0.4205, "step": 10350 }, { "epoch": 1.3475204998047636, "grad_norm": 3.090812921524048, "learning_rate": 5.9837770578674925e-06, "loss": 0.4667, "step": 10353 }, { "epoch": 1.3479109722764546, "grad_norm": 2.575993299484253, "learning_rate": 5.981727979115195e-06, "loss": 0.4956, "step": 10356 }, { "epoch": 1.3483014447481452, "grad_norm": 2.3932414054870605, "learning_rate": 5.979678728857086e-06, "loss": 0.4554, "step": 10359 }, { "epoch": 1.348691917219836, "grad_norm": 2.7043232917785645, "learning_rate": 5.977629307451162e-06, "loss": 0.5633, "step": 10362 }, { "epoch": 1.3490823896915267, "grad_norm": 2.7044363021850586, "learning_rate": 5.975579715255455e-06, "loss": 0.431, "step": 10365 }, { "epoch": 1.3494728621632175, "grad_norm": 2.512634038925171, "learning_rate": 5.973529952628023e-06, "loss": 0.4995, "step": 10368 }, { "epoch": 1.3498633346349083, "grad_norm": 3.501483917236328, "learning_rate": 5.97148001992696e-06, "loss": 0.4722, "step": 10371 }, { "epoch": 1.350253807106599, "grad_norm": 2.4441893100738525, "learning_rate": 5.969429917510378e-06, "loss": 0.5325, "step": 10374 }, { "epoch": 1.3506442795782898, "grad_norm": 2.3490185737609863, "learning_rate": 5.9673796457364295e-06, "loss": 0.456, "step": 10377 }, { "epoch": 1.3510347520499804, "grad_norm": 3.762827157974243, "learning_rate": 5.965329204963292e-06, "loss": 0.5797, "step": 10380 }, { "epoch": 1.3514252245216714, "grad_norm": 3.0791232585906982, "learning_rate": 5.9632785955491735e-06, "loss": 0.4798, "step": 10383 }, { "epoch": 1.351815696993362, "grad_norm": 2.7821664810180664, "learning_rate": 5.961227817852311e-06, "loss": 0.5121, "step": 10386 }, { "epoch": 1.3522061694650527, "grad_norm": 2.4701087474823, "learning_rate": 5.959176872230969e-06, "loss": 0.4682, "step": 10389 }, { "epoch": 1.3525966419367434, "grad_norm": 2.5216877460479736, "learning_rate": 5.957125759043449e-06, "loss": 0.4486, "step": 10392 }, { "epoch": 1.3529871144084342, "grad_norm": 2.7570204734802246, "learning_rate": 5.955074478648068e-06, "loss": 0.5457, "step": 10395 }, { "epoch": 1.353377586880125, "grad_norm": 2.721740484237671, "learning_rate": 5.9530230314031875e-06, "loss": 0.4864, "step": 10398 }, { "epoch": 1.3537680593518158, "grad_norm": 2.7100112438201904, "learning_rate": 5.950971417667189e-06, "loss": 0.4506, "step": 10401 }, { "epoch": 1.3541585318235065, "grad_norm": 3.179159641265869, "learning_rate": 5.948919637798482e-06, "loss": 0.5391, "step": 10404 }, { "epoch": 1.354549004295197, "grad_norm": 2.8454229831695557, "learning_rate": 5.946867692155511e-06, "loss": 0.507, "step": 10407 }, { "epoch": 1.3549394767668879, "grad_norm": 2.766425609588623, "learning_rate": 5.944815581096746e-06, "loss": 0.5161, "step": 10410 }, { "epoch": 1.3553299492385786, "grad_norm": 2.6787941455841064, "learning_rate": 5.942763304980689e-06, "loss": 0.4643, "step": 10413 }, { "epoch": 1.3557204217102694, "grad_norm": 2.679385185241699, "learning_rate": 5.940710864165863e-06, "loss": 0.5447, "step": 10416 }, { "epoch": 1.3561108941819602, "grad_norm": 2.693063497543335, "learning_rate": 5.938658259010829e-06, "loss": 0.5279, "step": 10419 }, { "epoch": 1.356501366653651, "grad_norm": 2.5846731662750244, "learning_rate": 5.936605489874172e-06, "loss": 0.5074, "step": 10422 }, { "epoch": 1.3568918391253417, "grad_norm": 2.7756001949310303, "learning_rate": 5.934552557114507e-06, "loss": 0.4502, "step": 10425 }, { "epoch": 1.3572823115970323, "grad_norm": 2.7776410579681396, "learning_rate": 5.932499461090475e-06, "loss": 0.5334, "step": 10428 }, { "epoch": 1.3576727840687233, "grad_norm": 2.835615396499634, "learning_rate": 5.930446202160749e-06, "loss": 0.5619, "step": 10431 }, { "epoch": 1.3580632565404138, "grad_norm": 2.890580177307129, "learning_rate": 5.928392780684028e-06, "loss": 0.561, "step": 10434 }, { "epoch": 1.3584537290121046, "grad_norm": 4.082695960998535, "learning_rate": 5.926339197019043e-06, "loss": 0.5714, "step": 10437 }, { "epoch": 1.3588442014837954, "grad_norm": 2.4185142517089844, "learning_rate": 5.924285451524549e-06, "loss": 0.5067, "step": 10440 }, { "epoch": 1.3592346739554861, "grad_norm": 2.5827300548553467, "learning_rate": 5.922231544559331e-06, "loss": 0.4823, "step": 10443 }, { "epoch": 1.359625146427177, "grad_norm": 2.5972402095794678, "learning_rate": 5.9201774764822e-06, "loss": 0.5133, "step": 10446 }, { "epoch": 1.3600156188988677, "grad_norm": 2.655304431915283, "learning_rate": 5.918123247652002e-06, "loss": 0.4895, "step": 10449 }, { "epoch": 1.3604060913705585, "grad_norm": 3.1130921840667725, "learning_rate": 5.916068858427604e-06, "loss": 0.5255, "step": 10452 }, { "epoch": 1.360796563842249, "grad_norm": 2.6465952396392822, "learning_rate": 5.914014309167901e-06, "loss": 0.5161, "step": 10455 }, { "epoch": 1.36118703631394, "grad_norm": 2.8651537895202637, "learning_rate": 5.911959600231825e-06, "loss": 0.4893, "step": 10458 }, { "epoch": 1.3615775087856306, "grad_norm": 2.6356594562530518, "learning_rate": 5.909904731978323e-06, "loss": 0.4806, "step": 10461 }, { "epoch": 1.3619679812573213, "grad_norm": 2.7509796619415283, "learning_rate": 5.90784970476638e-06, "loss": 0.4904, "step": 10464 }, { "epoch": 1.362358453729012, "grad_norm": 2.8054521083831787, "learning_rate": 5.905794518955002e-06, "loss": 0.5197, "step": 10467 }, { "epoch": 1.3627489262007029, "grad_norm": 2.970839262008667, "learning_rate": 5.903739174903226e-06, "loss": 0.4888, "step": 10470 }, { "epoch": 1.3631393986723936, "grad_norm": 2.5401625633239746, "learning_rate": 5.901683672970118e-06, "loss": 0.6049, "step": 10473 }, { "epoch": 1.3635298711440844, "grad_norm": 2.5715386867523193, "learning_rate": 5.89962801351477e-06, "loss": 0.4562, "step": 10476 }, { "epoch": 1.3639203436157752, "grad_norm": 2.8467347621917725, "learning_rate": 5.897572196896301e-06, "loss": 0.4964, "step": 10479 }, { "epoch": 1.3643108160874657, "grad_norm": 3.585697650909424, "learning_rate": 5.895516223473856e-06, "loss": 0.5151, "step": 10482 }, { "epoch": 1.3647012885591565, "grad_norm": 2.776897668838501, "learning_rate": 5.8934600936066115e-06, "loss": 0.5529, "step": 10485 }, { "epoch": 1.3650917610308473, "grad_norm": 2.829629421234131, "learning_rate": 5.891403807653768e-06, "loss": 0.5388, "step": 10488 }, { "epoch": 1.365482233502538, "grad_norm": 2.7404351234436035, "learning_rate": 5.889347365974554e-06, "loss": 0.4966, "step": 10491 }, { "epoch": 1.3658727059742288, "grad_norm": 2.4462528228759766, "learning_rate": 5.887290768928228e-06, "loss": 0.5372, "step": 10494 }, { "epoch": 1.3662631784459196, "grad_norm": 2.7154576778411865, "learning_rate": 5.88523401687407e-06, "loss": 0.586, "step": 10497 }, { "epoch": 1.3666536509176104, "grad_norm": 3.031604766845703, "learning_rate": 5.883177110171392e-06, "loss": 0.5067, "step": 10500 }, { "epoch": 1.367044123389301, "grad_norm": 4.211544513702393, "learning_rate": 5.881120049179529e-06, "loss": 0.5154, "step": 10503 }, { "epoch": 1.367434595860992, "grad_norm": 2.9091553688049316, "learning_rate": 5.8790628342578485e-06, "loss": 0.4859, "step": 10506 }, { "epoch": 1.3678250683326825, "grad_norm": 2.4611270427703857, "learning_rate": 5.87700546576574e-06, "loss": 0.4909, "step": 10509 }, { "epoch": 1.3682155408043732, "grad_norm": 2.56895112991333, "learning_rate": 5.874947944062621e-06, "loss": 0.491, "step": 10512 }, { "epoch": 1.368606013276064, "grad_norm": 2.8200464248657227, "learning_rate": 5.872890269507938e-06, "loss": 0.4582, "step": 10515 }, { "epoch": 1.3689964857477548, "grad_norm": 2.5796611309051514, "learning_rate": 5.870832442461161e-06, "loss": 0.432, "step": 10518 }, { "epoch": 1.3693869582194456, "grad_norm": 2.872462034225464, "learning_rate": 5.868774463281788e-06, "loss": 0.5382, "step": 10521 }, { "epoch": 1.3697774306911363, "grad_norm": 2.5397450923919678, "learning_rate": 5.866716332329343e-06, "loss": 0.4294, "step": 10524 }, { "epoch": 1.370167903162827, "grad_norm": 2.746539831161499, "learning_rate": 5.8646580499633786e-06, "loss": 0.5046, "step": 10527 }, { "epoch": 1.3705583756345177, "grad_norm": 2.5908896923065186, "learning_rate": 5.862599616543473e-06, "loss": 0.5438, "step": 10530 }, { "epoch": 1.3709488481062087, "grad_norm": 3.0889134407043457, "learning_rate": 5.860541032429227e-06, "loss": 0.5459, "step": 10533 }, { "epoch": 1.3713393205778992, "grad_norm": 2.655442953109741, "learning_rate": 5.858482297980275e-06, "loss": 0.4929, "step": 10536 }, { "epoch": 1.37172979304959, "grad_norm": 2.756154775619507, "learning_rate": 5.856423413556269e-06, "loss": 0.4623, "step": 10539 }, { "epoch": 1.3721202655212807, "grad_norm": 2.576265573501587, "learning_rate": 5.854364379516896e-06, "loss": 0.5525, "step": 10542 }, { "epoch": 1.3725107379929715, "grad_norm": 2.8419313430786133, "learning_rate": 5.852305196221864e-06, "loss": 0.5179, "step": 10545 }, { "epoch": 1.3729012104646623, "grad_norm": 2.7825493812561035, "learning_rate": 5.8502458640309055e-06, "loss": 0.4992, "step": 10548 }, { "epoch": 1.373291682936353, "grad_norm": 2.590987205505371, "learning_rate": 5.8481863833037846e-06, "loss": 0.489, "step": 10551 }, { "epoch": 1.3736821554080438, "grad_norm": 2.5935583114624023, "learning_rate": 5.846126754400285e-06, "loss": 0.5009, "step": 10554 }, { "epoch": 1.3740726278797344, "grad_norm": 3.053290605545044, "learning_rate": 5.844066977680223e-06, "loss": 0.4684, "step": 10557 }, { "epoch": 1.3744631003514252, "grad_norm": 3.1412622928619385, "learning_rate": 5.842007053503436e-06, "loss": 0.5036, "step": 10560 }, { "epoch": 1.374853572823116, "grad_norm": 2.869248390197754, "learning_rate": 5.839946982229786e-06, "loss": 0.432, "step": 10563 }, { "epoch": 1.3752440452948067, "grad_norm": 2.681100606918335, "learning_rate": 5.8378867642191675e-06, "loss": 0.5079, "step": 10566 }, { "epoch": 1.3756345177664975, "grad_norm": 2.4988043308258057, "learning_rate": 5.835826399831492e-06, "loss": 0.4901, "step": 10569 }, { "epoch": 1.3760249902381883, "grad_norm": 2.498563766479492, "learning_rate": 5.833765889426706e-06, "loss": 0.4638, "step": 10572 }, { "epoch": 1.376415462709879, "grad_norm": 2.5112462043762207, "learning_rate": 5.831705233364768e-06, "loss": 0.4716, "step": 10575 }, { "epoch": 1.3768059351815696, "grad_norm": 2.4168717861175537, "learning_rate": 5.82964443200568e-06, "loss": 0.476, "step": 10578 }, { "epoch": 1.3771964076532606, "grad_norm": 2.5891149044036865, "learning_rate": 5.827583485709453e-06, "loss": 0.5093, "step": 10581 }, { "epoch": 1.3775868801249511, "grad_norm": 2.5921523571014404, "learning_rate": 5.825522394836132e-06, "loss": 0.5532, "step": 10584 }, { "epoch": 1.377977352596642, "grad_norm": 2.529782295227051, "learning_rate": 5.823461159745786e-06, "loss": 0.4955, "step": 10587 }, { "epoch": 1.3783678250683327, "grad_norm": 2.8079707622528076, "learning_rate": 5.821399780798507e-06, "loss": 0.5489, "step": 10590 }, { "epoch": 1.3787582975400234, "grad_norm": 3.1426773071289062, "learning_rate": 5.8193382583544155e-06, "loss": 0.478, "step": 10593 }, { "epoch": 1.3791487700117142, "grad_norm": 2.480963706970215, "learning_rate": 5.817276592773651e-06, "loss": 0.4766, "step": 10596 }, { "epoch": 1.379539242483405, "grad_norm": 2.4679062366485596, "learning_rate": 5.815214784416386e-06, "loss": 0.5077, "step": 10599 }, { "epoch": 1.3799297149550958, "grad_norm": 2.7252042293548584, "learning_rate": 5.813152833642816e-06, "loss": 0.518, "step": 10602 }, { "epoch": 1.3803201874267863, "grad_norm": 2.4545648097991943, "learning_rate": 5.811090740813154e-06, "loss": 0.4882, "step": 10605 }, { "epoch": 1.380710659898477, "grad_norm": 2.6716935634613037, "learning_rate": 5.809028506287647e-06, "loss": 0.5061, "step": 10608 }, { "epoch": 1.3811011323701678, "grad_norm": 2.5040860176086426, "learning_rate": 5.806966130426561e-06, "loss": 0.4739, "step": 10611 }, { "epoch": 1.3814916048418586, "grad_norm": 2.5503172874450684, "learning_rate": 5.80490361359019e-06, "loss": 0.5359, "step": 10614 }, { "epoch": 1.3818820773135494, "grad_norm": 2.5785112380981445, "learning_rate": 5.802840956138851e-06, "loss": 0.4595, "step": 10617 }, { "epoch": 1.3822725497852402, "grad_norm": 3.1451265811920166, "learning_rate": 5.800778158432886e-06, "loss": 0.4806, "step": 10620 }, { "epoch": 1.382663022256931, "grad_norm": 2.8773903846740723, "learning_rate": 5.798715220832661e-06, "loss": 0.5106, "step": 10623 }, { "epoch": 1.3830534947286217, "grad_norm": 2.5292470455169678, "learning_rate": 5.796652143698568e-06, "loss": 0.4749, "step": 10626 }, { "epoch": 1.3834439672003125, "grad_norm": 2.4957058429718018, "learning_rate": 5.7945889273910215e-06, "loss": 0.5352, "step": 10629 }, { "epoch": 1.383834439672003, "grad_norm": 2.4294910430908203, "learning_rate": 5.79252557227046e-06, "loss": 0.5267, "step": 10632 }, { "epoch": 1.3842249121436938, "grad_norm": 3.0004422664642334, "learning_rate": 5.7904620786973476e-06, "loss": 0.4788, "step": 10635 }, { "epoch": 1.3846153846153846, "grad_norm": 2.777956962585449, "learning_rate": 5.788398447032174e-06, "loss": 0.5147, "step": 10638 }, { "epoch": 1.3850058570870754, "grad_norm": 3.005110263824463, "learning_rate": 5.78633467763545e-06, "loss": 0.5815, "step": 10641 }, { "epoch": 1.3853963295587661, "grad_norm": 2.7683937549591064, "learning_rate": 5.784270770867713e-06, "loss": 0.5067, "step": 10644 }, { "epoch": 1.385786802030457, "grad_norm": 2.3799400329589844, "learning_rate": 5.782206727089521e-06, "loss": 0.4352, "step": 10647 }, { "epoch": 1.3861772745021477, "grad_norm": 2.8313100337982178, "learning_rate": 5.780142546661461e-06, "loss": 0.4987, "step": 10650 }, { "epoch": 1.3865677469738382, "grad_norm": 2.4416072368621826, "learning_rate": 5.778078229944137e-06, "loss": 0.5242, "step": 10653 }, { "epoch": 1.3869582194455292, "grad_norm": 4.677280426025391, "learning_rate": 5.776013777298184e-06, "loss": 0.5669, "step": 10656 }, { "epoch": 1.3873486919172198, "grad_norm": 2.549098014831543, "learning_rate": 5.7739491890842556e-06, "loss": 0.4694, "step": 10659 }, { "epoch": 1.3877391643889105, "grad_norm": 2.7043087482452393, "learning_rate": 5.771884465663033e-06, "loss": 0.5064, "step": 10662 }, { "epoch": 1.3881296368606013, "grad_norm": 2.652681350708008, "learning_rate": 5.7698196073952195e-06, "loss": 0.4974, "step": 10665 }, { "epoch": 1.388520109332292, "grad_norm": 2.560001850128174, "learning_rate": 5.767754614641538e-06, "loss": 0.5313, "step": 10668 }, { "epoch": 1.3889105818039829, "grad_norm": 2.911827325820923, "learning_rate": 5.76568948776274e-06, "loss": 0.5815, "step": 10671 }, { "epoch": 1.3893010542756736, "grad_norm": 3.264103889465332, "learning_rate": 5.763624227119602e-06, "loss": 0.5138, "step": 10674 }, { "epoch": 1.3896915267473644, "grad_norm": 2.7543458938598633, "learning_rate": 5.761558833072915e-06, "loss": 0.5643, "step": 10677 }, { "epoch": 1.390081999219055, "grad_norm": 2.639909505844116, "learning_rate": 5.759493305983504e-06, "loss": 0.5443, "step": 10680 }, { "epoch": 1.3904724716907457, "grad_norm": 3.0217959880828857, "learning_rate": 5.757427646212208e-06, "loss": 0.5419, "step": 10683 }, { "epoch": 1.3908629441624365, "grad_norm": 2.6086668968200684, "learning_rate": 5.755361854119898e-06, "loss": 0.5186, "step": 10686 }, { "epoch": 1.3912534166341273, "grad_norm": 2.5692412853240967, "learning_rate": 5.753295930067461e-06, "loss": 0.439, "step": 10689 }, { "epoch": 1.391643889105818, "grad_norm": 2.9494736194610596, "learning_rate": 5.751229874415808e-06, "loss": 0.4788, "step": 10692 }, { "epoch": 1.3920343615775088, "grad_norm": 4.0618896484375, "learning_rate": 5.749163687525878e-06, "loss": 0.4985, "step": 10695 }, { "epoch": 1.3924248340491996, "grad_norm": 2.8040335178375244, "learning_rate": 5.747097369758626e-06, "loss": 0.5112, "step": 10698 }, { "epoch": 1.3928153065208901, "grad_norm": 3.7752537727355957, "learning_rate": 5.745030921475036e-06, "loss": 0.5142, "step": 10701 }, { "epoch": 1.3932057789925811, "grad_norm": 2.545715093612671, "learning_rate": 5.742964343036111e-06, "loss": 0.4986, "step": 10704 }, { "epoch": 1.3935962514642717, "grad_norm": 3.0850815773010254, "learning_rate": 5.74089763480288e-06, "loss": 0.4749, "step": 10707 }, { "epoch": 1.3939867239359625, "grad_norm": 2.574460029602051, "learning_rate": 5.738830797136389e-06, "loss": 0.5006, "step": 10710 }, { "epoch": 1.3943771964076532, "grad_norm": 2.6434524059295654, "learning_rate": 5.736763830397713e-06, "loss": 0.4904, "step": 10713 }, { "epoch": 1.394767668879344, "grad_norm": 2.6245415210723877, "learning_rate": 5.734696734947946e-06, "loss": 0.4879, "step": 10716 }, { "epoch": 1.3951581413510348, "grad_norm": 2.7214295864105225, "learning_rate": 5.732629511148204e-06, "loss": 0.5052, "step": 10719 }, { "epoch": 1.3955486138227255, "grad_norm": 2.8227062225341797, "learning_rate": 5.730562159359628e-06, "loss": 0.5524, "step": 10722 }, { "epoch": 1.3959390862944163, "grad_norm": 4.676625728607178, "learning_rate": 5.728494679943378e-06, "loss": 0.5505, "step": 10725 }, { "epoch": 1.3963295587661069, "grad_norm": 3.0134570598602295, "learning_rate": 5.726427073260641e-06, "loss": 0.4246, "step": 10728 }, { "epoch": 1.3967200312377979, "grad_norm": 2.913252353668213, "learning_rate": 5.7243593396726235e-06, "loss": 0.5162, "step": 10731 }, { "epoch": 1.3971105037094884, "grad_norm": 2.4458465576171875, "learning_rate": 5.722291479540552e-06, "loss": 0.496, "step": 10734 }, { "epoch": 1.3975009761811792, "grad_norm": 2.5969252586364746, "learning_rate": 5.72022349322568e-06, "loss": 0.5335, "step": 10737 }, { "epoch": 1.39789144865287, "grad_norm": 2.6486926078796387, "learning_rate": 5.7181553810892785e-06, "loss": 0.4731, "step": 10740 }, { "epoch": 1.3982819211245607, "grad_norm": 2.3636739253997803, "learning_rate": 5.716087143492643e-06, "loss": 0.4643, "step": 10743 }, { "epoch": 1.3986723935962515, "grad_norm": 2.5083768367767334, "learning_rate": 5.714018780797088e-06, "loss": 0.4983, "step": 10746 }, { "epoch": 1.3990628660679423, "grad_norm": 2.5721659660339355, "learning_rate": 5.7119502933639545e-06, "loss": 0.5938, "step": 10749 }, { "epoch": 1.399453338539633, "grad_norm": 2.918208360671997, "learning_rate": 5.709881681554604e-06, "loss": 0.4612, "step": 10752 }, { "epoch": 1.3998438110113236, "grad_norm": 2.538756847381592, "learning_rate": 5.7078129457304165e-06, "loss": 0.4997, "step": 10755 }, { "epoch": 1.4002342834830144, "grad_norm": 2.635209798812866, "learning_rate": 5.7057440862527965e-06, "loss": 0.4774, "step": 10758 }, { "epoch": 1.4006247559547051, "grad_norm": 2.62276554107666, "learning_rate": 5.70367510348317e-06, "loss": 0.4467, "step": 10761 }, { "epoch": 1.401015228426396, "grad_norm": 2.6300337314605713, "learning_rate": 5.7016059977829805e-06, "loss": 0.4499, "step": 10764 }, { "epoch": 1.4014057008980867, "grad_norm": 2.4392457008361816, "learning_rate": 5.6995367695137e-06, "loss": 0.5159, "step": 10767 }, { "epoch": 1.4017961733697775, "grad_norm": 2.7532896995544434, "learning_rate": 5.697467419036819e-06, "loss": 0.5695, "step": 10770 }, { "epoch": 1.4021866458414682, "grad_norm": 3.3758609294891357, "learning_rate": 5.695397946713848e-06, "loss": 0.5062, "step": 10773 }, { "epoch": 1.4025771183131588, "grad_norm": 2.9240944385528564, "learning_rate": 5.6933283529063165e-06, "loss": 0.4811, "step": 10776 }, { "epoch": 1.4029675907848498, "grad_norm": 3.1382761001586914, "learning_rate": 5.691258637975781e-06, "loss": 0.506, "step": 10779 }, { "epoch": 1.4033580632565403, "grad_norm": 2.694122314453125, "learning_rate": 5.689188802283816e-06, "loss": 0.5352, "step": 10782 }, { "epoch": 1.403748535728231, "grad_norm": 2.632483720779419, "learning_rate": 5.687118846192015e-06, "loss": 0.4797, "step": 10785 }, { "epoch": 1.4041390081999219, "grad_norm": 2.6875948905944824, "learning_rate": 5.685048770062e-06, "loss": 0.5539, "step": 10788 }, { "epoch": 1.4045294806716127, "grad_norm": 2.9431047439575195, "learning_rate": 5.682978574255404e-06, "loss": 0.5159, "step": 10791 }, { "epoch": 1.4049199531433034, "grad_norm": 2.7504560947418213, "learning_rate": 5.680908259133889e-06, "loss": 0.4741, "step": 10794 }, { "epoch": 1.4053104256149942, "grad_norm": 2.588517189025879, "learning_rate": 5.678837825059134e-06, "loss": 0.5162, "step": 10797 }, { "epoch": 1.405700898086685, "grad_norm": 2.9769070148468018, "learning_rate": 5.676767272392837e-06, "loss": 0.5516, "step": 10800 }, { "epoch": 1.4060913705583755, "grad_norm": 3.6146578788757324, "learning_rate": 5.674696601496723e-06, "loss": 0.4143, "step": 10803 }, { "epoch": 1.4064818430300665, "grad_norm": 2.6829850673675537, "learning_rate": 5.672625812732531e-06, "loss": 0.4618, "step": 10806 }, { "epoch": 1.406872315501757, "grad_norm": 2.5348923206329346, "learning_rate": 5.670554906462024e-06, "loss": 0.4398, "step": 10809 }, { "epoch": 1.4072627879734478, "grad_norm": 2.8809609413146973, "learning_rate": 5.668483883046987e-06, "loss": 0.438, "step": 10812 }, { "epoch": 1.4076532604451386, "grad_norm": 2.6564998626708984, "learning_rate": 5.666412742849222e-06, "loss": 0.4728, "step": 10815 }, { "epoch": 1.4080437329168294, "grad_norm": 2.5094847679138184, "learning_rate": 5.664341486230552e-06, "loss": 0.5797, "step": 10818 }, { "epoch": 1.4084342053885202, "grad_norm": 4.3344268798828125, "learning_rate": 5.662270113552822e-06, "loss": 0.4779, "step": 10821 }, { "epoch": 1.408824677860211, "grad_norm": 2.705709218978882, "learning_rate": 5.660198625177897e-06, "loss": 0.5035, "step": 10824 }, { "epoch": 1.4092151503319017, "grad_norm": 2.7563042640686035, "learning_rate": 5.65812702146766e-06, "loss": 0.4518, "step": 10827 }, { "epoch": 1.4096056228035923, "grad_norm": 2.515812635421753, "learning_rate": 5.656055302784017e-06, "loss": 0.4553, "step": 10830 }, { "epoch": 1.409996095275283, "grad_norm": 2.5328338146209717, "learning_rate": 5.653983469488893e-06, "loss": 0.467, "step": 10833 }, { "epoch": 1.4103865677469738, "grad_norm": 2.820492744445801, "learning_rate": 5.651911521944233e-06, "loss": 0.5257, "step": 10836 }, { "epoch": 1.4107770402186646, "grad_norm": 3.2355074882507324, "learning_rate": 5.649839460512001e-06, "loss": 0.4985, "step": 10839 }, { "epoch": 1.4111675126903553, "grad_norm": 3.2596559524536133, "learning_rate": 5.647767285554183e-06, "loss": 0.5677, "step": 10842 }, { "epoch": 1.4115579851620461, "grad_norm": 2.754706382751465, "learning_rate": 5.645694997432783e-06, "loss": 0.4839, "step": 10845 }, { "epoch": 1.4119484576337369, "grad_norm": 2.3893485069274902, "learning_rate": 5.643622596509823e-06, "loss": 0.4494, "step": 10848 }, { "epoch": 1.4123389301054274, "grad_norm": 2.934878349304199, "learning_rate": 5.641550083147352e-06, "loss": 0.4586, "step": 10851 }, { "epoch": 1.4127294025771184, "grad_norm": 2.4555704593658447, "learning_rate": 5.639477457707431e-06, "loss": 0.5323, "step": 10854 }, { "epoch": 1.413119875048809, "grad_norm": 2.666161298751831, "learning_rate": 5.6374047205521424e-06, "loss": 0.4899, "step": 10857 }, { "epoch": 1.4135103475204998, "grad_norm": 2.8357090950012207, "learning_rate": 5.6353318720435905e-06, "loss": 0.5637, "step": 10860 }, { "epoch": 1.4139008199921905, "grad_norm": 2.9021785259246826, "learning_rate": 5.633258912543897e-06, "loss": 0.5518, "step": 10863 }, { "epoch": 1.4142912924638813, "grad_norm": 2.5638427734375, "learning_rate": 5.631185842415203e-06, "loss": 0.448, "step": 10866 }, { "epoch": 1.414681764935572, "grad_norm": 2.585672378540039, "learning_rate": 5.62911266201967e-06, "loss": 0.5891, "step": 10869 }, { "epoch": 1.4150722374072628, "grad_norm": 2.641378164291382, "learning_rate": 5.62703937171948e-06, "loss": 0.4652, "step": 10872 }, { "epoch": 1.4154627098789536, "grad_norm": 2.626474618911743, "learning_rate": 5.62496597187683e-06, "loss": 0.5193, "step": 10875 }, { "epoch": 1.4158531823506442, "grad_norm": 2.8795952796936035, "learning_rate": 5.622892462853938e-06, "loss": 0.4486, "step": 10878 }, { "epoch": 1.4162436548223352, "grad_norm": 3.0229554176330566, "learning_rate": 5.620818845013046e-06, "loss": 0.501, "step": 10881 }, { "epoch": 1.4166341272940257, "grad_norm": 2.5767412185668945, "learning_rate": 5.618745118716406e-06, "loss": 0.462, "step": 10884 }, { "epoch": 1.4170245997657165, "grad_norm": 2.8493659496307373, "learning_rate": 5.616671284326296e-06, "loss": 0.5408, "step": 10887 }, { "epoch": 1.4174150722374073, "grad_norm": 2.6280994415283203, "learning_rate": 5.6145973422050085e-06, "loss": 0.5258, "step": 10890 }, { "epoch": 1.417805544709098, "grad_norm": 2.548902988433838, "learning_rate": 5.61252329271486e-06, "loss": 0.4983, "step": 10893 }, { "epoch": 1.4181960171807888, "grad_norm": 2.673133373260498, "learning_rate": 5.6104491362181805e-06, "loss": 0.6353, "step": 10896 }, { "epoch": 1.4185864896524796, "grad_norm": 2.571382999420166, "learning_rate": 5.60837487307732e-06, "loss": 0.4516, "step": 10899 }, { "epoch": 1.4189769621241703, "grad_norm": 3.19085693359375, "learning_rate": 5.60630050365465e-06, "loss": 0.5573, "step": 10902 }, { "epoch": 1.419367434595861, "grad_norm": 2.6122026443481445, "learning_rate": 5.604226028312558e-06, "loss": 0.534, "step": 10905 }, { "epoch": 1.4197579070675517, "grad_norm": 4.08894157409668, "learning_rate": 5.602151447413449e-06, "loss": 0.5676, "step": 10908 }, { "epoch": 1.4201483795392424, "grad_norm": 2.857332229614258, "learning_rate": 5.600076761319748e-06, "loss": 0.5727, "step": 10911 }, { "epoch": 1.4205388520109332, "grad_norm": 5.386052131652832, "learning_rate": 5.5980019703939006e-06, "loss": 0.4778, "step": 10914 }, { "epoch": 1.420929324482624, "grad_norm": 2.5507993698120117, "learning_rate": 5.595927074998367e-06, "loss": 0.5343, "step": 10917 }, { "epoch": 1.4213197969543148, "grad_norm": 3.1667394638061523, "learning_rate": 5.593852075495627e-06, "loss": 0.4644, "step": 10920 }, { "epoch": 1.4217102694260055, "grad_norm": 2.6564674377441406, "learning_rate": 5.591776972248178e-06, "loss": 0.465, "step": 10923 }, { "epoch": 1.422100741897696, "grad_norm": 2.5531013011932373, "learning_rate": 5.589701765618539e-06, "loss": 0.4859, "step": 10926 }, { "epoch": 1.422491214369387, "grad_norm": 2.634110689163208, "learning_rate": 5.587626455969238e-06, "loss": 0.5084, "step": 10929 }, { "epoch": 1.4228816868410776, "grad_norm": 2.458155632019043, "learning_rate": 5.5855510436628345e-06, "loss": 0.4223, "step": 10932 }, { "epoch": 1.4232721593127684, "grad_norm": 2.6002464294433594, "learning_rate": 5.583475529061895e-06, "loss": 0.607, "step": 10935 }, { "epoch": 1.4236626317844592, "grad_norm": 2.449547290802002, "learning_rate": 5.581399912529008e-06, "loss": 0.4332, "step": 10938 }, { "epoch": 1.42405310425615, "grad_norm": 2.828279972076416, "learning_rate": 5.579324194426779e-06, "loss": 0.5483, "step": 10941 }, { "epoch": 1.4244435767278407, "grad_norm": 2.5671331882476807, "learning_rate": 5.577248375117832e-06, "loss": 0.5026, "step": 10944 }, { "epoch": 1.4248340491995315, "grad_norm": 2.9858782291412354, "learning_rate": 5.575172454964808e-06, "loss": 0.5081, "step": 10947 }, { "epoch": 1.4252245216712223, "grad_norm": 2.493577003479004, "learning_rate": 5.573096434330366e-06, "loss": 0.4517, "step": 10950 }, { "epoch": 1.4256149941429128, "grad_norm": 2.619349718093872, "learning_rate": 5.571020313577183e-06, "loss": 0.5067, "step": 10953 }, { "epoch": 1.4260054666146036, "grad_norm": 2.6694607734680176, "learning_rate": 5.5689440930679514e-06, "loss": 0.5137, "step": 10956 }, { "epoch": 1.4263959390862944, "grad_norm": 2.9713809490203857, "learning_rate": 5.566867773165386e-06, "loss": 0.5501, "step": 10959 }, { "epoch": 1.4267864115579851, "grad_norm": 2.9507107734680176, "learning_rate": 5.564791354232211e-06, "loss": 0.5056, "step": 10962 }, { "epoch": 1.427176884029676, "grad_norm": 3.161423683166504, "learning_rate": 5.562714836631175e-06, "loss": 0.461, "step": 10965 }, { "epoch": 1.4275673565013667, "grad_norm": 2.630687713623047, "learning_rate": 5.560638220725042e-06, "loss": 0.4621, "step": 10968 }, { "epoch": 1.4279578289730575, "grad_norm": 2.7884042263031006, "learning_rate": 5.55856150687659e-06, "loss": 0.5301, "step": 10971 }, { "epoch": 1.4283483014447482, "grad_norm": 2.516737222671509, "learning_rate": 5.5564846954486184e-06, "loss": 0.4985, "step": 10974 }, { "epoch": 1.428738773916439, "grad_norm": 2.5410304069519043, "learning_rate": 5.55440778680394e-06, "loss": 0.3917, "step": 10977 }, { "epoch": 1.4291292463881295, "grad_norm": 2.4755969047546387, "learning_rate": 5.552330781305389e-06, "loss": 0.4808, "step": 10980 }, { "epoch": 1.4295197188598203, "grad_norm": 3.0844016075134277, "learning_rate": 5.550253679315812e-06, "loss": 0.4758, "step": 10983 }, { "epoch": 1.429910191331511, "grad_norm": 2.555314779281616, "learning_rate": 5.548176481198075e-06, "loss": 0.4533, "step": 10986 }, { "epoch": 1.4303006638032019, "grad_norm": 3.199594736099243, "learning_rate": 5.5460991873150605e-06, "loss": 0.5575, "step": 10989 }, { "epoch": 1.4306911362748926, "grad_norm": 2.5074615478515625, "learning_rate": 5.544021798029665e-06, "loss": 0.4556, "step": 10992 }, { "epoch": 1.4310816087465834, "grad_norm": 2.9451918601989746, "learning_rate": 5.541944313704807e-06, "loss": 0.5797, "step": 10995 }, { "epoch": 1.4314720812182742, "grad_norm": 2.5799622535705566, "learning_rate": 5.539866734703416e-06, "loss": 0.4921, "step": 10998 }, { "epoch": 1.4318625536899647, "grad_norm": 2.503661870956421, "learning_rate": 5.537789061388445e-06, "loss": 0.5223, "step": 11001 }, { "epoch": 1.4322530261616557, "grad_norm": 2.756873846054077, "learning_rate": 5.535711294122854e-06, "loss": 0.5098, "step": 11004 }, { "epoch": 1.4326434986333463, "grad_norm": 2.3765101432800293, "learning_rate": 5.533633433269627e-06, "loss": 0.5223, "step": 11007 }, { "epoch": 1.433033971105037, "grad_norm": 2.8661093711853027, "learning_rate": 5.531555479191764e-06, "loss": 0.5067, "step": 11010 }, { "epoch": 1.4334244435767278, "grad_norm": 2.547419309616089, "learning_rate": 5.529477432252275e-06, "loss": 0.4815, "step": 11013 }, { "epoch": 1.4338149160484186, "grad_norm": 2.4830684661865234, "learning_rate": 5.527399292814193e-06, "loss": 0.501, "step": 11016 }, { "epoch": 1.4342053885201094, "grad_norm": 2.8555281162261963, "learning_rate": 5.525321061240563e-06, "loss": 0.4946, "step": 11019 }, { "epoch": 1.4345958609918001, "grad_norm": 2.6124064922332764, "learning_rate": 5.523242737894451e-06, "loss": 0.4766, "step": 11022 }, { "epoch": 1.434986333463491, "grad_norm": 2.4766440391540527, "learning_rate": 5.521164323138931e-06, "loss": 0.4742, "step": 11025 }, { "epoch": 1.4353768059351815, "grad_norm": 2.639648199081421, "learning_rate": 5.519085817337101e-06, "loss": 0.4725, "step": 11028 }, { "epoch": 1.4357672784068722, "grad_norm": 2.593116521835327, "learning_rate": 5.517007220852072e-06, "loss": 0.4893, "step": 11031 }, { "epoch": 1.436157750878563, "grad_norm": 2.823061466217041, "learning_rate": 5.514928534046968e-06, "loss": 0.5143, "step": 11034 }, { "epoch": 1.4365482233502538, "grad_norm": 2.7042064666748047, "learning_rate": 5.512849757284932e-06, "loss": 0.5257, "step": 11037 }, { "epoch": 1.4369386958219446, "grad_norm": 2.685746431350708, "learning_rate": 5.510770890929122e-06, "loss": 0.5436, "step": 11040 }, { "epoch": 1.4373291682936353, "grad_norm": 2.78102970123291, "learning_rate": 5.5086919353427124e-06, "loss": 0.5199, "step": 11043 }, { "epoch": 1.437719640765326, "grad_norm": 2.8201215267181396, "learning_rate": 5.506612890888892e-06, "loss": 0.6273, "step": 11046 }, { "epoch": 1.4381101132370167, "grad_norm": 2.8764889240264893, "learning_rate": 5.5045337579308654e-06, "loss": 0.4641, "step": 11049 }, { "epoch": 1.4385005857087076, "grad_norm": 4.958584308624268, "learning_rate": 5.502454536831854e-06, "loss": 0.484, "step": 11052 }, { "epoch": 1.4388910581803982, "grad_norm": 2.74894380569458, "learning_rate": 5.5003752279550905e-06, "loss": 0.5001, "step": 11055 }, { "epoch": 1.439281530652089, "grad_norm": 3.1531951427459717, "learning_rate": 5.498295831663827e-06, "loss": 0.4416, "step": 11058 }, { "epoch": 1.4396720031237797, "grad_norm": 2.578901767730713, "learning_rate": 5.496216348321329e-06, "loss": 0.4324, "step": 11061 }, { "epoch": 1.4400624755954705, "grad_norm": 2.5892860889434814, "learning_rate": 5.49413677829088e-06, "loss": 0.4925, "step": 11064 }, { "epoch": 1.4404529480671613, "grad_norm": 2.979255199432373, "learning_rate": 5.492057121935777e-06, "loss": 0.5162, "step": 11067 }, { "epoch": 1.440843420538852, "grad_norm": 2.781018018722534, "learning_rate": 5.489977379619328e-06, "loss": 0.5983, "step": 11070 }, { "epoch": 1.4412338930105428, "grad_norm": 2.6435060501098633, "learning_rate": 5.487897551704862e-06, "loss": 0.4696, "step": 11073 }, { "epoch": 1.4416243654822334, "grad_norm": 2.8207993507385254, "learning_rate": 5.48581763855572e-06, "loss": 0.6021, "step": 11076 }, { "epoch": 1.4420148379539244, "grad_norm": 2.7664260864257812, "learning_rate": 5.4837376405352595e-06, "loss": 0.5321, "step": 11079 }, { "epoch": 1.442405310425615, "grad_norm": 2.488525390625, "learning_rate": 5.481657558006849e-06, "loss": 0.4147, "step": 11082 }, { "epoch": 1.4427957828973057, "grad_norm": 2.4613826274871826, "learning_rate": 5.4795773913338765e-06, "loss": 0.5012, "step": 11085 }, { "epoch": 1.4431862553689965, "grad_norm": 3.2609658241271973, "learning_rate": 5.477497140879745e-06, "loss": 0.5489, "step": 11088 }, { "epoch": 1.4435767278406872, "grad_norm": 2.521787405014038, "learning_rate": 5.475416807007866e-06, "loss": 0.5193, "step": 11091 }, { "epoch": 1.443967200312378, "grad_norm": 3.109292984008789, "learning_rate": 5.473336390081671e-06, "loss": 0.515, "step": 11094 }, { "epoch": 1.4443576727840688, "grad_norm": 3.495357036590576, "learning_rate": 5.471255890464604e-06, "loss": 0.5132, "step": 11097 }, { "epoch": 1.4447481452557596, "grad_norm": 3.0955452919006348, "learning_rate": 5.469175308520124e-06, "loss": 0.4092, "step": 11100 }, { "epoch": 1.4451386177274501, "grad_norm": 2.3907532691955566, "learning_rate": 5.467094644611705e-06, "loss": 0.4515, "step": 11103 }, { "epoch": 1.4455290901991409, "grad_norm": 2.64176869392395, "learning_rate": 5.465013899102836e-06, "loss": 0.5404, "step": 11106 }, { "epoch": 1.4459195626708317, "grad_norm": 2.4482946395874023, "learning_rate": 5.4629330723570154e-06, "loss": 0.4778, "step": 11109 }, { "epoch": 1.4463100351425224, "grad_norm": 2.8428847789764404, "learning_rate": 5.460852164737761e-06, "loss": 0.5936, "step": 11112 }, { "epoch": 1.4467005076142132, "grad_norm": 2.7340033054351807, "learning_rate": 5.458771176608602e-06, "loss": 0.497, "step": 11115 }, { "epoch": 1.447090980085904, "grad_norm": 2.5455846786499023, "learning_rate": 5.456690108333086e-06, "loss": 0.452, "step": 11118 }, { "epoch": 1.4474814525575947, "grad_norm": 2.627605676651001, "learning_rate": 5.454608960274765e-06, "loss": 0.5079, "step": 11121 }, { "epoch": 1.4478719250292853, "grad_norm": 2.4849627017974854, "learning_rate": 5.452527732797219e-06, "loss": 0.46, "step": 11124 }, { "epoch": 1.4482623975009763, "grad_norm": 3.0936851501464844, "learning_rate": 5.450446426264028e-06, "loss": 0.5866, "step": 11127 }, { "epoch": 1.4486528699726668, "grad_norm": 2.7293624877929688, "learning_rate": 5.448365041038796e-06, "loss": 0.5474, "step": 11130 }, { "epoch": 1.4490433424443576, "grad_norm": 2.670182704925537, "learning_rate": 5.446283577485132e-06, "loss": 0.5431, "step": 11133 }, { "epoch": 1.4494338149160484, "grad_norm": 2.502770185470581, "learning_rate": 5.4442020359666655e-06, "loss": 0.4375, "step": 11136 }, { "epoch": 1.4498242873877392, "grad_norm": 2.5619125366210938, "learning_rate": 5.442120416847041e-06, "loss": 0.533, "step": 11139 }, { "epoch": 1.45021475985943, "grad_norm": 2.700932025909424, "learning_rate": 5.440038720489906e-06, "loss": 0.4754, "step": 11142 }, { "epoch": 1.4506052323311207, "grad_norm": 2.834325075149536, "learning_rate": 5.437956947258935e-06, "loss": 0.4661, "step": 11145 }, { "epoch": 1.4509957048028115, "grad_norm": 2.6361968517303467, "learning_rate": 5.435875097517805e-06, "loss": 0.5139, "step": 11148 }, { "epoch": 1.451386177274502, "grad_norm": 2.8682637214660645, "learning_rate": 5.433793171630213e-06, "loss": 0.4919, "step": 11151 }, { "epoch": 1.451776649746193, "grad_norm": 2.7310025691986084, "learning_rate": 5.431711169959866e-06, "loss": 0.4384, "step": 11154 }, { "epoch": 1.4521671222178836, "grad_norm": 3.184248208999634, "learning_rate": 5.429629092870488e-06, "loss": 0.4457, "step": 11157 }, { "epoch": 1.4525575946895743, "grad_norm": 2.8572938442230225, "learning_rate": 5.4275469407258096e-06, "loss": 0.5435, "step": 11160 }, { "epoch": 1.4529480671612651, "grad_norm": 2.845240354537964, "learning_rate": 5.425464713889579e-06, "loss": 0.5269, "step": 11163 }, { "epoch": 1.453338539632956, "grad_norm": 2.744442939758301, "learning_rate": 5.42338241272556e-06, "loss": 0.5117, "step": 11166 }, { "epoch": 1.4537290121046467, "grad_norm": 3.74177622795105, "learning_rate": 5.4213000375975226e-06, "loss": 0.4822, "step": 11169 }, { "epoch": 1.4541194845763374, "grad_norm": 2.628488779067993, "learning_rate": 5.419217588869255e-06, "loss": 0.4378, "step": 11172 }, { "epoch": 1.4545099570480282, "grad_norm": 2.955030918121338, "learning_rate": 5.4171350669045585e-06, "loss": 0.536, "step": 11175 }, { "epoch": 1.4549004295197188, "grad_norm": 2.7912299633026123, "learning_rate": 5.415052472067241e-06, "loss": 0.5115, "step": 11178 }, { "epoch": 1.4552909019914095, "grad_norm": 2.868971824645996, "learning_rate": 5.412969804721132e-06, "loss": 0.4951, "step": 11181 }, { "epoch": 1.4556813744631003, "grad_norm": 2.6621592044830322, "learning_rate": 5.410887065230064e-06, "loss": 0.5373, "step": 11184 }, { "epoch": 1.456071846934791, "grad_norm": 2.808011770248413, "learning_rate": 5.4088042539578925e-06, "loss": 0.5717, "step": 11187 }, { "epoch": 1.4564623194064819, "grad_norm": 2.3780086040496826, "learning_rate": 5.406721371268476e-06, "loss": 0.4945, "step": 11190 }, { "epoch": 1.4568527918781726, "grad_norm": 2.6770284175872803, "learning_rate": 5.404638417525693e-06, "loss": 0.4986, "step": 11193 }, { "epoch": 1.4572432643498634, "grad_norm": 2.5734894275665283, "learning_rate": 5.4025553930934295e-06, "loss": 0.4697, "step": 11196 }, { "epoch": 1.457633736821554, "grad_norm": 2.9276740550994873, "learning_rate": 5.4004722983355854e-06, "loss": 0.5275, "step": 11199 }, { "epoch": 1.458024209293245, "grad_norm": 2.488164186477661, "learning_rate": 5.398389133616074e-06, "loss": 0.4691, "step": 11202 }, { "epoch": 1.4584146817649355, "grad_norm": 2.68459153175354, "learning_rate": 5.396305899298817e-06, "loss": 0.545, "step": 11205 }, { "epoch": 1.4588051542366263, "grad_norm": 2.5225677490234375, "learning_rate": 5.394222595747755e-06, "loss": 0.4992, "step": 11208 }, { "epoch": 1.459195626708317, "grad_norm": 2.5488972663879395, "learning_rate": 5.3921392233268345e-06, "loss": 0.3998, "step": 11211 }, { "epoch": 1.4595860991800078, "grad_norm": 2.9433960914611816, "learning_rate": 5.390055782400016e-06, "loss": 0.5108, "step": 11214 }, { "epoch": 1.4599765716516986, "grad_norm": 2.6426749229431152, "learning_rate": 5.387972273331273e-06, "loss": 0.4687, "step": 11217 }, { "epoch": 1.4603670441233894, "grad_norm": 2.4266717433929443, "learning_rate": 5.38588869648459e-06, "loss": 0.4355, "step": 11220 }, { "epoch": 1.4607575165950801, "grad_norm": 2.7653090953826904, "learning_rate": 5.383805052223964e-06, "loss": 0.5161, "step": 11223 }, { "epoch": 1.4611479890667707, "grad_norm": 2.5222866535186768, "learning_rate": 5.381721340913403e-06, "loss": 0.5055, "step": 11226 }, { "epoch": 1.4615384615384617, "grad_norm": 2.504310369491577, "learning_rate": 5.379637562916925e-06, "loss": 0.5179, "step": 11229 }, { "epoch": 1.4619289340101522, "grad_norm": 3.081963300704956, "learning_rate": 5.377553718598566e-06, "loss": 0.5027, "step": 11232 }, { "epoch": 1.462319406481843, "grad_norm": 2.8216118812561035, "learning_rate": 5.375469808322364e-06, "loss": 0.5212, "step": 11235 }, { "epoch": 1.4627098789535338, "grad_norm": 2.547189474105835, "learning_rate": 5.3733858324523795e-06, "loss": 0.4809, "step": 11238 }, { "epoch": 1.4631003514252245, "grad_norm": 2.744575023651123, "learning_rate": 5.371301791352673e-06, "loss": 0.4945, "step": 11241 }, { "epoch": 1.4634908238969153, "grad_norm": 2.8713624477386475, "learning_rate": 5.369217685387326e-06, "loss": 0.5239, "step": 11244 }, { "epoch": 1.463881296368606, "grad_norm": 2.840270519256592, "learning_rate": 5.367133514920425e-06, "loss": 0.4806, "step": 11247 }, { "epoch": 1.4642717688402969, "grad_norm": 3.1532773971557617, "learning_rate": 5.3650492803160715e-06, "loss": 0.5324, "step": 11250 }, { "epoch": 1.4646622413119874, "grad_norm": 2.547405958175659, "learning_rate": 5.362964981938378e-06, "loss": 0.5092, "step": 11253 }, { "epoch": 1.4650527137836782, "grad_norm": 2.8435122966766357, "learning_rate": 5.360880620151464e-06, "loss": 0.5244, "step": 11256 }, { "epoch": 1.465443186255369, "grad_norm": 2.7665226459503174, "learning_rate": 5.358796195319467e-06, "loss": 0.5135, "step": 11259 }, { "epoch": 1.4658336587270597, "grad_norm": 2.7496416568756104, "learning_rate": 5.356711707806527e-06, "loss": 0.5243, "step": 11262 }, { "epoch": 1.4662241311987505, "grad_norm": 2.4985382556915283, "learning_rate": 5.354627157976803e-06, "loss": 0.4796, "step": 11265 }, { "epoch": 1.4666146036704413, "grad_norm": 3.4736976623535156, "learning_rate": 5.35254254619446e-06, "loss": 0.4909, "step": 11268 }, { "epoch": 1.467005076142132, "grad_norm": 2.8436739444732666, "learning_rate": 5.3504578728236755e-06, "loss": 0.5168, "step": 11271 }, { "epoch": 1.4673955486138226, "grad_norm": 3.0882272720336914, "learning_rate": 5.34837313822864e-06, "loss": 0.5582, "step": 11274 }, { "epoch": 1.4677860210855136, "grad_norm": 5.404419898986816, "learning_rate": 5.346288342773549e-06, "loss": 0.511, "step": 11277 }, { "epoch": 1.4681764935572041, "grad_norm": 2.911956548690796, "learning_rate": 5.344203486822612e-06, "loss": 0.4474, "step": 11280 }, { "epoch": 1.468566966028895, "grad_norm": 3.251417398452759, "learning_rate": 5.342118570740052e-06, "loss": 0.565, "step": 11283 }, { "epoch": 1.4689574385005857, "grad_norm": 2.6077330112457275, "learning_rate": 5.340033594890096e-06, "loss": 0.5114, "step": 11286 }, { "epoch": 1.4693479109722765, "grad_norm": 2.613679885864258, "learning_rate": 5.337948559636986e-06, "loss": 0.4709, "step": 11289 }, { "epoch": 1.4697383834439672, "grad_norm": 3.1710855960845947, "learning_rate": 5.335863465344974e-06, "loss": 0.4774, "step": 11292 }, { "epoch": 1.470128855915658, "grad_norm": 2.876929521560669, "learning_rate": 5.333778312378323e-06, "loss": 0.5028, "step": 11295 }, { "epoch": 1.4705193283873488, "grad_norm": 2.627826690673828, "learning_rate": 5.3316931011013005e-06, "loss": 0.4586, "step": 11298 }, { "epoch": 1.4709098008590393, "grad_norm": 3.5629796981811523, "learning_rate": 5.329607831878191e-06, "loss": 0.5185, "step": 11301 }, { "epoch": 1.47130027333073, "grad_norm": 2.592860221862793, "learning_rate": 5.327522505073288e-06, "loss": 0.508, "step": 11304 }, { "epoch": 1.4716907458024209, "grad_norm": 2.701939344406128, "learning_rate": 5.325437121050892e-06, "loss": 0.5413, "step": 11307 }, { "epoch": 1.4720812182741116, "grad_norm": 2.25862717628479, "learning_rate": 5.323351680175315e-06, "loss": 0.4955, "step": 11310 }, { "epoch": 1.4724716907458024, "grad_norm": 2.581425666809082, "learning_rate": 5.3212661828108804e-06, "loss": 0.5118, "step": 11313 }, { "epoch": 1.4728621632174932, "grad_norm": 2.7208588123321533, "learning_rate": 5.31918062932192e-06, "loss": 0.5058, "step": 11316 }, { "epoch": 1.473252635689184, "grad_norm": 2.554607391357422, "learning_rate": 5.317095020072773e-06, "loss": 0.4651, "step": 11319 }, { "epoch": 1.4736431081608747, "grad_norm": 2.7195706367492676, "learning_rate": 5.315009355427795e-06, "loss": 0.5694, "step": 11322 }, { "epoch": 1.4740335806325655, "grad_norm": 2.790142774581909, "learning_rate": 5.312923635751344e-06, "loss": 0.4539, "step": 11325 }, { "epoch": 1.474424053104256, "grad_norm": 2.372218132019043, "learning_rate": 5.310837861407794e-06, "loss": 0.4228, "step": 11328 }, { "epoch": 1.4748145255759468, "grad_norm": 2.9723665714263916, "learning_rate": 5.308752032761522e-06, "loss": 0.469, "step": 11331 }, { "epoch": 1.4752049980476376, "grad_norm": 2.6167614459991455, "learning_rate": 5.306666150176919e-06, "loss": 0.5017, "step": 11334 }, { "epoch": 1.4755954705193284, "grad_norm": 2.730600357055664, "learning_rate": 5.304580214018385e-06, "loss": 0.5289, "step": 11337 }, { "epoch": 1.4759859429910192, "grad_norm": 2.659367799758911, "learning_rate": 5.30249422465033e-06, "loss": 0.425, "step": 11340 }, { "epoch": 1.47637641546271, "grad_norm": 2.761075019836426, "learning_rate": 5.300408182437169e-06, "loss": 0.5373, "step": 11343 }, { "epoch": 1.4767668879344007, "grad_norm": 2.6184074878692627, "learning_rate": 5.298322087743331e-06, "loss": 0.5185, "step": 11346 }, { "epoch": 1.4771573604060912, "grad_norm": 2.6529762744903564, "learning_rate": 5.296235940933251e-06, "loss": 0.4562, "step": 11349 }, { "epoch": 1.4775478328777822, "grad_norm": 2.837994337081909, "learning_rate": 5.294149742371375e-06, "loss": 0.535, "step": 11352 }, { "epoch": 1.4779383053494728, "grad_norm": 2.4222748279571533, "learning_rate": 5.292063492422159e-06, "loss": 0.4687, "step": 11355 }, { "epoch": 1.4783287778211636, "grad_norm": 2.741462230682373, "learning_rate": 5.289977191450064e-06, "loss": 0.5317, "step": 11358 }, { "epoch": 1.4787192502928543, "grad_norm": 2.7143518924713135, "learning_rate": 5.287890839819566e-06, "loss": 0.5798, "step": 11361 }, { "epoch": 1.479109722764545, "grad_norm": 2.4520294666290283, "learning_rate": 5.285804437895141e-06, "loss": 0.4746, "step": 11364 }, { "epoch": 1.4795001952362359, "grad_norm": 3.155306577682495, "learning_rate": 5.283717986041285e-06, "loss": 0.5228, "step": 11367 }, { "epoch": 1.4798906677079267, "grad_norm": 2.5893876552581787, "learning_rate": 5.281631484622491e-06, "loss": 0.4359, "step": 11370 }, { "epoch": 1.4802811401796174, "grad_norm": 2.689603090286255, "learning_rate": 5.27954493400327e-06, "loss": 0.5414, "step": 11373 }, { "epoch": 1.480671612651308, "grad_norm": 2.8122377395629883, "learning_rate": 5.277458334548138e-06, "loss": 0.4752, "step": 11376 }, { "epoch": 1.4810620851229988, "grad_norm": 2.8746378421783447, "learning_rate": 5.2753716866216175e-06, "loss": 0.5519, "step": 11379 }, { "epoch": 1.4814525575946895, "grad_norm": 2.670448064804077, "learning_rate": 5.273284990588243e-06, "loss": 0.4566, "step": 11382 }, { "epoch": 1.4818430300663803, "grad_norm": 2.758208751678467, "learning_rate": 5.2711982468125556e-06, "loss": 0.5005, "step": 11385 }, { "epoch": 1.482233502538071, "grad_norm": 2.7960431575775146, "learning_rate": 5.269111455659105e-06, "loss": 0.4609, "step": 11388 }, { "epoch": 1.4826239750097618, "grad_norm": 3.621656656265259, "learning_rate": 5.26702461749245e-06, "loss": 0.5131, "step": 11391 }, { "epoch": 1.4830144474814526, "grad_norm": 3.8719372749328613, "learning_rate": 5.264937732677153e-06, "loss": 0.4925, "step": 11394 }, { "epoch": 1.4834049199531434, "grad_norm": 2.7081613540649414, "learning_rate": 5.262850801577796e-06, "loss": 0.5013, "step": 11397 }, { "epoch": 1.4837953924248342, "grad_norm": 2.990955114364624, "learning_rate": 5.260763824558954e-06, "loss": 0.5611, "step": 11400 }, { "epoch": 1.4841858648965247, "grad_norm": 2.5260677337646484, "learning_rate": 5.258676801985222e-06, "loss": 0.5057, "step": 11403 }, { "epoch": 1.4845763373682155, "grad_norm": 2.441223382949829, "learning_rate": 5.256589734221196e-06, "loss": 0.4837, "step": 11406 }, { "epoch": 1.4849668098399063, "grad_norm": 2.5809214115142822, "learning_rate": 5.254502621631482e-06, "loss": 0.4651, "step": 11409 }, { "epoch": 1.485357282311597, "grad_norm": 2.6332476139068604, "learning_rate": 5.252415464580698e-06, "loss": 0.5485, "step": 11412 }, { "epoch": 1.4857477547832878, "grad_norm": 2.675795316696167, "learning_rate": 5.25032826343346e-06, "loss": 0.5142, "step": 11415 }, { "epoch": 1.4861382272549786, "grad_norm": 2.489439010620117, "learning_rate": 5.248241018554404e-06, "loss": 0.4575, "step": 11418 }, { "epoch": 1.4865286997266693, "grad_norm": 2.5916011333465576, "learning_rate": 5.246153730308162e-06, "loss": 0.51, "step": 11421 }, { "epoch": 1.48691917219836, "grad_norm": 2.8045613765716553, "learning_rate": 5.244066399059385e-06, "loss": 0.4623, "step": 11424 }, { "epoch": 1.487309644670051, "grad_norm": 2.699486255645752, "learning_rate": 5.241979025172717e-06, "loss": 0.5239, "step": 11427 }, { "epoch": 1.4877001171417414, "grad_norm": 2.300581693649292, "learning_rate": 5.239891609012824e-06, "loss": 0.4958, "step": 11430 }, { "epoch": 1.4880905896134322, "grad_norm": 2.496739625930786, "learning_rate": 5.237804150944373e-06, "loss": 0.5159, "step": 11433 }, { "epoch": 1.488481062085123, "grad_norm": 2.7403767108917236, "learning_rate": 5.2357166513320344e-06, "loss": 0.5758, "step": 11436 }, { "epoch": 1.4888715345568138, "grad_norm": 3.07275128364563, "learning_rate": 5.233629110540494e-06, "loss": 0.4913, "step": 11439 }, { "epoch": 1.4892620070285045, "grad_norm": 2.733052968978882, "learning_rate": 5.2315415289344405e-06, "loss": 0.5081, "step": 11442 }, { "epoch": 1.4896524795001953, "grad_norm": 2.944462299346924, "learning_rate": 5.229453906878569e-06, "loss": 0.5173, "step": 11445 }, { "epoch": 1.490042951971886, "grad_norm": 2.523071527481079, "learning_rate": 5.227366244737582e-06, "loss": 0.5012, "step": 11448 }, { "epoch": 1.4904334244435766, "grad_norm": 2.42546010017395, "learning_rate": 5.225278542876189e-06, "loss": 0.5246, "step": 11451 }, { "epoch": 1.4908238969152674, "grad_norm": 2.5009751319885254, "learning_rate": 5.2231908016591104e-06, "loss": 0.4843, "step": 11454 }, { "epoch": 1.4912143693869582, "grad_norm": 3.1024861335754395, "learning_rate": 5.221103021451066e-06, "loss": 0.4763, "step": 11457 }, { "epoch": 1.491604841858649, "grad_norm": 2.727797269821167, "learning_rate": 5.219015202616792e-06, "loss": 0.5251, "step": 11460 }, { "epoch": 1.4919953143303397, "grad_norm": 2.642549753189087, "learning_rate": 5.2169273455210205e-06, "loss": 0.4429, "step": 11463 }, { "epoch": 1.4923857868020305, "grad_norm": 2.5647194385528564, "learning_rate": 5.214839450528498e-06, "loss": 0.5479, "step": 11466 }, { "epoch": 1.4927762592737213, "grad_norm": 2.818277597427368, "learning_rate": 5.212751518003977e-06, "loss": 0.4881, "step": 11469 }, { "epoch": 1.4931667317454118, "grad_norm": 2.8775992393493652, "learning_rate": 5.210663548312212e-06, "loss": 0.536, "step": 11472 }, { "epoch": 1.4935572042171028, "grad_norm": 3.7240641117095947, "learning_rate": 5.208575541817971e-06, "loss": 0.4804, "step": 11475 }, { "epoch": 1.4939476766887934, "grad_norm": 2.5948472023010254, "learning_rate": 5.206487498886017e-06, "loss": 0.6184, "step": 11478 }, { "epoch": 1.4943381491604841, "grad_norm": 3.2808594703674316, "learning_rate": 5.2043994198811356e-06, "loss": 0.6083, "step": 11481 }, { "epoch": 1.494728621632175, "grad_norm": 2.4906272888183594, "learning_rate": 5.202311305168103e-06, "loss": 0.4931, "step": 11484 }, { "epoch": 1.4951190941038657, "grad_norm": 2.869908571243286, "learning_rate": 5.200223155111711e-06, "loss": 0.5207, "step": 11487 }, { "epoch": 1.4955095665755564, "grad_norm": 2.8915436267852783, "learning_rate": 5.198134970076757e-06, "loss": 0.5586, "step": 11490 }, { "epoch": 1.4959000390472472, "grad_norm": 3.477677345275879, "learning_rate": 5.196046750428039e-06, "loss": 0.5211, "step": 11493 }, { "epoch": 1.496290511518938, "grad_norm": 2.8467986583709717, "learning_rate": 5.193958496530367e-06, "loss": 0.4828, "step": 11496 }, { "epoch": 1.4966809839906285, "grad_norm": 2.7901194095611572, "learning_rate": 5.1918702087485515e-06, "loss": 0.5162, "step": 11499 }, { "epoch": 1.4970714564623195, "grad_norm": 2.9377663135528564, "learning_rate": 5.189781887447417e-06, "loss": 0.4016, "step": 11502 }, { "epoch": 1.49746192893401, "grad_norm": 2.7478699684143066, "learning_rate": 5.187693532991784e-06, "loss": 0.4597, "step": 11505 }, { "epoch": 1.4978524014057009, "grad_norm": 2.764273166656494, "learning_rate": 5.185605145746487e-06, "loss": 0.4994, "step": 11508 }, { "epoch": 1.4982428738773916, "grad_norm": 2.593223810195923, "learning_rate": 5.183516726076362e-06, "loss": 0.5124, "step": 11511 }, { "epoch": 1.4986333463490824, "grad_norm": 2.9273481369018555, "learning_rate": 5.181428274346249e-06, "loss": 0.4994, "step": 11514 }, { "epoch": 1.4990238188207732, "grad_norm": 2.4240915775299072, "learning_rate": 5.179339790920999e-06, "loss": 0.485, "step": 11517 }, { "epoch": 1.499414291292464, "grad_norm": 2.7207272052764893, "learning_rate": 5.177251276165465e-06, "loss": 0.538, "step": 11520 }, { "epoch": 1.4998047637641547, "grad_norm": 2.5581884384155273, "learning_rate": 5.175162730444505e-06, "loss": 0.4982, "step": 11523 }, { "epoch": 1.5001952362358453, "grad_norm": 2.6691441535949707, "learning_rate": 5.173074154122986e-06, "loss": 0.5418, "step": 11526 }, { "epoch": 1.5005857087075363, "grad_norm": 2.560971736907959, "learning_rate": 5.170985547565775e-06, "loss": 0.473, "step": 11529 }, { "epoch": 1.5009761811792268, "grad_norm": 2.5895869731903076, "learning_rate": 5.1688969111377505e-06, "loss": 0.4541, "step": 11532 }, { "epoch": 1.5013666536509176, "grad_norm": 2.396502733230591, "learning_rate": 5.166808245203789e-06, "loss": 0.51, "step": 11535 }, { "epoch": 1.5017571261226084, "grad_norm": 2.532872200012207, "learning_rate": 5.164719550128779e-06, "loss": 0.5406, "step": 11538 }, { "epoch": 1.5021475985942991, "grad_norm": 2.419482946395874, "learning_rate": 5.162630826277609e-06, "loss": 0.4598, "step": 11541 }, { "epoch": 1.50253807106599, "grad_norm": 2.596123218536377, "learning_rate": 5.160542074015177e-06, "loss": 0.4867, "step": 11544 }, { "epoch": 1.5029285435376805, "grad_norm": 2.9170663356781006, "learning_rate": 5.158453293706383e-06, "loss": 0.4896, "step": 11547 }, { "epoch": 1.5033190160093715, "grad_norm": 2.6187119483947754, "learning_rate": 5.15636448571613e-06, "loss": 0.4578, "step": 11550 }, { "epoch": 1.503709488481062, "grad_norm": 2.4606587886810303, "learning_rate": 5.1542756504093315e-06, "loss": 0.4461, "step": 11553 }, { "epoch": 1.5040999609527528, "grad_norm": 2.533982038497925, "learning_rate": 5.152186788150901e-06, "loss": 0.499, "step": 11556 }, { "epoch": 1.5044904334244436, "grad_norm": 2.852947473526001, "learning_rate": 5.1500978993057596e-06, "loss": 0.5007, "step": 11559 }, { "epoch": 1.5048809058961343, "grad_norm": 3.0088095664978027, "learning_rate": 5.1480089842388295e-06, "loss": 0.5055, "step": 11562 }, { "epoch": 1.505271378367825, "grad_norm": 2.5373263359069824, "learning_rate": 5.145920043315041e-06, "loss": 0.4603, "step": 11565 }, { "epoch": 1.5056618508395159, "grad_norm": 2.940673828125, "learning_rate": 5.143831076899329e-06, "loss": 0.6183, "step": 11568 }, { "epoch": 1.5060523233112066, "grad_norm": 2.945150375366211, "learning_rate": 5.14174208535663e-06, "loss": 0.55, "step": 11571 }, { "epoch": 1.5064427957828972, "grad_norm": 2.7196662425994873, "learning_rate": 5.1396530690518876e-06, "loss": 0.5235, "step": 11574 }, { "epoch": 1.5068332682545882, "grad_norm": 2.892042875289917, "learning_rate": 5.137564028350048e-06, "loss": 0.5291, "step": 11577 }, { "epoch": 1.5072237407262787, "grad_norm": 2.4638333320617676, "learning_rate": 5.135474963616062e-06, "loss": 0.4451, "step": 11580 }, { "epoch": 1.5076142131979695, "grad_norm": 2.493852138519287, "learning_rate": 5.133385875214883e-06, "loss": 0.5536, "step": 11583 }, { "epoch": 1.5080046856696603, "grad_norm": 2.891443967819214, "learning_rate": 5.131296763511473e-06, "loss": 0.4479, "step": 11586 }, { "epoch": 1.508395158141351, "grad_norm": 2.568324327468872, "learning_rate": 5.129207628870796e-06, "loss": 0.4524, "step": 11589 }, { "epoch": 1.5087856306130418, "grad_norm": 3.3236899375915527, "learning_rate": 5.127118471657817e-06, "loss": 0.4885, "step": 11592 }, { "epoch": 1.5091761030847324, "grad_norm": 2.7332510948181152, "learning_rate": 5.125029292237508e-06, "loss": 0.5649, "step": 11595 }, { "epoch": 1.5095665755564234, "grad_norm": 2.6280899047851562, "learning_rate": 5.122940090974846e-06, "loss": 0.4571, "step": 11598 }, { "epoch": 1.509957048028114, "grad_norm": 2.642153024673462, "learning_rate": 5.120850868234808e-06, "loss": 0.5028, "step": 11601 }, { "epoch": 1.510347520499805, "grad_norm": 2.4956655502319336, "learning_rate": 5.118761624382377e-06, "loss": 0.5421, "step": 11604 }, { "epoch": 1.5107379929714955, "grad_norm": 3.0363759994506836, "learning_rate": 5.11667235978254e-06, "loss": 0.5359, "step": 11607 }, { "epoch": 1.5111284654431862, "grad_norm": 3.446927070617676, "learning_rate": 5.114583074800289e-06, "loss": 0.5317, "step": 11610 }, { "epoch": 1.511518937914877, "grad_norm": 2.6204493045806885, "learning_rate": 5.112493769800614e-06, "loss": 0.4863, "step": 11613 }, { "epoch": 1.5119094103865678, "grad_norm": 2.5534026622772217, "learning_rate": 5.110404445148515e-06, "loss": 0.5407, "step": 11616 }, { "epoch": 1.5122998828582586, "grad_norm": 2.5729386806488037, "learning_rate": 5.108315101208991e-06, "loss": 0.538, "step": 11619 }, { "epoch": 1.512690355329949, "grad_norm": 2.6536967754364014, "learning_rate": 5.106225738347047e-06, "loss": 0.4916, "step": 11622 }, { "epoch": 1.51308082780164, "grad_norm": 2.6462488174438477, "learning_rate": 5.10413635692769e-06, "loss": 0.4578, "step": 11625 }, { "epoch": 1.5134713002733307, "grad_norm": 2.5776121616363525, "learning_rate": 5.10204695731593e-06, "loss": 0.5028, "step": 11628 }, { "epoch": 1.5138617727450214, "grad_norm": 2.8399088382720947, "learning_rate": 5.099957539876783e-06, "loss": 0.5134, "step": 11631 }, { "epoch": 1.5142522452167122, "grad_norm": 2.53810453414917, "learning_rate": 5.097868104975262e-06, "loss": 0.5759, "step": 11634 }, { "epoch": 1.514642717688403, "grad_norm": 2.6173861026763916, "learning_rate": 5.09577865297639e-06, "loss": 0.522, "step": 11637 }, { "epoch": 1.5150331901600937, "grad_norm": 2.800180673599243, "learning_rate": 5.0936891842451895e-06, "loss": 0.6223, "step": 11640 }, { "epoch": 1.5154236626317843, "grad_norm": 2.824063539505005, "learning_rate": 5.0915996991466845e-06, "loss": 0.4638, "step": 11643 }, { "epoch": 1.5158141351034753, "grad_norm": 2.6472010612487793, "learning_rate": 5.089510198045904e-06, "loss": 0.4483, "step": 11646 }, { "epoch": 1.5162046075751658, "grad_norm": 2.6510941982269287, "learning_rate": 5.087420681307881e-06, "loss": 0.5283, "step": 11649 }, { "epoch": 1.5165950800468568, "grad_norm": 2.5269935131073, "learning_rate": 5.085331149297649e-06, "loss": 0.4922, "step": 11652 }, { "epoch": 1.5169855525185474, "grad_norm": 2.427276134490967, "learning_rate": 5.083241602380246e-06, "loss": 0.4429, "step": 11655 }, { "epoch": 1.5173760249902382, "grad_norm": 2.5796139240264893, "learning_rate": 5.081152040920708e-06, "loss": 0.5555, "step": 11658 }, { "epoch": 1.517766497461929, "grad_norm": 3.097916603088379, "learning_rate": 5.079062465284081e-06, "loss": 0.5131, "step": 11661 }, { "epoch": 1.5181569699336197, "grad_norm": 2.6202802658081055, "learning_rate": 5.076972875835406e-06, "loss": 0.5081, "step": 11664 }, { "epoch": 1.5185474424053105, "grad_norm": 2.4596011638641357, "learning_rate": 5.074883272939732e-06, "loss": 0.4617, "step": 11667 }, { "epoch": 1.518937914877001, "grad_norm": 3.6132545471191406, "learning_rate": 5.072793656962108e-06, "loss": 0.4359, "step": 11670 }, { "epoch": 1.519328387348692, "grad_norm": 2.3989944458007812, "learning_rate": 5.0707040282675855e-06, "loss": 0.4565, "step": 11673 }, { "epoch": 1.5197188598203826, "grad_norm": 3.203303813934326, "learning_rate": 5.068614387221218e-06, "loss": 0.4667, "step": 11676 }, { "epoch": 1.5201093322920736, "grad_norm": 2.8090553283691406, "learning_rate": 5.066524734188061e-06, "loss": 0.5105, "step": 11679 }, { "epoch": 1.5204998047637641, "grad_norm": 2.759253740310669, "learning_rate": 5.064435069533174e-06, "loss": 0.5128, "step": 11682 }, { "epoch": 1.520890277235455, "grad_norm": 2.5702476501464844, "learning_rate": 5.062345393621615e-06, "loss": 0.4793, "step": 11685 }, { "epoch": 1.5212807497071457, "grad_norm": 2.932741403579712, "learning_rate": 5.060255706818447e-06, "loss": 0.4439, "step": 11688 }, { "epoch": 1.5216712221788364, "grad_norm": 2.6461048126220703, "learning_rate": 5.058166009488733e-06, "loss": 0.5125, "step": 11691 }, { "epoch": 1.5220616946505272, "grad_norm": 2.705612897872925, "learning_rate": 5.05607630199754e-06, "loss": 0.533, "step": 11694 }, { "epoch": 1.5224521671222178, "grad_norm": 2.7387025356292725, "learning_rate": 5.0539865847099354e-06, "loss": 0.4833, "step": 11697 }, { "epoch": 1.5228426395939088, "grad_norm": 2.522993803024292, "learning_rate": 5.051896857990988e-06, "loss": 0.5356, "step": 11700 }, { "epoch": 1.5232331120655993, "grad_norm": 2.885016441345215, "learning_rate": 5.049807122205768e-06, "loss": 0.5451, "step": 11703 }, { "epoch": 1.52362358453729, "grad_norm": 2.7136266231536865, "learning_rate": 5.04771737771935e-06, "loss": 0.4738, "step": 11706 }, { "epoch": 1.5240140570089808, "grad_norm": 2.6322624683380127, "learning_rate": 5.045627624896804e-06, "loss": 0.5244, "step": 11709 }, { "epoch": 1.5244045294806716, "grad_norm": 3.288853406906128, "learning_rate": 5.0435378641032095e-06, "loss": 0.5564, "step": 11712 }, { "epoch": 1.5247950019523624, "grad_norm": 2.5992119312286377, "learning_rate": 5.0414480957036415e-06, "loss": 0.5101, "step": 11715 }, { "epoch": 1.525185474424053, "grad_norm": 2.7537200450897217, "learning_rate": 5.039358320063179e-06, "loss": 0.5499, "step": 11718 }, { "epoch": 1.525575946895744, "grad_norm": 2.5986032485961914, "learning_rate": 5.037268537546901e-06, "loss": 0.4637, "step": 11721 }, { "epoch": 1.5259664193674345, "grad_norm": 2.795783042907715, "learning_rate": 5.035178748519887e-06, "loss": 0.5092, "step": 11724 }, { "epoch": 1.5263568918391255, "grad_norm": 2.8021063804626465, "learning_rate": 5.03308895334722e-06, "loss": 0.5189, "step": 11727 }, { "epoch": 1.526747364310816, "grad_norm": 2.8685810565948486, "learning_rate": 5.0309991523939805e-06, "loss": 0.5911, "step": 11730 }, { "epoch": 1.5271378367825068, "grad_norm": 2.5764682292938232, "learning_rate": 5.028909346025257e-06, "loss": 0.4561, "step": 11733 }, { "epoch": 1.5275283092541976, "grad_norm": 2.9899494647979736, "learning_rate": 5.026819534606131e-06, "loss": 0.4998, "step": 11736 }, { "epoch": 1.5279187817258884, "grad_norm": 2.4830875396728516, "learning_rate": 5.024729718501688e-06, "loss": 0.4199, "step": 11739 }, { "epoch": 1.5283092541975791, "grad_norm": 2.9966747760772705, "learning_rate": 5.022639898077016e-06, "loss": 0.4864, "step": 11742 }, { "epoch": 1.5286997266692697, "grad_norm": 2.7699429988861084, "learning_rate": 5.020550073697202e-06, "loss": 0.5197, "step": 11745 }, { "epoch": 1.5290901991409607, "grad_norm": 3.0958468914031982, "learning_rate": 5.018460245727333e-06, "loss": 0.4696, "step": 11748 }, { "epoch": 1.5294806716126512, "grad_norm": 2.7975351810455322, "learning_rate": 5.016370414532495e-06, "loss": 0.563, "step": 11751 }, { "epoch": 1.5298711440843422, "grad_norm": 3.140310525894165, "learning_rate": 5.014280580477782e-06, "loss": 0.5284, "step": 11754 }, { "epoch": 1.5302616165560328, "grad_norm": 2.853102684020996, "learning_rate": 5.012190743928282e-06, "loss": 0.4683, "step": 11757 }, { "epoch": 1.5306520890277235, "grad_norm": 2.716357946395874, "learning_rate": 5.010100905249084e-06, "loss": 0.4811, "step": 11760 }, { "epoch": 1.5310425614994143, "grad_norm": 3.0148074626922607, "learning_rate": 5.0080110648052815e-06, "loss": 0.5135, "step": 11763 }, { "epoch": 1.531433033971105, "grad_norm": 2.7584571838378906, "learning_rate": 5.00592122296196e-06, "loss": 0.4751, "step": 11766 }, { "epoch": 1.5318235064427959, "grad_norm": 2.487861156463623, "learning_rate": 5.003831380084216e-06, "loss": 0.5302, "step": 11769 }, { "epoch": 1.5322139789144864, "grad_norm": 2.4335525035858154, "learning_rate": 5.001741536537135e-06, "loss": 0.4591, "step": 11772 }, { "epoch": 1.5326044513861774, "grad_norm": 2.387814521789551, "learning_rate": 4.999651692685813e-06, "loss": 0.4683, "step": 11775 }, { "epoch": 1.532994923857868, "grad_norm": 2.30008602142334, "learning_rate": 4.997561848895338e-06, "loss": 0.4718, "step": 11778 }, { "epoch": 1.5333853963295587, "grad_norm": 2.351167678833008, "learning_rate": 4.995472005530804e-06, "loss": 0.4853, "step": 11781 }, { "epoch": 1.5337758688012495, "grad_norm": 2.77471923828125, "learning_rate": 4.993382162957302e-06, "loss": 0.4924, "step": 11784 }, { "epoch": 1.5341663412729403, "grad_norm": 2.6090359687805176, "learning_rate": 4.991292321539921e-06, "loss": 0.5111, "step": 11787 }, { "epoch": 1.534556813744631, "grad_norm": 2.52298903465271, "learning_rate": 4.989202481643755e-06, "loss": 0.4663, "step": 11790 }, { "epoch": 1.5349472862163216, "grad_norm": 3.639498472213745, "learning_rate": 4.98711264363389e-06, "loss": 0.4876, "step": 11793 }, { "epoch": 1.5353377586880126, "grad_norm": 2.813267946243286, "learning_rate": 4.98502280787542e-06, "loss": 0.5949, "step": 11796 }, { "epoch": 1.5357282311597031, "grad_norm": 2.9353835582733154, "learning_rate": 4.9829329747334345e-06, "loss": 0.4813, "step": 11799 }, { "epoch": 1.5361187036313941, "grad_norm": 2.37113618850708, "learning_rate": 4.9808431445730225e-06, "loss": 0.4632, "step": 11802 }, { "epoch": 1.5365091761030847, "grad_norm": 3.059115409851074, "learning_rate": 4.978753317759271e-06, "loss": 0.5102, "step": 11805 }, { "epoch": 1.5368996485747755, "grad_norm": 2.4613428115844727, "learning_rate": 4.976663494657271e-06, "loss": 0.4513, "step": 11808 }, { "epoch": 1.5372901210464662, "grad_norm": 2.785877227783203, "learning_rate": 4.97457367563211e-06, "loss": 0.5541, "step": 11811 }, { "epoch": 1.537680593518157, "grad_norm": 2.452495574951172, "learning_rate": 4.972483861048875e-06, "loss": 0.4388, "step": 11814 }, { "epoch": 1.5380710659898478, "grad_norm": 2.4876022338867188, "learning_rate": 4.970394051272651e-06, "loss": 0.5395, "step": 11817 }, { "epoch": 1.5384615384615383, "grad_norm": 2.76699161529541, "learning_rate": 4.968304246668524e-06, "loss": 0.5129, "step": 11820 }, { "epoch": 1.5388520109332293, "grad_norm": 2.546515703201294, "learning_rate": 4.9662144476015785e-06, "loss": 0.4904, "step": 11823 }, { "epoch": 1.5392424834049199, "grad_norm": 2.5809149742126465, "learning_rate": 4.964124654436898e-06, "loss": 0.4912, "step": 11826 }, { "epoch": 1.5396329558766109, "grad_norm": 3.259404182434082, "learning_rate": 4.962034867539565e-06, "loss": 0.5205, "step": 11829 }, { "epoch": 1.5400234283483014, "grad_norm": 2.743436098098755, "learning_rate": 4.959945087274659e-06, "loss": 0.4914, "step": 11832 }, { "epoch": 1.5404139008199922, "grad_norm": 2.653174638748169, "learning_rate": 4.957855314007265e-06, "loss": 0.5208, "step": 11835 }, { "epoch": 1.540804373291683, "grad_norm": 2.54440975189209, "learning_rate": 4.955765548102457e-06, "loss": 0.4773, "step": 11838 }, { "epoch": 1.5411948457633737, "grad_norm": 3.731177806854248, "learning_rate": 4.9536757899253156e-06, "loss": 0.4968, "step": 11841 }, { "epoch": 1.5415853182350645, "grad_norm": 2.7818851470947266, "learning_rate": 4.951586039840916e-06, "loss": 0.4352, "step": 11844 }, { "epoch": 1.541975790706755, "grad_norm": 2.795779228210449, "learning_rate": 4.949496298214331e-06, "loss": 0.4873, "step": 11847 }, { "epoch": 1.542366263178446, "grad_norm": 2.714663028717041, "learning_rate": 4.9474065654106384e-06, "loss": 0.6013, "step": 11850 }, { "epoch": 1.5427567356501366, "grad_norm": 2.7950053215026855, "learning_rate": 4.945316841794909e-06, "loss": 0.4848, "step": 11853 }, { "epoch": 1.5431472081218274, "grad_norm": 2.954528570175171, "learning_rate": 4.943227127732212e-06, "loss": 0.6347, "step": 11856 }, { "epoch": 1.5435376805935181, "grad_norm": 2.76151442527771, "learning_rate": 4.9411374235876155e-06, "loss": 0.5226, "step": 11859 }, { "epoch": 1.543928153065209, "grad_norm": 4.05312967300415, "learning_rate": 4.939047729726189e-06, "loss": 0.5568, "step": 11862 }, { "epoch": 1.5443186255368997, "grad_norm": 2.8274898529052734, "learning_rate": 4.936958046512994e-06, "loss": 0.5498, "step": 11865 }, { "epoch": 1.5447090980085902, "grad_norm": 2.768404483795166, "learning_rate": 4.934868374313097e-06, "loss": 0.5259, "step": 11868 }, { "epoch": 1.5450995704802812, "grad_norm": 2.711665153503418, "learning_rate": 4.9327787134915576e-06, "loss": 0.5094, "step": 11871 }, { "epoch": 1.5454900429519718, "grad_norm": 2.3921022415161133, "learning_rate": 4.9306890644134345e-06, "loss": 0.4367, "step": 11874 }, { "epoch": 1.5458805154236628, "grad_norm": 2.8708338737487793, "learning_rate": 4.928599427443788e-06, "loss": 0.5638, "step": 11877 }, { "epoch": 1.5462709878953533, "grad_norm": 2.8968679904937744, "learning_rate": 4.92650980294767e-06, "loss": 0.524, "step": 11880 }, { "epoch": 1.546661460367044, "grad_norm": 3.2613213062286377, "learning_rate": 4.924420191290134e-06, "loss": 0.4995, "step": 11883 }, { "epoch": 1.5470519328387349, "grad_norm": 2.6707489490509033, "learning_rate": 4.922330592836234e-06, "loss": 0.4213, "step": 11886 }, { "epoch": 1.5474424053104257, "grad_norm": 2.552860736846924, "learning_rate": 4.920241007951012e-06, "loss": 0.4674, "step": 11889 }, { "epoch": 1.5478328777821164, "grad_norm": 3.741816282272339, "learning_rate": 4.918151436999521e-06, "loss": 0.4466, "step": 11892 }, { "epoch": 1.548223350253807, "grad_norm": 3.1363577842712402, "learning_rate": 4.916061880346802e-06, "loss": 0.4513, "step": 11895 }, { "epoch": 1.548613822725498, "grad_norm": 3.1754071712493896, "learning_rate": 4.913972338357895e-06, "loss": 0.4901, "step": 11898 }, { "epoch": 1.5490042951971885, "grad_norm": 2.3715884685516357, "learning_rate": 4.91188281139784e-06, "loss": 0.5472, "step": 11901 }, { "epoch": 1.5493947676688793, "grad_norm": 2.7456414699554443, "learning_rate": 4.909793299831673e-06, "loss": 0.5158, "step": 11904 }, { "epoch": 1.54978524014057, "grad_norm": 2.7107367515563965, "learning_rate": 4.907703804024425e-06, "loss": 0.4408, "step": 11907 }, { "epoch": 1.5501757126122608, "grad_norm": 2.649536609649658, "learning_rate": 4.90561432434113e-06, "loss": 0.534, "step": 11910 }, { "epoch": 1.5505661850839516, "grad_norm": 2.4240305423736572, "learning_rate": 4.903524861146814e-06, "loss": 0.49, "step": 11913 }, { "epoch": 1.5509566575556424, "grad_norm": 2.6543915271759033, "learning_rate": 4.901435414806502e-06, "loss": 0.4731, "step": 11916 }, { "epoch": 1.5513471300273332, "grad_norm": 2.658057928085327, "learning_rate": 4.899345985685215e-06, "loss": 0.4885, "step": 11919 }, { "epoch": 1.5517376024990237, "grad_norm": 2.6774919033050537, "learning_rate": 4.8972565741479715e-06, "loss": 0.4726, "step": 11922 }, { "epoch": 1.5521280749707147, "grad_norm": 2.612046718597412, "learning_rate": 4.89516718055979e-06, "loss": 0.4846, "step": 11925 }, { "epoch": 1.5525185474424053, "grad_norm": 2.682743549346924, "learning_rate": 4.893077805285681e-06, "loss": 0.4907, "step": 11928 }, { "epoch": 1.552909019914096, "grad_norm": 2.6726317405700684, "learning_rate": 4.890988448690653e-06, "loss": 0.4273, "step": 11931 }, { "epoch": 1.5532994923857868, "grad_norm": 3.1792590618133545, "learning_rate": 4.8888991111397144e-06, "loss": 0.5008, "step": 11934 }, { "epoch": 1.5536899648574776, "grad_norm": 2.972460985183716, "learning_rate": 4.886809792997868e-06, "loss": 0.534, "step": 11937 }, { "epoch": 1.5540804373291683, "grad_norm": 2.578676223754883, "learning_rate": 4.884720494630113e-06, "loss": 0.5175, "step": 11940 }, { "epoch": 1.554470909800859, "grad_norm": 2.6860499382019043, "learning_rate": 4.882631216401445e-06, "loss": 0.4532, "step": 11943 }, { "epoch": 1.5548613822725499, "grad_norm": 2.5615968704223633, "learning_rate": 4.880541958676856e-06, "loss": 0.4637, "step": 11946 }, { "epoch": 1.5552518547442404, "grad_norm": 2.9076266288757324, "learning_rate": 4.878452721821336e-06, "loss": 0.5384, "step": 11949 }, { "epoch": 1.5556423272159314, "grad_norm": 2.601215124130249, "learning_rate": 4.876363506199869e-06, "loss": 0.4748, "step": 11952 }, { "epoch": 1.556032799687622, "grad_norm": 2.8393771648406982, "learning_rate": 4.874274312177439e-06, "loss": 0.5763, "step": 11955 }, { "epoch": 1.5564232721593128, "grad_norm": 2.3092920780181885, "learning_rate": 4.87218514011902e-06, "loss": 0.4322, "step": 11958 }, { "epoch": 1.5568137446310035, "grad_norm": 2.9448838233947754, "learning_rate": 4.87009599038959e-06, "loss": 0.5118, "step": 11961 }, { "epoch": 1.5572042171026943, "grad_norm": 2.6871285438537598, "learning_rate": 4.868006863354117e-06, "loss": 0.5206, "step": 11964 }, { "epoch": 1.557594689574385, "grad_norm": 2.516818046569824, "learning_rate": 4.865917759377567e-06, "loss": 0.4144, "step": 11967 }, { "epoch": 1.5579851620460756, "grad_norm": 3.410189151763916, "learning_rate": 4.8638286788249025e-06, "loss": 0.4504, "step": 11970 }, { "epoch": 1.5583756345177666, "grad_norm": 3.0182759761810303, "learning_rate": 4.861739622061078e-06, "loss": 0.4726, "step": 11973 }, { "epoch": 1.5587661069894572, "grad_norm": 2.5583255290985107, "learning_rate": 4.859650589451055e-06, "loss": 0.457, "step": 11976 }, { "epoch": 1.559156579461148, "grad_norm": 2.401313304901123, "learning_rate": 4.857561581359777e-06, "loss": 0.5042, "step": 11979 }, { "epoch": 1.5595470519328387, "grad_norm": 2.8148505687713623, "learning_rate": 4.855472598152193e-06, "loss": 0.4324, "step": 11982 }, { "epoch": 1.5599375244045295, "grad_norm": 3.2659788131713867, "learning_rate": 4.8533836401932395e-06, "loss": 0.4854, "step": 11985 }, { "epoch": 1.5603279968762203, "grad_norm": 2.5836613178253174, "learning_rate": 4.851294707847856e-06, "loss": 0.5318, "step": 11988 }, { "epoch": 1.5607184693479108, "grad_norm": 2.268071413040161, "learning_rate": 4.849205801480976e-06, "loss": 0.4897, "step": 11991 }, { "epoch": 1.5611089418196018, "grad_norm": 2.9751930236816406, "learning_rate": 4.847116921457524e-06, "loss": 0.4713, "step": 11994 }, { "epoch": 1.5614994142912924, "grad_norm": 3.3204164505004883, "learning_rate": 4.8450280681424235e-06, "loss": 0.4727, "step": 11997 }, { "epoch": 1.5618898867629833, "grad_norm": 2.7896077632904053, "learning_rate": 4.842939241900595e-06, "loss": 0.5829, "step": 12000 }, { "epoch": 1.562280359234674, "grad_norm": 2.7530436515808105, "learning_rate": 4.84085044309695e-06, "loss": 0.4501, "step": 12003 }, { "epoch": 1.5626708317063647, "grad_norm": 2.5019564628601074, "learning_rate": 4.838761672096398e-06, "loss": 0.4916, "step": 12006 }, { "epoch": 1.5630613041780554, "grad_norm": 2.6398847103118896, "learning_rate": 4.8366729292638425e-06, "loss": 0.5605, "step": 12009 }, { "epoch": 1.5634517766497462, "grad_norm": 3.4240760803222656, "learning_rate": 4.834584214964182e-06, "loss": 0.5437, "step": 12012 }, { "epoch": 1.563842249121437, "grad_norm": 2.8203554153442383, "learning_rate": 4.8324955295623105e-06, "loss": 0.5711, "step": 12015 }, { "epoch": 1.5642327215931275, "grad_norm": 2.9343690872192383, "learning_rate": 4.8304068734231194e-06, "loss": 0.4177, "step": 12018 }, { "epoch": 1.5646231940648185, "grad_norm": 2.7858777046203613, "learning_rate": 4.82831824691149e-06, "loss": 0.4601, "step": 12021 }, { "epoch": 1.565013666536509, "grad_norm": 2.6615116596221924, "learning_rate": 4.826229650392301e-06, "loss": 0.5292, "step": 12024 }, { "epoch": 1.5654041390082, "grad_norm": 2.6517207622528076, "learning_rate": 4.824141084230429e-06, "loss": 0.5592, "step": 12027 }, { "epoch": 1.5657946114798906, "grad_norm": 2.7702314853668213, "learning_rate": 4.822052548790737e-06, "loss": 0.5166, "step": 12030 }, { "epoch": 1.5661850839515814, "grad_norm": 2.714916706085205, "learning_rate": 4.819964044438092e-06, "loss": 0.4259, "step": 12033 }, { "epoch": 1.5665755564232722, "grad_norm": 2.4574501514434814, "learning_rate": 4.81787557153735e-06, "loss": 0.4761, "step": 12036 }, { "epoch": 1.566966028894963, "grad_norm": 2.5279035568237305, "learning_rate": 4.81578713045336e-06, "loss": 0.4604, "step": 12039 }, { "epoch": 1.5673565013666537, "grad_norm": 2.7598211765289307, "learning_rate": 4.813698721550973e-06, "loss": 0.4823, "step": 12042 }, { "epoch": 1.5677469738383443, "grad_norm": 2.556950330734253, "learning_rate": 4.811610345195027e-06, "loss": 0.4942, "step": 12045 }, { "epoch": 1.5681374463100353, "grad_norm": 3.494272470474243, "learning_rate": 4.809522001750358e-06, "loss": 0.505, "step": 12048 }, { "epoch": 1.5685279187817258, "grad_norm": 2.75669002532959, "learning_rate": 4.807433691581793e-06, "loss": 0.486, "step": 12051 }, { "epoch": 1.5689183912534166, "grad_norm": 2.629504680633545, "learning_rate": 4.805345415054158e-06, "loss": 0.4452, "step": 12054 }, { "epoch": 1.5693088637251074, "grad_norm": 2.816953659057617, "learning_rate": 4.803257172532267e-06, "loss": 0.4767, "step": 12057 }, { "epoch": 1.5696993361967981, "grad_norm": 2.689758777618408, "learning_rate": 4.801168964380938e-06, "loss": 0.5014, "step": 12060 }, { "epoch": 1.570089808668489, "grad_norm": 2.534708261489868, "learning_rate": 4.79908079096497e-06, "loss": 0.4778, "step": 12063 }, { "epoch": 1.5704802811401795, "grad_norm": 2.8629817962646484, "learning_rate": 4.796992652649166e-06, "loss": 0.5156, "step": 12066 }, { "epoch": 1.5708707536118705, "grad_norm": 3.1996259689331055, "learning_rate": 4.794904549798319e-06, "loss": 0.4859, "step": 12069 }, { "epoch": 1.571261226083561, "grad_norm": 2.5046546459198, "learning_rate": 4.792816482777216e-06, "loss": 0.5298, "step": 12072 }, { "epoch": 1.571651698555252, "grad_norm": 2.7203421592712402, "learning_rate": 4.790728451950636e-06, "loss": 0.5006, "step": 12075 }, { "epoch": 1.5720421710269425, "grad_norm": 2.6201863288879395, "learning_rate": 4.7886404576833564e-06, "loss": 0.5196, "step": 12078 }, { "epoch": 1.5724326434986333, "grad_norm": 2.636479377746582, "learning_rate": 4.786552500340144e-06, "loss": 0.4408, "step": 12081 }, { "epoch": 1.572823115970324, "grad_norm": 2.937016487121582, "learning_rate": 4.784464580285761e-06, "loss": 0.5735, "step": 12084 }, { "epoch": 1.5732135884420149, "grad_norm": 2.6083309650421143, "learning_rate": 4.782376697884962e-06, "loss": 0.5457, "step": 12087 }, { "epoch": 1.5736040609137056, "grad_norm": 2.875025987625122, "learning_rate": 4.780288853502496e-06, "loss": 0.5335, "step": 12090 }, { "epoch": 1.5739945333853962, "grad_norm": 2.6229593753814697, "learning_rate": 4.778201047503106e-06, "loss": 0.5258, "step": 12093 }, { "epoch": 1.5743850058570872, "grad_norm": 2.5524308681488037, "learning_rate": 4.776113280251525e-06, "loss": 0.4433, "step": 12096 }, { "epoch": 1.5747754783287777, "grad_norm": 2.6454172134399414, "learning_rate": 4.7740255521124825e-06, "loss": 0.5138, "step": 12099 }, { "epoch": 1.5751659508004687, "grad_norm": 2.7520358562469482, "learning_rate": 4.771937863450701e-06, "loss": 0.5128, "step": 12102 }, { "epoch": 1.5755564232721593, "grad_norm": 2.627397060394287, "learning_rate": 4.769850214630897e-06, "loss": 0.5503, "step": 12105 }, { "epoch": 1.57594689574385, "grad_norm": 2.6487503051757812, "learning_rate": 4.767762606017775e-06, "loss": 0.4488, "step": 12108 }, { "epoch": 1.5763373682155408, "grad_norm": 2.3210136890411377, "learning_rate": 4.765675037976038e-06, "loss": 0.4593, "step": 12111 }, { "epoch": 1.5767278406872316, "grad_norm": 2.7287368774414062, "learning_rate": 4.763587510870377e-06, "loss": 0.5627, "step": 12114 }, { "epoch": 1.5771183131589224, "grad_norm": 2.7396085262298584, "learning_rate": 4.761500025065482e-06, "loss": 0.4773, "step": 12117 }, { "epoch": 1.577508785630613, "grad_norm": 2.732445478439331, "learning_rate": 4.7594125809260315e-06, "loss": 0.4791, "step": 12120 }, { "epoch": 1.577899258102304, "grad_norm": 2.82453989982605, "learning_rate": 4.7573251788166954e-06, "loss": 0.5459, "step": 12123 }, { "epoch": 1.5782897305739945, "grad_norm": 2.6506190299987793, "learning_rate": 4.755237819102141e-06, "loss": 0.4923, "step": 12126 }, { "epoch": 1.5786802030456852, "grad_norm": 2.771843910217285, "learning_rate": 4.7531505021470245e-06, "loss": 0.5828, "step": 12129 }, { "epoch": 1.579070675517376, "grad_norm": 2.5433382987976074, "learning_rate": 4.751063228315996e-06, "loss": 0.5142, "step": 12132 }, { "epoch": 1.5794611479890668, "grad_norm": 2.8748183250427246, "learning_rate": 4.748975997973698e-06, "loss": 0.5367, "step": 12135 }, { "epoch": 1.5798516204607576, "grad_norm": 2.996832847595215, "learning_rate": 4.746888811484765e-06, "loss": 0.4731, "step": 12138 }, { "epoch": 1.580242092932448, "grad_norm": 2.554917335510254, "learning_rate": 4.744801669213822e-06, "loss": 0.4605, "step": 12141 }, { "epoch": 1.580632565404139, "grad_norm": 2.709306240081787, "learning_rate": 4.742714571525492e-06, "loss": 0.5183, "step": 12144 }, { "epoch": 1.5810230378758297, "grad_norm": 3.325532913208008, "learning_rate": 4.740627518784387e-06, "loss": 0.4707, "step": 12147 }, { "epoch": 1.5814135103475206, "grad_norm": 2.7227187156677246, "learning_rate": 4.738540511355107e-06, "loss": 0.4803, "step": 12150 }, { "epoch": 1.5818039828192112, "grad_norm": 2.8616886138916016, "learning_rate": 4.736453549602249e-06, "loss": 0.5472, "step": 12153 }, { "epoch": 1.582194455290902, "grad_norm": 2.477766513824463, "learning_rate": 4.734366633890404e-06, "loss": 0.5774, "step": 12156 }, { "epoch": 1.5825849277625927, "grad_norm": 2.6389472484588623, "learning_rate": 4.732279764584148e-06, "loss": 0.4967, "step": 12159 }, { "epoch": 1.5829754002342835, "grad_norm": 3.282442569732666, "learning_rate": 4.730192942048054e-06, "loss": 0.4934, "step": 12162 }, { "epoch": 1.5833658727059743, "grad_norm": 2.8340752124786377, "learning_rate": 4.7281061666466845e-06, "loss": 0.5129, "step": 12165 }, { "epoch": 1.5837563451776648, "grad_norm": 2.659324884414673, "learning_rate": 4.726019438744596e-06, "loss": 0.4756, "step": 12168 }, { "epoch": 1.5841468176493558, "grad_norm": 2.673013925552368, "learning_rate": 4.723932758706337e-06, "loss": 0.4839, "step": 12171 }, { "epoch": 1.5845372901210464, "grad_norm": 2.854623556137085, "learning_rate": 4.721846126896442e-06, "loss": 0.5232, "step": 12174 }, { "epoch": 1.5849277625927374, "grad_norm": 2.7628884315490723, "learning_rate": 4.7197595436794445e-06, "loss": 0.4909, "step": 12177 }, { "epoch": 1.585318235064428, "grad_norm": 2.850160598754883, "learning_rate": 4.717673009419865e-06, "loss": 0.4582, "step": 12180 }, { "epoch": 1.5857087075361187, "grad_norm": 3.7766642570495605, "learning_rate": 4.715586524482216e-06, "loss": 0.4465, "step": 12183 }, { "epoch": 1.5860991800078095, "grad_norm": 2.8595075607299805, "learning_rate": 4.7135000892310025e-06, "loss": 0.4837, "step": 12186 }, { "epoch": 1.5864896524795002, "grad_norm": 2.8177318572998047, "learning_rate": 4.711413704030722e-06, "loss": 0.4759, "step": 12189 }, { "epoch": 1.586880124951191, "grad_norm": 2.634204149246216, "learning_rate": 4.709327369245861e-06, "loss": 0.4633, "step": 12192 }, { "epoch": 1.5872705974228816, "grad_norm": 2.40295147895813, "learning_rate": 4.7072410852408965e-06, "loss": 0.499, "step": 12195 }, { "epoch": 1.5876610698945726, "grad_norm": 3.038196325302124, "learning_rate": 4.705154852380299e-06, "loss": 0.4878, "step": 12198 }, { "epoch": 1.5880515423662631, "grad_norm": 2.4882776737213135, "learning_rate": 4.7030686710285275e-06, "loss": 0.4522, "step": 12201 }, { "epoch": 1.5884420148379539, "grad_norm": 3.963737964630127, "learning_rate": 4.700982541550034e-06, "loss": 0.5776, "step": 12204 }, { "epoch": 1.5888324873096447, "grad_norm": 2.4971764087677, "learning_rate": 4.6988964643092635e-06, "loss": 0.4437, "step": 12207 }, { "epoch": 1.5892229597813354, "grad_norm": 2.5991008281707764, "learning_rate": 4.696810439670645e-06, "loss": 0.4736, "step": 12210 }, { "epoch": 1.5896134322530262, "grad_norm": 3.21665620803833, "learning_rate": 4.694724467998607e-06, "loss": 0.4886, "step": 12213 }, { "epoch": 1.5900039047247168, "grad_norm": 2.903533458709717, "learning_rate": 4.692638549657561e-06, "loss": 0.4829, "step": 12216 }, { "epoch": 1.5903943771964077, "grad_norm": 2.9863076210021973, "learning_rate": 4.690552685011913e-06, "loss": 0.5482, "step": 12219 }, { "epoch": 1.5907848496680983, "grad_norm": 2.6374809741973877, "learning_rate": 4.688466874426062e-06, "loss": 0.5568, "step": 12222 }, { "epoch": 1.5911753221397893, "grad_norm": 3.212864398956299, "learning_rate": 4.68638111826439e-06, "loss": 0.4724, "step": 12225 }, { "epoch": 1.5915657946114798, "grad_norm": 2.7329304218292236, "learning_rate": 4.684295416891278e-06, "loss": 0.5167, "step": 12228 }, { "epoch": 1.5919562670831706, "grad_norm": 2.6516621112823486, "learning_rate": 4.6822097706710935e-06, "loss": 0.519, "step": 12231 }, { "epoch": 1.5923467395548614, "grad_norm": 2.7666425704956055, "learning_rate": 4.680124179968193e-06, "loss": 0.5548, "step": 12234 }, { "epoch": 1.5927372120265522, "grad_norm": 2.8573389053344727, "learning_rate": 4.678038645146926e-06, "loss": 0.5459, "step": 12237 }, { "epoch": 1.593127684498243, "grad_norm": 2.5972604751586914, "learning_rate": 4.67595316657163e-06, "loss": 0.4768, "step": 12240 }, { "epoch": 1.5935181569699335, "grad_norm": 3.3133761882781982, "learning_rate": 4.673867744606633e-06, "loss": 0.4992, "step": 12243 }, { "epoch": 1.5939086294416245, "grad_norm": 2.886195659637451, "learning_rate": 4.671782379616256e-06, "loss": 0.5065, "step": 12246 }, { "epoch": 1.594299101913315, "grad_norm": 2.5426437854766846, "learning_rate": 4.669697071964807e-06, "loss": 0.4161, "step": 12249 }, { "epoch": 1.5946895743850058, "grad_norm": 3.0730109214782715, "learning_rate": 4.667611822016584e-06, "loss": 0.6377, "step": 12252 }, { "epoch": 1.5950800468566966, "grad_norm": 2.649171829223633, "learning_rate": 4.665526630135877e-06, "loss": 0.5299, "step": 12255 }, { "epoch": 1.5954705193283873, "grad_norm": 2.3111774921417236, "learning_rate": 4.663441496686964e-06, "loss": 0.4358, "step": 12258 }, { "epoch": 1.5958609918000781, "grad_norm": 3.4514715671539307, "learning_rate": 4.661356422034113e-06, "loss": 0.572, "step": 12261 }, { "epoch": 1.596251464271769, "grad_norm": 2.392629623413086, "learning_rate": 4.659271406541584e-06, "loss": 0.5273, "step": 12264 }, { "epoch": 1.5966419367434597, "grad_norm": 2.7930562496185303, "learning_rate": 4.65718645057362e-06, "loss": 0.483, "step": 12267 }, { "epoch": 1.5970324092151502, "grad_norm": 2.5676727294921875, "learning_rate": 4.655101554494465e-06, "loss": 0.4614, "step": 12270 }, { "epoch": 1.5974228816868412, "grad_norm": 2.318683385848999, "learning_rate": 4.653016718668342e-06, "loss": 0.4482, "step": 12273 }, { "epoch": 1.5978133541585318, "grad_norm": 2.654034376144409, "learning_rate": 4.650931943459469e-06, "loss": 0.4685, "step": 12276 }, { "epoch": 1.5982038266302225, "grad_norm": 2.6656546592712402, "learning_rate": 4.64884722923205e-06, "loss": 0.5102, "step": 12279 }, { "epoch": 1.5985942991019133, "grad_norm": 2.5751311779022217, "learning_rate": 4.646762576350282e-06, "loss": 0.4802, "step": 12282 }, { "epoch": 1.598984771573604, "grad_norm": 2.3519175052642822, "learning_rate": 4.644677985178349e-06, "loss": 0.468, "step": 12285 }, { "epoch": 1.5993752440452949, "grad_norm": 2.790675401687622, "learning_rate": 4.642593456080425e-06, "loss": 0.6174, "step": 12288 }, { "epoch": 1.5997657165169854, "grad_norm": 2.661125898361206, "learning_rate": 4.640508989420672e-06, "loss": 0.4254, "step": 12291 }, { "epoch": 1.6001561889886764, "grad_norm": 2.642672300338745, "learning_rate": 4.638424585563241e-06, "loss": 0.4429, "step": 12294 }, { "epoch": 1.600546661460367, "grad_norm": 2.7480437755584717, "learning_rate": 4.636340244872275e-06, "loss": 0.5078, "step": 12297 }, { "epoch": 1.600937133932058, "grad_norm": 3.16561222076416, "learning_rate": 4.634255967711905e-06, "loss": 0.5383, "step": 12300 }, { "epoch": 1.6013276064037485, "grad_norm": 2.9352433681488037, "learning_rate": 4.632171754446246e-06, "loss": 0.4734, "step": 12303 }, { "epoch": 1.6017180788754393, "grad_norm": 2.6060163974761963, "learning_rate": 4.630087605439407e-06, "loss": 0.466, "step": 12306 }, { "epoch": 1.60210855134713, "grad_norm": 2.827497720718384, "learning_rate": 4.628003521055486e-06, "loss": 0.4221, "step": 12309 }, { "epoch": 1.6024990238188208, "grad_norm": 2.8147799968719482, "learning_rate": 4.625919501658568e-06, "loss": 0.4873, "step": 12312 }, { "epoch": 1.6028894962905116, "grad_norm": 3.2619431018829346, "learning_rate": 4.623835547612726e-06, "loss": 0.4946, "step": 12315 }, { "epoch": 1.6032799687622021, "grad_norm": 3.4461402893066406, "learning_rate": 4.621751659282021e-06, "loss": 0.5536, "step": 12318 }, { "epoch": 1.6036704412338931, "grad_norm": 2.9235973358154297, "learning_rate": 4.619667837030508e-06, "loss": 0.5901, "step": 12321 }, { "epoch": 1.6040609137055837, "grad_norm": 2.7743449211120605, "learning_rate": 4.6175840812222214e-06, "loss": 0.4504, "step": 12324 }, { "epoch": 1.6044513861772745, "grad_norm": 3.0675461292266846, "learning_rate": 4.615500392221193e-06, "loss": 0.5658, "step": 12327 }, { "epoch": 1.6048418586489652, "grad_norm": 2.25282883644104, "learning_rate": 4.613416770391437e-06, "loss": 0.4366, "step": 12330 }, { "epoch": 1.605232331120656, "grad_norm": 2.5219714641571045, "learning_rate": 4.611333216096957e-06, "loss": 0.574, "step": 12333 }, { "epoch": 1.6056228035923468, "grad_norm": 2.765669345855713, "learning_rate": 4.6092497297017475e-06, "loss": 0.5322, "step": 12336 }, { "epoch": 1.6060132760640373, "grad_norm": 2.6073596477508545, "learning_rate": 4.607166311569787e-06, "loss": 0.4499, "step": 12339 }, { "epoch": 1.6064037485357283, "grad_norm": 2.747248411178589, "learning_rate": 4.605082962065047e-06, "loss": 0.4734, "step": 12342 }, { "epoch": 1.6067942210074189, "grad_norm": 3.6643686294555664, "learning_rate": 4.602999681551482e-06, "loss": 0.4507, "step": 12345 }, { "epoch": 1.6071846934791099, "grad_norm": 2.712887763977051, "learning_rate": 4.600916470393037e-06, "loss": 0.5074, "step": 12348 }, { "epoch": 1.6075751659508004, "grad_norm": 2.606999635696411, "learning_rate": 4.5988333289536444e-06, "loss": 0.4703, "step": 12351 }, { "epoch": 1.6079656384224912, "grad_norm": 3.723257064819336, "learning_rate": 4.596750257597227e-06, "loss": 0.5086, "step": 12354 }, { "epoch": 1.608356110894182, "grad_norm": 3.431612253189087, "learning_rate": 4.59466725668769e-06, "loss": 0.5268, "step": 12357 }, { "epoch": 1.6087465833658727, "grad_norm": 2.8018877506256104, "learning_rate": 4.592584326588931e-06, "loss": 0.5083, "step": 12360 }, { "epoch": 1.6091370558375635, "grad_norm": 2.578345775604248, "learning_rate": 4.590501467664834e-06, "loss": 0.4861, "step": 12363 }, { "epoch": 1.609527528309254, "grad_norm": 2.7130467891693115, "learning_rate": 4.588418680279268e-06, "loss": 0.5426, "step": 12366 }, { "epoch": 1.609918000780945, "grad_norm": 2.725135564804077, "learning_rate": 4.5863359647960924e-06, "loss": 0.4927, "step": 12369 }, { "epoch": 1.6103084732526356, "grad_norm": 2.6991970539093018, "learning_rate": 4.584253321579155e-06, "loss": 0.5302, "step": 12372 }, { "epoch": 1.6106989457243266, "grad_norm": 2.4623939990997314, "learning_rate": 4.582170750992287e-06, "loss": 0.3889, "step": 12375 }, { "epoch": 1.6110894181960171, "grad_norm": 2.5120255947113037, "learning_rate": 4.580088253399311e-06, "loss": 0.427, "step": 12378 }, { "epoch": 1.611479890667708, "grad_norm": 2.3307392597198486, "learning_rate": 4.578005829164032e-06, "loss": 0.5037, "step": 12381 }, { "epoch": 1.6118703631393987, "grad_norm": 2.7387290000915527, "learning_rate": 4.575923478650246e-06, "loss": 0.4973, "step": 12384 }, { "epoch": 1.6122608356110895, "grad_norm": 2.8055717945098877, "learning_rate": 4.573841202221739e-06, "loss": 0.5121, "step": 12387 }, { "epoch": 1.6126513080827802, "grad_norm": 3.864004373550415, "learning_rate": 4.5717590002422755e-06, "loss": 0.5525, "step": 12390 }, { "epoch": 1.6130417805544708, "grad_norm": 2.713977336883545, "learning_rate": 4.569676873075613e-06, "loss": 0.5923, "step": 12393 }, { "epoch": 1.6134322530261618, "grad_norm": 2.7594332695007324, "learning_rate": 4.567594821085497e-06, "loss": 0.5122, "step": 12396 }, { "epoch": 1.6138227254978523, "grad_norm": 2.3967032432556152, "learning_rate": 4.565512844635657e-06, "loss": 0.4932, "step": 12399 }, { "epoch": 1.614213197969543, "grad_norm": 2.6698012351989746, "learning_rate": 4.563430944089807e-06, "loss": 0.457, "step": 12402 }, { "epoch": 1.6146036704412339, "grad_norm": 2.7440128326416016, "learning_rate": 4.561349119811655e-06, "loss": 0.4945, "step": 12405 }, { "epoch": 1.6149941429129246, "grad_norm": 2.666889190673828, "learning_rate": 4.559267372164886e-06, "loss": 0.5029, "step": 12408 }, { "epoch": 1.6153846153846154, "grad_norm": 2.826246738433838, "learning_rate": 4.557185701513182e-06, "loss": 0.4937, "step": 12411 }, { "epoch": 1.615775087856306, "grad_norm": 2.5194458961486816, "learning_rate": 4.555104108220205e-06, "loss": 0.4776, "step": 12414 }, { "epoch": 1.616165560327997, "grad_norm": 2.725764274597168, "learning_rate": 4.553022592649602e-06, "loss": 0.5434, "step": 12417 }, { "epoch": 1.6165560327996875, "grad_norm": 2.692974805831909, "learning_rate": 4.550941155165015e-06, "loss": 0.518, "step": 12420 }, { "epoch": 1.6169465052713785, "grad_norm": 2.79667329788208, "learning_rate": 4.548859796130061e-06, "loss": 0.5053, "step": 12423 }, { "epoch": 1.617336977743069, "grad_norm": 2.6578612327575684, "learning_rate": 4.546778515908352e-06, "loss": 0.4535, "step": 12426 }, { "epoch": 1.6177274502147598, "grad_norm": 2.6429173946380615, "learning_rate": 4.544697314863483e-06, "loss": 0.4714, "step": 12429 }, { "epoch": 1.6181179226864506, "grad_norm": 2.6964876651763916, "learning_rate": 4.542616193359035e-06, "loss": 0.5593, "step": 12432 }, { "epoch": 1.6185083951581414, "grad_norm": 2.86681866645813, "learning_rate": 4.540535151758575e-06, "loss": 0.5782, "step": 12435 }, { "epoch": 1.6188988676298322, "grad_norm": 4.218873977661133, "learning_rate": 4.538454190425658e-06, "loss": 0.5248, "step": 12438 }, { "epoch": 1.6192893401015227, "grad_norm": 4.104538917541504, "learning_rate": 4.5363733097238235e-06, "loss": 0.5671, "step": 12441 }, { "epoch": 1.6196798125732137, "grad_norm": 2.703684091567993, "learning_rate": 4.534292510016597e-06, "loss": 0.522, "step": 12444 }, { "epoch": 1.6200702850449042, "grad_norm": 3.4295432567596436, "learning_rate": 4.532211791667488e-06, "loss": 0.5239, "step": 12447 }, { "epoch": 1.6204607575165952, "grad_norm": 2.69512939453125, "learning_rate": 4.5301311550399966e-06, "loss": 0.4816, "step": 12450 }, { "epoch": 1.6208512299882858, "grad_norm": 2.700019121170044, "learning_rate": 4.5280506004976025e-06, "loss": 0.4808, "step": 12453 }, { "epoch": 1.6212417024599766, "grad_norm": 2.682190179824829, "learning_rate": 4.525970128403777e-06, "loss": 0.5554, "step": 12456 }, { "epoch": 1.6216321749316673, "grad_norm": 2.4642701148986816, "learning_rate": 4.523889739121971e-06, "loss": 0.4808, "step": 12459 }, { "epoch": 1.622022647403358, "grad_norm": 2.6819307804107666, "learning_rate": 4.521809433015627e-06, "loss": 0.5932, "step": 12462 }, { "epoch": 1.6224131198750489, "grad_norm": 2.5704028606414795, "learning_rate": 4.51972921044817e-06, "loss": 0.5527, "step": 12465 }, { "epoch": 1.6228035923467394, "grad_norm": 2.388164758682251, "learning_rate": 4.517649071783008e-06, "loss": 0.5643, "step": 12468 }, { "epoch": 1.6231940648184304, "grad_norm": 2.3138227462768555, "learning_rate": 4.51556901738354e-06, "loss": 0.4487, "step": 12471 }, { "epoch": 1.623584537290121, "grad_norm": 2.4271912574768066, "learning_rate": 4.513489047613144e-06, "loss": 0.465, "step": 12474 }, { "epoch": 1.6239750097618117, "grad_norm": 2.532554864883423, "learning_rate": 4.511409162835188e-06, "loss": 0.5073, "step": 12477 }, { "epoch": 1.6243654822335025, "grad_norm": 2.848936080932617, "learning_rate": 4.509329363413023e-06, "loss": 0.4336, "step": 12480 }, { "epoch": 1.6247559547051933, "grad_norm": 2.899810791015625, "learning_rate": 4.507249649709988e-06, "loss": 0.5284, "step": 12483 }, { "epoch": 1.625146427176884, "grad_norm": 2.649650812149048, "learning_rate": 4.505170022089401e-06, "loss": 0.506, "step": 12486 }, { "epoch": 1.6255368996485746, "grad_norm": 2.40224552154541, "learning_rate": 4.503090480914571e-06, "loss": 0.4498, "step": 12489 }, { "epoch": 1.6259273721202656, "grad_norm": 2.5746147632598877, "learning_rate": 4.501011026548789e-06, "loss": 0.5017, "step": 12492 }, { "epoch": 1.6263178445919562, "grad_norm": 2.6533167362213135, "learning_rate": 4.49893165935533e-06, "loss": 0.5273, "step": 12495 }, { "epoch": 1.6267083170636472, "grad_norm": 4.123754501342773, "learning_rate": 4.496852379697456e-06, "loss": 0.5213, "step": 12498 }, { "epoch": 1.6270987895353377, "grad_norm": 3.875533103942871, "learning_rate": 4.494773187938413e-06, "loss": 0.5422, "step": 12501 }, { "epoch": 1.6274892620070285, "grad_norm": 3.554651975631714, "learning_rate": 4.4926940844414316e-06, "loss": 0.4757, "step": 12504 }, { "epoch": 1.6278797344787193, "grad_norm": 2.468736171722412, "learning_rate": 4.490615069569727e-06, "loss": 0.5176, "step": 12507 }, { "epoch": 1.62827020695041, "grad_norm": 3.26057767868042, "learning_rate": 4.488536143686497e-06, "loss": 0.522, "step": 12510 }, { "epoch": 1.6286606794221008, "grad_norm": 2.696108102798462, "learning_rate": 4.486457307154927e-06, "loss": 0.5466, "step": 12513 }, { "epoch": 1.6290511518937913, "grad_norm": 2.7262580394744873, "learning_rate": 4.4843785603381855e-06, "loss": 0.5753, "step": 12516 }, { "epoch": 1.6294416243654823, "grad_norm": 2.864877462387085, "learning_rate": 4.482299903599424e-06, "loss": 0.457, "step": 12519 }, { "epoch": 1.629832096837173, "grad_norm": 2.8265585899353027, "learning_rate": 4.48022133730178e-06, "loss": 0.5091, "step": 12522 }, { "epoch": 1.630222569308864, "grad_norm": 3.3130998611450195, "learning_rate": 4.478142861808375e-06, "loss": 0.4634, "step": 12525 }, { "epoch": 1.6306130417805544, "grad_norm": 2.6155717372894287, "learning_rate": 4.476064477482316e-06, "loss": 0.4637, "step": 12528 }, { "epoch": 1.6310035142522452, "grad_norm": 2.8541154861450195, "learning_rate": 4.4739861846866885e-06, "loss": 0.5217, "step": 12531 }, { "epoch": 1.631393986723936, "grad_norm": 2.5578043460845947, "learning_rate": 4.47190798378457e-06, "loss": 0.5005, "step": 12534 }, { "epoch": 1.6317844591956268, "grad_norm": 3.508741617202759, "learning_rate": 4.469829875139014e-06, "loss": 0.512, "step": 12537 }, { "epoch": 1.6321749316673175, "grad_norm": 2.5680432319641113, "learning_rate": 4.467751859113064e-06, "loss": 0.5627, "step": 12540 }, { "epoch": 1.632565404139008, "grad_norm": 2.6496567726135254, "learning_rate": 4.465673936069746e-06, "loss": 0.4628, "step": 12543 }, { "epoch": 1.632955876610699, "grad_norm": 2.5644404888153076, "learning_rate": 4.463596106372066e-06, "loss": 0.5009, "step": 12546 }, { "epoch": 1.6333463490823896, "grad_norm": 2.2369980812072754, "learning_rate": 4.461518370383017e-06, "loss": 0.4671, "step": 12549 }, { "epoch": 1.6337368215540804, "grad_norm": 2.7351465225219727, "learning_rate": 4.459440728465578e-06, "loss": 0.5614, "step": 12552 }, { "epoch": 1.6341272940257712, "grad_norm": 2.928203821182251, "learning_rate": 4.4573631809827045e-06, "loss": 0.5391, "step": 12555 }, { "epoch": 1.634517766497462, "grad_norm": 2.912785291671753, "learning_rate": 4.4552857282973435e-06, "loss": 0.4928, "step": 12558 }, { "epoch": 1.6349082389691527, "grad_norm": 3.1030282974243164, "learning_rate": 4.453208370772417e-06, "loss": 0.5712, "step": 12561 }, { "epoch": 1.6352987114408433, "grad_norm": 2.6347246170043945, "learning_rate": 4.45113110877084e-06, "loss": 0.5575, "step": 12564 }, { "epoch": 1.6356891839125343, "grad_norm": 2.7265217304229736, "learning_rate": 4.449053942655503e-06, "loss": 0.5683, "step": 12567 }, { "epoch": 1.6360796563842248, "grad_norm": 2.5423009395599365, "learning_rate": 4.446976872789284e-06, "loss": 0.4451, "step": 12570 }, { "epoch": 1.6364701288559158, "grad_norm": 2.4595041275024414, "learning_rate": 4.444899899535042e-06, "loss": 0.4713, "step": 12573 }, { "epoch": 1.6368606013276064, "grad_norm": 2.64188814163208, "learning_rate": 4.442823023255619e-06, "loss": 0.4481, "step": 12576 }, { "epoch": 1.6372510737992971, "grad_norm": 2.5329110622406006, "learning_rate": 4.440746244313842e-06, "loss": 0.5019, "step": 12579 }, { "epoch": 1.637641546270988, "grad_norm": 2.4973838329315186, "learning_rate": 4.43866956307252e-06, "loss": 0.4662, "step": 12582 }, { "epoch": 1.6380320187426787, "grad_norm": 2.651660203933716, "learning_rate": 4.436592979894445e-06, "loss": 0.4375, "step": 12585 }, { "epoch": 1.6384224912143694, "grad_norm": 2.4576289653778076, "learning_rate": 4.4345164951423895e-06, "loss": 0.4207, "step": 12588 }, { "epoch": 1.63881296368606, "grad_norm": 3.1014697551727295, "learning_rate": 4.432440109179113e-06, "loss": 0.5241, "step": 12591 }, { "epoch": 1.639203436157751, "grad_norm": 2.6351943016052246, "learning_rate": 4.430363822367357e-06, "loss": 0.5933, "step": 12594 }, { "epoch": 1.6395939086294415, "grad_norm": 2.748256206512451, "learning_rate": 4.428287635069841e-06, "loss": 0.5146, "step": 12597 }, { "epoch": 1.6399843811011323, "grad_norm": 2.712522506713867, "learning_rate": 4.426211547649274e-06, "loss": 0.5062, "step": 12600 }, { "epoch": 1.640374853572823, "grad_norm": 2.814810276031494, "learning_rate": 4.42413556046834e-06, "loss": 0.5229, "step": 12603 }, { "epoch": 1.6407653260445139, "grad_norm": 2.7721574306488037, "learning_rate": 4.422059673889714e-06, "loss": 0.4604, "step": 12606 }, { "epoch": 1.6411557985162046, "grad_norm": 2.700059175491333, "learning_rate": 4.419983888276047e-06, "loss": 0.5371, "step": 12609 }, { "epoch": 1.6415462709878954, "grad_norm": 2.785081624984741, "learning_rate": 4.417908203989975e-06, "loss": 0.5373, "step": 12612 }, { "epoch": 1.6419367434595862, "grad_norm": 2.8077304363250732, "learning_rate": 4.415832621394116e-06, "loss": 0.5162, "step": 12615 }, { "epoch": 1.6423272159312767, "grad_norm": 2.6027612686157227, "learning_rate": 4.413757140851067e-06, "loss": 0.5698, "step": 12618 }, { "epoch": 1.6427176884029677, "grad_norm": 2.704460620880127, "learning_rate": 4.411681762723415e-06, "loss": 0.476, "step": 12621 }, { "epoch": 1.6431081608746583, "grad_norm": 2.906745672225952, "learning_rate": 4.409606487373718e-06, "loss": 0.4695, "step": 12624 }, { "epoch": 1.643498633346349, "grad_norm": 2.890692710876465, "learning_rate": 4.407531315164527e-06, "loss": 0.5786, "step": 12627 }, { "epoch": 1.6438891058180398, "grad_norm": 2.488058567047119, "learning_rate": 4.4054562464583705e-06, "loss": 0.4332, "step": 12630 }, { "epoch": 1.6442795782897306, "grad_norm": 2.7957355976104736, "learning_rate": 4.403381281617755e-06, "loss": 0.4859, "step": 12633 }, { "epoch": 1.6446700507614214, "grad_norm": 2.960820436477661, "learning_rate": 4.401306421005176e-06, "loss": 0.4931, "step": 12636 }, { "epoch": 1.645060523233112, "grad_norm": 2.780045986175537, "learning_rate": 4.399231664983104e-06, "loss": 0.5052, "step": 12639 }, { "epoch": 1.645450995704803, "grad_norm": 2.800703525543213, "learning_rate": 4.3971570139139975e-06, "loss": 0.57, "step": 12642 }, { "epoch": 1.6458414681764935, "grad_norm": 3.8522229194641113, "learning_rate": 4.395082468160291e-06, "loss": 0.5197, "step": 12645 }, { "epoch": 1.6462319406481845, "grad_norm": 2.728604793548584, "learning_rate": 4.393008028084407e-06, "loss": 0.4438, "step": 12648 }, { "epoch": 1.646622413119875, "grad_norm": 2.7166476249694824, "learning_rate": 4.390933694048742e-06, "loss": 0.5547, "step": 12651 }, { "epoch": 1.6470128855915658, "grad_norm": 2.5482025146484375, "learning_rate": 4.3888594664156795e-06, "loss": 0.5274, "step": 12654 }, { "epoch": 1.6474033580632566, "grad_norm": 3.0686612129211426, "learning_rate": 4.386785345547584e-06, "loss": 0.5536, "step": 12657 }, { "epoch": 1.6477938305349473, "grad_norm": 2.8095195293426514, "learning_rate": 4.384711331806797e-06, "loss": 0.4901, "step": 12660 }, { "epoch": 1.648184303006638, "grad_norm": 2.81028151512146, "learning_rate": 4.3826374255556476e-06, "loss": 0.515, "step": 12663 }, { "epoch": 1.6485747754783286, "grad_norm": 2.4701220989227295, "learning_rate": 4.38056362715644e-06, "loss": 0.5399, "step": 12666 }, { "epoch": 1.6489652479500196, "grad_norm": 2.5714948177337646, "learning_rate": 4.378489936971463e-06, "loss": 0.5045, "step": 12669 }, { "epoch": 1.6493557204217102, "grad_norm": 2.7451059818267822, "learning_rate": 4.376416355362989e-06, "loss": 0.4809, "step": 12672 }, { "epoch": 1.649746192893401, "grad_norm": 2.625004529953003, "learning_rate": 4.3743428826932635e-06, "loss": 0.5552, "step": 12675 }, { "epoch": 1.6501366653650917, "grad_norm": 2.387716770172119, "learning_rate": 4.37226951932452e-06, "loss": 0.4466, "step": 12678 }, { "epoch": 1.6505271378367825, "grad_norm": 2.606682777404785, "learning_rate": 4.370196265618973e-06, "loss": 0.5303, "step": 12681 }, { "epoch": 1.6509176103084733, "grad_norm": 2.3871326446533203, "learning_rate": 4.368123121938812e-06, "loss": 0.4816, "step": 12684 }, { "epoch": 1.6513080827801638, "grad_norm": 3.2255170345306396, "learning_rate": 4.3660500886462105e-06, "loss": 0.5146, "step": 12687 }, { "epoch": 1.6516985552518548, "grad_norm": 2.7184829711914062, "learning_rate": 4.3639771661033275e-06, "loss": 0.5417, "step": 12690 }, { "epoch": 1.6520890277235454, "grad_norm": 3.5075788497924805, "learning_rate": 4.361904354672296e-06, "loss": 0.4598, "step": 12693 }, { "epoch": 1.6524795001952364, "grad_norm": 2.910029649734497, "learning_rate": 4.3598316547152295e-06, "loss": 0.5129, "step": 12696 }, { "epoch": 1.652869972666927, "grad_norm": 2.9071853160858154, "learning_rate": 4.357759066594228e-06, "loss": 0.5485, "step": 12699 }, { "epoch": 1.6532604451386177, "grad_norm": 2.824410915374756, "learning_rate": 4.3556865906713654e-06, "loss": 0.5358, "step": 12702 }, { "epoch": 1.6536509176103085, "grad_norm": 2.680872678756714, "learning_rate": 4.3536142273087005e-06, "loss": 0.5531, "step": 12705 }, { "epoch": 1.6540413900819992, "grad_norm": 2.904576063156128, "learning_rate": 4.351541976868271e-06, "loss": 0.523, "step": 12708 }, { "epoch": 1.65443186255369, "grad_norm": 2.6715736389160156, "learning_rate": 4.349469839712093e-06, "loss": 0.4883, "step": 12711 }, { "epoch": 1.6548223350253806, "grad_norm": 2.5224480628967285, "learning_rate": 4.347397816202165e-06, "loss": 0.509, "step": 12714 }, { "epoch": 1.6552128074970716, "grad_norm": 3.416038990020752, "learning_rate": 4.345325906700467e-06, "loss": 0.4453, "step": 12717 }, { "epoch": 1.655603279968762, "grad_norm": 2.9145963191986084, "learning_rate": 4.343254111568954e-06, "loss": 0.5879, "step": 12720 }, { "epoch": 1.655993752440453, "grad_norm": 3.9935879707336426, "learning_rate": 4.341182431169568e-06, "loss": 0.5139, "step": 12723 }, { "epoch": 1.6563842249121437, "grad_norm": 4.118599891662598, "learning_rate": 4.339110865864225e-06, "loss": 0.5033, "step": 12726 }, { "epoch": 1.6567746973838344, "grad_norm": 2.687406301498413, "learning_rate": 4.337039416014821e-06, "loss": 0.5637, "step": 12729 }, { "epoch": 1.6571651698555252, "grad_norm": 2.5384199619293213, "learning_rate": 4.334968081983238e-06, "loss": 0.5492, "step": 12732 }, { "epoch": 1.657555642327216, "grad_norm": 2.816277265548706, "learning_rate": 4.3328968641313326e-06, "loss": 0.4586, "step": 12735 }, { "epoch": 1.6579461147989067, "grad_norm": 2.6040713787078857, "learning_rate": 4.330825762820942e-06, "loss": 0.4576, "step": 12738 }, { "epoch": 1.6583365872705973, "grad_norm": 2.7248170375823975, "learning_rate": 4.3287547784138815e-06, "loss": 0.4663, "step": 12741 }, { "epoch": 1.6587270597422883, "grad_norm": 2.8028006553649902, "learning_rate": 4.32668391127195e-06, "loss": 0.5485, "step": 12744 }, { "epoch": 1.6591175322139788, "grad_norm": 2.470500946044922, "learning_rate": 4.324613161756923e-06, "loss": 0.4919, "step": 12747 }, { "epoch": 1.6595080046856696, "grad_norm": 2.61730694770813, "learning_rate": 4.322542530230556e-06, "loss": 0.4849, "step": 12750 }, { "epoch": 1.6598984771573604, "grad_norm": 2.686258316040039, "learning_rate": 4.320472017054584e-06, "loss": 0.4206, "step": 12753 }, { "epoch": 1.6602889496290512, "grad_norm": 2.6718552112579346, "learning_rate": 4.318401622590719e-06, "loss": 0.5302, "step": 12756 }, { "epoch": 1.660679422100742, "grad_norm": 2.5929765701293945, "learning_rate": 4.316331347200659e-06, "loss": 0.4858, "step": 12759 }, { "epoch": 1.6610698945724325, "grad_norm": 2.6052663326263428, "learning_rate": 4.314261191246073e-06, "loss": 0.4703, "step": 12762 }, { "epoch": 1.6614603670441235, "grad_norm": 2.5814874172210693, "learning_rate": 4.312191155088616e-06, "loss": 0.4815, "step": 12765 }, { "epoch": 1.661850839515814, "grad_norm": 2.9272522926330566, "learning_rate": 4.310121239089915e-06, "loss": 0.5128, "step": 12768 }, { "epoch": 1.662241311987505, "grad_norm": 3.3689401149749756, "learning_rate": 4.308051443611582e-06, "loss": 0.4717, "step": 12771 }, { "epoch": 1.6626317844591956, "grad_norm": 2.8045191764831543, "learning_rate": 4.305981769015207e-06, "loss": 0.4954, "step": 12774 }, { "epoch": 1.6630222569308863, "grad_norm": 2.869739294052124, "learning_rate": 4.303912215662359e-06, "loss": 0.4864, "step": 12777 }, { "epoch": 1.6634127294025771, "grad_norm": 2.9662373065948486, "learning_rate": 4.30184278391458e-06, "loss": 0.5554, "step": 12780 }, { "epoch": 1.663803201874268, "grad_norm": 2.603210926055908, "learning_rate": 4.299773474133398e-06, "loss": 0.467, "step": 12783 }, { "epoch": 1.6641936743459587, "grad_norm": 2.6853482723236084, "learning_rate": 4.297704286680319e-06, "loss": 0.5312, "step": 12786 }, { "epoch": 1.6645841468176492, "grad_norm": 2.606921672821045, "learning_rate": 4.295635221916823e-06, "loss": 0.5665, "step": 12789 }, { "epoch": 1.6649746192893402, "grad_norm": 2.5418643951416016, "learning_rate": 4.293566280204371e-06, "loss": 0.4628, "step": 12792 }, { "epoch": 1.6653650917610308, "grad_norm": 3.40142822265625, "learning_rate": 4.2914974619044045e-06, "loss": 0.487, "step": 12795 }, { "epoch": 1.6657555642327218, "grad_norm": 2.476050853729248, "learning_rate": 4.289428767378341e-06, "loss": 0.5231, "step": 12798 }, { "epoch": 1.6661460367044123, "grad_norm": 2.9447269439697266, "learning_rate": 4.287360196987578e-06, "loss": 0.5296, "step": 12801 }, { "epoch": 1.666536509176103, "grad_norm": 3.3223366737365723, "learning_rate": 4.2852917510934876e-06, "loss": 0.4443, "step": 12804 }, { "epoch": 1.6669269816477938, "grad_norm": 2.9464173316955566, "learning_rate": 4.283223430057425e-06, "loss": 0.5343, "step": 12807 }, { "epoch": 1.6673174541194846, "grad_norm": 3.260638475418091, "learning_rate": 4.281155234240722e-06, "loss": 0.4831, "step": 12810 }, { "epoch": 1.6677079265911754, "grad_norm": 2.604071617126465, "learning_rate": 4.279087164004686e-06, "loss": 0.5149, "step": 12813 }, { "epoch": 1.668098399062866, "grad_norm": 2.497521162033081, "learning_rate": 4.277019219710607e-06, "loss": 0.4481, "step": 12816 }, { "epoch": 1.668488871534557, "grad_norm": 2.61600399017334, "learning_rate": 4.274951401719748e-06, "loss": 0.5209, "step": 12819 }, { "epoch": 1.6688793440062475, "grad_norm": 2.627828359603882, "learning_rate": 4.272883710393356e-06, "loss": 0.538, "step": 12822 }, { "epoch": 1.6692698164779383, "grad_norm": 2.7958621978759766, "learning_rate": 4.270816146092649e-06, "loss": 0.4669, "step": 12825 }, { "epoch": 1.669660288949629, "grad_norm": 2.8905887603759766, "learning_rate": 4.268748709178828e-06, "loss": 0.554, "step": 12828 }, { "epoch": 1.6700507614213198, "grad_norm": 2.683310031890869, "learning_rate": 4.2666814000130685e-06, "loss": 0.518, "step": 12831 }, { "epoch": 1.6704412338930106, "grad_norm": 3.2548582553863525, "learning_rate": 4.264614218956525e-06, "loss": 0.514, "step": 12834 }, { "epoch": 1.6708317063647011, "grad_norm": 2.7682833671569824, "learning_rate": 4.262547166370333e-06, "loss": 0.5013, "step": 12837 }, { "epoch": 1.6712221788363921, "grad_norm": 2.7718143463134766, "learning_rate": 4.2604802426155975e-06, "loss": 0.5049, "step": 12840 }, { "epoch": 1.6716126513080827, "grad_norm": 2.5910987854003906, "learning_rate": 4.258413448053409e-06, "loss": 0.5122, "step": 12843 }, { "epoch": 1.6720031237797737, "grad_norm": 2.675654172897339, "learning_rate": 4.25634678304483e-06, "loss": 0.5298, "step": 12846 }, { "epoch": 1.6723935962514642, "grad_norm": 2.951941728591919, "learning_rate": 4.254280247950904e-06, "loss": 0.5185, "step": 12849 }, { "epoch": 1.672784068723155, "grad_norm": 3.080920696258545, "learning_rate": 4.252213843132651e-06, "loss": 0.4948, "step": 12852 }, { "epoch": 1.6731745411948458, "grad_norm": 2.5052084922790527, "learning_rate": 4.250147568951062e-06, "loss": 0.4856, "step": 12855 }, { "epoch": 1.6735650136665365, "grad_norm": 2.8432629108428955, "learning_rate": 4.2480814257671195e-06, "loss": 0.4367, "step": 12858 }, { "epoch": 1.6739554861382273, "grad_norm": 2.4311513900756836, "learning_rate": 4.24601541394177e-06, "loss": 0.5279, "step": 12861 }, { "epoch": 1.6743459586099179, "grad_norm": 2.772369861602783, "learning_rate": 4.243949533835941e-06, "loss": 0.5058, "step": 12864 }, { "epoch": 1.6747364310816089, "grad_norm": 2.680986166000366, "learning_rate": 4.241883785810538e-06, "loss": 0.5034, "step": 12867 }, { "epoch": 1.6751269035532994, "grad_norm": 2.6189749240875244, "learning_rate": 4.239818170226442e-06, "loss": 0.4952, "step": 12870 }, { "epoch": 1.6755173760249904, "grad_norm": 2.844604253768921, "learning_rate": 4.237752687444514e-06, "loss": 0.5086, "step": 12873 }, { "epoch": 1.675907848496681, "grad_norm": 2.81457781791687, "learning_rate": 4.235687337825586e-06, "loss": 0.4829, "step": 12876 }, { "epoch": 1.6762983209683717, "grad_norm": 3.1727747917175293, "learning_rate": 4.233622121730474e-06, "loss": 0.5165, "step": 12879 }, { "epoch": 1.6766887934400625, "grad_norm": 2.7962770462036133, "learning_rate": 4.231557039519965e-06, "loss": 0.4056, "step": 12882 }, { "epoch": 1.6770792659117533, "grad_norm": 2.9042179584503174, "learning_rate": 4.229492091554823e-06, "loss": 0.6229, "step": 12885 }, { "epoch": 1.677469738383444, "grad_norm": 2.709332227706909, "learning_rate": 4.227427278195794e-06, "loss": 0.4915, "step": 12888 }, { "epoch": 1.6778602108551346, "grad_norm": 2.407127857208252, "learning_rate": 4.225362599803592e-06, "loss": 0.445, "step": 12891 }, { "epoch": 1.6782506833268256, "grad_norm": 2.5972049236297607, "learning_rate": 4.2232980567389156e-06, "loss": 0.4591, "step": 12894 }, { "epoch": 1.6786411557985161, "grad_norm": 3.5093605518341064, "learning_rate": 4.221233649362432e-06, "loss": 0.5018, "step": 12897 }, { "epoch": 1.679031628270207, "grad_norm": 2.601149320602417, "learning_rate": 4.219169378034795e-06, "loss": 0.4992, "step": 12900 }, { "epoch": 1.6794221007418977, "grad_norm": 2.5034475326538086, "learning_rate": 4.217105243116623e-06, "loss": 0.4433, "step": 12903 }, { "epoch": 1.6798125732135885, "grad_norm": 2.8355612754821777, "learning_rate": 4.21504124496852e-06, "loss": 0.4554, "step": 12906 }, { "epoch": 1.6802030456852792, "grad_norm": 2.9951822757720947, "learning_rate": 4.212977383951059e-06, "loss": 0.5543, "step": 12909 }, { "epoch": 1.6805935181569698, "grad_norm": 3.2354860305786133, "learning_rate": 4.210913660424793e-06, "loss": 0.4783, "step": 12912 }, { "epoch": 1.6809839906286608, "grad_norm": 2.7228434085845947, "learning_rate": 4.208850074750251e-06, "loss": 0.4661, "step": 12915 }, { "epoch": 1.6813744631003513, "grad_norm": 3.545387029647827, "learning_rate": 4.206786627287936e-06, "loss": 0.5935, "step": 12918 }, { "epoch": 1.6817649355720423, "grad_norm": 2.7536470890045166, "learning_rate": 4.20472331839833e-06, "loss": 0.4677, "step": 12921 }, { "epoch": 1.6821554080437329, "grad_norm": 2.6740031242370605, "learning_rate": 4.202660148441886e-06, "loss": 0.5313, "step": 12924 }, { "epoch": 1.6825458805154236, "grad_norm": 2.6446104049682617, "learning_rate": 4.200597117779038e-06, "loss": 0.4866, "step": 12927 }, { "epoch": 1.6829363529871144, "grad_norm": 2.780635118484497, "learning_rate": 4.198534226770191e-06, "loss": 0.5019, "step": 12930 }, { "epoch": 1.6833268254588052, "grad_norm": 3.038558006286621, "learning_rate": 4.196471475775728e-06, "loss": 0.4894, "step": 12933 }, { "epoch": 1.683717297930496, "grad_norm": 3.2223899364471436, "learning_rate": 4.1944088651560085e-06, "loss": 0.5071, "step": 12936 }, { "epoch": 1.6841077704021865, "grad_norm": 3.295980930328369, "learning_rate": 4.192346395271364e-06, "loss": 0.538, "step": 12939 }, { "epoch": 1.6844982428738775, "grad_norm": 2.9584147930145264, "learning_rate": 4.190284066482107e-06, "loss": 0.551, "step": 12942 }, { "epoch": 1.684888715345568, "grad_norm": 2.542959451675415, "learning_rate": 4.18822187914852e-06, "loss": 0.4231, "step": 12945 }, { "epoch": 1.6852791878172588, "grad_norm": 2.58151912689209, "learning_rate": 4.186159833630862e-06, "loss": 0.5433, "step": 12948 }, { "epoch": 1.6856696602889496, "grad_norm": 2.462104082107544, "learning_rate": 4.18409793028937e-06, "loss": 0.4551, "step": 12951 }, { "epoch": 1.6860601327606404, "grad_norm": 2.931121587753296, "learning_rate": 4.182036169484252e-06, "loss": 0.5095, "step": 12954 }, { "epoch": 1.6864506052323311, "grad_norm": 2.6130144596099854, "learning_rate": 4.1799745515756964e-06, "loss": 0.4584, "step": 12957 }, { "epoch": 1.686841077704022, "grad_norm": 2.993987798690796, "learning_rate": 4.17791307692386e-06, "loss": 0.4698, "step": 12960 }, { "epoch": 1.6872315501757127, "grad_norm": 2.883889675140381, "learning_rate": 4.1758517458888805e-06, "loss": 0.5164, "step": 12963 }, { "epoch": 1.6876220226474032, "grad_norm": 2.5255353450775146, "learning_rate": 4.173790558830868e-06, "loss": 0.4782, "step": 12966 }, { "epoch": 1.6880124951190942, "grad_norm": 2.975459575653076, "learning_rate": 4.171729516109904e-06, "loss": 0.5081, "step": 12969 }, { "epoch": 1.6884029675907848, "grad_norm": 2.7381391525268555, "learning_rate": 4.169668618086054e-06, "loss": 0.5112, "step": 12972 }, { "epoch": 1.6887934400624756, "grad_norm": 2.576137065887451, "learning_rate": 4.167607865119348e-06, "loss": 0.5115, "step": 12975 }, { "epoch": 1.6891839125341663, "grad_norm": 2.776411533355713, "learning_rate": 4.165547257569797e-06, "loss": 0.4519, "step": 12978 }, { "epoch": 1.689574385005857, "grad_norm": 2.5057878494262695, "learning_rate": 4.163486795797384e-06, "loss": 0.4683, "step": 12981 }, { "epoch": 1.6899648574775479, "grad_norm": 2.751605272293091, "learning_rate": 4.161426480162069e-06, "loss": 0.4207, "step": 12984 }, { "epoch": 1.6903553299492384, "grad_norm": 2.7747738361358643, "learning_rate": 4.1593663110237845e-06, "loss": 0.5169, "step": 12987 }, { "epoch": 1.6907458024209294, "grad_norm": 2.7248165607452393, "learning_rate": 4.157306288742435e-06, "loss": 0.4886, "step": 12990 }, { "epoch": 1.69113627489262, "grad_norm": 4.524834156036377, "learning_rate": 4.155246413677907e-06, "loss": 0.4705, "step": 12993 }, { "epoch": 1.691526747364311, "grad_norm": 3.088681936264038, "learning_rate": 4.153186686190051e-06, "loss": 0.4504, "step": 12996 }, { "epoch": 1.6919172198360015, "grad_norm": 2.302208662033081, "learning_rate": 4.151127106638701e-06, "loss": 0.4188, "step": 12999 }, { "epoch": 1.6923076923076923, "grad_norm": 2.612056255340576, "learning_rate": 4.149067675383659e-06, "loss": 0.5164, "step": 13002 }, { "epoch": 1.692698164779383, "grad_norm": 2.487496852874756, "learning_rate": 4.147008392784703e-06, "loss": 0.4577, "step": 13005 }, { "epoch": 1.6930886372510738, "grad_norm": 2.817664861679077, "learning_rate": 4.144949259201589e-06, "loss": 0.4666, "step": 13008 }, { "epoch": 1.6934791097227646, "grad_norm": 3.112154006958008, "learning_rate": 4.142890274994038e-06, "loss": 0.5034, "step": 13011 }, { "epoch": 1.6938695821944552, "grad_norm": 2.818704605102539, "learning_rate": 4.1408314405217544e-06, "loss": 0.5956, "step": 13014 }, { "epoch": 1.6942600546661462, "grad_norm": 2.6162893772125244, "learning_rate": 4.138772756144411e-06, "loss": 0.5144, "step": 13017 }, { "epoch": 1.6946505271378367, "grad_norm": 3.4802184104919434, "learning_rate": 4.136714222221654e-06, "loss": 0.375, "step": 13020 }, { "epoch": 1.6950409996095275, "grad_norm": 2.7583959102630615, "learning_rate": 4.134655839113105e-06, "loss": 0.4669, "step": 13023 }, { "epoch": 1.6954314720812182, "grad_norm": 3.098271369934082, "learning_rate": 4.132597607178362e-06, "loss": 0.5267, "step": 13026 }, { "epoch": 1.695821944552909, "grad_norm": 2.7036032676696777, "learning_rate": 4.130539526776994e-06, "loss": 0.5162, "step": 13029 }, { "epoch": 1.6962124170245998, "grad_norm": 2.6259026527404785, "learning_rate": 4.12848159826854e-06, "loss": 0.4926, "step": 13032 }, { "epoch": 1.6966028894962903, "grad_norm": 3.8111186027526855, "learning_rate": 4.126423822012516e-06, "loss": 0.4105, "step": 13035 }, { "epoch": 1.6969933619679813, "grad_norm": 2.448268175125122, "learning_rate": 4.1243661983684155e-06, "loss": 0.5241, "step": 13038 }, { "epoch": 1.697383834439672, "grad_norm": 2.864579200744629, "learning_rate": 4.1223087276956964e-06, "loss": 0.5952, "step": 13041 }, { "epoch": 1.6977743069113629, "grad_norm": 2.495445728302002, "learning_rate": 4.120251410353797e-06, "loss": 0.4932, "step": 13044 }, { "epoch": 1.6981647793830534, "grad_norm": 2.7435507774353027, "learning_rate": 4.1181942467021246e-06, "loss": 0.4825, "step": 13047 }, { "epoch": 1.6985552518547442, "grad_norm": 2.677366018295288, "learning_rate": 4.116137237100062e-06, "loss": 0.4762, "step": 13050 }, { "epoch": 1.698945724326435, "grad_norm": 2.592958688735962, "learning_rate": 4.1140803819069665e-06, "loss": 0.4575, "step": 13053 }, { "epoch": 1.6993361967981258, "grad_norm": 2.5551681518554688, "learning_rate": 4.112023681482163e-06, "loss": 0.5381, "step": 13056 }, { "epoch": 1.6997266692698165, "grad_norm": 2.487131357192993, "learning_rate": 4.109967136184955e-06, "loss": 0.4652, "step": 13059 }, { "epoch": 1.700117141741507, "grad_norm": 2.729330539703369, "learning_rate": 4.107910746374616e-06, "loss": 0.4711, "step": 13062 }, { "epoch": 1.700507614213198, "grad_norm": 2.5914466381073, "learning_rate": 4.105854512410391e-06, "loss": 0.4262, "step": 13065 }, { "epoch": 1.7008980866848886, "grad_norm": 2.8775665760040283, "learning_rate": 4.1037984346515035e-06, "loss": 0.4693, "step": 13068 }, { "epoch": 1.7012885591565796, "grad_norm": 4.019662380218506, "learning_rate": 4.101742513457144e-06, "loss": 0.5601, "step": 13071 }, { "epoch": 1.7016790316282702, "grad_norm": 2.315446615219116, "learning_rate": 4.099686749186478e-06, "loss": 0.4316, "step": 13074 }, { "epoch": 1.702069504099961, "grad_norm": 2.7482759952545166, "learning_rate": 4.097631142198641e-06, "loss": 0.4958, "step": 13077 }, { "epoch": 1.7024599765716517, "grad_norm": 2.536959409713745, "learning_rate": 4.0955756928527484e-06, "loss": 0.4941, "step": 13080 }, { "epoch": 1.7028504490433425, "grad_norm": 2.7001805305480957, "learning_rate": 4.093520401507878e-06, "loss": 0.4224, "step": 13083 }, { "epoch": 1.7032409215150333, "grad_norm": 2.7134954929351807, "learning_rate": 4.091465268523086e-06, "loss": 0.5029, "step": 13086 }, { "epoch": 1.7036313939867238, "grad_norm": 2.511518716812134, "learning_rate": 4.089410294257401e-06, "loss": 0.4307, "step": 13089 }, { "epoch": 1.7040218664584148, "grad_norm": 2.8347418308258057, "learning_rate": 4.087355479069822e-06, "loss": 0.4964, "step": 13092 }, { "epoch": 1.7044123389301054, "grad_norm": 2.555575132369995, "learning_rate": 4.085300823319321e-06, "loss": 0.4809, "step": 13095 }, { "epoch": 1.7048028114017961, "grad_norm": 2.7679803371429443, "learning_rate": 4.083246327364842e-06, "loss": 0.4417, "step": 13098 }, { "epoch": 1.705193283873487, "grad_norm": 2.7733895778656006, "learning_rate": 4.0811919915653e-06, "loss": 0.5306, "step": 13101 }, { "epoch": 1.7055837563451777, "grad_norm": 2.7801830768585205, "learning_rate": 4.079137816279586e-06, "loss": 0.5075, "step": 13104 }, { "epoch": 1.7059742288168684, "grad_norm": 3.2246150970458984, "learning_rate": 4.077083801866555e-06, "loss": 0.5001, "step": 13107 }, { "epoch": 1.706364701288559, "grad_norm": 2.7091383934020996, "learning_rate": 4.0750299486850436e-06, "loss": 0.4837, "step": 13110 }, { "epoch": 1.70675517376025, "grad_norm": 3.151174783706665, "learning_rate": 4.072976257093855e-06, "loss": 0.5339, "step": 13113 }, { "epoch": 1.7071456462319405, "grad_norm": 2.550407886505127, "learning_rate": 4.070922727451765e-06, "loss": 0.4892, "step": 13116 }, { "epoch": 1.7075361187036315, "grad_norm": 2.9066054821014404, "learning_rate": 4.068869360117519e-06, "loss": 0.4448, "step": 13119 }, { "epoch": 1.707926591175322, "grad_norm": 2.571066379547119, "learning_rate": 4.066816155449837e-06, "loss": 0.6094, "step": 13122 }, { "epoch": 1.7083170636470129, "grad_norm": 2.580421209335327, "learning_rate": 4.06476311380741e-06, "loss": 0.4401, "step": 13125 }, { "epoch": 1.7087075361187036, "grad_norm": 2.7733590602874756, "learning_rate": 4.0627102355488986e-06, "loss": 0.5215, "step": 13128 }, { "epoch": 1.7090980085903944, "grad_norm": 2.7099995613098145, "learning_rate": 4.060657521032939e-06, "loss": 0.5325, "step": 13131 }, { "epoch": 1.7094884810620852, "grad_norm": 2.444837808609009, "learning_rate": 4.058604970618133e-06, "loss": 0.4751, "step": 13134 }, { "epoch": 1.7098789535337757, "grad_norm": 2.5356292724609375, "learning_rate": 4.056552584663059e-06, "loss": 0.4456, "step": 13137 }, { "epoch": 1.7102694260054667, "grad_norm": 2.789571762084961, "learning_rate": 4.054500363526264e-06, "loss": 0.4897, "step": 13140 }, { "epoch": 1.7106598984771573, "grad_norm": 2.5640852451324463, "learning_rate": 4.052448307566265e-06, "loss": 0.4146, "step": 13143 }, { "epoch": 1.7110503709488483, "grad_norm": 3.146996259689331, "learning_rate": 4.050396417141555e-06, "loss": 0.4841, "step": 13146 }, { "epoch": 1.7114408434205388, "grad_norm": 2.4805715084075928, "learning_rate": 4.048344692610591e-06, "loss": 0.4652, "step": 13149 }, { "epoch": 1.7118313158922296, "grad_norm": 2.782636880874634, "learning_rate": 4.046293134331808e-06, "loss": 0.5592, "step": 13152 }, { "epoch": 1.7122217883639204, "grad_norm": 2.8912787437438965, "learning_rate": 4.044241742663608e-06, "loss": 0.5252, "step": 13155 }, { "epoch": 1.7126122608356111, "grad_norm": 2.741149425506592, "learning_rate": 4.042190517964366e-06, "loss": 0.5303, "step": 13158 }, { "epoch": 1.713002733307302, "grad_norm": 2.541614294052124, "learning_rate": 4.040139460592425e-06, "loss": 0.4626, "step": 13161 }, { "epoch": 1.7133932057789925, "grad_norm": 2.6299006938934326, "learning_rate": 4.038088570906101e-06, "loss": 0.485, "step": 13164 }, { "epoch": 1.7137836782506835, "grad_norm": 2.4726014137268066, "learning_rate": 4.036037849263681e-06, "loss": 0.4611, "step": 13167 }, { "epoch": 1.714174150722374, "grad_norm": 2.893159866333008, "learning_rate": 4.03398729602342e-06, "loss": 0.5536, "step": 13170 }, { "epoch": 1.7145646231940648, "grad_norm": 2.6248669624328613, "learning_rate": 4.031936911543547e-06, "loss": 0.461, "step": 13173 }, { "epoch": 1.7149550956657555, "grad_norm": 2.4585137367248535, "learning_rate": 4.029886696182258e-06, "loss": 0.486, "step": 13176 }, { "epoch": 1.7153455681374463, "grad_norm": 3.081102132797241, "learning_rate": 4.027836650297722e-06, "loss": 0.4455, "step": 13179 }, { "epoch": 1.715736040609137, "grad_norm": 2.7504281997680664, "learning_rate": 4.025786774248079e-06, "loss": 0.4965, "step": 13182 }, { "epoch": 1.7161265130808276, "grad_norm": 2.750417709350586, "learning_rate": 4.023737068391437e-06, "loss": 0.4974, "step": 13185 }, { "epoch": 1.7165169855525186, "grad_norm": 2.768533229827881, "learning_rate": 4.021687533085876e-06, "loss": 0.5187, "step": 13188 }, { "epoch": 1.7169074580242092, "grad_norm": 2.4056451320648193, "learning_rate": 4.019638168689442e-06, "loss": 0.4074, "step": 13191 }, { "epoch": 1.7172979304959002, "grad_norm": 2.582237482070923, "learning_rate": 4.0175889755601605e-06, "loss": 0.5469, "step": 13194 }, { "epoch": 1.7176884029675907, "grad_norm": 2.816178560256958, "learning_rate": 4.015539954056017e-06, "loss": 0.4812, "step": 13197 }, { "epoch": 1.7180788754392815, "grad_norm": 2.4552249908447266, "learning_rate": 4.013491104534973e-06, "loss": 0.4056, "step": 13200 }, { "epoch": 1.7184693479109723, "grad_norm": 2.762195110321045, "learning_rate": 4.011442427354958e-06, "loss": 0.5348, "step": 13203 }, { "epoch": 1.718859820382663, "grad_norm": 2.9049007892608643, "learning_rate": 4.009393922873871e-06, "loss": 0.4404, "step": 13206 }, { "epoch": 1.7192502928543538, "grad_norm": 2.658714532852173, "learning_rate": 4.007345591449583e-06, "loss": 0.5044, "step": 13209 }, { "epoch": 1.7196407653260444, "grad_norm": 3.3005869388580322, "learning_rate": 4.005297433439929e-06, "loss": 0.4398, "step": 13212 }, { "epoch": 1.7200312377977354, "grad_norm": 2.9398393630981445, "learning_rate": 4.003249449202723e-06, "loss": 0.4689, "step": 13215 }, { "epoch": 1.720421710269426, "grad_norm": 2.541372060775757, "learning_rate": 4.0012016390957414e-06, "loss": 0.5121, "step": 13218 }, { "epoch": 1.720812182741117, "grad_norm": 2.7919490337371826, "learning_rate": 3.999154003476732e-06, "loss": 0.4993, "step": 13221 }, { "epoch": 1.7212026552128075, "grad_norm": 3.1717112064361572, "learning_rate": 3.997106542703413e-06, "loss": 0.5892, "step": 13224 }, { "epoch": 1.7215931276844982, "grad_norm": 2.6251416206359863, "learning_rate": 3.99505925713347e-06, "loss": 0.47, "step": 13227 }, { "epoch": 1.721983600156189, "grad_norm": 3.013826608657837, "learning_rate": 3.993012147124561e-06, "loss": 0.5313, "step": 13230 }, { "epoch": 1.7223740726278798, "grad_norm": 2.7360174655914307, "learning_rate": 3.990965213034311e-06, "loss": 0.6029, "step": 13233 }, { "epoch": 1.7227645450995706, "grad_norm": 2.6772170066833496, "learning_rate": 3.988918455220317e-06, "loss": 0.4684, "step": 13236 }, { "epoch": 1.723155017571261, "grad_norm": 2.4262611865997314, "learning_rate": 3.986871874040141e-06, "loss": 0.5268, "step": 13239 }, { "epoch": 1.723545490042952, "grad_norm": 2.6841237545013428, "learning_rate": 3.9848254698513176e-06, "loss": 0.5358, "step": 13242 }, { "epoch": 1.7239359625146427, "grad_norm": 2.6044881343841553, "learning_rate": 3.98277924301135e-06, "loss": 0.6248, "step": 13245 }, { "epoch": 1.7243264349863334, "grad_norm": 3.0996434688568115, "learning_rate": 3.980733193877707e-06, "loss": 0.504, "step": 13248 }, { "epoch": 1.7247169074580242, "grad_norm": 2.843388795852661, "learning_rate": 3.978687322807832e-06, "loss": 0.4786, "step": 13251 }, { "epoch": 1.725107379929715, "grad_norm": 3.045637607574463, "learning_rate": 3.9766416301591336e-06, "loss": 0.4879, "step": 13254 }, { "epoch": 1.7254978524014057, "grad_norm": 2.5478973388671875, "learning_rate": 3.974596116288988e-06, "loss": 0.4843, "step": 13257 }, { "epoch": 1.7258883248730963, "grad_norm": 2.8354203701019287, "learning_rate": 3.972550781554745e-06, "loss": 0.5909, "step": 13260 }, { "epoch": 1.7262787973447873, "grad_norm": 2.7125988006591797, "learning_rate": 3.970505626313718e-06, "loss": 0.4752, "step": 13263 }, { "epoch": 1.7266692698164778, "grad_norm": 2.5882513523101807, "learning_rate": 3.9684606509231935e-06, "loss": 0.4578, "step": 13266 }, { "epoch": 1.7270597422881688, "grad_norm": 2.7400310039520264, "learning_rate": 3.966415855740423e-06, "loss": 0.5583, "step": 13269 }, { "epoch": 1.7274502147598594, "grad_norm": 2.917891025543213, "learning_rate": 3.964371241122627e-06, "loss": 0.4712, "step": 13272 }, { "epoch": 1.7278406872315502, "grad_norm": 2.7035434246063232, "learning_rate": 3.962326807426996e-06, "loss": 0.5, "step": 13275 }, { "epoch": 1.728231159703241, "grad_norm": 2.7468101978302, "learning_rate": 3.960282555010691e-06, "loss": 0.5003, "step": 13278 }, { "epoch": 1.7286216321749317, "grad_norm": 2.587204694747925, "learning_rate": 3.958238484230835e-06, "loss": 0.5208, "step": 13281 }, { "epoch": 1.7290121046466225, "grad_norm": 2.676217555999756, "learning_rate": 3.956194595444525e-06, "loss": 0.5368, "step": 13284 }, { "epoch": 1.729402577118313, "grad_norm": 2.5884201526641846, "learning_rate": 3.954150889008823e-06, "loss": 0.5131, "step": 13287 }, { "epoch": 1.729793049590004, "grad_norm": 2.455409049987793, "learning_rate": 3.95210736528076e-06, "loss": 0.4353, "step": 13290 }, { "epoch": 1.7301835220616946, "grad_norm": 2.7997970581054688, "learning_rate": 3.9500640246173376e-06, "loss": 0.5197, "step": 13293 }, { "epoch": 1.7305739945333856, "grad_norm": 2.450458288192749, "learning_rate": 3.948020867375521e-06, "loss": 0.5324, "step": 13296 }, { "epoch": 1.7309644670050761, "grad_norm": 2.428149461746216, "learning_rate": 3.945977893912244e-06, "loss": 0.508, "step": 13299 }, { "epoch": 1.7313549394767669, "grad_norm": 4.35640811920166, "learning_rate": 3.943935104584413e-06, "loss": 0.525, "step": 13302 }, { "epoch": 1.7317454119484577, "grad_norm": 2.549283266067505, "learning_rate": 3.941892499748897e-06, "loss": 0.4751, "step": 13305 }, { "epoch": 1.7321358844201484, "grad_norm": 3.114838123321533, "learning_rate": 3.9398500797625355e-06, "loss": 0.5122, "step": 13308 }, { "epoch": 1.7325263568918392, "grad_norm": 3.187574863433838, "learning_rate": 3.937807844982136e-06, "loss": 0.4961, "step": 13311 }, { "epoch": 1.7329168293635298, "grad_norm": 2.5373189449310303, "learning_rate": 3.93576579576447e-06, "loss": 0.4976, "step": 13314 }, { "epoch": 1.7333073018352207, "grad_norm": 2.5374085903167725, "learning_rate": 3.93372393246628e-06, "loss": 0.4775, "step": 13317 }, { "epoch": 1.7336977743069113, "grad_norm": 2.4994728565216064, "learning_rate": 3.931682255444276e-06, "loss": 0.4706, "step": 13320 }, { "epoch": 1.734088246778602, "grad_norm": 4.567479610443115, "learning_rate": 3.929640765055137e-06, "loss": 0.5715, "step": 13323 }, { "epoch": 1.7344787192502928, "grad_norm": 2.689119815826416, "learning_rate": 3.927599461655503e-06, "loss": 0.4771, "step": 13326 }, { "epoch": 1.7348691917219836, "grad_norm": 2.8153414726257324, "learning_rate": 3.925558345601987e-06, "loss": 0.4448, "step": 13329 }, { "epoch": 1.7352596641936744, "grad_norm": 2.966712236404419, "learning_rate": 3.923517417251168e-06, "loss": 0.5059, "step": 13332 }, { "epoch": 1.735650136665365, "grad_norm": 2.778205156326294, "learning_rate": 3.921476676959591e-06, "loss": 0.4661, "step": 13335 }, { "epoch": 1.736040609137056, "grad_norm": 3.179136276245117, "learning_rate": 3.919436125083771e-06, "loss": 0.5206, "step": 13338 }, { "epoch": 1.7364310816087465, "grad_norm": 2.8156237602233887, "learning_rate": 3.917395761980186e-06, "loss": 0.5265, "step": 13341 }, { "epoch": 1.7368215540804375, "grad_norm": 2.721435546875, "learning_rate": 3.915355588005283e-06, "loss": 0.5218, "step": 13344 }, { "epoch": 1.737212026552128, "grad_norm": 3.0874862670898438, "learning_rate": 3.913315603515479e-06, "loss": 0.4019, "step": 13347 }, { "epoch": 1.7376024990238188, "grad_norm": 3.176990032196045, "learning_rate": 3.911275808867151e-06, "loss": 0.5453, "step": 13350 }, { "epoch": 1.7379929714955096, "grad_norm": 2.735309600830078, "learning_rate": 3.909236204416651e-06, "loss": 0.4105, "step": 13353 }, { "epoch": 1.7383834439672003, "grad_norm": 2.8676302433013916, "learning_rate": 3.90719679052029e-06, "loss": 0.4664, "step": 13356 }, { "epoch": 1.7387739164388911, "grad_norm": 3.0311317443847656, "learning_rate": 3.905157567534349e-06, "loss": 0.509, "step": 13359 }, { "epoch": 1.7391643889105817, "grad_norm": 2.751351833343506, "learning_rate": 3.9031185358150794e-06, "loss": 0.5447, "step": 13362 }, { "epoch": 1.7395548613822727, "grad_norm": 2.6076507568359375, "learning_rate": 3.901079695718696e-06, "loss": 0.4675, "step": 13365 }, { "epoch": 1.7399453338539632, "grad_norm": 2.406752109527588, "learning_rate": 3.899041047601375e-06, "loss": 0.5027, "step": 13368 }, { "epoch": 1.740335806325654, "grad_norm": 2.558464527130127, "learning_rate": 3.897002591819269e-06, "loss": 0.4831, "step": 13371 }, { "epoch": 1.7407262787973448, "grad_norm": 2.5584099292755127, "learning_rate": 3.894964328728489e-06, "loss": 0.4087, "step": 13374 }, { "epoch": 1.7411167512690355, "grad_norm": 2.5314037799835205, "learning_rate": 3.8929262586851164e-06, "loss": 0.4534, "step": 13377 }, { "epoch": 1.7415072237407263, "grad_norm": 2.593812942504883, "learning_rate": 3.890888382045198e-06, "loss": 0.454, "step": 13380 }, { "epoch": 1.741897696212417, "grad_norm": 2.6239094734191895, "learning_rate": 3.8888506991647455e-06, "loss": 0.4661, "step": 13383 }, { "epoch": 1.7422881686841079, "grad_norm": 2.606416940689087, "learning_rate": 3.886813210399738e-06, "loss": 0.5233, "step": 13386 }, { "epoch": 1.7426786411557984, "grad_norm": 2.7739343643188477, "learning_rate": 3.884775916106121e-06, "loss": 0.4525, "step": 13389 }, { "epoch": 1.7430691136274894, "grad_norm": 2.44478440284729, "learning_rate": 3.882738816639806e-06, "loss": 0.452, "step": 13392 }, { "epoch": 1.74345958609918, "grad_norm": 2.652439832687378, "learning_rate": 3.880701912356668e-06, "loss": 0.4519, "step": 13395 }, { "epoch": 1.7438500585708707, "grad_norm": 2.414402484893799, "learning_rate": 3.878665203612553e-06, "loss": 0.4507, "step": 13398 }, { "epoch": 1.7442405310425615, "grad_norm": 2.590601682662964, "learning_rate": 3.876628690763265e-06, "loss": 0.4609, "step": 13401 }, { "epoch": 1.7446310035142523, "grad_norm": 2.637747049331665, "learning_rate": 3.874592374164583e-06, "loss": 0.6, "step": 13404 }, { "epoch": 1.745021475985943, "grad_norm": 2.618107795715332, "learning_rate": 3.872556254172246e-06, "loss": 0.5137, "step": 13407 }, { "epoch": 1.7454119484576336, "grad_norm": 2.5348377227783203, "learning_rate": 3.870520331141961e-06, "loss": 0.5094, "step": 13410 }, { "epoch": 1.7458024209293246, "grad_norm": 2.787950277328491, "learning_rate": 3.868484605429396e-06, "loss": 0.5294, "step": 13413 }, { "epoch": 1.7461928934010151, "grad_norm": 2.4089295864105225, "learning_rate": 3.866449077390192e-06, "loss": 0.439, "step": 13416 }, { "epoch": 1.7465833658727061, "grad_norm": 2.6536457538604736, "learning_rate": 3.864413747379948e-06, "loss": 0.4402, "step": 13419 }, { "epoch": 1.7469738383443967, "grad_norm": 2.460862398147583, "learning_rate": 3.862378615754233e-06, "loss": 0.4751, "step": 13422 }, { "epoch": 1.7473643108160875, "grad_norm": 2.9238407611846924, "learning_rate": 3.860343682868583e-06, "loss": 0.4904, "step": 13425 }, { "epoch": 1.7477547832877782, "grad_norm": 2.762789249420166, "learning_rate": 3.858308949078492e-06, "loss": 0.5113, "step": 13428 }, { "epoch": 1.748145255759469, "grad_norm": 2.810913324356079, "learning_rate": 3.856274414739428e-06, "loss": 0.4988, "step": 13431 }, { "epoch": 1.7485357282311598, "grad_norm": 2.568662643432617, "learning_rate": 3.854240080206815e-06, "loss": 0.5601, "step": 13434 }, { "epoch": 1.7489262007028503, "grad_norm": 2.7099461555480957, "learning_rate": 3.852205945836051e-06, "loss": 0.4212, "step": 13437 }, { "epoch": 1.7493166731745413, "grad_norm": 2.6403145790100098, "learning_rate": 3.850172011982494e-06, "loss": 0.4719, "step": 13440 }, { "epoch": 1.7497071456462319, "grad_norm": 2.7137389183044434, "learning_rate": 3.848138279001466e-06, "loss": 0.4694, "step": 13443 }, { "epoch": 1.7500976181179226, "grad_norm": 2.654871702194214, "learning_rate": 3.8461047472482584e-06, "loss": 0.4603, "step": 13446 }, { "epoch": 1.7504880905896134, "grad_norm": 2.468869209289551, "learning_rate": 3.844071417078124e-06, "loss": 0.4592, "step": 13449 }, { "epoch": 1.7508785630613042, "grad_norm": 3.2677621841430664, "learning_rate": 3.842038288846282e-06, "loss": 0.5041, "step": 13452 }, { "epoch": 1.751269035532995, "grad_norm": 2.530336380004883, "learning_rate": 3.8400053629079145e-06, "loss": 0.5458, "step": 13455 }, { "epoch": 1.7516595080046855, "grad_norm": 2.7045438289642334, "learning_rate": 3.8379726396181705e-06, "loss": 0.4496, "step": 13458 }, { "epoch": 1.7520499804763765, "grad_norm": 2.640897035598755, "learning_rate": 3.83594011933216e-06, "loss": 0.4873, "step": 13461 }, { "epoch": 1.752440452948067, "grad_norm": 2.674314022064209, "learning_rate": 3.833907802404963e-06, "loss": 0.5766, "step": 13464 }, { "epoch": 1.752830925419758, "grad_norm": 3.62727689743042, "learning_rate": 3.83187568919162e-06, "loss": 0.4801, "step": 13467 }, { "epoch": 1.7532213978914486, "grad_norm": 2.658531904220581, "learning_rate": 3.829843780047137e-06, "loss": 0.5359, "step": 13470 }, { "epoch": 1.7536118703631394, "grad_norm": 3.2108259201049805, "learning_rate": 3.827812075326483e-06, "loss": 0.542, "step": 13473 }, { "epoch": 1.7540023428348301, "grad_norm": 2.5640816688537598, "learning_rate": 3.825780575384595e-06, "loss": 0.5172, "step": 13476 }, { "epoch": 1.754392815306521, "grad_norm": 4.314723491668701, "learning_rate": 3.823749280576369e-06, "loss": 0.4687, "step": 13479 }, { "epoch": 1.7547832877782117, "grad_norm": 2.8595173358917236, "learning_rate": 3.821718191256669e-06, "loss": 0.5252, "step": 13482 }, { "epoch": 1.7551737602499022, "grad_norm": 2.5360357761383057, "learning_rate": 3.819687307780321e-06, "loss": 0.4925, "step": 13485 }, { "epoch": 1.7555642327215932, "grad_norm": 2.3102357387542725, "learning_rate": 3.81765663050212e-06, "loss": 0.516, "step": 13488 }, { "epoch": 1.7559547051932838, "grad_norm": 2.8565187454223633, "learning_rate": 3.8156261597768165e-06, "loss": 0.5065, "step": 13491 }, { "epoch": 1.7563451776649748, "grad_norm": 3.1690919399261475, "learning_rate": 3.8135958959591334e-06, "loss": 0.506, "step": 13494 }, { "epoch": 1.7567356501366653, "grad_norm": 2.7095978260040283, "learning_rate": 3.8115658394037496e-06, "loss": 0.5125, "step": 13497 }, { "epoch": 1.757126122608356, "grad_norm": 2.614197254180908, "learning_rate": 3.809535990465314e-06, "loss": 0.5541, "step": 13500 }, { "epoch": 1.7575165950800469, "grad_norm": 3.0147502422332764, "learning_rate": 3.807506349498438e-06, "loss": 0.4678, "step": 13503 }, { "epoch": 1.7579070675517376, "grad_norm": 2.663944721221924, "learning_rate": 3.8054769168576924e-06, "loss": 0.4942, "step": 13506 }, { "epoch": 1.7582975400234284, "grad_norm": 2.467390775680542, "learning_rate": 3.803447692897617e-06, "loss": 0.4927, "step": 13509 }, { "epoch": 1.758688012495119, "grad_norm": 3.1490232944488525, "learning_rate": 3.8014186779727123e-06, "loss": 0.5778, "step": 13512 }, { "epoch": 1.75907848496681, "grad_norm": 2.497331142425537, "learning_rate": 3.7993898724374435e-06, "loss": 0.4939, "step": 13515 }, { "epoch": 1.7594689574385005, "grad_norm": 2.6486501693725586, "learning_rate": 3.7973612766462387e-06, "loss": 0.4405, "step": 13518 }, { "epoch": 1.7598594299101913, "grad_norm": 3.311718702316284, "learning_rate": 3.7953328909534876e-06, "loss": 0.5304, "step": 13521 }, { "epoch": 1.760249902381882, "grad_norm": 3.001080274581909, "learning_rate": 3.7933047157135465e-06, "loss": 0.6345, "step": 13524 }, { "epoch": 1.7606403748535728, "grad_norm": 2.920088529586792, "learning_rate": 3.7912767512807318e-06, "loss": 0.5114, "step": 13527 }, { "epoch": 1.7610308473252636, "grad_norm": 2.604182720184326, "learning_rate": 3.7892489980093285e-06, "loss": 0.5102, "step": 13530 }, { "epoch": 1.7614213197969542, "grad_norm": 3.248330593109131, "learning_rate": 3.7872214562535765e-06, "loss": 0.4517, "step": 13533 }, { "epoch": 1.7618117922686451, "grad_norm": 3.4100341796875, "learning_rate": 3.785194126367685e-06, "loss": 0.4723, "step": 13536 }, { "epoch": 1.7622022647403357, "grad_norm": 3.491152286529541, "learning_rate": 3.783167008705825e-06, "loss": 0.5118, "step": 13539 }, { "epoch": 1.7625927372120267, "grad_norm": 2.765254259109497, "learning_rate": 3.7811401036221283e-06, "loss": 0.4562, "step": 13542 }, { "epoch": 1.7629832096837172, "grad_norm": 2.513669967651367, "learning_rate": 3.779113411470692e-06, "loss": 0.5244, "step": 13545 }, { "epoch": 1.763373682155408, "grad_norm": 2.982708215713501, "learning_rate": 3.7770869326055733e-06, "loss": 0.5286, "step": 13548 }, { "epoch": 1.7637641546270988, "grad_norm": 3.210015058517456, "learning_rate": 3.7750606673807945e-06, "loss": 0.5115, "step": 13551 }, { "epoch": 1.7641546270987896, "grad_norm": 3.011054277420044, "learning_rate": 3.773034616150342e-06, "loss": 0.5402, "step": 13554 }, { "epoch": 1.7645450995704803, "grad_norm": 4.221112251281738, "learning_rate": 3.7710087792681594e-06, "loss": 0.6668, "step": 13557 }, { "epoch": 1.7649355720421709, "grad_norm": 2.5336077213287354, "learning_rate": 3.7689831570881584e-06, "loss": 0.4922, "step": 13560 }, { "epoch": 1.7653260445138619, "grad_norm": 2.933840751647949, "learning_rate": 3.7669577499642094e-06, "loss": 0.5391, "step": 13563 }, { "epoch": 1.7657165169855524, "grad_norm": 2.6583011150360107, "learning_rate": 3.7649325582501478e-06, "loss": 0.5105, "step": 13566 }, { "epoch": 1.7661069894572434, "grad_norm": 2.7706074714660645, "learning_rate": 3.7629075822997685e-06, "loss": 0.5597, "step": 13569 }, { "epoch": 1.766497461928934, "grad_norm": 2.6521201133728027, "learning_rate": 3.7608828224668346e-06, "loss": 0.4072, "step": 13572 }, { "epoch": 1.7668879344006247, "grad_norm": 2.8983657360076904, "learning_rate": 3.7588582791050644e-06, "loss": 0.5915, "step": 13575 }, { "epoch": 1.7672784068723155, "grad_norm": 2.609997510910034, "learning_rate": 3.7568339525681407e-06, "loss": 0.4157, "step": 13578 }, { "epoch": 1.7676688793440063, "grad_norm": 3.424751043319702, "learning_rate": 3.754809843209712e-06, "loss": 0.5333, "step": 13581 }, { "epoch": 1.768059351815697, "grad_norm": 4.254384994506836, "learning_rate": 3.752785951383383e-06, "loss": 0.5174, "step": 13584 }, { "epoch": 1.7684498242873876, "grad_norm": 3.230611562728882, "learning_rate": 3.7507622774427242e-06, "loss": 0.602, "step": 13587 }, { "epoch": 1.7688402967590786, "grad_norm": 3.1927285194396973, "learning_rate": 3.748738821741269e-06, "loss": 0.523, "step": 13590 }, { "epoch": 1.7692307692307692, "grad_norm": 3.054600238800049, "learning_rate": 3.7467155846325086e-06, "loss": 0.4559, "step": 13593 }, { "epoch": 1.76962124170246, "grad_norm": 2.9370806217193604, "learning_rate": 3.7446925664699e-06, "loss": 0.5063, "step": 13596 }, { "epoch": 1.7700117141741507, "grad_norm": 2.851138114929199, "learning_rate": 3.7426697676068575e-06, "loss": 0.4684, "step": 13599 }, { "epoch": 1.7704021866458415, "grad_norm": 2.6864585876464844, "learning_rate": 3.740647188396762e-06, "loss": 0.4323, "step": 13602 }, { "epoch": 1.7707926591175323, "grad_norm": 3.145974636077881, "learning_rate": 3.7386248291929544e-06, "loss": 0.4725, "step": 13605 }, { "epoch": 1.7711831315892228, "grad_norm": 4.056112766265869, "learning_rate": 3.7366026903487346e-06, "loss": 0.6875, "step": 13608 }, { "epoch": 1.7715736040609138, "grad_norm": 2.8808858394622803, "learning_rate": 3.7345807722173655e-06, "loss": 0.6169, "step": 13611 }, { "epoch": 1.7719640765326043, "grad_norm": 3.3072636127471924, "learning_rate": 3.732559075152075e-06, "loss": 0.5757, "step": 13614 }, { "epoch": 1.7723545490042953, "grad_norm": 2.6923253536224365, "learning_rate": 3.730537599506049e-06, "loss": 0.5674, "step": 13617 }, { "epoch": 1.772745021475986, "grad_norm": 2.857133150100708, "learning_rate": 3.7285163456324323e-06, "loss": 0.5753, "step": 13620 }, { "epoch": 1.7731354939476767, "grad_norm": 2.6387181282043457, "learning_rate": 3.7264953138843363e-06, "loss": 0.525, "step": 13623 }, { "epoch": 1.7735259664193674, "grad_norm": 2.5029585361480713, "learning_rate": 3.724474504614829e-06, "loss": 0.4137, "step": 13626 }, { "epoch": 1.7739164388910582, "grad_norm": 2.632925510406494, "learning_rate": 3.7224539181769425e-06, "loss": 0.5328, "step": 13629 }, { "epoch": 1.774306911362749, "grad_norm": 2.7073307037353516, "learning_rate": 3.7204335549236703e-06, "loss": 0.4625, "step": 13632 }, { "epoch": 1.7746973838344395, "grad_norm": 2.770639181137085, "learning_rate": 3.718413415207962e-06, "loss": 0.4865, "step": 13635 }, { "epoch": 1.7750878563061305, "grad_norm": 3.0762181282043457, "learning_rate": 3.7163934993827364e-06, "loss": 0.522, "step": 13638 }, { "epoch": 1.775478328777821, "grad_norm": 2.7162609100341797, "learning_rate": 3.714373807800864e-06, "loss": 0.4633, "step": 13641 }, { "epoch": 1.775868801249512, "grad_norm": 2.4745163917541504, "learning_rate": 3.7123543408151843e-06, "loss": 0.4783, "step": 13644 }, { "epoch": 1.7762592737212026, "grad_norm": 2.7672512531280518, "learning_rate": 3.710335098778492e-06, "loss": 0.548, "step": 13647 }, { "epoch": 1.7766497461928934, "grad_norm": 2.50407075881958, "learning_rate": 3.7083160820435445e-06, "loss": 0.4029, "step": 13650 }, { "epoch": 1.7770402186645842, "grad_norm": 2.7135939598083496, "learning_rate": 3.706297290963059e-06, "loss": 0.4438, "step": 13653 }, { "epoch": 1.777430691136275, "grad_norm": 2.827840805053711, "learning_rate": 3.7042787258897163e-06, "loss": 0.5588, "step": 13656 }, { "epoch": 1.7778211636079657, "grad_norm": 2.7864699363708496, "learning_rate": 3.7022603871761554e-06, "loss": 0.4841, "step": 13659 }, { "epoch": 1.7782116360796563, "grad_norm": 3.1784870624542236, "learning_rate": 3.700242275174973e-06, "loss": 0.5815, "step": 13662 }, { "epoch": 1.7786021085513473, "grad_norm": 2.666109323501587, "learning_rate": 3.698224390238732e-06, "loss": 0.5251, "step": 13665 }, { "epoch": 1.7789925810230378, "grad_norm": 2.4882564544677734, "learning_rate": 3.6962067327199523e-06, "loss": 0.4551, "step": 13668 }, { "epoch": 1.7793830534947286, "grad_norm": 2.7891669273376465, "learning_rate": 3.6941893029711123e-06, "loss": 0.4774, "step": 13671 }, { "epoch": 1.7797735259664194, "grad_norm": 2.7892396450042725, "learning_rate": 3.6921721013446555e-06, "loss": 0.512, "step": 13674 }, { "epoch": 1.7801639984381101, "grad_norm": 2.7493927478790283, "learning_rate": 3.690155128192979e-06, "loss": 0.3913, "step": 13677 }, { "epoch": 1.780554470909801, "grad_norm": 2.922779083251953, "learning_rate": 3.6881383838684475e-06, "loss": 0.5359, "step": 13680 }, { "epoch": 1.7809449433814915, "grad_norm": 3.476430892944336, "learning_rate": 3.6861218687233813e-06, "loss": 0.4305, "step": 13683 }, { "epoch": 1.7813354158531824, "grad_norm": 2.7361629009246826, "learning_rate": 3.6841055831100593e-06, "loss": 0.5422, "step": 13686 }, { "epoch": 1.781725888324873, "grad_norm": 2.4145750999450684, "learning_rate": 3.6820895273807257e-06, "loss": 0.5088, "step": 13689 }, { "epoch": 1.782116360796564, "grad_norm": 2.698687791824341, "learning_rate": 3.6800737018875765e-06, "loss": 0.5185, "step": 13692 }, { "epoch": 1.7825068332682545, "grad_norm": 3.8540420532226562, "learning_rate": 3.678058106982775e-06, "loss": 0.4911, "step": 13695 }, { "epoch": 1.7828973057399453, "grad_norm": 2.5574920177459717, "learning_rate": 3.676042743018442e-06, "loss": 0.5211, "step": 13698 }, { "epoch": 1.783287778211636, "grad_norm": 2.5759153366088867, "learning_rate": 3.674027610346658e-06, "loss": 0.455, "step": 13701 }, { "epoch": 1.7836782506833269, "grad_norm": 2.5695457458496094, "learning_rate": 3.672012709319459e-06, "loss": 0.4909, "step": 13704 }, { "epoch": 1.7840687231550176, "grad_norm": 2.9643442630767822, "learning_rate": 3.669998040288847e-06, "loss": 0.4506, "step": 13707 }, { "epoch": 1.7844591956267082, "grad_norm": 2.8210248947143555, "learning_rate": 3.66798360360678e-06, "loss": 0.4668, "step": 13710 }, { "epoch": 1.7848496680983992, "grad_norm": 2.568918228149414, "learning_rate": 3.6659693996251745e-06, "loss": 0.4581, "step": 13713 }, { "epoch": 1.7852401405700897, "grad_norm": 2.8247551918029785, "learning_rate": 3.663955428695908e-06, "loss": 0.4848, "step": 13716 }, { "epoch": 1.7856306130417805, "grad_norm": 2.960132598876953, "learning_rate": 3.6619416911708196e-06, "loss": 0.497, "step": 13719 }, { "epoch": 1.7860210855134713, "grad_norm": 2.265368938446045, "learning_rate": 3.6599281874017005e-06, "loss": 0.4379, "step": 13722 }, { "epoch": 1.786411557985162, "grad_norm": 2.88246750831604, "learning_rate": 3.6579149177403093e-06, "loss": 0.61, "step": 13725 }, { "epoch": 1.7868020304568528, "grad_norm": 2.980820655822754, "learning_rate": 3.6559018825383587e-06, "loss": 0.5359, "step": 13728 }, { "epoch": 1.7871925029285436, "grad_norm": 2.5079283714294434, "learning_rate": 3.6538890821475204e-06, "loss": 0.5089, "step": 13731 }, { "epoch": 1.7875829754002344, "grad_norm": 2.517543077468872, "learning_rate": 3.6518765169194294e-06, "loss": 0.4798, "step": 13734 }, { "epoch": 1.787973447871925, "grad_norm": 3.10083270072937, "learning_rate": 3.649864187205672e-06, "loss": 0.4349, "step": 13737 }, { "epoch": 1.788363920343616, "grad_norm": 2.687077522277832, "learning_rate": 3.647852093357803e-06, "loss": 0.4688, "step": 13740 }, { "epoch": 1.7887543928153065, "grad_norm": 2.802783966064453, "learning_rate": 3.645840235727328e-06, "loss": 0.4867, "step": 13743 }, { "epoch": 1.7891448652869972, "grad_norm": 2.6640493869781494, "learning_rate": 3.6438286146657166e-06, "loss": 0.5281, "step": 13746 }, { "epoch": 1.789535337758688, "grad_norm": 2.8820464611053467, "learning_rate": 3.6418172305243914e-06, "loss": 0.4845, "step": 13749 }, { "epoch": 1.7899258102303788, "grad_norm": 2.6128082275390625, "learning_rate": 3.6398060836547404e-06, "loss": 0.4531, "step": 13752 }, { "epoch": 1.7903162827020696, "grad_norm": 2.659701108932495, "learning_rate": 3.637795174408104e-06, "loss": 0.5323, "step": 13755 }, { "epoch": 1.79070675517376, "grad_norm": 2.875169277191162, "learning_rate": 3.635784503135785e-06, "loss": 0.5001, "step": 13758 }, { "epoch": 1.791097227645451, "grad_norm": 3.0239455699920654, "learning_rate": 3.6337740701890446e-06, "loss": 0.4817, "step": 13761 }, { "epoch": 1.7914877001171416, "grad_norm": 2.8980631828308105, "learning_rate": 3.6317638759190985e-06, "loss": 0.4957, "step": 13764 }, { "epoch": 1.7918781725888326, "grad_norm": 2.978820562362671, "learning_rate": 3.629753920677126e-06, "loss": 0.5658, "step": 13767 }, { "epoch": 1.7922686450605232, "grad_norm": 2.6764168739318848, "learning_rate": 3.6277442048142615e-06, "loss": 0.5507, "step": 13770 }, { "epoch": 1.792659117532214, "grad_norm": 2.452681064605713, "learning_rate": 3.6257347286815956e-06, "loss": 0.4449, "step": 13773 }, { "epoch": 1.7930495900039047, "grad_norm": 2.4262232780456543, "learning_rate": 3.623725492630184e-06, "loss": 0.4299, "step": 13776 }, { "epoch": 1.7934400624755955, "grad_norm": 2.8017055988311768, "learning_rate": 3.6217164970110296e-06, "loss": 0.4115, "step": 13779 }, { "epoch": 1.7938305349472863, "grad_norm": 2.725071430206299, "learning_rate": 3.6197077421751077e-06, "loss": 0.5487, "step": 13782 }, { "epoch": 1.7942210074189768, "grad_norm": 3.4036777019500732, "learning_rate": 3.6176992284733375e-06, "loss": 0.4692, "step": 13785 }, { "epoch": 1.7946114798906678, "grad_norm": 2.6664416790008545, "learning_rate": 3.6156909562566054e-06, "loss": 0.5513, "step": 13788 }, { "epoch": 1.7950019523623584, "grad_norm": 3.166633129119873, "learning_rate": 3.6136829258757503e-06, "loss": 0.4979, "step": 13791 }, { "epoch": 1.7953924248340491, "grad_norm": 2.793639898300171, "learning_rate": 3.611675137681572e-06, "loss": 0.5232, "step": 13794 }, { "epoch": 1.79578289730574, "grad_norm": 2.91501784324646, "learning_rate": 3.609667592024827e-06, "loss": 0.516, "step": 13797 }, { "epoch": 1.7961733697774307, "grad_norm": 2.355177164077759, "learning_rate": 3.607660289256228e-06, "loss": 0.5682, "step": 13800 }, { "epoch": 1.7965638422491215, "grad_norm": 2.7519314289093018, "learning_rate": 3.6056532297264486e-06, "loss": 0.5224, "step": 13803 }, { "epoch": 1.796954314720812, "grad_norm": 3.09912109375, "learning_rate": 3.603646413786115e-06, "loss": 0.502, "step": 13806 }, { "epoch": 1.797344787192503, "grad_norm": 2.3987009525299072, "learning_rate": 3.601639841785816e-06, "loss": 0.4405, "step": 13809 }, { "epoch": 1.7977352596641936, "grad_norm": 2.6418380737304688, "learning_rate": 3.599633514076096e-06, "loss": 0.4318, "step": 13812 }, { "epoch": 1.7981257321358846, "grad_norm": 2.3674392700195312, "learning_rate": 3.5976274310074536e-06, "loss": 0.453, "step": 13815 }, { "epoch": 1.798516204607575, "grad_norm": 2.514341354370117, "learning_rate": 3.595621592930351e-06, "loss": 0.4702, "step": 13818 }, { "epoch": 1.7989066770792659, "grad_norm": 2.4524552822113037, "learning_rate": 3.5936160001951977e-06, "loss": 0.4447, "step": 13821 }, { "epoch": 1.7992971495509567, "grad_norm": 2.8817389011383057, "learning_rate": 3.5916106531523737e-06, "loss": 0.5262, "step": 13824 }, { "epoch": 1.7996876220226474, "grad_norm": 2.756680965423584, "learning_rate": 3.5896055521522043e-06, "loss": 0.5041, "step": 13827 }, { "epoch": 1.8000780944943382, "grad_norm": 2.7653608322143555, "learning_rate": 3.587600697544979e-06, "loss": 0.5412, "step": 13830 }, { "epoch": 1.8004685669660287, "grad_norm": 2.692890167236328, "learning_rate": 3.585596089680941e-06, "loss": 0.5134, "step": 13833 }, { "epoch": 1.8008590394377197, "grad_norm": 2.555551767349243, "learning_rate": 3.5835917289102893e-06, "loss": 0.4865, "step": 13836 }, { "epoch": 1.8012495119094103, "grad_norm": 2.5611484050750732, "learning_rate": 3.5815876155831845e-06, "loss": 0.4808, "step": 13839 }, { "epoch": 1.8016399843811013, "grad_norm": 3.0679988861083984, "learning_rate": 3.5795837500497388e-06, "loss": 0.5976, "step": 13842 }, { "epoch": 1.8020304568527918, "grad_norm": 3.132232427597046, "learning_rate": 3.577580132660023e-06, "loss": 0.4778, "step": 13845 }, { "epoch": 1.8024209293244826, "grad_norm": 3.60653018951416, "learning_rate": 3.575576763764067e-06, "loss": 0.4976, "step": 13848 }, { "epoch": 1.8028114017961734, "grad_norm": 2.553593873977661, "learning_rate": 3.573573643711852e-06, "loss": 0.4926, "step": 13851 }, { "epoch": 1.8032018742678642, "grad_norm": 2.7795355319976807, "learning_rate": 3.5715707728533227e-06, "loss": 0.4787, "step": 13854 }, { "epoch": 1.803592346739555, "grad_norm": 2.8556997776031494, "learning_rate": 3.5695681515383727e-06, "loss": 0.4926, "step": 13857 }, { "epoch": 1.8039828192112455, "grad_norm": 2.585430383682251, "learning_rate": 3.5675657801168583e-06, "loss": 0.4774, "step": 13860 }, { "epoch": 1.8043732916829365, "grad_norm": 3.1514101028442383, "learning_rate": 3.5655636589385874e-06, "loss": 0.6497, "step": 13863 }, { "epoch": 1.804763764154627, "grad_norm": 2.7750942707061768, "learning_rate": 3.563561788353329e-06, "loss": 0.5409, "step": 13866 }, { "epoch": 1.8051542366263178, "grad_norm": 2.840725898742676, "learning_rate": 3.561560168710804e-06, "loss": 0.6219, "step": 13869 }, { "epoch": 1.8055447090980086, "grad_norm": 2.4959402084350586, "learning_rate": 3.559558800360692e-06, "loss": 0.475, "step": 13872 }, { "epoch": 1.8059351815696993, "grad_norm": 2.7684502601623535, "learning_rate": 3.557557683652627e-06, "loss": 0.4395, "step": 13875 }, { "epoch": 1.8063256540413901, "grad_norm": 3.1551716327667236, "learning_rate": 3.5555568189362e-06, "loss": 0.4665, "step": 13878 }, { "epoch": 1.8067161265130807, "grad_norm": 3.5174217224121094, "learning_rate": 3.5535562065609598e-06, "loss": 0.4556, "step": 13881 }, { "epoch": 1.8071065989847717, "grad_norm": 2.683762311935425, "learning_rate": 3.551555846876405e-06, "loss": 0.4735, "step": 13884 }, { "epoch": 1.8074970714564622, "grad_norm": 2.77344012260437, "learning_rate": 3.5495557402319975e-06, "loss": 0.5262, "step": 13887 }, { "epoch": 1.8078875439281532, "grad_norm": 3.604360342025757, "learning_rate": 3.5475558869771516e-06, "loss": 0.5683, "step": 13890 }, { "epoch": 1.8082780163998438, "grad_norm": 2.6248981952667236, "learning_rate": 3.545556287461236e-06, "loss": 0.4836, "step": 13893 }, { "epoch": 1.8086684888715345, "grad_norm": 3.0263054370880127, "learning_rate": 3.543556942033577e-06, "loss": 0.4698, "step": 13896 }, { "epoch": 1.8090589613432253, "grad_norm": 2.9796009063720703, "learning_rate": 3.5415578510434572e-06, "loss": 0.4816, "step": 13899 }, { "epoch": 1.809449433814916, "grad_norm": 2.5007407665252686, "learning_rate": 3.539559014840112e-06, "loss": 0.4536, "step": 13902 }, { "epoch": 1.8098399062866068, "grad_norm": 4.0354323387146, "learning_rate": 3.537560433772733e-06, "loss": 0.5184, "step": 13905 }, { "epoch": 1.8102303787582974, "grad_norm": 2.4867868423461914, "learning_rate": 3.5355621081904717e-06, "loss": 0.4836, "step": 13908 }, { "epoch": 1.8106208512299884, "grad_norm": 2.469088315963745, "learning_rate": 3.5335640384424296e-06, "loss": 0.5388, "step": 13911 }, { "epoch": 1.811011323701679, "grad_norm": 2.7055606842041016, "learning_rate": 3.531566224877665e-06, "loss": 0.4555, "step": 13914 }, { "epoch": 1.81140179617337, "grad_norm": 2.7885193824768066, "learning_rate": 3.529568667845192e-06, "loss": 0.5323, "step": 13917 }, { "epoch": 1.8117922686450605, "grad_norm": 2.801211357116699, "learning_rate": 3.5275713676939782e-06, "loss": 0.4274, "step": 13920 }, { "epoch": 1.8121827411167513, "grad_norm": 2.6563968658447266, "learning_rate": 3.525574324772949e-06, "loss": 0.4206, "step": 13923 }, { "epoch": 1.812573213588442, "grad_norm": 3.055948495864868, "learning_rate": 3.523577539430985e-06, "loss": 0.4404, "step": 13926 }, { "epoch": 1.8129636860601328, "grad_norm": 2.6117143630981445, "learning_rate": 3.5215810120169182e-06, "loss": 0.4857, "step": 13929 }, { "epoch": 1.8133541585318236, "grad_norm": 2.4713497161865234, "learning_rate": 3.5195847428795388e-06, "loss": 0.4886, "step": 13932 }, { "epoch": 1.8137446310035141, "grad_norm": 2.4422478675842285, "learning_rate": 3.5175887323675896e-06, "loss": 0.5087, "step": 13935 }, { "epoch": 1.8141351034752051, "grad_norm": 2.721740484237671, "learning_rate": 3.5155929808297706e-06, "loss": 0.4973, "step": 13938 }, { "epoch": 1.8145255759468957, "grad_norm": 2.5684258937835693, "learning_rate": 3.5135974886147358e-06, "loss": 0.4052, "step": 13941 }, { "epoch": 1.8149160484185864, "grad_norm": 2.683708667755127, "learning_rate": 3.5116022560710916e-06, "loss": 0.4971, "step": 13944 }, { "epoch": 1.8153065208902772, "grad_norm": 2.9392552375793457, "learning_rate": 3.5096072835474015e-06, "loss": 0.4841, "step": 13947 }, { "epoch": 1.815696993361968, "grad_norm": 2.6948726177215576, "learning_rate": 3.5076125713921844e-06, "loss": 0.5777, "step": 13950 }, { "epoch": 1.8160874658336588, "grad_norm": 2.9752037525177, "learning_rate": 3.505618119953913e-06, "loss": 0.4687, "step": 13953 }, { "epoch": 1.8164779383053493, "grad_norm": 2.543264865875244, "learning_rate": 3.5036239295810105e-06, "loss": 0.4773, "step": 13956 }, { "epoch": 1.8168684107770403, "grad_norm": 2.5448291301727295, "learning_rate": 3.5016300006218607e-06, "loss": 0.5217, "step": 13959 }, { "epoch": 1.8172588832487309, "grad_norm": 2.9891393184661865, "learning_rate": 3.4996363334247975e-06, "loss": 0.4791, "step": 13962 }, { "epoch": 1.8176493557204219, "grad_norm": 2.651190757751465, "learning_rate": 3.49764292833811e-06, "loss": 0.5361, "step": 13965 }, { "epoch": 1.8180398281921124, "grad_norm": 2.7708990573883057, "learning_rate": 3.4956497857100437e-06, "loss": 0.4645, "step": 13968 }, { "epoch": 1.8184303006638032, "grad_norm": 2.510695457458496, "learning_rate": 3.493656905888794e-06, "loss": 0.4663, "step": 13971 }, { "epoch": 1.818820773135494, "grad_norm": 3.075878381729126, "learning_rate": 3.4916642892225138e-06, "loss": 0.5056, "step": 13974 }, { "epoch": 1.8192112456071847, "grad_norm": 2.6439952850341797, "learning_rate": 3.4896719360593106e-06, "loss": 0.533, "step": 13977 }, { "epoch": 1.8196017180788755, "grad_norm": 2.594844102859497, "learning_rate": 3.4876798467472415e-06, "loss": 0.4981, "step": 13980 }, { "epoch": 1.819992190550566, "grad_norm": 3.148606538772583, "learning_rate": 3.4856880216343235e-06, "loss": 0.5199, "step": 13983 }, { "epoch": 1.820382663022257, "grad_norm": 2.5869855880737305, "learning_rate": 3.4836964610685207e-06, "loss": 0.437, "step": 13986 }, { "epoch": 1.8207731354939476, "grad_norm": 2.792637586593628, "learning_rate": 3.4817051653977553e-06, "loss": 0.5815, "step": 13989 }, { "epoch": 1.8211636079656386, "grad_norm": 2.534087896347046, "learning_rate": 3.479714134969905e-06, "loss": 0.4854, "step": 13992 }, { "epoch": 1.8215540804373291, "grad_norm": 4.054605960845947, "learning_rate": 3.4777233701327974e-06, "loss": 0.5168, "step": 13995 }, { "epoch": 1.82194455290902, "grad_norm": 2.612586498260498, "learning_rate": 3.4757328712342143e-06, "loss": 0.4172, "step": 13998 }, { "epoch": 1.8223350253807107, "grad_norm": 2.6771693229675293, "learning_rate": 3.4737426386218913e-06, "loss": 0.4638, "step": 14001 }, { "epoch": 1.8227254978524015, "grad_norm": 2.4215550422668457, "learning_rate": 3.4717526726435204e-06, "loss": 0.5155, "step": 14004 }, { "epoch": 1.8231159703240922, "grad_norm": 2.9423484802246094, "learning_rate": 3.469762973646741e-06, "loss": 0.4784, "step": 14007 }, { "epoch": 1.8235064427957828, "grad_norm": 3.212175130844116, "learning_rate": 3.4677735419791507e-06, "loss": 0.4752, "step": 14010 }, { "epoch": 1.8238969152674738, "grad_norm": 2.640934944152832, "learning_rate": 3.465784377988301e-06, "loss": 0.5054, "step": 14013 }, { "epoch": 1.8242873877391643, "grad_norm": 2.4397079944610596, "learning_rate": 3.4637954820216914e-06, "loss": 0.4716, "step": 14016 }, { "epoch": 1.824677860210855, "grad_norm": 2.737032413482666, "learning_rate": 3.4618068544267806e-06, "loss": 0.547, "step": 14019 }, { "epoch": 1.8250683326825459, "grad_norm": 2.630606174468994, "learning_rate": 3.459818495550976e-06, "loss": 0.5541, "step": 14022 }, { "epoch": 1.8254588051542366, "grad_norm": 2.5901596546173096, "learning_rate": 3.4578304057416394e-06, "loss": 0.4804, "step": 14025 }, { "epoch": 1.8258492776259274, "grad_norm": 2.6611945629119873, "learning_rate": 3.455842585346088e-06, "loss": 0.5694, "step": 14028 }, { "epoch": 1.826239750097618, "grad_norm": 2.6162936687469482, "learning_rate": 3.4538550347115863e-06, "loss": 0.5321, "step": 14031 }, { "epoch": 1.826630222569309, "grad_norm": 2.7024919986724854, "learning_rate": 3.4518677541853584e-06, "loss": 0.4661, "step": 14034 }, { "epoch": 1.8270206950409995, "grad_norm": 2.514699697494507, "learning_rate": 3.4498807441145775e-06, "loss": 0.5, "step": 14037 }, { "epoch": 1.8274111675126905, "grad_norm": 2.608577013015747, "learning_rate": 3.4478940048463705e-06, "loss": 0.4691, "step": 14040 }, { "epoch": 1.827801639984381, "grad_norm": 2.8621628284454346, "learning_rate": 3.445907536727814e-06, "loss": 0.4189, "step": 14043 }, { "epoch": 1.8281921124560718, "grad_norm": 2.61087703704834, "learning_rate": 3.4439213401059436e-06, "loss": 0.4572, "step": 14046 }, { "epoch": 1.8285825849277626, "grad_norm": 2.482490062713623, "learning_rate": 3.4419354153277398e-06, "loss": 0.4796, "step": 14049 }, { "epoch": 1.8289730573994534, "grad_norm": 2.7227578163146973, "learning_rate": 3.4399497627401414e-06, "loss": 0.4849, "step": 14052 }, { "epoch": 1.8293635298711441, "grad_norm": 2.800520181655884, "learning_rate": 3.437964382690039e-06, "loss": 0.4679, "step": 14055 }, { "epoch": 1.8297540023428347, "grad_norm": 2.624976396560669, "learning_rate": 3.4359792755242716e-06, "loss": 0.5038, "step": 14058 }, { "epoch": 1.8301444748145257, "grad_norm": 2.559096097946167, "learning_rate": 3.4339944415896354e-06, "loss": 0.4533, "step": 14061 }, { "epoch": 1.8305349472862162, "grad_norm": 2.5498814582824707, "learning_rate": 3.432009881232875e-06, "loss": 0.4878, "step": 14064 }, { "epoch": 1.830925419757907, "grad_norm": 2.93689227104187, "learning_rate": 3.4300255948006893e-06, "loss": 0.4776, "step": 14067 }, { "epoch": 1.8313158922295978, "grad_norm": 2.3573851585388184, "learning_rate": 3.4280415826397304e-06, "loss": 0.4692, "step": 14070 }, { "epoch": 1.8317063647012886, "grad_norm": 2.303317070007324, "learning_rate": 3.426057845096598e-06, "loss": 0.4765, "step": 14073 }, { "epoch": 1.8320968371729793, "grad_norm": 2.6520111560821533, "learning_rate": 3.4240743825178514e-06, "loss": 0.4867, "step": 14076 }, { "epoch": 1.83248730964467, "grad_norm": 2.8947081565856934, "learning_rate": 3.4220911952499943e-06, "loss": 0.5239, "step": 14079 }, { "epoch": 1.8328777821163609, "grad_norm": 2.4253430366516113, "learning_rate": 3.4201082836394868e-06, "loss": 0.5101, "step": 14082 }, { "epoch": 1.8332682545880514, "grad_norm": 3.378649950027466, "learning_rate": 3.418125648032737e-06, "loss": 0.4679, "step": 14085 }, { "epoch": 1.8336587270597424, "grad_norm": 2.7288544178009033, "learning_rate": 3.4161432887761093e-06, "loss": 0.4794, "step": 14088 }, { "epoch": 1.834049199531433, "grad_norm": 2.832486152648926, "learning_rate": 3.414161206215918e-06, "loss": 0.5284, "step": 14091 }, { "epoch": 1.8344396720031237, "grad_norm": 2.27494215965271, "learning_rate": 3.4121794006984265e-06, "loss": 0.4875, "step": 14094 }, { "epoch": 1.8348301444748145, "grad_norm": 2.7032313346862793, "learning_rate": 3.4101978725698553e-06, "loss": 0.4654, "step": 14097 }, { "epoch": 1.8352206169465053, "grad_norm": 2.672342300415039, "learning_rate": 3.40821662217637e-06, "loss": 0.4787, "step": 14100 }, { "epoch": 1.835611089418196, "grad_norm": 2.9763712882995605, "learning_rate": 3.4062356498640915e-06, "loss": 0.4966, "step": 14103 }, { "epoch": 1.8360015618898866, "grad_norm": 2.8209915161132812, "learning_rate": 3.4042549559790938e-06, "loss": 0.5311, "step": 14106 }, { "epoch": 1.8363920343615776, "grad_norm": 3.304166078567505, "learning_rate": 3.4022745408673973e-06, "loss": 0.4676, "step": 14109 }, { "epoch": 1.8367825068332682, "grad_norm": 2.846513271331787, "learning_rate": 3.400294404874978e-06, "loss": 0.4849, "step": 14112 }, { "epoch": 1.8371729793049592, "grad_norm": 2.843594789505005, "learning_rate": 3.3983145483477582e-06, "loss": 0.5584, "step": 14115 }, { "epoch": 1.8375634517766497, "grad_norm": 2.5627312660217285, "learning_rate": 3.39633497163162e-06, "loss": 0.4594, "step": 14118 }, { "epoch": 1.8379539242483405, "grad_norm": 2.5003201961517334, "learning_rate": 3.394355675072388e-06, "loss": 0.425, "step": 14121 }, { "epoch": 1.8383443967200312, "grad_norm": 2.563779354095459, "learning_rate": 3.3923766590158425e-06, "loss": 0.5306, "step": 14124 }, { "epoch": 1.838734869191722, "grad_norm": 2.529606819152832, "learning_rate": 3.3903979238077124e-06, "loss": 0.5286, "step": 14127 }, { "epoch": 1.8391253416634128, "grad_norm": 2.8574764728546143, "learning_rate": 3.3884194697936777e-06, "loss": 0.5582, "step": 14130 }, { "epoch": 1.8395158141351033, "grad_norm": 2.485995292663574, "learning_rate": 3.3864412973193734e-06, "loss": 0.4713, "step": 14133 }, { "epoch": 1.8399062866067943, "grad_norm": 3.2460153102874756, "learning_rate": 3.3844634067303783e-06, "loss": 0.4945, "step": 14136 }, { "epoch": 1.840296759078485, "grad_norm": 3.2644894123077393, "learning_rate": 3.382485798372228e-06, "loss": 0.524, "step": 14139 }, { "epoch": 1.8406872315501757, "grad_norm": 2.7396457195281982, "learning_rate": 3.380508472590407e-06, "loss": 0.4508, "step": 14142 }, { "epoch": 1.8410777040218664, "grad_norm": 2.678882598876953, "learning_rate": 3.3785314297303477e-06, "loss": 0.4521, "step": 14145 }, { "epoch": 1.8414681764935572, "grad_norm": 2.4532623291015625, "learning_rate": 3.3765546701374375e-06, "loss": 0.4508, "step": 14148 }, { "epoch": 1.841858648965248, "grad_norm": 2.8880367279052734, "learning_rate": 3.3745781941570104e-06, "loss": 0.4395, "step": 14151 }, { "epoch": 1.8422491214369385, "grad_norm": 2.6723203659057617, "learning_rate": 3.372602002134353e-06, "loss": 0.4853, "step": 14154 }, { "epoch": 1.8426395939086295, "grad_norm": 2.651268243789673, "learning_rate": 3.370626094414702e-06, "loss": 0.4853, "step": 14157 }, { "epoch": 1.84303006638032, "grad_norm": 2.4674911499023438, "learning_rate": 3.368650471343246e-06, "loss": 0.5133, "step": 14160 }, { "epoch": 1.843420538852011, "grad_norm": 2.827803373336792, "learning_rate": 3.36667513326512e-06, "loss": 0.4601, "step": 14163 }, { "epoch": 1.8438110113237016, "grad_norm": 3.614109754562378, "learning_rate": 3.364700080525412e-06, "loss": 0.5692, "step": 14166 }, { "epoch": 1.8442014837953924, "grad_norm": 3.3453476428985596, "learning_rate": 3.362725313469161e-06, "loss": 0.512, "step": 14169 }, { "epoch": 1.8445919562670832, "grad_norm": 2.600367546081543, "learning_rate": 3.3607508324413525e-06, "loss": 0.5574, "step": 14172 }, { "epoch": 1.844982428738774, "grad_norm": 2.761702299118042, "learning_rate": 3.3587766377869256e-06, "loss": 0.4362, "step": 14175 }, { "epoch": 1.8453729012104647, "grad_norm": 2.6490187644958496, "learning_rate": 3.3568027298507673e-06, "loss": 0.5518, "step": 14178 }, { "epoch": 1.8457633736821553, "grad_norm": 2.837568521499634, "learning_rate": 3.3548291089777146e-06, "loss": 0.4928, "step": 14181 }, { "epoch": 1.8461538461538463, "grad_norm": 2.8118865489959717, "learning_rate": 3.352855775512557e-06, "loss": 0.5401, "step": 14184 }, { "epoch": 1.8465443186255368, "grad_norm": 2.6508400440216064, "learning_rate": 3.350882729800029e-06, "loss": 0.5607, "step": 14187 }, { "epoch": 1.8469347910972278, "grad_norm": 3.0374929904937744, "learning_rate": 3.3489099721848188e-06, "loss": 0.4713, "step": 14190 }, { "epoch": 1.8473252635689184, "grad_norm": 2.809018135070801, "learning_rate": 3.346937503011565e-06, "loss": 0.5143, "step": 14193 }, { "epoch": 1.8477157360406091, "grad_norm": 2.843602418899536, "learning_rate": 3.34496532262485e-06, "loss": 0.4967, "step": 14196 }, { "epoch": 1.8481062085123, "grad_norm": 2.7359254360198975, "learning_rate": 3.3429934313692102e-06, "loss": 0.527, "step": 14199 }, { "epoch": 1.8484966809839907, "grad_norm": 3.020136594772339, "learning_rate": 3.341021829589134e-06, "loss": 0.5108, "step": 14202 }, { "epoch": 1.8488871534556814, "grad_norm": 2.610989809036255, "learning_rate": 3.3390505176290544e-06, "loss": 0.56, "step": 14205 }, { "epoch": 1.849277625927372, "grad_norm": 3.2436866760253906, "learning_rate": 3.3370794958333547e-06, "loss": 0.4896, "step": 14208 }, { "epoch": 1.849668098399063, "grad_norm": 2.493400812149048, "learning_rate": 3.33510876454637e-06, "loss": 0.463, "step": 14211 }, { "epoch": 1.8500585708707535, "grad_norm": 2.6635570526123047, "learning_rate": 3.33313832411238e-06, "loss": 0.4602, "step": 14214 }, { "epoch": 1.8504490433424443, "grad_norm": 2.576613664627075, "learning_rate": 3.3311681748756185e-06, "loss": 0.4683, "step": 14217 }, { "epoch": 1.850839515814135, "grad_norm": 2.853515148162842, "learning_rate": 3.3291983171802676e-06, "loss": 0.5186, "step": 14220 }, { "epoch": 1.8512299882858259, "grad_norm": 2.5207786560058594, "learning_rate": 3.3272287513704544e-06, "loss": 0.4683, "step": 14223 }, { "epoch": 1.8516204607575166, "grad_norm": 2.8509247303009033, "learning_rate": 3.32525947779026e-06, "loss": 0.4833, "step": 14226 }, { "epoch": 1.8520109332292072, "grad_norm": 4.261683464050293, "learning_rate": 3.3232904967837116e-06, "loss": 0.5181, "step": 14229 }, { "epoch": 1.8524014057008982, "grad_norm": 2.9350645542144775, "learning_rate": 3.3213218086947857e-06, "loss": 0.4807, "step": 14232 }, { "epoch": 1.8527918781725887, "grad_norm": 2.8778061866760254, "learning_rate": 3.3193534138674094e-06, "loss": 0.4726, "step": 14235 }, { "epoch": 1.8531823506442797, "grad_norm": 3.9157052040100098, "learning_rate": 3.3173853126454546e-06, "loss": 0.5698, "step": 14238 }, { "epoch": 1.8535728231159703, "grad_norm": 2.5443649291992188, "learning_rate": 3.3154175053727478e-06, "loss": 0.5034, "step": 14241 }, { "epoch": 1.853963295587661, "grad_norm": 2.9121346473693848, "learning_rate": 3.313449992393055e-06, "loss": 0.5932, "step": 14244 }, { "epoch": 1.8543537680593518, "grad_norm": 2.4990174770355225, "learning_rate": 3.311482774050105e-06, "loss": 0.4869, "step": 14247 }, { "epoch": 1.8547442405310426, "grad_norm": 3.1738932132720947, "learning_rate": 3.3095158506875603e-06, "loss": 0.6015, "step": 14250 }, { "epoch": 1.8551347130027334, "grad_norm": 2.808356285095215, "learning_rate": 3.3075492226490404e-06, "loss": 0.4492, "step": 14253 }, { "epoch": 1.855525185474424, "grad_norm": 2.824268102645874, "learning_rate": 3.3055828902781116e-06, "loss": 0.5146, "step": 14256 }, { "epoch": 1.855915657946115, "grad_norm": 2.653419017791748, "learning_rate": 3.3036168539182867e-06, "loss": 0.5002, "step": 14259 }, { "epoch": 1.8563061304178055, "grad_norm": 2.6380562782287598, "learning_rate": 3.3016511139130296e-06, "loss": 0.4467, "step": 14262 }, { "epoch": 1.8566966028894965, "grad_norm": 3.018332004547119, "learning_rate": 3.299685670605749e-06, "loss": 0.5476, "step": 14265 }, { "epoch": 1.857087075361187, "grad_norm": 2.474071979522705, "learning_rate": 3.297720524339805e-06, "loss": 0.5168, "step": 14268 }, { "epoch": 1.8574775478328778, "grad_norm": 2.407060384750366, "learning_rate": 3.295755675458505e-06, "loss": 0.44, "step": 14271 }, { "epoch": 1.8578680203045685, "grad_norm": 2.6494884490966797, "learning_rate": 3.2937911243051035e-06, "loss": 0.56, "step": 14274 }, { "epoch": 1.8582584927762593, "grad_norm": 2.5988290309906006, "learning_rate": 3.291826871222803e-06, "loss": 0.458, "step": 14277 }, { "epoch": 1.85864896524795, "grad_norm": 3.0355000495910645, "learning_rate": 3.2898629165547534e-06, "loss": 0.497, "step": 14280 }, { "epoch": 1.8590394377196406, "grad_norm": 2.6298789978027344, "learning_rate": 3.2878992606440557e-06, "loss": 0.537, "step": 14283 }, { "epoch": 1.8594299101913316, "grad_norm": 3.61897349357605, "learning_rate": 3.2859359038337537e-06, "loss": 0.5495, "step": 14286 }, { "epoch": 1.8598203826630222, "grad_norm": 2.8695971965789795, "learning_rate": 3.283972846466846e-06, "loss": 0.4939, "step": 14289 }, { "epoch": 1.860210855134713, "grad_norm": 2.84236741065979, "learning_rate": 3.2820100888862703e-06, "loss": 0.4501, "step": 14292 }, { "epoch": 1.8606013276064037, "grad_norm": 2.9210715293884277, "learning_rate": 3.2800476314349184e-06, "loss": 0.5261, "step": 14295 }, { "epoch": 1.8609918000780945, "grad_norm": 2.3659050464630127, "learning_rate": 3.2780854744556284e-06, "loss": 0.4849, "step": 14298 }, { "epoch": 1.8613822725497853, "grad_norm": 2.6904006004333496, "learning_rate": 3.276123618291182e-06, "loss": 0.4635, "step": 14301 }, { "epoch": 1.8617727450214758, "grad_norm": 3.254288911819458, "learning_rate": 3.274162063284314e-06, "loss": 0.4929, "step": 14304 }, { "epoch": 1.8621632174931668, "grad_norm": 2.973808526992798, "learning_rate": 3.2722008097777025e-06, "loss": 0.4474, "step": 14307 }, { "epoch": 1.8625536899648574, "grad_norm": 2.9174914360046387, "learning_rate": 3.2702398581139742e-06, "loss": 0.5224, "step": 14310 }, { "epoch": 1.8629441624365484, "grad_norm": 2.607135057449341, "learning_rate": 3.268279208635705e-06, "loss": 0.4667, "step": 14313 }, { "epoch": 1.863334634908239, "grad_norm": 2.792543649673462, "learning_rate": 3.266318861685414e-06, "loss": 0.4632, "step": 14316 }, { "epoch": 1.8637251073799297, "grad_norm": 2.533592462539673, "learning_rate": 3.2643588176055706e-06, "loss": 0.4745, "step": 14319 }, { "epoch": 1.8641155798516205, "grad_norm": 2.8035097122192383, "learning_rate": 3.2623990767385923e-06, "loss": 0.4732, "step": 14322 }, { "epoch": 1.8645060523233112, "grad_norm": 2.933763265609741, "learning_rate": 3.2604396394268377e-06, "loss": 0.567, "step": 14325 }, { "epoch": 1.864896524795002, "grad_norm": 2.4261467456817627, "learning_rate": 3.2584805060126183e-06, "loss": 0.429, "step": 14328 }, { "epoch": 1.8652869972666926, "grad_norm": 2.9292244911193848, "learning_rate": 3.2565216768381924e-06, "loss": 0.4519, "step": 14331 }, { "epoch": 1.8656774697383836, "grad_norm": 2.7582828998565674, "learning_rate": 3.2545631522457623e-06, "loss": 0.5999, "step": 14334 }, { "epoch": 1.866067942210074, "grad_norm": 2.7902820110321045, "learning_rate": 3.2526049325774762e-06, "loss": 0.4989, "step": 14337 }, { "epoch": 1.866458414681765, "grad_norm": 2.306330442428589, "learning_rate": 3.2506470181754336e-06, "loss": 0.529, "step": 14340 }, { "epoch": 1.8668488871534556, "grad_norm": 2.968900442123413, "learning_rate": 3.2486894093816755e-06, "loss": 0.5525, "step": 14343 }, { "epoch": 1.8672393596251464, "grad_norm": 2.6159310340881348, "learning_rate": 3.246732106538194e-06, "loss": 0.4139, "step": 14346 }, { "epoch": 1.8676298320968372, "grad_norm": 2.9599854946136475, "learning_rate": 3.2447751099869264e-06, "loss": 0.5086, "step": 14349 }, { "epoch": 1.868020304568528, "grad_norm": 2.811408519744873, "learning_rate": 3.242818420069753e-06, "loss": 0.4619, "step": 14352 }, { "epoch": 1.8684107770402187, "grad_norm": 3.143258810043335, "learning_rate": 3.240862037128506e-06, "loss": 0.5827, "step": 14355 }, { "epoch": 1.8688012495119093, "grad_norm": 2.813084840774536, "learning_rate": 3.23890596150496e-06, "loss": 0.497, "step": 14358 }, { "epoch": 1.8691917219836003, "grad_norm": 2.9719557762145996, "learning_rate": 3.2369501935408375e-06, "loss": 0.5179, "step": 14361 }, { "epoch": 1.8695821944552908, "grad_norm": 3.1469063758850098, "learning_rate": 3.234994733577808e-06, "loss": 0.4145, "step": 14364 }, { "epoch": 1.8699726669269816, "grad_norm": 2.816202402114868, "learning_rate": 3.2330395819574845e-06, "loss": 0.4828, "step": 14367 }, { "epoch": 1.8703631393986724, "grad_norm": 2.6076595783233643, "learning_rate": 3.2310847390214283e-06, "loss": 0.5034, "step": 14370 }, { "epoch": 1.8707536118703632, "grad_norm": 3.1273179054260254, "learning_rate": 3.229130205111147e-06, "loss": 0.4748, "step": 14373 }, { "epoch": 1.871144084342054, "grad_norm": 2.559382438659668, "learning_rate": 3.2271759805680956e-06, "loss": 0.4771, "step": 14376 }, { "epoch": 1.8715345568137445, "grad_norm": 2.720404624938965, "learning_rate": 3.22522206573367e-06, "loss": 0.4631, "step": 14379 }, { "epoch": 1.8719250292854355, "grad_norm": 3.042336940765381, "learning_rate": 3.223268460949215e-06, "loss": 0.5467, "step": 14382 }, { "epoch": 1.872315501757126, "grad_norm": 2.677138090133667, "learning_rate": 3.221315166556024e-06, "loss": 0.4852, "step": 14385 }, { "epoch": 1.872705974228817, "grad_norm": 3.1034047603607178, "learning_rate": 3.21936218289533e-06, "loss": 0.5535, "step": 14388 }, { "epoch": 1.8730964467005076, "grad_norm": 3.5317320823669434, "learning_rate": 3.217409510308318e-06, "loss": 0.4424, "step": 14391 }, { "epoch": 1.8734869191721983, "grad_norm": 3.066141366958618, "learning_rate": 3.215457149136114e-06, "loss": 0.454, "step": 14394 }, { "epoch": 1.8738773916438891, "grad_norm": 2.683347702026367, "learning_rate": 3.213505099719791e-06, "loss": 0.4301, "step": 14397 }, { "epoch": 1.8742678641155799, "grad_norm": 2.6969237327575684, "learning_rate": 3.2115533624003703e-06, "loss": 0.5306, "step": 14400 }, { "epoch": 1.8746583365872707, "grad_norm": 2.678342580795288, "learning_rate": 3.2096019375188135e-06, "loss": 0.4622, "step": 14403 }, { "epoch": 1.8750488090589612, "grad_norm": 2.750361919403076, "learning_rate": 3.2076508254160334e-06, "loss": 0.4639, "step": 14406 }, { "epoch": 1.8754392815306522, "grad_norm": 2.4361095428466797, "learning_rate": 3.205700026432882e-06, "loss": 0.4648, "step": 14409 }, { "epoch": 1.8758297540023428, "grad_norm": 2.7799482345581055, "learning_rate": 3.2037495409101603e-06, "loss": 0.5366, "step": 14412 }, { "epoch": 1.8762202264740335, "grad_norm": 2.5228137969970703, "learning_rate": 3.201799369188616e-06, "loss": 0.5324, "step": 14415 }, { "epoch": 1.8766106989457243, "grad_norm": 2.955665349960327, "learning_rate": 3.1998495116089413e-06, "loss": 0.528, "step": 14418 }, { "epoch": 1.877001171417415, "grad_norm": 3.2559332847595215, "learning_rate": 3.1978999685117674e-06, "loss": 0.4378, "step": 14421 }, { "epoch": 1.8773916438891058, "grad_norm": 3.0776727199554443, "learning_rate": 3.1959507402376787e-06, "loss": 0.4957, "step": 14424 }, { "epoch": 1.8777821163607966, "grad_norm": 2.5709543228149414, "learning_rate": 3.1940018271272013e-06, "loss": 0.5957, "step": 14427 }, { "epoch": 1.8781725888324874, "grad_norm": 2.643850564956665, "learning_rate": 3.1920532295208045e-06, "loss": 0.5714, "step": 14430 }, { "epoch": 1.878563061304178, "grad_norm": 2.7562592029571533, "learning_rate": 3.190104947758905e-06, "loss": 0.527, "step": 14433 }, { "epoch": 1.878953533775869, "grad_norm": 2.548182964324951, "learning_rate": 3.1881569821818646e-06, "loss": 0.4998, "step": 14436 }, { "epoch": 1.8793440062475595, "grad_norm": 2.421619176864624, "learning_rate": 3.186209333129987e-06, "loss": 0.4503, "step": 14439 }, { "epoch": 1.8797344787192503, "grad_norm": 2.976463794708252, "learning_rate": 3.1842620009435244e-06, "loss": 0.5083, "step": 14442 }, { "epoch": 1.880124951190941, "grad_norm": 3.011476516723633, "learning_rate": 3.1823149859626695e-06, "loss": 0.4516, "step": 14445 }, { "epoch": 1.8805154236626318, "grad_norm": 2.777480125427246, "learning_rate": 3.180368288527563e-06, "loss": 0.5437, "step": 14448 }, { "epoch": 1.8809058961343226, "grad_norm": 2.6614065170288086, "learning_rate": 3.1784219089782885e-06, "loss": 0.532, "step": 14451 }, { "epoch": 1.8812963686060131, "grad_norm": 3.332582473754883, "learning_rate": 3.176475847654873e-06, "loss": 0.5112, "step": 14454 }, { "epoch": 1.8816868410777041, "grad_norm": 3.011148452758789, "learning_rate": 3.1745301048972923e-06, "loss": 0.5642, "step": 14457 }, { "epoch": 1.8820773135493947, "grad_norm": 3.288445472717285, "learning_rate": 3.1725846810454612e-06, "loss": 0.4546, "step": 14460 }, { "epoch": 1.8824677860210857, "grad_norm": 2.9788262844085693, "learning_rate": 3.170639576439244e-06, "loss": 0.5007, "step": 14463 }, { "epoch": 1.8828582584927762, "grad_norm": 2.3742592334747314, "learning_rate": 3.1686947914184424e-06, "loss": 0.4228, "step": 14466 }, { "epoch": 1.883248730964467, "grad_norm": 2.842820405960083, "learning_rate": 3.1667503263228093e-06, "loss": 0.5175, "step": 14469 }, { "epoch": 1.8836392034361578, "grad_norm": 3.063159942626953, "learning_rate": 3.1648061814920372e-06, "loss": 0.476, "step": 14472 }, { "epoch": 1.8840296759078485, "grad_norm": 2.8648762702941895, "learning_rate": 3.1628623572657646e-06, "loss": 0.4767, "step": 14475 }, { "epoch": 1.8844201483795393, "grad_norm": 2.5164406299591064, "learning_rate": 3.160918853983574e-06, "loss": 0.5384, "step": 14478 }, { "epoch": 1.8848106208512299, "grad_norm": 2.923412322998047, "learning_rate": 3.1589756719849897e-06, "loss": 0.4499, "step": 14481 }, { "epoch": 1.8852010933229209, "grad_norm": 2.5238845348358154, "learning_rate": 3.1570328116094835e-06, "loss": 0.4332, "step": 14484 }, { "epoch": 1.8855915657946114, "grad_norm": 2.5288538932800293, "learning_rate": 3.155090273196467e-06, "loss": 0.4952, "step": 14487 }, { "epoch": 1.8859820382663022, "grad_norm": 2.2970383167266846, "learning_rate": 3.153148057085299e-06, "loss": 0.4388, "step": 14490 }, { "epoch": 1.886372510737993, "grad_norm": 4.423757553100586, "learning_rate": 3.1512061636152814e-06, "loss": 0.4552, "step": 14493 }, { "epoch": 1.8867629832096837, "grad_norm": 3.0830297470092773, "learning_rate": 3.149264593125655e-06, "loss": 0.5075, "step": 14496 }, { "epoch": 1.8871534556813745, "grad_norm": 2.8348443508148193, "learning_rate": 3.147323345955612e-06, "loss": 0.5471, "step": 14499 }, { "epoch": 1.887543928153065, "grad_norm": 2.6750619411468506, "learning_rate": 3.1453824224442836e-06, "loss": 0.458, "step": 14502 }, { "epoch": 1.887934400624756, "grad_norm": 2.684641122817993, "learning_rate": 3.143441822930745e-06, "loss": 0.4285, "step": 14505 }, { "epoch": 1.8883248730964466, "grad_norm": 2.603320598602295, "learning_rate": 3.1415015477540136e-06, "loss": 0.5014, "step": 14508 }, { "epoch": 1.8887153455681376, "grad_norm": 2.6286399364471436, "learning_rate": 3.1395615972530514e-06, "loss": 0.4378, "step": 14511 }, { "epoch": 1.8891058180398281, "grad_norm": 2.7393951416015625, "learning_rate": 3.137621971766766e-06, "loss": 0.5872, "step": 14514 }, { "epoch": 1.889496290511519, "grad_norm": 2.693389415740967, "learning_rate": 3.1356826716340027e-06, "loss": 0.5285, "step": 14517 }, { "epoch": 1.8898867629832097, "grad_norm": 2.694274663925171, "learning_rate": 3.1337436971935565e-06, "loss": 0.4483, "step": 14520 }, { "epoch": 1.8902772354549005, "grad_norm": 2.6547138690948486, "learning_rate": 3.131805048784159e-06, "loss": 0.4728, "step": 14523 }, { "epoch": 1.8906677079265912, "grad_norm": 2.5658202171325684, "learning_rate": 3.129866726744489e-06, "loss": 0.5587, "step": 14526 }, { "epoch": 1.8910581803982818, "grad_norm": 2.6531288623809814, "learning_rate": 3.1279287314131694e-06, "loss": 0.5347, "step": 14529 }, { "epoch": 1.8914486528699728, "grad_norm": 2.8203883171081543, "learning_rate": 3.1259910631287605e-06, "loss": 0.4708, "step": 14532 }, { "epoch": 1.8918391253416633, "grad_norm": 2.5969855785369873, "learning_rate": 3.1240537222297716e-06, "loss": 0.5771, "step": 14535 }, { "epoch": 1.8922295978133543, "grad_norm": 3.149728775024414, "learning_rate": 3.122116709054649e-06, "loss": 0.5279, "step": 14538 }, { "epoch": 1.8926200702850449, "grad_norm": 2.6601550579071045, "learning_rate": 3.120180023941789e-06, "loss": 0.4675, "step": 14541 }, { "epoch": 1.8930105427567356, "grad_norm": 2.72967529296875, "learning_rate": 3.118243667229523e-06, "loss": 0.4924, "step": 14544 }, { "epoch": 1.8934010152284264, "grad_norm": 3.1012916564941406, "learning_rate": 3.116307639256131e-06, "loss": 0.4926, "step": 14547 }, { "epoch": 1.8937914877001172, "grad_norm": 2.993875026702881, "learning_rate": 3.1143719403598307e-06, "loss": 0.4754, "step": 14550 }, { "epoch": 1.894181960171808, "grad_norm": 4.121201515197754, "learning_rate": 3.1124365708787856e-06, "loss": 0.5015, "step": 14553 }, { "epoch": 1.8945724326434985, "grad_norm": 2.526926040649414, "learning_rate": 3.110501531151102e-06, "loss": 0.4477, "step": 14556 }, { "epoch": 1.8949629051151895, "grad_norm": 3.1489815711975098, "learning_rate": 3.1085668215148245e-06, "loss": 0.444, "step": 14559 }, { "epoch": 1.89535337758688, "grad_norm": 2.5161519050598145, "learning_rate": 3.1066324423079445e-06, "loss": 0.5054, "step": 14562 }, { "epoch": 1.8957438500585708, "grad_norm": 2.526221990585327, "learning_rate": 3.104698393868395e-06, "loss": 0.489, "step": 14565 }, { "epoch": 1.8961343225302616, "grad_norm": 2.54593825340271, "learning_rate": 3.102764676534048e-06, "loss": 0.49, "step": 14568 }, { "epoch": 1.8965247950019524, "grad_norm": 3.2360286712646484, "learning_rate": 3.1008312906427212e-06, "loss": 0.4462, "step": 14571 }, { "epoch": 1.8969152674736431, "grad_norm": 3.160997152328491, "learning_rate": 3.098898236532172e-06, "loss": 0.4875, "step": 14574 }, { "epoch": 1.8973057399453337, "grad_norm": 4.4345808029174805, "learning_rate": 3.096965514540102e-06, "loss": 0.5209, "step": 14577 }, { "epoch": 1.8976962124170247, "grad_norm": 2.559445858001709, "learning_rate": 3.0950331250041515e-06, "loss": 0.5191, "step": 14580 }, { "epoch": 1.8980866848887152, "grad_norm": 2.8593218326568604, "learning_rate": 3.093101068261909e-06, "loss": 0.4575, "step": 14583 }, { "epoch": 1.8984771573604062, "grad_norm": 2.4053094387054443, "learning_rate": 3.0911693446508973e-06, "loss": 0.4613, "step": 14586 }, { "epoch": 1.8988676298320968, "grad_norm": 2.4780166149139404, "learning_rate": 3.089237954508585e-06, "loss": 0.4018, "step": 14589 }, { "epoch": 1.8992581023037876, "grad_norm": 2.995244264602661, "learning_rate": 3.0873068981723842e-06, "loss": 0.5322, "step": 14592 }, { "epoch": 1.8996485747754783, "grad_norm": 2.625032663345337, "learning_rate": 3.085376175979643e-06, "loss": 0.4439, "step": 14595 }, { "epoch": 1.900039047247169, "grad_norm": 2.8063700199127197, "learning_rate": 3.083445788267657e-06, "loss": 0.4898, "step": 14598 }, { "epoch": 1.9004295197188599, "grad_norm": 2.738100051879883, "learning_rate": 3.081515735373659e-06, "loss": 0.5017, "step": 14601 }, { "epoch": 1.9008199921905504, "grad_norm": 2.68695330619812, "learning_rate": 3.0795860176348267e-06, "loss": 0.4812, "step": 14604 }, { "epoch": 1.9012104646622414, "grad_norm": 3.1662650108337402, "learning_rate": 3.0776566353882775e-06, "loss": 0.4975, "step": 14607 }, { "epoch": 1.901600937133932, "grad_norm": 2.6604361534118652, "learning_rate": 3.0757275889710697e-06, "loss": 0.4858, "step": 14610 }, { "epoch": 1.901991409605623, "grad_norm": 2.82057523727417, "learning_rate": 3.0737988787202034e-06, "loss": 0.4714, "step": 14613 }, { "epoch": 1.9023818820773135, "grad_norm": 2.7449402809143066, "learning_rate": 3.071870504972623e-06, "loss": 0.5916, "step": 14616 }, { "epoch": 1.9027723545490043, "grad_norm": 2.767760753631592, "learning_rate": 3.0699424680652066e-06, "loss": 0.5381, "step": 14619 }, { "epoch": 1.903162827020695, "grad_norm": 2.4877452850341797, "learning_rate": 3.0680147683347803e-06, "loss": 0.4768, "step": 14622 }, { "epoch": 1.9035532994923858, "grad_norm": 2.712571382522583, "learning_rate": 3.066087406118111e-06, "loss": 0.4785, "step": 14625 }, { "epoch": 1.9039437719640766, "grad_norm": 2.5379676818847656, "learning_rate": 3.064160381751905e-06, "loss": 0.4833, "step": 14628 }, { "epoch": 1.9043342444357672, "grad_norm": 2.888591766357422, "learning_rate": 3.062233695572806e-06, "loss": 0.5205, "step": 14631 }, { "epoch": 1.9047247169074581, "grad_norm": 2.4471936225891113, "learning_rate": 3.060307347917405e-06, "loss": 0.4541, "step": 14634 }, { "epoch": 1.9051151893791487, "grad_norm": 2.5203559398651123, "learning_rate": 3.0583813391222294e-06, "loss": 0.4671, "step": 14637 }, { "epoch": 1.9055056618508395, "grad_norm": 2.5161707401275635, "learning_rate": 3.056455669523749e-06, "loss": 0.4245, "step": 14640 }, { "epoch": 1.9058961343225302, "grad_norm": 2.660309076309204, "learning_rate": 3.0545303394583755e-06, "loss": 0.4664, "step": 14643 }, { "epoch": 1.906286606794221, "grad_norm": 2.8389623165130615, "learning_rate": 3.0526053492624574e-06, "loss": 0.5387, "step": 14646 }, { "epoch": 1.9066770792659118, "grad_norm": 2.4667160511016846, "learning_rate": 3.05068069927229e-06, "loss": 0.4764, "step": 14649 }, { "epoch": 1.9070675517376023, "grad_norm": 3.343127727508545, "learning_rate": 3.0487563898241025e-06, "loss": 0.5026, "step": 14652 }, { "epoch": 1.9074580242092933, "grad_norm": 2.870053291320801, "learning_rate": 3.046832421254068e-06, "loss": 0.4699, "step": 14655 }, { "epoch": 1.9078484966809839, "grad_norm": 2.796257495880127, "learning_rate": 3.0449087938983025e-06, "loss": 0.5665, "step": 14658 }, { "epoch": 1.9082389691526749, "grad_norm": 2.6486213207244873, "learning_rate": 3.0429855080928567e-06, "loss": 0.4896, "step": 14661 }, { "epoch": 1.9086294416243654, "grad_norm": 2.7578749656677246, "learning_rate": 3.0410625641737245e-06, "loss": 0.4799, "step": 14664 }, { "epoch": 1.9090199140960562, "grad_norm": 2.627326726913452, "learning_rate": 3.0391399624768424e-06, "loss": 0.4241, "step": 14667 }, { "epoch": 1.909410386567747, "grad_norm": 2.7783567905426025, "learning_rate": 3.0372177033380846e-06, "loss": 0.4997, "step": 14670 }, { "epoch": 1.9098008590394377, "grad_norm": 2.5225064754486084, "learning_rate": 3.0352957870932643e-06, "loss": 0.4955, "step": 14673 }, { "epoch": 1.9101913315111285, "grad_norm": 4.349731922149658, "learning_rate": 3.0333742140781374e-06, "loss": 0.4517, "step": 14676 }, { "epoch": 1.910581803982819, "grad_norm": 2.4601027965545654, "learning_rate": 3.031452984628398e-06, "loss": 0.5083, "step": 14679 }, { "epoch": 1.91097227645451, "grad_norm": 2.8652219772338867, "learning_rate": 3.02953209907968e-06, "loss": 0.4887, "step": 14682 }, { "epoch": 1.9113627489262006, "grad_norm": 2.7341558933258057, "learning_rate": 3.0276115577675604e-06, "loss": 0.5125, "step": 14685 }, { "epoch": 1.9117532213978916, "grad_norm": 3.915335178375244, "learning_rate": 3.025691361027552e-06, "loss": 0.531, "step": 14688 }, { "epoch": 1.9121436938695822, "grad_norm": 3.0976195335388184, "learning_rate": 3.023771509195108e-06, "loss": 0.4759, "step": 14691 }, { "epoch": 1.912534166341273, "grad_norm": 2.6159005165100098, "learning_rate": 3.0218520026056264e-06, "loss": 0.4972, "step": 14694 }, { "epoch": 1.9129246388129637, "grad_norm": 2.7680933475494385, "learning_rate": 3.019932841594437e-06, "loss": 0.5076, "step": 14697 }, { "epoch": 1.9133151112846545, "grad_norm": 3.1167070865631104, "learning_rate": 3.0180140264968153e-06, "loss": 0.6106, "step": 14700 }, { "epoch": 1.9137055837563453, "grad_norm": 2.826345682144165, "learning_rate": 3.0160955576479735e-06, "loss": 0.4774, "step": 14703 }, { "epoch": 1.9140960562280358, "grad_norm": 2.6846492290496826, "learning_rate": 3.014177435383063e-06, "loss": 0.4761, "step": 14706 }, { "epoch": 1.9144865286997268, "grad_norm": 2.6631195545196533, "learning_rate": 3.0122596600371777e-06, "loss": 0.4419, "step": 14709 }, { "epoch": 1.9148770011714173, "grad_norm": 2.869974374771118, "learning_rate": 3.010342231945349e-06, "loss": 0.4628, "step": 14712 }, { "epoch": 1.9152674736431081, "grad_norm": 2.954019546508789, "learning_rate": 3.008425151442546e-06, "loss": 0.4112, "step": 14715 }, { "epoch": 1.915657946114799, "grad_norm": 3.6011927127838135, "learning_rate": 3.0065084188636794e-06, "loss": 0.5629, "step": 14718 }, { "epoch": 1.9160484185864897, "grad_norm": 2.377660036087036, "learning_rate": 3.0045920345435996e-06, "loss": 0.4984, "step": 14721 }, { "epoch": 1.9164388910581804, "grad_norm": 2.4976119995117188, "learning_rate": 3.002675998817093e-06, "loss": 0.5637, "step": 14724 }, { "epoch": 1.916829363529871, "grad_norm": 2.8942108154296875, "learning_rate": 3.0007603120188877e-06, "loss": 0.4453, "step": 14727 }, { "epoch": 1.917219836001562, "grad_norm": 2.675811290740967, "learning_rate": 2.998844974483649e-06, "loss": 0.4768, "step": 14730 }, { "epoch": 1.9176103084732525, "grad_norm": 2.6159439086914062, "learning_rate": 2.9969299865459845e-06, "loss": 0.4118, "step": 14733 }, { "epoch": 1.9180007809449435, "grad_norm": 2.5294883251190186, "learning_rate": 2.995015348540438e-06, "loss": 0.4178, "step": 14736 }, { "epoch": 1.918391253416634, "grad_norm": 2.5951461791992188, "learning_rate": 2.993101060801491e-06, "loss": 0.5095, "step": 14739 }, { "epoch": 1.9187817258883249, "grad_norm": 2.7234127521514893, "learning_rate": 2.991187123663567e-06, "loss": 0.4648, "step": 14742 }, { "epoch": 1.9191721983600156, "grad_norm": 2.6454546451568604, "learning_rate": 2.9892735374610273e-06, "loss": 0.4701, "step": 14745 }, { "epoch": 1.9195626708317064, "grad_norm": 2.923607349395752, "learning_rate": 2.987360302528169e-06, "loss": 0.4573, "step": 14748 }, { "epoch": 1.9199531433033972, "grad_norm": 2.9072718620300293, "learning_rate": 2.9854474191992323e-06, "loss": 0.457, "step": 14751 }, { "epoch": 1.9203436157750877, "grad_norm": 2.4134531021118164, "learning_rate": 2.983534887808394e-06, "loss": 0.4027, "step": 14754 }, { "epoch": 1.9207340882467787, "grad_norm": 2.700012683868408, "learning_rate": 2.9816227086897696e-06, "loss": 0.4745, "step": 14757 }, { "epoch": 1.9211245607184693, "grad_norm": 2.7360305786132812, "learning_rate": 2.9797108821774114e-06, "loss": 0.4525, "step": 14760 }, { "epoch": 1.9215150331901603, "grad_norm": 2.5662286281585693, "learning_rate": 2.9777994086053123e-06, "loss": 0.5103, "step": 14763 }, { "epoch": 1.9219055056618508, "grad_norm": 2.873728036880493, "learning_rate": 2.975888288307402e-06, "loss": 0.5333, "step": 14766 }, { "epoch": 1.9222959781335416, "grad_norm": 3.446164846420288, "learning_rate": 2.973977521617549e-06, "loss": 0.4311, "step": 14769 }, { "epoch": 1.9226864506052324, "grad_norm": 2.5216710567474365, "learning_rate": 2.9720671088695628e-06, "loss": 0.5313, "step": 14772 }, { "epoch": 1.9230769230769231, "grad_norm": 2.530691385269165, "learning_rate": 2.9701570503971846e-06, "loss": 0.497, "step": 14775 }, { "epoch": 1.923467395548614, "grad_norm": 2.462676763534546, "learning_rate": 2.968247346534101e-06, "loss": 0.4939, "step": 14778 }, { "epoch": 1.9238578680203045, "grad_norm": 3.943150043487549, "learning_rate": 2.9663379976139307e-06, "loss": 0.5034, "step": 14781 }, { "epoch": 1.9242483404919954, "grad_norm": 2.7674999237060547, "learning_rate": 2.964429003970234e-06, "loss": 0.4747, "step": 14784 }, { "epoch": 1.924638812963686, "grad_norm": 2.529186725616455, "learning_rate": 2.9625203659365094e-06, "loss": 0.4283, "step": 14787 }, { "epoch": 1.9250292854353768, "grad_norm": 2.8938591480255127, "learning_rate": 2.9606120838461884e-06, "loss": 0.4594, "step": 14790 }, { "epoch": 1.9254197579070675, "grad_norm": 2.950148820877075, "learning_rate": 2.958704158032647e-06, "loss": 0.5019, "step": 14793 }, { "epoch": 1.9258102303787583, "grad_norm": 3.631539821624756, "learning_rate": 2.956796588829195e-06, "loss": 0.4972, "step": 14796 }, { "epoch": 1.926200702850449, "grad_norm": 2.9207494258880615, "learning_rate": 2.954889376569081e-06, "loss": 0.5222, "step": 14799 }, { "epoch": 1.9265911753221396, "grad_norm": 2.5649592876434326, "learning_rate": 2.9529825215854907e-06, "loss": 0.5079, "step": 14802 }, { "epoch": 1.9269816477938306, "grad_norm": 2.7472033500671387, "learning_rate": 2.951076024211547e-06, "loss": 0.4459, "step": 14805 }, { "epoch": 1.9273721202655212, "grad_norm": 2.761784315109253, "learning_rate": 2.949169884780313e-06, "loss": 0.4845, "step": 14808 }, { "epoch": 1.9277625927372122, "grad_norm": 2.7410855293273926, "learning_rate": 2.947264103624784e-06, "loss": 0.5347, "step": 14811 }, { "epoch": 1.9281530652089027, "grad_norm": 3.519693613052368, "learning_rate": 2.9453586810778996e-06, "loss": 0.512, "step": 14814 }, { "epoch": 1.9285435376805935, "grad_norm": 2.6707022190093994, "learning_rate": 2.9434536174725305e-06, "loss": 0.4977, "step": 14817 }, { "epoch": 1.9289340101522843, "grad_norm": 3.4989326000213623, "learning_rate": 2.941548913141487e-06, "loss": 0.4988, "step": 14820 }, { "epoch": 1.929324482623975, "grad_norm": 2.744438886642456, "learning_rate": 2.9396445684175196e-06, "loss": 0.4738, "step": 14823 }, { "epoch": 1.9297149550956658, "grad_norm": 3.4008469581604004, "learning_rate": 2.9377405836333106e-06, "loss": 0.4601, "step": 14826 }, { "epoch": 1.9301054275673564, "grad_norm": 3.585939407348633, "learning_rate": 2.9358369591214847e-06, "loss": 0.4585, "step": 14829 }, { "epoch": 1.9304959000390474, "grad_norm": 2.683812379837036, "learning_rate": 2.9339336952145957e-06, "loss": 0.4817, "step": 14832 }, { "epoch": 1.930886372510738, "grad_norm": 2.6055679321289062, "learning_rate": 2.932030792245148e-06, "loss": 0.5232, "step": 14835 }, { "epoch": 1.9312768449824287, "grad_norm": 2.5260632038116455, "learning_rate": 2.9301282505455687e-06, "loss": 0.4211, "step": 14838 }, { "epoch": 1.9316673174541195, "grad_norm": 2.553103446960449, "learning_rate": 2.9282260704482313e-06, "loss": 0.4662, "step": 14841 }, { "epoch": 1.9320577899258102, "grad_norm": 2.7395856380462646, "learning_rate": 2.9263242522854397e-06, "loss": 0.4706, "step": 14844 }, { "epoch": 1.932448262397501, "grad_norm": 2.5782995223999023, "learning_rate": 2.924422796389439e-06, "loss": 0.4471, "step": 14847 }, { "epoch": 1.9328387348691918, "grad_norm": 2.5767011642456055, "learning_rate": 2.92252170309241e-06, "loss": 0.4797, "step": 14850 }, { "epoch": 1.9332292073408825, "grad_norm": 2.6651394367218018, "learning_rate": 2.920620972726468e-06, "loss": 0.5292, "step": 14853 }, { "epoch": 1.933619679812573, "grad_norm": 3.3890509605407715, "learning_rate": 2.9187206056236693e-06, "loss": 0.4272, "step": 14856 }, { "epoch": 1.934010152284264, "grad_norm": 3.1942598819732666, "learning_rate": 2.916820602116e-06, "loss": 0.5652, "step": 14859 }, { "epoch": 1.9344006247559546, "grad_norm": 2.847402572631836, "learning_rate": 2.914920962535391e-06, "loss": 0.467, "step": 14862 }, { "epoch": 1.9347910972276454, "grad_norm": 2.6904537677764893, "learning_rate": 2.913021687213704e-06, "loss": 0.5246, "step": 14865 }, { "epoch": 1.9351815696993362, "grad_norm": 2.631735324859619, "learning_rate": 2.9111227764827376e-06, "loss": 0.4657, "step": 14868 }, { "epoch": 1.935572042171027, "grad_norm": 2.6257803440093994, "learning_rate": 2.9092242306742256e-06, "loss": 0.5195, "step": 14871 }, { "epoch": 1.9359625146427177, "grad_norm": 2.7459394931793213, "learning_rate": 2.9073260501198424e-06, "loss": 0.4976, "step": 14874 }, { "epoch": 1.9363529871144083, "grad_norm": 2.597118854522705, "learning_rate": 2.905428235151198e-06, "loss": 0.4381, "step": 14877 }, { "epoch": 1.9367434595860993, "grad_norm": 2.7893166542053223, "learning_rate": 2.9035307860998346e-06, "loss": 0.4861, "step": 14880 }, { "epoch": 1.9371339320577898, "grad_norm": 2.5637564659118652, "learning_rate": 2.90163370329723e-06, "loss": 0.5199, "step": 14883 }, { "epoch": 1.9375244045294808, "grad_norm": 2.49005389213562, "learning_rate": 2.899736987074806e-06, "loss": 0.5072, "step": 14886 }, { "epoch": 1.9379148770011714, "grad_norm": 2.6753792762756348, "learning_rate": 2.8978406377639114e-06, "loss": 0.483, "step": 14889 }, { "epoch": 1.9383053494728621, "grad_norm": 2.4277470111846924, "learning_rate": 2.8959446556958333e-06, "loss": 0.5358, "step": 14892 }, { "epoch": 1.938695821944553, "grad_norm": 2.332995653152466, "learning_rate": 2.8940490412017997e-06, "loss": 0.4988, "step": 14895 }, { "epoch": 1.9390862944162437, "grad_norm": 2.639230728149414, "learning_rate": 2.892153794612968e-06, "loss": 0.4237, "step": 14898 }, { "epoch": 1.9394767668879345, "grad_norm": 2.7841386795043945, "learning_rate": 2.8902589162604323e-06, "loss": 0.4862, "step": 14901 }, { "epoch": 1.939867239359625, "grad_norm": 2.618056535720825, "learning_rate": 2.8883644064752274e-06, "loss": 0.4738, "step": 14904 }, { "epoch": 1.940257711831316, "grad_norm": 2.2609994411468506, "learning_rate": 2.886470265588319e-06, "loss": 0.4599, "step": 14907 }, { "epoch": 1.9406481843030066, "grad_norm": 2.6283135414123535, "learning_rate": 2.8845764939306063e-06, "loss": 0.5095, "step": 14910 }, { "epoch": 1.9410386567746973, "grad_norm": 2.641413688659668, "learning_rate": 2.8826830918329325e-06, "loss": 0.4114, "step": 14913 }, { "epoch": 1.941429129246388, "grad_norm": 2.5801405906677246, "learning_rate": 2.8807900596260663e-06, "loss": 0.5321, "step": 14916 }, { "epoch": 1.9418196017180789, "grad_norm": 2.513852834701538, "learning_rate": 2.87889739764072e-06, "loss": 0.5193, "step": 14919 }, { "epoch": 1.9422100741897697, "grad_norm": 2.852112054824829, "learning_rate": 2.8770051062075343e-06, "loss": 0.4681, "step": 14922 }, { "epoch": 1.9426005466614602, "grad_norm": 2.5019607543945312, "learning_rate": 2.8751131856570935e-06, "loss": 0.5315, "step": 14925 }, { "epoch": 1.9429910191331512, "grad_norm": 2.95793080329895, "learning_rate": 2.873221636319908e-06, "loss": 0.5047, "step": 14928 }, { "epoch": 1.9433814916048417, "grad_norm": 2.9171652793884277, "learning_rate": 2.871330458526429e-06, "loss": 0.5912, "step": 14931 }, { "epoch": 1.9437719640765327, "grad_norm": 3.1407294273376465, "learning_rate": 2.8694396526070383e-06, "loss": 0.4123, "step": 14934 }, { "epoch": 1.9441624365482233, "grad_norm": 2.9281039237976074, "learning_rate": 2.8675492188920605e-06, "loss": 0.6034, "step": 14937 }, { "epoch": 1.944552909019914, "grad_norm": 3.462409257888794, "learning_rate": 2.865659157711748e-06, "loss": 0.4745, "step": 14940 }, { "epoch": 1.9449433814916048, "grad_norm": 3.067357301712036, "learning_rate": 2.863769469396289e-06, "loss": 0.4869, "step": 14943 }, { "epoch": 1.9453338539632956, "grad_norm": 2.8917782306671143, "learning_rate": 2.8618801542758116e-06, "loss": 0.513, "step": 14946 }, { "epoch": 1.9457243264349864, "grad_norm": 2.6178336143493652, "learning_rate": 2.859991212680373e-06, "loss": 0.5033, "step": 14949 }, { "epoch": 1.946114798906677, "grad_norm": 2.847200870513916, "learning_rate": 2.858102644939966e-06, "loss": 0.4524, "step": 14952 }, { "epoch": 1.946505271378368, "grad_norm": 2.400238275527954, "learning_rate": 2.8562144513845236e-06, "loss": 0.412, "step": 14955 }, { "epoch": 1.9468957438500585, "grad_norm": 2.936591386795044, "learning_rate": 2.8543266323439034e-06, "loss": 0.4973, "step": 14958 }, { "epoch": 1.9472862163217495, "grad_norm": 2.7296829223632812, "learning_rate": 2.8524391881479096e-06, "loss": 0.4715, "step": 14961 }, { "epoch": 1.94767668879344, "grad_norm": 2.435722589492798, "learning_rate": 2.8505521191262697e-06, "loss": 0.4847, "step": 14964 }, { "epoch": 1.9480671612651308, "grad_norm": 2.6124179363250732, "learning_rate": 2.8486654256086543e-06, "loss": 0.5464, "step": 14967 }, { "epoch": 1.9484576337368216, "grad_norm": 3.493014335632324, "learning_rate": 2.8467791079246636e-06, "loss": 0.5372, "step": 14970 }, { "epoch": 1.9488481062085123, "grad_norm": 3.0467758178710938, "learning_rate": 2.8448931664038315e-06, "loss": 0.551, "step": 14973 }, { "epoch": 1.9492385786802031, "grad_norm": 2.6954057216644287, "learning_rate": 2.843007601375631e-06, "loss": 0.5891, "step": 14976 }, { "epoch": 1.9496290511518937, "grad_norm": 3.6716325283050537, "learning_rate": 2.8411224131694647e-06, "loss": 0.4633, "step": 14979 }, { "epoch": 1.9500195236235847, "grad_norm": 2.4117796421051025, "learning_rate": 2.839237602114672e-06, "loss": 0.5026, "step": 14982 }, { "epoch": 1.9504099960952752, "grad_norm": 3.0987799167633057, "learning_rate": 2.837353168540522e-06, "loss": 0.5195, "step": 14985 }, { "epoch": 1.950800468566966, "grad_norm": 2.7744829654693604, "learning_rate": 2.8354691127762256e-06, "loss": 0.4605, "step": 14988 }, { "epoch": 1.9511909410386568, "grad_norm": 2.757351875305176, "learning_rate": 2.8335854351509223e-06, "loss": 0.506, "step": 14991 }, { "epoch": 1.9515814135103475, "grad_norm": 2.551328659057617, "learning_rate": 2.8317021359936837e-06, "loss": 0.5723, "step": 14994 }, { "epoch": 1.9519718859820383, "grad_norm": 2.6989192962646484, "learning_rate": 2.829819215633523e-06, "loss": 0.4556, "step": 14997 }, { "epoch": 1.9523623584537289, "grad_norm": 2.8726086616516113, "learning_rate": 2.8279366743993776e-06, "loss": 0.4711, "step": 15000 }, { "epoch": 1.9527528309254198, "grad_norm": 2.742509603500366, "learning_rate": 2.8260545126201277e-06, "loss": 0.4994, "step": 15003 }, { "epoch": 1.9531433033971104, "grad_norm": 2.504814624786377, "learning_rate": 2.824172730624579e-06, "loss": 0.5377, "step": 15006 }, { "epoch": 1.9535337758688014, "grad_norm": 2.9309048652648926, "learning_rate": 2.82229132874148e-06, "loss": 0.5133, "step": 15009 }, { "epoch": 1.953924248340492, "grad_norm": 4.551779270172119, "learning_rate": 2.8204103072995036e-06, "loss": 0.4612, "step": 15012 }, { "epoch": 1.9543147208121827, "grad_norm": 3.107567548751831, "learning_rate": 2.81852966662726e-06, "loss": 0.5182, "step": 15015 }, { "epoch": 1.9547051932838735, "grad_norm": 2.6413309574127197, "learning_rate": 2.8166494070532958e-06, "loss": 0.4866, "step": 15018 }, { "epoch": 1.9550956657555643, "grad_norm": 2.4750940799713135, "learning_rate": 2.8147695289060874e-06, "loss": 0.4426, "step": 15021 }, { "epoch": 1.955486138227255, "grad_norm": 3.1785085201263428, "learning_rate": 2.8128900325140427e-06, "loss": 0.473, "step": 15024 }, { "epoch": 1.9558766106989456, "grad_norm": 2.49540114402771, "learning_rate": 2.8110109182055112e-06, "loss": 0.4889, "step": 15027 }, { "epoch": 1.9562670831706366, "grad_norm": 2.723663330078125, "learning_rate": 2.8091321863087672e-06, "loss": 0.541, "step": 15030 }, { "epoch": 1.9566575556423271, "grad_norm": 3.729959011077881, "learning_rate": 2.8072538371520208e-06, "loss": 0.4935, "step": 15033 }, { "epoch": 1.9570480281140181, "grad_norm": 2.766430139541626, "learning_rate": 2.805375871063415e-06, "loss": 0.5315, "step": 15036 }, { "epoch": 1.9574385005857087, "grad_norm": 2.1455156803131104, "learning_rate": 2.8034982883710293e-06, "loss": 0.4341, "step": 15039 }, { "epoch": 1.9578289730573994, "grad_norm": 2.6431660652160645, "learning_rate": 2.8016210894028694e-06, "loss": 0.4833, "step": 15042 }, { "epoch": 1.9582194455290902, "grad_norm": 2.4709603786468506, "learning_rate": 2.799744274486883e-06, "loss": 0.4393, "step": 15045 }, { "epoch": 1.958609918000781, "grad_norm": 2.707580804824829, "learning_rate": 2.797867843950941e-06, "loss": 0.4678, "step": 15048 }, { "epoch": 1.9590003904724718, "grad_norm": 2.598074197769165, "learning_rate": 2.795991798122856e-06, "loss": 0.5941, "step": 15051 }, { "epoch": 1.9593908629441623, "grad_norm": 2.8426082134246826, "learning_rate": 2.794116137330367e-06, "loss": 0.5295, "step": 15054 }, { "epoch": 1.9597813354158533, "grad_norm": 2.351702928543091, "learning_rate": 2.792240861901147e-06, "loss": 0.4504, "step": 15057 }, { "epoch": 1.9601718078875439, "grad_norm": 2.866417646408081, "learning_rate": 2.7903659721628063e-06, "loss": 0.4783, "step": 15060 }, { "epoch": 1.9605622803592346, "grad_norm": 2.6855812072753906, "learning_rate": 2.788491468442881e-06, "loss": 0.4801, "step": 15063 }, { "epoch": 1.9609527528309254, "grad_norm": 2.5768182277679443, "learning_rate": 2.7866173510688423e-06, "loss": 0.4532, "step": 15066 }, { "epoch": 1.9613432253026162, "grad_norm": 2.8649215698242188, "learning_rate": 2.7847436203680977e-06, "loss": 0.5368, "step": 15069 }, { "epoch": 1.961733697774307, "grad_norm": 3.515437602996826, "learning_rate": 2.7828702766679827e-06, "loss": 0.5493, "step": 15072 }, { "epoch": 1.9621241702459975, "grad_norm": 2.8081016540527344, "learning_rate": 2.780997320295764e-06, "loss": 0.5247, "step": 15075 }, { "epoch": 1.9625146427176885, "grad_norm": 2.8509280681610107, "learning_rate": 2.7791247515786475e-06, "loss": 0.5239, "step": 15078 }, { "epoch": 1.962905115189379, "grad_norm": 2.9370276927948, "learning_rate": 2.777252570843765e-06, "loss": 0.4763, "step": 15081 }, { "epoch": 1.96329558766107, "grad_norm": 3.092346429824829, "learning_rate": 2.77538077841818e-06, "loss": 0.4743, "step": 15084 }, { "epoch": 1.9636860601327606, "grad_norm": 2.7640228271484375, "learning_rate": 2.7735093746288933e-06, "loss": 0.5355, "step": 15087 }, { "epoch": 1.9640765326044514, "grad_norm": 2.5823822021484375, "learning_rate": 2.7716383598028367e-06, "loss": 0.4613, "step": 15090 }, { "epoch": 1.9644670050761421, "grad_norm": 2.547419786453247, "learning_rate": 2.769767734266871e-06, "loss": 0.5324, "step": 15093 }, { "epoch": 1.964857477547833, "grad_norm": 2.7568259239196777, "learning_rate": 2.7678974983477907e-06, "loss": 0.5248, "step": 15096 }, { "epoch": 1.9652479500195237, "grad_norm": 2.7360951900482178, "learning_rate": 2.7660276523723195e-06, "loss": 0.4737, "step": 15099 }, { "epoch": 1.9656384224912142, "grad_norm": 2.4766454696655273, "learning_rate": 2.7641581966671203e-06, "loss": 0.5236, "step": 15102 }, { "epoch": 1.9660288949629052, "grad_norm": 3.0755808353424072, "learning_rate": 2.7622891315587803e-06, "loss": 0.4267, "step": 15105 }, { "epoch": 1.9664193674345958, "grad_norm": 2.7649264335632324, "learning_rate": 2.760420457373819e-06, "loss": 0.4645, "step": 15108 }, { "epoch": 1.9668098399062868, "grad_norm": 2.694270372390747, "learning_rate": 2.7585521744386954e-06, "loss": 0.5818, "step": 15111 }, { "epoch": 1.9672003123779773, "grad_norm": 2.4713313579559326, "learning_rate": 2.7566842830797914e-06, "loss": 0.4033, "step": 15114 }, { "epoch": 1.967590784849668, "grad_norm": 2.431579351425171, "learning_rate": 2.754816783623421e-06, "loss": 0.3948, "step": 15117 }, { "epoch": 1.9679812573213589, "grad_norm": 3.099750518798828, "learning_rate": 2.7529496763958385e-06, "loss": 0.4857, "step": 15120 }, { "epoch": 1.9683717297930496, "grad_norm": 2.6645519733428955, "learning_rate": 2.7510829617232197e-06, "loss": 0.5021, "step": 15123 }, { "epoch": 1.9687622022647404, "grad_norm": 3.6451914310455322, "learning_rate": 2.7492166399316746e-06, "loss": 0.495, "step": 15126 }, { "epoch": 1.969152674736431, "grad_norm": 2.5093352794647217, "learning_rate": 2.7473507113472477e-06, "loss": 0.4996, "step": 15129 }, { "epoch": 1.969543147208122, "grad_norm": 2.9083104133605957, "learning_rate": 2.7454851762959146e-06, "loss": 0.5105, "step": 15132 }, { "epoch": 1.9699336196798125, "grad_norm": 2.977738618850708, "learning_rate": 2.7436200351035784e-06, "loss": 0.5711, "step": 15135 }, { "epoch": 1.9703240921515033, "grad_norm": 2.227489948272705, "learning_rate": 2.7417552880960736e-06, "loss": 0.4285, "step": 15138 }, { "epoch": 1.970714564623194, "grad_norm": 2.565983295440674, "learning_rate": 2.739890935599171e-06, "loss": 0.4802, "step": 15141 }, { "epoch": 1.9711050370948848, "grad_norm": 2.6615631580352783, "learning_rate": 2.738026977938567e-06, "loss": 0.4581, "step": 15144 }, { "epoch": 1.9714955095665756, "grad_norm": 2.5697174072265625, "learning_rate": 2.736163415439892e-06, "loss": 0.4529, "step": 15147 }, { "epoch": 1.9718859820382661, "grad_norm": 2.6330325603485107, "learning_rate": 2.734300248428704e-06, "loss": 0.5237, "step": 15150 }, { "epoch": 1.9722764545099571, "grad_norm": 2.766741991043091, "learning_rate": 2.7324374772304978e-06, "loss": 0.4946, "step": 15153 }, { "epoch": 1.9726669269816477, "grad_norm": 2.515528917312622, "learning_rate": 2.7305751021706943e-06, "loss": 0.5103, "step": 15156 }, { "epoch": 1.9730573994533387, "grad_norm": 2.4971110820770264, "learning_rate": 2.7287131235746446e-06, "loss": 0.4888, "step": 15159 }, { "epoch": 1.9734478719250292, "grad_norm": 2.7696313858032227, "learning_rate": 2.7268515417676354e-06, "loss": 0.4618, "step": 15162 }, { "epoch": 1.97383834439672, "grad_norm": 2.6422767639160156, "learning_rate": 2.7249903570748805e-06, "loss": 0.5001, "step": 15165 }, { "epoch": 1.9742288168684108, "grad_norm": 2.5587644577026367, "learning_rate": 2.7231295698215223e-06, "loss": 0.4142, "step": 15168 }, { "epoch": 1.9746192893401016, "grad_norm": 2.5920650959014893, "learning_rate": 2.721269180332638e-06, "loss": 0.4981, "step": 15171 }, { "epoch": 1.9750097618117923, "grad_norm": 2.7538814544677734, "learning_rate": 2.7194091889332364e-06, "loss": 0.5542, "step": 15174 }, { "epoch": 1.9754002342834829, "grad_norm": 2.732511281967163, "learning_rate": 2.717549595948251e-06, "loss": 0.4629, "step": 15177 }, { "epoch": 1.9757907067551739, "grad_norm": 3.137880325317383, "learning_rate": 2.7156904017025485e-06, "loss": 0.4911, "step": 15180 }, { "epoch": 1.9761811792268644, "grad_norm": 3.1518633365631104, "learning_rate": 2.7138316065209298e-06, "loss": 0.5212, "step": 15183 }, { "epoch": 1.9765716516985552, "grad_norm": 2.9920685291290283, "learning_rate": 2.7119732107281193e-06, "loss": 0.4845, "step": 15186 }, { "epoch": 1.976962124170246, "grad_norm": 2.8355932235717773, "learning_rate": 2.710115214648775e-06, "loss": 0.5633, "step": 15189 }, { "epoch": 1.9773525966419367, "grad_norm": 3.028092861175537, "learning_rate": 2.708257618607485e-06, "loss": 0.5472, "step": 15192 }, { "epoch": 1.9777430691136275, "grad_norm": 2.849092483520508, "learning_rate": 2.7064004229287688e-06, "loss": 0.5292, "step": 15195 }, { "epoch": 1.9781335415853183, "grad_norm": 2.641608238220215, "learning_rate": 2.704543627937074e-06, "loss": 0.5607, "step": 15198 }, { "epoch": 1.978524014057009, "grad_norm": 2.6991071701049805, "learning_rate": 2.702687233956777e-06, "loss": 0.4641, "step": 15201 }, { "epoch": 1.9789144865286996, "grad_norm": 2.600306272506714, "learning_rate": 2.7008312413121886e-06, "loss": 0.5011, "step": 15204 }, { "epoch": 1.9793049590003906, "grad_norm": 2.6777749061584473, "learning_rate": 2.6989756503275454e-06, "loss": 0.488, "step": 15207 }, { "epoch": 1.9796954314720812, "grad_norm": 2.550184965133667, "learning_rate": 2.697120461327014e-06, "loss": 0.5401, "step": 15210 }, { "epoch": 1.980085903943772, "grad_norm": 2.9522573947906494, "learning_rate": 2.6952656746346937e-06, "loss": 0.5706, "step": 15213 }, { "epoch": 1.9804763764154627, "grad_norm": 3.306687355041504, "learning_rate": 2.6934112905746136e-06, "loss": 0.5365, "step": 15216 }, { "epoch": 1.9808668488871535, "grad_norm": 3.0320816040039062, "learning_rate": 2.6915573094707282e-06, "loss": 0.4609, "step": 15219 }, { "epoch": 1.9812573213588442, "grad_norm": 2.547999620437622, "learning_rate": 2.689703731646922e-06, "loss": 0.5136, "step": 15222 }, { "epoch": 1.9816477938305348, "grad_norm": 3.0180954933166504, "learning_rate": 2.687850557427017e-06, "loss": 0.4992, "step": 15225 }, { "epoch": 1.9820382663022258, "grad_norm": 3.133898973464966, "learning_rate": 2.685997787134755e-06, "loss": 0.51, "step": 15228 }, { "epoch": 1.9824287387739163, "grad_norm": 2.631911039352417, "learning_rate": 2.6841454210938095e-06, "loss": 0.4903, "step": 15231 }, { "epoch": 1.9828192112456073, "grad_norm": 4.11314582824707, "learning_rate": 2.6822934596277893e-06, "loss": 0.5155, "step": 15234 }, { "epoch": 1.9832096837172979, "grad_norm": 2.362086772918701, "learning_rate": 2.6804419030602256e-06, "loss": 0.461, "step": 15237 }, { "epoch": 1.9836001561889887, "grad_norm": 2.590529203414917, "learning_rate": 2.6785907517145825e-06, "loss": 0.4995, "step": 15240 }, { "epoch": 1.9839906286606794, "grad_norm": 2.6236050128936768, "learning_rate": 2.676740005914249e-06, "loss": 0.5005, "step": 15243 }, { "epoch": 1.9843811011323702, "grad_norm": 2.9158525466918945, "learning_rate": 2.6748896659825507e-06, "loss": 0.5008, "step": 15246 }, { "epoch": 1.984771573604061, "grad_norm": 2.9477591514587402, "learning_rate": 2.673039732242737e-06, "loss": 0.4673, "step": 15249 }, { "epoch": 1.9851620460757515, "grad_norm": 4.2003397941589355, "learning_rate": 2.671190205017985e-06, "loss": 0.4733, "step": 15252 }, { "epoch": 1.9855525185474425, "grad_norm": 2.9860401153564453, "learning_rate": 2.669341084631405e-06, "loss": 0.4566, "step": 15255 }, { "epoch": 1.985942991019133, "grad_norm": 2.4780070781707764, "learning_rate": 2.6674923714060365e-06, "loss": 0.476, "step": 15258 }, { "epoch": 1.9863334634908238, "grad_norm": 2.6294336318969727, "learning_rate": 2.6656440656648434e-06, "loss": 0.4586, "step": 15261 }, { "epoch": 1.9867239359625146, "grad_norm": 2.643993616104126, "learning_rate": 2.66379616773072e-06, "loss": 0.4864, "step": 15264 }, { "epoch": 1.9871144084342054, "grad_norm": 2.825512647628784, "learning_rate": 2.6619486779264924e-06, "loss": 0.5533, "step": 15267 }, { "epoch": 1.9875048809058962, "grad_norm": 2.878120183944702, "learning_rate": 2.6601015965749135e-06, "loss": 0.4189, "step": 15270 }, { "epoch": 1.9878953533775867, "grad_norm": 2.5639851093292236, "learning_rate": 2.65825492399866e-06, "loss": 0.5051, "step": 15273 }, { "epoch": 1.9882858258492777, "grad_norm": 2.7018072605133057, "learning_rate": 2.6564086605203478e-06, "loss": 0.4696, "step": 15276 }, { "epoch": 1.9886762983209683, "grad_norm": 2.7019267082214355, "learning_rate": 2.654562806462512e-06, "loss": 0.4619, "step": 15279 }, { "epoch": 1.9890667707926593, "grad_norm": 2.6082382202148438, "learning_rate": 2.652717362147618e-06, "loss": 0.5409, "step": 15282 }, { "epoch": 1.9894572432643498, "grad_norm": 2.8006742000579834, "learning_rate": 2.6508723278980654e-06, "loss": 0.4565, "step": 15285 }, { "epoch": 1.9898477157360406, "grad_norm": 2.544560194015503, "learning_rate": 2.6490277040361743e-06, "loss": 0.3934, "step": 15288 }, { "epoch": 1.9902381882077314, "grad_norm": 2.632777214050293, "learning_rate": 2.647183490884198e-06, "loss": 0.5214, "step": 15291 }, { "epoch": 1.9906286606794221, "grad_norm": 2.4380311965942383, "learning_rate": 2.6453396887643124e-06, "loss": 0.4517, "step": 15294 }, { "epoch": 1.991019133151113, "grad_norm": 2.867617607116699, "learning_rate": 2.6434962979986334e-06, "loss": 0.4878, "step": 15297 }, { "epoch": 1.9914096056228034, "grad_norm": 2.7441964149475098, "learning_rate": 2.641653318909194e-06, "loss": 0.5228, "step": 15300 }, { "epoch": 1.9918000780944944, "grad_norm": 2.700723171234131, "learning_rate": 2.6398107518179584e-06, "loss": 0.4856, "step": 15303 }, { "epoch": 1.992190550566185, "grad_norm": 2.6741185188293457, "learning_rate": 2.637968597046818e-06, "loss": 0.5286, "step": 15306 }, { "epoch": 1.992581023037876, "grad_norm": 2.861276388168335, "learning_rate": 2.6361268549175957e-06, "loss": 0.5157, "step": 15309 }, { "epoch": 1.9929714955095665, "grad_norm": 2.8468337059020996, "learning_rate": 2.6342855257520393e-06, "loss": 0.4486, "step": 15312 }, { "epoch": 1.9933619679812573, "grad_norm": 2.4954781532287598, "learning_rate": 2.632444609871824e-06, "loss": 0.4334, "step": 15315 }, { "epoch": 1.993752440452948, "grad_norm": 2.777869939804077, "learning_rate": 2.630604107598555e-06, "loss": 0.4058, "step": 15318 }, { "epoch": 1.9941429129246389, "grad_norm": 3.0267093181610107, "learning_rate": 2.6287640192537645e-06, "loss": 0.5133, "step": 15321 }, { "epoch": 1.9945333853963296, "grad_norm": 2.5115318298339844, "learning_rate": 2.62692434515891e-06, "loss": 0.4361, "step": 15324 }, { "epoch": 1.9949238578680202, "grad_norm": 2.824373245239258, "learning_rate": 2.6250850856353815e-06, "loss": 0.4602, "step": 15327 }, { "epoch": 1.9953143303397112, "grad_norm": 2.420016050338745, "learning_rate": 2.6232462410044927e-06, "loss": 0.4136, "step": 15330 }, { "epoch": 1.9957048028114017, "grad_norm": 2.629142999649048, "learning_rate": 2.6214078115874843e-06, "loss": 0.5071, "step": 15333 }, { "epoch": 1.9960952752830925, "grad_norm": 2.6193673610687256, "learning_rate": 2.6195697977055262e-06, "loss": 0.4901, "step": 15336 }, { "epoch": 1.9964857477547833, "grad_norm": 3.3041958808898926, "learning_rate": 2.6177321996797193e-06, "loss": 0.4341, "step": 15339 }, { "epoch": 1.996876220226474, "grad_norm": 2.496450424194336, "learning_rate": 2.615895017831086e-06, "loss": 0.4542, "step": 15342 }, { "epoch": 1.9972666926981648, "grad_norm": 2.4021811485290527, "learning_rate": 2.6140582524805746e-06, "loss": 0.4821, "step": 15345 }, { "epoch": 1.9976571651698554, "grad_norm": 2.5634875297546387, "learning_rate": 2.6122219039490704e-06, "loss": 0.4816, "step": 15348 }, { "epoch": 1.9980476376415464, "grad_norm": 2.7742042541503906, "learning_rate": 2.6103859725573756e-06, "loss": 0.5172, "step": 15351 }, { "epoch": 1.998438110113237, "grad_norm": 2.705730676651001, "learning_rate": 2.6085504586262245e-06, "loss": 0.4998, "step": 15354 }, { "epoch": 1.998828582584928, "grad_norm": 2.538831949234009, "learning_rate": 2.606715362476275e-06, "loss": 0.474, "step": 15357 }, { "epoch": 1.9992190550566185, "grad_norm": 2.5744338035583496, "learning_rate": 2.6048806844281206e-06, "loss": 0.5268, "step": 15360 }, { "epoch": 1.9996095275283092, "grad_norm": 2.8102900981903076, "learning_rate": 2.6030464248022704e-06, "loss": 0.5102, "step": 15363 }, { "epoch": 2.0, "grad_norm": 8.22767162322998, "learning_rate": 2.601212583919166e-06, "loss": 0.5386, "step": 15366 }, { "epoch": 2.0003904724716906, "grad_norm": 2.527303695678711, "learning_rate": 2.5993791620991783e-06, "loss": 0.3502, "step": 15369 }, { "epoch": 2.0007809449433815, "grad_norm": 2.931882858276367, "learning_rate": 2.5975461596626016e-06, "loss": 0.3747, "step": 15372 }, { "epoch": 2.001171417415072, "grad_norm": 2.3010025024414062, "learning_rate": 2.5957135769296543e-06, "loss": 0.3607, "step": 15375 }, { "epoch": 2.001561889886763, "grad_norm": 2.2463386058807373, "learning_rate": 2.5938814142204873e-06, "loss": 0.3475, "step": 15378 }, { "epoch": 2.0019523623584536, "grad_norm": 2.4900364875793457, "learning_rate": 2.592049671855178e-06, "loss": 0.3501, "step": 15381 }, { "epoch": 2.0023428348301446, "grad_norm": 2.3296518325805664, "learning_rate": 2.5902183501537247e-06, "loss": 0.4047, "step": 15384 }, { "epoch": 2.002733307301835, "grad_norm": 2.306422710418701, "learning_rate": 2.5883874494360544e-06, "loss": 0.372, "step": 15387 }, { "epoch": 2.003123779773526, "grad_norm": 2.479424238204956, "learning_rate": 2.5865569700220257e-06, "loss": 0.3311, "step": 15390 }, { "epoch": 2.0035142522452167, "grad_norm": 2.6242549419403076, "learning_rate": 2.584726912231417e-06, "loss": 0.3669, "step": 15393 }, { "epoch": 2.0039047247169073, "grad_norm": 2.8145828247070312, "learning_rate": 2.582897276383933e-06, "loss": 0.411, "step": 15396 }, { "epoch": 2.0042951971885983, "grad_norm": 2.4189159870147705, "learning_rate": 2.5810680627992134e-06, "loss": 0.3667, "step": 15399 }, { "epoch": 2.004685669660289, "grad_norm": 2.6705586910247803, "learning_rate": 2.579239271796814e-06, "loss": 0.3744, "step": 15402 }, { "epoch": 2.00507614213198, "grad_norm": 2.6565775871276855, "learning_rate": 2.5774109036962208e-06, "loss": 0.3976, "step": 15405 }, { "epoch": 2.0054666146036704, "grad_norm": 2.923213005065918, "learning_rate": 2.5755829588168444e-06, "loss": 0.3592, "step": 15408 }, { "epoch": 2.0058570870753614, "grad_norm": 2.6897075176239014, "learning_rate": 2.573755437478027e-06, "loss": 0.3244, "step": 15411 }, { "epoch": 2.006247559547052, "grad_norm": 2.722161293029785, "learning_rate": 2.571928339999031e-06, "loss": 0.2853, "step": 15414 }, { "epoch": 2.0066380320187425, "grad_norm": 2.8695123195648193, "learning_rate": 2.570101666699044e-06, "loss": 0.3987, "step": 15417 }, { "epoch": 2.0070285044904335, "grad_norm": 2.4492764472961426, "learning_rate": 2.5682754178971838e-06, "loss": 0.3593, "step": 15420 }, { "epoch": 2.007418976962124, "grad_norm": 2.7777905464172363, "learning_rate": 2.5664495939124945e-06, "loss": 0.3572, "step": 15423 }, { "epoch": 2.007809449433815, "grad_norm": 2.52078914642334, "learning_rate": 2.564624195063942e-06, "loss": 0.346, "step": 15426 }, { "epoch": 2.0081999219055056, "grad_norm": 2.5213940143585205, "learning_rate": 2.5627992216704167e-06, "loss": 0.3278, "step": 15429 }, { "epoch": 2.0085903943771966, "grad_norm": 2.6475985050201416, "learning_rate": 2.560974674050743e-06, "loss": 0.3902, "step": 15432 }, { "epoch": 2.008980866848887, "grad_norm": 2.8680548667907715, "learning_rate": 2.5591505525236626e-06, "loss": 0.3529, "step": 15435 }, { "epoch": 2.009371339320578, "grad_norm": 2.852402687072754, "learning_rate": 2.557326857407844e-06, "loss": 0.3346, "step": 15438 }, { "epoch": 2.0097618117922686, "grad_norm": 2.724487066268921, "learning_rate": 2.555503589021886e-06, "loss": 0.3578, "step": 15441 }, { "epoch": 2.010152284263959, "grad_norm": 3.298837900161743, "learning_rate": 2.553680747684309e-06, "loss": 0.3784, "step": 15444 }, { "epoch": 2.01054275673565, "grad_norm": 2.7460174560546875, "learning_rate": 2.551858333713557e-06, "loss": 0.3613, "step": 15447 }, { "epoch": 2.0109332292073407, "grad_norm": 2.6980228424072266, "learning_rate": 2.5500363474280066e-06, "loss": 0.3708, "step": 15450 }, { "epoch": 2.0113237016790317, "grad_norm": 2.433493137359619, "learning_rate": 2.548214789145951e-06, "loss": 0.3527, "step": 15453 }, { "epoch": 2.0117141741507223, "grad_norm": 2.826143741607666, "learning_rate": 2.5463936591856153e-06, "loss": 0.3262, "step": 15456 }, { "epoch": 2.0121046466224133, "grad_norm": 2.4797494411468506, "learning_rate": 2.5445729578651427e-06, "loss": 0.3859, "step": 15459 }, { "epoch": 2.012495119094104, "grad_norm": 2.5826714038848877, "learning_rate": 2.5427526855026097e-06, "loss": 0.3374, "step": 15462 }, { "epoch": 2.012885591565795, "grad_norm": 2.6993298530578613, "learning_rate": 2.540932842416015e-06, "loss": 0.3859, "step": 15465 }, { "epoch": 2.0132760640374854, "grad_norm": 2.468688488006592, "learning_rate": 2.5391134289232794e-06, "loss": 0.3522, "step": 15468 }, { "epoch": 2.013666536509176, "grad_norm": 3.0440926551818848, "learning_rate": 2.5372944453422486e-06, "loss": 0.3925, "step": 15471 }, { "epoch": 2.014057008980867, "grad_norm": 2.1184885501861572, "learning_rate": 2.5354758919906995e-06, "loss": 0.2867, "step": 15474 }, { "epoch": 2.0144474814525575, "grad_norm": 2.9124364852905273, "learning_rate": 2.5336577691863286e-06, "loss": 0.3881, "step": 15477 }, { "epoch": 2.0148379539242485, "grad_norm": 2.6721181869506836, "learning_rate": 2.531840077246754e-06, "loss": 0.3463, "step": 15480 }, { "epoch": 2.015228426395939, "grad_norm": 2.512578248977661, "learning_rate": 2.5300228164895275e-06, "loss": 0.3355, "step": 15483 }, { "epoch": 2.01561889886763, "grad_norm": 2.8622634410858154, "learning_rate": 2.5282059872321192e-06, "loss": 0.4173, "step": 15486 }, { "epoch": 2.0160093713393206, "grad_norm": 2.445178985595703, "learning_rate": 2.526389589791923e-06, "loss": 0.3202, "step": 15489 }, { "epoch": 2.016399843811011, "grad_norm": 2.7001988887786865, "learning_rate": 2.524573624486264e-06, "loss": 0.3657, "step": 15492 }, { "epoch": 2.016790316282702, "grad_norm": 2.5869991779327393, "learning_rate": 2.5227580916323846e-06, "loss": 0.4093, "step": 15495 }, { "epoch": 2.0171807887543927, "grad_norm": 2.6643974781036377, "learning_rate": 2.5209429915474536e-06, "loss": 0.3307, "step": 15498 }, { "epoch": 2.0175712612260837, "grad_norm": 2.8480355739593506, "learning_rate": 2.5191283245485686e-06, "loss": 0.3815, "step": 15501 }, { "epoch": 2.017961733697774, "grad_norm": 2.674461603164673, "learning_rate": 2.517314090952745e-06, "loss": 0.3721, "step": 15504 }, { "epoch": 2.018352206169465, "grad_norm": 2.5194647312164307, "learning_rate": 2.515500291076928e-06, "loss": 0.303, "step": 15507 }, { "epoch": 2.0187426786411558, "grad_norm": 2.625236749649048, "learning_rate": 2.5136869252379825e-06, "loss": 0.4136, "step": 15510 }, { "epoch": 2.0191331511128467, "grad_norm": 2.7869434356689453, "learning_rate": 2.511873993752702e-06, "loss": 0.3184, "step": 15513 }, { "epoch": 2.0195236235845373, "grad_norm": 2.7091245651245117, "learning_rate": 2.5100614969378006e-06, "loss": 0.3589, "step": 15516 }, { "epoch": 2.019914096056228, "grad_norm": 3.2992379665374756, "learning_rate": 2.508249435109918e-06, "loss": 0.3377, "step": 15519 }, { "epoch": 2.020304568527919, "grad_norm": 2.686725616455078, "learning_rate": 2.5064378085856146e-06, "loss": 0.3125, "step": 15522 }, { "epoch": 2.0206950409996094, "grad_norm": 2.7225749492645264, "learning_rate": 2.5046266176813825e-06, "loss": 0.3388, "step": 15525 }, { "epoch": 2.0210855134713004, "grad_norm": 2.5445516109466553, "learning_rate": 2.5028158627136313e-06, "loss": 0.3304, "step": 15528 }, { "epoch": 2.021475985942991, "grad_norm": 2.5298032760620117, "learning_rate": 2.5010055439986935e-06, "loss": 0.359, "step": 15531 }, { "epoch": 2.021866458414682, "grad_norm": 2.7906692028045654, "learning_rate": 2.4991956618528317e-06, "loss": 0.3837, "step": 15534 }, { "epoch": 2.0222569308863725, "grad_norm": 2.5515687465667725, "learning_rate": 2.4973862165922268e-06, "loss": 0.3741, "step": 15537 }, { "epoch": 2.0226474033580635, "grad_norm": 2.7983710765838623, "learning_rate": 2.495577208532984e-06, "loss": 0.4018, "step": 15540 }, { "epoch": 2.023037875829754, "grad_norm": 2.931863784790039, "learning_rate": 2.493768637991135e-06, "loss": 0.3165, "step": 15543 }, { "epoch": 2.0234283483014446, "grad_norm": 2.761305809020996, "learning_rate": 2.491960505282632e-06, "loss": 0.3567, "step": 15546 }, { "epoch": 2.0238188207731356, "grad_norm": 2.8153398036956787, "learning_rate": 2.4901528107233535e-06, "loss": 0.3898, "step": 15549 }, { "epoch": 2.024209293244826, "grad_norm": 2.77380633354187, "learning_rate": 2.4883455546290975e-06, "loss": 0.3286, "step": 15552 }, { "epoch": 2.024599765716517, "grad_norm": 2.8505361080169678, "learning_rate": 2.486538737315591e-06, "loss": 0.3429, "step": 15555 }, { "epoch": 2.0249902381882077, "grad_norm": 2.722792863845825, "learning_rate": 2.4847323590984797e-06, "loss": 0.4518, "step": 15558 }, { "epoch": 2.0253807106598987, "grad_norm": 2.7249341011047363, "learning_rate": 2.482926420293332e-06, "loss": 0.3427, "step": 15561 }, { "epoch": 2.025771183131589, "grad_norm": 2.7627315521240234, "learning_rate": 2.4811209212156455e-06, "loss": 0.3595, "step": 15564 }, { "epoch": 2.0261616556032798, "grad_norm": 2.6366686820983887, "learning_rate": 2.479315862180835e-06, "loss": 0.3352, "step": 15567 }, { "epoch": 2.0265521280749708, "grad_norm": 2.850191354751587, "learning_rate": 2.477511243504241e-06, "loss": 0.3398, "step": 15570 }, { "epoch": 2.0269426005466613, "grad_norm": 3.085858106613159, "learning_rate": 2.475707065501124e-06, "loss": 0.3551, "step": 15573 }, { "epoch": 2.0273330730183523, "grad_norm": 2.575150728225708, "learning_rate": 2.473903328486674e-06, "loss": 0.3393, "step": 15576 }, { "epoch": 2.027723545490043, "grad_norm": 2.692265748977661, "learning_rate": 2.4721000327759988e-06, "loss": 0.4056, "step": 15579 }, { "epoch": 2.028114017961734, "grad_norm": 2.6966402530670166, "learning_rate": 2.4702971786841278e-06, "loss": 0.3393, "step": 15582 }, { "epoch": 2.0285044904334244, "grad_norm": 2.7873032093048096, "learning_rate": 2.46849476652602e-06, "loss": 0.3656, "step": 15585 }, { "epoch": 2.0288949629051154, "grad_norm": 2.6634371280670166, "learning_rate": 2.4666927966165487e-06, "loss": 0.3544, "step": 15588 }, { "epoch": 2.029285435376806, "grad_norm": 2.635133981704712, "learning_rate": 2.464891269270519e-06, "loss": 0.4251, "step": 15591 }, { "epoch": 2.0296759078484965, "grad_norm": 2.715653657913208, "learning_rate": 2.4630901848026494e-06, "loss": 0.3665, "step": 15594 }, { "epoch": 2.0300663803201875, "grad_norm": 4.321242332458496, "learning_rate": 2.4612895435275896e-06, "loss": 0.3857, "step": 15597 }, { "epoch": 2.030456852791878, "grad_norm": 2.4996755123138428, "learning_rate": 2.4594893457599056e-06, "loss": 0.3667, "step": 15600 }, { "epoch": 2.030847325263569, "grad_norm": 2.7729170322418213, "learning_rate": 2.4576895918140866e-06, "loss": 0.3656, "step": 15603 }, { "epoch": 2.0312377977352596, "grad_norm": 2.7078745365142822, "learning_rate": 2.45589028200455e-06, "loss": 0.3683, "step": 15606 }, { "epoch": 2.0316282702069506, "grad_norm": 2.8833117485046387, "learning_rate": 2.4540914166456286e-06, "loss": 0.353, "step": 15609 }, { "epoch": 2.032018742678641, "grad_norm": 3.260715961456299, "learning_rate": 2.452292996051581e-06, "loss": 0.4125, "step": 15612 }, { "epoch": 2.0324092151503317, "grad_norm": 2.7390503883361816, "learning_rate": 2.450495020536586e-06, "loss": 0.4213, "step": 15615 }, { "epoch": 2.0327996876220227, "grad_norm": 2.833608865737915, "learning_rate": 2.4486974904147488e-06, "loss": 0.3744, "step": 15618 }, { "epoch": 2.0331901600937132, "grad_norm": 3.231580972671509, "learning_rate": 2.446900406000093e-06, "loss": 0.3103, "step": 15621 }, { "epoch": 2.0335806325654042, "grad_norm": 2.8441002368927, "learning_rate": 2.445103767606563e-06, "loss": 0.3917, "step": 15624 }, { "epoch": 2.0339711050370948, "grad_norm": 2.8229074478149414, "learning_rate": 2.443307575548033e-06, "loss": 0.345, "step": 15627 }, { "epoch": 2.0343615775087858, "grad_norm": 2.594104766845703, "learning_rate": 2.4415118301382885e-06, "loss": 0.3214, "step": 15630 }, { "epoch": 2.0347520499804763, "grad_norm": 3.496530532836914, "learning_rate": 2.4397165316910472e-06, "loss": 0.4241, "step": 15633 }, { "epoch": 2.0351425224521673, "grad_norm": 2.7391481399536133, "learning_rate": 2.4379216805199396e-06, "loss": 0.3217, "step": 15636 }, { "epoch": 2.035532994923858, "grad_norm": 2.622236728668213, "learning_rate": 2.436127276938526e-06, "loss": 0.4088, "step": 15639 }, { "epoch": 2.0359234673955484, "grad_norm": 2.8432962894439697, "learning_rate": 2.434333321260285e-06, "loss": 0.3536, "step": 15642 }, { "epoch": 2.0363139398672394, "grad_norm": 2.6063525676727295, "learning_rate": 2.432539813798612e-06, "loss": 0.378, "step": 15645 }, { "epoch": 2.03670441233893, "grad_norm": 2.529179334640503, "learning_rate": 2.430746754866835e-06, "loss": 0.3053, "step": 15648 }, { "epoch": 2.037094884810621, "grad_norm": 2.843583583831787, "learning_rate": 2.428954144778195e-06, "loss": 0.4077, "step": 15651 }, { "epoch": 2.0374853572823115, "grad_norm": 2.8022096157073975, "learning_rate": 2.4271619838458552e-06, "loss": 0.3926, "step": 15654 }, { "epoch": 2.0378758297540025, "grad_norm": 2.6679294109344482, "learning_rate": 2.4253702723829066e-06, "loss": 0.3386, "step": 15657 }, { "epoch": 2.038266302225693, "grad_norm": 2.6503212451934814, "learning_rate": 2.423579010702355e-06, "loss": 0.3481, "step": 15660 }, { "epoch": 2.038656774697384, "grad_norm": 2.8369200229644775, "learning_rate": 2.4217881991171297e-06, "loss": 0.4236, "step": 15663 }, { "epoch": 2.0390472471690746, "grad_norm": 2.750505208969116, "learning_rate": 2.4199978379400806e-06, "loss": 0.4346, "step": 15666 }, { "epoch": 2.039437719640765, "grad_norm": 2.6629180908203125, "learning_rate": 2.418207927483984e-06, "loss": 0.3667, "step": 15669 }, { "epoch": 2.039828192112456, "grad_norm": 2.9319400787353516, "learning_rate": 2.416418468061529e-06, "loss": 0.4105, "step": 15672 }, { "epoch": 2.0402186645841467, "grad_norm": 3.028381109237671, "learning_rate": 2.4146294599853348e-06, "loss": 0.3763, "step": 15675 }, { "epoch": 2.0406091370558377, "grad_norm": 2.613823890686035, "learning_rate": 2.412840903567933e-06, "loss": 0.3848, "step": 15678 }, { "epoch": 2.0409996095275282, "grad_norm": 2.842607021331787, "learning_rate": 2.411052799121784e-06, "loss": 0.3756, "step": 15681 }, { "epoch": 2.0413900819992192, "grad_norm": 2.8494303226470947, "learning_rate": 2.409265146959265e-06, "loss": 0.382, "step": 15684 }, { "epoch": 2.04178055447091, "grad_norm": 3.151146650314331, "learning_rate": 2.4074779473926734e-06, "loss": 0.349, "step": 15687 }, { "epoch": 2.0421710269426003, "grad_norm": 2.663558006286621, "learning_rate": 2.405691200734232e-06, "loss": 0.4037, "step": 15690 }, { "epoch": 2.0425614994142913, "grad_norm": 2.7715821266174316, "learning_rate": 2.40390490729608e-06, "loss": 0.333, "step": 15693 }, { "epoch": 2.042951971885982, "grad_norm": 2.496654748916626, "learning_rate": 2.4021190673902777e-06, "loss": 0.3529, "step": 15696 }, { "epoch": 2.043342444357673, "grad_norm": 2.606013059616089, "learning_rate": 2.4003336813288112e-06, "loss": 0.3328, "step": 15699 }, { "epoch": 2.0437329168293634, "grad_norm": 2.736743211746216, "learning_rate": 2.3985487494235814e-06, "loss": 0.3804, "step": 15702 }, { "epoch": 2.0441233893010544, "grad_norm": 2.691873550415039, "learning_rate": 2.396764271986411e-06, "loss": 0.3932, "step": 15705 }, { "epoch": 2.044513861772745, "grad_norm": 3.0015580654144287, "learning_rate": 2.3949802493290475e-06, "loss": 0.3425, "step": 15708 }, { "epoch": 2.044904334244436, "grad_norm": 3.084564208984375, "learning_rate": 2.393196681763154e-06, "loss": 0.3792, "step": 15711 }, { "epoch": 2.0452948067161265, "grad_norm": 2.7556662559509277, "learning_rate": 2.3914135696003144e-06, "loss": 0.3621, "step": 15714 }, { "epoch": 2.045685279187817, "grad_norm": 2.8170785903930664, "learning_rate": 2.3896309131520367e-06, "loss": 0.336, "step": 15717 }, { "epoch": 2.046075751659508, "grad_norm": 2.6950416564941406, "learning_rate": 2.387848712729749e-06, "loss": 0.3685, "step": 15720 }, { "epoch": 2.0464662241311986, "grad_norm": 3.019321918487549, "learning_rate": 2.386066968644796e-06, "loss": 0.3972, "step": 15723 }, { "epoch": 2.0468566966028896, "grad_norm": 2.6395628452301025, "learning_rate": 2.384285681208445e-06, "loss": 0.3128, "step": 15726 }, { "epoch": 2.04724716907458, "grad_norm": 2.728309392929077, "learning_rate": 2.3825048507318806e-06, "loss": 0.3894, "step": 15729 }, { "epoch": 2.047637641546271, "grad_norm": 2.9258289337158203, "learning_rate": 2.380724477526214e-06, "loss": 0.4404, "step": 15732 }, { "epoch": 2.0480281140179617, "grad_norm": 3.706683397293091, "learning_rate": 2.3789445619024716e-06, "loss": 0.3687, "step": 15735 }, { "epoch": 2.0484185864896527, "grad_norm": 2.782982349395752, "learning_rate": 2.3771651041715978e-06, "loss": 0.3007, "step": 15738 }, { "epoch": 2.0488090589613432, "grad_norm": 2.7452354431152344, "learning_rate": 2.3753861046444647e-06, "loss": 0.3442, "step": 15741 }, { "epoch": 2.049199531433034, "grad_norm": 2.8988213539123535, "learning_rate": 2.373607563631858e-06, "loss": 0.3732, "step": 15744 }, { "epoch": 2.049590003904725, "grad_norm": 3.2119078636169434, "learning_rate": 2.371829481444483e-06, "loss": 0.4084, "step": 15747 }, { "epoch": 2.0499804763764153, "grad_norm": 2.665807008743286, "learning_rate": 2.3700518583929704e-06, "loss": 0.3438, "step": 15750 }, { "epoch": 2.0503709488481063, "grad_norm": 3.3220880031585693, "learning_rate": 2.3682746947878653e-06, "loss": 0.3147, "step": 15753 }, { "epoch": 2.050761421319797, "grad_norm": 2.919126510620117, "learning_rate": 2.3664979909396334e-06, "loss": 0.3434, "step": 15756 }, { "epoch": 2.051151893791488, "grad_norm": 2.8159265518188477, "learning_rate": 2.364721747158662e-06, "loss": 0.37, "step": 15759 }, { "epoch": 2.0515423662631784, "grad_norm": 2.8563246726989746, "learning_rate": 2.3629459637552593e-06, "loss": 0.3405, "step": 15762 }, { "epoch": 2.051932838734869, "grad_norm": 2.744643449783325, "learning_rate": 2.3611706410396497e-06, "loss": 0.4583, "step": 15765 }, { "epoch": 2.05232331120656, "grad_norm": 2.9006803035736084, "learning_rate": 2.3593957793219757e-06, "loss": 0.4063, "step": 15768 }, { "epoch": 2.0527137836782505, "grad_norm": 2.7164108753204346, "learning_rate": 2.357621378912306e-06, "loss": 0.3646, "step": 15771 }, { "epoch": 2.0531042561499415, "grad_norm": 2.580453395843506, "learning_rate": 2.3558474401206222e-06, "loss": 0.3469, "step": 15774 }, { "epoch": 2.053494728621632, "grad_norm": 3.763662099838257, "learning_rate": 2.354073963256829e-06, "loss": 0.3854, "step": 15777 }, { "epoch": 2.053885201093323, "grad_norm": 2.7697255611419678, "learning_rate": 2.352300948630745e-06, "loss": 0.294, "step": 15780 }, { "epoch": 2.0542756735650136, "grad_norm": 2.634119749069214, "learning_rate": 2.350528396552118e-06, "loss": 0.3524, "step": 15783 }, { "epoch": 2.0546661460367046, "grad_norm": 2.6129114627838135, "learning_rate": 2.348756307330607e-06, "loss": 0.3661, "step": 15786 }, { "epoch": 2.055056618508395, "grad_norm": 3.03295636177063, "learning_rate": 2.3469846812757892e-06, "loss": 0.3947, "step": 15789 }, { "epoch": 2.0554470909800857, "grad_norm": 2.5799500942230225, "learning_rate": 2.345213518697168e-06, "loss": 0.3157, "step": 15792 }, { "epoch": 2.0558375634517767, "grad_norm": 2.8251993656158447, "learning_rate": 2.343442819904161e-06, "loss": 0.3364, "step": 15795 }, { "epoch": 2.0562280359234673, "grad_norm": 3.4497342109680176, "learning_rate": 2.341672585206102e-06, "loss": 0.3281, "step": 15798 }, { "epoch": 2.0566185083951583, "grad_norm": 2.605712652206421, "learning_rate": 2.339902814912251e-06, "loss": 0.4153, "step": 15801 }, { "epoch": 2.057008980866849, "grad_norm": 2.881829261779785, "learning_rate": 2.3381335093317837e-06, "loss": 0.3554, "step": 15804 }, { "epoch": 2.05739945333854, "grad_norm": 2.6509134769439697, "learning_rate": 2.3363646687737925e-06, "loss": 0.3419, "step": 15807 }, { "epoch": 2.0577899258102303, "grad_norm": 2.6324145793914795, "learning_rate": 2.3345962935472884e-06, "loss": 0.3845, "step": 15810 }, { "epoch": 2.0581803982819213, "grad_norm": 3.0626988410949707, "learning_rate": 2.3328283839612063e-06, "loss": 0.3581, "step": 15813 }, { "epoch": 2.058570870753612, "grad_norm": 2.609157085418701, "learning_rate": 2.331060940324395e-06, "loss": 0.3613, "step": 15816 }, { "epoch": 2.0589613432253024, "grad_norm": 2.7834722995758057, "learning_rate": 2.3292939629456206e-06, "loss": 0.418, "step": 15819 }, { "epoch": 2.0593518156969934, "grad_norm": 2.971318006515503, "learning_rate": 2.3275274521335743e-06, "loss": 0.3108, "step": 15822 }, { "epoch": 2.059742288168684, "grad_norm": 3.0066676139831543, "learning_rate": 2.3257614081968606e-06, "loss": 0.3441, "step": 15825 }, { "epoch": 2.060132760640375, "grad_norm": 2.56901216506958, "learning_rate": 2.3239958314440027e-06, "loss": 0.3672, "step": 15828 }, { "epoch": 2.0605232331120655, "grad_norm": 2.7101621627807617, "learning_rate": 2.3222307221834417e-06, "loss": 0.3722, "step": 15831 }, { "epoch": 2.0609137055837565, "grad_norm": 2.768842935562134, "learning_rate": 2.3204660807235426e-06, "loss": 0.4064, "step": 15834 }, { "epoch": 2.061304178055447, "grad_norm": 2.6292800903320312, "learning_rate": 2.3187019073725816e-06, "loss": 0.3984, "step": 15837 }, { "epoch": 2.0616946505271376, "grad_norm": 2.5181849002838135, "learning_rate": 2.3169382024387547e-06, "loss": 0.3555, "step": 15840 }, { "epoch": 2.0620851229988286, "grad_norm": 2.9010989665985107, "learning_rate": 2.3151749662301803e-06, "loss": 0.3347, "step": 15843 }, { "epoch": 2.062475595470519, "grad_norm": 2.7419636249542236, "learning_rate": 2.313412199054893e-06, "loss": 0.3382, "step": 15846 }, { "epoch": 2.06286606794221, "grad_norm": 2.6093106269836426, "learning_rate": 2.3116499012208428e-06, "loss": 0.3602, "step": 15849 }, { "epoch": 2.0632565404139007, "grad_norm": 2.8060364723205566, "learning_rate": 2.3098880730358968e-06, "loss": 0.3394, "step": 15852 }, { "epoch": 2.0636470128855917, "grad_norm": 2.528196334838867, "learning_rate": 2.308126714807848e-06, "loss": 0.3496, "step": 15855 }, { "epoch": 2.0640374853572823, "grad_norm": 2.6221911907196045, "learning_rate": 2.306365826844399e-06, "loss": 0.3142, "step": 15858 }, { "epoch": 2.0644279578289733, "grad_norm": 2.5942671298980713, "learning_rate": 2.3046054094531715e-06, "loss": 0.3163, "step": 15861 }, { "epoch": 2.064818430300664, "grad_norm": 2.879770040512085, "learning_rate": 2.3028454629417106e-06, "loss": 0.3983, "step": 15864 }, { "epoch": 2.0652089027723544, "grad_norm": 3.0213136672973633, "learning_rate": 2.3010859876174734e-06, "loss": 0.3028, "step": 15867 }, { "epoch": 2.0655993752440454, "grad_norm": 2.5107553005218506, "learning_rate": 2.2993269837878346e-06, "loss": 0.355, "step": 15870 }, { "epoch": 2.065989847715736, "grad_norm": 2.631068229675293, "learning_rate": 2.297568451760092e-06, "loss": 0.387, "step": 15873 }, { "epoch": 2.066380320187427, "grad_norm": 3.205779552459717, "learning_rate": 2.295810391841456e-06, "loss": 0.3629, "step": 15876 }, { "epoch": 2.0667707926591175, "grad_norm": 2.532114028930664, "learning_rate": 2.294052804339056e-06, "loss": 0.4009, "step": 15879 }, { "epoch": 2.0671612651308084, "grad_norm": 2.673632860183716, "learning_rate": 2.292295689559934e-06, "loss": 0.3439, "step": 15882 }, { "epoch": 2.067551737602499, "grad_norm": 2.8151535987854004, "learning_rate": 2.2905390478110635e-06, "loss": 0.3562, "step": 15885 }, { "epoch": 2.0679422100741895, "grad_norm": 2.834294557571411, "learning_rate": 2.2887828793993212e-06, "loss": 0.3683, "step": 15888 }, { "epoch": 2.0683326825458805, "grad_norm": 2.7955880165100098, "learning_rate": 2.287027184631506e-06, "loss": 0.4139, "step": 15891 }, { "epoch": 2.068723155017571, "grad_norm": 3.368584394454956, "learning_rate": 2.285271963814333e-06, "loss": 0.3226, "step": 15894 }, { "epoch": 2.069113627489262, "grad_norm": 2.835724353790283, "learning_rate": 2.2835172172544384e-06, "loss": 0.3797, "step": 15897 }, { "epoch": 2.0695040999609526, "grad_norm": 3.315255880355835, "learning_rate": 2.281762945258372e-06, "loss": 0.379, "step": 15900 }, { "epoch": 2.0698945724326436, "grad_norm": 2.753709316253662, "learning_rate": 2.2800091481325983e-06, "loss": 0.3426, "step": 15903 }, { "epoch": 2.070285044904334, "grad_norm": 3.0424439907073975, "learning_rate": 2.278255826183506e-06, "loss": 0.4311, "step": 15906 }, { "epoch": 2.070675517376025, "grad_norm": 2.6219685077667236, "learning_rate": 2.2765029797173954e-06, "loss": 0.3532, "step": 15909 }, { "epoch": 2.0710659898477157, "grad_norm": 3.344362735748291, "learning_rate": 2.274750609040483e-06, "loss": 0.3858, "step": 15912 }, { "epoch": 2.0714564623194063, "grad_norm": 3.3275110721588135, "learning_rate": 2.2729987144589083e-06, "loss": 0.3672, "step": 15915 }, { "epoch": 2.0718469347910973, "grad_norm": 2.963124990463257, "learning_rate": 2.271247296278721e-06, "loss": 0.3988, "step": 15918 }, { "epoch": 2.072237407262788, "grad_norm": 2.8209590911865234, "learning_rate": 2.2694963548058885e-06, "loss": 0.3627, "step": 15921 }, { "epoch": 2.072627879734479, "grad_norm": 2.65281081199646, "learning_rate": 2.2677458903462994e-06, "loss": 0.4011, "step": 15924 }, { "epoch": 2.0730183522061694, "grad_norm": 2.652015209197998, "learning_rate": 2.2659959032057566e-06, "loss": 0.3114, "step": 15927 }, { "epoch": 2.0734088246778604, "grad_norm": 2.65907883644104, "learning_rate": 2.2642463936899785e-06, "loss": 0.3698, "step": 15930 }, { "epoch": 2.073799297149551, "grad_norm": 2.8742198944091797, "learning_rate": 2.2624973621045983e-06, "loss": 0.3786, "step": 15933 }, { "epoch": 2.074189769621242, "grad_norm": 2.9052553176879883, "learning_rate": 2.2607488087551716e-06, "loss": 0.3868, "step": 15936 }, { "epoch": 2.0745802420929325, "grad_norm": 2.704512596130371, "learning_rate": 2.2590007339471657e-06, "loss": 0.3433, "step": 15939 }, { "epoch": 2.074970714564623, "grad_norm": 2.6483607292175293, "learning_rate": 2.257253137985966e-06, "loss": 0.3306, "step": 15942 }, { "epoch": 2.075361187036314, "grad_norm": 2.474104642868042, "learning_rate": 2.25550602117687e-06, "loss": 0.3005, "step": 15945 }, { "epoch": 2.0757516595080046, "grad_norm": 2.596907138824463, "learning_rate": 2.2537593838251016e-06, "loss": 0.3277, "step": 15948 }, { "epoch": 2.0761421319796955, "grad_norm": 2.7480621337890625, "learning_rate": 2.252013226235791e-06, "loss": 0.4159, "step": 15951 }, { "epoch": 2.076532604451386, "grad_norm": 2.6667375564575195, "learning_rate": 2.250267548713987e-06, "loss": 0.3156, "step": 15954 }, { "epoch": 2.076923076923077, "grad_norm": 3.392796516418457, "learning_rate": 2.2485223515646597e-06, "loss": 0.3844, "step": 15957 }, { "epoch": 2.0773135493947676, "grad_norm": 2.91853404045105, "learning_rate": 2.246777635092689e-06, "loss": 0.4067, "step": 15960 }, { "epoch": 2.0777040218664586, "grad_norm": 2.7192370891571045, "learning_rate": 2.245033399602872e-06, "loss": 0.3416, "step": 15963 }, { "epoch": 2.078094494338149, "grad_norm": 2.7866413593292236, "learning_rate": 2.2432896453999243e-06, "loss": 0.3529, "step": 15966 }, { "epoch": 2.0784849668098397, "grad_norm": 2.8930845260620117, "learning_rate": 2.2415463727884785e-06, "loss": 0.3483, "step": 15969 }, { "epoch": 2.0788754392815307, "grad_norm": 2.707551956176758, "learning_rate": 2.239803582073078e-06, "loss": 0.3398, "step": 15972 }, { "epoch": 2.0792659117532213, "grad_norm": 2.8100128173828125, "learning_rate": 2.2380612735581835e-06, "loss": 0.3944, "step": 15975 }, { "epoch": 2.0796563842249123, "grad_norm": 2.5131068229675293, "learning_rate": 2.236319447548176e-06, "loss": 0.3561, "step": 15978 }, { "epoch": 2.080046856696603, "grad_norm": 2.6995739936828613, "learning_rate": 2.234578104347347e-06, "loss": 0.3422, "step": 15981 }, { "epoch": 2.080437329168294, "grad_norm": 4.326542854309082, "learning_rate": 2.2328372442599057e-06, "loss": 0.3936, "step": 15984 }, { "epoch": 2.0808278016399844, "grad_norm": 2.6090240478515625, "learning_rate": 2.231096867589975e-06, "loss": 0.3217, "step": 15987 }, { "epoch": 2.081218274111675, "grad_norm": 3.206176519393921, "learning_rate": 2.2293569746415976e-06, "loss": 0.3001, "step": 15990 }, { "epoch": 2.081608746583366, "grad_norm": 2.488536834716797, "learning_rate": 2.2276175657187288e-06, "loss": 0.2904, "step": 15993 }, { "epoch": 2.0819992190550565, "grad_norm": 2.5979604721069336, "learning_rate": 2.225878641125237e-06, "loss": 0.3798, "step": 15996 }, { "epoch": 2.0823896915267475, "grad_norm": 2.6461970806121826, "learning_rate": 2.2241402011649127e-06, "loss": 0.3123, "step": 15999 }, { "epoch": 2.082780163998438, "grad_norm": 2.82248592376709, "learning_rate": 2.2224022461414553e-06, "loss": 0.374, "step": 16002 }, { "epoch": 2.083170636470129, "grad_norm": 2.730311632156372, "learning_rate": 2.220664776358481e-06, "loss": 0.44, "step": 16005 }, { "epoch": 2.0835611089418196, "grad_norm": 2.5103518962860107, "learning_rate": 2.2189277921195228e-06, "loss": 0.3888, "step": 16008 }, { "epoch": 2.0839515814135106, "grad_norm": 2.6272377967834473, "learning_rate": 2.217191293728031e-06, "loss": 0.3516, "step": 16011 }, { "epoch": 2.084342053885201, "grad_norm": 2.5898125171661377, "learning_rate": 2.2154552814873663e-06, "loss": 0.3519, "step": 16014 }, { "epoch": 2.0847325263568917, "grad_norm": 2.7031917572021484, "learning_rate": 2.213719755700804e-06, "loss": 0.3611, "step": 16017 }, { "epoch": 2.0851229988285827, "grad_norm": 2.8753886222839355, "learning_rate": 2.21198471667154e-06, "loss": 0.4155, "step": 16020 }, { "epoch": 2.085513471300273, "grad_norm": 2.8421316146850586, "learning_rate": 2.210250164702682e-06, "loss": 0.3379, "step": 16023 }, { "epoch": 2.085903943771964, "grad_norm": 2.7893972396850586, "learning_rate": 2.208516100097249e-06, "loss": 0.3434, "step": 16026 }, { "epoch": 2.0862944162436547, "grad_norm": 2.8135573863983154, "learning_rate": 2.206782523158183e-06, "loss": 0.3668, "step": 16029 }, { "epoch": 2.0866848887153457, "grad_norm": 2.7129807472229004, "learning_rate": 2.2050494341883344e-06, "loss": 0.3845, "step": 16032 }, { "epoch": 2.0870753611870363, "grad_norm": 3.1642825603485107, "learning_rate": 2.203316833490469e-06, "loss": 0.3545, "step": 16035 }, { "epoch": 2.087465833658727, "grad_norm": 2.634582757949829, "learning_rate": 2.2015847213672686e-06, "loss": 0.3136, "step": 16038 }, { "epoch": 2.087856306130418, "grad_norm": 3.1089882850646973, "learning_rate": 2.1998530981213318e-06, "loss": 0.3507, "step": 16041 }, { "epoch": 2.0882467786021084, "grad_norm": 3.0786855220794678, "learning_rate": 2.1981219640551683e-06, "loss": 0.3322, "step": 16044 }, { "epoch": 2.0886372510737994, "grad_norm": 2.9572317600250244, "learning_rate": 2.1963913194712013e-06, "loss": 0.4171, "step": 16047 }, { "epoch": 2.08902772354549, "grad_norm": 3.005173921585083, "learning_rate": 2.1946611646717726e-06, "loss": 0.3304, "step": 16050 }, { "epoch": 2.089418196017181, "grad_norm": 2.7299466133117676, "learning_rate": 2.192931499959139e-06, "loss": 0.3869, "step": 16053 }, { "epoch": 2.0898086684888715, "grad_norm": 3.0412538051605225, "learning_rate": 2.191202325635467e-06, "loss": 0.3458, "step": 16056 }, { "epoch": 2.0901991409605625, "grad_norm": 3.1715352535247803, "learning_rate": 2.1894736420028383e-06, "loss": 0.3498, "step": 16059 }, { "epoch": 2.090589613432253, "grad_norm": 2.733077049255371, "learning_rate": 2.1877454493632533e-06, "loss": 0.3784, "step": 16062 }, { "epoch": 2.0909800859039436, "grad_norm": 2.767333745956421, "learning_rate": 2.1860177480186224e-06, "loss": 0.3984, "step": 16065 }, { "epoch": 2.0913705583756346, "grad_norm": 2.9046363830566406, "learning_rate": 2.1842905382707695e-06, "loss": 0.3301, "step": 16068 }, { "epoch": 2.091761030847325, "grad_norm": 2.561779737472534, "learning_rate": 2.182563820421438e-06, "loss": 0.3179, "step": 16071 }, { "epoch": 2.092151503319016, "grad_norm": 2.805349111557007, "learning_rate": 2.18083759477228e-06, "loss": 0.4154, "step": 16074 }, { "epoch": 2.0925419757907067, "grad_norm": 2.7377660274505615, "learning_rate": 2.1791118616248615e-06, "loss": 0.3782, "step": 16077 }, { "epoch": 2.0929324482623977, "grad_norm": 2.9340991973876953, "learning_rate": 2.1773866212806684e-06, "loss": 0.3573, "step": 16080 }, { "epoch": 2.093322920734088, "grad_norm": 3.0035879611968994, "learning_rate": 2.1756618740410944e-06, "loss": 0.3522, "step": 16083 }, { "epoch": 2.093713393205779, "grad_norm": 2.919433832168579, "learning_rate": 2.1739376202074504e-06, "loss": 0.4147, "step": 16086 }, { "epoch": 2.0941038656774698, "grad_norm": 2.8312833309173584, "learning_rate": 2.172213860080956e-06, "loss": 0.3854, "step": 16089 }, { "epoch": 2.0944943381491603, "grad_norm": 2.7150704860687256, "learning_rate": 2.1704905939627523e-06, "loss": 0.3951, "step": 16092 }, { "epoch": 2.0948848106208513, "grad_norm": 2.916109323501587, "learning_rate": 2.168767822153891e-06, "loss": 0.3698, "step": 16095 }, { "epoch": 2.095275283092542, "grad_norm": 2.8838951587677, "learning_rate": 2.1670455449553352e-06, "loss": 0.3452, "step": 16098 }, { "epoch": 2.095665755564233, "grad_norm": 2.8266618251800537, "learning_rate": 2.1653237626679607e-06, "loss": 0.3314, "step": 16101 }, { "epoch": 2.0960562280359234, "grad_norm": 2.667581796646118, "learning_rate": 2.163602475592564e-06, "loss": 0.3685, "step": 16104 }, { "epoch": 2.0964467005076144, "grad_norm": 2.537261724472046, "learning_rate": 2.1618816840298474e-06, "loss": 0.2771, "step": 16107 }, { "epoch": 2.096837172979305, "grad_norm": 2.6051278114318848, "learning_rate": 2.1601613882804283e-06, "loss": 0.346, "step": 16110 }, { "epoch": 2.0972276454509955, "grad_norm": 3.2211477756500244, "learning_rate": 2.158441588644843e-06, "loss": 0.3435, "step": 16113 }, { "epoch": 2.0976181179226865, "grad_norm": 2.916541576385498, "learning_rate": 2.1567222854235337e-06, "loss": 0.3735, "step": 16116 }, { "epoch": 2.098008590394377, "grad_norm": 2.6063573360443115, "learning_rate": 2.1550034789168584e-06, "loss": 0.3075, "step": 16119 }, { "epoch": 2.098399062866068, "grad_norm": 2.746978759765625, "learning_rate": 2.1532851694250916e-06, "loss": 0.3371, "step": 16122 }, { "epoch": 2.0987895353377586, "grad_norm": 2.748178243637085, "learning_rate": 2.1515673572484173e-06, "loss": 0.3474, "step": 16125 }, { "epoch": 2.0991800078094496, "grad_norm": 2.8788228034973145, "learning_rate": 2.1498500426869325e-06, "loss": 0.3808, "step": 16128 }, { "epoch": 2.09957048028114, "grad_norm": 3.1142728328704834, "learning_rate": 2.1481332260406502e-06, "loss": 0.4338, "step": 16131 }, { "epoch": 2.099960952752831, "grad_norm": 2.57977032661438, "learning_rate": 2.1464169076094922e-06, "loss": 0.3976, "step": 16134 }, { "epoch": 2.1003514252245217, "grad_norm": 2.79011869430542, "learning_rate": 2.1447010876932992e-06, "loss": 0.3907, "step": 16137 }, { "epoch": 2.1007418976962122, "grad_norm": 2.7332963943481445, "learning_rate": 2.142985766591818e-06, "loss": 0.3246, "step": 16140 }, { "epoch": 2.101132370167903, "grad_norm": 3.274784803390503, "learning_rate": 2.141270944604715e-06, "loss": 0.3906, "step": 16143 }, { "epoch": 2.1015228426395938, "grad_norm": 2.857478618621826, "learning_rate": 2.139556622031564e-06, "loss": 0.5127, "step": 16146 }, { "epoch": 2.1019133151112848, "grad_norm": 2.596496105194092, "learning_rate": 2.1378427991718533e-06, "loss": 0.3371, "step": 16149 }, { "epoch": 2.1023037875829753, "grad_norm": 2.747124671936035, "learning_rate": 2.1361294763249828e-06, "loss": 0.3566, "step": 16152 }, { "epoch": 2.1026942600546663, "grad_norm": 2.936936140060425, "learning_rate": 2.13441665379027e-06, "loss": 0.3858, "step": 16155 }, { "epoch": 2.103084732526357, "grad_norm": 2.8474388122558594, "learning_rate": 2.1327043318669396e-06, "loss": 0.3285, "step": 16158 }, { "epoch": 2.1034752049980474, "grad_norm": 2.6837317943573, "learning_rate": 2.130992510854128e-06, "loss": 0.3799, "step": 16161 }, { "epoch": 2.1038656774697384, "grad_norm": 3.10606050491333, "learning_rate": 2.1292811910508916e-06, "loss": 0.3795, "step": 16164 }, { "epoch": 2.104256149941429, "grad_norm": 2.5549697875976562, "learning_rate": 2.127570372756192e-06, "loss": 0.3574, "step": 16167 }, { "epoch": 2.10464662241312, "grad_norm": 2.6415367126464844, "learning_rate": 2.1258600562689035e-06, "loss": 0.3661, "step": 16170 }, { "epoch": 2.1050370948848105, "grad_norm": 2.8995654582977295, "learning_rate": 2.124150241887819e-06, "loss": 0.3972, "step": 16173 }, { "epoch": 2.1054275673565015, "grad_norm": 2.8920884132385254, "learning_rate": 2.1224409299116356e-06, "loss": 0.3544, "step": 16176 }, { "epoch": 2.105818039828192, "grad_norm": 2.7235703468322754, "learning_rate": 2.1207321206389702e-06, "loss": 0.3626, "step": 16179 }, { "epoch": 2.106208512299883, "grad_norm": 2.957944869995117, "learning_rate": 2.119023814368344e-06, "loss": 0.3354, "step": 16182 }, { "epoch": 2.1065989847715736, "grad_norm": 2.9017412662506104, "learning_rate": 2.117316011398199e-06, "loss": 0.4035, "step": 16185 }, { "epoch": 2.106989457243264, "grad_norm": 2.5684914588928223, "learning_rate": 2.115608712026882e-06, "loss": 0.313, "step": 16188 }, { "epoch": 2.107379929714955, "grad_norm": 2.6348636150360107, "learning_rate": 2.113901916552653e-06, "loss": 0.3506, "step": 16191 }, { "epoch": 2.1077704021866457, "grad_norm": 2.8889946937561035, "learning_rate": 2.1121956252736903e-06, "loss": 0.3517, "step": 16194 }, { "epoch": 2.1081608746583367, "grad_norm": 3.1232848167419434, "learning_rate": 2.1104898384880766e-06, "loss": 0.3806, "step": 16197 }, { "epoch": 2.1085513471300272, "grad_norm": 2.5369369983673096, "learning_rate": 2.10878455649381e-06, "loss": 0.3079, "step": 16200 }, { "epoch": 2.1089418196017182, "grad_norm": 2.765028953552246, "learning_rate": 2.1070797795887965e-06, "loss": 0.3596, "step": 16203 }, { "epoch": 2.1093322920734088, "grad_norm": 2.585745096206665, "learning_rate": 2.1053755080708614e-06, "loss": 0.357, "step": 16206 }, { "epoch": 2.1097227645450998, "grad_norm": 3.108854055404663, "learning_rate": 2.1036717422377364e-06, "loss": 0.3649, "step": 16209 }, { "epoch": 2.1101132370167903, "grad_norm": 2.787053108215332, "learning_rate": 2.101968482387063e-06, "loss": 0.378, "step": 16212 }, { "epoch": 2.110503709488481, "grad_norm": 2.599722146987915, "learning_rate": 2.1002657288164002e-06, "loss": 0.3513, "step": 16215 }, { "epoch": 2.110894181960172, "grad_norm": 2.803863763809204, "learning_rate": 2.0985634818232136e-06, "loss": 0.3057, "step": 16218 }, { "epoch": 2.1112846544318624, "grad_norm": 2.749025344848633, "learning_rate": 2.096861741704884e-06, "loss": 0.4025, "step": 16221 }, { "epoch": 2.1116751269035534, "grad_norm": 2.9662301540374756, "learning_rate": 2.0951605087586994e-06, "loss": 0.4046, "step": 16224 }, { "epoch": 2.112065599375244, "grad_norm": 2.85591721534729, "learning_rate": 2.0934597832818653e-06, "loss": 0.3853, "step": 16227 }, { "epoch": 2.112456071846935, "grad_norm": 3.045888900756836, "learning_rate": 2.0917595655714925e-06, "loss": 0.3448, "step": 16230 }, { "epoch": 2.1128465443186255, "grad_norm": 2.8176419734954834, "learning_rate": 2.0900598559246032e-06, "loss": 0.312, "step": 16233 }, { "epoch": 2.1132370167903165, "grad_norm": 2.6675214767456055, "learning_rate": 2.0883606546381372e-06, "loss": 0.3868, "step": 16236 }, { "epoch": 2.113627489262007, "grad_norm": 2.7344720363616943, "learning_rate": 2.08666196200894e-06, "loss": 0.3555, "step": 16239 }, { "epoch": 2.1140179617336976, "grad_norm": 2.40110445022583, "learning_rate": 2.084963778333768e-06, "loss": 0.3102, "step": 16242 }, { "epoch": 2.1144084342053886, "grad_norm": 2.716338634490967, "learning_rate": 2.083266103909292e-06, "loss": 0.3574, "step": 16245 }, { "epoch": 2.114798906677079, "grad_norm": 2.801283121109009, "learning_rate": 2.081568939032093e-06, "loss": 0.3879, "step": 16248 }, { "epoch": 2.11518937914877, "grad_norm": 2.7228639125823975, "learning_rate": 2.07987228399866e-06, "loss": 0.3327, "step": 16251 }, { "epoch": 2.1155798516204607, "grad_norm": 3.10695743560791, "learning_rate": 2.0781761391053944e-06, "loss": 0.4271, "step": 16254 }, { "epoch": 2.1159703240921517, "grad_norm": 2.5058023929595947, "learning_rate": 2.076480504648613e-06, "loss": 0.3314, "step": 16257 }, { "epoch": 2.1163607965638422, "grad_norm": 2.8873941898345947, "learning_rate": 2.074785380924535e-06, "loss": 0.4765, "step": 16260 }, { "epoch": 2.116751269035533, "grad_norm": 2.7840540409088135, "learning_rate": 2.073090768229299e-06, "loss": 0.4192, "step": 16263 }, { "epoch": 2.117141741507224, "grad_norm": 2.253389596939087, "learning_rate": 2.071396666858947e-06, "loss": 0.3191, "step": 16266 }, { "epoch": 2.1175322139789143, "grad_norm": 2.8525044918060303, "learning_rate": 2.069703077109438e-06, "loss": 0.3817, "step": 16269 }, { "epoch": 2.1179226864506053, "grad_norm": 2.5376346111297607, "learning_rate": 2.0680099992766366e-06, "loss": 0.276, "step": 16272 }, { "epoch": 2.118313158922296, "grad_norm": 2.743023633956909, "learning_rate": 2.0663174336563193e-06, "loss": 0.3222, "step": 16275 }, { "epoch": 2.118703631393987, "grad_norm": 2.61458158493042, "learning_rate": 2.0646253805441757e-06, "loss": 0.2982, "step": 16278 }, { "epoch": 2.1190941038656774, "grad_norm": 3.0593173503875732, "learning_rate": 2.0629338402358035e-06, "loss": 0.3798, "step": 16281 }, { "epoch": 2.1194845763373684, "grad_norm": 2.7648446559906006, "learning_rate": 2.0612428130267087e-06, "loss": 0.4024, "step": 16284 }, { "epoch": 2.119875048809059, "grad_norm": 3.026467800140381, "learning_rate": 2.0595522992123148e-06, "loss": 0.3453, "step": 16287 }, { "epoch": 2.1202655212807495, "grad_norm": 2.49055814743042, "learning_rate": 2.057862299087947e-06, "loss": 0.3484, "step": 16290 }, { "epoch": 2.1206559937524405, "grad_norm": 2.641812324523926, "learning_rate": 2.056172812948846e-06, "loss": 0.351, "step": 16293 }, { "epoch": 2.121046466224131, "grad_norm": 2.7587594985961914, "learning_rate": 2.0544838410901625e-06, "loss": 0.3781, "step": 16296 }, { "epoch": 2.121436938695822, "grad_norm": 2.873347759246826, "learning_rate": 2.052795383806955e-06, "loss": 0.3831, "step": 16299 }, { "epoch": 2.1218274111675126, "grad_norm": 2.9109420776367188, "learning_rate": 2.0511074413941934e-06, "loss": 0.4032, "step": 16302 }, { "epoch": 2.1222178836392036, "grad_norm": 2.9024267196655273, "learning_rate": 2.0494200141467576e-06, "loss": 0.3648, "step": 16305 }, { "epoch": 2.122608356110894, "grad_norm": 2.8061764240264893, "learning_rate": 2.04773310235944e-06, "loss": 0.3407, "step": 16308 }, { "epoch": 2.1229988285825847, "grad_norm": 2.516742706298828, "learning_rate": 2.0460467063269384e-06, "loss": 0.3815, "step": 16311 }, { "epoch": 2.1233893010542757, "grad_norm": 2.9556503295898438, "learning_rate": 2.0443608263438635e-06, "loss": 0.3183, "step": 16314 }, { "epoch": 2.1237797735259663, "grad_norm": 2.734612464904785, "learning_rate": 2.0426754627047328e-06, "loss": 0.3251, "step": 16317 }, { "epoch": 2.1241702459976572, "grad_norm": 2.9752840995788574, "learning_rate": 2.040990615703979e-06, "loss": 0.3447, "step": 16320 }, { "epoch": 2.124560718469348, "grad_norm": 2.810737371444702, "learning_rate": 2.0393062856359396e-06, "loss": 0.3616, "step": 16323 }, { "epoch": 2.124951190941039, "grad_norm": 2.919602870941162, "learning_rate": 2.0376224727948625e-06, "loss": 0.3873, "step": 16326 }, { "epoch": 2.1253416634127293, "grad_norm": 2.8167080879211426, "learning_rate": 2.035939177474909e-06, "loss": 0.347, "step": 16329 }, { "epoch": 2.1257321358844203, "grad_norm": 3.348229169845581, "learning_rate": 2.0342563999701454e-06, "loss": 0.3292, "step": 16332 }, { "epoch": 2.126122608356111, "grad_norm": 2.9174301624298096, "learning_rate": 2.032574140574548e-06, "loss": 0.338, "step": 16335 }, { "epoch": 2.1265130808278014, "grad_norm": 3.1886136531829834, "learning_rate": 2.0308923995820077e-06, "loss": 0.3348, "step": 16338 }, { "epoch": 2.1269035532994924, "grad_norm": 2.831287145614624, "learning_rate": 2.0292111772863193e-06, "loss": 0.419, "step": 16341 }, { "epoch": 2.127294025771183, "grad_norm": 2.922455310821533, "learning_rate": 2.0275304739811864e-06, "loss": 0.3707, "step": 16344 }, { "epoch": 2.127684498242874, "grad_norm": 2.8592848777770996, "learning_rate": 2.0258502899602266e-06, "loss": 0.3573, "step": 16347 }, { "epoch": 2.1280749707145645, "grad_norm": 3.3974833488464355, "learning_rate": 2.0241706255169663e-06, "loss": 0.3741, "step": 16350 }, { "epoch": 2.1284654431862555, "grad_norm": 2.918564558029175, "learning_rate": 2.0224914809448374e-06, "loss": 0.3557, "step": 16353 }, { "epoch": 2.128855915657946, "grad_norm": 2.8875043392181396, "learning_rate": 2.0208128565371813e-06, "loss": 0.4248, "step": 16356 }, { "epoch": 2.129246388129637, "grad_norm": 2.7835309505462646, "learning_rate": 2.019134752587254e-06, "loss": 0.4035, "step": 16359 }, { "epoch": 2.1296368606013276, "grad_norm": 2.815490245819092, "learning_rate": 2.017457169388214e-06, "loss": 0.3947, "step": 16362 }, { "epoch": 2.130027333073018, "grad_norm": 3.1463191509246826, "learning_rate": 2.0157801072331325e-06, "loss": 0.3536, "step": 16365 }, { "epoch": 2.130417805544709, "grad_norm": 2.900385618209839, "learning_rate": 2.0141035664149868e-06, "loss": 0.3911, "step": 16368 }, { "epoch": 2.1308082780163997, "grad_norm": 3.139719009399414, "learning_rate": 2.0124275472266678e-06, "loss": 0.3523, "step": 16371 }, { "epoch": 2.1311987504880907, "grad_norm": 2.8128786087036133, "learning_rate": 2.010752049960972e-06, "loss": 0.4084, "step": 16374 }, { "epoch": 2.1315892229597813, "grad_norm": 2.783865213394165, "learning_rate": 2.0090770749106024e-06, "loss": 0.3048, "step": 16377 }, { "epoch": 2.1319796954314723, "grad_norm": 2.7417304515838623, "learning_rate": 2.007402622368178e-06, "loss": 0.3362, "step": 16380 }, { "epoch": 2.132370167903163, "grad_norm": 2.974012613296509, "learning_rate": 2.00572869262622e-06, "loss": 0.3618, "step": 16383 }, { "epoch": 2.132760640374854, "grad_norm": 2.9564473628997803, "learning_rate": 2.004055285977158e-06, "loss": 0.3369, "step": 16386 }, { "epoch": 2.1331511128465444, "grad_norm": 2.8104441165924072, "learning_rate": 2.0023824027133356e-06, "loss": 0.3538, "step": 16389 }, { "epoch": 2.133541585318235, "grad_norm": 3.0070455074310303, "learning_rate": 2.0007100431270027e-06, "loss": 0.3255, "step": 16392 }, { "epoch": 2.133932057789926, "grad_norm": 2.8861727714538574, "learning_rate": 1.999038207510316e-06, "loss": 0.3306, "step": 16395 }, { "epoch": 2.1343225302616164, "grad_norm": 2.9251303672790527, "learning_rate": 1.9973668961553394e-06, "loss": 0.3543, "step": 16398 }, { "epoch": 2.1347130027333074, "grad_norm": 2.2964024543762207, "learning_rate": 1.9956961093540513e-06, "loss": 0.3386, "step": 16401 }, { "epoch": 2.135103475204998, "grad_norm": 2.716637134552002, "learning_rate": 1.9940258473983326e-06, "loss": 0.3383, "step": 16404 }, { "epoch": 2.135493947676689, "grad_norm": 2.994713068008423, "learning_rate": 1.992356110579975e-06, "loss": 0.3323, "step": 16407 }, { "epoch": 2.1358844201483795, "grad_norm": 2.87947940826416, "learning_rate": 1.9906868991906754e-06, "loss": 0.4167, "step": 16410 }, { "epoch": 2.13627489262007, "grad_norm": 2.95758318901062, "learning_rate": 1.989018213522046e-06, "loss": 0.3449, "step": 16413 }, { "epoch": 2.136665365091761, "grad_norm": 2.653963565826416, "learning_rate": 1.9873500538656005e-06, "loss": 0.3985, "step": 16416 }, { "epoch": 2.1370558375634516, "grad_norm": 2.6448793411254883, "learning_rate": 1.985682420512761e-06, "loss": 0.3981, "step": 16419 }, { "epoch": 2.1374463100351426, "grad_norm": 2.5381240844726562, "learning_rate": 1.9840153137548634e-06, "loss": 0.2987, "step": 16422 }, { "epoch": 2.137836782506833, "grad_norm": 2.6718051433563232, "learning_rate": 1.982348733883146e-06, "loss": 0.3729, "step": 16425 }, { "epoch": 2.138227254978524, "grad_norm": 3.0153956413269043, "learning_rate": 1.980682681188754e-06, "loss": 0.416, "step": 16428 }, { "epoch": 2.1386177274502147, "grad_norm": 3.0083117485046387, "learning_rate": 1.979017155962747e-06, "loss": 0.382, "step": 16431 }, { "epoch": 2.1390081999219053, "grad_norm": 2.849533796310425, "learning_rate": 1.9773521584960888e-06, "loss": 0.3712, "step": 16434 }, { "epoch": 2.1393986723935963, "grad_norm": 2.806645154953003, "learning_rate": 1.9756876890796496e-06, "loss": 0.3822, "step": 16437 }, { "epoch": 2.139789144865287, "grad_norm": 2.8463988304138184, "learning_rate": 1.9740237480042075e-06, "loss": 0.357, "step": 16440 }, { "epoch": 2.140179617336978, "grad_norm": 3.0570833683013916, "learning_rate": 1.9723603355604526e-06, "loss": 0.3629, "step": 16443 }, { "epoch": 2.1405700898086684, "grad_norm": 2.503617286682129, "learning_rate": 1.9706974520389776e-06, "loss": 0.3127, "step": 16446 }, { "epoch": 2.1409605622803594, "grad_norm": 3.2526721954345703, "learning_rate": 1.9690350977302837e-06, "loss": 0.3756, "step": 16449 }, { "epoch": 2.14135103475205, "grad_norm": 2.4304678440093994, "learning_rate": 1.967373272924783e-06, "loss": 0.2854, "step": 16452 }, { "epoch": 2.141741507223741, "grad_norm": 2.5894525051116943, "learning_rate": 1.9657119779127926e-06, "loss": 0.3817, "step": 16455 }, { "epoch": 2.1421319796954315, "grad_norm": 2.8062849044799805, "learning_rate": 1.9640512129845365e-06, "loss": 0.3606, "step": 16458 }, { "epoch": 2.142522452167122, "grad_norm": 2.921401262283325, "learning_rate": 1.9623909784301442e-06, "loss": 0.4018, "step": 16461 }, { "epoch": 2.142912924638813, "grad_norm": 2.643878698348999, "learning_rate": 1.9607312745396602e-06, "loss": 0.3517, "step": 16464 }, { "epoch": 2.1433033971105035, "grad_norm": 2.543466806411743, "learning_rate": 1.9590721016030285e-06, "loss": 0.3569, "step": 16467 }, { "epoch": 2.1436938695821945, "grad_norm": 2.8661959171295166, "learning_rate": 1.957413459910102e-06, "loss": 0.3746, "step": 16470 }, { "epoch": 2.144084342053885, "grad_norm": 2.730772018432617, "learning_rate": 1.9557553497506432e-06, "loss": 0.2907, "step": 16473 }, { "epoch": 2.144474814525576, "grad_norm": 2.7955386638641357, "learning_rate": 1.954097771414322e-06, "loss": 0.3574, "step": 16476 }, { "epoch": 2.1448652869972666, "grad_norm": 2.8942129611968994, "learning_rate": 1.952440725190713e-06, "loss": 0.3566, "step": 16479 }, { "epoch": 2.1452557594689576, "grad_norm": 2.645179510116577, "learning_rate": 1.9507842113692967e-06, "loss": 0.3757, "step": 16482 }, { "epoch": 2.145646231940648, "grad_norm": 2.6048803329467773, "learning_rate": 1.9491282302394653e-06, "loss": 0.3349, "step": 16485 }, { "epoch": 2.1460367044123387, "grad_norm": 3.326573371887207, "learning_rate": 1.947472782090514e-06, "loss": 0.3798, "step": 16488 }, { "epoch": 2.1464271768840297, "grad_norm": 2.8624017238616943, "learning_rate": 1.9458178672116445e-06, "loss": 0.3991, "step": 16491 }, { "epoch": 2.1468176493557203, "grad_norm": 2.8629748821258545, "learning_rate": 1.9441634858919705e-06, "loss": 0.3984, "step": 16494 }, { "epoch": 2.1472081218274113, "grad_norm": 2.9364376068115234, "learning_rate": 1.9425096384205066e-06, "loss": 0.3873, "step": 16497 }, { "epoch": 2.147598594299102, "grad_norm": 2.7493503093719482, "learning_rate": 1.9408563250861756e-06, "loss": 0.3023, "step": 16500 }, { "epoch": 2.147989066770793, "grad_norm": 2.7298777103424072, "learning_rate": 1.9392035461778104e-06, "loss": 0.3675, "step": 16503 }, { "epoch": 2.1483795392424834, "grad_norm": 3.0014255046844482, "learning_rate": 1.937551301984147e-06, "loss": 0.3866, "step": 16506 }, { "epoch": 2.1487700117141744, "grad_norm": 2.657407522201538, "learning_rate": 1.9358995927938284e-06, "loss": 0.4154, "step": 16509 }, { "epoch": 2.149160484185865, "grad_norm": 2.729097843170166, "learning_rate": 1.934248418895401e-06, "loss": 0.3707, "step": 16512 }, { "epoch": 2.1495509566575555, "grad_norm": 3.432372808456421, "learning_rate": 1.93259778057733e-06, "loss": 0.3662, "step": 16515 }, { "epoch": 2.1499414291292465, "grad_norm": 2.9015185832977295, "learning_rate": 1.9309476781279735e-06, "loss": 0.3547, "step": 16518 }, { "epoch": 2.150331901600937, "grad_norm": 2.588021993637085, "learning_rate": 1.9292981118356013e-06, "loss": 0.3424, "step": 16521 }, { "epoch": 2.150722374072628, "grad_norm": 2.5547730922698975, "learning_rate": 1.927649081988387e-06, "loss": 0.357, "step": 16524 }, { "epoch": 2.1511128465443186, "grad_norm": 2.5534119606018066, "learning_rate": 1.926000588874417e-06, "loss": 0.3456, "step": 16527 }, { "epoch": 2.1515033190160096, "grad_norm": 3.0660245418548584, "learning_rate": 1.924352632781677e-06, "loss": 0.3396, "step": 16530 }, { "epoch": 2.1518937914877, "grad_norm": 2.4878122806549072, "learning_rate": 1.9227052139980606e-06, "loss": 0.3132, "step": 16533 }, { "epoch": 2.152284263959391, "grad_norm": 2.8623673915863037, "learning_rate": 1.921058332811371e-06, "loss": 0.3601, "step": 16536 }, { "epoch": 2.1526747364310816, "grad_norm": 2.5879554748535156, "learning_rate": 1.9194119895093137e-06, "loss": 0.3428, "step": 16539 }, { "epoch": 2.153065208902772, "grad_norm": 2.7629878520965576, "learning_rate": 1.9177661843794994e-06, "loss": 0.367, "step": 16542 }, { "epoch": 2.153455681374463, "grad_norm": 2.715153694152832, "learning_rate": 1.9161209177094504e-06, "loss": 0.3238, "step": 16545 }, { "epoch": 2.1538461538461537, "grad_norm": 3.1860060691833496, "learning_rate": 1.9144761897865895e-06, "loss": 0.3534, "step": 16548 }, { "epoch": 2.1542366263178447, "grad_norm": 2.8940200805664062, "learning_rate": 1.9128320008982456e-06, "loss": 0.3401, "step": 16551 }, { "epoch": 2.1546270987895353, "grad_norm": 2.862811326980591, "learning_rate": 1.9111883513316564e-06, "loss": 0.3994, "step": 16554 }, { "epoch": 2.1550175712612263, "grad_norm": 2.693480968475342, "learning_rate": 1.909545241373966e-06, "loss": 0.3594, "step": 16557 }, { "epoch": 2.155408043732917, "grad_norm": 2.9128196239471436, "learning_rate": 1.9079026713122206e-06, "loss": 0.3923, "step": 16560 }, { "epoch": 2.1557985162046074, "grad_norm": 2.5476996898651123, "learning_rate": 1.9062606414333723e-06, "loss": 0.3357, "step": 16563 }, { "epoch": 2.1561889886762984, "grad_norm": 2.8649590015411377, "learning_rate": 1.9046191520242835e-06, "loss": 0.3318, "step": 16566 }, { "epoch": 2.156579461147989, "grad_norm": 3.019973039627075, "learning_rate": 1.902978203371717e-06, "loss": 0.3477, "step": 16569 }, { "epoch": 2.15696993361968, "grad_norm": 2.824190855026245, "learning_rate": 1.901337795762343e-06, "loss": 0.3407, "step": 16572 }, { "epoch": 2.1573604060913705, "grad_norm": 2.4870200157165527, "learning_rate": 1.8996979294827355e-06, "loss": 0.3377, "step": 16575 }, { "epoch": 2.1577508785630615, "grad_norm": 2.9998185634613037, "learning_rate": 1.89805860481938e-06, "loss": 0.3986, "step": 16578 }, { "epoch": 2.158141351034752, "grad_norm": 2.575596809387207, "learning_rate": 1.8964198220586599e-06, "loss": 0.3309, "step": 16581 }, { "epoch": 2.1585318235064426, "grad_norm": 3.0732362270355225, "learning_rate": 1.894781581486867e-06, "loss": 0.3105, "step": 16584 }, { "epoch": 2.1589222959781336, "grad_norm": 2.591250419616699, "learning_rate": 1.8931438833902005e-06, "loss": 0.3741, "step": 16587 }, { "epoch": 2.159312768449824, "grad_norm": 2.6407053470611572, "learning_rate": 1.8915067280547622e-06, "loss": 0.3275, "step": 16590 }, { "epoch": 2.159703240921515, "grad_norm": 3.031137704849243, "learning_rate": 1.8898701157665572e-06, "loss": 0.36, "step": 16593 }, { "epoch": 2.1600937133932057, "grad_norm": 2.810857057571411, "learning_rate": 1.8882340468115002e-06, "loss": 0.3636, "step": 16596 }, { "epoch": 2.1604841858648967, "grad_norm": 3.037799119949341, "learning_rate": 1.8865985214754107e-06, "loss": 0.3521, "step": 16599 }, { "epoch": 2.160874658336587, "grad_norm": 3.4607412815093994, "learning_rate": 1.8849635400440098e-06, "loss": 0.3723, "step": 16602 }, { "epoch": 2.161265130808278, "grad_norm": 2.7808799743652344, "learning_rate": 1.8833291028029239e-06, "loss": 0.3518, "step": 16605 }, { "epoch": 2.1616556032799688, "grad_norm": 2.7576420307159424, "learning_rate": 1.881695210037689e-06, "loss": 0.3491, "step": 16608 }, { "epoch": 2.1620460757516593, "grad_norm": 2.637984037399292, "learning_rate": 1.8800618620337407e-06, "loss": 0.3452, "step": 16611 }, { "epoch": 2.1624365482233503, "grad_norm": 2.8887691497802734, "learning_rate": 1.8784290590764199e-06, "loss": 0.3166, "step": 16614 }, { "epoch": 2.162827020695041, "grad_norm": 2.620800495147705, "learning_rate": 1.8767968014509774e-06, "loss": 0.3097, "step": 16617 }, { "epoch": 2.163217493166732, "grad_norm": 2.542872190475464, "learning_rate": 1.8751650894425639e-06, "loss": 0.326, "step": 16620 }, { "epoch": 2.1636079656384224, "grad_norm": 3.511993169784546, "learning_rate": 1.8735339233362355e-06, "loss": 0.39, "step": 16623 }, { "epoch": 2.1639984381101134, "grad_norm": 2.7915451526641846, "learning_rate": 1.8719033034169514e-06, "loss": 0.4285, "step": 16626 }, { "epoch": 2.164388910581804, "grad_norm": 2.7394583225250244, "learning_rate": 1.8702732299695813e-06, "loss": 0.3401, "step": 16629 }, { "epoch": 2.164779383053495, "grad_norm": 2.7059719562530518, "learning_rate": 1.8686437032788945e-06, "loss": 0.3685, "step": 16632 }, { "epoch": 2.1651698555251855, "grad_norm": 2.651163339614868, "learning_rate": 1.8670147236295632e-06, "loss": 0.3437, "step": 16635 }, { "epoch": 2.165560327996876, "grad_norm": 2.559772253036499, "learning_rate": 1.8653862913061693e-06, "loss": 0.2954, "step": 16638 }, { "epoch": 2.165950800468567, "grad_norm": 2.9166457653045654, "learning_rate": 1.8637584065931974e-06, "loss": 0.2884, "step": 16641 }, { "epoch": 2.1663412729402576, "grad_norm": 2.951847791671753, "learning_rate": 1.862131069775034e-06, "loss": 0.4385, "step": 16644 }, { "epoch": 2.1667317454119486, "grad_norm": 2.7294256687164307, "learning_rate": 1.8605042811359702e-06, "loss": 0.3622, "step": 16647 }, { "epoch": 2.167122217883639, "grad_norm": 2.931398630142212, "learning_rate": 1.8588780409602053e-06, "loss": 0.3915, "step": 16650 }, { "epoch": 2.16751269035533, "grad_norm": 3.169494152069092, "learning_rate": 1.8572523495318389e-06, "loss": 0.3643, "step": 16653 }, { "epoch": 2.1679031628270207, "grad_norm": 3.00234055519104, "learning_rate": 1.855627207134874e-06, "loss": 0.3221, "step": 16656 }, { "epoch": 2.1682936352987117, "grad_norm": 2.8568825721740723, "learning_rate": 1.854002614053223e-06, "loss": 0.4029, "step": 16659 }, { "epoch": 2.168684107770402, "grad_norm": 2.6517200469970703, "learning_rate": 1.852378570570697e-06, "loss": 0.3926, "step": 16662 }, { "epoch": 2.1690745802420928, "grad_norm": 2.9220314025878906, "learning_rate": 1.8507550769710115e-06, "loss": 0.3665, "step": 16665 }, { "epoch": 2.1694650527137838, "grad_norm": 2.624634265899658, "learning_rate": 1.849132133537791e-06, "loss": 0.3212, "step": 16668 }, { "epoch": 2.1698555251854743, "grad_norm": 2.7517733573913574, "learning_rate": 1.8475097405545578e-06, "loss": 0.3628, "step": 16671 }, { "epoch": 2.1702459976571653, "grad_norm": 2.7567718029022217, "learning_rate": 1.8458878983047412e-06, "loss": 0.3733, "step": 16674 }, { "epoch": 2.170636470128856, "grad_norm": 2.902865171432495, "learning_rate": 1.8442666070716719e-06, "loss": 0.3259, "step": 16677 }, { "epoch": 2.171026942600547, "grad_norm": 3.230361223220825, "learning_rate": 1.842645867138587e-06, "loss": 0.3797, "step": 16680 }, { "epoch": 2.1714174150722374, "grad_norm": 3.068331003189087, "learning_rate": 1.8410256787886298e-06, "loss": 0.4157, "step": 16683 }, { "epoch": 2.171807887543928, "grad_norm": 3.102295160293579, "learning_rate": 1.8394060423048404e-06, "loss": 0.3912, "step": 16686 }, { "epoch": 2.172198360015619, "grad_norm": 2.4631145000457764, "learning_rate": 1.8377869579701647e-06, "loss": 0.3233, "step": 16689 }, { "epoch": 2.1725888324873095, "grad_norm": 3.171217679977417, "learning_rate": 1.8361684260674572e-06, "loss": 0.3941, "step": 16692 }, { "epoch": 2.1729793049590005, "grad_norm": 3.546006679534912, "learning_rate": 1.8345504468794694e-06, "loss": 0.3908, "step": 16695 }, { "epoch": 2.173369777430691, "grad_norm": 2.745622158050537, "learning_rate": 1.832933020688858e-06, "loss": 0.3229, "step": 16698 }, { "epoch": 2.173760249902382, "grad_norm": 2.7853684425354004, "learning_rate": 1.8313161477781871e-06, "loss": 0.3425, "step": 16701 }, { "epoch": 2.1741507223740726, "grad_norm": 2.7178430557250977, "learning_rate": 1.8296998284299195e-06, "loss": 0.4037, "step": 16704 }, { "epoch": 2.1745411948457636, "grad_norm": 2.7298033237457275, "learning_rate": 1.8280840629264202e-06, "loss": 0.3505, "step": 16707 }, { "epoch": 2.174931667317454, "grad_norm": 2.750066041946411, "learning_rate": 1.8264688515499645e-06, "loss": 0.3359, "step": 16710 }, { "epoch": 2.1753221397891447, "grad_norm": 3.090998888015747, "learning_rate": 1.824854194582724e-06, "loss": 0.3278, "step": 16713 }, { "epoch": 2.1757126122608357, "grad_norm": 2.6128833293914795, "learning_rate": 1.823240092306775e-06, "loss": 0.3463, "step": 16716 }, { "epoch": 2.1761030847325262, "grad_norm": 2.647390604019165, "learning_rate": 1.8216265450041004e-06, "loss": 0.3686, "step": 16719 }, { "epoch": 2.176493557204217, "grad_norm": 2.6248843669891357, "learning_rate": 1.8200135529565805e-06, "loss": 0.3933, "step": 16722 }, { "epoch": 2.1768840296759078, "grad_norm": 2.8739254474639893, "learning_rate": 1.8184011164460046e-06, "loss": 0.394, "step": 16725 }, { "epoch": 2.1772745021475988, "grad_norm": 2.8517467975616455, "learning_rate": 1.816789235754059e-06, "loss": 0.3486, "step": 16728 }, { "epoch": 2.1776649746192893, "grad_norm": 2.5461232662200928, "learning_rate": 1.8151779111623392e-06, "loss": 0.3518, "step": 16731 }, { "epoch": 2.17805544709098, "grad_norm": 2.520355463027954, "learning_rate": 1.8135671429523377e-06, "loss": 0.3742, "step": 16734 }, { "epoch": 2.178445919562671, "grad_norm": 2.9696972370147705, "learning_rate": 1.811956931405454e-06, "loss": 0.3798, "step": 16737 }, { "epoch": 2.1788363920343614, "grad_norm": 2.75524640083313, "learning_rate": 1.8103472768029856e-06, "loss": 0.329, "step": 16740 }, { "epoch": 2.1792268645060524, "grad_norm": 2.857969284057617, "learning_rate": 1.8087381794261394e-06, "loss": 0.3514, "step": 16743 }, { "epoch": 2.179617336977743, "grad_norm": 3.1860451698303223, "learning_rate": 1.80712963955602e-06, "loss": 0.3085, "step": 16746 }, { "epoch": 2.180007809449434, "grad_norm": 3.36942458152771, "learning_rate": 1.8055216574736346e-06, "loss": 0.3323, "step": 16749 }, { "epoch": 2.1803982819211245, "grad_norm": 3.2723255157470703, "learning_rate": 1.8039142334598964e-06, "loss": 0.4076, "step": 16752 }, { "epoch": 2.1807887543928155, "grad_norm": 2.8995213508605957, "learning_rate": 1.8023073677956183e-06, "loss": 0.3193, "step": 16755 }, { "epoch": 2.181179226864506, "grad_norm": 3.000784397125244, "learning_rate": 1.8007010607615144e-06, "loss": 0.4389, "step": 16758 }, { "epoch": 2.1815696993361966, "grad_norm": 2.861037492752075, "learning_rate": 1.7990953126382065e-06, "loss": 0.3881, "step": 16761 }, { "epoch": 2.1819601718078876, "grad_norm": 2.6603360176086426, "learning_rate": 1.797490123706212e-06, "loss": 0.3744, "step": 16764 }, { "epoch": 2.182350644279578, "grad_norm": 2.7340359687805176, "learning_rate": 1.795885494245958e-06, "loss": 0.3644, "step": 16767 }, { "epoch": 2.182741116751269, "grad_norm": 2.9198856353759766, "learning_rate": 1.7942814245377654e-06, "loss": 0.3499, "step": 16770 }, { "epoch": 2.1831315892229597, "grad_norm": 3.0503599643707275, "learning_rate": 1.7926779148618661e-06, "loss": 0.3224, "step": 16773 }, { "epoch": 2.1835220616946507, "grad_norm": 2.8313817977905273, "learning_rate": 1.7910749654983879e-06, "loss": 0.3579, "step": 16776 }, { "epoch": 2.1839125341663412, "grad_norm": 2.788543224334717, "learning_rate": 1.7894725767273601e-06, "loss": 0.3443, "step": 16779 }, { "epoch": 2.1843030066380322, "grad_norm": 2.825108289718628, "learning_rate": 1.7878707488287216e-06, "loss": 0.3615, "step": 16782 }, { "epoch": 2.184693479109723, "grad_norm": 2.4772067070007324, "learning_rate": 1.7862694820823062e-06, "loss": 0.3303, "step": 16785 }, { "epoch": 2.1850839515814133, "grad_norm": 2.8775320053100586, "learning_rate": 1.784668776767851e-06, "loss": 0.3626, "step": 16788 }, { "epoch": 2.1854744240531043, "grad_norm": 2.8523988723754883, "learning_rate": 1.783068633164995e-06, "loss": 0.2922, "step": 16791 }, { "epoch": 2.185864896524795, "grad_norm": 2.919550657272339, "learning_rate": 1.7814690515532828e-06, "loss": 0.3725, "step": 16794 }, { "epoch": 2.186255368996486, "grad_norm": 2.9476890563964844, "learning_rate": 1.779870032212157e-06, "loss": 0.3546, "step": 16797 }, { "epoch": 2.1866458414681764, "grad_norm": 2.7363481521606445, "learning_rate": 1.7782715754209607e-06, "loss": 0.4004, "step": 16800 }, { "epoch": 2.1870363139398674, "grad_norm": 2.9156899452209473, "learning_rate": 1.776673681458944e-06, "loss": 0.3733, "step": 16803 }, { "epoch": 2.187426786411558, "grad_norm": 2.7484629154205322, "learning_rate": 1.7750763506052526e-06, "loss": 0.3773, "step": 16806 }, { "epoch": 2.187817258883249, "grad_norm": 2.7555909156799316, "learning_rate": 1.77347958313894e-06, "loss": 0.3394, "step": 16809 }, { "epoch": 2.1882077313549395, "grad_norm": 2.8585703372955322, "learning_rate": 1.7718833793389556e-06, "loss": 0.349, "step": 16812 }, { "epoch": 2.18859820382663, "grad_norm": 2.7602007389068604, "learning_rate": 1.770287739484155e-06, "loss": 0.3731, "step": 16815 }, { "epoch": 2.188988676298321, "grad_norm": 2.4553885459899902, "learning_rate": 1.768692663853292e-06, "loss": 0.3372, "step": 16818 }, { "epoch": 2.1893791487700116, "grad_norm": 2.777724504470825, "learning_rate": 1.7670981527250213e-06, "loss": 0.4093, "step": 16821 }, { "epoch": 2.1897696212417026, "grad_norm": 2.8050687313079834, "learning_rate": 1.7655042063779043e-06, "loss": 0.3752, "step": 16824 }, { "epoch": 2.190160093713393, "grad_norm": 3.285545587539673, "learning_rate": 1.7639108250903974e-06, "loss": 0.4672, "step": 16827 }, { "epoch": 2.190550566185084, "grad_norm": 3.2588181495666504, "learning_rate": 1.762318009140862e-06, "loss": 0.4154, "step": 16830 }, { "epoch": 2.1909410386567747, "grad_norm": 2.734658718109131, "learning_rate": 1.7607257588075582e-06, "loss": 0.4109, "step": 16833 }, { "epoch": 2.1913315111284652, "grad_norm": 2.7743442058563232, "learning_rate": 1.7591340743686507e-06, "loss": 0.3764, "step": 16836 }, { "epoch": 2.1917219836001562, "grad_norm": 2.652029514312744, "learning_rate": 1.7575429561022029e-06, "loss": 0.4011, "step": 16839 }, { "epoch": 2.192112456071847, "grad_norm": 2.7200498580932617, "learning_rate": 1.755952404286178e-06, "loss": 0.3842, "step": 16842 }, { "epoch": 2.192502928543538, "grad_norm": 2.506895065307617, "learning_rate": 1.7543624191984455e-06, "loss": 0.3566, "step": 16845 }, { "epoch": 2.1928934010152283, "grad_norm": 3.026963233947754, "learning_rate": 1.7527730011167681e-06, "loss": 0.3718, "step": 16848 }, { "epoch": 2.1932838734869193, "grad_norm": 2.6555018424987793, "learning_rate": 1.7511841503188187e-06, "loss": 0.3382, "step": 16851 }, { "epoch": 2.19367434595861, "grad_norm": 2.6906399726867676, "learning_rate": 1.7495958670821617e-06, "loss": 0.3304, "step": 16854 }, { "epoch": 2.1940648184303004, "grad_norm": 3.381065607070923, "learning_rate": 1.7480081516842705e-06, "loss": 0.3892, "step": 16857 }, { "epoch": 2.1944552909019914, "grad_norm": 2.733503580093384, "learning_rate": 1.7464210044025144e-06, "loss": 0.3378, "step": 16860 }, { "epoch": 2.194845763373682, "grad_norm": 2.755075693130493, "learning_rate": 1.744834425514162e-06, "loss": 0.3439, "step": 16863 }, { "epoch": 2.195236235845373, "grad_norm": 2.853973388671875, "learning_rate": 1.7432484152963896e-06, "loss": 0.3667, "step": 16866 }, { "epoch": 2.1956267083170635, "grad_norm": 2.4795007705688477, "learning_rate": 1.7416629740262681e-06, "loss": 0.3341, "step": 16869 }, { "epoch": 2.1960171807887545, "grad_norm": 2.5564217567443848, "learning_rate": 1.7400781019807684e-06, "loss": 0.3706, "step": 16872 }, { "epoch": 2.196407653260445, "grad_norm": 2.830836057662964, "learning_rate": 1.738493799436768e-06, "loss": 0.354, "step": 16875 }, { "epoch": 2.196798125732136, "grad_norm": 2.753403425216675, "learning_rate": 1.7369100666710398e-06, "loss": 0.3636, "step": 16878 }, { "epoch": 2.1971885982038266, "grad_norm": 2.559079170227051, "learning_rate": 1.7353269039602588e-06, "loss": 0.3772, "step": 16881 }, { "epoch": 2.197579070675517, "grad_norm": 2.9409003257751465, "learning_rate": 1.7337443115809976e-06, "loss": 0.3509, "step": 16884 }, { "epoch": 2.197969543147208, "grad_norm": 2.7231342792510986, "learning_rate": 1.7321622898097362e-06, "loss": 0.4125, "step": 16887 }, { "epoch": 2.1983600156188987, "grad_norm": 2.7584779262542725, "learning_rate": 1.7305808389228462e-06, "loss": 0.3734, "step": 16890 }, { "epoch": 2.1987504880905897, "grad_norm": 2.9606661796569824, "learning_rate": 1.7289999591966072e-06, "loss": 0.3863, "step": 16893 }, { "epoch": 2.1991409605622803, "grad_norm": 2.588604688644409, "learning_rate": 1.7274196509071927e-06, "loss": 0.3034, "step": 16896 }, { "epoch": 2.1995314330339713, "grad_norm": 2.7741434574127197, "learning_rate": 1.7258399143306825e-06, "loss": 0.3722, "step": 16899 }, { "epoch": 2.199921905505662, "grad_norm": 2.741807699203491, "learning_rate": 1.7242607497430514e-06, "loss": 0.3266, "step": 16902 }, { "epoch": 2.200312377977353, "grad_norm": 2.6828136444091797, "learning_rate": 1.7226821574201747e-06, "loss": 0.4204, "step": 16905 }, { "epoch": 2.2007028504490433, "grad_norm": 2.492523431777954, "learning_rate": 1.721104137637832e-06, "loss": 0.3388, "step": 16908 }, { "epoch": 2.201093322920734, "grad_norm": 2.79616117477417, "learning_rate": 1.7195266906716985e-06, "loss": 0.3348, "step": 16911 }, { "epoch": 2.201483795392425, "grad_norm": 2.682907819747925, "learning_rate": 1.7179498167973496e-06, "loss": 0.3421, "step": 16914 }, { "epoch": 2.2018742678641154, "grad_norm": 3.588071823120117, "learning_rate": 1.7163735162902651e-06, "loss": 0.374, "step": 16917 }, { "epoch": 2.2022647403358064, "grad_norm": 3.160140037536621, "learning_rate": 1.7147977894258193e-06, "loss": 0.4139, "step": 16920 }, { "epoch": 2.202655212807497, "grad_norm": 2.9492440223693848, "learning_rate": 1.713222636479287e-06, "loss": 0.4267, "step": 16923 }, { "epoch": 2.203045685279188, "grad_norm": 2.716421365737915, "learning_rate": 1.7116480577258477e-06, "loss": 0.3267, "step": 16926 }, { "epoch": 2.2034361577508785, "grad_norm": 3.1146531105041504, "learning_rate": 1.7100740534405746e-06, "loss": 0.3581, "step": 16929 }, { "epoch": 2.2038266302225695, "grad_norm": 3.101052761077881, "learning_rate": 1.708500623898442e-06, "loss": 0.3646, "step": 16932 }, { "epoch": 2.20421710269426, "grad_norm": 2.7972285747528076, "learning_rate": 1.7069277693743258e-06, "loss": 0.3556, "step": 16935 }, { "epoch": 2.2046075751659506, "grad_norm": 3.1059136390686035, "learning_rate": 1.705355490143003e-06, "loss": 0.3617, "step": 16938 }, { "epoch": 2.2049980476376416, "grad_norm": 2.703691244125366, "learning_rate": 1.7037837864791445e-06, "loss": 0.3557, "step": 16941 }, { "epoch": 2.205388520109332, "grad_norm": 2.6937639713287354, "learning_rate": 1.7022126586573246e-06, "loss": 0.3241, "step": 16944 }, { "epoch": 2.205778992581023, "grad_norm": 3.200873613357544, "learning_rate": 1.7006421069520141e-06, "loss": 0.3686, "step": 16947 }, { "epoch": 2.2061694650527137, "grad_norm": 2.610318183898926, "learning_rate": 1.699072131637588e-06, "loss": 0.339, "step": 16950 }, { "epoch": 2.2065599375244047, "grad_norm": 3.0442748069763184, "learning_rate": 1.6975027329883166e-06, "loss": 0.4444, "step": 16953 }, { "epoch": 2.2069504099960953, "grad_norm": 2.716014862060547, "learning_rate": 1.6959339112783685e-06, "loss": 0.3636, "step": 16956 }, { "epoch": 2.207340882467786, "grad_norm": 2.61544132232666, "learning_rate": 1.694365666781817e-06, "loss": 0.3378, "step": 16959 }, { "epoch": 2.207731354939477, "grad_norm": 2.998279094696045, "learning_rate": 1.6927979997726295e-06, "loss": 0.2909, "step": 16962 }, { "epoch": 2.2081218274111674, "grad_norm": 2.755953311920166, "learning_rate": 1.6912309105246726e-06, "loss": 0.3346, "step": 16965 }, { "epoch": 2.2085122998828584, "grad_norm": 2.780045986175537, "learning_rate": 1.6896643993117168e-06, "loss": 0.3338, "step": 16968 }, { "epoch": 2.208902772354549, "grad_norm": 2.9842076301574707, "learning_rate": 1.6880984664074262e-06, "loss": 0.3819, "step": 16971 }, { "epoch": 2.20929324482624, "grad_norm": 2.786750078201294, "learning_rate": 1.6865331120853645e-06, "loss": 0.3506, "step": 16974 }, { "epoch": 2.2096837172979304, "grad_norm": 2.8136911392211914, "learning_rate": 1.6849683366189978e-06, "loss": 0.3695, "step": 16977 }, { "epoch": 2.2100741897696214, "grad_norm": 2.6133673191070557, "learning_rate": 1.6834041402816908e-06, "loss": 0.3035, "step": 16980 }, { "epoch": 2.210464662241312, "grad_norm": 2.6975417137145996, "learning_rate": 1.6818405233467034e-06, "loss": 0.314, "step": 16983 }, { "epoch": 2.2108551347130025, "grad_norm": 2.7998573780059814, "learning_rate": 1.6802774860871939e-06, "loss": 0.3707, "step": 16986 }, { "epoch": 2.2112456071846935, "grad_norm": 2.9110021591186523, "learning_rate": 1.678715028776226e-06, "loss": 0.4747, "step": 16989 }, { "epoch": 2.211636079656384, "grad_norm": 2.649843215942383, "learning_rate": 1.6771531516867557e-06, "loss": 0.3751, "step": 16992 }, { "epoch": 2.212026552128075, "grad_norm": 3.525862693786621, "learning_rate": 1.6755918550916395e-06, "loss": 0.3078, "step": 16995 }, { "epoch": 2.2124170245997656, "grad_norm": 2.670433759689331, "learning_rate": 1.6740311392636311e-06, "loss": 0.3935, "step": 16998 }, { "epoch": 2.2128074970714566, "grad_norm": 2.793560266494751, "learning_rate": 1.6724710044753872e-06, "loss": 0.3365, "step": 17001 }, { "epoch": 2.213197969543147, "grad_norm": 3.4935295581817627, "learning_rate": 1.6709114509994588e-06, "loss": 0.3708, "step": 17004 }, { "epoch": 2.2135884420148377, "grad_norm": 2.874403238296509, "learning_rate": 1.6693524791082948e-06, "loss": 0.3428, "step": 17007 }, { "epoch": 2.2139789144865287, "grad_norm": 2.7574665546417236, "learning_rate": 1.6677940890742484e-06, "loss": 0.3819, "step": 17010 }, { "epoch": 2.2143693869582193, "grad_norm": 2.5127577781677246, "learning_rate": 1.6662362811695637e-06, "loss": 0.3112, "step": 17013 }, { "epoch": 2.2147598594299103, "grad_norm": 2.6434106826782227, "learning_rate": 1.6646790556663867e-06, "loss": 0.3468, "step": 17016 }, { "epoch": 2.215150331901601, "grad_norm": 2.921630859375, "learning_rate": 1.6631224128367612e-06, "loss": 0.3818, "step": 17019 }, { "epoch": 2.215540804373292, "grad_norm": 3.2600367069244385, "learning_rate": 1.6615663529526328e-06, "loss": 0.407, "step": 17022 }, { "epoch": 2.2159312768449824, "grad_norm": 2.5993316173553467, "learning_rate": 1.6600108762858392e-06, "loss": 0.3508, "step": 17025 }, { "epoch": 2.2163217493166734, "grad_norm": 3.402862310409546, "learning_rate": 1.6584559831081176e-06, "loss": 0.3205, "step": 17028 }, { "epoch": 2.216712221788364, "grad_norm": 2.991856813430786, "learning_rate": 1.6569016736911082e-06, "loss": 0.3868, "step": 17031 }, { "epoch": 2.2171026942600545, "grad_norm": 2.9160122871398926, "learning_rate": 1.6553479483063434e-06, "loss": 0.3622, "step": 17034 }, { "epoch": 2.2174931667317455, "grad_norm": 2.687974691390991, "learning_rate": 1.653794807225254e-06, "loss": 0.3137, "step": 17037 }, { "epoch": 2.217883639203436, "grad_norm": 2.7909791469573975, "learning_rate": 1.6522422507191744e-06, "loss": 0.3004, "step": 17040 }, { "epoch": 2.218274111675127, "grad_norm": 2.7353012561798096, "learning_rate": 1.6506902790593303e-06, "loss": 0.3487, "step": 17043 }, { "epoch": 2.2186645841468176, "grad_norm": 2.6116976737976074, "learning_rate": 1.649138892516849e-06, "loss": 0.3245, "step": 17046 }, { "epoch": 2.2190550566185085, "grad_norm": 2.7857844829559326, "learning_rate": 1.6475880913627522e-06, "loss": 0.3987, "step": 17049 }, { "epoch": 2.219445529090199, "grad_norm": 2.628438711166382, "learning_rate": 1.646037875867965e-06, "loss": 0.3248, "step": 17052 }, { "epoch": 2.21983600156189, "grad_norm": 2.738862991333008, "learning_rate": 1.6444882463033058e-06, "loss": 0.3295, "step": 17055 }, { "epoch": 2.2202264740335806, "grad_norm": 2.46667742729187, "learning_rate": 1.6429392029394886e-06, "loss": 0.3429, "step": 17058 }, { "epoch": 2.220616946505271, "grad_norm": 2.5244455337524414, "learning_rate": 1.6413907460471306e-06, "loss": 0.3478, "step": 17061 }, { "epoch": 2.221007418976962, "grad_norm": 2.5417511463165283, "learning_rate": 1.6398428758967455e-06, "loss": 0.3285, "step": 17064 }, { "epoch": 2.2213978914486527, "grad_norm": 2.653949499130249, "learning_rate": 1.6382955927587414e-06, "loss": 0.3263, "step": 17067 }, { "epoch": 2.2217883639203437, "grad_norm": 2.884857416152954, "learning_rate": 1.6367488969034234e-06, "loss": 0.3631, "step": 17070 }, { "epoch": 2.2221788363920343, "grad_norm": 2.860215663909912, "learning_rate": 1.6352027886009997e-06, "loss": 0.3883, "step": 17073 }, { "epoch": 2.2225693088637253, "grad_norm": 2.657209873199463, "learning_rate": 1.6336572681215701e-06, "loss": 0.3437, "step": 17076 }, { "epoch": 2.222959781335416, "grad_norm": 2.648395538330078, "learning_rate": 1.6321123357351327e-06, "loss": 0.3321, "step": 17079 }, { "epoch": 2.223350253807107, "grad_norm": 2.7004330158233643, "learning_rate": 1.6305679917115864e-06, "loss": 0.3254, "step": 17082 }, { "epoch": 2.2237407262787974, "grad_norm": 2.858809232711792, "learning_rate": 1.6290242363207238e-06, "loss": 0.3445, "step": 17085 }, { "epoch": 2.224131198750488, "grad_norm": 2.9426462650299072, "learning_rate": 1.6274810698322341e-06, "loss": 0.3974, "step": 17088 }, { "epoch": 2.224521671222179, "grad_norm": 2.777961015701294, "learning_rate": 1.6259384925157084e-06, "loss": 0.4052, "step": 17091 }, { "epoch": 2.2249121436938695, "grad_norm": 2.6763064861297607, "learning_rate": 1.6243965046406302e-06, "loss": 0.3834, "step": 17094 }, { "epoch": 2.2253026161655605, "grad_norm": 2.739316463470459, "learning_rate": 1.6228551064763814e-06, "loss": 0.3819, "step": 17097 }, { "epoch": 2.225693088637251, "grad_norm": 2.7301228046417236, "learning_rate": 1.6213142982922376e-06, "loss": 0.2822, "step": 17100 }, { "epoch": 2.226083561108942, "grad_norm": 3.0507218837738037, "learning_rate": 1.6197740803573813e-06, "loss": 0.3252, "step": 17103 }, { "epoch": 2.2264740335806326, "grad_norm": 2.8858330249786377, "learning_rate": 1.6182344529408828e-06, "loss": 0.3756, "step": 17106 }, { "epoch": 2.226864506052323, "grad_norm": 2.5173003673553467, "learning_rate": 1.6166954163117105e-06, "loss": 0.3249, "step": 17109 }, { "epoch": 2.227254978524014, "grad_norm": 2.6058590412139893, "learning_rate": 1.6151569707387305e-06, "loss": 0.3337, "step": 17112 }, { "epoch": 2.2276454509957047, "grad_norm": 3.0819878578186035, "learning_rate": 1.6136191164907084e-06, "loss": 0.4114, "step": 17115 }, { "epoch": 2.2280359234673957, "grad_norm": 3.0594425201416016, "learning_rate": 1.6120818538363026e-06, "loss": 0.4065, "step": 17118 }, { "epoch": 2.228426395939086, "grad_norm": 3.019721746444702, "learning_rate": 1.6105451830440683e-06, "loss": 0.333, "step": 17121 }, { "epoch": 2.228816868410777, "grad_norm": 2.7887985706329346, "learning_rate": 1.6090091043824618e-06, "loss": 0.37, "step": 17124 }, { "epoch": 2.2292073408824677, "grad_norm": 2.4664344787597656, "learning_rate": 1.6074736181198309e-06, "loss": 0.3093, "step": 17127 }, { "epoch": 2.2295978133541583, "grad_norm": 3.16357684135437, "learning_rate": 1.6059387245244208e-06, "loss": 0.4148, "step": 17130 }, { "epoch": 2.2299882858258493, "grad_norm": 2.7439839839935303, "learning_rate": 1.604404423864377e-06, "loss": 0.3784, "step": 17133 }, { "epoch": 2.23037875829754, "grad_norm": 3.130523443222046, "learning_rate": 1.6028707164077367e-06, "loss": 0.4011, "step": 17136 }, { "epoch": 2.230769230769231, "grad_norm": 2.8755338191986084, "learning_rate": 1.6013376024224363e-06, "loss": 0.3792, "step": 17139 }, { "epoch": 2.2311597032409214, "grad_norm": 2.6047468185424805, "learning_rate": 1.5998050821763033e-06, "loss": 0.3807, "step": 17142 }, { "epoch": 2.2315501757126124, "grad_norm": 2.492357015609741, "learning_rate": 1.598273155937073e-06, "loss": 0.3403, "step": 17145 }, { "epoch": 2.231940648184303, "grad_norm": 3.0767757892608643, "learning_rate": 1.5967418239723664e-06, "loss": 0.3441, "step": 17148 }, { "epoch": 2.232331120655994, "grad_norm": 2.8667490482330322, "learning_rate": 1.5952110865497017e-06, "loss": 0.3956, "step": 17151 }, { "epoch": 2.2327215931276845, "grad_norm": 2.9534928798675537, "learning_rate": 1.5936809439364992e-06, "loss": 0.3098, "step": 17154 }, { "epoch": 2.233112065599375, "grad_norm": 2.540276288986206, "learning_rate": 1.59215139640007e-06, "loss": 0.3845, "step": 17157 }, { "epoch": 2.233502538071066, "grad_norm": 2.741945743560791, "learning_rate": 1.590622444207623e-06, "loss": 0.3631, "step": 17160 }, { "epoch": 2.2338930105427566, "grad_norm": 2.531273603439331, "learning_rate": 1.5890940876262612e-06, "loss": 0.3323, "step": 17163 }, { "epoch": 2.2342834830144476, "grad_norm": 3.036818027496338, "learning_rate": 1.587566326922988e-06, "loss": 0.3745, "step": 17166 }, { "epoch": 2.234673955486138, "grad_norm": 2.696261167526245, "learning_rate": 1.5860391623646986e-06, "loss": 0.3351, "step": 17169 }, { "epoch": 2.235064427957829, "grad_norm": 2.7615208625793457, "learning_rate": 1.5845125942181844e-06, "loss": 0.3886, "step": 17172 }, { "epoch": 2.2354549004295197, "grad_norm": 3.1380362510681152, "learning_rate": 1.5829866227501367e-06, "loss": 0.3734, "step": 17175 }, { "epoch": 2.2358453729012107, "grad_norm": 2.6224167346954346, "learning_rate": 1.5814612482271368e-06, "loss": 0.3422, "step": 17178 }, { "epoch": 2.236235845372901, "grad_norm": 2.8184878826141357, "learning_rate": 1.5799364709156645e-06, "loss": 0.3194, "step": 17181 }, { "epoch": 2.2366263178445918, "grad_norm": 2.749946355819702, "learning_rate": 1.5784122910820976e-06, "loss": 0.3587, "step": 17184 }, { "epoch": 2.2370167903162828, "grad_norm": 3.1600887775421143, "learning_rate": 1.5768887089927031e-06, "loss": 0.3815, "step": 17187 }, { "epoch": 2.2374072627879733, "grad_norm": 2.7639830112457275, "learning_rate": 1.5753657249136523e-06, "loss": 0.3757, "step": 17190 }, { "epoch": 2.2377977352596643, "grad_norm": 2.792595624923706, "learning_rate": 1.573843339111003e-06, "loss": 0.3124, "step": 17193 }, { "epoch": 2.238188207731355, "grad_norm": 3.1691551208496094, "learning_rate": 1.5723215518507168e-06, "loss": 0.3515, "step": 17196 }, { "epoch": 2.238578680203046, "grad_norm": 2.5455732345581055, "learning_rate": 1.570800363398644e-06, "loss": 0.3376, "step": 17199 }, { "epoch": 2.2389691526747364, "grad_norm": 2.700528383255005, "learning_rate": 1.5692797740205345e-06, "loss": 0.3563, "step": 17202 }, { "epoch": 2.2393596251464274, "grad_norm": 3.191154718399048, "learning_rate": 1.5677597839820292e-06, "loss": 0.402, "step": 17205 }, { "epoch": 2.239750097618118, "grad_norm": 2.647813320159912, "learning_rate": 1.566240393548671e-06, "loss": 0.3639, "step": 17208 }, { "epoch": 2.2401405700898085, "grad_norm": 2.85390043258667, "learning_rate": 1.5647216029858924e-06, "loss": 0.3551, "step": 17211 }, { "epoch": 2.2405310425614995, "grad_norm": 2.7630879878997803, "learning_rate": 1.5632034125590212e-06, "loss": 0.3158, "step": 17214 }, { "epoch": 2.24092151503319, "grad_norm": 2.9064273834228516, "learning_rate": 1.5616858225332858e-06, "loss": 0.3789, "step": 17217 }, { "epoch": 2.241311987504881, "grad_norm": 2.9090540409088135, "learning_rate": 1.560168833173804e-06, "loss": 0.3402, "step": 17220 }, { "epoch": 2.2417024599765716, "grad_norm": 3.207202911376953, "learning_rate": 1.5586524447455892e-06, "loss": 0.4046, "step": 17223 }, { "epoch": 2.2420929324482626, "grad_norm": 2.5313596725463867, "learning_rate": 1.5571366575135544e-06, "loss": 0.3498, "step": 17226 }, { "epoch": 2.242483404919953, "grad_norm": 3.0708365440368652, "learning_rate": 1.555621471742501e-06, "loss": 0.3484, "step": 17229 }, { "epoch": 2.242873877391644, "grad_norm": 3.1129088401794434, "learning_rate": 1.5541068876971322e-06, "loss": 0.4346, "step": 17232 }, { "epoch": 2.2432643498633347, "grad_norm": 2.6781063079833984, "learning_rate": 1.552592905642039e-06, "loss": 0.3724, "step": 17235 }, { "epoch": 2.2436548223350252, "grad_norm": 3.136171817779541, "learning_rate": 1.5510795258417149e-06, "loss": 0.4596, "step": 17238 }, { "epoch": 2.244045294806716, "grad_norm": 2.706580877304077, "learning_rate": 1.5495667485605425e-06, "loss": 0.3578, "step": 17241 }, { "epoch": 2.2444357672784068, "grad_norm": 2.8641068935394287, "learning_rate": 1.5480545740627984e-06, "loss": 0.3521, "step": 17244 }, { "epoch": 2.2448262397500978, "grad_norm": 2.81913161277771, "learning_rate": 1.5465430026126605e-06, "loss": 0.385, "step": 17247 }, { "epoch": 2.2452167122217883, "grad_norm": 2.8231277465820312, "learning_rate": 1.5450320344741942e-06, "loss": 0.3675, "step": 17250 }, { "epoch": 2.2456071846934793, "grad_norm": 2.623539686203003, "learning_rate": 1.5435216699113641e-06, "loss": 0.3081, "step": 17253 }, { "epoch": 2.24599765716517, "grad_norm": 3.059145927429199, "learning_rate": 1.5420119091880247e-06, "loss": 0.4299, "step": 17256 }, { "epoch": 2.2463881296368604, "grad_norm": 2.7407338619232178, "learning_rate": 1.5405027525679323e-06, "loss": 0.329, "step": 17259 }, { "epoch": 2.2467786021085514, "grad_norm": 3.146299123764038, "learning_rate": 1.5389942003147313e-06, "loss": 0.3063, "step": 17262 }, { "epoch": 2.247169074580242, "grad_norm": 2.4751954078674316, "learning_rate": 1.5374862526919605e-06, "loss": 0.3154, "step": 17265 }, { "epoch": 2.247559547051933, "grad_norm": 2.857436180114746, "learning_rate": 1.5359789099630596e-06, "loss": 0.4555, "step": 17268 }, { "epoch": 2.2479500195236235, "grad_norm": 3.6831679344177246, "learning_rate": 1.5344721723913535e-06, "loss": 0.3972, "step": 17271 }, { "epoch": 2.2483404919953145, "grad_norm": 2.737274646759033, "learning_rate": 1.5329660402400703e-06, "loss": 0.3751, "step": 17274 }, { "epoch": 2.248730964467005, "grad_norm": 3.0916683673858643, "learning_rate": 1.5314605137723254e-06, "loss": 0.4125, "step": 17277 }, { "epoch": 2.2491214369386956, "grad_norm": 2.5740551948547363, "learning_rate": 1.529955593251133e-06, "loss": 0.3866, "step": 17280 }, { "epoch": 2.2495119094103866, "grad_norm": 2.7472755908966064, "learning_rate": 1.5284512789393984e-06, "loss": 0.3552, "step": 17283 }, { "epoch": 2.249902381882077, "grad_norm": 2.672215461730957, "learning_rate": 1.526947571099921e-06, "loss": 0.3339, "step": 17286 }, { "epoch": 2.250292854353768, "grad_norm": 2.8021116256713867, "learning_rate": 1.525444469995398e-06, "loss": 0.3135, "step": 17289 }, { "epoch": 2.2506833268254587, "grad_norm": 2.7901899814605713, "learning_rate": 1.5239419758884171e-06, "loss": 0.2812, "step": 17292 }, { "epoch": 2.2510737992971497, "grad_norm": 2.8856074810028076, "learning_rate": 1.5224400890414587e-06, "loss": 0.4185, "step": 17295 }, { "epoch": 2.2514642717688402, "grad_norm": 2.6049904823303223, "learning_rate": 1.5209388097169026e-06, "loss": 0.3618, "step": 17298 }, { "epoch": 2.2518547442405312, "grad_norm": 2.7991714477539062, "learning_rate": 1.5194381381770173e-06, "loss": 0.3726, "step": 17301 }, { "epoch": 2.2522452167122218, "grad_norm": 3.187734603881836, "learning_rate": 1.5179380746839678e-06, "loss": 0.4094, "step": 17304 }, { "epoch": 2.2526356891839123, "grad_norm": 2.6936240196228027, "learning_rate": 1.5164386194998094e-06, "loss": 0.3345, "step": 17307 }, { "epoch": 2.2530261616556033, "grad_norm": 2.7314226627349854, "learning_rate": 1.5149397728864979e-06, "loss": 0.3792, "step": 17310 }, { "epoch": 2.253416634127294, "grad_norm": 2.7893855571746826, "learning_rate": 1.5134415351058744e-06, "loss": 0.3403, "step": 17313 }, { "epoch": 2.253807106598985, "grad_norm": 2.6449899673461914, "learning_rate": 1.5119439064196823e-06, "loss": 0.3472, "step": 17316 }, { "epoch": 2.2541975790706754, "grad_norm": 3.2970783710479736, "learning_rate": 1.5104468870895495e-06, "loss": 0.4036, "step": 17319 }, { "epoch": 2.2545880515423664, "grad_norm": 3.0442137718200684, "learning_rate": 1.5089504773770069e-06, "loss": 0.324, "step": 17322 }, { "epoch": 2.254978524014057, "grad_norm": 2.8000681400299072, "learning_rate": 1.5074546775434718e-06, "loss": 0.3458, "step": 17325 }, { "epoch": 2.255368996485748, "grad_norm": 2.9774391651153564, "learning_rate": 1.5059594878502554e-06, "loss": 0.3931, "step": 17328 }, { "epoch": 2.2557594689574385, "grad_norm": 2.576894521713257, "learning_rate": 1.5044649085585678e-06, "loss": 0.3472, "step": 17331 }, { "epoch": 2.256149941429129, "grad_norm": 2.661025285720825, "learning_rate": 1.5029709399295066e-06, "loss": 0.3077, "step": 17334 }, { "epoch": 2.25654041390082, "grad_norm": 2.5842506885528564, "learning_rate": 1.5014775822240645e-06, "loss": 0.4093, "step": 17337 }, { "epoch": 2.2569308863725106, "grad_norm": 3.0119876861572266, "learning_rate": 1.4999848357031305e-06, "loss": 0.3561, "step": 17340 }, { "epoch": 2.2573213588442016, "grad_norm": 2.854466438293457, "learning_rate": 1.498492700627483e-06, "loss": 0.3519, "step": 17343 }, { "epoch": 2.257711831315892, "grad_norm": 3.8666045665740967, "learning_rate": 1.4970011772577925e-06, "loss": 0.3475, "step": 17346 }, { "epoch": 2.258102303787583, "grad_norm": 2.4900221824645996, "learning_rate": 1.495510265854629e-06, "loss": 0.3278, "step": 17349 }, { "epoch": 2.2584927762592737, "grad_norm": 2.904358386993408, "learning_rate": 1.4940199666784495e-06, "loss": 0.3085, "step": 17352 }, { "epoch": 2.2588832487309647, "grad_norm": 2.5294814109802246, "learning_rate": 1.4925302799896053e-06, "loss": 0.3618, "step": 17355 }, { "epoch": 2.2592737212026552, "grad_norm": 2.6248788833618164, "learning_rate": 1.491041206048342e-06, "loss": 0.355, "step": 17358 }, { "epoch": 2.259664193674346, "grad_norm": 2.460315465927124, "learning_rate": 1.4895527451147995e-06, "loss": 0.3503, "step": 17361 }, { "epoch": 2.260054666146037, "grad_norm": 2.8616855144500732, "learning_rate": 1.488064897449008e-06, "loss": 0.4144, "step": 17364 }, { "epoch": 2.2604451386177273, "grad_norm": 2.8192522525787354, "learning_rate": 1.4865776633108908e-06, "loss": 0.372, "step": 17367 }, { "epoch": 2.2608356110894183, "grad_norm": 2.8044068813323975, "learning_rate": 1.4850910429602633e-06, "loss": 0.3388, "step": 17370 }, { "epoch": 2.261226083561109, "grad_norm": 2.620159864425659, "learning_rate": 1.4836050366568378e-06, "loss": 0.356, "step": 17373 }, { "epoch": 2.2616165560328, "grad_norm": 2.9459471702575684, "learning_rate": 1.4821196446602148e-06, "loss": 0.4086, "step": 17376 }, { "epoch": 2.2620070285044904, "grad_norm": 2.8624160289764404, "learning_rate": 1.4806348672298875e-06, "loss": 0.4079, "step": 17379 }, { "epoch": 2.2623975009761814, "grad_norm": 2.7950611114501953, "learning_rate": 1.4791507046252474e-06, "loss": 0.3334, "step": 17382 }, { "epoch": 2.262787973447872, "grad_norm": 2.842926502227783, "learning_rate": 1.4776671571055723e-06, "loss": 0.3641, "step": 17385 }, { "epoch": 2.2631784459195625, "grad_norm": 2.3689680099487305, "learning_rate": 1.476184224930033e-06, "loss": 0.3775, "step": 17388 }, { "epoch": 2.2635689183912535, "grad_norm": 2.761807918548584, "learning_rate": 1.4747019083576986e-06, "loss": 0.3717, "step": 17391 }, { "epoch": 2.263959390862944, "grad_norm": 2.9553542137145996, "learning_rate": 1.4732202076475244e-06, "loss": 0.3061, "step": 17394 }, { "epoch": 2.264349863334635, "grad_norm": 2.7715632915496826, "learning_rate": 1.4717391230583595e-06, "loss": 0.3667, "step": 17397 }, { "epoch": 2.2647403358063256, "grad_norm": 2.5581488609313965, "learning_rate": 1.4702586548489467e-06, "loss": 0.3156, "step": 17400 }, { "epoch": 2.265130808278016, "grad_norm": 2.83203387260437, "learning_rate": 1.4687788032779233e-06, "loss": 0.3902, "step": 17403 }, { "epoch": 2.265521280749707, "grad_norm": 3.0823655128479004, "learning_rate": 1.4672995686038145e-06, "loss": 0.4572, "step": 17406 }, { "epoch": 2.2659117532213977, "grad_norm": 2.8829867839813232, "learning_rate": 1.4658209510850373e-06, "loss": 0.3649, "step": 17409 }, { "epoch": 2.2663022256930887, "grad_norm": 2.9928228855133057, "learning_rate": 1.4643429509799073e-06, "loss": 0.3896, "step": 17412 }, { "epoch": 2.2666926981647793, "grad_norm": 2.664480686187744, "learning_rate": 1.4628655685466258e-06, "loss": 0.3732, "step": 17415 }, { "epoch": 2.2670831706364702, "grad_norm": 2.658304452896118, "learning_rate": 1.4613888040432884e-06, "loss": 0.3118, "step": 17418 }, { "epoch": 2.267473643108161, "grad_norm": 2.6738052368164062, "learning_rate": 1.459912657727881e-06, "loss": 0.3854, "step": 17421 }, { "epoch": 2.267864115579852, "grad_norm": 3.1468684673309326, "learning_rate": 1.4584371298582866e-06, "loss": 0.3526, "step": 17424 }, { "epoch": 2.2682545880515423, "grad_norm": 2.8303110599517822, "learning_rate": 1.456962220692275e-06, "loss": 0.3028, "step": 17427 }, { "epoch": 2.268645060523233, "grad_norm": 2.5083484649658203, "learning_rate": 1.455487930487509e-06, "loss": 0.3114, "step": 17430 }, { "epoch": 2.269035532994924, "grad_norm": 2.9310407638549805, "learning_rate": 1.4540142595015461e-06, "loss": 0.3595, "step": 17433 }, { "epoch": 2.2694260054666144, "grad_norm": 3.0042102336883545, "learning_rate": 1.4525412079918327e-06, "loss": 0.3652, "step": 17436 }, { "epoch": 2.2698164779383054, "grad_norm": 2.961367130279541, "learning_rate": 1.451068776215706e-06, "loss": 0.4294, "step": 17439 }, { "epoch": 2.270206950409996, "grad_norm": 2.6684181690216064, "learning_rate": 1.449596964430398e-06, "loss": 0.3593, "step": 17442 }, { "epoch": 2.270597422881687, "grad_norm": 3.18241548538208, "learning_rate": 1.448125772893033e-06, "loss": 0.3256, "step": 17445 }, { "epoch": 2.2709878953533775, "grad_norm": 2.6874358654022217, "learning_rate": 1.4466552018606235e-06, "loss": 0.3528, "step": 17448 }, { "epoch": 2.2713783678250685, "grad_norm": 2.966919422149658, "learning_rate": 1.4451852515900733e-06, "loss": 0.4124, "step": 17451 }, { "epoch": 2.271768840296759, "grad_norm": 2.7124547958374023, "learning_rate": 1.443715922338183e-06, "loss": 0.3237, "step": 17454 }, { "epoch": 2.2721593127684496, "grad_norm": 2.423863172531128, "learning_rate": 1.44224721436164e-06, "loss": 0.3078, "step": 17457 }, { "epoch": 2.2725497852401406, "grad_norm": 2.627487897872925, "learning_rate": 1.4407791279170225e-06, "loss": 0.3764, "step": 17460 }, { "epoch": 2.272940257711831, "grad_norm": 2.7886862754821777, "learning_rate": 1.439311663260805e-06, "loss": 0.3625, "step": 17463 }, { "epoch": 2.273330730183522, "grad_norm": 2.791762113571167, "learning_rate": 1.43784482064935e-06, "loss": 0.3917, "step": 17466 }, { "epoch": 2.2737212026552127, "grad_norm": 3.137033700942993, "learning_rate": 1.4363786003389108e-06, "loss": 0.3928, "step": 17469 }, { "epoch": 2.2741116751269037, "grad_norm": 2.5043063163757324, "learning_rate": 1.4349130025856322e-06, "loss": 0.3395, "step": 17472 }, { "epoch": 2.2745021475985943, "grad_norm": 2.988461494445801, "learning_rate": 1.4334480276455532e-06, "loss": 0.3501, "step": 17475 }, { "epoch": 2.2748926200702853, "grad_norm": 3.358941078186035, "learning_rate": 1.4319836757746014e-06, "loss": 0.3746, "step": 17478 }, { "epoch": 2.275283092541976, "grad_norm": 2.6883740425109863, "learning_rate": 1.4305199472285936e-06, "loss": 0.3567, "step": 17481 }, { "epoch": 2.2756735650136664, "grad_norm": 2.756044864654541, "learning_rate": 1.4290568422632417e-06, "loss": 0.3877, "step": 17484 }, { "epoch": 2.2760640374853573, "grad_norm": 2.9537763595581055, "learning_rate": 1.4275943611341491e-06, "loss": 0.3604, "step": 17487 }, { "epoch": 2.276454509957048, "grad_norm": 3.069265365600586, "learning_rate": 1.4261325040968065e-06, "loss": 0.3909, "step": 17490 }, { "epoch": 2.276844982428739, "grad_norm": 2.7966959476470947, "learning_rate": 1.4246712714065953e-06, "loss": 0.3825, "step": 17493 }, { "epoch": 2.2772354549004294, "grad_norm": 2.7966370582580566, "learning_rate": 1.4232106633187932e-06, "loss": 0.3359, "step": 17496 }, { "epoch": 2.2776259273721204, "grad_norm": 2.9485177993774414, "learning_rate": 1.4217506800885638e-06, "loss": 0.3706, "step": 17499 }, { "epoch": 2.278016399843811, "grad_norm": 2.909208297729492, "learning_rate": 1.4202913219709612e-06, "loss": 0.3484, "step": 17502 }, { "epoch": 2.278406872315502, "grad_norm": 2.8797898292541504, "learning_rate": 1.4188325892209359e-06, "loss": 0.4045, "step": 17505 }, { "epoch": 2.2787973447871925, "grad_norm": 2.849977970123291, "learning_rate": 1.417374482093324e-06, "loss": 0.3927, "step": 17508 }, { "epoch": 2.279187817258883, "grad_norm": 2.766188383102417, "learning_rate": 1.4159170008428513e-06, "loss": 0.3263, "step": 17511 }, { "epoch": 2.279578289730574, "grad_norm": 2.4690568447113037, "learning_rate": 1.4144601457241402e-06, "loss": 0.3001, "step": 17514 }, { "epoch": 2.2799687622022646, "grad_norm": 2.894524335861206, "learning_rate": 1.4130039169916986e-06, "loss": 0.3283, "step": 17517 }, { "epoch": 2.2803592346739556, "grad_norm": 2.6616241931915283, "learning_rate": 1.4115483148999277e-06, "loss": 0.3245, "step": 17520 }, { "epoch": 2.280749707145646, "grad_norm": 2.9152584075927734, "learning_rate": 1.410093339703113e-06, "loss": 0.3555, "step": 17523 }, { "epoch": 2.281140179617337, "grad_norm": 2.7940874099731445, "learning_rate": 1.4086389916554439e-06, "loss": 0.3349, "step": 17526 }, { "epoch": 2.2815306520890277, "grad_norm": 2.692840814590454, "learning_rate": 1.4071852710109867e-06, "loss": 0.3424, "step": 17529 }, { "epoch": 2.2819211245607187, "grad_norm": 2.8541390895843506, "learning_rate": 1.4057321780237055e-06, "loss": 0.4011, "step": 17532 }, { "epoch": 2.2823115970324093, "grad_norm": 2.5435984134674072, "learning_rate": 1.4042797129474495e-06, "loss": 0.2974, "step": 17535 }, { "epoch": 2.2827020695041, "grad_norm": 2.908129930496216, "learning_rate": 1.4028278760359649e-06, "loss": 0.3302, "step": 17538 }, { "epoch": 2.283092541975791, "grad_norm": 2.641526222229004, "learning_rate": 1.4013766675428831e-06, "loss": 0.332, "step": 17541 }, { "epoch": 2.2834830144474814, "grad_norm": 2.8526651859283447, "learning_rate": 1.3999260877217259e-06, "loss": 0.3843, "step": 17544 }, { "epoch": 2.2838734869191724, "grad_norm": 2.7316031455993652, "learning_rate": 1.3984761368259087e-06, "loss": 0.3498, "step": 17547 }, { "epoch": 2.284263959390863, "grad_norm": 2.5377614498138428, "learning_rate": 1.3970268151087341e-06, "loss": 0.3165, "step": 17550 }, { "epoch": 2.2846544318625535, "grad_norm": 2.78267502784729, "learning_rate": 1.3955781228233938e-06, "loss": 0.3007, "step": 17553 }, { "epoch": 2.2850449043342445, "grad_norm": 2.560595989227295, "learning_rate": 1.3941300602229746e-06, "loss": 0.3325, "step": 17556 }, { "epoch": 2.285435376805935, "grad_norm": 2.923177480697632, "learning_rate": 1.3926826275604476e-06, "loss": 0.4159, "step": 17559 }, { "epoch": 2.285825849277626, "grad_norm": 2.99003267288208, "learning_rate": 1.3912358250886775e-06, "loss": 0.3853, "step": 17562 }, { "epoch": 2.2862163217493165, "grad_norm": 2.788792610168457, "learning_rate": 1.3897896530604138e-06, "loss": 0.3513, "step": 17565 }, { "epoch": 2.2866067942210075, "grad_norm": 3.1519463062286377, "learning_rate": 1.3883441117283058e-06, "loss": 0.4351, "step": 17568 }, { "epoch": 2.286997266692698, "grad_norm": 2.7040798664093018, "learning_rate": 1.386899201344884e-06, "loss": 0.3028, "step": 17571 }, { "epoch": 2.287387739164389, "grad_norm": 3.0929603576660156, "learning_rate": 1.3854549221625696e-06, "loss": 0.3445, "step": 17574 }, { "epoch": 2.2877782116360796, "grad_norm": 2.985767364501953, "learning_rate": 1.3840112744336775e-06, "loss": 0.3272, "step": 17577 }, { "epoch": 2.28816868410777, "grad_norm": 2.829904794692993, "learning_rate": 1.3825682584104088e-06, "loss": 0.3655, "step": 17580 }, { "epoch": 2.288559156579461, "grad_norm": 3.0158586502075195, "learning_rate": 1.3811258743448553e-06, "loss": 0.3236, "step": 17583 }, { "epoch": 2.2889496290511517, "grad_norm": 2.844355344772339, "learning_rate": 1.3796841224889973e-06, "loss": 0.3448, "step": 17586 }, { "epoch": 2.2893401015228427, "grad_norm": 2.8782236576080322, "learning_rate": 1.3782430030947087e-06, "loss": 0.4191, "step": 17589 }, { "epoch": 2.2897305739945333, "grad_norm": 2.368623733520508, "learning_rate": 1.3768025164137478e-06, "loss": 0.3288, "step": 17592 }, { "epoch": 2.2901210464662243, "grad_norm": 2.9353768825531006, "learning_rate": 1.375362662697764e-06, "loss": 0.3488, "step": 17595 }, { "epoch": 2.290511518937915, "grad_norm": 2.769230604171753, "learning_rate": 1.3739234421982995e-06, "loss": 0.3142, "step": 17598 }, { "epoch": 2.290901991409606, "grad_norm": 2.760913133621216, "learning_rate": 1.3724848551667812e-06, "loss": 0.3319, "step": 17601 }, { "epoch": 2.2912924638812964, "grad_norm": 2.777470350265503, "learning_rate": 1.3710469018545263e-06, "loss": 0.3467, "step": 17604 }, { "epoch": 2.291682936352987, "grad_norm": 2.680386543273926, "learning_rate": 1.3696095825127436e-06, "loss": 0.3023, "step": 17607 }, { "epoch": 2.292073408824678, "grad_norm": 2.7917463779449463, "learning_rate": 1.3681728973925313e-06, "loss": 0.393, "step": 17610 }, { "epoch": 2.2924638812963685, "grad_norm": 3.1995327472686768, "learning_rate": 1.3667368467448734e-06, "loss": 0.3997, "step": 17613 }, { "epoch": 2.2928543537680595, "grad_norm": 2.46683406829834, "learning_rate": 1.365301430820643e-06, "loss": 0.3356, "step": 17616 }, { "epoch": 2.29324482623975, "grad_norm": 3.129554271697998, "learning_rate": 1.3638666498706082e-06, "loss": 0.3385, "step": 17619 }, { "epoch": 2.293635298711441, "grad_norm": 2.8934786319732666, "learning_rate": 1.3624325041454206e-06, "loss": 0.3568, "step": 17622 }, { "epoch": 2.2940257711831316, "grad_norm": 2.703233242034912, "learning_rate": 1.360998993895622e-06, "loss": 0.3981, "step": 17625 }, { "epoch": 2.2944162436548226, "grad_norm": 2.704498529434204, "learning_rate": 1.3595661193716426e-06, "loss": 0.3571, "step": 17628 }, { "epoch": 2.294806716126513, "grad_norm": 2.624086856842041, "learning_rate": 1.3581338808238048e-06, "loss": 0.3518, "step": 17631 }, { "epoch": 2.2951971885982037, "grad_norm": 2.516305685043335, "learning_rate": 1.356702278502317e-06, "loss": 0.3229, "step": 17634 }, { "epoch": 2.2955876610698946, "grad_norm": 2.7615859508514404, "learning_rate": 1.3552713126572752e-06, "loss": 0.3107, "step": 17637 }, { "epoch": 2.295978133541585, "grad_norm": 2.727630138397217, "learning_rate": 1.353840983538669e-06, "loss": 0.367, "step": 17640 }, { "epoch": 2.296368606013276, "grad_norm": 2.636464834213257, "learning_rate": 1.3524112913963728e-06, "loss": 0.3203, "step": 17643 }, { "epoch": 2.2967590784849667, "grad_norm": 2.5415730476379395, "learning_rate": 1.3509822364801489e-06, "loss": 0.3375, "step": 17646 }, { "epoch": 2.2971495509566577, "grad_norm": 3.3716161251068115, "learning_rate": 1.3495538190396524e-06, "loss": 0.3362, "step": 17649 }, { "epoch": 2.2975400234283483, "grad_norm": 2.9915719032287598, "learning_rate": 1.348126039324425e-06, "loss": 0.3083, "step": 17652 }, { "epoch": 2.2979304959000393, "grad_norm": 3.0488333702087402, "learning_rate": 1.3466988975838967e-06, "loss": 0.3817, "step": 17655 }, { "epoch": 2.29832096837173, "grad_norm": 3.0410192012786865, "learning_rate": 1.3452723940673839e-06, "loss": 0.367, "step": 17658 }, { "epoch": 2.2987114408434204, "grad_norm": 3.0488669872283936, "learning_rate": 1.343846529024097e-06, "loss": 0.3775, "step": 17661 }, { "epoch": 2.2991019133151114, "grad_norm": 2.820769786834717, "learning_rate": 1.3424213027031297e-06, "loss": 0.3809, "step": 17664 }, { "epoch": 2.299492385786802, "grad_norm": 2.7791459560394287, "learning_rate": 1.3409967153534654e-06, "loss": 0.3571, "step": 17667 }, { "epoch": 2.299882858258493, "grad_norm": 3.0136008262634277, "learning_rate": 1.3395727672239789e-06, "loss": 0.3502, "step": 17670 }, { "epoch": 2.3002733307301835, "grad_norm": 2.868488073348999, "learning_rate": 1.3381494585634292e-06, "loss": 0.3603, "step": 17673 }, { "epoch": 2.300663803201874, "grad_norm": 2.7699248790740967, "learning_rate": 1.3367267896204662e-06, "loss": 0.3638, "step": 17676 }, { "epoch": 2.301054275673565, "grad_norm": 2.7452714443206787, "learning_rate": 1.3353047606436248e-06, "loss": 0.3532, "step": 17679 }, { "epoch": 2.3014447481452556, "grad_norm": 2.6756961345672607, "learning_rate": 1.3338833718813337e-06, "loss": 0.3326, "step": 17682 }, { "epoch": 2.3018352206169466, "grad_norm": 2.506359100341797, "learning_rate": 1.3324626235819055e-06, "loss": 0.3736, "step": 17685 }, { "epoch": 2.302225693088637, "grad_norm": 2.6358907222747803, "learning_rate": 1.3310425159935398e-06, "loss": 0.3283, "step": 17688 }, { "epoch": 2.302616165560328, "grad_norm": 2.714524745941162, "learning_rate": 1.3296230493643282e-06, "loss": 0.3764, "step": 17691 }, { "epoch": 2.3030066380320187, "grad_norm": 2.8931455612182617, "learning_rate": 1.3282042239422505e-06, "loss": 0.3603, "step": 17694 }, { "epoch": 2.3033971105037097, "grad_norm": 3.029452323913574, "learning_rate": 1.3267860399751698e-06, "loss": 0.3331, "step": 17697 }, { "epoch": 2.3037875829754, "grad_norm": 2.932361125946045, "learning_rate": 1.3253684977108394e-06, "loss": 0.3672, "step": 17700 }, { "epoch": 2.3041780554470908, "grad_norm": 3.243929624557495, "learning_rate": 1.3239515973969042e-06, "loss": 0.3559, "step": 17703 }, { "epoch": 2.3045685279187818, "grad_norm": 2.824859142303467, "learning_rate": 1.322535339280891e-06, "loss": 0.4494, "step": 17706 }, { "epoch": 2.3049590003904723, "grad_norm": 2.740846633911133, "learning_rate": 1.3211197236102163e-06, "loss": 0.3501, "step": 17709 }, { "epoch": 2.3053494728621633, "grad_norm": 2.9181246757507324, "learning_rate": 1.3197047506321887e-06, "loss": 0.3736, "step": 17712 }, { "epoch": 2.305739945333854, "grad_norm": 2.6927707195281982, "learning_rate": 1.3182904205939983e-06, "loss": 0.348, "step": 17715 }, { "epoch": 2.306130417805545, "grad_norm": 2.88547945022583, "learning_rate": 1.3168767337427251e-06, "loss": 0.4355, "step": 17718 }, { "epoch": 2.3065208902772354, "grad_norm": 2.848472833633423, "learning_rate": 1.3154636903253398e-06, "loss": 0.3914, "step": 17721 }, { "epoch": 2.3069113627489264, "grad_norm": 2.7487449645996094, "learning_rate": 1.3140512905886965e-06, "loss": 0.3643, "step": 17724 }, { "epoch": 2.307301835220617, "grad_norm": 2.6262028217315674, "learning_rate": 1.312639534779539e-06, "loss": 0.3678, "step": 17727 }, { "epoch": 2.3076923076923075, "grad_norm": 2.6408939361572266, "learning_rate": 1.3112284231444961e-06, "loss": 0.3232, "step": 17730 }, { "epoch": 2.3080827801639985, "grad_norm": 2.451420307159424, "learning_rate": 1.3098179559300877e-06, "loss": 0.3868, "step": 17733 }, { "epoch": 2.308473252635689, "grad_norm": 2.4723563194274902, "learning_rate": 1.3084081333827204e-06, "loss": 0.3023, "step": 17736 }, { "epoch": 2.30886372510738, "grad_norm": 2.9598135948181152, "learning_rate": 1.3069989557486868e-06, "loss": 0.3302, "step": 17739 }, { "epoch": 2.3092541975790706, "grad_norm": 2.8125874996185303, "learning_rate": 1.305590423274165e-06, "loss": 0.3879, "step": 17742 }, { "epoch": 2.3096446700507616, "grad_norm": 2.6237545013427734, "learning_rate": 1.3041825362052258e-06, "loss": 0.4228, "step": 17745 }, { "epoch": 2.310035142522452, "grad_norm": 2.6856138706207275, "learning_rate": 1.3027752947878226e-06, "loss": 0.3903, "step": 17748 }, { "epoch": 2.310425614994143, "grad_norm": 2.5263748168945312, "learning_rate": 1.301368699267796e-06, "loss": 0.328, "step": 17751 }, { "epoch": 2.3108160874658337, "grad_norm": 2.7629663944244385, "learning_rate": 1.2999627498908785e-06, "loss": 0.345, "step": 17754 }, { "epoch": 2.311206559937524, "grad_norm": 2.620549201965332, "learning_rate": 1.2985574469026847e-06, "loss": 0.3989, "step": 17757 }, { "epoch": 2.311597032409215, "grad_norm": 3.176088333129883, "learning_rate": 1.297152790548717e-06, "loss": 0.3533, "step": 17760 }, { "epoch": 2.3119875048809058, "grad_norm": 2.64750075340271, "learning_rate": 1.2957487810743686e-06, "loss": 0.3045, "step": 17763 }, { "epoch": 2.3123779773525968, "grad_norm": 2.6987838745117188, "learning_rate": 1.2943454187249154e-06, "loss": 0.3716, "step": 17766 }, { "epoch": 2.3127684498242873, "grad_norm": 2.43686580657959, "learning_rate": 1.2929427037455206e-06, "loss": 0.328, "step": 17769 }, { "epoch": 2.3131589222959783, "grad_norm": 2.7529258728027344, "learning_rate": 1.2915406363812384e-06, "loss": 0.3639, "step": 17772 }, { "epoch": 2.313549394767669, "grad_norm": 2.7594821453094482, "learning_rate": 1.2901392168770038e-06, "loss": 0.3543, "step": 17775 }, { "epoch": 2.31393986723936, "grad_norm": 2.7785696983337402, "learning_rate": 1.2887384454776453e-06, "loss": 0.3617, "step": 17778 }, { "epoch": 2.3143303397110504, "grad_norm": 2.8949971199035645, "learning_rate": 1.2873383224278717e-06, "loss": 0.3065, "step": 17781 }, { "epoch": 2.314720812182741, "grad_norm": 2.9128878116607666, "learning_rate": 1.2859388479722846e-06, "loss": 0.3951, "step": 17784 }, { "epoch": 2.315111284654432, "grad_norm": 3.0318446159362793, "learning_rate": 1.2845400223553666e-06, "loss": 0.4716, "step": 17787 }, { "epoch": 2.3155017571261225, "grad_norm": 3.099668264389038, "learning_rate": 1.2831418458214912e-06, "loss": 0.3485, "step": 17790 }, { "epoch": 2.3158922295978135, "grad_norm": 2.8386688232421875, "learning_rate": 1.2817443186149148e-06, "loss": 0.3256, "step": 17793 }, { "epoch": 2.316282702069504, "grad_norm": 2.8773865699768066, "learning_rate": 1.280347440979785e-06, "loss": 0.377, "step": 17796 }, { "epoch": 2.316673174541195, "grad_norm": 2.7028415203094482, "learning_rate": 1.2789512131601323e-06, "loss": 0.3315, "step": 17799 }, { "epoch": 2.3170636470128856, "grad_norm": 2.7776808738708496, "learning_rate": 1.2775556353998736e-06, "loss": 0.3006, "step": 17802 }, { "epoch": 2.3174541194845766, "grad_norm": 3.288285732269287, "learning_rate": 1.2761607079428157e-06, "loss": 0.4505, "step": 17805 }, { "epoch": 2.317844591956267, "grad_norm": 2.7252891063690186, "learning_rate": 1.2747664310326486e-06, "loss": 0.3552, "step": 17808 }, { "epoch": 2.3182350644279577, "grad_norm": 2.534891128540039, "learning_rate": 1.2733728049129473e-06, "loss": 0.3113, "step": 17811 }, { "epoch": 2.3186255368996487, "grad_norm": 2.530785083770752, "learning_rate": 1.271979829827179e-06, "loss": 0.3388, "step": 17814 }, { "epoch": 2.3190160093713392, "grad_norm": 2.7788338661193848, "learning_rate": 1.2705875060186902e-06, "loss": 0.3419, "step": 17817 }, { "epoch": 2.31940648184303, "grad_norm": 2.8521158695220947, "learning_rate": 1.2691958337307204e-06, "loss": 0.3486, "step": 17820 }, { "epoch": 2.3197969543147208, "grad_norm": 3.0471034049987793, "learning_rate": 1.267804813206388e-06, "loss": 0.3333, "step": 17823 }, { "epoch": 2.3201874267864113, "grad_norm": 2.7590341567993164, "learning_rate": 1.266414444688705e-06, "loss": 0.3046, "step": 17826 }, { "epoch": 2.3205778992581023, "grad_norm": 2.6539900302886963, "learning_rate": 1.2650247284205646e-06, "loss": 0.3385, "step": 17829 }, { "epoch": 2.320968371729793, "grad_norm": 2.5913002490997314, "learning_rate": 1.263635664644745e-06, "loss": 0.3055, "step": 17832 }, { "epoch": 2.321358844201484, "grad_norm": 2.7210516929626465, "learning_rate": 1.2622472536039164e-06, "loss": 0.3651, "step": 17835 }, { "epoch": 2.3217493166731744, "grad_norm": 2.820171594619751, "learning_rate": 1.2608594955406296e-06, "loss": 0.3353, "step": 17838 }, { "epoch": 2.3221397891448654, "grad_norm": 2.9673337936401367, "learning_rate": 1.2594723906973222e-06, "loss": 0.401, "step": 17841 }, { "epoch": 2.322530261616556, "grad_norm": 2.832606792449951, "learning_rate": 1.2580859393163185e-06, "loss": 0.4028, "step": 17844 }, { "epoch": 2.322920734088247, "grad_norm": 2.623936653137207, "learning_rate": 1.2567001416398306e-06, "loss": 0.3175, "step": 17847 }, { "epoch": 2.3233112065599375, "grad_norm": 2.713925361633301, "learning_rate": 1.2553149979099533e-06, "loss": 0.3578, "step": 17850 }, { "epoch": 2.323701679031628, "grad_norm": 2.729015827178955, "learning_rate": 1.2539305083686665e-06, "loss": 0.3648, "step": 17853 }, { "epoch": 2.324092151503319, "grad_norm": 2.9586987495422363, "learning_rate": 1.2525466732578407e-06, "loss": 0.3634, "step": 17856 }, { "epoch": 2.3244826239750096, "grad_norm": 2.6526246070861816, "learning_rate": 1.2511634928192262e-06, "loss": 0.3579, "step": 17859 }, { "epoch": 2.3248730964467006, "grad_norm": 2.8939921855926514, "learning_rate": 1.249780967294465e-06, "loss": 0.3976, "step": 17862 }, { "epoch": 2.325263568918391, "grad_norm": 2.95613169670105, "learning_rate": 1.2483990969250776e-06, "loss": 0.3352, "step": 17865 }, { "epoch": 2.325654041390082, "grad_norm": 2.9669291973114014, "learning_rate": 1.247017881952477e-06, "loss": 0.3695, "step": 17868 }, { "epoch": 2.3260445138617727, "grad_norm": 2.924299478530884, "learning_rate": 1.245637322617958e-06, "loss": 0.4432, "step": 17871 }, { "epoch": 2.3264349863334637, "grad_norm": 2.776327610015869, "learning_rate": 1.2442574191626988e-06, "loss": 0.4004, "step": 17874 }, { "epoch": 2.3268254588051542, "grad_norm": 2.6248810291290283, "learning_rate": 1.2428781718277688e-06, "loss": 0.3616, "step": 17877 }, { "epoch": 2.327215931276845, "grad_norm": 2.729130983352661, "learning_rate": 1.2414995808541186e-06, "loss": 0.3167, "step": 17880 }, { "epoch": 2.327606403748536, "grad_norm": 2.6671786308288574, "learning_rate": 1.2401216464825838e-06, "loss": 0.3463, "step": 17883 }, { "epoch": 2.3279968762202263, "grad_norm": 2.7406859397888184, "learning_rate": 1.2387443689538886e-06, "loss": 0.4256, "step": 17886 }, { "epoch": 2.3283873486919173, "grad_norm": 2.8111987113952637, "learning_rate": 1.23736774850864e-06, "loss": 0.3992, "step": 17889 }, { "epoch": 2.328777821163608, "grad_norm": 2.890775680541992, "learning_rate": 1.2359917853873304e-06, "loss": 0.3903, "step": 17892 }, { "epoch": 2.329168293635299, "grad_norm": 2.70546817779541, "learning_rate": 1.2346164798303356e-06, "loss": 0.3601, "step": 17895 }, { "epoch": 2.3295587661069894, "grad_norm": 2.588521957397461, "learning_rate": 1.2332418320779226e-06, "loss": 0.3838, "step": 17898 }, { "epoch": 2.3299492385786804, "grad_norm": 2.766190767288208, "learning_rate": 1.2318678423702358e-06, "loss": 0.362, "step": 17901 }, { "epoch": 2.330339711050371, "grad_norm": 2.680659055709839, "learning_rate": 1.230494510947311e-06, "loss": 0.4384, "step": 17904 }, { "epoch": 2.3307301835220615, "grad_norm": 2.6411266326904297, "learning_rate": 1.2291218380490644e-06, "loss": 0.3412, "step": 17907 }, { "epoch": 2.3311206559937525, "grad_norm": 2.884023666381836, "learning_rate": 1.2277498239153007e-06, "loss": 0.3798, "step": 17910 }, { "epoch": 2.331511128465443, "grad_norm": 2.8152637481689453, "learning_rate": 1.2263784687857078e-06, "loss": 0.4039, "step": 17913 }, { "epoch": 2.331901600937134, "grad_norm": 3.315389394760132, "learning_rate": 1.225007772899856e-06, "loss": 0.3791, "step": 17916 }, { "epoch": 2.3322920734088246, "grad_norm": 3.0322203636169434, "learning_rate": 1.2236377364972063e-06, "loss": 0.3572, "step": 17919 }, { "epoch": 2.3326825458805156, "grad_norm": 2.6254141330718994, "learning_rate": 1.2222683598171003e-06, "loss": 0.3524, "step": 17922 }, { "epoch": 2.333073018352206, "grad_norm": 2.877248525619507, "learning_rate": 1.2208996430987625e-06, "loss": 0.3472, "step": 17925 }, { "epoch": 2.333463490823897, "grad_norm": 2.834516763687134, "learning_rate": 1.2195315865813085e-06, "loss": 0.4036, "step": 17928 }, { "epoch": 2.3338539632955877, "grad_norm": 2.6819605827331543, "learning_rate": 1.2181641905037338e-06, "loss": 0.3461, "step": 17931 }, { "epoch": 2.3342444357672782, "grad_norm": 2.962890148162842, "learning_rate": 1.2167974551049177e-06, "loss": 0.403, "step": 17934 }, { "epoch": 2.3346349082389692, "grad_norm": 3.020080089569092, "learning_rate": 1.2154313806236284e-06, "loss": 0.4079, "step": 17937 }, { "epoch": 2.33502538071066, "grad_norm": 2.56555438041687, "learning_rate": 1.214065967298516e-06, "loss": 0.3579, "step": 17940 }, { "epoch": 2.335415853182351, "grad_norm": 3.1184959411621094, "learning_rate": 1.2127012153681128e-06, "loss": 0.4087, "step": 17943 }, { "epoch": 2.3358063256540413, "grad_norm": 2.3602640628814697, "learning_rate": 1.21133712507084e-06, "loss": 0.3074, "step": 17946 }, { "epoch": 2.336196798125732, "grad_norm": 3.22487735748291, "learning_rate": 1.2099736966450026e-06, "loss": 0.3818, "step": 17949 }, { "epoch": 2.336587270597423, "grad_norm": 2.820575475692749, "learning_rate": 1.208610930328788e-06, "loss": 0.3451, "step": 17952 }, { "epoch": 2.3369777430691134, "grad_norm": 2.8845815658569336, "learning_rate": 1.2072488263602672e-06, "loss": 0.3954, "step": 17955 }, { "epoch": 2.3373682155408044, "grad_norm": 2.554292917251587, "learning_rate": 1.2058873849773966e-06, "loss": 0.3808, "step": 17958 }, { "epoch": 2.337758688012495, "grad_norm": 2.7079639434814453, "learning_rate": 1.2045266064180195e-06, "loss": 0.4351, "step": 17961 }, { "epoch": 2.338149160484186, "grad_norm": 2.7546820640563965, "learning_rate": 1.2031664909198597e-06, "loss": 0.4072, "step": 17964 }, { "epoch": 2.3385396329558765, "grad_norm": 2.8127317428588867, "learning_rate": 1.2018070387205256e-06, "loss": 0.3719, "step": 17967 }, { "epoch": 2.3389301054275675, "grad_norm": 2.782156229019165, "learning_rate": 1.2004482500575126e-06, "loss": 0.3663, "step": 17970 }, { "epoch": 2.339320577899258, "grad_norm": 3.034802198410034, "learning_rate": 1.1990901251681974e-06, "loss": 0.3969, "step": 17973 }, { "epoch": 2.3397110503709486, "grad_norm": 2.7988011837005615, "learning_rate": 1.1977326642898395e-06, "loss": 0.3998, "step": 17976 }, { "epoch": 2.3401015228426396, "grad_norm": 2.6385715007781982, "learning_rate": 1.1963758676595883e-06, "loss": 0.3839, "step": 17979 }, { "epoch": 2.34049199531433, "grad_norm": 2.976228713989258, "learning_rate": 1.195019735514471e-06, "loss": 0.3575, "step": 17982 }, { "epoch": 2.340882467786021, "grad_norm": 2.7459535598754883, "learning_rate": 1.1936642680914007e-06, "loss": 0.3541, "step": 17985 }, { "epoch": 2.3412729402577117, "grad_norm": 2.916914939880371, "learning_rate": 1.1923094656271745e-06, "loss": 0.395, "step": 17988 }, { "epoch": 2.3416634127294027, "grad_norm": 2.652353286743164, "learning_rate": 1.1909553283584763e-06, "loss": 0.3298, "step": 17991 }, { "epoch": 2.3420538852010933, "grad_norm": 2.817486524581909, "learning_rate": 1.1896018565218692e-06, "loss": 0.3436, "step": 17994 }, { "epoch": 2.3424443576727842, "grad_norm": 2.9051315784454346, "learning_rate": 1.1882490503538003e-06, "loss": 0.3559, "step": 17997 }, { "epoch": 2.342834830144475, "grad_norm": 2.805586099624634, "learning_rate": 1.1868969100906052e-06, "loss": 0.3098, "step": 18000 }, { "epoch": 2.3432253026161654, "grad_norm": 2.8245179653167725, "learning_rate": 1.1855454359684982e-06, "loss": 0.3487, "step": 18003 }, { "epoch": 2.3436157750878563, "grad_norm": 2.939572811126709, "learning_rate": 1.1841946282235788e-06, "loss": 0.3594, "step": 18006 }, { "epoch": 2.344006247559547, "grad_norm": 2.8865296840667725, "learning_rate": 1.1828444870918292e-06, "loss": 0.3613, "step": 18009 }, { "epoch": 2.344396720031238, "grad_norm": 2.665978193283081, "learning_rate": 1.1814950128091197e-06, "loss": 0.3966, "step": 18012 }, { "epoch": 2.3447871925029284, "grad_norm": 2.8467841148376465, "learning_rate": 1.180146205611198e-06, "loss": 0.354, "step": 18015 }, { "epoch": 2.3451776649746194, "grad_norm": 2.763833999633789, "learning_rate": 1.1787980657336967e-06, "loss": 0.3064, "step": 18018 }, { "epoch": 2.34556813744631, "grad_norm": 2.5303876399993896, "learning_rate": 1.1774505934121361e-06, "loss": 0.3062, "step": 18021 }, { "epoch": 2.345958609918001, "grad_norm": 2.5517313480377197, "learning_rate": 1.176103788881916e-06, "loss": 0.3531, "step": 18024 }, { "epoch": 2.3463490823896915, "grad_norm": 2.689030885696411, "learning_rate": 1.1747576523783178e-06, "loss": 0.3974, "step": 18027 }, { "epoch": 2.346739554861382, "grad_norm": 2.534513235092163, "learning_rate": 1.1734121841365104e-06, "loss": 0.335, "step": 18030 }, { "epoch": 2.347130027333073, "grad_norm": 2.8086178302764893, "learning_rate": 1.1720673843915465e-06, "loss": 0.3382, "step": 18033 }, { "epoch": 2.3475204998047636, "grad_norm": 2.9617831707000732, "learning_rate": 1.1707232533783574e-06, "loss": 0.3986, "step": 18036 }, { "epoch": 2.3479109722764546, "grad_norm": 2.74955415725708, "learning_rate": 1.169379791331759e-06, "loss": 0.3935, "step": 18039 }, { "epoch": 2.348301444748145, "grad_norm": 2.898005962371826, "learning_rate": 1.1680369984864536e-06, "loss": 0.5264, "step": 18042 }, { "epoch": 2.348691917219836, "grad_norm": 3.2025671005249023, "learning_rate": 1.1666948750770236e-06, "loss": 0.3295, "step": 18045 }, { "epoch": 2.3490823896915267, "grad_norm": 2.760274887084961, "learning_rate": 1.1653534213379348e-06, "loss": 0.4275, "step": 18048 }, { "epoch": 2.3494728621632177, "grad_norm": 2.571065664291382, "learning_rate": 1.1640126375035348e-06, "loss": 0.4015, "step": 18051 }, { "epoch": 2.3498633346349083, "grad_norm": 2.9767799377441406, "learning_rate": 1.1626725238080593e-06, "loss": 0.3862, "step": 18054 }, { "epoch": 2.350253807106599, "grad_norm": 2.5151243209838867, "learning_rate": 1.161333080485621e-06, "loss": 0.3542, "step": 18057 }, { "epoch": 2.35064427957829, "grad_norm": 3.4934723377227783, "learning_rate": 1.1599943077702163e-06, "loss": 0.3933, "step": 18060 }, { "epoch": 2.3510347520499804, "grad_norm": 2.7796919345855713, "learning_rate": 1.1586562058957302e-06, "loss": 0.3271, "step": 18063 }, { "epoch": 2.3514252245216714, "grad_norm": 2.8203134536743164, "learning_rate": 1.157318775095923e-06, "loss": 0.3522, "step": 18066 }, { "epoch": 2.351815696993362, "grad_norm": 3.154359817504883, "learning_rate": 1.155982015604441e-06, "loss": 0.2913, "step": 18069 }, { "epoch": 2.352206169465053, "grad_norm": 2.685695171356201, "learning_rate": 1.1546459276548145e-06, "loss": 0.3054, "step": 18072 }, { "epoch": 2.3525966419367434, "grad_norm": 3.0085608959198, "learning_rate": 1.153310511480456e-06, "loss": 0.4078, "step": 18075 }, { "epoch": 2.3529871144084344, "grad_norm": 3.074324607849121, "learning_rate": 1.1519757673146586e-06, "loss": 0.3455, "step": 18078 }, { "epoch": 2.353377586880125, "grad_norm": 3.065185546875, "learning_rate": 1.1506416953905986e-06, "loss": 0.3692, "step": 18081 }, { "epoch": 2.3537680593518155, "grad_norm": 2.8767306804656982, "learning_rate": 1.1493082959413383e-06, "loss": 0.3868, "step": 18084 }, { "epoch": 2.3541585318235065, "grad_norm": 3.119493007659912, "learning_rate": 1.1479755691998172e-06, "loss": 0.3715, "step": 18087 }, { "epoch": 2.354549004295197, "grad_norm": 2.8516952991485596, "learning_rate": 1.1466435153988597e-06, "loss": 0.3386, "step": 18090 }, { "epoch": 2.354939476766888, "grad_norm": 2.712786912918091, "learning_rate": 1.1453121347711755e-06, "loss": 0.3865, "step": 18093 }, { "epoch": 2.3553299492385786, "grad_norm": 3.0909574031829834, "learning_rate": 1.1439814275493522e-06, "loss": 0.3825, "step": 18096 }, { "epoch": 2.355720421710269, "grad_norm": 2.726844549179077, "learning_rate": 1.1426513939658611e-06, "loss": 0.3344, "step": 18099 }, { "epoch": 2.35611089418196, "grad_norm": 2.7704012393951416, "learning_rate": 1.1413220342530556e-06, "loss": 0.4041, "step": 18102 }, { "epoch": 2.3565013666536507, "grad_norm": 2.8426220417022705, "learning_rate": 1.1399933486431747e-06, "loss": 0.3427, "step": 18105 }, { "epoch": 2.3568918391253417, "grad_norm": 2.8885271549224854, "learning_rate": 1.138665337368336e-06, "loss": 0.4052, "step": 18108 }, { "epoch": 2.3572823115970323, "grad_norm": 2.9842803478240967, "learning_rate": 1.1373380006605378e-06, "loss": 0.3765, "step": 18111 }, { "epoch": 2.3576727840687233, "grad_norm": 2.573024272918701, "learning_rate": 1.1360113387516654e-06, "loss": 0.2962, "step": 18114 }, { "epoch": 2.358063256540414, "grad_norm": 2.5947093963623047, "learning_rate": 1.1346853518734845e-06, "loss": 0.322, "step": 18117 }, { "epoch": 2.358453729012105, "grad_norm": 3.0403120517730713, "learning_rate": 1.133360040257641e-06, "loss": 0.4033, "step": 18120 }, { "epoch": 2.3588442014837954, "grad_norm": 2.958102226257324, "learning_rate": 1.1320354041356636e-06, "loss": 0.3175, "step": 18123 }, { "epoch": 2.359234673955486, "grad_norm": 2.5290329456329346, "learning_rate": 1.1307114437389648e-06, "loss": 0.3399, "step": 18126 }, { "epoch": 2.359625146427177, "grad_norm": 2.7605209350585938, "learning_rate": 1.1293881592988375e-06, "loss": 0.3571, "step": 18129 }, { "epoch": 2.3600156188988675, "grad_norm": 3.629070997238159, "learning_rate": 1.128065551046455e-06, "loss": 0.3737, "step": 18132 }, { "epoch": 2.3604060913705585, "grad_norm": 3.1546218395233154, "learning_rate": 1.1267436192128762e-06, "loss": 0.3364, "step": 18135 }, { "epoch": 2.360796563842249, "grad_norm": 2.7438509464263916, "learning_rate": 1.1254223640290391e-06, "loss": 0.322, "step": 18138 }, { "epoch": 2.36118703631394, "grad_norm": 2.711299419403076, "learning_rate": 1.1241017857257624e-06, "loss": 0.3045, "step": 18141 }, { "epoch": 2.3615775087856306, "grad_norm": 3.051964282989502, "learning_rate": 1.1227818845337523e-06, "loss": 0.3652, "step": 18144 }, { "epoch": 2.3619679812573215, "grad_norm": 2.6929006576538086, "learning_rate": 1.12146266068359e-06, "loss": 0.3793, "step": 18147 }, { "epoch": 2.362358453729012, "grad_norm": 2.7894413471221924, "learning_rate": 1.1201441144057413e-06, "loss": 0.3304, "step": 18150 }, { "epoch": 2.3627489262007026, "grad_norm": 2.5840792655944824, "learning_rate": 1.1188262459305515e-06, "loss": 0.3482, "step": 18153 }, { "epoch": 2.3631393986723936, "grad_norm": 3.0223450660705566, "learning_rate": 1.117509055488254e-06, "loss": 0.4019, "step": 18156 }, { "epoch": 2.363529871144084, "grad_norm": 2.9108870029449463, "learning_rate": 1.1161925433089578e-06, "loss": 0.3457, "step": 18159 }, { "epoch": 2.363920343615775, "grad_norm": 2.797194480895996, "learning_rate": 1.114876709622653e-06, "loss": 0.3378, "step": 18162 }, { "epoch": 2.3643108160874657, "grad_norm": 2.627267599105835, "learning_rate": 1.1135615546592132e-06, "loss": 0.3834, "step": 18165 }, { "epoch": 2.3647012885591567, "grad_norm": 2.9231019020080566, "learning_rate": 1.1122470786483946e-06, "loss": 0.3934, "step": 18168 }, { "epoch": 2.3650917610308473, "grad_norm": 2.57883358001709, "learning_rate": 1.1109332818198338e-06, "loss": 0.4163, "step": 18171 }, { "epoch": 2.3654822335025383, "grad_norm": 3.0240585803985596, "learning_rate": 1.1096201644030446e-06, "loss": 0.371, "step": 18174 }, { "epoch": 2.365872705974229, "grad_norm": 2.887322425842285, "learning_rate": 1.108307726627431e-06, "loss": 0.3658, "step": 18177 }, { "epoch": 2.3662631784459194, "grad_norm": 2.607095956802368, "learning_rate": 1.1069959687222704e-06, "loss": 0.3982, "step": 18180 }, { "epoch": 2.3666536509176104, "grad_norm": 3.5808451175689697, "learning_rate": 1.1056848909167223e-06, "loss": 0.3628, "step": 18183 }, { "epoch": 2.367044123389301, "grad_norm": 3.329209566116333, "learning_rate": 1.1043744934398332e-06, "loss": 0.3591, "step": 18186 }, { "epoch": 2.367434595860992, "grad_norm": 2.737271308898926, "learning_rate": 1.1030647765205248e-06, "loss": 0.3535, "step": 18189 }, { "epoch": 2.3678250683326825, "grad_norm": 2.989947557449341, "learning_rate": 1.1017557403876012e-06, "loss": 0.3633, "step": 18192 }, { "epoch": 2.3682155408043735, "grad_norm": 2.9825870990753174, "learning_rate": 1.1004473852697484e-06, "loss": 0.3497, "step": 18195 }, { "epoch": 2.368606013276064, "grad_norm": 3.08147931098938, "learning_rate": 1.0991397113955355e-06, "loss": 0.371, "step": 18198 }, { "epoch": 2.368996485747755, "grad_norm": 2.5311295986175537, "learning_rate": 1.0978327189934085e-06, "loss": 0.2979, "step": 18201 }, { "epoch": 2.3693869582194456, "grad_norm": 2.984403371810913, "learning_rate": 1.0965264082916954e-06, "loss": 0.388, "step": 18204 }, { "epoch": 2.369777430691136, "grad_norm": 2.8916494846343994, "learning_rate": 1.0952207795186086e-06, "loss": 0.3466, "step": 18207 }, { "epoch": 2.370167903162827, "grad_norm": 2.8561973571777344, "learning_rate": 1.0939158329022371e-06, "loss": 0.3492, "step": 18210 }, { "epoch": 2.3705583756345177, "grad_norm": 2.66711163520813, "learning_rate": 1.0926115686705523e-06, "loss": 0.3246, "step": 18213 }, { "epoch": 2.3709488481062087, "grad_norm": 2.8468332290649414, "learning_rate": 1.0913079870514055e-06, "loss": 0.4033, "step": 18216 }, { "epoch": 2.371339320577899, "grad_norm": 2.7105188369750977, "learning_rate": 1.0900050882725316e-06, "loss": 0.3426, "step": 18219 }, { "epoch": 2.37172979304959, "grad_norm": 3.1098861694335938, "learning_rate": 1.0887028725615433e-06, "loss": 0.3738, "step": 18222 }, { "epoch": 2.3721202655212807, "grad_norm": 2.924067258834839, "learning_rate": 1.0874013401459338e-06, "loss": 0.3517, "step": 18225 }, { "epoch": 2.3725107379929717, "grad_norm": 2.9183595180511475, "learning_rate": 1.0861004912530804e-06, "loss": 0.3759, "step": 18228 }, { "epoch": 2.3729012104646623, "grad_norm": 2.9503722190856934, "learning_rate": 1.084800326110238e-06, "loss": 0.3523, "step": 18231 }, { "epoch": 2.373291682936353, "grad_norm": 2.875462770462036, "learning_rate": 1.0835008449445406e-06, "loss": 0.33, "step": 18234 }, { "epoch": 2.373682155408044, "grad_norm": 2.600781202316284, "learning_rate": 1.0822020479830064e-06, "loss": 0.3591, "step": 18237 }, { "epoch": 2.3740726278797344, "grad_norm": 2.702268123626709, "learning_rate": 1.0809039354525342e-06, "loss": 0.3282, "step": 18240 }, { "epoch": 2.3744631003514254, "grad_norm": 2.848515748977661, "learning_rate": 1.0796065075798995e-06, "loss": 0.2933, "step": 18243 }, { "epoch": 2.374853572823116, "grad_norm": 2.4967212677001953, "learning_rate": 1.0783097645917594e-06, "loss": 0.3887, "step": 18246 }, { "epoch": 2.3752440452948065, "grad_norm": 2.6020994186401367, "learning_rate": 1.0770137067146552e-06, "loss": 0.3011, "step": 18249 }, { "epoch": 2.3756345177664975, "grad_norm": 2.908611297607422, "learning_rate": 1.0757183341750033e-06, "loss": 0.3948, "step": 18252 }, { "epoch": 2.376024990238188, "grad_norm": 3.1716883182525635, "learning_rate": 1.0744236471991016e-06, "loss": 0.3868, "step": 18255 }, { "epoch": 2.376415462709879, "grad_norm": 2.6089026927948, "learning_rate": 1.0731296460131319e-06, "loss": 0.3903, "step": 18258 }, { "epoch": 2.3768059351815696, "grad_norm": 2.7589609622955322, "learning_rate": 1.0718363308431524e-06, "loss": 0.3843, "step": 18261 }, { "epoch": 2.3771964076532606, "grad_norm": 2.489070415496826, "learning_rate": 1.0705437019151016e-06, "loss": 0.3683, "step": 18264 }, { "epoch": 2.377586880124951, "grad_norm": 2.6437742710113525, "learning_rate": 1.069251759454799e-06, "loss": 0.4111, "step": 18267 }, { "epoch": 2.377977352596642, "grad_norm": 2.870964288711548, "learning_rate": 1.067960503687946e-06, "loss": 0.3937, "step": 18270 }, { "epoch": 2.3783678250683327, "grad_norm": 2.932481527328491, "learning_rate": 1.066669934840121e-06, "loss": 0.3452, "step": 18273 }, { "epoch": 2.378758297540023, "grad_norm": 2.969200611114502, "learning_rate": 1.065380053136783e-06, "loss": 0.3703, "step": 18276 }, { "epoch": 2.379148770011714, "grad_norm": 3.3855032920837402, "learning_rate": 1.0640908588032722e-06, "loss": 0.3405, "step": 18279 }, { "epoch": 2.3795392424834048, "grad_norm": 2.7585716247558594, "learning_rate": 1.0628023520648102e-06, "loss": 0.3448, "step": 18282 }, { "epoch": 2.3799297149550958, "grad_norm": 2.9133167266845703, "learning_rate": 1.0615145331464937e-06, "loss": 0.3375, "step": 18285 }, { "epoch": 2.3803201874267863, "grad_norm": 2.9258482456207275, "learning_rate": 1.0602274022733023e-06, "loss": 0.4111, "step": 18288 }, { "epoch": 2.3807106598984773, "grad_norm": 2.8225510120391846, "learning_rate": 1.0589409596700966e-06, "loss": 0.3206, "step": 18291 }, { "epoch": 2.381101132370168, "grad_norm": 2.6298437118530273, "learning_rate": 1.0576552055616151e-06, "loss": 0.3135, "step": 18294 }, { "epoch": 2.381491604841859, "grad_norm": 2.665774345397949, "learning_rate": 1.0563701401724735e-06, "loss": 0.4122, "step": 18297 }, { "epoch": 2.3818820773135494, "grad_norm": 2.916292190551758, "learning_rate": 1.0550857637271744e-06, "loss": 0.4741, "step": 18300 }, { "epoch": 2.38227254978524, "grad_norm": 2.903151512145996, "learning_rate": 1.0538020764500929e-06, "loss": 0.3216, "step": 18303 }, { "epoch": 2.382663022256931, "grad_norm": 2.8354744911193848, "learning_rate": 1.052519078565486e-06, "loss": 0.3428, "step": 18306 }, { "epoch": 2.3830534947286215, "grad_norm": 2.986859083175659, "learning_rate": 1.051236770297493e-06, "loss": 0.3477, "step": 18309 }, { "epoch": 2.3834439672003125, "grad_norm": 2.7683563232421875, "learning_rate": 1.0499551518701296e-06, "loss": 0.3927, "step": 18312 }, { "epoch": 2.383834439672003, "grad_norm": 2.669161319732666, "learning_rate": 1.048674223507291e-06, "loss": 0.3973, "step": 18315 }, { "epoch": 2.384224912143694, "grad_norm": 2.9369685649871826, "learning_rate": 1.047393985432752e-06, "loss": 0.3683, "step": 18318 }, { "epoch": 2.3846153846153846, "grad_norm": 2.722029209136963, "learning_rate": 1.0461144378701688e-06, "loss": 0.2915, "step": 18321 }, { "epoch": 2.3850058570870756, "grad_norm": 2.844775438308716, "learning_rate": 1.0448355810430766e-06, "loss": 0.3954, "step": 18324 }, { "epoch": 2.385396329558766, "grad_norm": 2.715589761734009, "learning_rate": 1.0435574151748878e-06, "loss": 0.3586, "step": 18327 }, { "epoch": 2.3857868020304567, "grad_norm": 3.002556562423706, "learning_rate": 1.0422799404888945e-06, "loss": 0.4168, "step": 18330 }, { "epoch": 2.3861772745021477, "grad_norm": 2.758995532989502, "learning_rate": 1.0410031572082712e-06, "loss": 0.3743, "step": 18333 }, { "epoch": 2.3865677469738382, "grad_norm": 2.8067500591278076, "learning_rate": 1.0397270655560676e-06, "loss": 0.3647, "step": 18336 }, { "epoch": 2.386958219445529, "grad_norm": 2.654280185699463, "learning_rate": 1.0384516657552129e-06, "loss": 0.3467, "step": 18339 }, { "epoch": 2.3873486919172198, "grad_norm": 2.651869535446167, "learning_rate": 1.03717695802852e-06, "loss": 0.3492, "step": 18342 }, { "epoch": 2.3877391643889108, "grad_norm": 2.8447647094726562, "learning_rate": 1.035902942598676e-06, "loss": 0.3398, "step": 18345 }, { "epoch": 2.3881296368606013, "grad_norm": 2.987757444381714, "learning_rate": 1.034629619688247e-06, "loss": 0.441, "step": 18348 }, { "epoch": 2.3885201093322923, "grad_norm": 2.6804354190826416, "learning_rate": 1.0333569895196832e-06, "loss": 0.3859, "step": 18351 }, { "epoch": 2.388910581803983, "grad_norm": 2.683786153793335, "learning_rate": 1.0320850523153087e-06, "loss": 0.3567, "step": 18354 }, { "epoch": 2.3893010542756734, "grad_norm": 2.5274264812469482, "learning_rate": 1.0308138082973285e-06, "loss": 0.2944, "step": 18357 }, { "epoch": 2.3896915267473644, "grad_norm": 2.8091444969177246, "learning_rate": 1.0295432576878246e-06, "loss": 0.343, "step": 18360 }, { "epoch": 2.390081999219055, "grad_norm": 2.602357864379883, "learning_rate": 1.0282734007087601e-06, "loss": 0.312, "step": 18363 }, { "epoch": 2.390472471690746, "grad_norm": 2.9505271911621094, "learning_rate": 1.0270042375819795e-06, "loss": 0.3419, "step": 18366 }, { "epoch": 2.3908629441624365, "grad_norm": 2.739809989929199, "learning_rate": 1.025735768529199e-06, "loss": 0.3484, "step": 18369 }, { "epoch": 2.391253416634127, "grad_norm": 2.7395405769348145, "learning_rate": 1.0244679937720203e-06, "loss": 0.3406, "step": 18372 }, { "epoch": 2.391643889105818, "grad_norm": 3.019829034805298, "learning_rate": 1.0232009135319198e-06, "loss": 0.4535, "step": 18375 }, { "epoch": 2.3920343615775086, "grad_norm": 2.979292631149292, "learning_rate": 1.021934528030254e-06, "loss": 0.4343, "step": 18378 }, { "epoch": 2.3924248340491996, "grad_norm": 2.9323439598083496, "learning_rate": 1.0206688374882562e-06, "loss": 0.3784, "step": 18381 }, { "epoch": 2.39281530652089, "grad_norm": 2.577038049697876, "learning_rate": 1.0194038421270426e-06, "loss": 0.3109, "step": 18384 }, { "epoch": 2.393205778992581, "grad_norm": 3.026864767074585, "learning_rate": 1.0181395421676038e-06, "loss": 0.3493, "step": 18387 }, { "epoch": 2.3935962514642717, "grad_norm": 2.9030680656433105, "learning_rate": 1.0168759378308085e-06, "loss": 0.348, "step": 18390 }, { "epoch": 2.3939867239359627, "grad_norm": 2.8374221324920654, "learning_rate": 1.0156130293374094e-06, "loss": 0.3319, "step": 18393 }, { "epoch": 2.3943771964076532, "grad_norm": 2.8880085945129395, "learning_rate": 1.0143508169080323e-06, "loss": 0.3814, "step": 18396 }, { "epoch": 2.394767668879344, "grad_norm": 2.656985282897949, "learning_rate": 1.013089300763181e-06, "loss": 0.371, "step": 18399 }, { "epoch": 2.3951581413510348, "grad_norm": 2.770444631576538, "learning_rate": 1.0118284811232432e-06, "loss": 0.3254, "step": 18402 }, { "epoch": 2.3955486138227253, "grad_norm": 2.928893804550171, "learning_rate": 1.010568358208479e-06, "loss": 0.3686, "step": 18405 }, { "epoch": 2.3959390862944163, "grad_norm": 3.908521890640259, "learning_rate": 1.009308932239031e-06, "loss": 0.3932, "step": 18408 }, { "epoch": 2.396329558766107, "grad_norm": 2.9738271236419678, "learning_rate": 1.008050203434916e-06, "loss": 0.3626, "step": 18411 }, { "epoch": 2.396720031237798, "grad_norm": 3.0634217262268066, "learning_rate": 1.006792172016034e-06, "loss": 0.3842, "step": 18414 }, { "epoch": 2.3971105037094884, "grad_norm": 2.783146858215332, "learning_rate": 1.005534838202159e-06, "loss": 0.3737, "step": 18417 }, { "epoch": 2.3975009761811794, "grad_norm": 2.868107795715332, "learning_rate": 1.004278202212945e-06, "loss": 0.3692, "step": 18420 }, { "epoch": 2.39789144865287, "grad_norm": 3.5386221408843994, "learning_rate": 1.0030222642679217e-06, "loss": 0.3371, "step": 18423 }, { "epoch": 2.3982819211245605, "grad_norm": 2.8862059116363525, "learning_rate": 1.0017670245865014e-06, "loss": 0.3265, "step": 18426 }, { "epoch": 2.3986723935962515, "grad_norm": 2.635298490524292, "learning_rate": 1.0005124833879714e-06, "loss": 0.329, "step": 18429 }, { "epoch": 2.399062866067942, "grad_norm": 2.8082430362701416, "learning_rate": 9.99258640891495e-07, "loss": 0.3584, "step": 18432 }, { "epoch": 2.399453338539633, "grad_norm": 3.525629997253418, "learning_rate": 9.980054973161196e-07, "loss": 0.381, "step": 18435 }, { "epoch": 2.3998438110113236, "grad_norm": 3.2471086978912354, "learning_rate": 9.967530528807644e-07, "loss": 0.3784, "step": 18438 }, { "epoch": 2.4002342834830146, "grad_norm": 3.815002202987671, "learning_rate": 9.95501307804228e-07, "loss": 0.3329, "step": 18441 }, { "epoch": 2.400624755954705, "grad_norm": 3.0479953289031982, "learning_rate": 9.942502623051908e-07, "loss": 0.3522, "step": 18444 }, { "epoch": 2.401015228426396, "grad_norm": 2.807485818862915, "learning_rate": 9.929999166022042e-07, "loss": 0.3269, "step": 18447 }, { "epoch": 2.4014057008980867, "grad_norm": 2.907914400100708, "learning_rate": 9.91750270913704e-07, "loss": 0.3308, "step": 18450 }, { "epoch": 2.4017961733697772, "grad_norm": 2.5052411556243896, "learning_rate": 9.905013254579976e-07, "loss": 0.3099, "step": 18453 }, { "epoch": 2.4021866458414682, "grad_norm": 2.7181320190429688, "learning_rate": 9.892530804532768e-07, "loss": 0.3439, "step": 18456 }, { "epoch": 2.402577118313159, "grad_norm": 3.0974040031433105, "learning_rate": 9.880055361176049e-07, "loss": 0.4185, "step": 18459 }, { "epoch": 2.40296759078485, "grad_norm": 3.3100745677948, "learning_rate": 9.867586926689249e-07, "loss": 0.3995, "step": 18462 }, { "epoch": 2.4033580632565403, "grad_norm": 2.790778160095215, "learning_rate": 9.8551255032506e-07, "loss": 0.3481, "step": 18465 }, { "epoch": 2.4037485357282313, "grad_norm": 2.46254563331604, "learning_rate": 9.842671093037075e-07, "loss": 0.3661, "step": 18468 }, { "epoch": 2.404139008199922, "grad_norm": 2.7810397148132324, "learning_rate": 9.830223698224428e-07, "loss": 0.365, "step": 18471 }, { "epoch": 2.404529480671613, "grad_norm": 2.764301300048828, "learning_rate": 9.817783320987183e-07, "loss": 0.3454, "step": 18474 }, { "epoch": 2.4049199531433034, "grad_norm": 3.241234302520752, "learning_rate": 9.805349963498672e-07, "loss": 0.3636, "step": 18477 }, { "epoch": 2.405310425614994, "grad_norm": 2.733487844467163, "learning_rate": 9.792923627930972e-07, "loss": 0.3371, "step": 18480 }, { "epoch": 2.405700898086685, "grad_norm": 2.4645349979400635, "learning_rate": 9.780504316454915e-07, "loss": 0.351, "step": 18483 }, { "epoch": 2.4060913705583755, "grad_norm": 2.7485122680664062, "learning_rate": 9.768092031240155e-07, "loss": 0.399, "step": 18486 }, { "epoch": 2.4064818430300665, "grad_norm": 2.626877784729004, "learning_rate": 9.75568677445507e-07, "loss": 0.3811, "step": 18489 }, { "epoch": 2.406872315501757, "grad_norm": 2.7127082347869873, "learning_rate": 9.743288548266855e-07, "loss": 0.3375, "step": 18492 }, { "epoch": 2.407262787973448, "grad_norm": 2.722588300704956, "learning_rate": 9.730897354841435e-07, "loss": 0.3514, "step": 18495 }, { "epoch": 2.4076532604451386, "grad_norm": 3.016986846923828, "learning_rate": 9.718513196343539e-07, "loss": 0.3149, "step": 18498 }, { "epoch": 2.4080437329168296, "grad_norm": 2.604357957839966, "learning_rate": 9.70613607493665e-07, "loss": 0.3478, "step": 18501 }, { "epoch": 2.40843420538852, "grad_norm": 2.603797435760498, "learning_rate": 9.693765992783017e-07, "loss": 0.309, "step": 18504 }, { "epoch": 2.4088246778602107, "grad_norm": 2.9746592044830322, "learning_rate": 9.681402952043677e-07, "loss": 0.3852, "step": 18507 }, { "epoch": 2.4092151503319017, "grad_norm": 3.175025224685669, "learning_rate": 9.669046954878425e-07, "loss": 0.3769, "step": 18510 }, { "epoch": 2.4096056228035923, "grad_norm": 2.841076374053955, "learning_rate": 9.65669800344582e-07, "loss": 0.3367, "step": 18513 }, { "epoch": 2.4099960952752832, "grad_norm": 2.7390999794006348, "learning_rate": 9.644356099903208e-07, "loss": 0.3437, "step": 18516 }, { "epoch": 2.410386567746974, "grad_norm": 2.828277349472046, "learning_rate": 9.632021246406693e-07, "loss": 0.3318, "step": 18519 }, { "epoch": 2.4107770402186643, "grad_norm": 2.4388415813446045, "learning_rate": 9.619693445111145e-07, "loss": 0.344, "step": 18522 }, { "epoch": 2.4111675126903553, "grad_norm": 2.6832144260406494, "learning_rate": 9.607372698170191e-07, "loss": 0.3834, "step": 18525 }, { "epoch": 2.411557985162046, "grad_norm": 2.858412742614746, "learning_rate": 9.595059007736268e-07, "loss": 0.3879, "step": 18528 }, { "epoch": 2.411948457633737, "grad_norm": 2.7546768188476562, "learning_rate": 9.582752375960519e-07, "loss": 0.3568, "step": 18531 }, { "epoch": 2.4123389301054274, "grad_norm": 2.633880853652954, "learning_rate": 9.570452804992925e-07, "loss": 0.3444, "step": 18534 }, { "epoch": 2.4127294025771184, "grad_norm": 2.5289742946624756, "learning_rate": 9.558160296982154e-07, "loss": 0.346, "step": 18537 }, { "epoch": 2.413119875048809, "grad_norm": 2.8704309463500977, "learning_rate": 9.54587485407572e-07, "loss": 0.3572, "step": 18540 }, { "epoch": 2.4135103475205, "grad_norm": 2.7778470516204834, "learning_rate": 9.533596478419843e-07, "loss": 0.417, "step": 18543 }, { "epoch": 2.4139008199921905, "grad_norm": 2.997497320175171, "learning_rate": 9.521325172159518e-07, "loss": 0.3553, "step": 18546 }, { "epoch": 2.414291292463881, "grad_norm": 3.075047016143799, "learning_rate": 9.509060937438546e-07, "loss": 0.3914, "step": 18549 }, { "epoch": 2.414681764935572, "grad_norm": 2.607013463973999, "learning_rate": 9.496803776399449e-07, "loss": 0.3676, "step": 18552 }, { "epoch": 2.4150722374072626, "grad_norm": 2.543388843536377, "learning_rate": 9.484553691183512e-07, "loss": 0.3634, "step": 18555 }, { "epoch": 2.4154627098789536, "grad_norm": 2.790480852127075, "learning_rate": 9.472310683930824e-07, "loss": 0.4357, "step": 18558 }, { "epoch": 2.415853182350644, "grad_norm": 2.679603338241577, "learning_rate": 9.460074756780202e-07, "loss": 0.3298, "step": 18561 }, { "epoch": 2.416243654822335, "grad_norm": 2.8844940662384033, "learning_rate": 9.44784591186923e-07, "loss": 0.3547, "step": 18564 }, { "epoch": 2.4166341272940257, "grad_norm": 2.9198670387268066, "learning_rate": 9.435624151334272e-07, "loss": 0.3595, "step": 18567 }, { "epoch": 2.4170245997657167, "grad_norm": 2.825845956802368, "learning_rate": 9.423409477310446e-07, "loss": 0.3814, "step": 18570 }, { "epoch": 2.4174150722374073, "grad_norm": 2.549248456954956, "learning_rate": 9.411201891931609e-07, "loss": 0.3238, "step": 18573 }, { "epoch": 2.417805544709098, "grad_norm": 3.1343023777008057, "learning_rate": 9.399001397330415e-07, "loss": 0.4215, "step": 18576 }, { "epoch": 2.418196017180789, "grad_norm": 2.932382822036743, "learning_rate": 9.386807995638275e-07, "loss": 0.3348, "step": 18579 }, { "epoch": 2.4185864896524794, "grad_norm": 2.6527950763702393, "learning_rate": 9.374621688985341e-07, "loss": 0.3421, "step": 18582 }, { "epoch": 2.4189769621241703, "grad_norm": 2.6357645988464355, "learning_rate": 9.362442479500539e-07, "loss": 0.33, "step": 18585 }, { "epoch": 2.419367434595861, "grad_norm": 2.9315764904022217, "learning_rate": 9.350270369311531e-07, "loss": 0.3976, "step": 18588 }, { "epoch": 2.419757907067552, "grad_norm": 3.037123441696167, "learning_rate": 9.338105360544786e-07, "loss": 0.3024, "step": 18591 }, { "epoch": 2.4201483795392424, "grad_norm": 2.859992742538452, "learning_rate": 9.325947455325496e-07, "loss": 0.4043, "step": 18594 }, { "epoch": 2.4205388520109334, "grad_norm": 2.6656060218811035, "learning_rate": 9.313796655777613e-07, "loss": 0.3422, "step": 18597 }, { "epoch": 2.420929324482624, "grad_norm": 2.7755658626556396, "learning_rate": 9.301652964023866e-07, "loss": 0.3565, "step": 18600 }, { "epoch": 2.4213197969543145, "grad_norm": 2.9836435317993164, "learning_rate": 9.289516382185737e-07, "loss": 0.3223, "step": 18603 }, { "epoch": 2.4217102694260055, "grad_norm": 2.6815292835235596, "learning_rate": 9.277386912383435e-07, "loss": 0.403, "step": 18606 }, { "epoch": 2.422100741897696, "grad_norm": 3.0170223712921143, "learning_rate": 9.265264556735987e-07, "loss": 0.2842, "step": 18609 }, { "epoch": 2.422491214369387, "grad_norm": 2.64164662361145, "learning_rate": 9.253149317361126e-07, "loss": 0.3211, "step": 18612 }, { "epoch": 2.4228816868410776, "grad_norm": 2.9962964057922363, "learning_rate": 9.24104119637535e-07, "loss": 0.3325, "step": 18615 }, { "epoch": 2.4232721593127686, "grad_norm": 2.4860761165618896, "learning_rate": 9.228940195893932e-07, "loss": 0.3338, "step": 18618 }, { "epoch": 2.423662631784459, "grad_norm": 2.7918808460235596, "learning_rate": 9.216846318030908e-07, "loss": 0.3377, "step": 18621 }, { "epoch": 2.42405310425615, "grad_norm": 3.0834054946899414, "learning_rate": 9.204759564899029e-07, "loss": 0.3111, "step": 18624 }, { "epoch": 2.4244435767278407, "grad_norm": 3.3791403770446777, "learning_rate": 9.192679938609827e-07, "loss": 0.3618, "step": 18627 }, { "epoch": 2.4248340491995313, "grad_norm": 2.551917314529419, "learning_rate": 9.180607441273604e-07, "loss": 0.368, "step": 18630 }, { "epoch": 2.4252245216712223, "grad_norm": 2.7034595012664795, "learning_rate": 9.168542074999392e-07, "loss": 0.3756, "step": 18633 }, { "epoch": 2.425614994142913, "grad_norm": 2.543304443359375, "learning_rate": 9.15648384189498e-07, "loss": 0.3233, "step": 18636 }, { "epoch": 2.426005466614604, "grad_norm": 3.1651837825775146, "learning_rate": 9.144432744066905e-07, "loss": 0.3206, "step": 18639 }, { "epoch": 2.4263959390862944, "grad_norm": 2.559799909591675, "learning_rate": 9.132388783620499e-07, "loss": 0.3498, "step": 18642 }, { "epoch": 2.426786411557985, "grad_norm": 2.8475115299224854, "learning_rate": 9.120351962659796e-07, "loss": 0.346, "step": 18645 }, { "epoch": 2.427176884029676, "grad_norm": 2.8560378551483154, "learning_rate": 9.108322283287596e-07, "loss": 0.3304, "step": 18648 }, { "epoch": 2.427567356501367, "grad_norm": 2.9788451194763184, "learning_rate": 9.096299747605481e-07, "loss": 0.3449, "step": 18651 }, { "epoch": 2.4279578289730575, "grad_norm": 2.8146660327911377, "learning_rate": 9.084284357713752e-07, "loss": 0.4016, "step": 18654 }, { "epoch": 2.428348301444748, "grad_norm": 2.616931200027466, "learning_rate": 9.072276115711459e-07, "loss": 0.3531, "step": 18657 }, { "epoch": 2.428738773916439, "grad_norm": 2.9515371322631836, "learning_rate": 9.06027502369643e-07, "loss": 0.3594, "step": 18660 }, { "epoch": 2.4291292463881295, "grad_norm": 2.725242853164673, "learning_rate": 9.048281083765243e-07, "loss": 0.3437, "step": 18663 }, { "epoch": 2.4295197188598205, "grad_norm": 3.099229574203491, "learning_rate": 9.036294298013199e-07, "loss": 0.4032, "step": 18666 }, { "epoch": 2.429910191331511, "grad_norm": 2.691160202026367, "learning_rate": 9.024314668534356e-07, "loss": 0.3516, "step": 18669 }, { "epoch": 2.4303006638032016, "grad_norm": 2.8280410766601562, "learning_rate": 9.012342197421548e-07, "loss": 0.3906, "step": 18672 }, { "epoch": 2.4306911362748926, "grad_norm": 2.8767683506011963, "learning_rate": 9.000376886766337e-07, "loss": 0.3625, "step": 18675 }, { "epoch": 2.431081608746583, "grad_norm": 2.8653829097747803, "learning_rate": 8.988418738659016e-07, "loss": 0.3302, "step": 18678 }, { "epoch": 2.431472081218274, "grad_norm": 2.558035135269165, "learning_rate": 8.976467755188684e-07, "loss": 0.3544, "step": 18681 }, { "epoch": 2.4318625536899647, "grad_norm": 2.6624937057495117, "learning_rate": 8.964523938443131e-07, "loss": 0.3324, "step": 18684 }, { "epoch": 2.4322530261616557, "grad_norm": 2.9672672748565674, "learning_rate": 8.952587290508919e-07, "loss": 0.3928, "step": 18687 }, { "epoch": 2.4326434986333463, "grad_norm": 2.827082395553589, "learning_rate": 8.940657813471349e-07, "loss": 0.3936, "step": 18690 }, { "epoch": 2.4330339711050373, "grad_norm": 2.829801321029663, "learning_rate": 8.928735509414488e-07, "loss": 0.3735, "step": 18693 }, { "epoch": 2.433424443576728, "grad_norm": 2.6571168899536133, "learning_rate": 8.916820380421138e-07, "loss": 0.3167, "step": 18696 }, { "epoch": 2.4338149160484184, "grad_norm": 3.000652551651001, "learning_rate": 8.904912428572827e-07, "loss": 0.3927, "step": 18699 }, { "epoch": 2.4342053885201094, "grad_norm": 3.0371198654174805, "learning_rate": 8.893011655949862e-07, "loss": 0.4359, "step": 18702 }, { "epoch": 2.4345958609918, "grad_norm": 2.5225956439971924, "learning_rate": 8.881118064631294e-07, "loss": 0.3617, "step": 18705 }, { "epoch": 2.434986333463491, "grad_norm": 3.0411794185638428, "learning_rate": 8.869231656694904e-07, "loss": 0.4085, "step": 18708 }, { "epoch": 2.4353768059351815, "grad_norm": 2.582329750061035, "learning_rate": 8.857352434217203e-07, "loss": 0.3344, "step": 18711 }, { "epoch": 2.4357672784068725, "grad_norm": 2.818873643875122, "learning_rate": 8.845480399273493e-07, "loss": 0.3445, "step": 18714 }, { "epoch": 2.436157750878563, "grad_norm": 2.471068859100342, "learning_rate": 8.83361555393778e-07, "loss": 0.3724, "step": 18717 }, { "epoch": 2.436548223350254, "grad_norm": 2.8934266567230225, "learning_rate": 8.821757900282812e-07, "loss": 0.3259, "step": 18720 }, { "epoch": 2.4369386958219446, "grad_norm": 3.3927173614501953, "learning_rate": 8.809907440380134e-07, "loss": 0.4121, "step": 18723 }, { "epoch": 2.437329168293635, "grad_norm": 2.8776323795318604, "learning_rate": 8.798064176299964e-07, "loss": 0.3497, "step": 18726 }, { "epoch": 2.437719640765326, "grad_norm": 2.814983606338501, "learning_rate": 8.7862281101113e-07, "loss": 0.3731, "step": 18729 }, { "epoch": 2.4381101132370167, "grad_norm": 2.808781862258911, "learning_rate": 8.774399243881898e-07, "loss": 0.403, "step": 18732 }, { "epoch": 2.4385005857087076, "grad_norm": 3.211045980453491, "learning_rate": 8.762577579678222e-07, "loss": 0.3331, "step": 18735 }, { "epoch": 2.438891058180398, "grad_norm": 2.9599382877349854, "learning_rate": 8.750763119565486e-07, "loss": 0.3298, "step": 18738 }, { "epoch": 2.439281530652089, "grad_norm": 2.901627779006958, "learning_rate": 8.73895586560764e-07, "loss": 0.4148, "step": 18741 }, { "epoch": 2.4396720031237797, "grad_norm": 2.719869375228882, "learning_rate": 8.727155819867423e-07, "loss": 0.3681, "step": 18744 }, { "epoch": 2.4400624755954707, "grad_norm": 2.781661033630371, "learning_rate": 8.715362984406261e-07, "loss": 0.3967, "step": 18747 }, { "epoch": 2.4404529480671613, "grad_norm": 3.1544594764709473, "learning_rate": 8.703577361284338e-07, "loss": 0.3487, "step": 18750 }, { "epoch": 2.440843420538852, "grad_norm": 3.0680270195007324, "learning_rate": 8.691798952560559e-07, "loss": 0.3621, "step": 18753 }, { "epoch": 2.441233893010543, "grad_norm": 2.580583333969116, "learning_rate": 8.680027760292614e-07, "loss": 0.3696, "step": 18756 }, { "epoch": 2.4416243654822334, "grad_norm": 2.498162269592285, "learning_rate": 8.668263786536896e-07, "loss": 0.3002, "step": 18759 }, { "epoch": 2.4420148379539244, "grad_norm": 2.785461902618408, "learning_rate": 8.656507033348538e-07, "loss": 0.3497, "step": 18762 }, { "epoch": 2.442405310425615, "grad_norm": 2.962099313735962, "learning_rate": 8.644757502781437e-07, "loss": 0.4551, "step": 18765 }, { "epoch": 2.442795782897306, "grad_norm": 2.782252550125122, "learning_rate": 8.633015196888201e-07, "loss": 0.3868, "step": 18768 }, { "epoch": 2.4431862553689965, "grad_norm": 2.9131968021392822, "learning_rate": 8.621280117720171e-07, "loss": 0.3787, "step": 18771 }, { "epoch": 2.4435767278406875, "grad_norm": 2.526874303817749, "learning_rate": 8.60955226732747e-07, "loss": 0.306, "step": 18774 }, { "epoch": 2.443967200312378, "grad_norm": 2.654900312423706, "learning_rate": 8.59783164775892e-07, "loss": 0.3161, "step": 18777 }, { "epoch": 2.4443576727840686, "grad_norm": 2.776625633239746, "learning_rate": 8.586118261062076e-07, "loss": 0.3483, "step": 18780 }, { "epoch": 2.4447481452557596, "grad_norm": 2.6517176628112793, "learning_rate": 8.574412109283232e-07, "loss": 0.3079, "step": 18783 }, { "epoch": 2.44513861772745, "grad_norm": 2.642512321472168, "learning_rate": 8.562713194467465e-07, "loss": 0.3765, "step": 18786 }, { "epoch": 2.445529090199141, "grad_norm": 2.8423049449920654, "learning_rate": 8.551021518658536e-07, "loss": 0.3594, "step": 18789 }, { "epoch": 2.4459195626708317, "grad_norm": 3.162525177001953, "learning_rate": 8.539337083898936e-07, "loss": 0.3961, "step": 18792 }, { "epoch": 2.446310035142522, "grad_norm": 2.591289520263672, "learning_rate": 8.527659892229944e-07, "loss": 0.4013, "step": 18795 }, { "epoch": 2.446700507614213, "grad_norm": 2.7982068061828613, "learning_rate": 8.515989945691522e-07, "loss": 0.3741, "step": 18798 }, { "epoch": 2.4470909800859038, "grad_norm": 2.731457471847534, "learning_rate": 8.504327246322386e-07, "loss": 0.3658, "step": 18801 }, { "epoch": 2.4474814525575947, "grad_norm": 2.960292100906372, "learning_rate": 8.492671796159968e-07, "loss": 0.4037, "step": 18804 }, { "epoch": 2.4478719250292853, "grad_norm": 2.7071995735168457, "learning_rate": 8.48102359724049e-07, "loss": 0.392, "step": 18807 }, { "epoch": 2.4482623975009763, "grad_norm": 2.889734983444214, "learning_rate": 8.46938265159884e-07, "loss": 0.3443, "step": 18810 }, { "epoch": 2.448652869972667, "grad_norm": 2.8918561935424805, "learning_rate": 8.457748961268664e-07, "loss": 0.3643, "step": 18813 }, { "epoch": 2.449043342444358, "grad_norm": 3.017657518386841, "learning_rate": 8.446122528282363e-07, "loss": 0.3877, "step": 18816 }, { "epoch": 2.4494338149160484, "grad_norm": 2.657172679901123, "learning_rate": 8.434503354671042e-07, "loss": 0.3669, "step": 18819 }, { "epoch": 2.449824287387739, "grad_norm": 2.905705451965332, "learning_rate": 8.422891442464531e-07, "loss": 0.3495, "step": 18822 }, { "epoch": 2.45021475985943, "grad_norm": 2.8718855381011963, "learning_rate": 8.41128679369142e-07, "loss": 0.4314, "step": 18825 }, { "epoch": 2.4506052323311205, "grad_norm": 2.7882487773895264, "learning_rate": 8.399689410379025e-07, "loss": 0.3464, "step": 18828 }, { "epoch": 2.4509957048028115, "grad_norm": 2.7789225578308105, "learning_rate": 8.388099294553382e-07, "loss": 0.3243, "step": 18831 }, { "epoch": 2.451386177274502, "grad_norm": 2.7858150005340576, "learning_rate": 8.376516448239236e-07, "loss": 0.3557, "step": 18834 }, { "epoch": 2.451776649746193, "grad_norm": 2.6816978454589844, "learning_rate": 8.364940873460115e-07, "loss": 0.382, "step": 18837 }, { "epoch": 2.4521671222178836, "grad_norm": 3.012741804122925, "learning_rate": 8.353372572238238e-07, "loss": 0.3826, "step": 18840 }, { "epoch": 2.4525575946895746, "grad_norm": 2.689615488052368, "learning_rate": 8.341811546594564e-07, "loss": 0.3332, "step": 18843 }, { "epoch": 2.452948067161265, "grad_norm": 2.9826838970184326, "learning_rate": 8.330257798548763e-07, "loss": 0.3838, "step": 18846 }, { "epoch": 2.4533385396329557, "grad_norm": 2.5051567554473877, "learning_rate": 8.318711330119272e-07, "loss": 0.3054, "step": 18849 }, { "epoch": 2.4537290121046467, "grad_norm": 2.6066651344299316, "learning_rate": 8.307172143323233e-07, "loss": 0.3481, "step": 18852 }, { "epoch": 2.454119484576337, "grad_norm": 2.808889865875244, "learning_rate": 8.295640240176494e-07, "loss": 0.3487, "step": 18855 }, { "epoch": 2.454509957048028, "grad_norm": 2.8130900859832764, "learning_rate": 8.28411562269369e-07, "loss": 0.3552, "step": 18858 }, { "epoch": 2.4549004295197188, "grad_norm": 2.8734047412872314, "learning_rate": 8.272598292888124e-07, "loss": 0.3521, "step": 18861 }, { "epoch": 2.4552909019914098, "grad_norm": 2.4436731338500977, "learning_rate": 8.261088252771848e-07, "loss": 0.3438, "step": 18864 }, { "epoch": 2.4556813744631003, "grad_norm": 2.721705675125122, "learning_rate": 8.249585504355645e-07, "loss": 0.4103, "step": 18867 }, { "epoch": 2.4560718469347913, "grad_norm": 2.883448839187622, "learning_rate": 8.238090049649033e-07, "loss": 0.368, "step": 18870 }, { "epoch": 2.456462319406482, "grad_norm": 2.8921525478363037, "learning_rate": 8.226601890660241e-07, "loss": 0.3569, "step": 18873 }, { "epoch": 2.4568527918781724, "grad_norm": 2.69545316696167, "learning_rate": 8.215121029396206e-07, "loss": 0.3431, "step": 18876 }, { "epoch": 2.4572432643498634, "grad_norm": 3.1997456550598145, "learning_rate": 8.203647467862636e-07, "loss": 0.286, "step": 18879 }, { "epoch": 2.457633736821554, "grad_norm": 3.0184547901153564, "learning_rate": 8.192181208063926e-07, "loss": 0.3426, "step": 18882 }, { "epoch": 2.458024209293245, "grad_norm": 2.5744235515594482, "learning_rate": 8.180722252003198e-07, "loss": 0.3675, "step": 18885 }, { "epoch": 2.4584146817649355, "grad_norm": 2.549051523208618, "learning_rate": 8.169270601682328e-07, "loss": 0.39, "step": 18888 }, { "epoch": 2.4588051542366265, "grad_norm": 2.9480321407318115, "learning_rate": 8.157826259101886e-07, "loss": 0.3437, "step": 18891 }, { "epoch": 2.459195626708317, "grad_norm": 2.6580848693847656, "learning_rate": 8.146389226261176e-07, "loss": 0.2907, "step": 18894 }, { "epoch": 2.459586099180008, "grad_norm": 2.8073930740356445, "learning_rate": 8.134959505158208e-07, "loss": 0.346, "step": 18897 }, { "epoch": 2.4599765716516986, "grad_norm": 2.8346378803253174, "learning_rate": 8.123537097789752e-07, "loss": 0.3428, "step": 18900 }, { "epoch": 2.460367044123389, "grad_norm": 2.9591760635375977, "learning_rate": 8.112122006151268e-07, "loss": 0.3972, "step": 18903 }, { "epoch": 2.46075751659508, "grad_norm": 3.013507843017578, "learning_rate": 8.100714232236945e-07, "loss": 0.3298, "step": 18906 }, { "epoch": 2.4611479890667707, "grad_norm": 2.7471742630004883, "learning_rate": 8.089313778039698e-07, "loss": 0.309, "step": 18909 }, { "epoch": 2.4615384615384617, "grad_norm": 3.1585850715637207, "learning_rate": 8.077920645551179e-07, "loss": 0.361, "step": 18912 }, { "epoch": 2.4619289340101522, "grad_norm": 2.589958906173706, "learning_rate": 8.066534836761736e-07, "loss": 0.3519, "step": 18915 }, { "epoch": 2.462319406481843, "grad_norm": 3.804331064224243, "learning_rate": 8.055156353660426e-07, "loss": 0.3417, "step": 18918 }, { "epoch": 2.4627098789535338, "grad_norm": 3.136734962463379, "learning_rate": 8.043785198235076e-07, "loss": 0.3533, "step": 18921 }, { "epoch": 2.4631003514252248, "grad_norm": 2.8037362098693848, "learning_rate": 8.032421372472188e-07, "loss": 0.3386, "step": 18924 }, { "epoch": 2.4634908238969153, "grad_norm": 2.6273999214172363, "learning_rate": 8.021064878356987e-07, "loss": 0.3358, "step": 18927 }, { "epoch": 2.463881296368606, "grad_norm": 2.6162285804748535, "learning_rate": 8.009715717873451e-07, "loss": 0.366, "step": 18930 }, { "epoch": 2.464271768840297, "grad_norm": 2.9784958362579346, "learning_rate": 7.998373893004246e-07, "loss": 0.3863, "step": 18933 }, { "epoch": 2.4646622413119874, "grad_norm": 3.3731791973114014, "learning_rate": 7.987039405730757e-07, "loss": 0.3216, "step": 18936 }, { "epoch": 2.4650527137836784, "grad_norm": 2.652818441390991, "learning_rate": 7.975712258033108e-07, "loss": 0.3587, "step": 18939 }, { "epoch": 2.465443186255369, "grad_norm": 2.968871831893921, "learning_rate": 7.964392451890119e-07, "loss": 0.3334, "step": 18942 }, { "epoch": 2.4658336587270595, "grad_norm": 2.5186870098114014, "learning_rate": 7.953079989279344e-07, "loss": 0.3153, "step": 18945 }, { "epoch": 2.4662241311987505, "grad_norm": 2.8169445991516113, "learning_rate": 7.941774872177027e-07, "loss": 0.3013, "step": 18948 }, { "epoch": 2.466614603670441, "grad_norm": 2.731722831726074, "learning_rate": 7.930477102558159e-07, "loss": 0.3281, "step": 18951 }, { "epoch": 2.467005076142132, "grad_norm": 2.587888240814209, "learning_rate": 7.919186682396457e-07, "loss": 0.3223, "step": 18954 }, { "epoch": 2.4673955486138226, "grad_norm": 2.6688907146453857, "learning_rate": 7.907903613664314e-07, "loss": 0.3775, "step": 18957 }, { "epoch": 2.4677860210855136, "grad_norm": 2.8114142417907715, "learning_rate": 7.896627898332848e-07, "loss": 0.3064, "step": 18960 }, { "epoch": 2.468176493557204, "grad_norm": 2.9139301776885986, "learning_rate": 7.885359538371929e-07, "loss": 0.4012, "step": 18963 }, { "epoch": 2.468566966028895, "grad_norm": 2.7291030883789062, "learning_rate": 7.874098535750103e-07, "loss": 0.3384, "step": 18966 }, { "epoch": 2.4689574385005857, "grad_norm": 2.8256447315216064, "learning_rate": 7.862844892434629e-07, "loss": 0.3605, "step": 18969 }, { "epoch": 2.4693479109722762, "grad_norm": 2.81532621383667, "learning_rate": 7.85159861039152e-07, "loss": 0.3608, "step": 18972 }, { "epoch": 2.4697383834439672, "grad_norm": 2.861802816390991, "learning_rate": 7.84035969158547e-07, "loss": 0.3409, "step": 18975 }, { "epoch": 2.470128855915658, "grad_norm": 2.916384696960449, "learning_rate": 7.829128137979875e-07, "loss": 0.3839, "step": 18978 }, { "epoch": 2.470519328387349, "grad_norm": 2.9499900341033936, "learning_rate": 7.817903951536892e-07, "loss": 0.3667, "step": 18981 }, { "epoch": 2.4709098008590393, "grad_norm": 2.9499289989471436, "learning_rate": 7.806687134217356e-07, "loss": 0.3399, "step": 18984 }, { "epoch": 2.4713002733307303, "grad_norm": 2.9317288398742676, "learning_rate": 7.795477687980801e-07, "loss": 0.3157, "step": 18987 }, { "epoch": 2.471690745802421, "grad_norm": 3.2856903076171875, "learning_rate": 7.784275614785519e-07, "loss": 0.386, "step": 18990 }, { "epoch": 2.472081218274112, "grad_norm": 3.034611225128174, "learning_rate": 7.773080916588466e-07, "loss": 0.3312, "step": 18993 }, { "epoch": 2.4724716907458024, "grad_norm": 2.9121174812316895, "learning_rate": 7.761893595345354e-07, "loss": 0.4084, "step": 18996 }, { "epoch": 2.472862163217493, "grad_norm": 2.7845377922058105, "learning_rate": 7.750713653010567e-07, "loss": 0.3619, "step": 18999 }, { "epoch": 2.473252635689184, "grad_norm": 2.673917531967163, "learning_rate": 7.73954109153724e-07, "loss": 0.4123, "step": 19002 }, { "epoch": 2.4736431081608745, "grad_norm": 2.777308225631714, "learning_rate": 7.728375912877178e-07, "loss": 0.3202, "step": 19005 }, { "epoch": 2.4740335806325655, "grad_norm": 3.0712320804595947, "learning_rate": 7.717218118980918e-07, "loss": 0.3864, "step": 19008 }, { "epoch": 2.474424053104256, "grad_norm": 3.155332088470459, "learning_rate": 7.706067711797687e-07, "loss": 0.3849, "step": 19011 }, { "epoch": 2.474814525575947, "grad_norm": 2.9384818077087402, "learning_rate": 7.694924693275468e-07, "loss": 0.3452, "step": 19014 }, { "epoch": 2.4752049980476376, "grad_norm": 2.8019471168518066, "learning_rate": 7.683789065360908e-07, "loss": 0.3621, "step": 19017 }, { "epoch": 2.4755954705193286, "grad_norm": 3.059307098388672, "learning_rate": 7.672660829999367e-07, "loss": 0.3504, "step": 19020 }, { "epoch": 2.475985942991019, "grad_norm": 3.4540011882781982, "learning_rate": 7.661539989134947e-07, "loss": 0.3184, "step": 19023 }, { "epoch": 2.4763764154627097, "grad_norm": 2.706732988357544, "learning_rate": 7.650426544710427e-07, "loss": 0.3627, "step": 19026 }, { "epoch": 2.4767668879344007, "grad_norm": 2.591989040374756, "learning_rate": 7.63932049866728e-07, "loss": 0.4024, "step": 19029 }, { "epoch": 2.4771573604060912, "grad_norm": 2.8087947368621826, "learning_rate": 7.628221852945744e-07, "loss": 0.3468, "step": 19032 }, { "epoch": 2.4775478328777822, "grad_norm": 2.681309700012207, "learning_rate": 7.617130609484702e-07, "loss": 0.3382, "step": 19035 }, { "epoch": 2.477938305349473, "grad_norm": 3.0248937606811523, "learning_rate": 7.606046770221792e-07, "loss": 0.417, "step": 19038 }, { "epoch": 2.478328777821164, "grad_norm": 2.839232921600342, "learning_rate": 7.594970337093316e-07, "loss": 0.3179, "step": 19041 }, { "epoch": 2.4787192502928543, "grad_norm": 2.8499197959899902, "learning_rate": 7.583901312034331e-07, "loss": 0.367, "step": 19044 }, { "epoch": 2.4791097227645453, "grad_norm": 2.664505958557129, "learning_rate": 7.572839696978551e-07, "loss": 0.3077, "step": 19047 }, { "epoch": 2.479500195236236, "grad_norm": 2.8773906230926514, "learning_rate": 7.561785493858409e-07, "loss": 0.3265, "step": 19050 }, { "epoch": 2.4798906677079264, "grad_norm": 3.14871883392334, "learning_rate": 7.550738704605082e-07, "loss": 0.3526, "step": 19053 }, { "epoch": 2.4802811401796174, "grad_norm": 2.6195621490478516, "learning_rate": 7.539699331148398e-07, "loss": 0.3692, "step": 19056 }, { "epoch": 2.480671612651308, "grad_norm": 2.551382541656494, "learning_rate": 7.52866737541692e-07, "loss": 0.2936, "step": 19059 }, { "epoch": 2.481062085122999, "grad_norm": 2.6689510345458984, "learning_rate": 7.517642839337896e-07, "loss": 0.3484, "step": 19062 }, { "epoch": 2.4814525575946895, "grad_norm": 2.526609420776367, "learning_rate": 7.506625724837302e-07, "loss": 0.403, "step": 19065 }, { "epoch": 2.48184303006638, "grad_norm": 2.6223344802856445, "learning_rate": 7.495616033839808e-07, "loss": 0.3567, "step": 19068 }, { "epoch": 2.482233502538071, "grad_norm": 2.91428804397583, "learning_rate": 7.484613768268762e-07, "loss": 0.3981, "step": 19071 }, { "epoch": 2.4826239750097616, "grad_norm": 2.6756317615509033, "learning_rate": 7.473618930046267e-07, "loss": 0.3424, "step": 19074 }, { "epoch": 2.4830144474814526, "grad_norm": 2.6484274864196777, "learning_rate": 7.462631521093066e-07, "loss": 0.302, "step": 19077 }, { "epoch": 2.483404919953143, "grad_norm": 2.8635783195495605, "learning_rate": 7.451651543328664e-07, "loss": 0.3657, "step": 19080 }, { "epoch": 2.483795392424834, "grad_norm": 2.7862300872802734, "learning_rate": 7.440678998671219e-07, "loss": 0.3287, "step": 19083 }, { "epoch": 2.4841858648965247, "grad_norm": 2.5042202472686768, "learning_rate": 7.429713889037632e-07, "loss": 0.3273, "step": 19086 }, { "epoch": 2.4845763373682157, "grad_norm": 2.9982264041900635, "learning_rate": 7.418756216343475e-07, "loss": 0.4191, "step": 19089 }, { "epoch": 2.4849668098399063, "grad_norm": 2.7572524547576904, "learning_rate": 7.407805982503019e-07, "loss": 0.3332, "step": 19092 }, { "epoch": 2.485357282311597, "grad_norm": 2.9605908393859863, "learning_rate": 7.396863189429265e-07, "loss": 0.3352, "step": 19095 }, { "epoch": 2.485747754783288, "grad_norm": 2.8530290126800537, "learning_rate": 7.385927839033891e-07, "loss": 0.3067, "step": 19098 }, { "epoch": 2.4861382272549784, "grad_norm": 2.7384355068206787, "learning_rate": 7.374999933227261e-07, "loss": 0.365, "step": 19101 }, { "epoch": 2.4865286997266693, "grad_norm": 2.806218385696411, "learning_rate": 7.36407947391849e-07, "loss": 0.3437, "step": 19104 }, { "epoch": 2.48691917219836, "grad_norm": 2.798231363296509, "learning_rate": 7.353166463015338e-07, "loss": 0.3763, "step": 19107 }, { "epoch": 2.487309644670051, "grad_norm": 2.5547235012054443, "learning_rate": 7.342260902424292e-07, "loss": 0.3022, "step": 19110 }, { "epoch": 2.4877001171417414, "grad_norm": 2.6578516960144043, "learning_rate": 7.331362794050512e-07, "loss": 0.3696, "step": 19113 }, { "epoch": 2.4880905896134324, "grad_norm": 2.6646409034729004, "learning_rate": 7.320472139797902e-07, "loss": 0.3517, "step": 19116 }, { "epoch": 2.488481062085123, "grad_norm": 2.4392144680023193, "learning_rate": 7.309588941569018e-07, "loss": 0.308, "step": 19119 }, { "epoch": 2.4888715345568135, "grad_norm": 2.8057920932769775, "learning_rate": 7.298713201265145e-07, "loss": 0.4005, "step": 19122 }, { "epoch": 2.4892620070285045, "grad_norm": 2.7063517570495605, "learning_rate": 7.287844920786236e-07, "loss": 0.3574, "step": 19125 }, { "epoch": 2.489652479500195, "grad_norm": 2.536086082458496, "learning_rate": 7.276984102030976e-07, "loss": 0.3572, "step": 19128 }, { "epoch": 2.490042951971886, "grad_norm": 2.7980449199676514, "learning_rate": 7.266130746896722e-07, "loss": 0.4331, "step": 19131 }, { "epoch": 2.4904334244435766, "grad_norm": 2.7580485343933105, "learning_rate": 7.25528485727951e-07, "loss": 0.3337, "step": 19134 }, { "epoch": 2.4908238969152676, "grad_norm": 2.91790771484375, "learning_rate": 7.244446435074126e-07, "loss": 0.3567, "step": 19137 }, { "epoch": 2.491214369386958, "grad_norm": 2.8744025230407715, "learning_rate": 7.233615482174005e-07, "loss": 0.3997, "step": 19140 }, { "epoch": 2.491604841858649, "grad_norm": 2.7797534465789795, "learning_rate": 7.22279200047128e-07, "loss": 0.3623, "step": 19143 }, { "epoch": 2.4919953143303397, "grad_norm": 2.9023027420043945, "learning_rate": 7.211975991856812e-07, "loss": 0.3873, "step": 19146 }, { "epoch": 2.4923857868020303, "grad_norm": 2.7867956161499023, "learning_rate": 7.201167458220131e-07, "loss": 0.4055, "step": 19149 }, { "epoch": 2.4927762592737213, "grad_norm": 2.779937505722046, "learning_rate": 7.190366401449444e-07, "loss": 0.3783, "step": 19152 }, { "epoch": 2.493166731745412, "grad_norm": 2.892500400543213, "learning_rate": 7.179572823431702e-07, "loss": 0.3472, "step": 19155 }, { "epoch": 2.493557204217103, "grad_norm": 2.9717111587524414, "learning_rate": 7.168786726052501e-07, "loss": 0.3959, "step": 19158 }, { "epoch": 2.4939476766887934, "grad_norm": 3.3151369094848633, "learning_rate": 7.158008111196152e-07, "loss": 0.3453, "step": 19161 }, { "epoch": 2.4943381491604844, "grad_norm": 2.6525325775146484, "learning_rate": 7.147236980745653e-07, "loss": 0.2747, "step": 19164 }, { "epoch": 2.494728621632175, "grad_norm": 2.505096197128296, "learning_rate": 7.136473336582722e-07, "loss": 0.3938, "step": 19167 }, { "epoch": 2.495119094103866, "grad_norm": 3.1451778411865234, "learning_rate": 7.125717180587721e-07, "loss": 0.3658, "step": 19170 }, { "epoch": 2.4955095665755564, "grad_norm": 3.026200294494629, "learning_rate": 7.114968514639736e-07, "loss": 0.3788, "step": 19173 }, { "epoch": 2.495900039047247, "grad_norm": 2.9026551246643066, "learning_rate": 7.104227340616527e-07, "loss": 0.3708, "step": 19176 }, { "epoch": 2.496290511518938, "grad_norm": 2.9777603149414062, "learning_rate": 7.093493660394568e-07, "loss": 0.3116, "step": 19179 }, { "epoch": 2.4966809839906285, "grad_norm": 2.5679221153259277, "learning_rate": 7.082767475849011e-07, "loss": 0.3198, "step": 19182 }, { "epoch": 2.4970714564623195, "grad_norm": 2.768268585205078, "learning_rate": 7.072048788853675e-07, "loss": 0.4067, "step": 19185 }, { "epoch": 2.49746192893401, "grad_norm": 2.612443447113037, "learning_rate": 7.061337601281121e-07, "loss": 0.3287, "step": 19188 }, { "epoch": 2.497852401405701, "grad_norm": 2.776230573654175, "learning_rate": 7.050633915002559e-07, "loss": 0.3972, "step": 19191 }, { "epoch": 2.4982428738773916, "grad_norm": 2.519664764404297, "learning_rate": 7.039937731887886e-07, "loss": 0.3023, "step": 19194 }, { "epoch": 2.4986333463490826, "grad_norm": 3.676971197128296, "learning_rate": 7.029249053805731e-07, "loss": 0.398, "step": 19197 }, { "epoch": 2.499023818820773, "grad_norm": 2.587867021560669, "learning_rate": 7.018567882623368e-07, "loss": 0.3471, "step": 19200 }, { "epoch": 2.4994142912924637, "grad_norm": 3.008563280105591, "learning_rate": 7.007894220206763e-07, "loss": 0.3319, "step": 19203 }, { "epoch": 2.4998047637641547, "grad_norm": 3.0095622539520264, "learning_rate": 6.997228068420597e-07, "loss": 0.299, "step": 19206 }, { "epoch": 2.5001952362358453, "grad_norm": 3.0652036666870117, "learning_rate": 6.986569429128238e-07, "loss": 0.4295, "step": 19209 }, { "epoch": 2.5005857087075363, "grad_norm": 2.4899280071258545, "learning_rate": 6.975918304191709e-07, "loss": 0.4072, "step": 19212 }, { "epoch": 2.500976181179227, "grad_norm": 2.737656593322754, "learning_rate": 6.965274695471729e-07, "loss": 0.392, "step": 19215 }, { "epoch": 2.5013666536509174, "grad_norm": 2.7664875984191895, "learning_rate": 6.954638604827741e-07, "loss": 0.2942, "step": 19218 }, { "epoch": 2.5017571261226084, "grad_norm": 2.665586233139038, "learning_rate": 6.944010034117837e-07, "loss": 0.3066, "step": 19221 }, { "epoch": 2.5021475985942994, "grad_norm": 2.6148107051849365, "learning_rate": 6.933388985198802e-07, "loss": 0.3325, "step": 19224 }, { "epoch": 2.50253807106599, "grad_norm": 2.9085710048675537, "learning_rate": 6.922775459926101e-07, "loss": 0.3846, "step": 19227 }, { "epoch": 2.5029285435376805, "grad_norm": 2.714949369430542, "learning_rate": 6.912169460153917e-07, "loss": 0.3562, "step": 19230 }, { "epoch": 2.5033190160093715, "grad_norm": 2.8644955158233643, "learning_rate": 6.901570987735085e-07, "loss": 0.2756, "step": 19233 }, { "epoch": 2.503709488481062, "grad_norm": 2.9508283138275146, "learning_rate": 6.890980044521123e-07, "loss": 0.4106, "step": 19236 }, { "epoch": 2.504099960952753, "grad_norm": 2.899592161178589, "learning_rate": 6.880396632362268e-07, "loss": 0.3553, "step": 19239 }, { "epoch": 2.5044904334244436, "grad_norm": 2.7175285816192627, "learning_rate": 6.869820753107415e-07, "loss": 0.3397, "step": 19242 }, { "epoch": 2.504880905896134, "grad_norm": 2.8348207473754883, "learning_rate": 6.859252408604134e-07, "loss": 0.4374, "step": 19245 }, { "epoch": 2.505271378367825, "grad_norm": 2.8067331314086914, "learning_rate": 6.848691600698698e-07, "loss": 0.3312, "step": 19248 }, { "epoch": 2.505661850839516, "grad_norm": 3.3372321128845215, "learning_rate": 6.838138331236077e-07, "loss": 0.3608, "step": 19251 }, { "epoch": 2.5060523233112066, "grad_norm": 2.9257891178131104, "learning_rate": 6.827592602059891e-07, "loss": 0.3447, "step": 19254 }, { "epoch": 2.506442795782897, "grad_norm": 3.0877225399017334, "learning_rate": 6.817054415012441e-07, "loss": 0.3567, "step": 19257 }, { "epoch": 2.506833268254588, "grad_norm": 3.7706382274627686, "learning_rate": 6.806523771934753e-07, "loss": 0.3516, "step": 19260 }, { "epoch": 2.5072237407262787, "grad_norm": 2.557373285293579, "learning_rate": 6.796000674666498e-07, "loss": 0.3081, "step": 19263 }, { "epoch": 2.5076142131979697, "grad_norm": 2.9034101963043213, "learning_rate": 6.785485125046037e-07, "loss": 0.3248, "step": 19266 }, { "epoch": 2.5080046856696603, "grad_norm": 2.5061423778533936, "learning_rate": 6.774977124910398e-07, "loss": 0.328, "step": 19269 }, { "epoch": 2.508395158141351, "grad_norm": 2.783360481262207, "learning_rate": 6.764476676095344e-07, "loss": 0.3268, "step": 19272 }, { "epoch": 2.508785630613042, "grad_norm": 2.8032476902008057, "learning_rate": 6.753983780435253e-07, "loss": 0.3779, "step": 19275 }, { "epoch": 2.5091761030847324, "grad_norm": 3.029937267303467, "learning_rate": 6.743498439763213e-07, "loss": 0.356, "step": 19278 }, { "epoch": 2.5095665755564234, "grad_norm": 2.802793264389038, "learning_rate": 6.733020655911004e-07, "loss": 0.3366, "step": 19281 }, { "epoch": 2.509957048028114, "grad_norm": 2.936530590057373, "learning_rate": 6.722550430709068e-07, "loss": 0.3973, "step": 19284 }, { "epoch": 2.510347520499805, "grad_norm": 2.8553144931793213, "learning_rate": 6.71208776598652e-07, "loss": 0.3852, "step": 19287 }, { "epoch": 2.5107379929714955, "grad_norm": 2.726348400115967, "learning_rate": 6.701632663571172e-07, "loss": 0.3928, "step": 19290 }, { "epoch": 2.5111284654431865, "grad_norm": 3.0208041667938232, "learning_rate": 6.691185125289523e-07, "loss": 0.4231, "step": 19293 }, { "epoch": 2.511518937914877, "grad_norm": 2.8433890342712402, "learning_rate": 6.680745152966722e-07, "loss": 0.3642, "step": 19296 }, { "epoch": 2.5119094103865676, "grad_norm": 2.9164557456970215, "learning_rate": 6.670312748426605e-07, "loss": 0.3276, "step": 19299 }, { "epoch": 2.5122998828582586, "grad_norm": 2.7807259559631348, "learning_rate": 6.659887913491709e-07, "loss": 0.4351, "step": 19302 }, { "epoch": 2.512690355329949, "grad_norm": 2.888089179992676, "learning_rate": 6.64947064998322e-07, "loss": 0.354, "step": 19305 }, { "epoch": 2.51308082780164, "grad_norm": 2.8115479946136475, "learning_rate": 6.639060959721e-07, "loss": 0.3543, "step": 19308 }, { "epoch": 2.5134713002733307, "grad_norm": 2.8642683029174805, "learning_rate": 6.628658844523622e-07, "loss": 0.303, "step": 19311 }, { "epoch": 2.513861772745021, "grad_norm": 2.651221752166748, "learning_rate": 6.618264306208305e-07, "loss": 0.3848, "step": 19314 }, { "epoch": 2.514252245216712, "grad_norm": 3.1669631004333496, "learning_rate": 6.607877346590958e-07, "loss": 0.3538, "step": 19317 }, { "epoch": 2.514642717688403, "grad_norm": 2.709857225418091, "learning_rate": 6.597497967486139e-07, "loss": 0.4179, "step": 19320 }, { "epoch": 2.5150331901600937, "grad_norm": 2.7595081329345703, "learning_rate": 6.587126170707125e-07, "loss": 0.3483, "step": 19323 }, { "epoch": 2.5154236626317843, "grad_norm": 2.8310909271240234, "learning_rate": 6.576761958065847e-07, "loss": 0.3987, "step": 19326 }, { "epoch": 2.5158141351034753, "grad_norm": 2.7588865756988525, "learning_rate": 6.566405331372899e-07, "loss": 0.3472, "step": 19329 }, { "epoch": 2.516204607575166, "grad_norm": 2.9078924655914307, "learning_rate": 6.556056292437563e-07, "loss": 0.4267, "step": 19332 }, { "epoch": 2.516595080046857, "grad_norm": 2.8975186347961426, "learning_rate": 6.545714843067813e-07, "loss": 0.392, "step": 19335 }, { "epoch": 2.5169855525185474, "grad_norm": 2.952500581741333, "learning_rate": 6.535380985070261e-07, "loss": 0.3441, "step": 19338 }, { "epoch": 2.517376024990238, "grad_norm": 2.9215312004089355, "learning_rate": 6.525054720250207e-07, "loss": 0.4316, "step": 19341 }, { "epoch": 2.517766497461929, "grad_norm": 2.8318424224853516, "learning_rate": 6.514736050411652e-07, "loss": 0.3531, "step": 19344 }, { "epoch": 2.51815696993362, "grad_norm": 3.1043033599853516, "learning_rate": 6.504424977357221e-07, "loss": 0.4214, "step": 19347 }, { "epoch": 2.5185474424053105, "grad_norm": 2.7383644580841064, "learning_rate": 6.49412150288824e-07, "loss": 0.3557, "step": 19350 }, { "epoch": 2.518937914877001, "grad_norm": 2.9225306510925293, "learning_rate": 6.483825628804719e-07, "loss": 0.3435, "step": 19353 }, { "epoch": 2.519328387348692, "grad_norm": 2.5163514614105225, "learning_rate": 6.473537356905313e-07, "loss": 0.3363, "step": 19356 }, { "epoch": 2.5197188598203826, "grad_norm": 2.8150081634521484, "learning_rate": 6.463256688987357e-07, "loss": 0.3409, "step": 19359 }, { "epoch": 2.5201093322920736, "grad_norm": 2.6332247257232666, "learning_rate": 6.452983626846876e-07, "loss": 0.3493, "step": 19362 }, { "epoch": 2.520499804763764, "grad_norm": 2.6498818397521973, "learning_rate": 6.44271817227855e-07, "loss": 0.3274, "step": 19365 }, { "epoch": 2.5208902772354547, "grad_norm": 3.2936317920684814, "learning_rate": 6.432460327075723e-07, "loss": 0.3614, "step": 19368 }, { "epoch": 2.5212807497071457, "grad_norm": 2.7875030040740967, "learning_rate": 6.422210093030407e-07, "loss": 0.4025, "step": 19371 }, { "epoch": 2.5216712221788367, "grad_norm": 2.455900192260742, "learning_rate": 6.41196747193334e-07, "loss": 0.3627, "step": 19374 }, { "epoch": 2.522061694650527, "grad_norm": 2.7226195335388184, "learning_rate": 6.401732465573851e-07, "loss": 0.335, "step": 19377 }, { "epoch": 2.5224521671222178, "grad_norm": 2.6656506061553955, "learning_rate": 6.391505075739984e-07, "loss": 0.3006, "step": 19380 }, { "epoch": 2.5228426395939088, "grad_norm": 2.610862970352173, "learning_rate": 6.381285304218437e-07, "loss": 0.3197, "step": 19383 }, { "epoch": 2.5232331120655993, "grad_norm": 2.577313184738159, "learning_rate": 6.371073152794593e-07, "loss": 0.3171, "step": 19386 }, { "epoch": 2.5236235845372903, "grad_norm": 2.949810028076172, "learning_rate": 6.360868623252486e-07, "loss": 0.3888, "step": 19389 }, { "epoch": 2.524014057008981, "grad_norm": 3.948436975479126, "learning_rate": 6.350671717374818e-07, "loss": 0.3426, "step": 19392 }, { "epoch": 2.5244045294806714, "grad_norm": 2.535118341445923, "learning_rate": 6.340482436942991e-07, "loss": 0.3528, "step": 19395 }, { "epoch": 2.5247950019523624, "grad_norm": 2.7280428409576416, "learning_rate": 6.330300783737031e-07, "loss": 0.4199, "step": 19398 }, { "epoch": 2.525185474424053, "grad_norm": 2.778590440750122, "learning_rate": 6.320126759535645e-07, "loss": 0.3757, "step": 19401 }, { "epoch": 2.525575946895744, "grad_norm": 2.919978141784668, "learning_rate": 6.309960366116242e-07, "loss": 0.3648, "step": 19404 }, { "epoch": 2.5259664193674345, "grad_norm": 2.7053189277648926, "learning_rate": 6.299801605254846e-07, "loss": 0.3384, "step": 19407 }, { "epoch": 2.5263568918391255, "grad_norm": 2.8377797603607178, "learning_rate": 6.289650478726167e-07, "loss": 0.2978, "step": 19410 }, { "epoch": 2.526747364310816, "grad_norm": 2.6428401470184326, "learning_rate": 6.279506988303602e-07, "loss": 0.3359, "step": 19413 }, { "epoch": 2.527137836782507, "grad_norm": 2.710667610168457, "learning_rate": 6.2693711357592e-07, "loss": 0.3566, "step": 19416 }, { "epoch": 2.5275283092541976, "grad_norm": 2.882359504699707, "learning_rate": 6.259242922863662e-07, "loss": 0.3227, "step": 19419 }, { "epoch": 2.527918781725888, "grad_norm": 2.9712507724761963, "learning_rate": 6.249122351386361e-07, "loss": 0.4212, "step": 19422 }, { "epoch": 2.528309254197579, "grad_norm": 2.5499699115753174, "learning_rate": 6.239009423095355e-07, "loss": 0.3462, "step": 19425 }, { "epoch": 2.5286997266692697, "grad_norm": 3.2019617557525635, "learning_rate": 6.228904139757347e-07, "loss": 0.3946, "step": 19428 }, { "epoch": 2.5290901991409607, "grad_norm": 2.812934398651123, "learning_rate": 6.218806503137697e-07, "loss": 0.3762, "step": 19431 }, { "epoch": 2.529480671612651, "grad_norm": 2.723848581314087, "learning_rate": 6.208716515000446e-07, "loss": 0.377, "step": 19434 }, { "epoch": 2.529871144084342, "grad_norm": 2.7258222103118896, "learning_rate": 6.198634177108303e-07, "loss": 0.3686, "step": 19437 }, { "epoch": 2.5302616165560328, "grad_norm": 2.727672815322876, "learning_rate": 6.188559491222628e-07, "loss": 0.4371, "step": 19440 }, { "epoch": 2.5306520890277238, "grad_norm": 2.907092809677124, "learning_rate": 6.17849245910343e-07, "loss": 0.3584, "step": 19443 }, { "epoch": 2.5310425614994143, "grad_norm": 2.331328868865967, "learning_rate": 6.168433082509423e-07, "loss": 0.3078, "step": 19446 }, { "epoch": 2.531433033971105, "grad_norm": 3.0786802768707275, "learning_rate": 6.158381363197951e-07, "loss": 0.384, "step": 19449 }, { "epoch": 2.531823506442796, "grad_norm": 2.793458938598633, "learning_rate": 6.148337302925011e-07, "loss": 0.3512, "step": 19452 }, { "epoch": 2.5322139789144864, "grad_norm": 2.8592000007629395, "learning_rate": 6.138300903445299e-07, "loss": 0.3117, "step": 19455 }, { "epoch": 2.5326044513861774, "grad_norm": 2.7495779991149902, "learning_rate": 6.128272166512156e-07, "loss": 0.3858, "step": 19458 }, { "epoch": 2.532994923857868, "grad_norm": 2.65899920463562, "learning_rate": 6.11825109387757e-07, "loss": 0.3549, "step": 19461 }, { "epoch": 2.5333853963295585, "grad_norm": 2.677633285522461, "learning_rate": 6.108237687292202e-07, "loss": 0.3314, "step": 19464 }, { "epoch": 2.5337758688012495, "grad_norm": 2.7956135272979736, "learning_rate": 6.098231948505379e-07, "loss": 0.4367, "step": 19467 }, { "epoch": 2.5341663412729405, "grad_norm": 2.5864150524139404, "learning_rate": 6.088233879265076e-07, "loss": 0.3262, "step": 19470 }, { "epoch": 2.534556813744631, "grad_norm": 2.944986343383789, "learning_rate": 6.078243481317931e-07, "loss": 0.4178, "step": 19473 }, { "epoch": 2.5349472862163216, "grad_norm": 2.8859469890594482, "learning_rate": 6.068260756409261e-07, "loss": 0.443, "step": 19476 }, { "epoch": 2.5353377586880126, "grad_norm": 2.6689913272857666, "learning_rate": 6.058285706283023e-07, "loss": 0.331, "step": 19479 }, { "epoch": 2.535728231159703, "grad_norm": 2.97873592376709, "learning_rate": 6.048318332681824e-07, "loss": 0.3991, "step": 19482 }, { "epoch": 2.536118703631394, "grad_norm": 2.8189284801483154, "learning_rate": 6.038358637346947e-07, "loss": 0.3456, "step": 19485 }, { "epoch": 2.5365091761030847, "grad_norm": 2.695420026779175, "learning_rate": 6.028406622018346e-07, "loss": 0.3596, "step": 19488 }, { "epoch": 2.5368996485747752, "grad_norm": 2.9843835830688477, "learning_rate": 6.018462288434601e-07, "loss": 0.3328, "step": 19491 }, { "epoch": 2.5372901210464662, "grad_norm": 2.7242064476013184, "learning_rate": 6.008525638332963e-07, "loss": 0.4317, "step": 19494 }, { "epoch": 2.5376805935181572, "grad_norm": 2.6902647018432617, "learning_rate": 5.998596673449348e-07, "loss": 0.3795, "step": 19497 }, { "epoch": 2.5380710659898478, "grad_norm": 3.0032718181610107, "learning_rate": 5.98867539551834e-07, "loss": 0.3836, "step": 19500 }, { "epoch": 2.5384615384615383, "grad_norm": 3.2121152877807617, "learning_rate": 5.978761806273159e-07, "loss": 0.3354, "step": 19503 }, { "epoch": 2.5388520109332293, "grad_norm": 3.1183557510375977, "learning_rate": 5.968855907445669e-07, "loss": 0.2782, "step": 19506 }, { "epoch": 2.53924248340492, "grad_norm": 2.6611621379852295, "learning_rate": 5.958957700766432e-07, "loss": 0.3565, "step": 19509 }, { "epoch": 2.539632955876611, "grad_norm": 3.2292160987854004, "learning_rate": 5.949067187964642e-07, "loss": 0.3928, "step": 19512 }, { "epoch": 2.5400234283483014, "grad_norm": 2.88360595703125, "learning_rate": 5.939184370768131e-07, "loss": 0.3521, "step": 19515 }, { "epoch": 2.540413900819992, "grad_norm": 2.851783037185669, "learning_rate": 5.929309250903425e-07, "loss": 0.3978, "step": 19518 }, { "epoch": 2.540804373291683, "grad_norm": 2.560197114944458, "learning_rate": 5.91944183009569e-07, "loss": 0.3088, "step": 19521 }, { "epoch": 2.541194845763374, "grad_norm": 3.081719398498535, "learning_rate": 5.909582110068724e-07, "loss": 0.3519, "step": 19524 }, { "epoch": 2.5415853182350645, "grad_norm": 2.6352970600128174, "learning_rate": 5.899730092545014e-07, "loss": 0.3859, "step": 19527 }, { "epoch": 2.541975790706755, "grad_norm": 2.6711513996124268, "learning_rate": 5.889885779245691e-07, "loss": 0.3733, "step": 19530 }, { "epoch": 2.542366263178446, "grad_norm": 2.8868939876556396, "learning_rate": 5.880049171890523e-07, "loss": 0.3391, "step": 19533 }, { "epoch": 2.5427567356501366, "grad_norm": 2.6922616958618164, "learning_rate": 5.870220272197941e-07, "loss": 0.3535, "step": 19536 }, { "epoch": 2.5431472081218276, "grad_norm": 2.6204869747161865, "learning_rate": 5.860399081885043e-07, "loss": 0.3502, "step": 19539 }, { "epoch": 2.543537680593518, "grad_norm": 2.710123062133789, "learning_rate": 5.85058560266758e-07, "loss": 0.3269, "step": 19542 }, { "epoch": 2.5439281530652087, "grad_norm": 2.685530424118042, "learning_rate": 5.840779836259936e-07, "loss": 0.3666, "step": 19545 }, { "epoch": 2.5443186255368997, "grad_norm": 3.140651226043701, "learning_rate": 5.830981784375145e-07, "loss": 0.3366, "step": 19548 }, { "epoch": 2.5447090980085902, "grad_norm": 2.9641339778900146, "learning_rate": 5.821191448724934e-07, "loss": 0.3882, "step": 19551 }, { "epoch": 2.5450995704802812, "grad_norm": 2.5823094844818115, "learning_rate": 5.811408831019633e-07, "loss": 0.3802, "step": 19554 }, { "epoch": 2.545490042951972, "grad_norm": 3.241447925567627, "learning_rate": 5.801633932968237e-07, "loss": 0.4017, "step": 19557 }, { "epoch": 2.545880515423663, "grad_norm": 2.6996021270751953, "learning_rate": 5.79186675627843e-07, "loss": 0.3679, "step": 19560 }, { "epoch": 2.5462709878953533, "grad_norm": 3.129182815551758, "learning_rate": 5.782107302656497e-07, "loss": 0.4384, "step": 19563 }, { "epoch": 2.5466614603670443, "grad_norm": 2.9826695919036865, "learning_rate": 5.772355573807386e-07, "loss": 0.392, "step": 19566 }, { "epoch": 2.547051932838735, "grad_norm": 2.7523341178894043, "learning_rate": 5.762611571434729e-07, "loss": 0.337, "step": 19569 }, { "epoch": 2.5474424053104254, "grad_norm": 2.7614572048187256, "learning_rate": 5.752875297240762e-07, "loss": 0.3406, "step": 19572 }, { "epoch": 2.5478328777821164, "grad_norm": 3.031256675720215, "learning_rate": 5.743146752926404e-07, "loss": 0.3894, "step": 19575 }, { "epoch": 2.548223350253807, "grad_norm": 2.6525719165802, "learning_rate": 5.733425940191196e-07, "loss": 0.3594, "step": 19578 }, { "epoch": 2.548613822725498, "grad_norm": 3.2900519371032715, "learning_rate": 5.723712860733349e-07, "loss": 0.4027, "step": 19581 }, { "epoch": 2.5490042951971885, "grad_norm": 2.974825620651245, "learning_rate": 5.714007516249731e-07, "loss": 0.3328, "step": 19584 }, { "epoch": 2.549394767668879, "grad_norm": 3.1361944675445557, "learning_rate": 5.704309908435829e-07, "loss": 0.3499, "step": 19587 }, { "epoch": 2.54978524014057, "grad_norm": 2.7374889850616455, "learning_rate": 5.69462003898581e-07, "loss": 0.3019, "step": 19590 }, { "epoch": 2.550175712612261, "grad_norm": 2.7288341522216797, "learning_rate": 5.684937909592464e-07, "loss": 0.2799, "step": 19593 }, { "epoch": 2.5505661850839516, "grad_norm": 2.551090717315674, "learning_rate": 5.675263521947244e-07, "loss": 0.3312, "step": 19596 }, { "epoch": 2.550956657555642, "grad_norm": 2.9384982585906982, "learning_rate": 5.665596877740226e-07, "loss": 0.346, "step": 19599 }, { "epoch": 2.551347130027333, "grad_norm": 2.4525489807128906, "learning_rate": 5.655937978660181e-07, "loss": 0.267, "step": 19602 }, { "epoch": 2.5517376024990237, "grad_norm": 3.117671251296997, "learning_rate": 5.646286826394487e-07, "loss": 0.393, "step": 19605 }, { "epoch": 2.5521280749707147, "grad_norm": 2.629549264907837, "learning_rate": 5.636643422629162e-07, "loss": 0.341, "step": 19608 }, { "epoch": 2.5525185474424053, "grad_norm": 2.5423882007598877, "learning_rate": 5.62700776904892e-07, "loss": 0.3133, "step": 19611 }, { "epoch": 2.552909019914096, "grad_norm": 2.8479764461517334, "learning_rate": 5.617379867337069e-07, "loss": 0.3234, "step": 19614 }, { "epoch": 2.553299492385787, "grad_norm": 2.5962421894073486, "learning_rate": 5.607759719175581e-07, "loss": 0.3422, "step": 19617 }, { "epoch": 2.553689964857478, "grad_norm": 2.9569382667541504, "learning_rate": 5.59814732624509e-07, "loss": 0.3549, "step": 19620 }, { "epoch": 2.5540804373291683, "grad_norm": 2.6492676734924316, "learning_rate": 5.588542690224847e-07, "loss": 0.3782, "step": 19623 }, { "epoch": 2.554470909800859, "grad_norm": 2.628129482269287, "learning_rate": 5.578945812792774e-07, "loss": 0.2951, "step": 19626 }, { "epoch": 2.55486138227255, "grad_norm": 2.4740099906921387, "learning_rate": 5.569356695625411e-07, "loss": 0.2915, "step": 19629 }, { "epoch": 2.5552518547442404, "grad_norm": 2.6669182777404785, "learning_rate": 5.559775340397972e-07, "loss": 0.353, "step": 19632 }, { "epoch": 2.5556423272159314, "grad_norm": 3.07163667678833, "learning_rate": 5.550201748784295e-07, "loss": 0.3871, "step": 19635 }, { "epoch": 2.556032799687622, "grad_norm": 3.0605452060699463, "learning_rate": 5.54063592245686e-07, "loss": 0.3861, "step": 19638 }, { "epoch": 2.5564232721593125, "grad_norm": 3.2628283500671387, "learning_rate": 5.531077863086798e-07, "loss": 0.3629, "step": 19641 }, { "epoch": 2.5568137446310035, "grad_norm": 2.872985601425171, "learning_rate": 5.521527572343888e-07, "loss": 0.3232, "step": 19644 }, { "epoch": 2.5572042171026945, "grad_norm": 2.832007884979248, "learning_rate": 5.511985051896546e-07, "loss": 0.3507, "step": 19647 }, { "epoch": 2.557594689574385, "grad_norm": 3.344686985015869, "learning_rate": 5.502450303411816e-07, "loss": 0.2877, "step": 19650 }, { "epoch": 2.5579851620460756, "grad_norm": 2.880563974380493, "learning_rate": 5.492923328555416e-07, "loss": 0.3184, "step": 19653 }, { "epoch": 2.5583756345177666, "grad_norm": 2.620983839035034, "learning_rate": 5.483404128991682e-07, "loss": 0.3796, "step": 19656 }, { "epoch": 2.558766106989457, "grad_norm": 2.6286096572875977, "learning_rate": 5.473892706383587e-07, "loss": 0.3424, "step": 19659 }, { "epoch": 2.559156579461148, "grad_norm": 2.9008336067199707, "learning_rate": 5.464389062392783e-07, "loss": 0.3349, "step": 19662 }, { "epoch": 2.5595470519328387, "grad_norm": 2.709076404571533, "learning_rate": 5.454893198679507e-07, "loss": 0.3707, "step": 19665 }, { "epoch": 2.5599375244045293, "grad_norm": 3.218621253967285, "learning_rate": 5.445405116902686e-07, "loss": 0.3714, "step": 19668 }, { "epoch": 2.5603279968762203, "grad_norm": 2.944977283477783, "learning_rate": 5.435924818719857e-07, "loss": 0.3407, "step": 19671 }, { "epoch": 2.560718469347911, "grad_norm": 2.735933303833008, "learning_rate": 5.426452305787222e-07, "loss": 0.3715, "step": 19674 }, { "epoch": 2.561108941819602, "grad_norm": 2.785769462585449, "learning_rate": 5.416987579759597e-07, "loss": 0.3397, "step": 19677 }, { "epoch": 2.5614994142912924, "grad_norm": 3.0471670627593994, "learning_rate": 5.407530642290442e-07, "loss": 0.3995, "step": 19680 }, { "epoch": 2.5618898867629833, "grad_norm": 2.528057336807251, "learning_rate": 5.398081495031893e-07, "loss": 0.3123, "step": 19683 }, { "epoch": 2.562280359234674, "grad_norm": 2.9553000926971436, "learning_rate": 5.388640139634671e-07, "loss": 0.3747, "step": 19686 }, { "epoch": 2.562670831706365, "grad_norm": 2.6886420249938965, "learning_rate": 5.37920657774817e-07, "loss": 0.3329, "step": 19689 }, { "epoch": 2.5630613041780554, "grad_norm": 2.6885476112365723, "learning_rate": 5.369780811020403e-07, "loss": 0.318, "step": 19692 }, { "epoch": 2.563451776649746, "grad_norm": 2.7424001693725586, "learning_rate": 5.360362841098043e-07, "loss": 0.3761, "step": 19695 }, { "epoch": 2.563842249121437, "grad_norm": 2.803973913192749, "learning_rate": 5.350952669626397e-07, "loss": 0.3694, "step": 19698 }, { "epoch": 2.5642327215931275, "grad_norm": 2.916654109954834, "learning_rate": 5.341550298249376e-07, "loss": 0.4022, "step": 19701 }, { "epoch": 2.5646231940648185, "grad_norm": 3.0839455127716064, "learning_rate": 5.332155728609578e-07, "loss": 0.3461, "step": 19704 }, { "epoch": 2.565013666536509, "grad_norm": 2.8296546936035156, "learning_rate": 5.322768962348201e-07, "loss": 0.3642, "step": 19707 }, { "epoch": 2.5654041390082, "grad_norm": 2.6688482761383057, "learning_rate": 5.313390001105106e-07, "loss": 0.3447, "step": 19710 }, { "epoch": 2.5657946114798906, "grad_norm": 2.6866798400878906, "learning_rate": 5.304018846518765e-07, "loss": 0.3273, "step": 19713 }, { "epoch": 2.5661850839515816, "grad_norm": 2.8025333881378174, "learning_rate": 5.294655500226315e-07, "loss": 0.35, "step": 19716 }, { "epoch": 2.566575556423272, "grad_norm": 2.8305671215057373, "learning_rate": 5.285299963863499e-07, "loss": 0.3589, "step": 19719 }, { "epoch": 2.5669660288949627, "grad_norm": 2.790302276611328, "learning_rate": 5.275952239064708e-07, "loss": 0.2947, "step": 19722 }, { "epoch": 2.5673565013666537, "grad_norm": 2.739863872528076, "learning_rate": 5.266612327462978e-07, "loss": 0.3707, "step": 19725 }, { "epoch": 2.5677469738383443, "grad_norm": 2.8916966915130615, "learning_rate": 5.257280230689976e-07, "loss": 0.373, "step": 19728 }, { "epoch": 2.5681374463100353, "grad_norm": 2.925980567932129, "learning_rate": 5.247955950375977e-07, "loss": 0.3426, "step": 19731 }, { "epoch": 2.568527918781726, "grad_norm": 2.7908196449279785, "learning_rate": 5.238639488149944e-07, "loss": 0.3189, "step": 19734 }, { "epoch": 2.5689183912534164, "grad_norm": 2.8355512619018555, "learning_rate": 5.229330845639424e-07, "loss": 0.3587, "step": 19737 }, { "epoch": 2.5693088637251074, "grad_norm": 2.771878480911255, "learning_rate": 5.220030024470623e-07, "loss": 0.3447, "step": 19740 }, { "epoch": 2.5696993361967984, "grad_norm": 2.8163154125213623, "learning_rate": 5.210737026268364e-07, "loss": 0.3142, "step": 19743 }, { "epoch": 2.570089808668489, "grad_norm": 2.9766600131988525, "learning_rate": 5.201451852656137e-07, "loss": 0.396, "step": 19746 }, { "epoch": 2.5704802811401795, "grad_norm": 2.753567934036255, "learning_rate": 5.192174505256014e-07, "loss": 0.3244, "step": 19749 }, { "epoch": 2.5708707536118705, "grad_norm": 2.696614980697632, "learning_rate": 5.182904985688758e-07, "loss": 0.3593, "step": 19752 }, { "epoch": 2.571261226083561, "grad_norm": 3.015610456466675, "learning_rate": 5.173643295573704e-07, "loss": 0.3158, "step": 19755 }, { "epoch": 2.571651698555252, "grad_norm": 2.938066244125366, "learning_rate": 5.164389436528877e-07, "loss": 0.3686, "step": 19758 }, { "epoch": 2.5720421710269425, "grad_norm": 2.7356908321380615, "learning_rate": 5.155143410170899e-07, "loss": 0.3215, "step": 19761 }, { "epoch": 2.572432643498633, "grad_norm": 2.872227668762207, "learning_rate": 5.14590521811501e-07, "loss": 0.4187, "step": 19764 }, { "epoch": 2.572823115970324, "grad_norm": 2.6312549114227295, "learning_rate": 5.136674861975138e-07, "loss": 0.3705, "step": 19767 }, { "epoch": 2.573213588442015, "grad_norm": 3.510446310043335, "learning_rate": 5.127452343363787e-07, "loss": 0.3801, "step": 19770 }, { "epoch": 2.5736040609137056, "grad_norm": 2.8721954822540283, "learning_rate": 5.118237663892101e-07, "loss": 0.2959, "step": 19773 }, { "epoch": 2.573994533385396, "grad_norm": 2.6207385063171387, "learning_rate": 5.109030825169886e-07, "loss": 0.3646, "step": 19776 }, { "epoch": 2.574385005857087, "grad_norm": 2.5428993701934814, "learning_rate": 5.099831828805552e-07, "loss": 0.329, "step": 19779 }, { "epoch": 2.5747754783287777, "grad_norm": 2.899235725402832, "learning_rate": 5.090640676406134e-07, "loss": 0.3592, "step": 19782 }, { "epoch": 2.5751659508004687, "grad_norm": 2.9908111095428467, "learning_rate": 5.081457369577319e-07, "loss": 0.4394, "step": 19785 }, { "epoch": 2.5755564232721593, "grad_norm": 2.572739601135254, "learning_rate": 5.07228190992341e-07, "loss": 0.3404, "step": 19788 }, { "epoch": 2.57594689574385, "grad_norm": 2.801175117492676, "learning_rate": 5.063114299047328e-07, "loss": 0.3375, "step": 19791 }, { "epoch": 2.576337368215541, "grad_norm": 2.8451387882232666, "learning_rate": 5.053954538550643e-07, "loss": 0.3875, "step": 19794 }, { "epoch": 2.576727840687232, "grad_norm": 2.8498640060424805, "learning_rate": 5.044802630033557e-07, "loss": 0.4177, "step": 19797 }, { "epoch": 2.5771183131589224, "grad_norm": 3.0200397968292236, "learning_rate": 5.035658575094882e-07, "loss": 0.3213, "step": 19800 }, { "epoch": 2.577508785630613, "grad_norm": 3.0799405574798584, "learning_rate": 5.02652237533206e-07, "loss": 0.4103, "step": 19803 }, { "epoch": 2.577899258102304, "grad_norm": 2.6357455253601074, "learning_rate": 5.017394032341161e-07, "loss": 0.2962, "step": 19806 }, { "epoch": 2.5782897305739945, "grad_norm": 2.754474401473999, "learning_rate": 5.008273547716902e-07, "loss": 0.3561, "step": 19809 }, { "epoch": 2.5786802030456855, "grad_norm": 2.776348829269409, "learning_rate": 4.999160923052604e-07, "loss": 0.3432, "step": 19812 }, { "epoch": 2.579070675517376, "grad_norm": 2.6089048385620117, "learning_rate": 4.990056159940221e-07, "loss": 0.3669, "step": 19815 }, { "epoch": 2.5794611479890666, "grad_norm": 2.7450947761535645, "learning_rate": 4.980959259970347e-07, "loss": 0.3924, "step": 19818 }, { "epoch": 2.5798516204607576, "grad_norm": 2.4832842350006104, "learning_rate": 4.971870224732184e-07, "loss": 0.3488, "step": 19821 }, { "epoch": 2.580242092932448, "grad_norm": 3.0447306632995605, "learning_rate": 4.962789055813555e-07, "loss": 0.3573, "step": 19824 }, { "epoch": 2.580632565404139, "grad_norm": 2.752413272857666, "learning_rate": 4.953715754800947e-07, "loss": 0.3524, "step": 19827 }, { "epoch": 2.5810230378758297, "grad_norm": 2.7045111656188965, "learning_rate": 4.944650323279432e-07, "loss": 0.3342, "step": 19830 }, { "epoch": 2.5814135103475206, "grad_norm": 2.5593090057373047, "learning_rate": 4.935592762832714e-07, "loss": 0.3024, "step": 19833 }, { "epoch": 2.581803982819211, "grad_norm": 2.776071786880493, "learning_rate": 4.926543075043133e-07, "loss": 0.3213, "step": 19836 }, { "epoch": 2.582194455290902, "grad_norm": 2.6728298664093018, "learning_rate": 4.917501261491675e-07, "loss": 0.3532, "step": 19839 }, { "epoch": 2.5825849277625927, "grad_norm": 2.9024364948272705, "learning_rate": 4.908467323757898e-07, "loss": 0.3839, "step": 19842 }, { "epoch": 2.5829754002342833, "grad_norm": 2.7096471786499023, "learning_rate": 4.899441263420019e-07, "loss": 0.3769, "step": 19845 }, { "epoch": 2.5833658727059743, "grad_norm": 2.882371425628662, "learning_rate": 4.890423082054879e-07, "loss": 0.3751, "step": 19848 }, { "epoch": 2.583756345177665, "grad_norm": 2.866162061691284, "learning_rate": 4.881412781237927e-07, "loss": 0.3513, "step": 19851 }, { "epoch": 2.584146817649356, "grad_norm": 2.852534770965576, "learning_rate": 4.872410362543251e-07, "loss": 0.3674, "step": 19854 }, { "epoch": 2.5845372901210464, "grad_norm": 2.7616591453552246, "learning_rate": 4.863415827543539e-07, "loss": 0.3503, "step": 19857 }, { "epoch": 2.5849277625927374, "grad_norm": 2.753648519515991, "learning_rate": 4.854429177810138e-07, "loss": 0.3549, "step": 19860 }, { "epoch": 2.585318235064428, "grad_norm": 2.8216161727905273, "learning_rate": 4.845450414912989e-07, "loss": 0.3334, "step": 19863 }, { "epoch": 2.585708707536119, "grad_norm": 2.91745662689209, "learning_rate": 4.836479540420653e-07, "loss": 0.41, "step": 19866 }, { "epoch": 2.5860991800078095, "grad_norm": 3.008896589279175, "learning_rate": 4.827516555900335e-07, "loss": 0.3873, "step": 19869 }, { "epoch": 2.5864896524795, "grad_norm": 2.9034063816070557, "learning_rate": 4.818561462917848e-07, "loss": 0.3723, "step": 19872 }, { "epoch": 2.586880124951191, "grad_norm": 3.033580780029297, "learning_rate": 4.809614263037621e-07, "loss": 0.4597, "step": 19875 }, { "epoch": 2.5872705974228816, "grad_norm": 2.697132110595703, "learning_rate": 4.800674957822709e-07, "loss": 0.3399, "step": 19878 }, { "epoch": 2.5876610698945726, "grad_norm": 2.8508565425872803, "learning_rate": 4.791743548834809e-07, "loss": 0.4714, "step": 19881 }, { "epoch": 2.588051542366263, "grad_norm": 2.757885694503784, "learning_rate": 4.782820037634206e-07, "loss": 0.4015, "step": 19884 }, { "epoch": 2.5884420148379537, "grad_norm": 2.6383426189422607, "learning_rate": 4.773904425779807e-07, "loss": 0.3587, "step": 19887 }, { "epoch": 2.5888324873096447, "grad_norm": 2.7818305492401123, "learning_rate": 4.7649967148291787e-07, "loss": 0.3431, "step": 19890 }, { "epoch": 2.5892229597813357, "grad_norm": 2.781233549118042, "learning_rate": 4.7560969063384587e-07, "loss": 0.3586, "step": 19893 }, { "epoch": 2.589613432253026, "grad_norm": 2.4520440101623535, "learning_rate": 4.747205001862421e-07, "loss": 0.3147, "step": 19896 }, { "epoch": 2.5900039047247168, "grad_norm": 2.895876407623291, "learning_rate": 4.7383210029544826e-07, "loss": 0.4083, "step": 19899 }, { "epoch": 2.5903943771964077, "grad_norm": 2.7189314365386963, "learning_rate": 4.7294449111666474e-07, "loss": 0.3006, "step": 19902 }, { "epoch": 2.5907848496680983, "grad_norm": 2.5522122383117676, "learning_rate": 4.720576728049553e-07, "loss": 0.3353, "step": 19905 }, { "epoch": 2.5911753221397893, "grad_norm": 2.8779947757720947, "learning_rate": 4.711716455152437e-07, "loss": 0.3466, "step": 19908 }, { "epoch": 2.59156579461148, "grad_norm": 3.0736544132232666, "learning_rate": 4.7028640940231964e-07, "loss": 0.3948, "step": 19911 }, { "epoch": 2.5919562670831704, "grad_norm": 2.9793858528137207, "learning_rate": 4.694019646208309e-07, "loss": 0.4169, "step": 19914 }, { "epoch": 2.5923467395548614, "grad_norm": 2.964721441268921, "learning_rate": 4.685183113252867e-07, "loss": 0.3437, "step": 19917 }, { "epoch": 2.5927372120265524, "grad_norm": 2.8925373554229736, "learning_rate": 4.6763544967006027e-07, "loss": 0.3583, "step": 19920 }, { "epoch": 2.593127684498243, "grad_norm": 2.848095178604126, "learning_rate": 4.667533798093876e-07, "loss": 0.3217, "step": 19923 }, { "epoch": 2.5935181569699335, "grad_norm": 2.6200132369995117, "learning_rate": 4.6587210189736277e-07, "loss": 0.4197, "step": 19926 }, { "epoch": 2.5939086294416245, "grad_norm": 3.714834451675415, "learning_rate": 4.6499161608794253e-07, "loss": 0.3621, "step": 19929 }, { "epoch": 2.594299101913315, "grad_norm": 2.842902898788452, "learning_rate": 4.641119225349472e-07, "loss": 0.4068, "step": 19932 }, { "epoch": 2.594689574385006, "grad_norm": 2.6865339279174805, "learning_rate": 4.632330213920572e-07, "loss": 0.3629, "step": 19935 }, { "epoch": 2.5950800468566966, "grad_norm": 2.490020751953125, "learning_rate": 4.6235491281281354e-07, "loss": 0.3555, "step": 19938 }, { "epoch": 2.595470519328387, "grad_norm": 2.818798780441284, "learning_rate": 4.614775969506219e-07, "loss": 0.3819, "step": 19941 }, { "epoch": 2.595860991800078, "grad_norm": 2.8724567890167236, "learning_rate": 4.6060107395874575e-07, "loss": 0.3519, "step": 19944 }, { "epoch": 2.596251464271769, "grad_norm": 2.898418664932251, "learning_rate": 4.597253439903121e-07, "loss": 0.3509, "step": 19947 }, { "epoch": 2.5966419367434597, "grad_norm": 2.461851119995117, "learning_rate": 4.588504071983102e-07, "loss": 0.3343, "step": 19950 }, { "epoch": 2.59703240921515, "grad_norm": 3.4729630947113037, "learning_rate": 4.579762637355889e-07, "loss": 0.3653, "step": 19953 }, { "epoch": 2.597422881686841, "grad_norm": 2.9097652435302734, "learning_rate": 4.5710291375485995e-07, "loss": 0.3598, "step": 19956 }, { "epoch": 2.5978133541585318, "grad_norm": 2.9537763595581055, "learning_rate": 4.5623035740869237e-07, "loss": 0.3626, "step": 19959 }, { "epoch": 2.5982038266302228, "grad_norm": 2.8022215366363525, "learning_rate": 4.553585948495254e-07, "loss": 0.3239, "step": 19962 }, { "epoch": 2.5985942991019133, "grad_norm": 3.070570945739746, "learning_rate": 4.544876262296505e-07, "loss": 0.397, "step": 19965 }, { "epoch": 2.598984771573604, "grad_norm": 2.5882253646850586, "learning_rate": 4.536174517012254e-07, "loss": 0.3869, "step": 19968 }, { "epoch": 2.599375244045295, "grad_norm": 2.780194044113159, "learning_rate": 4.527480714162663e-07, "loss": 0.3459, "step": 19971 }, { "epoch": 2.5997657165169854, "grad_norm": 2.8268415927886963, "learning_rate": 4.5187948552665394e-07, "loss": 0.3352, "step": 19974 }, { "epoch": 2.6001561889886764, "grad_norm": 2.7509450912475586, "learning_rate": 4.5101169418412804e-07, "loss": 0.3166, "step": 19977 }, { "epoch": 2.600546661460367, "grad_norm": 2.9784107208251953, "learning_rate": 4.501446975402879e-07, "loss": 0.3974, "step": 19980 }, { "epoch": 2.600937133932058, "grad_norm": 2.658228874206543, "learning_rate": 4.49278495746599e-07, "loss": 0.2955, "step": 19983 }, { "epoch": 2.6013276064037485, "grad_norm": 2.9209887981414795, "learning_rate": 4.4841308895438363e-07, "loss": 0.4038, "step": 19986 }, { "epoch": 2.6017180788754395, "grad_norm": 3.115154981613159, "learning_rate": 4.475484773148253e-07, "loss": 0.4056, "step": 19989 }, { "epoch": 2.60210855134713, "grad_norm": 3.18581223487854, "learning_rate": 4.4668466097897214e-07, "loss": 0.3335, "step": 19992 }, { "epoch": 2.6024990238188206, "grad_norm": 2.865859270095825, "learning_rate": 4.458216400977294e-07, "loss": 0.3325, "step": 19995 }, { "epoch": 2.6028894962905116, "grad_norm": 2.806417465209961, "learning_rate": 4.449594148218661e-07, "loss": 0.3596, "step": 19998 }, { "epoch": 2.603279968762202, "grad_norm": 2.626926898956299, "learning_rate": 4.4409798530200887e-07, "loss": 0.3196, "step": 20001 }, { "epoch": 2.603670441233893, "grad_norm": 2.9506349563598633, "learning_rate": 4.4323735168865067e-07, "loss": 0.3033, "step": 20004 }, { "epoch": 2.6040609137055837, "grad_norm": 2.665252685546875, "learning_rate": 4.423775141321418e-07, "loss": 0.3409, "step": 20007 }, { "epoch": 2.6044513861772742, "grad_norm": 2.7964937686920166, "learning_rate": 4.4151847278269213e-07, "loss": 0.4056, "step": 20010 }, { "epoch": 2.6048418586489652, "grad_norm": 2.7492387294769287, "learning_rate": 4.40660227790376e-07, "loss": 0.3769, "step": 20013 }, { "epoch": 2.605232331120656, "grad_norm": 2.866215944290161, "learning_rate": 4.398027793051274e-07, "loss": 0.3084, "step": 20016 }, { "epoch": 2.6056228035923468, "grad_norm": 2.538553237915039, "learning_rate": 4.389461274767398e-07, "loss": 0.3312, "step": 20019 }, { "epoch": 2.6060132760640373, "grad_norm": 2.998384475708008, "learning_rate": 4.3809027245486745e-07, "loss": 0.3473, "step": 20022 }, { "epoch": 2.6064037485357283, "grad_norm": 3.2550208568573, "learning_rate": 4.3723521438902907e-07, "loss": 0.3076, "step": 20025 }, { "epoch": 2.606794221007419, "grad_norm": 2.7280972003936768, "learning_rate": 4.3638095342859953e-07, "loss": 0.393, "step": 20028 }, { "epoch": 2.60718469347911, "grad_norm": 2.8212859630584717, "learning_rate": 4.3552748972281623e-07, "loss": 0.3391, "step": 20031 }, { "epoch": 2.6075751659508004, "grad_norm": 2.6648576259613037, "learning_rate": 4.3467482342077927e-07, "loss": 0.2934, "step": 20034 }, { "epoch": 2.607965638422491, "grad_norm": 2.6012842655181885, "learning_rate": 4.3382295467144675e-07, "loss": 0.3, "step": 20037 }, { "epoch": 2.608356110894182, "grad_norm": 2.910468578338623, "learning_rate": 4.329718836236374e-07, "loss": 0.3812, "step": 20040 }, { "epoch": 2.608746583365873, "grad_norm": 3.135700225830078, "learning_rate": 4.3212161042603174e-07, "loss": 0.3906, "step": 20043 }, { "epoch": 2.6091370558375635, "grad_norm": 2.9458134174346924, "learning_rate": 4.312721352271726e-07, "loss": 0.3705, "step": 20046 }, { "epoch": 2.609527528309254, "grad_norm": 2.7890570163726807, "learning_rate": 4.304234581754602e-07, "loss": 0.3879, "step": 20049 }, { "epoch": 2.609918000780945, "grad_norm": 2.8146069049835205, "learning_rate": 4.2957557941915586e-07, "loss": 0.3378, "step": 20052 }, { "epoch": 2.6103084732526356, "grad_norm": 2.8290700912475586, "learning_rate": 4.287284991063839e-07, "loss": 0.3459, "step": 20055 }, { "epoch": 2.6106989457243266, "grad_norm": 2.8539445400238037, "learning_rate": 4.278822173851266e-07, "loss": 0.3173, "step": 20058 }, { "epoch": 2.611089418196017, "grad_norm": 2.866018056869507, "learning_rate": 4.270367344032278e-07, "loss": 0.3547, "step": 20061 }, { "epoch": 2.6114798906677077, "grad_norm": 2.7992513179779053, "learning_rate": 4.2619205030838993e-07, "loss": 0.2944, "step": 20064 }, { "epoch": 2.6118703631393987, "grad_norm": 2.9336869716644287, "learning_rate": 4.2534816524818054e-07, "loss": 0.327, "step": 20067 }, { "epoch": 2.6122608356110897, "grad_norm": 2.6383185386657715, "learning_rate": 4.245050793700228e-07, "loss": 0.3384, "step": 20070 }, { "epoch": 2.6126513080827802, "grad_norm": 2.8900146484375, "learning_rate": 4.2366279282120113e-07, "loss": 0.3804, "step": 20073 }, { "epoch": 2.613041780554471, "grad_norm": 2.861504316329956, "learning_rate": 4.2282130574886336e-07, "loss": 0.3865, "step": 20076 }, { "epoch": 2.6134322530261618, "grad_norm": 3.0964694023132324, "learning_rate": 4.2198061830001467e-07, "loss": 0.3979, "step": 20079 }, { "epoch": 2.6138227254978523, "grad_norm": 2.622819423675537, "learning_rate": 4.211407306215198e-07, "loss": 0.3653, "step": 20082 }, { "epoch": 2.6142131979695433, "grad_norm": 2.8246631622314453, "learning_rate": 4.20301642860107e-07, "loss": 0.3648, "step": 20085 }, { "epoch": 2.614603670441234, "grad_norm": 2.9626944065093994, "learning_rate": 4.194633551623645e-07, "loss": 0.3551, "step": 20088 }, { "epoch": 2.6149941429129244, "grad_norm": 3.4414031505584717, "learning_rate": 4.186258676747368e-07, "loss": 0.3116, "step": 20091 }, { "epoch": 2.6153846153846154, "grad_norm": 2.694920539855957, "learning_rate": 4.177891805435319e-07, "loss": 0.3415, "step": 20094 }, { "epoch": 2.615775087856306, "grad_norm": 2.726844549179077, "learning_rate": 4.169532939149185e-07, "loss": 0.349, "step": 20097 }, { "epoch": 2.616165560327997, "grad_norm": 2.7306230068206787, "learning_rate": 4.1611820793492294e-07, "loss": 0.3459, "step": 20100 }, { "epoch": 2.6165560327996875, "grad_norm": 2.7479240894317627, "learning_rate": 4.152839227494332e-07, "loss": 0.3669, "step": 20103 }, { "epoch": 2.6169465052713785, "grad_norm": 2.936683416366577, "learning_rate": 4.1445043850419754e-07, "loss": 0.3581, "step": 20106 }, { "epoch": 2.617336977743069, "grad_norm": 2.6035518646240234, "learning_rate": 4.1361775534482397e-07, "loss": 0.3312, "step": 20109 }, { "epoch": 2.61772745021476, "grad_norm": 2.8025290966033936, "learning_rate": 4.1278587341678e-07, "loss": 0.3405, "step": 20112 }, { "epoch": 2.6181179226864506, "grad_norm": 2.8133556842803955, "learning_rate": 4.119547928653933e-07, "loss": 0.3747, "step": 20115 }, { "epoch": 2.618508395158141, "grad_norm": 2.7612462043762207, "learning_rate": 4.111245138358527e-07, "loss": 0.3516, "step": 20118 }, { "epoch": 2.618898867629832, "grad_norm": 2.857440710067749, "learning_rate": 4.1029503647320666e-07, "loss": 0.3266, "step": 20121 }, { "epoch": 2.6192893401015227, "grad_norm": 2.9601211547851562, "learning_rate": 4.094663609223615e-07, "loss": 0.3357, "step": 20124 }, { "epoch": 2.6196798125732137, "grad_norm": 2.95589017868042, "learning_rate": 4.086384873280863e-07, "loss": 0.3328, "step": 20127 }, { "epoch": 2.6200702850449042, "grad_norm": 3.003638744354248, "learning_rate": 4.0781141583500825e-07, "loss": 0.3574, "step": 20130 }, { "epoch": 2.6204607575165952, "grad_norm": 2.928009033203125, "learning_rate": 4.069851465876157e-07, "loss": 0.4451, "step": 20133 }, { "epoch": 2.620851229988286, "grad_norm": 3.0818943977355957, "learning_rate": 4.061596797302553e-07, "loss": 0.3478, "step": 20136 }, { "epoch": 2.621241702459977, "grad_norm": 3.1439309120178223, "learning_rate": 4.053350154071356e-07, "loss": 0.3554, "step": 20139 }, { "epoch": 2.6216321749316673, "grad_norm": 2.61098313331604, "learning_rate": 4.045111537623231e-07, "loss": 0.3411, "step": 20142 }, { "epoch": 2.622022647403358, "grad_norm": 2.8817806243896484, "learning_rate": 4.0368809493974314e-07, "loss": 0.4183, "step": 20145 }, { "epoch": 2.622413119875049, "grad_norm": 2.906568765640259, "learning_rate": 4.028658390831852e-07, "loss": 0.3663, "step": 20148 }, { "epoch": 2.6228035923467394, "grad_norm": 2.7763681411743164, "learning_rate": 4.0204438633629383e-07, "loss": 0.395, "step": 20151 }, { "epoch": 2.6231940648184304, "grad_norm": 2.8315131664276123, "learning_rate": 4.0122373684257474e-07, "loss": 0.3649, "step": 20154 }, { "epoch": 2.623584537290121, "grad_norm": 3.1497440338134766, "learning_rate": 4.0040389074539555e-07, "loss": 0.3771, "step": 20157 }, { "epoch": 2.6239750097618115, "grad_norm": 2.719172477722168, "learning_rate": 3.9958484818797995e-07, "loss": 0.3331, "step": 20160 }, { "epoch": 2.6243654822335025, "grad_norm": 3.178786277770996, "learning_rate": 3.9876660931341405e-07, "loss": 0.3777, "step": 20163 }, { "epoch": 2.6247559547051935, "grad_norm": 2.772921085357666, "learning_rate": 3.9794917426464074e-07, "loss": 0.3759, "step": 20166 }, { "epoch": 2.625146427176884, "grad_norm": 2.7855470180511475, "learning_rate": 3.971325431844664e-07, "loss": 0.3807, "step": 20169 }, { "epoch": 2.6255368996485746, "grad_norm": 2.967480182647705, "learning_rate": 3.963167162155529e-07, "loss": 0.3764, "step": 20172 }, { "epoch": 2.6259273721202656, "grad_norm": 2.7995481491088867, "learning_rate": 3.9550169350042464e-07, "loss": 0.3978, "step": 20175 }, { "epoch": 2.626317844591956, "grad_norm": 3.274163246154785, "learning_rate": 3.946874751814639e-07, "loss": 0.3355, "step": 20178 }, { "epoch": 2.626708317063647, "grad_norm": 2.6372921466827393, "learning_rate": 3.93874061400914e-07, "loss": 0.3227, "step": 20181 }, { "epoch": 2.6270987895353377, "grad_norm": 2.845742702484131, "learning_rate": 3.9306145230087524e-07, "loss": 0.3701, "step": 20184 }, { "epoch": 2.6274892620070283, "grad_norm": 2.5727438926696777, "learning_rate": 3.9224964802330847e-07, "loss": 0.3382, "step": 20187 }, { "epoch": 2.6278797344787193, "grad_norm": 3.014756679534912, "learning_rate": 3.914386487100358e-07, "loss": 0.3586, "step": 20190 }, { "epoch": 2.6282702069504102, "grad_norm": 2.7406253814697266, "learning_rate": 3.9062845450273613e-07, "loss": 0.3508, "step": 20193 }, { "epoch": 2.628660679422101, "grad_norm": 2.826164960861206, "learning_rate": 3.898190655429479e-07, "loss": 0.4258, "step": 20196 }, { "epoch": 2.6290511518937913, "grad_norm": 3.219733238220215, "learning_rate": 3.890104819720719e-07, "loss": 0.3397, "step": 20199 }, { "epoch": 2.6294416243654823, "grad_norm": 2.6495490074157715, "learning_rate": 3.8820270393136403e-07, "loss": 0.3296, "step": 20202 }, { "epoch": 2.629832096837173, "grad_norm": 3.0028276443481445, "learning_rate": 3.873957315619414e-07, "loss": 0.3653, "step": 20205 }, { "epoch": 2.630222569308864, "grad_norm": 2.973289728164673, "learning_rate": 3.8658956500478127e-07, "loss": 0.3419, "step": 20208 }, { "epoch": 2.6306130417805544, "grad_norm": 3.0267601013183594, "learning_rate": 3.857842044007193e-07, "loss": 0.3816, "step": 20211 }, { "epoch": 2.631003514252245, "grad_norm": 2.8618972301483154, "learning_rate": 3.849796498904496e-07, "loss": 0.3335, "step": 20214 }, { "epoch": 2.631393986723936, "grad_norm": 3.1597700119018555, "learning_rate": 3.841759016145258e-07, "loss": 0.4617, "step": 20217 }, { "epoch": 2.631784459195627, "grad_norm": 3.0783212184906006, "learning_rate": 3.8337295971336285e-07, "loss": 0.4516, "step": 20220 }, { "epoch": 2.6321749316673175, "grad_norm": 2.588809013366699, "learning_rate": 3.825708243272319e-07, "loss": 0.3026, "step": 20223 }, { "epoch": 2.632565404139008, "grad_norm": 2.936833381652832, "learning_rate": 3.817694955962642e-07, "loss": 0.3481, "step": 20226 }, { "epoch": 2.632955876610699, "grad_norm": 2.49509596824646, "learning_rate": 3.8096897366044936e-07, "loss": 0.3365, "step": 20229 }, { "epoch": 2.6333463490823896, "grad_norm": 2.5137100219726562, "learning_rate": 3.801692586596384e-07, "loss": 0.3061, "step": 20232 }, { "epoch": 2.6337368215540806, "grad_norm": 3.77799916267395, "learning_rate": 3.7937035073353947e-07, "loss": 0.3653, "step": 20235 }, { "epoch": 2.634127294025771, "grad_norm": 3.221705198287964, "learning_rate": 3.785722500217193e-07, "loss": 0.354, "step": 20238 }, { "epoch": 2.6345177664974617, "grad_norm": 2.6136856079101562, "learning_rate": 3.7777495666360574e-07, "loss": 0.3911, "step": 20241 }, { "epoch": 2.6349082389691527, "grad_norm": 2.5668489933013916, "learning_rate": 3.76978470798483e-07, "loss": 0.2889, "step": 20244 }, { "epoch": 2.6352987114408433, "grad_norm": 2.760801076889038, "learning_rate": 3.7618279256549597e-07, "loss": 0.3479, "step": 20247 }, { "epoch": 2.6356891839125343, "grad_norm": 2.920722723007202, "learning_rate": 3.7538792210364825e-07, "loss": 0.3856, "step": 20250 }, { "epoch": 2.636079656384225, "grad_norm": 3.082993268966675, "learning_rate": 3.745938595518017e-07, "loss": 0.3859, "step": 20253 }, { "epoch": 2.636470128855916, "grad_norm": 3.254218101501465, "learning_rate": 3.7380060504867697e-07, "loss": 0.3902, "step": 20256 }, { "epoch": 2.6368606013276064, "grad_norm": 2.6879429817199707, "learning_rate": 3.7300815873285435e-07, "loss": 0.3567, "step": 20259 }, { "epoch": 2.6372510737992974, "grad_norm": 2.662904977798462, "learning_rate": 3.722165207427736e-07, "loss": 0.3279, "step": 20262 }, { "epoch": 2.637641546270988, "grad_norm": 2.596517562866211, "learning_rate": 3.7142569121673135e-07, "loss": 0.3851, "step": 20265 }, { "epoch": 2.6380320187426785, "grad_norm": 2.8582632541656494, "learning_rate": 3.706356702928826e-07, "loss": 0.364, "step": 20268 }, { "epoch": 2.6384224912143694, "grad_norm": 2.894286632537842, "learning_rate": 3.6984645810924423e-07, "loss": 0.3438, "step": 20271 }, { "epoch": 2.63881296368606, "grad_norm": 2.674154758453369, "learning_rate": 3.690580548036893e-07, "loss": 0.3641, "step": 20274 }, { "epoch": 2.639203436157751, "grad_norm": 2.6992299556732178, "learning_rate": 3.6827046051395035e-07, "loss": 0.3502, "step": 20277 }, { "epoch": 2.6395939086294415, "grad_norm": 2.4692561626434326, "learning_rate": 3.674836753776173e-07, "loss": 0.3447, "step": 20280 }, { "epoch": 2.639984381101132, "grad_norm": 2.7542686462402344, "learning_rate": 3.666976995321414e-07, "loss": 0.3577, "step": 20283 }, { "epoch": 2.640374853572823, "grad_norm": 3.003816843032837, "learning_rate": 3.6591253311483056e-07, "loss": 0.2958, "step": 20286 }, { "epoch": 2.640765326044514, "grad_norm": 2.7368361949920654, "learning_rate": 3.6512817626285056e-07, "loss": 0.3826, "step": 20289 }, { "epoch": 2.6411557985162046, "grad_norm": 2.821338176727295, "learning_rate": 3.6434462911322856e-07, "loss": 0.3569, "step": 20292 }, { "epoch": 2.641546270987895, "grad_norm": 2.715587615966797, "learning_rate": 3.635618918028477e-07, "loss": 0.369, "step": 20295 }, { "epoch": 2.641936743459586, "grad_norm": 2.71569561958313, "learning_rate": 3.627799644684504e-07, "loss": 0.3227, "step": 20298 }, { "epoch": 2.6423272159312767, "grad_norm": 3.0673129558563232, "learning_rate": 3.6199884724663734e-07, "loss": 0.3651, "step": 20301 }, { "epoch": 2.6427176884029677, "grad_norm": 3.4038453102111816, "learning_rate": 3.612185402738705e-07, "loss": 0.3598, "step": 20304 }, { "epoch": 2.6431081608746583, "grad_norm": 2.976976156234741, "learning_rate": 3.604390436864652e-07, "loss": 0.3942, "step": 20307 }, { "epoch": 2.643498633346349, "grad_norm": 3.015279531478882, "learning_rate": 3.596603576205987e-07, "loss": 0.413, "step": 20310 }, { "epoch": 2.64388910581804, "grad_norm": 2.8536202907562256, "learning_rate": 3.5888248221230605e-07, "loss": 0.3735, "step": 20313 }, { "epoch": 2.644279578289731, "grad_norm": 2.8353049755096436, "learning_rate": 3.5810541759748076e-07, "loss": 0.3604, "step": 20316 }, { "epoch": 2.6446700507614214, "grad_norm": 2.661177158355713, "learning_rate": 3.5732916391187254e-07, "loss": 0.3191, "step": 20319 }, { "epoch": 2.645060523233112, "grad_norm": 2.5510129928588867, "learning_rate": 3.5655372129109356e-07, "loss": 0.3296, "step": 20322 }, { "epoch": 2.645450995704803, "grad_norm": 2.8251776695251465, "learning_rate": 3.557790898706115e-07, "loss": 0.3705, "step": 20325 }, { "epoch": 2.6458414681764935, "grad_norm": 2.7708895206451416, "learning_rate": 3.55005269785752e-07, "loss": 0.3361, "step": 20328 }, { "epoch": 2.6462319406481845, "grad_norm": 2.593419313430786, "learning_rate": 3.5423226117169973e-07, "loss": 0.3692, "step": 20331 }, { "epoch": 2.646622413119875, "grad_norm": 3.5748655796051025, "learning_rate": 3.5346006416349886e-07, "loss": 0.3352, "step": 20334 }, { "epoch": 2.6470128855915656, "grad_norm": 3.5419743061065674, "learning_rate": 3.5268867889604983e-07, "loss": 0.3423, "step": 20337 }, { "epoch": 2.6474033580632566, "grad_norm": 2.934070587158203, "learning_rate": 3.5191810550411155e-07, "loss": 0.3658, "step": 20340 }, { "epoch": 2.6477938305349475, "grad_norm": 2.5605757236480713, "learning_rate": 3.511483441223018e-07, "loss": 0.3037, "step": 20343 }, { "epoch": 2.648184303006638, "grad_norm": 2.988960027694702, "learning_rate": 3.503793948850975e-07, "loss": 0.3674, "step": 20346 }, { "epoch": 2.6485747754783286, "grad_norm": 2.7395620346069336, "learning_rate": 3.4961125792683184e-07, "loss": 0.3509, "step": 20349 }, { "epoch": 2.6489652479500196, "grad_norm": 2.5244340896606445, "learning_rate": 3.488439333816951e-07, "loss": 0.3272, "step": 20352 }, { "epoch": 2.64935572042171, "grad_norm": 2.6971912384033203, "learning_rate": 3.480774213837396e-07, "loss": 0.3596, "step": 20355 }, { "epoch": 2.649746192893401, "grad_norm": 3.134498119354248, "learning_rate": 3.4731172206687257e-07, "loss": 0.3619, "step": 20358 }, { "epoch": 2.6501366653650917, "grad_norm": 2.9849419593811035, "learning_rate": 3.465468355648588e-07, "loss": 0.3809, "step": 20361 }, { "epoch": 2.6505271378367823, "grad_norm": 2.864431858062744, "learning_rate": 3.457827620113241e-07, "loss": 0.3084, "step": 20364 }, { "epoch": 2.6509176103084733, "grad_norm": 3.1214239597320557, "learning_rate": 3.4501950153975003e-07, "loss": 0.3852, "step": 20367 }, { "epoch": 2.651308082780164, "grad_norm": 2.6842384338378906, "learning_rate": 3.4425705428347556e-07, "loss": 0.3501, "step": 20370 }, { "epoch": 2.651698555251855, "grad_norm": 3.266386032104492, "learning_rate": 3.434954203757007e-07, "loss": 0.3648, "step": 20373 }, { "epoch": 2.6520890277235454, "grad_norm": 2.81931471824646, "learning_rate": 3.427345999494797e-07, "loss": 0.3676, "step": 20376 }, { "epoch": 2.6524795001952364, "grad_norm": 2.9153671264648438, "learning_rate": 3.419745931377261e-07, "loss": 0.3825, "step": 20379 }, { "epoch": 2.652869972666927, "grad_norm": 2.6502585411071777, "learning_rate": 3.41215400073211e-07, "loss": 0.3146, "step": 20382 }, { "epoch": 2.653260445138618, "grad_norm": 2.8781330585479736, "learning_rate": 3.404570208885666e-07, "loss": 0.4162, "step": 20385 }, { "epoch": 2.6536509176103085, "grad_norm": 3.011082410812378, "learning_rate": 3.3969945571627805e-07, "loss": 0.3475, "step": 20388 }, { "epoch": 2.654041390081999, "grad_norm": 2.7348506450653076, "learning_rate": 3.389427046886906e-07, "loss": 0.3648, "step": 20391 }, { "epoch": 2.65443186255369, "grad_norm": 2.8573429584503174, "learning_rate": 3.381867679380069e-07, "loss": 0.3945, "step": 20394 }, { "epoch": 2.6548223350253806, "grad_norm": 2.513740301132202, "learning_rate": 3.374316455962884e-07, "loss": 0.302, "step": 20397 }, { "epoch": 2.6552128074970716, "grad_norm": 2.592041015625, "learning_rate": 3.36677337795453e-07, "loss": 0.3118, "step": 20400 }, { "epoch": 2.655603279968762, "grad_norm": 2.622539520263672, "learning_rate": 3.359238446672752e-07, "loss": 0.346, "step": 20403 }, { "epoch": 2.655993752440453, "grad_norm": 2.9038712978363037, "learning_rate": 3.3517116634339097e-07, "loss": 0.3932, "step": 20406 }, { "epoch": 2.6563842249121437, "grad_norm": 2.3799030780792236, "learning_rate": 3.344193029552911e-07, "loss": 0.3106, "step": 20409 }, { "epoch": 2.6567746973838346, "grad_norm": 2.7198572158813477, "learning_rate": 3.336682546343228e-07, "loss": 0.3262, "step": 20412 }, { "epoch": 2.657165169855525, "grad_norm": 2.7029428482055664, "learning_rate": 3.329180215116945e-07, "loss": 0.3479, "step": 20415 }, { "epoch": 2.6575556423272158, "grad_norm": 2.6045374870300293, "learning_rate": 3.3216860371847013e-07, "loss": 0.3007, "step": 20418 }, { "epoch": 2.6579461147989067, "grad_norm": 2.965404748916626, "learning_rate": 3.314200013855706e-07, "loss": 0.3138, "step": 20421 }, { "epoch": 2.6583365872705973, "grad_norm": 2.752427577972412, "learning_rate": 3.3067221464377407e-07, "loss": 0.3779, "step": 20424 }, { "epoch": 2.6587270597422883, "grad_norm": 2.7018942832946777, "learning_rate": 3.29925243623721e-07, "loss": 0.3085, "step": 20427 }, { "epoch": 2.659117532213979, "grad_norm": 2.695830821990967, "learning_rate": 3.2917908845590263e-07, "loss": 0.3486, "step": 20430 }, { "epoch": 2.6595080046856694, "grad_norm": 2.9110679626464844, "learning_rate": 3.2843374927067126e-07, "loss": 0.3119, "step": 20433 }, { "epoch": 2.6598984771573604, "grad_norm": 2.8462071418762207, "learning_rate": 3.276892261982373e-07, "loss": 0.3364, "step": 20436 }, { "epoch": 2.6602889496290514, "grad_norm": 2.907721757888794, "learning_rate": 3.26945519368666e-07, "loss": 0.4112, "step": 20439 }, { "epoch": 2.660679422100742, "grad_norm": 2.566709041595459, "learning_rate": 3.2620262891188195e-07, "loss": 0.3851, "step": 20442 }, { "epoch": 2.6610698945724325, "grad_norm": 2.8268721103668213, "learning_rate": 3.254605549576656e-07, "loss": 0.3545, "step": 20445 }, { "epoch": 2.6614603670441235, "grad_norm": 2.767282485961914, "learning_rate": 3.2471929763565725e-07, "loss": 0.3658, "step": 20448 }, { "epoch": 2.661850839515814, "grad_norm": 3.0643694400787354, "learning_rate": 3.2397885707535216e-07, "loss": 0.3307, "step": 20451 }, { "epoch": 2.662241311987505, "grad_norm": 3.210627555847168, "learning_rate": 3.2323923340610296e-07, "loss": 0.3483, "step": 20454 }, { "epoch": 2.6626317844591956, "grad_norm": 2.5691981315612793, "learning_rate": 3.2250042675712246e-07, "loss": 0.2977, "step": 20457 }, { "epoch": 2.663022256930886, "grad_norm": 2.906088352203369, "learning_rate": 3.2176243725747736e-07, "loss": 0.3708, "step": 20460 }, { "epoch": 2.663412729402577, "grad_norm": 2.960641622543335, "learning_rate": 3.210252650360918e-07, "loss": 0.3184, "step": 20463 }, { "epoch": 2.663803201874268, "grad_norm": 2.767688274383545, "learning_rate": 3.2028891022174934e-07, "loss": 0.364, "step": 20466 }, { "epoch": 2.6641936743459587, "grad_norm": 2.6504275798797607, "learning_rate": 3.1955337294309054e-07, "loss": 0.3184, "step": 20469 }, { "epoch": 2.664584146817649, "grad_norm": 2.9848546981811523, "learning_rate": 3.1881865332861086e-07, "loss": 0.3828, "step": 20472 }, { "epoch": 2.66497461928934, "grad_norm": 2.9928441047668457, "learning_rate": 3.180847515066643e-07, "loss": 0.3589, "step": 20475 }, { "epoch": 2.6653650917610308, "grad_norm": 3.2330639362335205, "learning_rate": 3.173516676054628e-07, "loss": 0.3848, "step": 20478 }, { "epoch": 2.6657555642327218, "grad_norm": 2.6625139713287354, "learning_rate": 3.1661940175307437e-07, "loss": 0.365, "step": 20481 }, { "epoch": 2.6661460367044123, "grad_norm": 2.730323553085327, "learning_rate": 3.15887954077424e-07, "loss": 0.328, "step": 20484 }, { "epoch": 2.666536509176103, "grad_norm": 2.785428524017334, "learning_rate": 3.1515732470629335e-07, "loss": 0.3185, "step": 20487 }, { "epoch": 2.666926981647794, "grad_norm": 2.6901137828826904, "learning_rate": 3.144275137673236e-07, "loss": 0.298, "step": 20490 }, { "epoch": 2.667317454119485, "grad_norm": 2.662198066711426, "learning_rate": 3.1369852138801006e-07, "loss": 0.3452, "step": 20493 }, { "epoch": 2.6677079265911754, "grad_norm": 2.7666854858398438, "learning_rate": 3.1297034769570523e-07, "loss": 0.333, "step": 20496 }, { "epoch": 2.668098399062866, "grad_norm": 2.821671962738037, "learning_rate": 3.1224299281762184e-07, "loss": 0.3391, "step": 20499 }, { "epoch": 2.668488871534557, "grad_norm": 2.7123184204101562, "learning_rate": 3.115164568808254e-07, "loss": 0.3117, "step": 20502 }, { "epoch": 2.6688793440062475, "grad_norm": 2.6388189792633057, "learning_rate": 3.107907400122406e-07, "loss": 0.3992, "step": 20505 }, { "epoch": 2.6692698164779385, "grad_norm": 2.8583977222442627, "learning_rate": 3.100658423386488e-07, "loss": 0.3633, "step": 20508 }, { "epoch": 2.669660288949629, "grad_norm": 2.892751932144165, "learning_rate": 3.093417639866886e-07, "loss": 0.3982, "step": 20511 }, { "epoch": 2.6700507614213196, "grad_norm": 2.8561367988586426, "learning_rate": 3.0861850508285496e-07, "loss": 0.4017, "step": 20514 }, { "epoch": 2.6704412338930106, "grad_norm": 3.0648305416107178, "learning_rate": 3.07896065753498e-07, "loss": 0.4046, "step": 20517 }, { "epoch": 2.670831706364701, "grad_norm": 2.555460214614868, "learning_rate": 3.0717444612482883e-07, "loss": 0.3425, "step": 20520 }, { "epoch": 2.671222178836392, "grad_norm": 2.744659185409546, "learning_rate": 3.064536463229112e-07, "loss": 0.4306, "step": 20523 }, { "epoch": 2.6716126513080827, "grad_norm": 2.578082323074341, "learning_rate": 3.0573366647366764e-07, "loss": 0.3021, "step": 20526 }, { "epoch": 2.6720031237797737, "grad_norm": 2.590057849884033, "learning_rate": 3.0501450670287756e-07, "loss": 0.3247, "step": 20529 }, { "epoch": 2.672393596251464, "grad_norm": 2.5789554119110107, "learning_rate": 3.0429616713617607e-07, "loss": 0.3732, "step": 20532 }, { "epoch": 2.672784068723155, "grad_norm": 2.6770694255828857, "learning_rate": 3.0357864789905653e-07, "loss": 0.3459, "step": 20535 }, { "epoch": 2.6731745411948458, "grad_norm": 2.7113239765167236, "learning_rate": 3.0286194911686606e-07, "loss": 0.3877, "step": 20538 }, { "epoch": 2.6735650136665363, "grad_norm": 2.6579864025115967, "learning_rate": 3.0214607091481276e-07, "loss": 0.3727, "step": 20541 }, { "epoch": 2.6739554861382273, "grad_norm": 2.62597918510437, "learning_rate": 3.0143101341795823e-07, "loss": 0.3935, "step": 20544 }, { "epoch": 2.674345958609918, "grad_norm": 2.7485511302948, "learning_rate": 3.0071677675122035e-07, "loss": 0.4071, "step": 20547 }, { "epoch": 2.674736431081609, "grad_norm": 2.8608615398406982, "learning_rate": 3.0000336103937597e-07, "loss": 0.3377, "step": 20550 }, { "epoch": 2.6751269035532994, "grad_norm": 2.4845223426818848, "learning_rate": 2.9929076640705714e-07, "loss": 0.324, "step": 20553 }, { "epoch": 2.6755173760249904, "grad_norm": 2.7939839363098145, "learning_rate": 2.9857899297875304e-07, "loss": 0.4306, "step": 20556 }, { "epoch": 2.675907848496681, "grad_norm": 2.790100574493408, "learning_rate": 2.9786804087880816e-07, "loss": 0.4033, "step": 20559 }, { "epoch": 2.676298320968372, "grad_norm": 2.7309770584106445, "learning_rate": 2.9715791023142484e-07, "loss": 0.3931, "step": 20562 }, { "epoch": 2.6766887934400625, "grad_norm": 2.698127031326294, "learning_rate": 2.9644860116066155e-07, "loss": 0.3301, "step": 20565 }, { "epoch": 2.677079265911753, "grad_norm": 2.612318754196167, "learning_rate": 2.95740113790432e-07, "loss": 0.3219, "step": 20568 }, { "epoch": 2.677469738383444, "grad_norm": 2.7074458599090576, "learning_rate": 2.950324482445088e-07, "loss": 0.3249, "step": 20571 }, { "epoch": 2.6778602108551346, "grad_norm": 2.573791265487671, "learning_rate": 2.943256046465193e-07, "loss": 0.3419, "step": 20574 }, { "epoch": 2.6782506833268256, "grad_norm": 3.1822309494018555, "learning_rate": 2.936195831199468e-07, "loss": 0.3264, "step": 20577 }, { "epoch": 2.678641155798516, "grad_norm": 2.9114439487457275, "learning_rate": 2.929143837881326e-07, "loss": 0.4111, "step": 20580 }, { "epoch": 2.6790316282702067, "grad_norm": 2.9312427043914795, "learning_rate": 2.922100067742739e-07, "loss": 0.4224, "step": 20583 }, { "epoch": 2.6794221007418977, "grad_norm": 3.390676259994507, "learning_rate": 2.9150645220142273e-07, "loss": 0.3307, "step": 20586 }, { "epoch": 2.6798125732135887, "grad_norm": 2.750668525695801, "learning_rate": 2.908037201924885e-07, "loss": 0.4167, "step": 20589 }, { "epoch": 2.6802030456852792, "grad_norm": 2.667370319366455, "learning_rate": 2.9010181087023804e-07, "loss": 0.3587, "step": 20592 }, { "epoch": 2.68059351815697, "grad_norm": 3.1470043659210205, "learning_rate": 2.894007243572933e-07, "loss": 0.5031, "step": 20595 }, { "epoch": 2.6809839906286608, "grad_norm": 2.8547494411468506, "learning_rate": 2.887004607761329e-07, "loss": 0.3249, "step": 20598 }, { "epoch": 2.6813744631003513, "grad_norm": 2.8937506675720215, "learning_rate": 2.880010202490896e-07, "loss": 0.4048, "step": 20601 }, { "epoch": 2.6817649355720423, "grad_norm": 2.470564365386963, "learning_rate": 2.873024028983562e-07, "loss": 0.2885, "step": 20604 }, { "epoch": 2.682155408043733, "grad_norm": 2.804993152618408, "learning_rate": 2.8660460884597953e-07, "loss": 0.3877, "step": 20607 }, { "epoch": 2.6825458805154234, "grad_norm": 2.796926259994507, "learning_rate": 2.859076382138609e-07, "loss": 0.3878, "step": 20610 }, { "epoch": 2.6829363529871144, "grad_norm": 2.4991455078125, "learning_rate": 2.852114911237619e-07, "loss": 0.2931, "step": 20613 }, { "epoch": 2.6833268254588054, "grad_norm": 2.7926175594329834, "learning_rate": 2.845161676972968e-07, "loss": 0.3678, "step": 20616 }, { "epoch": 2.683717297930496, "grad_norm": 3.0255000591278076, "learning_rate": 2.838216680559364e-07, "loss": 0.3649, "step": 20619 }, { "epoch": 2.6841077704021865, "grad_norm": 2.6406142711639404, "learning_rate": 2.8312799232101007e-07, "loss": 0.3252, "step": 20622 }, { "epoch": 2.6844982428738775, "grad_norm": 2.9375603199005127, "learning_rate": 2.824351406137005e-07, "loss": 0.3596, "step": 20625 }, { "epoch": 2.684888715345568, "grad_norm": 2.7059028148651123, "learning_rate": 2.817431130550474e-07, "loss": 0.4015, "step": 20628 }, { "epoch": 2.685279187817259, "grad_norm": 2.975783586502075, "learning_rate": 2.81051909765947e-07, "loss": 0.345, "step": 20631 }, { "epoch": 2.6856696602889496, "grad_norm": 2.8388760089874268, "learning_rate": 2.8036153086714976e-07, "loss": 0.3673, "step": 20634 }, { "epoch": 2.68606013276064, "grad_norm": 2.8770854473114014, "learning_rate": 2.7967197647926547e-07, "loss": 0.3382, "step": 20637 }, { "epoch": 2.686450605232331, "grad_norm": 2.836329460144043, "learning_rate": 2.78983246722756e-07, "loss": 0.4078, "step": 20640 }, { "epoch": 2.686841077704022, "grad_norm": 2.931439161300659, "learning_rate": 2.7829534171794236e-07, "loss": 0.3589, "step": 20643 }, { "epoch": 2.6872315501757127, "grad_norm": 2.7307281494140625, "learning_rate": 2.7760826158499955e-07, "loss": 0.3384, "step": 20646 }, { "epoch": 2.6876220226474032, "grad_norm": 3.2210898399353027, "learning_rate": 2.769220064439593e-07, "loss": 0.4175, "step": 20649 }, { "epoch": 2.6880124951190942, "grad_norm": 2.8683855533599854, "learning_rate": 2.7623657641470734e-07, "loss": 0.3799, "step": 20652 }, { "epoch": 2.688402967590785, "grad_norm": 2.820208787918091, "learning_rate": 2.755519716169891e-07, "loss": 0.4057, "step": 20655 }, { "epoch": 2.688793440062476, "grad_norm": 2.7478857040405273, "learning_rate": 2.7486819217040273e-07, "loss": 0.3818, "step": 20658 }, { "epoch": 2.6891839125341663, "grad_norm": 2.4639649391174316, "learning_rate": 2.7418523819440214e-07, "loss": 0.2983, "step": 20661 }, { "epoch": 2.689574385005857, "grad_norm": 2.7031710147857666, "learning_rate": 2.735031098082996e-07, "loss": 0.3065, "step": 20664 }, { "epoch": 2.689964857477548, "grad_norm": 3.303699254989624, "learning_rate": 2.728218071312605e-07, "loss": 0.3628, "step": 20667 }, { "epoch": 2.6903553299492384, "grad_norm": 2.7157487869262695, "learning_rate": 2.721413302823067e-07, "loss": 0.3855, "step": 20670 }, { "epoch": 2.6907458024209294, "grad_norm": 2.9699556827545166, "learning_rate": 2.714616793803171e-07, "loss": 0.3907, "step": 20673 }, { "epoch": 2.69113627489262, "grad_norm": 2.9308390617370605, "learning_rate": 2.707828545440239e-07, "loss": 0.3205, "step": 20676 }, { "epoch": 2.691526747364311, "grad_norm": 2.65004563331604, "learning_rate": 2.701048558920183e-07, "loss": 0.2922, "step": 20679 }, { "epoch": 2.6919172198360015, "grad_norm": 3.5007753372192383, "learning_rate": 2.6942768354274283e-07, "loss": 0.354, "step": 20682 }, { "epoch": 2.6923076923076925, "grad_norm": 2.951852560043335, "learning_rate": 2.687513376145007e-07, "loss": 0.3592, "step": 20685 }, { "epoch": 2.692698164779383, "grad_norm": 2.6974475383758545, "learning_rate": 2.6807581822544616e-07, "loss": 0.3565, "step": 20688 }, { "epoch": 2.6930886372510736, "grad_norm": 3.6439194679260254, "learning_rate": 2.6740112549359154e-07, "loss": 0.3602, "step": 20691 }, { "epoch": 2.6934791097227646, "grad_norm": 2.9142510890960693, "learning_rate": 2.6672725953680476e-07, "loss": 0.4112, "step": 20694 }, { "epoch": 2.693869582194455, "grad_norm": 2.7999353408813477, "learning_rate": 2.660542204728084e-07, "loss": 0.3124, "step": 20697 }, { "epoch": 2.694260054666146, "grad_norm": 2.537796974182129, "learning_rate": 2.6538200841918103e-07, "loss": 0.3301, "step": 20700 }, { "epoch": 2.6946505271378367, "grad_norm": 3.0205585956573486, "learning_rate": 2.6471062349335606e-07, "loss": 0.3173, "step": 20703 }, { "epoch": 2.6950409996095273, "grad_norm": 2.9223034381866455, "learning_rate": 2.640400658126241e-07, "loss": 0.4226, "step": 20706 }, { "epoch": 2.6954314720812182, "grad_norm": 2.9517760276794434, "learning_rate": 2.633703354941297e-07, "loss": 0.3851, "step": 20709 }, { "epoch": 2.6958219445529092, "grad_norm": 3.43226957321167, "learning_rate": 2.6270143265487214e-07, "loss": 0.4494, "step": 20712 }, { "epoch": 2.6962124170246, "grad_norm": 2.9508416652679443, "learning_rate": 2.6203335741170955e-07, "loss": 0.4241, "step": 20715 }, { "epoch": 2.6966028894962903, "grad_norm": 2.6791441440582275, "learning_rate": 2.613661098813519e-07, "loss": 0.3442, "step": 20718 }, { "epoch": 2.6969933619679813, "grad_norm": 2.6088688373565674, "learning_rate": 2.6069969018036655e-07, "loss": 0.3336, "step": 20721 }, { "epoch": 2.697383834439672, "grad_norm": 2.9644179344177246, "learning_rate": 2.6003409842517425e-07, "loss": 0.3761, "step": 20724 }, { "epoch": 2.697774306911363, "grad_norm": 2.931863307952881, "learning_rate": 2.5936933473205473e-07, "loss": 0.3512, "step": 20727 }, { "epoch": 2.6981647793830534, "grad_norm": 3.0752289295196533, "learning_rate": 2.5870539921713955e-07, "loss": 0.3662, "step": 20730 }, { "epoch": 2.698555251854744, "grad_norm": 2.8674252033233643, "learning_rate": 2.5804229199641594e-07, "loss": 0.3455, "step": 20733 }, { "epoch": 2.698945724326435, "grad_norm": 2.8869755268096924, "learning_rate": 2.57380013185729e-07, "loss": 0.3543, "step": 20736 }, { "epoch": 2.699336196798126, "grad_norm": 2.9068305492401123, "learning_rate": 2.567185629007768e-07, "loss": 0.4262, "step": 20739 }, { "epoch": 2.6997266692698165, "grad_norm": 2.6254804134368896, "learning_rate": 2.560579412571129e-07, "loss": 0.4404, "step": 20742 }, { "epoch": 2.700117141741507, "grad_norm": 3.4937686920166016, "learning_rate": 2.5539814837014734e-07, "loss": 0.3142, "step": 20745 }, { "epoch": 2.700507614213198, "grad_norm": 2.548431158065796, "learning_rate": 2.547391843551439e-07, "loss": 0.3637, "step": 20748 }, { "epoch": 2.7008980866848886, "grad_norm": 3.108660936355591, "learning_rate": 2.540810493272222e-07, "loss": 0.3424, "step": 20751 }, { "epoch": 2.7012885591565796, "grad_norm": 2.6465303897857666, "learning_rate": 2.53423743401357e-07, "loss": 0.3347, "step": 20754 }, { "epoch": 2.70167903162827, "grad_norm": 2.571742057800293, "learning_rate": 2.5276726669237917e-07, "loss": 0.2957, "step": 20757 }, { "epoch": 2.7020695040999607, "grad_norm": 2.738924741744995, "learning_rate": 2.5211161931497195e-07, "loss": 0.3669, "step": 20760 }, { "epoch": 2.7024599765716517, "grad_norm": 2.8582653999328613, "learning_rate": 2.5145680138367823e-07, "loss": 0.4023, "step": 20763 }, { "epoch": 2.7028504490433427, "grad_norm": 2.8408925533294678, "learning_rate": 2.5080281301289034e-07, "loss": 0.424, "step": 20766 }, { "epoch": 2.7032409215150333, "grad_norm": 2.618551015853882, "learning_rate": 2.5014965431686133e-07, "loss": 0.2904, "step": 20769 }, { "epoch": 2.703631393986724, "grad_norm": 2.550246000289917, "learning_rate": 2.4949732540969553e-07, "loss": 0.3352, "step": 20772 }, { "epoch": 2.704021866458415, "grad_norm": 2.683685541152954, "learning_rate": 2.488458264053523e-07, "loss": 0.3334, "step": 20775 }, { "epoch": 2.7044123389301054, "grad_norm": 2.9266393184661865, "learning_rate": 2.481951574176494e-07, "loss": 0.3804, "step": 20778 }, { "epoch": 2.7048028114017963, "grad_norm": 2.7701256275177, "learning_rate": 2.4754531856025557e-07, "loss": 0.364, "step": 20781 }, { "epoch": 2.705193283873487, "grad_norm": 2.6166975498199463, "learning_rate": 2.4689630994669646e-07, "loss": 0.3529, "step": 20784 }, { "epoch": 2.7055837563451774, "grad_norm": 2.662623643875122, "learning_rate": 2.462481316903537e-07, "loss": 0.296, "step": 20787 }, { "epoch": 2.7059742288168684, "grad_norm": 3.0897302627563477, "learning_rate": 2.4560078390446216e-07, "loss": 0.4447, "step": 20790 }, { "epoch": 2.706364701288559, "grad_norm": 2.9640982151031494, "learning_rate": 2.4495426670211154e-07, "loss": 0.3694, "step": 20793 }, { "epoch": 2.70675517376025, "grad_norm": 2.899473190307617, "learning_rate": 2.443085801962469e-07, "loss": 0.3705, "step": 20796 }, { "epoch": 2.7071456462319405, "grad_norm": 2.732571601867676, "learning_rate": 2.4366372449966924e-07, "loss": 0.3467, "step": 20799 }, { "epoch": 2.7075361187036315, "grad_norm": 4.155490398406982, "learning_rate": 2.4301969972503223e-07, "loss": 0.3475, "step": 20802 }, { "epoch": 2.707926591175322, "grad_norm": 2.9572913646698, "learning_rate": 2.4237650598484707e-07, "loss": 0.3588, "step": 20805 }, { "epoch": 2.708317063647013, "grad_norm": 2.9816484451293945, "learning_rate": 2.417341433914777e-07, "loss": 0.3269, "step": 20808 }, { "epoch": 2.7087075361187036, "grad_norm": 2.436990261077881, "learning_rate": 2.4109261205714386e-07, "loss": 0.3758, "step": 20811 }, { "epoch": 2.709098008590394, "grad_norm": 2.87477707862854, "learning_rate": 2.404519120939197e-07, "loss": 0.3635, "step": 20814 }, { "epoch": 2.709488481062085, "grad_norm": 2.7576093673706055, "learning_rate": 2.3981204361373247e-07, "loss": 0.3162, "step": 20817 }, { "epoch": 2.7098789535337757, "grad_norm": 3.003218650817871, "learning_rate": 2.3917300672836876e-07, "loss": 0.344, "step": 20820 }, { "epoch": 2.7102694260054667, "grad_norm": 2.829570770263672, "learning_rate": 2.385348015494648e-07, "loss": 0.399, "step": 20823 }, { "epoch": 2.7106598984771573, "grad_norm": 2.7657134532928467, "learning_rate": 2.378974281885138e-07, "loss": 0.3849, "step": 20826 }, { "epoch": 2.7110503709488483, "grad_norm": 3.6430046558380127, "learning_rate": 2.3726088675686542e-07, "loss": 0.3418, "step": 20829 }, { "epoch": 2.711440843420539, "grad_norm": 3.0621814727783203, "learning_rate": 2.366251773657202e-07, "loss": 0.3826, "step": 20832 }, { "epoch": 2.71183131589223, "grad_norm": 2.840770721435547, "learning_rate": 2.359903001261349e-07, "loss": 0.3095, "step": 20835 }, { "epoch": 2.7122217883639204, "grad_norm": 2.6842424869537354, "learning_rate": 2.353562551490235e-07, "loss": 0.3479, "step": 20838 }, { "epoch": 2.712612260835611, "grad_norm": 2.5753138065338135, "learning_rate": 2.3472304254515022e-07, "loss": 0.3303, "step": 20841 }, { "epoch": 2.713002733307302, "grad_norm": 4.057579040527344, "learning_rate": 2.3409066242513655e-07, "loss": 0.3138, "step": 20844 }, { "epoch": 2.7133932057789925, "grad_norm": 2.7594196796417236, "learning_rate": 2.3345911489945806e-07, "loss": 0.35, "step": 20847 }, { "epoch": 2.7137836782506835, "grad_norm": 2.6751604080200195, "learning_rate": 2.3282840007844586e-07, "loss": 0.3223, "step": 20850 }, { "epoch": 2.714174150722374, "grad_norm": 3.2291057109832764, "learning_rate": 2.3219851807228298e-07, "loss": 0.4123, "step": 20853 }, { "epoch": 2.7145646231940646, "grad_norm": 2.8833391666412354, "learning_rate": 2.3156946899100918e-07, "loss": 0.3697, "step": 20856 }, { "epoch": 2.7149550956657555, "grad_norm": 2.8411707878112793, "learning_rate": 2.3094125294451709e-07, "loss": 0.3905, "step": 20859 }, { "epoch": 2.7153455681374465, "grad_norm": 2.843876361846924, "learning_rate": 2.3031387004255667e-07, "loss": 0.3087, "step": 20862 }, { "epoch": 2.715736040609137, "grad_norm": 2.7246623039245605, "learning_rate": 2.2968732039472864e-07, "loss": 0.3058, "step": 20865 }, { "epoch": 2.7161265130808276, "grad_norm": 3.1246981620788574, "learning_rate": 2.2906160411048982e-07, "loss": 0.4129, "step": 20868 }, { "epoch": 2.7165169855525186, "grad_norm": 2.8314807415008545, "learning_rate": 2.2843672129915284e-07, "loss": 0.3662, "step": 20871 }, { "epoch": 2.716907458024209, "grad_norm": 2.765364408493042, "learning_rate": 2.278126720698831e-07, "loss": 0.3987, "step": 20874 }, { "epoch": 2.7172979304959, "grad_norm": 2.515321969985962, "learning_rate": 2.2718945653169954e-07, "loss": 0.3245, "step": 20877 }, { "epoch": 2.7176884029675907, "grad_norm": 3.015044689178467, "learning_rate": 2.2656707479347783e-07, "loss": 0.3235, "step": 20880 }, { "epoch": 2.7180788754392813, "grad_norm": 2.9350194931030273, "learning_rate": 2.2594552696394655e-07, "loss": 0.3248, "step": 20883 }, { "epoch": 2.7184693479109723, "grad_norm": 2.5761871337890625, "learning_rate": 2.2532481315168774e-07, "loss": 0.4054, "step": 20886 }, { "epoch": 2.7188598203826633, "grad_norm": 2.6208813190460205, "learning_rate": 2.2470493346513967e-07, "loss": 0.3896, "step": 20889 }, { "epoch": 2.719250292854354, "grad_norm": 2.8215200901031494, "learning_rate": 2.2408588801259456e-07, "loss": 0.3916, "step": 20892 }, { "epoch": 2.7196407653260444, "grad_norm": 2.862316608428955, "learning_rate": 2.2346767690219762e-07, "loss": 0.3927, "step": 20895 }, { "epoch": 2.7200312377977354, "grad_norm": 2.7122604846954346, "learning_rate": 2.228503002419491e-07, "loss": 0.3755, "step": 20898 }, { "epoch": 2.720421710269426, "grad_norm": 2.7024004459381104, "learning_rate": 2.2223375813970382e-07, "loss": 0.382, "step": 20901 }, { "epoch": 2.720812182741117, "grad_norm": 3.0531258583068848, "learning_rate": 2.2161805070316955e-07, "loss": 0.427, "step": 20904 }, { "epoch": 2.7212026552128075, "grad_norm": 2.6628942489624023, "learning_rate": 2.2100317803991023e-07, "loss": 0.3531, "step": 20907 }, { "epoch": 2.721593127684498, "grad_norm": 2.617644786834717, "learning_rate": 2.203891402573416e-07, "loss": 0.3378, "step": 20910 }, { "epoch": 2.721983600156189, "grad_norm": 3.2814929485321045, "learning_rate": 2.197759374627356e-07, "loss": 0.4266, "step": 20913 }, { "epoch": 2.72237407262788, "grad_norm": 2.880181074142456, "learning_rate": 2.1916356976321717e-07, "loss": 0.4139, "step": 20916 }, { "epoch": 2.7227645450995706, "grad_norm": 3.127866506576538, "learning_rate": 2.1855203726576512e-07, "loss": 0.4289, "step": 20919 }, { "epoch": 2.723155017571261, "grad_norm": 2.892545461654663, "learning_rate": 2.17941340077214e-07, "loss": 0.3724, "step": 20922 }, { "epoch": 2.723545490042952, "grad_norm": 3.038330078125, "learning_rate": 2.1733147830425129e-07, "loss": 0.3305, "step": 20925 }, { "epoch": 2.7239359625146427, "grad_norm": 2.841825246810913, "learning_rate": 2.1672245205341668e-07, "loss": 0.355, "step": 20928 }, { "epoch": 2.7243264349863336, "grad_norm": 2.6401593685150146, "learning_rate": 2.1611426143110792e-07, "loss": 0.3588, "step": 20931 }, { "epoch": 2.724716907458024, "grad_norm": 2.7327427864074707, "learning_rate": 2.1550690654357387e-07, "loss": 0.3542, "step": 20934 }, { "epoch": 2.7251073799297147, "grad_norm": 2.9311492443084717, "learning_rate": 2.1490038749691855e-07, "loss": 0.3667, "step": 20937 }, { "epoch": 2.7254978524014057, "grad_norm": 2.581843614578247, "learning_rate": 2.1429470439709832e-07, "loss": 0.3361, "step": 20940 }, { "epoch": 2.7258883248730963, "grad_norm": 2.531803607940674, "learning_rate": 2.1368985734992632e-07, "loss": 0.3147, "step": 20943 }, { "epoch": 2.7262787973447873, "grad_norm": 2.8539505004882812, "learning_rate": 2.130858464610669e-07, "loss": 0.3391, "step": 20946 }, { "epoch": 2.726669269816478, "grad_norm": 2.8276593685150146, "learning_rate": 2.124826718360401e-07, "loss": 0.3272, "step": 20949 }, { "epoch": 2.727059742288169, "grad_norm": 3.1101760864257812, "learning_rate": 2.1188033358021887e-07, "loss": 0.4169, "step": 20952 }, { "epoch": 2.7274502147598594, "grad_norm": 2.8859715461730957, "learning_rate": 2.1127883179883123e-07, "loss": 0.343, "step": 20955 }, { "epoch": 2.7278406872315504, "grad_norm": 2.626885414123535, "learning_rate": 2.1067816659695705e-07, "loss": 0.354, "step": 20958 }, { "epoch": 2.728231159703241, "grad_norm": 3.709789276123047, "learning_rate": 2.100783380795318e-07, "loss": 0.4314, "step": 20961 }, { "epoch": 2.7286216321749315, "grad_norm": 3.0039596557617188, "learning_rate": 2.0947934635134504e-07, "loss": 0.3791, "step": 20964 }, { "epoch": 2.7290121046466225, "grad_norm": 2.7142927646636963, "learning_rate": 2.0888119151703855e-07, "loss": 0.4363, "step": 20967 }, { "epoch": 2.729402577118313, "grad_norm": 2.700593948364258, "learning_rate": 2.0828387368110825e-07, "loss": 0.2971, "step": 20970 }, { "epoch": 2.729793049590004, "grad_norm": 2.7692973613739014, "learning_rate": 2.0768739294790453e-07, "loss": 0.3486, "step": 20973 }, { "epoch": 2.7301835220616946, "grad_norm": 2.7702739238739014, "learning_rate": 2.070917494216329e-07, "loss": 0.3118, "step": 20976 }, { "epoch": 2.7305739945333856, "grad_norm": 2.789076805114746, "learning_rate": 2.0649694320634962e-07, "loss": 0.4196, "step": 20979 }, { "epoch": 2.730964467005076, "grad_norm": 2.585819721221924, "learning_rate": 2.059029744059654e-07, "loss": 0.3413, "step": 20982 }, { "epoch": 2.731354939476767, "grad_norm": 2.9140193462371826, "learning_rate": 2.0530984312424728e-07, "loss": 0.3709, "step": 20985 }, { "epoch": 2.7317454119484577, "grad_norm": 2.968364715576172, "learning_rate": 2.0471754946481293e-07, "loss": 0.3156, "step": 20988 }, { "epoch": 2.732135884420148, "grad_norm": 2.853219985961914, "learning_rate": 2.04126093531134e-07, "loss": 0.3475, "step": 20991 }, { "epoch": 2.732526356891839, "grad_norm": 2.715740919113159, "learning_rate": 2.035354754265384e-07, "loss": 0.337, "step": 20994 }, { "epoch": 2.7329168293635298, "grad_norm": 2.861161947250366, "learning_rate": 2.029456952542047e-07, "loss": 0.3736, "step": 20997 }, { "epoch": 2.7333073018352207, "grad_norm": 3.0572056770324707, "learning_rate": 2.0235675311716606e-07, "loss": 0.3697, "step": 21000 }, { "epoch": 2.7336977743069113, "grad_norm": 3.1050283908843994, "learning_rate": 2.0176864911831074e-07, "loss": 0.3451, "step": 21003 }, { "epoch": 2.734088246778602, "grad_norm": 2.7847084999084473, "learning_rate": 2.0118138336037818e-07, "loss": 0.3791, "step": 21006 }, { "epoch": 2.734478719250293, "grad_norm": 2.646023988723755, "learning_rate": 2.0059495594596245e-07, "loss": 0.3152, "step": 21009 }, { "epoch": 2.734869191721984, "grad_norm": 2.6113431453704834, "learning_rate": 2.000093669775105e-07, "loss": 0.3829, "step": 21012 }, { "epoch": 2.7352596641936744, "grad_norm": 3.1198410987854004, "learning_rate": 1.9942461655732604e-07, "loss": 0.3661, "step": 21015 }, { "epoch": 2.735650136665365, "grad_norm": 3.019252061843872, "learning_rate": 1.9884070478756124e-07, "loss": 0.3273, "step": 21018 }, { "epoch": 2.736040609137056, "grad_norm": 2.6802523136138916, "learning_rate": 1.982576317702256e-07, "loss": 0.3518, "step": 21021 }, { "epoch": 2.7364310816087465, "grad_norm": 2.885608196258545, "learning_rate": 1.976753976071799e-07, "loss": 0.3363, "step": 21024 }, { "epoch": 2.7368215540804375, "grad_norm": 3.0259361267089844, "learning_rate": 1.9709400240014e-07, "loss": 0.4027, "step": 21027 }, { "epoch": 2.737212026552128, "grad_norm": 2.8382019996643066, "learning_rate": 1.9651344625067404e-07, "loss": 0.3445, "step": 21030 }, { "epoch": 2.7376024990238186, "grad_norm": 2.760059356689453, "learning_rate": 1.959337292602037e-07, "loss": 0.341, "step": 21033 }, { "epoch": 2.7379929714955096, "grad_norm": 2.7008633613586426, "learning_rate": 1.9535485153000467e-07, "loss": 0.3574, "step": 21036 }, { "epoch": 2.7383834439672006, "grad_norm": 2.705885410308838, "learning_rate": 1.94776813161206e-07, "loss": 0.3451, "step": 21039 }, { "epoch": 2.738773916438891, "grad_norm": 2.571812629699707, "learning_rate": 1.941996142547886e-07, "loss": 0.3598, "step": 21042 }, { "epoch": 2.7391643889105817, "grad_norm": 2.7097597122192383, "learning_rate": 1.9362325491158907e-07, "loss": 0.4084, "step": 21045 }, { "epoch": 2.7395548613822727, "grad_norm": 2.7397844791412354, "learning_rate": 1.9304773523229626e-07, "loss": 0.3158, "step": 21048 }, { "epoch": 2.739945333853963, "grad_norm": 2.917585849761963, "learning_rate": 1.924730553174503e-07, "loss": 0.3487, "step": 21051 }, { "epoch": 2.740335806325654, "grad_norm": 3.6193621158599854, "learning_rate": 1.918992152674487e-07, "loss": 0.3871, "step": 21054 }, { "epoch": 2.7407262787973448, "grad_norm": 2.6469061374664307, "learning_rate": 1.9132621518254014e-07, "loss": 0.3751, "step": 21057 }, { "epoch": 2.7411167512690353, "grad_norm": 2.455784797668457, "learning_rate": 1.9075405516282562e-07, "loss": 0.3128, "step": 21060 }, { "epoch": 2.7415072237407263, "grad_norm": 2.900134325027466, "learning_rate": 1.9018273530825972e-07, "loss": 0.3579, "step": 21063 }, { "epoch": 2.7418976962124173, "grad_norm": 2.7763450145721436, "learning_rate": 1.8961225571865194e-07, "loss": 0.3071, "step": 21066 }, { "epoch": 2.742288168684108, "grad_norm": 3.2267343997955322, "learning_rate": 1.890426164936643e-07, "loss": 0.3962, "step": 21069 }, { "epoch": 2.7426786411557984, "grad_norm": 2.72977876663208, "learning_rate": 1.8847381773280991e-07, "loss": 0.3407, "step": 21072 }, { "epoch": 2.7430691136274894, "grad_norm": 3.236475944519043, "learning_rate": 1.879058595354577e-07, "loss": 0.4347, "step": 21075 }, { "epoch": 2.74345958609918, "grad_norm": 2.5034587383270264, "learning_rate": 1.873387420008288e-07, "loss": 0.3305, "step": 21078 }, { "epoch": 2.743850058570871, "grad_norm": 2.9593002796173096, "learning_rate": 1.867724652279973e-07, "loss": 0.3307, "step": 21081 }, { "epoch": 2.7442405310425615, "grad_norm": 2.766495943069458, "learning_rate": 1.8620702931589018e-07, "loss": 0.3772, "step": 21084 }, { "epoch": 2.744631003514252, "grad_norm": 2.7512776851654053, "learning_rate": 1.85642434363289e-07, "loss": 0.3637, "step": 21087 }, { "epoch": 2.745021475985943, "grad_norm": 2.5375959873199463, "learning_rate": 1.8507868046882648e-07, "loss": 0.3328, "step": 21090 }, { "epoch": 2.7454119484576336, "grad_norm": 3.020958662033081, "learning_rate": 1.845157677309889e-07, "loss": 0.3264, "step": 21093 }, { "epoch": 2.7458024209293246, "grad_norm": 2.4167263507843018, "learning_rate": 1.8395369624811643e-07, "loss": 0.2931, "step": 21096 }, { "epoch": 2.746192893401015, "grad_norm": 2.777470827102661, "learning_rate": 1.833924661184022e-07, "loss": 0.4431, "step": 21099 }, { "epoch": 2.746583365872706, "grad_norm": 2.5936362743377686, "learning_rate": 1.8283207743989118e-07, "loss": 0.3583, "step": 21102 }, { "epoch": 2.7469738383443967, "grad_norm": 2.837986946105957, "learning_rate": 1.822725303104822e-07, "loss": 0.3669, "step": 21105 }, { "epoch": 2.7473643108160877, "grad_norm": 2.606499671936035, "learning_rate": 1.8171382482792765e-07, "loss": 0.3378, "step": 21108 }, { "epoch": 2.7477547832877782, "grad_norm": 3.059852123260498, "learning_rate": 1.8115596108983168e-07, "loss": 0.3491, "step": 21111 }, { "epoch": 2.7481452557594688, "grad_norm": 2.9495184421539307, "learning_rate": 1.8059893919365135e-07, "loss": 0.4117, "step": 21114 }, { "epoch": 2.7485357282311598, "grad_norm": 2.9733073711395264, "learning_rate": 1.8004275923669824e-07, "loss": 0.4087, "step": 21117 }, { "epoch": 2.7489262007028503, "grad_norm": 2.503350019454956, "learning_rate": 1.7948742131613571e-07, "loss": 0.3365, "step": 21120 }, { "epoch": 2.7493166731745413, "grad_norm": 2.9972898960113525, "learning_rate": 1.7893292552897956e-07, "loss": 0.3652, "step": 21123 }, { "epoch": 2.749707145646232, "grad_norm": 2.9477357864379883, "learning_rate": 1.783792719720989e-07, "loss": 0.3609, "step": 21126 }, { "epoch": 2.7500976181179224, "grad_norm": 2.971484422683716, "learning_rate": 1.7782646074221643e-07, "loss": 0.3444, "step": 21129 }, { "epoch": 2.7504880905896134, "grad_norm": 2.396226644515991, "learning_rate": 1.7727449193590707e-07, "loss": 0.3327, "step": 21132 }, { "epoch": 2.7508785630613044, "grad_norm": 2.7505605220794678, "learning_rate": 1.7672336564959813e-07, "loss": 0.3816, "step": 21135 }, { "epoch": 2.751269035532995, "grad_norm": 2.9691169261932373, "learning_rate": 1.761730819795704e-07, "loss": 0.3654, "step": 21138 }, { "epoch": 2.7516595080046855, "grad_norm": 3.171504259109497, "learning_rate": 1.7562364102195806e-07, "loss": 0.3361, "step": 21141 }, { "epoch": 2.7520499804763765, "grad_norm": 2.5362813472747803, "learning_rate": 1.7507504287274603e-07, "loss": 0.2974, "step": 21144 }, { "epoch": 2.752440452948067, "grad_norm": 2.5266830921173096, "learning_rate": 1.7452728762777372e-07, "loss": 0.4153, "step": 21147 }, { "epoch": 2.752830925419758, "grad_norm": 3.282205820083618, "learning_rate": 1.739803753827335e-07, "loss": 0.2816, "step": 21150 }, { "epoch": 2.7532213978914486, "grad_norm": 2.7888989448547363, "learning_rate": 1.7343430623316947e-07, "loss": 0.3416, "step": 21153 }, { "epoch": 2.753611870363139, "grad_norm": 2.560568332672119, "learning_rate": 1.728890802744776e-07, "loss": 0.3341, "step": 21156 }, { "epoch": 2.75400234283483, "grad_norm": 3.4520578384399414, "learning_rate": 1.723446976019094e-07, "loss": 0.3717, "step": 21159 }, { "epoch": 2.754392815306521, "grad_norm": 2.7265379428863525, "learning_rate": 1.7180115831056665e-07, "loss": 0.3872, "step": 21162 }, { "epoch": 2.7547832877782117, "grad_norm": 2.842808246612549, "learning_rate": 1.712584624954039e-07, "loss": 0.3927, "step": 21165 }, { "epoch": 2.7551737602499022, "grad_norm": 2.4746949672698975, "learning_rate": 1.707166102512303e-07, "loss": 0.3752, "step": 21168 }, { "epoch": 2.7555642327215932, "grad_norm": 2.7992329597473145, "learning_rate": 1.7017560167270519e-07, "loss": 0.3562, "step": 21171 }, { "epoch": 2.755954705193284, "grad_norm": 3.015634059906006, "learning_rate": 1.6963543685434236e-07, "loss": 0.4082, "step": 21174 }, { "epoch": 2.7563451776649748, "grad_norm": 2.884288787841797, "learning_rate": 1.6909611589050635e-07, "loss": 0.3104, "step": 21177 }, { "epoch": 2.7567356501366653, "grad_norm": 3.6291792392730713, "learning_rate": 1.6855763887541565e-07, "loss": 0.3426, "step": 21180 }, { "epoch": 2.757126122608356, "grad_norm": 2.6148040294647217, "learning_rate": 1.6802000590314283e-07, "loss": 0.3069, "step": 21183 }, { "epoch": 2.757516595080047, "grad_norm": 3.024834156036377, "learning_rate": 1.6748321706760994e-07, "loss": 0.3358, "step": 21186 }, { "epoch": 2.757907067551738, "grad_norm": 2.5863471031188965, "learning_rate": 1.66947272462592e-07, "loss": 0.3238, "step": 21189 }, { "epoch": 2.7582975400234284, "grad_norm": 3.284332036972046, "learning_rate": 1.6641217218171912e-07, "loss": 0.4084, "step": 21192 }, { "epoch": 2.758688012495119, "grad_norm": 3.1912519931793213, "learning_rate": 1.658779163184715e-07, "loss": 0.353, "step": 21195 }, { "epoch": 2.75907848496681, "grad_norm": 2.728577136993408, "learning_rate": 1.6534450496618171e-07, "loss": 0.408, "step": 21198 }, { "epoch": 2.7594689574385005, "grad_norm": 2.7622365951538086, "learning_rate": 1.648119382180363e-07, "loss": 0.3854, "step": 21201 }, { "epoch": 2.7598594299101915, "grad_norm": 3.1888158321380615, "learning_rate": 1.6428021616707423e-07, "loss": 0.4322, "step": 21204 }, { "epoch": 2.760249902381882, "grad_norm": 2.6636276245117188, "learning_rate": 1.6374933890618504e-07, "loss": 0.3482, "step": 21207 }, { "epoch": 2.7606403748535726, "grad_norm": 3.0101702213287354, "learning_rate": 1.6321930652811236e-07, "loss": 0.3703, "step": 21210 }, { "epoch": 2.7610308473252636, "grad_norm": 3.2155416011810303, "learning_rate": 1.6269011912545208e-07, "loss": 0.384, "step": 21213 }, { "epoch": 2.761421319796954, "grad_norm": 2.743858814239502, "learning_rate": 1.6216177679065136e-07, "loss": 0.3099, "step": 21216 }, { "epoch": 2.761811792268645, "grad_norm": 2.7741172313690186, "learning_rate": 1.6163427961601086e-07, "loss": 0.3717, "step": 21219 }, { "epoch": 2.7622022647403357, "grad_norm": 3.037571907043457, "learning_rate": 1.6110762769368294e-07, "loss": 0.3058, "step": 21222 }, { "epoch": 2.7625927372120267, "grad_norm": 2.6147408485412598, "learning_rate": 1.60581821115674e-07, "loss": 0.3178, "step": 21225 }, { "epoch": 2.7629832096837172, "grad_norm": 2.956163167953491, "learning_rate": 1.6005685997383945e-07, "loss": 0.3459, "step": 21228 }, { "epoch": 2.7633736821554082, "grad_norm": 3.1551594734191895, "learning_rate": 1.5953274435988985e-07, "loss": 0.3189, "step": 21231 }, { "epoch": 2.763764154627099, "grad_norm": 2.789609670639038, "learning_rate": 1.590094743653875e-07, "loss": 0.3035, "step": 21234 }, { "epoch": 2.7641546270987893, "grad_norm": 3.474595308303833, "learning_rate": 1.5848705008174535e-07, "loss": 0.434, "step": 21237 }, { "epoch": 2.7645450995704803, "grad_norm": 2.9483280181884766, "learning_rate": 1.5796547160023046e-07, "loss": 0.3975, "step": 21240 }, { "epoch": 2.764935572042171, "grad_norm": 3.087123155593872, "learning_rate": 1.5744473901196211e-07, "loss": 0.306, "step": 21243 }, { "epoch": 2.765326044513862, "grad_norm": 2.7233099937438965, "learning_rate": 1.5692485240791034e-07, "loss": 0.3202, "step": 21246 }, { "epoch": 2.7657165169855524, "grad_norm": 3.1020421981811523, "learning_rate": 1.5640581187889857e-07, "loss": 0.3874, "step": 21249 }, { "epoch": 2.7661069894572434, "grad_norm": 3.102666139602661, "learning_rate": 1.558876175156021e-07, "loss": 0.3741, "step": 21252 }, { "epoch": 2.766497461928934, "grad_norm": 2.6903672218322754, "learning_rate": 1.5537026940854794e-07, "loss": 0.3412, "step": 21255 }, { "epoch": 2.766887934400625, "grad_norm": 2.9768612384796143, "learning_rate": 1.54853767648116e-07, "loss": 0.3017, "step": 21258 }, { "epoch": 2.7672784068723155, "grad_norm": 3.4429922103881836, "learning_rate": 1.543381123245391e-07, "loss": 0.321, "step": 21261 }, { "epoch": 2.767668879344006, "grad_norm": 2.8304970264434814, "learning_rate": 1.538233035278991e-07, "loss": 0.328, "step": 21264 }, { "epoch": 2.768059351815697, "grad_norm": 2.5452582836151123, "learning_rate": 1.5330934134813346e-07, "loss": 0.2859, "step": 21267 }, { "epoch": 2.7684498242873876, "grad_norm": 2.8561484813690186, "learning_rate": 1.5279622587502986e-07, "loss": 0.4397, "step": 21270 }, { "epoch": 2.7688402967590786, "grad_norm": 3.0606303215026855, "learning_rate": 1.5228395719822876e-07, "loss": 0.4187, "step": 21273 }, { "epoch": 2.769230769230769, "grad_norm": 2.421595335006714, "learning_rate": 1.5177253540722247e-07, "loss": 0.2697, "step": 21276 }, { "epoch": 2.7696212417024597, "grad_norm": 3.015394449234009, "learning_rate": 1.512619605913551e-07, "loss": 0.3576, "step": 21279 }, { "epoch": 2.7700117141741507, "grad_norm": 2.740720510482788, "learning_rate": 1.5075223283982255e-07, "loss": 0.3246, "step": 21282 }, { "epoch": 2.7704021866458417, "grad_norm": 2.849069833755493, "learning_rate": 1.5024335224167407e-07, "loss": 0.342, "step": 21285 }, { "epoch": 2.7707926591175323, "grad_norm": 2.9401018619537354, "learning_rate": 1.4973531888580916e-07, "loss": 0.3456, "step": 21288 }, { "epoch": 2.771183131589223, "grad_norm": 2.6727066040039062, "learning_rate": 1.4922813286098016e-07, "loss": 0.3463, "step": 21291 }, { "epoch": 2.771573604060914, "grad_norm": 3.0198166370391846, "learning_rate": 1.487217942557928e-07, "loss": 0.3842, "step": 21294 }, { "epoch": 2.7719640765326043, "grad_norm": 2.9366579055786133, "learning_rate": 1.4821630315870194e-07, "loss": 0.367, "step": 21297 }, { "epoch": 2.7723545490042953, "grad_norm": 3.7701351642608643, "learning_rate": 1.4771165965801582e-07, "loss": 0.4056, "step": 21300 }, { "epoch": 2.772745021475986, "grad_norm": 2.716285467147827, "learning_rate": 1.4720786384189557e-07, "loss": 0.3333, "step": 21303 }, { "epoch": 2.7731354939476764, "grad_norm": 2.849595546722412, "learning_rate": 1.4670491579835245e-07, "loss": 0.3918, "step": 21306 }, { "epoch": 2.7735259664193674, "grad_norm": 2.8046321868896484, "learning_rate": 1.462028156152512e-07, "loss": 0.3327, "step": 21309 }, { "epoch": 2.7739164388910584, "grad_norm": 3.198570966720581, "learning_rate": 1.4570156338030606e-07, "loss": 0.3182, "step": 21312 }, { "epoch": 2.774306911362749, "grad_norm": 2.8275673389434814, "learning_rate": 1.4520115918108701e-07, "loss": 0.3296, "step": 21315 }, { "epoch": 2.7746973838344395, "grad_norm": 2.7337069511413574, "learning_rate": 1.447016031050119e-07, "loss": 0.3556, "step": 21318 }, { "epoch": 2.7750878563061305, "grad_norm": 2.702484369277954, "learning_rate": 1.442028952393526e-07, "loss": 0.4011, "step": 21321 }, { "epoch": 2.775478328777821, "grad_norm": 2.6298041343688965, "learning_rate": 1.4370503567123274e-07, "loss": 0.3267, "step": 21324 }, { "epoch": 2.775868801249512, "grad_norm": 2.7082111835479736, "learning_rate": 1.4320802448762716e-07, "loss": 0.3419, "step": 21327 }, { "epoch": 2.7762592737212026, "grad_norm": 3.18163800239563, "learning_rate": 1.4271186177536256e-07, "loss": 0.3421, "step": 21330 }, { "epoch": 2.776649746192893, "grad_norm": 2.9443161487579346, "learning_rate": 1.4221654762111624e-07, "loss": 0.3675, "step": 21333 }, { "epoch": 2.777040218664584, "grad_norm": 3.03935170173645, "learning_rate": 1.4172208211142124e-07, "loss": 0.3989, "step": 21336 }, { "epoch": 2.777430691136275, "grad_norm": 2.619476795196533, "learning_rate": 1.4122846533265733e-07, "loss": 0.3682, "step": 21339 }, { "epoch": 2.7778211636079657, "grad_norm": 2.9960954189300537, "learning_rate": 1.4073569737105942e-07, "loss": 0.3476, "step": 21342 }, { "epoch": 2.7782116360796563, "grad_norm": 3.048250675201416, "learning_rate": 1.4024377831271253e-07, "loss": 0.38, "step": 21345 }, { "epoch": 2.7786021085513473, "grad_norm": 3.039592981338501, "learning_rate": 1.3975270824355402e-07, "loss": 0.2967, "step": 21348 }, { "epoch": 2.778992581023038, "grad_norm": 3.194598913192749, "learning_rate": 1.3926248724937363e-07, "loss": 0.4696, "step": 21351 }, { "epoch": 2.779383053494729, "grad_norm": 2.963008165359497, "learning_rate": 1.3877311541581063e-07, "loss": 0.3861, "step": 21354 }, { "epoch": 2.7797735259664194, "grad_norm": 2.9709367752075195, "learning_rate": 1.3828459282835828e-07, "loss": 0.4144, "step": 21357 }, { "epoch": 2.78016399843811, "grad_norm": 2.8352601528167725, "learning_rate": 1.3779691957235996e-07, "loss": 0.3747, "step": 21360 }, { "epoch": 2.780554470909801, "grad_norm": 2.657566547393799, "learning_rate": 1.3731009573301035e-07, "loss": 0.3459, "step": 21363 }, { "epoch": 2.7809449433814915, "grad_norm": 2.850067615509033, "learning_rate": 1.368241213953586e-07, "loss": 0.4104, "step": 21366 }, { "epoch": 2.7813354158531824, "grad_norm": 3.0880982875823975, "learning_rate": 1.3633899664430183e-07, "loss": 0.36, "step": 21369 }, { "epoch": 2.781725888324873, "grad_norm": 3.7259879112243652, "learning_rate": 1.3585472156459e-07, "loss": 0.3831, "step": 21372 }, { "epoch": 2.782116360796564, "grad_norm": 3.025913953781128, "learning_rate": 1.3537129624082657e-07, "loss": 0.3829, "step": 21375 }, { "epoch": 2.7825068332682545, "grad_norm": 3.209459066390991, "learning_rate": 1.3488872075746395e-07, "loss": 0.3427, "step": 21378 }, { "epoch": 2.7828973057399455, "grad_norm": 2.893686294555664, "learning_rate": 1.3440699519880695e-07, "loss": 0.3317, "step": 21381 }, { "epoch": 2.783287778211636, "grad_norm": 2.8265738487243652, "learning_rate": 1.3392611964901159e-07, "loss": 0.3864, "step": 21384 }, { "epoch": 2.7836782506833266, "grad_norm": 2.7982900142669678, "learning_rate": 1.334460941920873e-07, "loss": 0.3053, "step": 21387 }, { "epoch": 2.7840687231550176, "grad_norm": 2.68939471244812, "learning_rate": 1.329669189118915e-07, "loss": 0.4247, "step": 21390 }, { "epoch": 2.784459195626708, "grad_norm": 2.8545985221862793, "learning_rate": 1.324885938921372e-07, "loss": 0.3723, "step": 21393 }, { "epoch": 2.784849668098399, "grad_norm": 2.9172253608703613, "learning_rate": 1.3201111921638532e-07, "loss": 0.346, "step": 21396 }, { "epoch": 2.7852401405700897, "grad_norm": 2.608978271484375, "learning_rate": 1.3153449496805028e-07, "loss": 0.3201, "step": 21399 }, { "epoch": 2.7856306130417803, "grad_norm": 2.9829037189483643, "learning_rate": 1.3105872123039765e-07, "loss": 0.4295, "step": 21402 }, { "epoch": 2.7860210855134713, "grad_norm": 2.7616302967071533, "learning_rate": 1.305837980865432e-07, "loss": 0.4219, "step": 21405 }, { "epoch": 2.7864115579851623, "grad_norm": 3.1220195293426514, "learning_rate": 1.3010972561945555e-07, "loss": 0.3856, "step": 21408 }, { "epoch": 2.786802030456853, "grad_norm": 2.6154117584228516, "learning_rate": 1.2963650391195403e-07, "loss": 0.3607, "step": 21411 }, { "epoch": 2.7871925029285434, "grad_norm": 2.7787892818450928, "learning_rate": 1.2916413304670972e-07, "loss": 0.3516, "step": 21414 }, { "epoch": 2.7875829754002344, "grad_norm": 3.176905632019043, "learning_rate": 1.2869261310624437e-07, "loss": 0.3978, "step": 21417 }, { "epoch": 2.787973447871925, "grad_norm": 2.9123852252960205, "learning_rate": 1.282219441729321e-07, "loss": 0.3417, "step": 21420 }, { "epoch": 2.788363920343616, "grad_norm": 2.5749077796936035, "learning_rate": 1.2775212632899715e-07, "loss": 0.345, "step": 21423 }, { "epoch": 2.7887543928153065, "grad_norm": 2.712224006652832, "learning_rate": 1.2728315965651606e-07, "loss": 0.4133, "step": 21426 }, { "epoch": 2.789144865286997, "grad_norm": 2.8069446086883545, "learning_rate": 1.2681504423741665e-07, "loss": 0.3276, "step": 21429 }, { "epoch": 2.789535337758688, "grad_norm": 2.740924596786499, "learning_rate": 1.2634778015347682e-07, "loss": 0.3468, "step": 21432 }, { "epoch": 2.789925810230379, "grad_norm": 2.735494613647461, "learning_rate": 1.2588136748632685e-07, "loss": 0.3151, "step": 21435 }, { "epoch": 2.7903162827020696, "grad_norm": 2.7889745235443115, "learning_rate": 1.2541580631744931e-07, "loss": 0.338, "step": 21438 }, { "epoch": 2.79070675517376, "grad_norm": 2.608374834060669, "learning_rate": 1.249510967281753e-07, "loss": 0.3711, "step": 21441 }, { "epoch": 2.791097227645451, "grad_norm": 2.477095365524292, "learning_rate": 1.2448723879968927e-07, "loss": 0.3962, "step": 21444 }, { "epoch": 2.7914877001171416, "grad_norm": 3.061278820037842, "learning_rate": 1.2402423261302532e-07, "loss": 0.3421, "step": 21447 }, { "epoch": 2.7918781725888326, "grad_norm": 2.966050386428833, "learning_rate": 1.2356207824907152e-07, "loss": 0.3855, "step": 21450 }, { "epoch": 2.792268645060523, "grad_norm": 2.758970022201538, "learning_rate": 1.2310077578856328e-07, "loss": 0.3178, "step": 21453 }, { "epoch": 2.7926591175322137, "grad_norm": 2.657818555831909, "learning_rate": 1.2264032531209004e-07, "loss": 0.3399, "step": 21456 }, { "epoch": 2.7930495900039047, "grad_norm": 2.7079572677612305, "learning_rate": 1.2218072690009187e-07, "loss": 0.3295, "step": 21459 }, { "epoch": 2.7934400624755957, "grad_norm": 2.721400737762451, "learning_rate": 1.2172198063285957e-07, "loss": 0.3228, "step": 21462 }, { "epoch": 2.7938305349472863, "grad_norm": 2.8544654846191406, "learning_rate": 1.2126408659053402e-07, "loss": 0.3513, "step": 21465 }, { "epoch": 2.794221007418977, "grad_norm": 3.039076566696167, "learning_rate": 1.2080704485310957e-07, "loss": 0.4097, "step": 21468 }, { "epoch": 2.794611479890668, "grad_norm": 3.0144577026367188, "learning_rate": 1.2035085550043013e-07, "loss": 0.3648, "step": 21471 }, { "epoch": 2.7950019523623584, "grad_norm": 2.8478612899780273, "learning_rate": 1.1989551861219084e-07, "loss": 0.3556, "step": 21474 }, { "epoch": 2.7953924248340494, "grad_norm": 3.181854724884033, "learning_rate": 1.1944103426793808e-07, "loss": 0.2727, "step": 21477 }, { "epoch": 2.79578289730574, "grad_norm": 2.986677885055542, "learning_rate": 1.1898740254706942e-07, "loss": 0.3345, "step": 21480 }, { "epoch": 2.7961733697774305, "grad_norm": 2.7655344009399414, "learning_rate": 1.1853462352883371e-07, "loss": 0.4025, "step": 21483 }, { "epoch": 2.7965638422491215, "grad_norm": 2.599412202835083, "learning_rate": 1.180826972923299e-07, "loss": 0.3651, "step": 21486 }, { "epoch": 2.796954314720812, "grad_norm": 3.225376844406128, "learning_rate": 1.1763162391650929e-07, "loss": 0.3836, "step": 21489 }, { "epoch": 2.797344787192503, "grad_norm": 2.734292507171631, "learning_rate": 1.1718140348017271e-07, "loss": 0.3683, "step": 21492 }, { "epoch": 2.7977352596641936, "grad_norm": 2.984931230545044, "learning_rate": 1.1673203606197336e-07, "loss": 0.3585, "step": 21495 }, { "epoch": 2.7981257321358846, "grad_norm": 2.9327147006988525, "learning_rate": 1.1628352174041346e-07, "loss": 0.4024, "step": 21498 }, { "epoch": 2.798516204607575, "grad_norm": 2.466592788696289, "learning_rate": 1.1583586059384921e-07, "loss": 0.33, "step": 21501 }, { "epoch": 2.798906677079266, "grad_norm": 2.7316441535949707, "learning_rate": 1.1538905270048528e-07, "loss": 0.312, "step": 21504 }, { "epoch": 2.7992971495509567, "grad_norm": 2.5499911308288574, "learning_rate": 1.1494309813837756e-07, "loss": 0.3187, "step": 21507 }, { "epoch": 2.799687622022647, "grad_norm": 2.7340035438537598, "learning_rate": 1.1449799698543429e-07, "loss": 0.3893, "step": 21510 }, { "epoch": 2.800078094494338, "grad_norm": 2.626269578933716, "learning_rate": 1.1405374931941382e-07, "loss": 0.3604, "step": 21513 }, { "epoch": 2.8004685669660287, "grad_norm": 2.762885808944702, "learning_rate": 1.1361035521792407e-07, "loss": 0.3397, "step": 21516 }, { "epoch": 2.8008590394377197, "grad_norm": 2.9258060455322266, "learning_rate": 1.1316781475842586e-07, "loss": 0.3689, "step": 21519 }, { "epoch": 2.8012495119094103, "grad_norm": 2.73075008392334, "learning_rate": 1.1272612801823069e-07, "loss": 0.3302, "step": 21522 }, { "epoch": 2.8016399843811013, "grad_norm": 2.7661848068237305, "learning_rate": 1.1228529507449904e-07, "loss": 0.4068, "step": 21525 }, { "epoch": 2.802030456852792, "grad_norm": 2.8157827854156494, "learning_rate": 1.1184531600424431e-07, "loss": 0.3384, "step": 21528 }, { "epoch": 2.802420929324483, "grad_norm": 2.6738975048065186, "learning_rate": 1.1140619088432946e-07, "loss": 0.3366, "step": 21531 }, { "epoch": 2.8028114017961734, "grad_norm": 3.107644557952881, "learning_rate": 1.1096791979146981e-07, "loss": 0.3749, "step": 21534 }, { "epoch": 2.803201874267864, "grad_norm": 2.7247238159179688, "learning_rate": 1.1053050280222855e-07, "loss": 0.3524, "step": 21537 }, { "epoch": 2.803592346739555, "grad_norm": 2.4612529277801514, "learning_rate": 1.1009393999302287e-07, "loss": 0.3645, "step": 21540 }, { "epoch": 2.8039828192112455, "grad_norm": 2.981336832046509, "learning_rate": 1.09658231440119e-07, "loss": 0.4023, "step": 21543 }, { "epoch": 2.8043732916829365, "grad_norm": 2.6686904430389404, "learning_rate": 1.0922337721963494e-07, "loss": 0.3697, "step": 21546 }, { "epoch": 2.804763764154627, "grad_norm": 2.5626204013824463, "learning_rate": 1.0878937740753714e-07, "loss": 0.3182, "step": 21549 }, { "epoch": 2.8051542366263176, "grad_norm": 3.065051317214966, "learning_rate": 1.0835623207964607e-07, "loss": 0.3935, "step": 21552 }, { "epoch": 2.8055447090980086, "grad_norm": 4.100696086883545, "learning_rate": 1.0792394131163064e-07, "loss": 0.3165, "step": 21555 }, { "epoch": 2.8059351815696996, "grad_norm": 2.8829846382141113, "learning_rate": 1.07492505179011e-07, "loss": 0.3021, "step": 21558 }, { "epoch": 2.80632565404139, "grad_norm": 2.999645471572876, "learning_rate": 1.0706192375715851e-07, "loss": 0.3379, "step": 21561 }, { "epoch": 2.8067161265130807, "grad_norm": 2.903561592102051, "learning_rate": 1.0663219712129469e-07, "loss": 0.4568, "step": 21564 }, { "epoch": 2.8071065989847717, "grad_norm": 2.6506104469299316, "learning_rate": 1.0620332534649225e-07, "loss": 0.3272, "step": 21567 }, { "epoch": 2.807497071456462, "grad_norm": 2.9835636615753174, "learning_rate": 1.0577530850767348e-07, "loss": 0.3592, "step": 21570 }, { "epoch": 2.807887543928153, "grad_norm": 2.599337100982666, "learning_rate": 1.0534814667961246e-07, "loss": 0.3549, "step": 21573 }, { "epoch": 2.8082780163998438, "grad_norm": 2.8641128540039062, "learning_rate": 1.0492183993693394e-07, "loss": 0.3638, "step": 21576 }, { "epoch": 2.8086684888715343, "grad_norm": 2.740431547164917, "learning_rate": 1.0449638835411114e-07, "loss": 0.3511, "step": 21579 }, { "epoch": 2.8090589613432253, "grad_norm": 2.9625089168548584, "learning_rate": 1.0407179200547124e-07, "loss": 0.4805, "step": 21582 }, { "epoch": 2.8094494338149163, "grad_norm": 2.8378477096557617, "learning_rate": 1.0364805096518993e-07, "loss": 0.349, "step": 21585 }, { "epoch": 2.809839906286607, "grad_norm": 2.665264368057251, "learning_rate": 1.0322516530729298e-07, "loss": 0.3023, "step": 21588 }, { "epoch": 2.8102303787582974, "grad_norm": 2.7869749069213867, "learning_rate": 1.0280313510565909e-07, "loss": 0.3308, "step": 21591 }, { "epoch": 2.8106208512299884, "grad_norm": 3.251950740814209, "learning_rate": 1.0238196043401538e-07, "loss": 0.2892, "step": 21594 }, { "epoch": 2.811011323701679, "grad_norm": 2.933887004852295, "learning_rate": 1.0196164136594022e-07, "loss": 0.366, "step": 21597 }, { "epoch": 2.81140179617337, "grad_norm": 2.547628164291382, "learning_rate": 1.0154217797486098e-07, "loss": 0.3066, "step": 21600 }, { "epoch": 2.8117922686450605, "grad_norm": 2.8008322715759277, "learning_rate": 1.0112357033405962e-07, "loss": 0.3796, "step": 21603 }, { "epoch": 2.812182741116751, "grad_norm": 2.5808677673339844, "learning_rate": 1.0070581851666428e-07, "loss": 0.3241, "step": 21606 }, { "epoch": 2.812573213588442, "grad_norm": 2.5986881256103516, "learning_rate": 1.0028892259565658e-07, "loss": 0.3361, "step": 21609 }, { "epoch": 2.812963686060133, "grad_norm": 2.557717800140381, "learning_rate": 9.987288264386552e-08, "loss": 0.346, "step": 21612 }, { "epoch": 2.8133541585318236, "grad_norm": 2.836681842803955, "learning_rate": 9.945769873397404e-08, "loss": 0.3852, "step": 21615 }, { "epoch": 2.813744631003514, "grad_norm": 2.740900754928589, "learning_rate": 9.904337093851358e-08, "loss": 0.402, "step": 21618 }, { "epoch": 2.814135103475205, "grad_norm": 2.7481210231781006, "learning_rate": 9.862989932986566e-08, "loss": 0.347, "step": 21621 }, { "epoch": 2.8145255759468957, "grad_norm": 2.67153000831604, "learning_rate": 9.82172839802642e-08, "loss": 0.3092, "step": 21624 }, { "epoch": 2.8149160484185867, "grad_norm": 2.9304611682891846, "learning_rate": 9.780552496179096e-08, "loss": 0.3068, "step": 21627 }, { "epoch": 2.815306520890277, "grad_norm": 2.806035280227661, "learning_rate": 9.739462234637898e-08, "loss": 0.3729, "step": 21630 }, { "epoch": 2.8156969933619678, "grad_norm": 3.032376766204834, "learning_rate": 9.698457620581359e-08, "loss": 0.3683, "step": 21633 }, { "epoch": 2.8160874658336588, "grad_norm": 3.0666725635528564, "learning_rate": 9.657538661172861e-08, "loss": 0.3887, "step": 21636 }, { "epoch": 2.8164779383053493, "grad_norm": 2.8096840381622314, "learning_rate": 9.616705363560796e-08, "loss": 0.3416, "step": 21639 }, { "epoch": 2.8168684107770403, "grad_norm": 2.5482988357543945, "learning_rate": 9.575957734878627e-08, "loss": 0.3262, "step": 21642 }, { "epoch": 2.817258883248731, "grad_norm": 2.7446038722991943, "learning_rate": 9.535295782245046e-08, "loss": 0.3893, "step": 21645 }, { "epoch": 2.817649355720422, "grad_norm": 2.8740649223327637, "learning_rate": 9.494719512763429e-08, "loss": 0.3946, "step": 21648 }, { "epoch": 2.8180398281921124, "grad_norm": 2.647369623184204, "learning_rate": 9.454228933522491e-08, "loss": 0.3482, "step": 21651 }, { "epoch": 2.8184303006638034, "grad_norm": 2.5511598587036133, "learning_rate": 9.413824051595799e-08, "loss": 0.315, "step": 21654 }, { "epoch": 2.818820773135494, "grad_norm": 2.809553623199463, "learning_rate": 9.373504874041984e-08, "loss": 0.4283, "step": 21657 }, { "epoch": 2.8192112456071845, "grad_norm": 2.821474075317383, "learning_rate": 9.333271407904743e-08, "loss": 0.3556, "step": 21660 }, { "epoch": 2.8196017180788755, "grad_norm": 2.612962484359741, "learning_rate": 9.293123660212733e-08, "loss": 0.3268, "step": 21663 }, { "epoch": 2.819992190550566, "grad_norm": 3.18330454826355, "learning_rate": 9.253061637979788e-08, "loss": 0.3513, "step": 21666 }, { "epoch": 2.820382663022257, "grad_norm": 2.551689624786377, "learning_rate": 9.213085348204587e-08, "loss": 0.3637, "step": 21669 }, { "epoch": 2.8207731354939476, "grad_norm": 2.9806084632873535, "learning_rate": 9.173194797870877e-08, "loss": 0.4023, "step": 21672 }, { "epoch": 2.8211636079656386, "grad_norm": 2.7304067611694336, "learning_rate": 9.133389993947528e-08, "loss": 0.3305, "step": 21675 }, { "epoch": 2.821554080437329, "grad_norm": 3.107208013534546, "learning_rate": 9.09367094338831e-08, "loss": 0.3507, "step": 21678 }, { "epoch": 2.82194455290902, "grad_norm": 2.694958448410034, "learning_rate": 9.054037653132008e-08, "loss": 0.3679, "step": 21681 }, { "epoch": 2.8223350253807107, "grad_norm": 2.6622893810272217, "learning_rate": 9.014490130102527e-08, "loss": 0.3641, "step": 21684 }, { "epoch": 2.8227254978524012, "grad_norm": 2.8670809268951416, "learning_rate": 8.975028381208784e-08, "loss": 0.3707, "step": 21687 }, { "epoch": 2.8231159703240922, "grad_norm": 2.79952335357666, "learning_rate": 8.935652413344598e-08, "loss": 0.4008, "step": 21690 }, { "epoch": 2.823506442795783, "grad_norm": 3.0517783164978027, "learning_rate": 8.89636223338891e-08, "loss": 0.4025, "step": 21693 }, { "epoch": 2.8238969152674738, "grad_norm": 2.722447395324707, "learning_rate": 8.857157848205566e-08, "loss": 0.344, "step": 21696 }, { "epoch": 2.8242873877391643, "grad_norm": 2.9243016242980957, "learning_rate": 8.818039264643586e-08, "loss": 0.3724, "step": 21699 }, { "epoch": 2.824677860210855, "grad_norm": 3.2354788780212402, "learning_rate": 8.779006489536834e-08, "loss": 0.3583, "step": 21702 }, { "epoch": 2.825068332682546, "grad_norm": 2.571131706237793, "learning_rate": 8.740059529704248e-08, "loss": 0.3751, "step": 21705 }, { "epoch": 2.825458805154237, "grad_norm": 3.589383363723755, "learning_rate": 8.701198391949827e-08, "loss": 0.3349, "step": 21708 }, { "epoch": 2.8258492776259274, "grad_norm": 2.8357677459716797, "learning_rate": 8.662423083062532e-08, "loss": 0.3465, "step": 21711 }, { "epoch": 2.826239750097618, "grad_norm": 3.248215913772583, "learning_rate": 8.62373360981622e-08, "loss": 0.3655, "step": 21714 }, { "epoch": 2.826630222569309, "grad_norm": 2.9728636741638184, "learning_rate": 8.585129978969986e-08, "loss": 0.3857, "step": 21717 }, { "epoch": 2.8270206950409995, "grad_norm": 3.6474063396453857, "learning_rate": 8.546612197267768e-08, "loss": 0.3819, "step": 21720 }, { "epoch": 2.8274111675126905, "grad_norm": 2.7672176361083984, "learning_rate": 8.508180271438516e-08, "loss": 0.4161, "step": 21723 }, { "epoch": 2.827801639984381, "grad_norm": 2.943279504776001, "learning_rate": 8.469834208196193e-08, "loss": 0.3341, "step": 21726 }, { "epoch": 2.8281921124560716, "grad_norm": 3.383356809616089, "learning_rate": 8.431574014239885e-08, "loss": 0.3586, "step": 21729 }, { "epoch": 2.8285825849277626, "grad_norm": 2.866734266281128, "learning_rate": 8.393399696253412e-08, "loss": 0.3224, "step": 21732 }, { "epoch": 2.8289730573994536, "grad_norm": 2.8714816570281982, "learning_rate": 8.355311260905829e-08, "loss": 0.416, "step": 21735 }, { "epoch": 2.829363529871144, "grad_norm": 2.4566404819488525, "learning_rate": 8.317308714851146e-08, "loss": 0.3209, "step": 21738 }, { "epoch": 2.8297540023428347, "grad_norm": 3.835968255996704, "learning_rate": 8.279392064728276e-08, "loss": 0.3415, "step": 21741 }, { "epoch": 2.8301444748145257, "grad_norm": 2.9507713317871094, "learning_rate": 8.24156131716114e-08, "loss": 0.3388, "step": 21744 }, { "epoch": 2.8305349472862162, "grad_norm": 2.818152904510498, "learning_rate": 8.203816478758785e-08, "loss": 0.3826, "step": 21747 }, { "epoch": 2.8309254197579072, "grad_norm": 2.5711591243743896, "learning_rate": 8.166157556115107e-08, "loss": 0.3195, "step": 21750 }, { "epoch": 2.831315892229598, "grad_norm": 3.0602824687957764, "learning_rate": 8.128584555809005e-08, "loss": 0.333, "step": 21753 }, { "epoch": 2.8317063647012883, "grad_norm": 2.9426445960998535, "learning_rate": 8.091097484404454e-08, "loss": 0.3469, "step": 21756 }, { "epoch": 2.8320968371729793, "grad_norm": 2.615579843521118, "learning_rate": 8.053696348450324e-08, "loss": 0.3241, "step": 21759 }, { "epoch": 2.8324873096446703, "grad_norm": 2.8304200172424316, "learning_rate": 8.016381154480557e-08, "loss": 0.3492, "step": 21762 }, { "epoch": 2.832877782116361, "grad_norm": 2.8419861793518066, "learning_rate": 7.979151909013993e-08, "loss": 0.3959, "step": 21765 }, { "epoch": 2.8332682545880514, "grad_norm": 3.1419153213500977, "learning_rate": 7.942008618554543e-08, "loss": 0.3467, "step": 21768 }, { "epoch": 2.8336587270597424, "grad_norm": 2.6105754375457764, "learning_rate": 7.904951289591068e-08, "loss": 0.3219, "step": 21771 }, { "epoch": 2.834049199531433, "grad_norm": 2.874232530593872, "learning_rate": 7.867979928597336e-08, "loss": 0.4414, "step": 21774 }, { "epoch": 2.834439672003124, "grad_norm": 2.67608642578125, "learning_rate": 7.831094542032236e-08, "loss": 0.3289, "step": 21777 }, { "epoch": 2.8348301444748145, "grad_norm": 2.974731922149658, "learning_rate": 7.794295136339613e-08, "loss": 0.3602, "step": 21780 }, { "epoch": 2.835220616946505, "grad_norm": 2.8034987449645996, "learning_rate": 7.757581717948104e-08, "loss": 0.4196, "step": 21783 }, { "epoch": 2.835611089418196, "grad_norm": 2.63897442817688, "learning_rate": 7.720954293271576e-08, "loss": 0.3953, "step": 21786 }, { "epoch": 2.8360015618898866, "grad_norm": 2.5943167209625244, "learning_rate": 7.68441286870869e-08, "loss": 0.2894, "step": 21789 }, { "epoch": 2.8363920343615776, "grad_norm": 2.8868579864501953, "learning_rate": 7.647957450643284e-08, "loss": 0.4103, "step": 21792 }, { "epoch": 2.836782506833268, "grad_norm": 2.7333850860595703, "learning_rate": 7.611588045443874e-08, "loss": 0.3394, "step": 21795 }, { "epoch": 2.837172979304959, "grad_norm": 2.8235933780670166, "learning_rate": 7.575304659464211e-08, "loss": 0.3461, "step": 21798 }, { "epoch": 2.8375634517766497, "grad_norm": 2.699286699295044, "learning_rate": 7.539107299042947e-08, "loss": 0.3186, "step": 21801 }, { "epoch": 2.8379539242483407, "grad_norm": 2.984445095062256, "learning_rate": 7.502995970503634e-08, "loss": 0.3464, "step": 21804 }, { "epoch": 2.8383443967200312, "grad_norm": 2.8218915462493896, "learning_rate": 7.466970680154839e-08, "loss": 0.3115, "step": 21807 }, { "epoch": 2.838734869191722, "grad_norm": 2.748157262802124, "learning_rate": 7.431031434290137e-08, "loss": 0.3772, "step": 21810 }, { "epoch": 2.839125341663413, "grad_norm": 2.9761040210723877, "learning_rate": 7.395178239188006e-08, "loss": 0.4132, "step": 21813 }, { "epoch": 2.8395158141351033, "grad_norm": 2.9955594539642334, "learning_rate": 7.359411101111991e-08, "loss": 0.3501, "step": 21816 }, { "epoch": 2.8399062866067943, "grad_norm": 2.834073305130005, "learning_rate": 7.323730026310483e-08, "loss": 0.3444, "step": 21819 }, { "epoch": 2.840296759078485, "grad_norm": 2.5343804359436035, "learning_rate": 7.28813502101694e-08, "loss": 0.3856, "step": 21822 }, { "epoch": 2.8406872315501754, "grad_norm": 2.5103886127471924, "learning_rate": 7.252626091449666e-08, "loss": 0.3501, "step": 21825 }, { "epoch": 2.8410777040218664, "grad_norm": 2.7703588008880615, "learning_rate": 7.217203243811976e-08, "loss": 0.3207, "step": 21828 }, { "epoch": 2.8414681764935574, "grad_norm": 2.8706281185150146, "learning_rate": 7.181866484292255e-08, "loss": 0.36, "step": 21831 }, { "epoch": 2.841858648965248, "grad_norm": 2.7826225757598877, "learning_rate": 7.146615819063729e-08, "loss": 0.3871, "step": 21834 }, { "epoch": 2.8422491214369385, "grad_norm": 2.82706880569458, "learning_rate": 7.111451254284584e-08, "loss": 0.3323, "step": 21837 }, { "epoch": 2.8426395939086295, "grad_norm": 2.9153218269348145, "learning_rate": 7.076372796098019e-08, "loss": 0.3573, "step": 21840 }, { "epoch": 2.84303006638032, "grad_norm": 2.676373243331909, "learning_rate": 7.041380450632185e-08, "loss": 0.3178, "step": 21843 }, { "epoch": 2.843420538852011, "grad_norm": 2.866745948791504, "learning_rate": 7.006474224000138e-08, "loss": 0.3973, "step": 21846 }, { "epoch": 2.8438110113237016, "grad_norm": 2.364248514175415, "learning_rate": 6.971654122299943e-08, "loss": 0.3069, "step": 21849 }, { "epoch": 2.844201483795392, "grad_norm": 2.777941942214966, "learning_rate": 6.936920151614567e-08, "loss": 0.2974, "step": 21852 }, { "epoch": 2.844591956267083, "grad_norm": 2.982527017593384, "learning_rate": 6.90227231801205e-08, "loss": 0.3763, "step": 21855 }, { "epoch": 2.844982428738774, "grad_norm": 2.7366418838500977, "learning_rate": 6.867710627545154e-08, "loss": 0.3435, "step": 21858 }, { "epoch": 2.8453729012104647, "grad_norm": 3.094679832458496, "learning_rate": 6.833235086251889e-08, "loss": 0.3818, "step": 21861 }, { "epoch": 2.8457633736821553, "grad_norm": 2.775191068649292, "learning_rate": 6.798845700154987e-08, "loss": 0.3325, "step": 21864 }, { "epoch": 2.8461538461538463, "grad_norm": 2.8869082927703857, "learning_rate": 6.764542475262147e-08, "loss": 0.3708, "step": 21867 }, { "epoch": 2.846544318625537, "grad_norm": 3.177225112915039, "learning_rate": 6.73032541756613e-08, "loss": 0.3381, "step": 21870 }, { "epoch": 2.846934791097228, "grad_norm": 2.946268081665039, "learning_rate": 6.696194533044598e-08, "loss": 0.3494, "step": 21873 }, { "epoch": 2.8473252635689184, "grad_norm": 3.021143674850464, "learning_rate": 6.662149827660114e-08, "loss": 0.3219, "step": 21876 }, { "epoch": 2.847715736040609, "grad_norm": 2.826537847518921, "learning_rate": 6.628191307360199e-08, "loss": 0.341, "step": 21879 }, { "epoch": 2.8481062085123, "grad_norm": 2.8287034034729004, "learning_rate": 6.594318978077386e-08, "loss": 0.3956, "step": 21882 }, { "epoch": 2.848496680983991, "grad_norm": 3.175198793411255, "learning_rate": 6.56053284572905e-08, "loss": 0.2685, "step": 21885 }, { "epoch": 2.8488871534556814, "grad_norm": 2.6262009143829346, "learning_rate": 6.526832916217584e-08, "loss": 0.374, "step": 21888 }, { "epoch": 2.849277625927372, "grad_norm": 2.9307057857513428, "learning_rate": 6.493219195430334e-08, "loss": 0.4002, "step": 21891 }, { "epoch": 2.849668098399063, "grad_norm": 2.8721837997436523, "learning_rate": 6.459691689239433e-08, "loss": 0.3986, "step": 21894 }, { "epoch": 2.8500585708707535, "grad_norm": 2.7488579750061035, "learning_rate": 6.426250403502199e-08, "loss": 0.333, "step": 21897 }, { "epoch": 2.8504490433424445, "grad_norm": 2.9429168701171875, "learning_rate": 6.392895344060623e-08, "loss": 0.3574, "step": 21900 }, { "epoch": 2.850839515814135, "grad_norm": 2.7157819271087646, "learning_rate": 6.359626516741935e-08, "loss": 0.3414, "step": 21903 }, { "epoch": 2.8512299882858256, "grad_norm": 2.7233452796936035, "learning_rate": 6.326443927357983e-08, "loss": 0.3299, "step": 21906 }, { "epoch": 2.8516204607575166, "grad_norm": 2.926488161087036, "learning_rate": 6.293347581705689e-08, "loss": 0.3944, "step": 21909 }, { "epoch": 2.852010933229207, "grad_norm": 2.6980791091918945, "learning_rate": 6.260337485567037e-08, "loss": 0.3758, "step": 21912 }, { "epoch": 2.852401405700898, "grad_norm": 2.777859926223755, "learning_rate": 6.227413644708691e-08, "loss": 0.2841, "step": 21915 }, { "epoch": 2.8527918781725887, "grad_norm": 3.123918056488037, "learning_rate": 6.194576064882496e-08, "loss": 0.365, "step": 21918 }, { "epoch": 2.8531823506442797, "grad_norm": 2.804738998413086, "learning_rate": 6.161824751824974e-08, "loss": 0.3655, "step": 21921 }, { "epoch": 2.8535728231159703, "grad_norm": 2.6704933643341064, "learning_rate": 6.129159711257826e-08, "loss": 0.2882, "step": 21924 }, { "epoch": 2.8539632955876613, "grad_norm": 2.9849040508270264, "learning_rate": 6.096580948887543e-08, "loss": 0.3722, "step": 21927 }, { "epoch": 2.854353768059352, "grad_norm": 2.9017884731292725, "learning_rate": 6.064088470405516e-08, "loss": 0.3921, "step": 21930 }, { "epoch": 2.8547442405310424, "grad_norm": 2.6805455684661865, "learning_rate": 6.03168228148815e-08, "loss": 0.3453, "step": 21933 }, { "epoch": 2.8551347130027334, "grad_norm": 2.8592615127563477, "learning_rate": 5.999362387796747e-08, "loss": 0.3311, "step": 21936 }, { "epoch": 2.855525185474424, "grad_norm": 3.1579458713531494, "learning_rate": 5.967128794977462e-08, "loss": 0.3666, "step": 21939 }, { "epoch": 2.855915657946115, "grad_norm": 2.6894073486328125, "learning_rate": 5.9349815086615084e-08, "loss": 0.3648, "step": 21942 }, { "epoch": 2.8563061304178055, "grad_norm": 2.765660047531128, "learning_rate": 5.902920534464951e-08, "loss": 0.3761, "step": 21945 }, { "epoch": 2.8566966028894965, "grad_norm": 2.739934206008911, "learning_rate": 5.870945877988754e-08, "loss": 0.293, "step": 21948 }, { "epoch": 2.857087075361187, "grad_norm": 3.662998914718628, "learning_rate": 5.839057544818783e-08, "loss": 0.3738, "step": 21951 }, { "epoch": 2.857477547832878, "grad_norm": 2.789340019226074, "learning_rate": 5.8072555405259135e-08, "loss": 0.3837, "step": 21954 }, { "epoch": 2.8578680203045685, "grad_norm": 2.4492013454437256, "learning_rate": 5.7755398706658694e-08, "loss": 0.3126, "step": 21957 }, { "epoch": 2.858258492776259, "grad_norm": 2.5558364391326904, "learning_rate": 5.7439105407792736e-08, "loss": 0.31, "step": 21960 }, { "epoch": 2.85864896524795, "grad_norm": 2.8237719535827637, "learning_rate": 5.712367556391818e-08, "loss": 0.3935, "step": 21963 }, { "epoch": 2.8590394377196406, "grad_norm": 2.918532371520996, "learning_rate": 5.6809109230138714e-08, "loss": 0.3618, "step": 21966 }, { "epoch": 2.8594299101913316, "grad_norm": 3.382922410964966, "learning_rate": 5.6495406461409274e-08, "loss": 0.3443, "step": 21969 }, { "epoch": 2.859820382663022, "grad_norm": 2.8826119899749756, "learning_rate": 5.6182567312532134e-08, "loss": 0.4316, "step": 21972 }, { "epoch": 2.8602108551347127, "grad_norm": 2.8180668354034424, "learning_rate": 5.587059183816079e-08, "loss": 0.3589, "step": 21975 }, { "epoch": 2.8606013276064037, "grad_norm": 2.677135705947876, "learning_rate": 5.5559480092795545e-08, "loss": 0.3745, "step": 21978 }, { "epoch": 2.8609918000780947, "grad_norm": 2.9708173274993896, "learning_rate": 5.5249232130787924e-08, "loss": 0.3381, "step": 21981 }, { "epoch": 2.8613822725497853, "grad_norm": 3.1857826709747314, "learning_rate": 5.493984800633734e-08, "loss": 0.3931, "step": 21984 }, { "epoch": 2.861772745021476, "grad_norm": 3.0874578952789307, "learning_rate": 5.4631327773492226e-08, "loss": 0.3146, "step": 21987 }, { "epoch": 2.862163217493167, "grad_norm": 2.952526807785034, "learning_rate": 5.4323671486150585e-08, "loss": 0.3709, "step": 21990 }, { "epoch": 2.8625536899648574, "grad_norm": 2.7421607971191406, "learning_rate": 5.401687919805942e-08, "loss": 0.3305, "step": 21993 }, { "epoch": 2.8629441624365484, "grad_norm": 3.3687474727630615, "learning_rate": 5.3710950962814755e-08, "loss": 0.4446, "step": 21996 }, { "epoch": 2.863334634908239, "grad_norm": 3.39333176612854, "learning_rate": 5.3405886833861054e-08, "loss": 0.3432, "step": 21999 }, { "epoch": 2.8637251073799295, "grad_norm": 2.7644834518432617, "learning_rate": 5.3101686864492904e-08, "loss": 0.3486, "step": 22002 }, { "epoch": 2.8641155798516205, "grad_norm": 2.7170677185058594, "learning_rate": 5.279835110785392e-08, "loss": 0.3633, "step": 22005 }, { "epoch": 2.8645060523233115, "grad_norm": 3.4381911754608154, "learning_rate": 5.249587961693503e-08, "loss": 0.3431, "step": 22008 }, { "epoch": 2.864896524795002, "grad_norm": 2.583317756652832, "learning_rate": 5.219427244457842e-08, "loss": 0.3621, "step": 22011 }, { "epoch": 2.8652869972666926, "grad_norm": 2.8179497718811035, "learning_rate": 5.189352964347305e-08, "loss": 0.3758, "step": 22014 }, { "epoch": 2.8656774697383836, "grad_norm": 2.632049560546875, "learning_rate": 5.159365126615967e-08, "loss": 0.3725, "step": 22017 }, { "epoch": 2.866067942210074, "grad_norm": 3.091775894165039, "learning_rate": 5.129463736502471e-08, "loss": 0.3826, "step": 22020 }, { "epoch": 2.866458414681765, "grad_norm": 2.7316322326660156, "learning_rate": 5.099648799230583e-08, "loss": 0.3998, "step": 22023 }, { "epoch": 2.8668488871534556, "grad_norm": 2.750880002975464, "learning_rate": 5.0699203200089694e-08, "loss": 0.4074, "step": 22026 }, { "epoch": 2.867239359625146, "grad_norm": 3.477369785308838, "learning_rate": 5.0402783040311435e-08, "loss": 0.4091, "step": 22029 }, { "epoch": 2.867629832096837, "grad_norm": 2.8512279987335205, "learning_rate": 5.010722756475406e-08, "loss": 0.3391, "step": 22032 }, { "epoch": 2.868020304568528, "grad_norm": 2.6762948036193848, "learning_rate": 4.981253682505127e-08, "loss": 0.2929, "step": 22035 }, { "epoch": 2.8684107770402187, "grad_norm": 3.086232900619507, "learning_rate": 4.951871087268412e-08, "loss": 0.3973, "step": 22038 }, { "epoch": 2.8688012495119093, "grad_norm": 2.8311057090759277, "learning_rate": 4.922574975898431e-08, "loss": 0.3275, "step": 22041 }, { "epoch": 2.8691917219836003, "grad_norm": 2.6848394870758057, "learning_rate": 4.893365353513091e-08, "loss": 0.3176, "step": 22044 }, { "epoch": 2.869582194455291, "grad_norm": 2.7619194984436035, "learning_rate": 4.86424222521531e-08, "loss": 0.3016, "step": 22047 }, { "epoch": 2.869972666926982, "grad_norm": 2.7256760597229004, "learning_rate": 4.835205596092796e-08, "loss": 0.3447, "step": 22050 }, { "epoch": 2.8703631393986724, "grad_norm": 2.8061459064483643, "learning_rate": 4.806255471218102e-08, "loss": 0.3466, "step": 22053 }, { "epoch": 2.870753611870363, "grad_norm": 2.5725607872009277, "learning_rate": 4.7773918556489054e-08, "loss": 0.3098, "step": 22056 }, { "epoch": 2.871144084342054, "grad_norm": 2.703907012939453, "learning_rate": 4.748614754427561e-08, "loss": 0.3863, "step": 22059 }, { "epoch": 2.8715345568137445, "grad_norm": 2.798107385635376, "learning_rate": 4.71992417258138e-08, "loss": 0.3247, "step": 22062 }, { "epoch": 2.8719250292854355, "grad_norm": 2.4142208099365234, "learning_rate": 4.691320115122466e-08, "loss": 0.3145, "step": 22065 }, { "epoch": 2.872315501757126, "grad_norm": 2.5614852905273438, "learning_rate": 4.6628025870479875e-08, "loss": 0.3519, "step": 22068 }, { "epoch": 2.872705974228817, "grad_norm": 2.7241270542144775, "learning_rate": 4.6343715933399034e-08, "loss": 0.3275, "step": 22071 }, { "epoch": 2.8730964467005076, "grad_norm": 2.801772356033325, "learning_rate": 4.6060271389649635e-08, "loss": 0.3661, "step": 22074 }, { "epoch": 2.8734869191721986, "grad_norm": 2.6916000843048096, "learning_rate": 4.577769228874873e-08, "loss": 0.3467, "step": 22077 }, { "epoch": 2.873877391643889, "grad_norm": 2.7385640144348145, "learning_rate": 4.54959786800635e-08, "loss": 0.3489, "step": 22080 }, { "epoch": 2.8742678641155797, "grad_norm": 2.9576168060302734, "learning_rate": 4.521513061280791e-08, "loss": 0.333, "step": 22083 }, { "epoch": 2.8746583365872707, "grad_norm": 2.8429853916168213, "learning_rate": 4.4935148136045495e-08, "loss": 0.3436, "step": 22086 }, { "epoch": 2.875048809058961, "grad_norm": 2.8129966259002686, "learning_rate": 4.465603129868934e-08, "loss": 0.3853, "step": 22089 }, { "epoch": 2.875439281530652, "grad_norm": 2.891261100769043, "learning_rate": 4.437778014949934e-08, "loss": 0.3138, "step": 22092 }, { "epoch": 2.8758297540023428, "grad_norm": 2.6374096870422363, "learning_rate": 4.410039473708605e-08, "loss": 0.2911, "step": 22095 }, { "epoch": 2.8762202264740333, "grad_norm": 2.888641119003296, "learning_rate": 4.3823875109908486e-08, "loss": 0.4262, "step": 22098 }, { "epoch": 2.8766106989457243, "grad_norm": 3.0171844959259033, "learning_rate": 4.354822131627357e-08, "loss": 0.427, "step": 22101 }, { "epoch": 2.8770011714174153, "grad_norm": 2.790205717086792, "learning_rate": 4.327343340433721e-08, "loss": 0.3196, "step": 22104 }, { "epoch": 2.877391643889106, "grad_norm": 2.8684253692626953, "learning_rate": 4.29995114221049e-08, "loss": 0.3728, "step": 22107 }, { "epoch": 2.8777821163607964, "grad_norm": 2.8882906436920166, "learning_rate": 4.2726455417430037e-08, "loss": 0.3488, "step": 22110 }, { "epoch": 2.8781725888324874, "grad_norm": 2.429396152496338, "learning_rate": 4.2454265438014454e-08, "loss": 0.3179, "step": 22113 }, { "epoch": 2.878563061304178, "grad_norm": 3.0140392780303955, "learning_rate": 4.2182941531410113e-08, "loss": 0.3496, "step": 22116 }, { "epoch": 2.878953533775869, "grad_norm": 3.1088218688964844, "learning_rate": 4.191248374501577e-08, "loss": 0.3918, "step": 22119 }, { "epoch": 2.8793440062475595, "grad_norm": 2.7448649406433105, "learning_rate": 4.16428921260803e-08, "loss": 0.4052, "step": 22122 }, { "epoch": 2.87973447871925, "grad_norm": 2.797527551651001, "learning_rate": 4.1374166721701026e-08, "loss": 0.3408, "step": 22125 }, { "epoch": 2.880124951190941, "grad_norm": 2.954578399658203, "learning_rate": 4.110630757882261e-08, "loss": 0.3704, "step": 22128 }, { "epoch": 2.880515423662632, "grad_norm": 2.8430421352386475, "learning_rate": 4.0839314744240966e-08, "loss": 0.4094, "step": 22131 }, { "epoch": 2.8809058961343226, "grad_norm": 2.5656564235687256, "learning_rate": 4.057318826459822e-08, "loss": 0.3717, "step": 22134 }, { "epoch": 2.881296368606013, "grad_norm": 2.829988956451416, "learning_rate": 4.0307928186386625e-08, "loss": 0.3751, "step": 22137 }, { "epoch": 2.881686841077704, "grad_norm": 2.584980010986328, "learning_rate": 4.004353455594578e-08, "loss": 0.3584, "step": 22140 }, { "epoch": 2.8820773135493947, "grad_norm": 3.4724276065826416, "learning_rate": 3.978000741946597e-08, "loss": 0.3441, "step": 22143 }, { "epoch": 2.8824677860210857, "grad_norm": 2.891096830368042, "learning_rate": 3.951734682298314e-08, "loss": 0.3937, "step": 22146 }, { "epoch": 2.882858258492776, "grad_norm": 3.165163993835449, "learning_rate": 3.9255552812385025e-08, "loss": 0.3763, "step": 22149 }, { "epoch": 2.8832487309644668, "grad_norm": 2.5909931659698486, "learning_rate": 3.8994625433406155e-08, "loss": 0.3113, "step": 22152 }, { "epoch": 2.8836392034361578, "grad_norm": 2.9258785247802734, "learning_rate": 3.873456473162951e-08, "loss": 0.3515, "step": 22155 }, { "epoch": 2.8840296759078488, "grad_norm": 2.447117805480957, "learning_rate": 3.847537075248764e-08, "loss": 0.333, "step": 22158 }, { "epoch": 2.8844201483795393, "grad_norm": 2.608942985534668, "learning_rate": 3.821704354126099e-08, "loss": 0.3675, "step": 22161 }, { "epoch": 2.88481062085123, "grad_norm": 2.856931447982788, "learning_rate": 3.795958314307846e-08, "loss": 0.3207, "step": 22164 }, { "epoch": 2.885201093322921, "grad_norm": 3.8806073665618896, "learning_rate": 3.770298960291796e-08, "loss": 0.3051, "step": 22167 }, { "epoch": 2.8855915657946114, "grad_norm": 2.8698956966400146, "learning_rate": 3.744726296560641e-08, "loss": 0.3686, "step": 22170 }, { "epoch": 2.8859820382663024, "grad_norm": 2.8077917098999023, "learning_rate": 3.7192403275818636e-08, "loss": 0.3602, "step": 22173 }, { "epoch": 2.886372510737993, "grad_norm": 2.8903465270996094, "learning_rate": 3.693841057807734e-08, "loss": 0.3563, "step": 22176 }, { "epoch": 2.8867629832096835, "grad_norm": 2.8778324127197266, "learning_rate": 3.6685284916755384e-08, "loss": 0.3589, "step": 22179 }, { "epoch": 2.8871534556813745, "grad_norm": 2.758140802383423, "learning_rate": 3.643302633607237e-08, "loss": 0.3862, "step": 22182 }, { "epoch": 2.887543928153065, "grad_norm": 2.449650526046753, "learning_rate": 3.618163488009807e-08, "loss": 0.3352, "step": 22185 }, { "epoch": 2.887934400624756, "grad_norm": 2.983010768890381, "learning_rate": 3.593111059274956e-08, "loss": 0.3191, "step": 22188 }, { "epoch": 2.8883248730964466, "grad_norm": 2.961902618408203, "learning_rate": 3.5681453517793506e-08, "loss": 0.3256, "step": 22191 }, { "epoch": 2.8887153455681376, "grad_norm": 2.506606340408325, "learning_rate": 3.54326636988439e-08, "loss": 0.3348, "step": 22194 }, { "epoch": 2.889105818039828, "grad_norm": 2.9733147621154785, "learning_rate": 3.518474117936432e-08, "loss": 0.3812, "step": 22197 }, { "epoch": 2.889496290511519, "grad_norm": 2.4794435501098633, "learning_rate": 3.493768600266567e-08, "loss": 0.3011, "step": 22200 }, { "epoch": 2.8898867629832097, "grad_norm": 2.974116563796997, "learning_rate": 3.469149821190842e-08, "loss": 0.337, "step": 22203 }, { "epoch": 2.8902772354549002, "grad_norm": 3.0011672973632812, "learning_rate": 3.4446177850100957e-08, "loss": 0.4151, "step": 22206 }, { "epoch": 2.8906677079265912, "grad_norm": 2.728896141052246, "learning_rate": 3.42017249601001e-08, "loss": 0.3306, "step": 22209 }, { "epoch": 2.8910581803982818, "grad_norm": 2.9429938793182373, "learning_rate": 3.395813958461169e-08, "loss": 0.3284, "step": 22212 }, { "epoch": 2.8914486528699728, "grad_norm": 2.7792723178863525, "learning_rate": 3.371542176618891e-08, "loss": 0.3404, "step": 22215 }, { "epoch": 2.8918391253416633, "grad_norm": 2.7647454738616943, "learning_rate": 3.347357154723452e-08, "loss": 0.3339, "step": 22218 }, { "epoch": 2.8922295978133543, "grad_norm": 2.6491119861602783, "learning_rate": 3.323258896999915e-08, "loss": 0.3466, "step": 22221 }, { "epoch": 2.892620070285045, "grad_norm": 2.659365177154541, "learning_rate": 3.2992474076581904e-08, "loss": 0.3819, "step": 22224 }, { "epoch": 2.893010542756736, "grad_norm": 3.4432740211486816, "learning_rate": 3.27532269089309e-08, "loss": 0.3364, "step": 22227 }, { "epoch": 2.8934010152284264, "grad_norm": 2.9346230030059814, "learning_rate": 3.251484750884048e-08, "loss": 0.3694, "step": 22230 }, { "epoch": 2.893791487700117, "grad_norm": 3.1397318840026855, "learning_rate": 3.227733591795734e-08, "loss": 0.3735, "step": 22233 }, { "epoch": 2.894181960171808, "grad_norm": 2.8210697174072266, "learning_rate": 3.204069217777217e-08, "loss": 0.3545, "step": 22236 }, { "epoch": 2.8945724326434985, "grad_norm": 2.865011692047119, "learning_rate": 3.1804916329627456e-08, "loss": 0.3738, "step": 22239 }, { "epoch": 2.8949629051151895, "grad_norm": 2.7513821125030518, "learning_rate": 3.157000841471247e-08, "loss": 0.3312, "step": 22242 }, { "epoch": 2.89535337758688, "grad_norm": 2.894726037979126, "learning_rate": 3.1335968474064395e-08, "loss": 0.3482, "step": 22245 }, { "epoch": 2.8957438500585706, "grad_norm": 2.9314725399017334, "learning_rate": 3.110279654857051e-08, "loss": 0.3727, "step": 22248 }, { "epoch": 2.8961343225302616, "grad_norm": 2.801659107208252, "learning_rate": 3.087049267896492e-08, "loss": 0.3829, "step": 22251 }, { "epoch": 2.8965247950019526, "grad_norm": 2.8862311840057373, "learning_rate": 3.063905690583069e-08, "loss": 0.3442, "step": 22254 }, { "epoch": 2.896915267473643, "grad_norm": 2.668565273284912, "learning_rate": 3.040848926959938e-08, "loss": 0.3629, "step": 22257 }, { "epoch": 2.8973057399453337, "grad_norm": 2.784465789794922, "learning_rate": 3.017878981055045e-08, "loss": 0.3723, "step": 22260 }, { "epoch": 2.8976962124170247, "grad_norm": 2.8275580406188965, "learning_rate": 2.9949958568811774e-08, "loss": 0.4457, "step": 22263 }, { "epoch": 2.8980866848887152, "grad_norm": 2.947969913482666, "learning_rate": 2.9721995584360286e-08, "loss": 0.3491, "step": 22266 }, { "epoch": 2.8984771573604062, "grad_norm": 3.009458303451538, "learning_rate": 2.9494900897019675e-08, "loss": 0.3924, "step": 22269 }, { "epoch": 2.898867629832097, "grad_norm": 2.875168561935425, "learning_rate": 2.926867454646376e-08, "loss": 0.3703, "step": 22272 }, { "epoch": 2.8992581023037873, "grad_norm": 2.945216417312622, "learning_rate": 2.904331657221371e-08, "loss": 0.3615, "step": 22275 }, { "epoch": 2.8996485747754783, "grad_norm": 3.359584331512451, "learning_rate": 2.8818827013638583e-08, "loss": 0.3504, "step": 22278 }, { "epoch": 2.9000390472471693, "grad_norm": 2.8335955142974854, "learning_rate": 2.859520590995646e-08, "loss": 0.3456, "step": 22281 }, { "epoch": 2.90042951971886, "grad_norm": 2.685680627822876, "learning_rate": 2.837245330023386e-08, "loss": 0.3402, "step": 22284 }, { "epoch": 2.9008199921905504, "grad_norm": 2.90457820892334, "learning_rate": 2.815056922338466e-08, "loss": 0.3305, "step": 22287 }, { "epoch": 2.9012104646622414, "grad_norm": 2.739206314086914, "learning_rate": 2.79295537181723e-08, "loss": 0.338, "step": 22290 }, { "epoch": 2.901600937133932, "grad_norm": 3.032250165939331, "learning_rate": 2.770940682320644e-08, "loss": 0.3378, "step": 22293 }, { "epoch": 2.901991409605623, "grad_norm": 2.631166696548462, "learning_rate": 2.749012857694744e-08, "loss": 0.3898, "step": 22296 }, { "epoch": 2.9023818820773135, "grad_norm": 2.825129747390747, "learning_rate": 2.7271719017702424e-08, "loss": 0.3608, "step": 22299 }, { "epoch": 2.902772354549004, "grad_norm": 2.757324457168579, "learning_rate": 2.7054178183626988e-08, "loss": 0.4074, "step": 22302 }, { "epoch": 2.903162827020695, "grad_norm": 2.9708240032196045, "learning_rate": 2.683750611272462e-08, "loss": 0.314, "step": 22305 }, { "epoch": 2.903553299492386, "grad_norm": 3.2644076347351074, "learning_rate": 2.6621702842848372e-08, "loss": 0.3526, "step": 22308 }, { "epoch": 2.9039437719640766, "grad_norm": 2.9245588779449463, "learning_rate": 2.6406768411698093e-08, "loss": 0.3377, "step": 22311 }, { "epoch": 2.904334244435767, "grad_norm": 2.6325671672821045, "learning_rate": 2.6192702856822073e-08, "loss": 0.3118, "step": 22314 }, { "epoch": 2.904724716907458, "grad_norm": 3.0001933574676514, "learning_rate": 2.597950621561818e-08, "loss": 0.3108, "step": 22317 }, { "epoch": 2.9051151893791487, "grad_norm": 3.227618932723999, "learning_rate": 2.5767178525330504e-08, "loss": 0.39, "step": 22320 }, { "epoch": 2.9055056618508397, "grad_norm": 2.91813325881958, "learning_rate": 2.555571982305216e-08, "loss": 0.3227, "step": 22323 }, { "epoch": 2.9058961343225302, "grad_norm": 2.577854633331299, "learning_rate": 2.534513014572526e-08, "loss": 0.2979, "step": 22326 }, { "epoch": 2.906286606794221, "grad_norm": 2.738893985748291, "learning_rate": 2.513540953013871e-08, "loss": 0.377, "step": 22329 }, { "epoch": 2.906677079265912, "grad_norm": 2.668468475341797, "learning_rate": 2.4926558012930425e-08, "loss": 0.3525, "step": 22332 }, { "epoch": 2.9070675517376023, "grad_norm": 2.666583299636841, "learning_rate": 2.471857563058677e-08, "loss": 0.3823, "step": 22335 }, { "epoch": 2.9074580242092933, "grad_norm": 3.1053268909454346, "learning_rate": 2.4511462419441466e-08, "loss": 0.3824, "step": 22338 }, { "epoch": 2.907848496680984, "grad_norm": 2.7575018405914307, "learning_rate": 2.4305218415677235e-08, "loss": 0.3984, "step": 22341 }, { "epoch": 2.908238969152675, "grad_norm": 3.4204914569854736, "learning_rate": 2.409984365532303e-08, "loss": 0.3681, "step": 22344 }, { "epoch": 2.9086294416243654, "grad_norm": 2.5891804695129395, "learning_rate": 2.389533817425904e-08, "loss": 0.318, "step": 22347 }, { "epoch": 2.9090199140960564, "grad_norm": 2.9645187854766846, "learning_rate": 2.3691702008211136e-08, "loss": 0.3515, "step": 22350 }, { "epoch": 2.909410386567747, "grad_norm": 3.2020936012268066, "learning_rate": 2.348893519275364e-08, "loss": 0.3788, "step": 22353 }, { "epoch": 2.9098008590394375, "grad_norm": 2.665403127670288, "learning_rate": 2.3287037763310984e-08, "loss": 0.3548, "step": 22356 }, { "epoch": 2.9101913315111285, "grad_norm": 2.6909213066101074, "learning_rate": 2.3086009755152738e-08, "loss": 0.3125, "step": 22359 }, { "epoch": 2.910581803982819, "grad_norm": 2.763498067855835, "learning_rate": 2.2885851203399146e-08, "loss": 0.3864, "step": 22362 }, { "epoch": 2.91097227645451, "grad_norm": 2.798532485961914, "learning_rate": 2.268656214301668e-08, "loss": 0.3635, "step": 22365 }, { "epoch": 2.9113627489262006, "grad_norm": 2.9937210083007812, "learning_rate": 2.2488142608821373e-08, "loss": 0.4099, "step": 22368 }, { "epoch": 2.9117532213978916, "grad_norm": 2.972405195236206, "learning_rate": 2.2290592635476615e-08, "loss": 0.4077, "step": 22371 }, { "epoch": 2.912143693869582, "grad_norm": 2.8520312309265137, "learning_rate": 2.2093912257493133e-08, "loss": 0.3675, "step": 22374 }, { "epoch": 2.912534166341273, "grad_norm": 2.894411087036133, "learning_rate": 2.1898101509231772e-08, "loss": 0.3879, "step": 22377 }, { "epoch": 2.9129246388129637, "grad_norm": 2.9919564723968506, "learning_rate": 2.170316042489906e-08, "loss": 0.4072, "step": 22380 }, { "epoch": 2.9133151112846543, "grad_norm": 2.7866508960723877, "learning_rate": 2.1509089038551645e-08, "loss": 0.3156, "step": 22383 }, { "epoch": 2.9137055837563453, "grad_norm": 3.07585072517395, "learning_rate": 2.1315887384093513e-08, "loss": 0.3686, "step": 22386 }, { "epoch": 2.914096056228036, "grad_norm": 3.257378339767456, "learning_rate": 2.1123555495276005e-08, "loss": 0.3546, "step": 22389 }, { "epoch": 2.914486528699727, "grad_norm": 3.2451460361480713, "learning_rate": 2.093209340569946e-08, "loss": 0.346, "step": 22392 }, { "epoch": 2.9148770011714173, "grad_norm": 2.622567653656006, "learning_rate": 2.074150114881157e-08, "loss": 0.3506, "step": 22395 }, { "epoch": 2.915267473643108, "grad_norm": 2.6424460411071777, "learning_rate": 2.055177875790848e-08, "loss": 0.3617, "step": 22398 }, { "epoch": 2.915657946114799, "grad_norm": 2.814903974533081, "learning_rate": 2.036292626613423e-08, "loss": 0.3418, "step": 22401 }, { "epoch": 2.91604841858649, "grad_norm": 2.869673728942871, "learning_rate": 2.0174943706481874e-08, "loss": 0.3141, "step": 22404 }, { "epoch": 2.9164388910581804, "grad_norm": 2.6564950942993164, "learning_rate": 1.9987831111790147e-08, "loss": 0.3139, "step": 22407 }, { "epoch": 2.916829363529871, "grad_norm": 2.9242870807647705, "learning_rate": 1.980158851474845e-08, "loss": 0.4097, "step": 22410 }, { "epoch": 2.917219836001562, "grad_norm": 2.8616063594818115, "learning_rate": 1.9616215947892427e-08, "loss": 0.3452, "step": 22413 }, { "epoch": 2.9176103084732525, "grad_norm": 2.781572103500366, "learning_rate": 1.9431713443605616e-08, "loss": 0.384, "step": 22416 }, { "epoch": 2.9180007809449435, "grad_norm": 3.0392205715179443, "learning_rate": 1.924808103412168e-08, "loss": 0.4144, "step": 22419 }, { "epoch": 2.918391253416634, "grad_norm": 2.8837602138519287, "learning_rate": 1.9065318751519402e-08, "loss": 0.3395, "step": 22422 }, { "epoch": 2.9187817258883246, "grad_norm": 2.5549280643463135, "learning_rate": 1.8883426627727684e-08, "loss": 0.3312, "step": 22425 }, { "epoch": 2.9191721983600156, "grad_norm": 3.014940023422241, "learning_rate": 1.870240469452278e-08, "loss": 0.3757, "step": 22428 }, { "epoch": 2.9195626708317066, "grad_norm": 2.8069894313812256, "learning_rate": 1.8522252983528832e-08, "loss": 0.3599, "step": 22431 }, { "epoch": 2.919953143303397, "grad_norm": 4.53384256362915, "learning_rate": 1.8342971526217334e-08, "loss": 0.3301, "step": 22434 }, { "epoch": 2.9203436157750877, "grad_norm": 2.5881564617156982, "learning_rate": 1.8164560353909344e-08, "loss": 0.3605, "step": 22437 }, { "epoch": 2.9207340882467787, "grad_norm": 2.660621166229248, "learning_rate": 1.798701949777215e-08, "loss": 0.3414, "step": 22440 }, { "epoch": 2.9211245607184693, "grad_norm": 2.7579736709594727, "learning_rate": 1.7810348988822058e-08, "loss": 0.3739, "step": 22443 }, { "epoch": 2.9215150331901603, "grad_norm": 3.0707061290740967, "learning_rate": 1.7634548857922707e-08, "loss": 0.3433, "step": 22446 }, { "epoch": 2.921905505661851, "grad_norm": 2.9784891605377197, "learning_rate": 1.7459619135786753e-08, "loss": 0.3297, "step": 22449 }, { "epoch": 2.9222959781335414, "grad_norm": 2.9023778438568115, "learning_rate": 1.728555985297309e-08, "loss": 0.3122, "step": 22452 }, { "epoch": 2.9226864506052324, "grad_norm": 2.9779181480407715, "learning_rate": 1.711237103989072e-08, "loss": 0.3818, "step": 22455 }, { "epoch": 2.9230769230769234, "grad_norm": 2.76466703414917, "learning_rate": 1.6940052726793776e-08, "loss": 0.3163, "step": 22458 }, { "epoch": 2.923467395548614, "grad_norm": 2.9506185054779053, "learning_rate": 1.6768604943787624e-08, "loss": 0.2745, "step": 22461 }, { "epoch": 2.9238578680203045, "grad_norm": 2.865941047668457, "learning_rate": 1.659802772082275e-08, "loss": 0.3797, "step": 22464 }, { "epoch": 2.9242483404919954, "grad_norm": 2.825014591217041, "learning_rate": 1.642832108769865e-08, "loss": 0.4346, "step": 22467 }, { "epoch": 2.924638812963686, "grad_norm": 2.9251394271850586, "learning_rate": 1.6259485074063276e-08, "loss": 0.3382, "step": 22470 }, { "epoch": 2.925029285435377, "grad_norm": 2.5438222885131836, "learning_rate": 1.6091519709411363e-08, "loss": 0.3026, "step": 22473 }, { "epoch": 2.9254197579070675, "grad_norm": 2.7614340782165527, "learning_rate": 1.5924425023086665e-08, "loss": 0.3606, "step": 22476 }, { "epoch": 2.925810230378758, "grad_norm": 2.884199619293213, "learning_rate": 1.575820104427972e-08, "loss": 0.3452, "step": 22479 }, { "epoch": 2.926200702850449, "grad_norm": 2.60504150390625, "learning_rate": 1.559284780202952e-08, "loss": 0.3219, "step": 22482 }, { "epoch": 2.9265911753221396, "grad_norm": 3.0407423973083496, "learning_rate": 1.5428365325223516e-08, "loss": 0.3445, "step": 22485 }, { "epoch": 2.9269816477938306, "grad_norm": 2.683939218521118, "learning_rate": 1.5264753642595387e-08, "loss": 0.3529, "step": 22488 }, { "epoch": 2.927372120265521, "grad_norm": 2.9559898376464844, "learning_rate": 1.510201278272949e-08, "loss": 0.3729, "step": 22491 }, { "epoch": 2.927762592737212, "grad_norm": 2.676271438598633, "learning_rate": 1.4940142774054755e-08, "loss": 0.4197, "step": 22494 }, { "epoch": 2.9281530652089027, "grad_norm": 3.0455739498138428, "learning_rate": 1.4779143644850225e-08, "loss": 0.4026, "step": 22497 }, { "epoch": 2.9285435376805937, "grad_norm": 3.0256705284118652, "learning_rate": 1.4619015423241178e-08, "loss": 0.4349, "step": 22500 }, { "epoch": 2.9289340101522843, "grad_norm": 2.6953423023223877, "learning_rate": 1.4459758137203572e-08, "loss": 0.324, "step": 22503 }, { "epoch": 2.929324482623975, "grad_norm": 3.021960735321045, "learning_rate": 1.4301371814557374e-08, "loss": 0.3427, "step": 22506 }, { "epoch": 2.929714955095666, "grad_norm": 2.9862983226776123, "learning_rate": 1.4143856482973228e-08, "loss": 0.3093, "step": 22509 }, { "epoch": 2.9301054275673564, "grad_norm": 2.836103916168213, "learning_rate": 1.3987212169969121e-08, "loss": 0.3272, "step": 22512 }, { "epoch": 2.9304959000390474, "grad_norm": 2.806823492050171, "learning_rate": 1.3831438902909834e-08, "loss": 0.3633, "step": 22515 }, { "epoch": 2.930886372510738, "grad_norm": 3.1762378215789795, "learning_rate": 1.3676536709008593e-08, "loss": 0.3304, "step": 22518 }, { "epoch": 2.9312768449824285, "grad_norm": 2.651766300201416, "learning_rate": 1.3522505615326531e-08, "loss": 0.3698, "step": 22521 }, { "epoch": 2.9316673174541195, "grad_norm": 2.5449414253234863, "learning_rate": 1.336934564877268e-08, "loss": 0.4071, "step": 22524 }, { "epoch": 2.9320577899258105, "grad_norm": 2.726243495941162, "learning_rate": 1.3217056836104525e-08, "loss": 0.4334, "step": 22527 }, { "epoch": 2.932448262397501, "grad_norm": 2.689444065093994, "learning_rate": 1.3065639203925229e-08, "loss": 0.3296, "step": 22530 }, { "epoch": 2.9328387348691916, "grad_norm": 2.9110662937164307, "learning_rate": 1.2915092778688077e-08, "loss": 0.381, "step": 22533 }, { "epoch": 2.9332292073408825, "grad_norm": 3.017112970352173, "learning_rate": 1.2765417586692586e-08, "loss": 0.366, "step": 22536 }, { "epoch": 2.933619679812573, "grad_norm": 2.5785021781921387, "learning_rate": 1.2616613654087285e-08, "loss": 0.3984, "step": 22539 }, { "epoch": 2.934010152284264, "grad_norm": 2.9283220767974854, "learning_rate": 1.2468681006868044e-08, "loss": 0.3302, "step": 22542 }, { "epoch": 2.9344006247559546, "grad_norm": 2.7230112552642822, "learning_rate": 1.2321619670877527e-08, "loss": 0.3774, "step": 22545 }, { "epoch": 2.934791097227645, "grad_norm": 2.7927682399749756, "learning_rate": 1.2175429671807405e-08, "loss": 0.421, "step": 22548 }, { "epoch": 2.935181569699336, "grad_norm": 2.6731719970703125, "learning_rate": 1.2030111035197245e-08, "loss": 0.3555, "step": 22551 }, { "epoch": 2.935572042171027, "grad_norm": 2.5603408813476562, "learning_rate": 1.1885663786433411e-08, "loss": 0.3375, "step": 22554 }, { "epoch": 2.9359625146427177, "grad_norm": 2.7101588249206543, "learning_rate": 1.1742087950750714e-08, "loss": 0.3384, "step": 22557 }, { "epoch": 2.9363529871144083, "grad_norm": 2.6867103576660156, "learning_rate": 1.1599383553231314e-08, "loss": 0.3236, "step": 22560 }, { "epoch": 2.9367434595860993, "grad_norm": 3.0580341815948486, "learning_rate": 1.1457550618805824e-08, "loss": 0.3854, "step": 22563 }, { "epoch": 2.93713393205779, "grad_norm": 2.929643154144287, "learning_rate": 1.1316589172251091e-08, "loss": 0.344, "step": 22566 }, { "epoch": 2.937524404529481, "grad_norm": 2.806877613067627, "learning_rate": 1.1176499238194639e-08, "loss": 0.3385, "step": 22569 }, { "epoch": 2.9379148770011714, "grad_norm": 2.79831600189209, "learning_rate": 1.1037280841108e-08, "loss": 0.3901, "step": 22572 }, { "epoch": 2.938305349472862, "grad_norm": 2.647631883621216, "learning_rate": 1.0898934005313389e-08, "loss": 0.3789, "step": 22575 }, { "epoch": 2.938695821944553, "grad_norm": 2.671846866607666, "learning_rate": 1.0761458754979804e-08, "loss": 0.2923, "step": 22578 }, { "epoch": 2.939086294416244, "grad_norm": 2.559683084487915, "learning_rate": 1.0624855114123035e-08, "loss": 0.3096, "step": 22581 }, { "epoch": 2.9394767668879345, "grad_norm": 2.7834372520446777, "learning_rate": 1.0489123106608434e-08, "loss": 0.3449, "step": 22584 }, { "epoch": 2.939867239359625, "grad_norm": 2.7174694538116455, "learning_rate": 1.0354262756147593e-08, "loss": 0.3508, "step": 22587 }, { "epoch": 2.940257711831316, "grad_norm": 2.7829127311706543, "learning_rate": 1.0220274086299998e-08, "loss": 0.397, "step": 22590 }, { "epoch": 2.9406481843030066, "grad_norm": 2.9258644580841064, "learning_rate": 1.0087157120474145e-08, "loss": 0.3757, "step": 22593 }, { "epoch": 2.9410386567746976, "grad_norm": 2.8890163898468018, "learning_rate": 9.954911881924212e-09, "loss": 0.3301, "step": 22596 }, { "epoch": 2.941429129246388, "grad_norm": 2.4715800285339355, "learning_rate": 9.82353839375394e-09, "loss": 0.3031, "step": 22599 }, { "epoch": 2.9418196017180787, "grad_norm": 2.8599045276641846, "learning_rate": 9.693036678913303e-09, "loss": 0.3638, "step": 22602 }, { "epoch": 2.9422100741897697, "grad_norm": 2.724221706390381, "learning_rate": 9.563406760201288e-09, "loss": 0.4034, "step": 22605 }, { "epoch": 2.94260054666146, "grad_norm": 2.7690560817718506, "learning_rate": 9.434648660263668e-09, "loss": 0.3274, "step": 22608 }, { "epoch": 2.942991019133151, "grad_norm": 2.534320592880249, "learning_rate": 9.30676240159467e-09, "loss": 0.3847, "step": 22611 }, { "epoch": 2.9433814916048417, "grad_norm": 2.9107460975646973, "learning_rate": 9.179748006535317e-09, "loss": 0.3455, "step": 22614 }, { "epoch": 2.9437719640765327, "grad_norm": 2.507080554962158, "learning_rate": 9.05360549727452e-09, "loss": 0.3132, "step": 22617 }, { "epoch": 2.9441624365482233, "grad_norm": 2.537360191345215, "learning_rate": 8.928334895849656e-09, "loss": 0.3198, "step": 22620 }, { "epoch": 2.9445529090199143, "grad_norm": 2.297013282775879, "learning_rate": 8.803936224144883e-09, "loss": 0.2882, "step": 22623 }, { "epoch": 2.944943381491605, "grad_norm": 2.7976059913635254, "learning_rate": 8.680409503892817e-09, "loss": 0.4199, "step": 22626 }, { "epoch": 2.9453338539632954, "grad_norm": 2.64867901802063, "learning_rate": 8.557754756672864e-09, "loss": 0.3469, "step": 22629 }, { "epoch": 2.9457243264349864, "grad_norm": 2.9491302967071533, "learning_rate": 8.435972003912329e-09, "loss": 0.3483, "step": 22632 }, { "epoch": 2.946114798906677, "grad_norm": 2.6923861503601074, "learning_rate": 8.31506126688697e-09, "loss": 0.3447, "step": 22635 }, { "epoch": 2.946505271378368, "grad_norm": 3.363018035888672, "learning_rate": 8.19502256671989e-09, "loss": 0.3398, "step": 22638 }, { "epoch": 2.9468957438500585, "grad_norm": 2.8834447860717773, "learning_rate": 8.075855924380427e-09, "loss": 0.4022, "step": 22641 }, { "epoch": 2.9472862163217495, "grad_norm": 2.857146978378296, "learning_rate": 7.957561360688038e-09, "loss": 0.3495, "step": 22644 }, { "epoch": 2.94767668879344, "grad_norm": 2.984123468399048, "learning_rate": 7.840138896307303e-09, "loss": 0.308, "step": 22647 }, { "epoch": 2.948067161265131, "grad_norm": 3.035522222518921, "learning_rate": 7.723588551752925e-09, "loss": 0.3807, "step": 22650 }, { "epoch": 2.9484576337368216, "grad_norm": 2.7213213443756104, "learning_rate": 7.607910347385283e-09, "loss": 0.3534, "step": 22653 }, { "epoch": 2.948848106208512, "grad_norm": 2.8660976886749268, "learning_rate": 7.493104303413212e-09, "loss": 0.3385, "step": 22656 }, { "epoch": 2.949238578680203, "grad_norm": 2.926711320877075, "learning_rate": 7.379170439892891e-09, "loss": 0.3535, "step": 22659 }, { "epoch": 2.9496290511518937, "grad_norm": 2.7307538986206055, "learning_rate": 7.266108776728953e-09, "loss": 0.3584, "step": 22662 }, { "epoch": 2.9500195236235847, "grad_norm": 2.8166987895965576, "learning_rate": 7.153919333672266e-09, "loss": 0.322, "step": 22665 }, { "epoch": 2.950409996095275, "grad_norm": 2.866868734359741, "learning_rate": 7.042602130322707e-09, "loss": 0.3712, "step": 22668 }, { "epoch": 2.9508004685669658, "grad_norm": 2.841460704803467, "learning_rate": 6.932157186126942e-09, "loss": 0.4131, "step": 22671 }, { "epoch": 2.9511909410386568, "grad_norm": 2.5666918754577637, "learning_rate": 6.822584520379538e-09, "loss": 0.3417, "step": 22674 }, { "epoch": 2.9515814135103478, "grad_norm": 3.38871431350708, "learning_rate": 6.713884152222405e-09, "loss": 0.3104, "step": 22677 }, { "epoch": 2.9519718859820383, "grad_norm": 3.0104832649230957, "learning_rate": 6.6060561006453525e-09, "loss": 0.4312, "step": 22680 }, { "epoch": 2.952362358453729, "grad_norm": 3.21549129486084, "learning_rate": 6.499100384485535e-09, "loss": 0.3817, "step": 22683 }, { "epoch": 2.95275283092542, "grad_norm": 3.07248592376709, "learning_rate": 6.393017022428005e-09, "loss": 0.3889, "step": 22686 }, { "epoch": 2.9531433033971104, "grad_norm": 2.6400251388549805, "learning_rate": 6.287806033005717e-09, "loss": 0.3869, "step": 22689 }, { "epoch": 2.9535337758688014, "grad_norm": 2.670640707015991, "learning_rate": 6.1834674345984115e-09, "loss": 0.2964, "step": 22692 }, { "epoch": 2.953924248340492, "grad_norm": 2.874105215072632, "learning_rate": 6.080001245433731e-09, "loss": 0.3724, "step": 22695 }, { "epoch": 2.9543147208121825, "grad_norm": 2.8548583984375, "learning_rate": 5.977407483587217e-09, "loss": 0.3864, "step": 22698 }, { "epoch": 2.9547051932838735, "grad_norm": 2.812763214111328, "learning_rate": 5.875686166981753e-09, "loss": 0.3374, "step": 22701 }, { "epoch": 2.9550956657555645, "grad_norm": 2.711881637573242, "learning_rate": 5.7748373133875715e-09, "loss": 0.3739, "step": 22704 }, { "epoch": 2.955486138227255, "grad_norm": 2.6988415718078613, "learning_rate": 5.674860940423354e-09, "loss": 0.334, "step": 22707 }, { "epoch": 2.9558766106989456, "grad_norm": 2.473297357559204, "learning_rate": 5.5757570655545765e-09, "loss": 0.3704, "step": 22710 }, { "epoch": 2.9562670831706366, "grad_norm": 2.6674001216888428, "learning_rate": 5.477525706094056e-09, "loss": 0.364, "step": 22713 }, { "epoch": 2.956657555642327, "grad_norm": 2.651705741882324, "learning_rate": 5.380166879202508e-09, "loss": 0.3361, "step": 22716 }, { "epoch": 2.957048028114018, "grad_norm": 2.93682599067688, "learning_rate": 5.283680601889107e-09, "loss": 0.4136, "step": 22719 }, { "epoch": 2.9574385005857087, "grad_norm": 2.921872854232788, "learning_rate": 5.188066891009258e-09, "loss": 0.3346, "step": 22722 }, { "epoch": 2.9578289730573992, "grad_norm": 2.8283815383911133, "learning_rate": 5.093325763266821e-09, "loss": 0.2995, "step": 22725 }, { "epoch": 2.95821944552909, "grad_norm": 2.4506702423095703, "learning_rate": 4.999457235212446e-09, "loss": 0.3262, "step": 22728 }, { "epoch": 2.958609918000781, "grad_norm": 2.559972047805786, "learning_rate": 4.906461323244683e-09, "loss": 0.3491, "step": 22731 }, { "epoch": 2.9590003904724718, "grad_norm": 2.777678966522217, "learning_rate": 4.814338043609979e-09, "loss": 0.3715, "step": 22734 }, { "epoch": 2.9593908629441623, "grad_norm": 2.797595977783203, "learning_rate": 4.7230874124026825e-09, "loss": 0.3736, "step": 22737 }, { "epoch": 2.9597813354158533, "grad_norm": 2.576273202896118, "learning_rate": 4.632709445562822e-09, "loss": 0.3177, "step": 22740 }, { "epoch": 2.960171807887544, "grad_norm": 2.8237977027893066, "learning_rate": 4.543204158879988e-09, "loss": 0.3287, "step": 22743 }, { "epoch": 2.960562280359235, "grad_norm": 2.5393621921539307, "learning_rate": 4.454571567991117e-09, "loss": 0.3608, "step": 22746 }, { "epoch": 2.9609527528309254, "grad_norm": 2.9079854488372803, "learning_rate": 4.3668116883788245e-09, "loss": 0.3497, "step": 22749 }, { "epoch": 2.961343225302616, "grad_norm": 2.9478375911712646, "learning_rate": 4.2799245353752905e-09, "loss": 0.3858, "step": 22752 }, { "epoch": 2.961733697774307, "grad_norm": 2.845107316970825, "learning_rate": 4.193910124160039e-09, "loss": 0.3556, "step": 22755 }, { "epoch": 2.9621241702459975, "grad_norm": 2.638409376144409, "learning_rate": 4.108768469758273e-09, "loss": 0.2997, "step": 22758 }, { "epoch": 2.9625146427176885, "grad_norm": 3.1192007064819336, "learning_rate": 4.0244995870453166e-09, "loss": 0.3626, "step": 22761 }, { "epoch": 2.962905115189379, "grad_norm": 2.773621082305908, "learning_rate": 3.941103490742171e-09, "loss": 0.2836, "step": 22764 }, { "epoch": 2.96329558766107, "grad_norm": 2.718259811401367, "learning_rate": 3.858580195418293e-09, "loss": 0.3435, "step": 22767 }, { "epoch": 2.9636860601327606, "grad_norm": 2.8922083377838135, "learning_rate": 3.77692971548993e-09, "loss": 0.3966, "step": 22770 }, { "epoch": 2.9640765326044516, "grad_norm": 2.6764767169952393, "learning_rate": 3.6961520652212256e-09, "loss": 0.3352, "step": 22773 }, { "epoch": 2.964467005076142, "grad_norm": 2.891240119934082, "learning_rate": 3.6162472587242257e-09, "loss": 0.3341, "step": 22776 }, { "epoch": 2.9648574775478327, "grad_norm": 2.7254581451416016, "learning_rate": 3.5372153099583195e-09, "loss": 0.3189, "step": 22779 }, { "epoch": 2.9652479500195237, "grad_norm": 3.07497501373291, "learning_rate": 3.459056232729685e-09, "loss": 0.3985, "step": 22782 }, { "epoch": 2.9656384224912142, "grad_norm": 2.8364953994750977, "learning_rate": 3.3817700406924004e-09, "loss": 0.3572, "step": 22785 }, { "epoch": 2.9660288949629052, "grad_norm": 2.677969217300415, "learning_rate": 3.305356747348998e-09, "loss": 0.3667, "step": 22788 }, { "epoch": 2.9664193674345958, "grad_norm": 2.8867709636688232, "learning_rate": 3.229816366047689e-09, "loss": 0.3836, "step": 22791 }, { "epoch": 2.9668098399062868, "grad_norm": 2.7615604400634766, "learning_rate": 3.15514890998625e-09, "loss": 0.3543, "step": 22794 }, { "epoch": 2.9672003123779773, "grad_norm": 2.8149735927581787, "learning_rate": 3.0813543922081357e-09, "loss": 0.3806, "step": 22797 }, { "epoch": 2.9675907848496683, "grad_norm": 2.686567544937134, "learning_rate": 3.0084328256058116e-09, "loss": 0.3664, "step": 22800 }, { "epoch": 2.967981257321359, "grad_norm": 2.6036901473999023, "learning_rate": 2.9363842229179763e-09, "loss": 0.3127, "step": 22803 }, { "epoch": 2.9683717297930494, "grad_norm": 2.843297004699707, "learning_rate": 2.865208596731783e-09, "loss": 0.3899, "step": 22806 }, { "epoch": 2.9687622022647404, "grad_norm": 2.4631242752075195, "learning_rate": 2.7949059594806203e-09, "loss": 0.3895, "step": 22809 }, { "epoch": 2.969152674736431, "grad_norm": 2.619086980819702, "learning_rate": 2.7254763234474404e-09, "loss": 0.277, "step": 22812 }, { "epoch": 2.969543147208122, "grad_norm": 2.584728717803955, "learning_rate": 2.6569197007603186e-09, "loss": 0.3257, "step": 22815 }, { "epoch": 2.9699336196798125, "grad_norm": 3.1949074268341064, "learning_rate": 2.589236103396897e-09, "loss": 0.408, "step": 22818 }, { "epoch": 2.970324092151503, "grad_norm": 2.800149440765381, "learning_rate": 2.5224255431804957e-09, "loss": 0.3305, "step": 22821 }, { "epoch": 2.970714564623194, "grad_norm": 2.811697006225586, "learning_rate": 2.4564880317834438e-09, "loss": 0.3886, "step": 22824 }, { "epoch": 2.971105037094885, "grad_norm": 2.7836661338806152, "learning_rate": 2.391423580724861e-09, "loss": 0.3741, "step": 22827 }, { "epoch": 2.9714955095665756, "grad_norm": 2.9304723739624023, "learning_rate": 2.327232201370655e-09, "loss": 0.3688, "step": 22830 }, { "epoch": 2.971885982038266, "grad_norm": 2.7736597061157227, "learning_rate": 2.263913904935744e-09, "loss": 0.3645, "step": 22833 }, { "epoch": 2.972276454509957, "grad_norm": 2.527412176132202, "learning_rate": 2.20146870248128e-09, "loss": 0.3668, "step": 22836 }, { "epoch": 2.9726669269816477, "grad_norm": 2.9895877838134766, "learning_rate": 2.139896604916314e-09, "loss": 0.4066, "step": 22839 }, { "epoch": 2.9730573994533387, "grad_norm": 2.8589468002319336, "learning_rate": 2.079197622997242e-09, "loss": 0.3825, "step": 22842 }, { "epoch": 2.9734478719250292, "grad_norm": 2.533005475997925, "learning_rate": 2.0193717673283597e-09, "loss": 0.3998, "step": 22845 }, { "epoch": 2.97383834439672, "grad_norm": 2.7130699157714844, "learning_rate": 1.9604190483613062e-09, "loss": 0.3201, "step": 22848 }, { "epoch": 2.974228816868411, "grad_norm": 2.8447370529174805, "learning_rate": 1.9023394763945104e-09, "loss": 0.3743, "step": 22851 }, { "epoch": 2.974619289340102, "grad_norm": 3.0323686599731445, "learning_rate": 1.8451330615748553e-09, "loss": 0.3989, "step": 22854 }, { "epoch": 2.9750097618117923, "grad_norm": 3.1013906002044678, "learning_rate": 1.7887998138954587e-09, "loss": 0.3767, "step": 22857 }, { "epoch": 2.975400234283483, "grad_norm": 2.792752504348755, "learning_rate": 1.7333397431984477e-09, "loss": 0.3696, "step": 22860 }, { "epoch": 2.975790706755174, "grad_norm": 2.7394256591796875, "learning_rate": 1.6787528591716284e-09, "loss": 0.3893, "step": 22863 }, { "epoch": 2.9761811792268644, "grad_norm": 3.603832244873047, "learning_rate": 1.6250391713523717e-09, "loss": 0.3438, "step": 22866 }, { "epoch": 2.9765716516985554, "grad_norm": 2.743468761444092, "learning_rate": 1.5721986891231722e-09, "loss": 0.3395, "step": 22869 }, { "epoch": 2.976962124170246, "grad_norm": 2.7976534366607666, "learning_rate": 1.5202314217160895e-09, "loss": 0.3345, "step": 22872 }, { "epoch": 2.9773525966419365, "grad_norm": 2.8233566284179688, "learning_rate": 1.4691373782088625e-09, "loss": 0.4015, "step": 22875 }, { "epoch": 2.9777430691136275, "grad_norm": 2.91632080078125, "learning_rate": 1.418916567528239e-09, "loss": 0.3493, "step": 22878 }, { "epoch": 2.9781335415853185, "grad_norm": 2.812035322189331, "learning_rate": 1.3695689984477566e-09, "loss": 0.3718, "step": 22881 }, { "epoch": 2.978524014057009, "grad_norm": 2.8257293701171875, "learning_rate": 1.3210946795877423e-09, "loss": 0.3545, "step": 22884 }, { "epoch": 2.9789144865286996, "grad_norm": 2.968236207962036, "learning_rate": 1.2734936194164215e-09, "loss": 0.3936, "step": 22887 }, { "epoch": 2.9793049590003906, "grad_norm": 2.8840749263763428, "learning_rate": 1.2267658262504756e-09, "loss": 0.4015, "step": 22890 }, { "epoch": 2.979695431472081, "grad_norm": 2.828843355178833, "learning_rate": 1.1809113082522638e-09, "loss": 0.2979, "step": 22893 }, { "epoch": 2.980085903943772, "grad_norm": 2.849477767944336, "learning_rate": 1.1359300734331558e-09, "loss": 0.3837, "step": 22896 }, { "epoch": 2.9804763764154627, "grad_norm": 2.721007823944092, "learning_rate": 1.0918221296507547e-09, "loss": 0.4118, "step": 22899 }, { "epoch": 2.9808668488871533, "grad_norm": 2.5880935192108154, "learning_rate": 1.0485874846111189e-09, "loss": 0.3802, "step": 22902 }, { "epoch": 2.9812573213588442, "grad_norm": 2.763317108154297, "learning_rate": 1.0062261458670952e-09, "loss": 0.3266, "step": 22905 }, { "epoch": 2.981647793830535, "grad_norm": 2.6199758052825928, "learning_rate": 9.647381208188755e-10, "loss": 0.3894, "step": 22908 }, { "epoch": 2.982038266302226, "grad_norm": 3.725672960281372, "learning_rate": 9.241234167139956e-10, "loss": 0.4053, "step": 22911 }, { "epoch": 2.9824287387739163, "grad_norm": 2.939629077911377, "learning_rate": 8.843820406490011e-10, "loss": 0.3776, "step": 22914 }, { "epoch": 2.9828192112456073, "grad_norm": 2.6625006198883057, "learning_rate": 8.455139995655615e-10, "loss": 0.3032, "step": 22917 }, { "epoch": 2.983209683717298, "grad_norm": 2.914444923400879, "learning_rate": 8.075193002538006e-10, "loss": 0.3967, "step": 22920 }, { "epoch": 2.983600156188989, "grad_norm": 2.957470655441284, "learning_rate": 7.703979493522973e-10, "loss": 0.3776, "step": 22923 }, { "epoch": 2.9839906286606794, "grad_norm": 3.152146100997925, "learning_rate": 7.341499533447538e-10, "loss": 0.3585, "step": 22926 }, { "epoch": 2.98438110113237, "grad_norm": 2.8697428703308105, "learning_rate": 6.987753185649926e-10, "loss": 0.4167, "step": 22929 }, { "epoch": 2.984771573604061, "grad_norm": 3.1371231079101562, "learning_rate": 6.642740511914047e-10, "loss": 0.3816, "step": 22932 }, { "epoch": 2.9851620460757515, "grad_norm": 3.021503210067749, "learning_rate": 6.306461572525013e-10, "loss": 0.3535, "step": 22935 }, { "epoch": 2.9855525185474425, "grad_norm": 2.5989933013916016, "learning_rate": 5.978916426230274e-10, "loss": 0.3127, "step": 22938 }, { "epoch": 2.985942991019133, "grad_norm": 2.7822329998016357, "learning_rate": 5.660105130239624e-10, "loss": 0.354, "step": 22941 }, { "epoch": 2.9863334634908236, "grad_norm": 2.7127301692962646, "learning_rate": 5.350027740258501e-10, "loss": 0.3868, "step": 22944 }, { "epoch": 2.9867239359625146, "grad_norm": 3.0589182376861572, "learning_rate": 5.048684310454688e-10, "loss": 0.3267, "step": 22947 }, { "epoch": 2.9871144084342056, "grad_norm": 2.661449670791626, "learning_rate": 4.756074893469409e-10, "loss": 0.3292, "step": 22950 }, { "epoch": 2.987504880905896, "grad_norm": 2.9641380310058594, "learning_rate": 4.4721995404284345e-10, "loss": 0.4186, "step": 22953 }, { "epoch": 2.9878953533775867, "grad_norm": 2.9655439853668213, "learning_rate": 4.197058300914325e-10, "loss": 0.3803, "step": 22956 }, { "epoch": 2.9882858258492777, "grad_norm": 3.2909016609191895, "learning_rate": 3.930651222999737e-10, "loss": 0.391, "step": 22959 }, { "epoch": 2.9886762983209683, "grad_norm": 2.9940195083618164, "learning_rate": 3.6729783532196696e-10, "loss": 0.3484, "step": 22962 }, { "epoch": 2.9890667707926593, "grad_norm": 3.0727415084838867, "learning_rate": 3.424039736599216e-10, "loss": 0.3758, "step": 22965 }, { "epoch": 2.98945724326435, "grad_norm": 2.5275540351867676, "learning_rate": 3.1838354166202623e-10, "loss": 0.3679, "step": 22968 }, { "epoch": 2.9898477157360404, "grad_norm": 2.776193380355835, "learning_rate": 2.9523654352492384e-10, "loss": 0.354, "step": 22971 }, { "epoch": 2.9902381882077314, "grad_norm": 2.7540743350982666, "learning_rate": 2.7296298329204664e-10, "loss": 0.3332, "step": 22974 }, { "epoch": 2.9906286606794223, "grad_norm": 2.7912518978118896, "learning_rate": 2.5156286485417126e-10, "loss": 0.327, "step": 22977 }, { "epoch": 2.991019133151113, "grad_norm": 3.1262850761413574, "learning_rate": 2.3103619195052884e-10, "loss": 0.3989, "step": 22980 }, { "epoch": 2.9914096056228034, "grad_norm": 3.24723744392395, "learning_rate": 2.1138296816713976e-10, "loss": 0.3658, "step": 22983 }, { "epoch": 2.9918000780944944, "grad_norm": 2.843860149383545, "learning_rate": 1.9260319693736874e-10, "loss": 0.3578, "step": 22986 }, { "epoch": 2.992190550566185, "grad_norm": 3.0490710735321045, "learning_rate": 1.7469688154136966e-10, "loss": 0.3589, "step": 22989 }, { "epoch": 2.992581023037876, "grad_norm": 2.6627109050750732, "learning_rate": 1.5766402510775102e-10, "loss": 0.3676, "step": 22992 }, { "epoch": 2.9929714955095665, "grad_norm": 3.4538979530334473, "learning_rate": 1.4150463061191055e-10, "loss": 0.3702, "step": 22995 }, { "epoch": 2.993361967981257, "grad_norm": 2.638334274291992, "learning_rate": 1.2621870087714538e-10, "loss": 0.3135, "step": 22998 }, { "epoch": 2.993752440452948, "grad_norm": 2.811047077178955, "learning_rate": 1.11806238574097e-10, "loss": 0.3619, "step": 23001 }, { "epoch": 2.994142912924639, "grad_norm": 2.9179980754852295, "learning_rate": 9.826724622019613e-11, "loss": 0.3805, "step": 23004 }, { "epoch": 2.9945333853963296, "grad_norm": 2.6326956748962402, "learning_rate": 8.56017261807729e-11, "loss": 0.3641, "step": 23007 }, { "epoch": 2.99492385786802, "grad_norm": 2.799358367919922, "learning_rate": 7.380968066794669e-11, "loss": 0.3647, "step": 23010 }, { "epoch": 2.995314330339711, "grad_norm": 2.7768449783325195, "learning_rate": 6.289111174284657e-11, "loss": 0.3445, "step": 23013 }, { "epoch": 2.9957048028114017, "grad_norm": 2.603933095932007, "learning_rate": 5.284602131228056e-11, "loss": 0.3638, "step": 23016 }, { "epoch": 2.9960952752830927, "grad_norm": 2.9793689250946045, "learning_rate": 4.367441113095616e-11, "loss": 0.393, "step": 23019 }, { "epoch": 2.9964857477547833, "grad_norm": 3.043372631072998, "learning_rate": 3.53762828009252e-11, "loss": 0.4288, "step": 23022 }, { "epoch": 2.996876220226474, "grad_norm": 3.007805109024048, "learning_rate": 2.795163777269405e-11, "loss": 0.3576, "step": 23025 }, { "epoch": 2.997266692698165, "grad_norm": 2.5332658290863037, "learning_rate": 2.1400477343003213e-11, "loss": 0.3063, "step": 23028 }, { "epoch": 2.9976571651698554, "grad_norm": 2.6876015663146973, "learning_rate": 1.572280265649262e-11, "loss": 0.3617, "step": 23031 }, { "epoch": 2.9980476376415464, "grad_norm": 3.169299840927124, "learning_rate": 1.0918614704036323e-11, "loss": 0.4214, "step": 23034 }, { "epoch": 2.998438110113237, "grad_norm": 2.8437774181365967, "learning_rate": 6.987914326073153e-12, "loss": 0.3391, "step": 23037 }, { "epoch": 2.998828582584928, "grad_norm": 3.0276384353637695, "learning_rate": 3.93070220927605e-12, "loss": 0.3807, "step": 23040 }, { "epoch": 2.9992190550566185, "grad_norm": 2.6813414096832275, "learning_rate": 1.7469788865520642e-12, "loss": 0.3787, "step": 23043 }, { "epoch": 2.9996095275283094, "grad_norm": 2.8834426403045654, "learning_rate": 4.367447409281411e-13, "loss": 0.3336, "step": 23046 }, { "epoch": 3.0, "grad_norm": 5.5650634765625, "learning_rate": 0.0, "loss": 0.2652, "step": 23049 } ], "logging_steps": 3, "max_steps": 23049, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.421608510493819e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }