{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.993939393939394, "eval_steps": 500, "global_step": 3090, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0, "loss": 15.9424, "step": 2 }, { "epoch": 0.01, "learning_rate": 0.0, "loss": 16.295, "step": 4 }, { "epoch": 0.01, "learning_rate": 4.3010752688172045e-08, "loss": 15.4463, "step": 6 }, { "epoch": 0.01, "learning_rate": 8.602150537634409e-08, "loss": 16.1327, "step": 8 }, { "epoch": 0.02, "learning_rate": 1.0752688172043011e-07, "loss": 14.9217, "step": 10 }, { "epoch": 0.02, "learning_rate": 1.5053763440860215e-07, "loss": 15.1591, "step": 12 }, { "epoch": 0.02, "learning_rate": 1.9354838709677418e-07, "loss": 15.6472, "step": 14 }, { "epoch": 0.03, "learning_rate": 2.3655913978494625e-07, "loss": 16.3197, "step": 16 }, { "epoch": 0.03, "learning_rate": 2.7956989247311823e-07, "loss": 16.4411, "step": 18 }, { "epoch": 0.03, "learning_rate": 3.225806451612903e-07, "loss": 16.096, "step": 20 }, { "epoch": 0.04, "learning_rate": 3.6559139784946236e-07, "loss": 15.8512, "step": 22 }, { "epoch": 0.04, "learning_rate": 4.0860215053763443e-07, "loss": 15.4195, "step": 24 }, { "epoch": 0.04, "learning_rate": 4.5161290322580644e-07, "loss": 15.002, "step": 26 }, { "epoch": 0.05, "learning_rate": 4.946236559139784e-07, "loss": 15.2072, "step": 28 }, { "epoch": 0.05, "learning_rate": 5.376344086021505e-07, "loss": 15.1285, "step": 30 }, { "epoch": 0.05, "learning_rate": 5.806451612903226e-07, "loss": 15.4741, "step": 32 }, { "epoch": 0.05, "learning_rate": 6.236559139784946e-07, "loss": 15.239, "step": 34 }, { "epoch": 0.06, "learning_rate": 6.666666666666666e-07, "loss": 15.074, "step": 36 }, { "epoch": 0.06, "learning_rate": 7.096774193548387e-07, "loss": 14.9462, "step": 38 }, { "epoch": 0.06, "learning_rate": 7.526881720430107e-07, "loss": 15.4049, "step": 40 }, { "epoch": 0.07, "learning_rate": 7.956989247311827e-07, "loss": 15.2125, "step": 42 }, { "epoch": 0.07, "learning_rate": 8.387096774193549e-07, "loss": 14.8559, "step": 44 }, { "epoch": 0.07, "learning_rate": 8.817204301075269e-07, "loss": 14.7259, "step": 46 }, { "epoch": 0.08, "learning_rate": 9.247311827956989e-07, "loss": 15.2511, "step": 48 }, { "epoch": 0.08, "learning_rate": 9.67741935483871e-07, "loss": 14.5694, "step": 50 }, { "epoch": 0.08, "learning_rate": 1.010752688172043e-06, "loss": 14.0111, "step": 52 }, { "epoch": 0.09, "learning_rate": 1.053763440860215e-06, "loss": 14.6142, "step": 54 }, { "epoch": 0.09, "learning_rate": 1.096774193548387e-06, "loss": 13.0768, "step": 56 }, { "epoch": 0.09, "learning_rate": 1.1397849462365593e-06, "loss": 13.8173, "step": 58 }, { "epoch": 0.1, "learning_rate": 1.1827956989247313e-06, "loss": 13.8097, "step": 60 }, { "epoch": 0.1, "learning_rate": 1.2258064516129033e-06, "loss": 14.053, "step": 62 }, { "epoch": 0.1, "learning_rate": 1.2688172043010753e-06, "loss": 14.0456, "step": 64 }, { "epoch": 0.11, "learning_rate": 1.3118279569892473e-06, "loss": 13.4909, "step": 66 }, { "epoch": 0.11, "learning_rate": 1.354838709677419e-06, "loss": 14.0707, "step": 68 }, { "epoch": 0.11, "learning_rate": 1.3978494623655913e-06, "loss": 12.4678, "step": 70 }, { "epoch": 0.12, "learning_rate": 1.4408602150537633e-06, "loss": 12.8635, "step": 72 }, { "epoch": 0.12, "learning_rate": 1.4838709677419353e-06, "loss": 12.8816, "step": 74 }, { "epoch": 0.12, "learning_rate": 1.5268817204301074e-06, "loss": 11.6928, "step": 76 }, { "epoch": 0.13, "learning_rate": 1.5698924731182794e-06, "loss": 12.3063, "step": 78 }, { "epoch": 0.13, "learning_rate": 1.6129032258064514e-06, "loss": 11.9676, "step": 80 }, { "epoch": 0.13, "learning_rate": 1.6559139784946236e-06, "loss": 12.5602, "step": 82 }, { "epoch": 0.14, "learning_rate": 1.6989247311827956e-06, "loss": 11.1249, "step": 84 }, { "epoch": 0.14, "learning_rate": 1.7419354838709676e-06, "loss": 12.8276, "step": 86 }, { "epoch": 0.14, "learning_rate": 1.7849462365591396e-06, "loss": 11.7053, "step": 88 }, { "epoch": 0.15, "learning_rate": 1.8279569892473117e-06, "loss": 11.9643, "step": 90 }, { "epoch": 0.15, "learning_rate": 1.8709677419354837e-06, "loss": 11.5187, "step": 92 }, { "epoch": 0.15, "learning_rate": 1.913978494623656e-06, "loss": 10.6268, "step": 94 }, { "epoch": 0.16, "learning_rate": 1.956989247311828e-06, "loss": 10.5529, "step": 96 }, { "epoch": 0.16, "learning_rate": 1.978494623655914e-06, "loss": 10.9569, "step": 98 }, { "epoch": 0.16, "learning_rate": 1.9999994505904246e-06, "loss": 10.389, "step": 100 }, { "epoch": 0.16, "learning_rate": 1.999995055317446e-06, "loss": 11.2734, "step": 102 }, { "epoch": 0.17, "learning_rate": 1.9999862647908065e-06, "loss": 10.9547, "step": 104 }, { "epoch": 0.17, "learning_rate": 1.999973079049143e-06, "loss": 10.8081, "step": 106 }, { "epoch": 0.17, "learning_rate": 1.999955498150411e-06, "loss": 9.9185, "step": 108 }, { "epoch": 0.18, "learning_rate": 1.999933522171883e-06, "loss": 10.0234, "step": 110 }, { "epoch": 0.18, "learning_rate": 1.9999071512101496e-06, "loss": 9.7738, "step": 112 }, { "epoch": 0.18, "learning_rate": 1.999876385381118e-06, "loss": 10.5572, "step": 114 }, { "epoch": 0.19, "learning_rate": 1.999841224820014e-06, "loss": 10.809, "step": 116 }, { "epoch": 0.19, "learning_rate": 1.9998016696813757e-06, "loss": 9.3994, "step": 118 }, { "epoch": 0.19, "learning_rate": 1.9997577201390606e-06, "loss": 10.4461, "step": 120 }, { "epoch": 0.2, "learning_rate": 1.9997093763862384e-06, "loss": 10.3454, "step": 122 }, { "epoch": 0.2, "learning_rate": 1.999656638635393e-06, "loss": 9.2653, "step": 124 }, { "epoch": 0.2, "learning_rate": 1.9995995071183215e-06, "loss": 8.1009, "step": 126 }, { "epoch": 0.21, "learning_rate": 1.999537982086133e-06, "loss": 8.6041, "step": 128 }, { "epoch": 0.21, "learning_rate": 1.9994720638092465e-06, "loss": 9.7334, "step": 130 }, { "epoch": 0.21, "learning_rate": 1.9994017525773913e-06, "loss": 10.2584, "step": 132 }, { "epoch": 0.22, "learning_rate": 1.9993270486996043e-06, "loss": 10.2385, "step": 134 }, { "epoch": 0.22, "learning_rate": 1.99924795250423e-06, "loss": 9.471, "step": 136 }, { "epoch": 0.22, "learning_rate": 1.999164464338918e-06, "loss": 9.7777, "step": 138 }, { "epoch": 0.23, "learning_rate": 1.999076584570621e-06, "loss": 9.7583, "step": 140 }, { "epoch": 0.23, "learning_rate": 1.9989843135855954e-06, "loss": 9.3903, "step": 142 }, { "epoch": 0.23, "learning_rate": 1.9988876517893977e-06, "loss": 9.416, "step": 144 }, { "epoch": 0.24, "learning_rate": 1.998786599606883e-06, "loss": 9.2708, "step": 146 }, { "epoch": 0.24, "learning_rate": 1.9986811574822033e-06, "loss": 9.844, "step": 148 }, { "epoch": 0.24, "learning_rate": 1.998571325878806e-06, "loss": 9.6433, "step": 150 }, { "epoch": 0.25, "learning_rate": 1.998457105279431e-06, "loss": 9.2691, "step": 152 }, { "epoch": 0.25, "learning_rate": 1.9983384961861095e-06, "loss": 9.4914, "step": 154 }, { "epoch": 0.25, "learning_rate": 1.9982154991201607e-06, "loss": 9.2666, "step": 156 }, { "epoch": 0.26, "learning_rate": 1.9980881146221913e-06, "loss": 8.2851, "step": 158 }, { "epoch": 0.26, "learning_rate": 1.997956343252091e-06, "loss": 9.1172, "step": 160 }, { "epoch": 0.26, "learning_rate": 1.9978201855890305e-06, "loss": 9.1622, "step": 162 }, { "epoch": 0.27, "learning_rate": 1.9976796422314615e-06, "loss": 9.1079, "step": 164 }, { "epoch": 0.27, "learning_rate": 1.9975347137971096e-06, "loss": 9.4297, "step": 166 }, { "epoch": 0.27, "learning_rate": 1.997385400922976e-06, "loss": 8.043, "step": 168 }, { "epoch": 0.27, "learning_rate": 1.9972317042653317e-06, "loss": 8.6009, "step": 170 }, { "epoch": 0.28, "learning_rate": 1.997073624499716e-06, "loss": 9.0201, "step": 172 }, { "epoch": 0.28, "learning_rate": 1.9969111623209322e-06, "loss": 8.6173, "step": 174 }, { "epoch": 0.28, "learning_rate": 1.9967443184430464e-06, "loss": 9.2188, "step": 176 }, { "epoch": 0.29, "learning_rate": 1.996573093599385e-06, "loss": 8.8459, "step": 178 }, { "epoch": 0.29, "learning_rate": 1.996397488542526e-06, "loss": 8.8776, "step": 180 }, { "epoch": 0.29, "learning_rate": 1.996217504044304e-06, "loss": 8.771, "step": 182 }, { "epoch": 0.3, "learning_rate": 1.9960331408957996e-06, "loss": 9.1059, "step": 184 }, { "epoch": 0.3, "learning_rate": 1.9958443999073394e-06, "loss": 8.9898, "step": 186 }, { "epoch": 0.3, "learning_rate": 1.9956512819084924e-06, "loss": 8.3461, "step": 188 }, { "epoch": 0.31, "learning_rate": 1.995453787748065e-06, "loss": 8.4594, "step": 190 }, { "epoch": 0.31, "learning_rate": 1.995251918294099e-06, "loss": 8.8926, "step": 192 }, { "epoch": 0.31, "learning_rate": 1.9950456744338658e-06, "loss": 9.1989, "step": 194 }, { "epoch": 0.32, "learning_rate": 1.9948350570738638e-06, "loss": 7.9779, "step": 196 }, { "epoch": 0.32, "learning_rate": 1.9946200671398146e-06, "loss": 9.0904, "step": 198 }, { "epoch": 0.32, "learning_rate": 1.9944007055766586e-06, "loss": 8.3997, "step": 200 }, { "epoch": 0.33, "learning_rate": 1.994176973348549e-06, "loss": 8.5416, "step": 202 }, { "epoch": 0.33, "learning_rate": 1.993948871438852e-06, "loss": 8.6818, "step": 204 }, { "epoch": 0.33, "learning_rate": 1.9937164008501376e-06, "loss": 8.6807, "step": 206 }, { "epoch": 0.34, "learning_rate": 1.993479562604178e-06, "loss": 8.596, "step": 208 }, { "epoch": 0.34, "learning_rate": 1.9932383577419428e-06, "loss": 8.1365, "step": 210 }, { "epoch": 0.34, "learning_rate": 1.9929927873235937e-06, "loss": 8.8869, "step": 212 }, { "epoch": 0.35, "learning_rate": 1.99274285242848e-06, "loss": 8.7621, "step": 214 }, { "epoch": 0.35, "learning_rate": 1.9924885541551347e-06, "loss": 8.4504, "step": 216 }, { "epoch": 0.35, "learning_rate": 1.992229893621269e-06, "loss": 8.7773, "step": 218 }, { "epoch": 0.36, "learning_rate": 1.9919668719637667e-06, "loss": 8.6132, "step": 220 }, { "epoch": 0.36, "learning_rate": 1.9916994903386806e-06, "loss": 8.4847, "step": 222 }, { "epoch": 0.36, "learning_rate": 1.9914277499212265e-06, "loss": 8.4073, "step": 224 }, { "epoch": 0.37, "learning_rate": 1.9911516519057786e-06, "loss": 8.1962, "step": 226 }, { "epoch": 0.37, "learning_rate": 1.9908711975058636e-06, "loss": 8.3719, "step": 228 }, { "epoch": 0.37, "learning_rate": 1.990586387954156e-06, "loss": 8.566, "step": 230 }, { "epoch": 0.37, "learning_rate": 1.9902972245024713e-06, "loss": 8.2501, "step": 232 }, { "epoch": 0.38, "learning_rate": 1.9900037084217634e-06, "loss": 8.4245, "step": 234 }, { "epoch": 0.38, "learning_rate": 1.9897058410021164e-06, "loss": 8.5001, "step": 236 }, { "epoch": 0.38, "learning_rate": 1.9894036235527395e-06, "loss": 8.553, "step": 238 }, { "epoch": 0.39, "learning_rate": 1.9890970574019616e-06, "loss": 8.2188, "step": 240 }, { "epoch": 0.39, "learning_rate": 1.9887861438972245e-06, "loss": 8.5532, "step": 242 }, { "epoch": 0.39, "learning_rate": 1.98847088440508e-06, "loss": 8.3859, "step": 244 }, { "epoch": 0.4, "learning_rate": 1.9881512803111793e-06, "loss": 8.3388, "step": 246 }, { "epoch": 0.4, "learning_rate": 1.9878273330202714e-06, "loss": 8.2367, "step": 248 }, { "epoch": 0.4, "learning_rate": 1.987499043956193e-06, "loss": 8.383, "step": 250 }, { "epoch": 0.41, "learning_rate": 1.9871664145618655e-06, "loss": 8.6529, "step": 252 }, { "epoch": 0.41, "learning_rate": 1.9868294462992865e-06, "loss": 7.9699, "step": 254 }, { "epoch": 0.41, "learning_rate": 1.9864881406495245e-06, "loss": 7.9447, "step": 256 }, { "epoch": 0.42, "learning_rate": 1.9861424991127113e-06, "loss": 7.0923, "step": 258 }, { "epoch": 0.42, "learning_rate": 1.9857925232080374e-06, "loss": 8.0928, "step": 260 }, { "epoch": 0.42, "learning_rate": 1.9854382144737426e-06, "loss": 7.5764, "step": 262 }, { "epoch": 0.43, "learning_rate": 1.9850795744671112e-06, "loss": 7.6531, "step": 264 }, { "epoch": 0.43, "learning_rate": 1.9847166047644658e-06, "loss": 8.1521, "step": 266 }, { "epoch": 0.43, "learning_rate": 1.9843493069611576e-06, "loss": 7.9413, "step": 268 }, { "epoch": 0.44, "learning_rate": 1.9839776826715613e-06, "loss": 7.9654, "step": 270 }, { "epoch": 0.44, "learning_rate": 1.983601733529069e-06, "loss": 7.659, "step": 272 }, { "epoch": 0.44, "learning_rate": 1.9832214611860793e-06, "loss": 8.2267, "step": 274 }, { "epoch": 0.45, "learning_rate": 1.9828368673139946e-06, "loss": 7.1799, "step": 276 }, { "epoch": 0.45, "learning_rate": 1.982447953603211e-06, "loss": 8.1048, "step": 278 }, { "epoch": 0.45, "learning_rate": 1.9820547217631115e-06, "loss": 7.9811, "step": 280 }, { "epoch": 0.46, "learning_rate": 1.981657173522058e-06, "loss": 8.4555, "step": 282 }, { "epoch": 0.46, "learning_rate": 1.9812553106273845e-06, "loss": 7.7934, "step": 284 }, { "epoch": 0.46, "learning_rate": 1.980849134845389e-06, "loss": 8.1038, "step": 286 }, { "epoch": 0.47, "learning_rate": 1.9804386479613267e-06, "loss": 7.4383, "step": 288 }, { "epoch": 0.47, "learning_rate": 1.9800238517793994e-06, "loss": 7.7877, "step": 290 }, { "epoch": 0.47, "learning_rate": 1.9796047481227515e-06, "loss": 7.5177, "step": 292 }, { "epoch": 0.48, "learning_rate": 1.979181338833458e-06, "loss": 7.2789, "step": 294 }, { "epoch": 0.48, "learning_rate": 1.97875362577252e-06, "loss": 7.0517, "step": 296 }, { "epoch": 0.48, "learning_rate": 1.9783216108198542e-06, "loss": 7.9153, "step": 298 }, { "epoch": 0.48, "learning_rate": 1.9778852958742852e-06, "loss": 7.9618, "step": 300 }, { "epoch": 0.49, "learning_rate": 1.977444682853537e-06, "loss": 7.2271, "step": 302 }, { "epoch": 0.49, "learning_rate": 1.9769997736942255e-06, "loss": 7.6914, "step": 304 }, { "epoch": 0.49, "learning_rate": 1.9765505703518493e-06, "loss": 8.0209, "step": 306 }, { "epoch": 0.5, "learning_rate": 1.97609707480078e-06, "loss": 7.2783, "step": 308 }, { "epoch": 0.5, "learning_rate": 1.975639289034256e-06, "loss": 7.7316, "step": 310 }, { "epoch": 0.5, "learning_rate": 1.975177215064372e-06, "loss": 7.4457, "step": 312 }, { "epoch": 0.51, "learning_rate": 1.97471085492207e-06, "loss": 7.9042, "step": 314 }, { "epoch": 0.51, "learning_rate": 1.974240210657131e-06, "loss": 7.4322, "step": 316 }, { "epoch": 0.51, "learning_rate": 1.973765284338167e-06, "loss": 7.2353, "step": 318 }, { "epoch": 0.52, "learning_rate": 1.9732860780526087e-06, "loss": 7.9842, "step": 320 }, { "epoch": 0.52, "learning_rate": 1.972802593906701e-06, "loss": 7.2809, "step": 322 }, { "epoch": 0.52, "learning_rate": 1.972314834025489e-06, "loss": 7.7988, "step": 324 }, { "epoch": 0.53, "learning_rate": 1.971822800552812e-06, "loss": 7.7482, "step": 326 }, { "epoch": 0.53, "learning_rate": 1.9713264956512927e-06, "loss": 7.3617, "step": 328 }, { "epoch": 0.53, "learning_rate": 1.9708259215023275e-06, "loss": 7.4253, "step": 330 }, { "epoch": 0.54, "learning_rate": 1.970321080306078e-06, "loss": 7.5589, "step": 332 }, { "epoch": 0.54, "learning_rate": 1.9698119742814604e-06, "loss": 7.4905, "step": 334 }, { "epoch": 0.54, "learning_rate": 1.9692986056661354e-06, "loss": 7.4516, "step": 336 }, { "epoch": 0.55, "learning_rate": 1.9687809767165e-06, "loss": 7.555, "step": 338 }, { "epoch": 0.55, "learning_rate": 1.9682590897076753e-06, "loss": 7.7357, "step": 340 }, { "epoch": 0.55, "learning_rate": 1.967732946933499e-06, "loss": 7.6685, "step": 342 }, { "epoch": 0.56, "learning_rate": 1.9672025507065133e-06, "loss": 7.6611, "step": 344 }, { "epoch": 0.56, "learning_rate": 1.966667903357955e-06, "loss": 7.7605, "step": 346 }, { "epoch": 0.56, "learning_rate": 1.9661290072377477e-06, "loss": 7.9287, "step": 348 }, { "epoch": 0.57, "learning_rate": 1.965585864714488e-06, "loss": 7.5854, "step": 350 }, { "epoch": 0.57, "learning_rate": 1.965038478175436e-06, "loss": 6.5967, "step": 352 }, { "epoch": 0.57, "learning_rate": 1.964486850026507e-06, "loss": 7.6664, "step": 354 }, { "epoch": 0.58, "learning_rate": 1.9639309826922583e-06, "loss": 6.9259, "step": 356 }, { "epoch": 0.58, "learning_rate": 1.9633708786158803e-06, "loss": 7.3177, "step": 358 }, { "epoch": 0.58, "learning_rate": 1.962806540259184e-06, "loss": 7.5336, "step": 360 }, { "epoch": 0.59, "learning_rate": 1.962237970102593e-06, "loss": 7.4839, "step": 362 }, { "epoch": 0.59, "learning_rate": 1.9616651706451285e-06, "loss": 7.3067, "step": 364 }, { "epoch": 0.59, "learning_rate": 1.9610881444044027e-06, "loss": 7.4986, "step": 366 }, { "epoch": 0.59, "learning_rate": 1.9605068939166045e-06, "loss": 7.3467, "step": 368 }, { "epoch": 0.6, "learning_rate": 1.95992142173649e-06, "loss": 7.7502, "step": 370 }, { "epoch": 0.6, "learning_rate": 1.9593317304373703e-06, "loss": 7.4302, "step": 372 }, { "epoch": 0.6, "learning_rate": 1.958737822611101e-06, "loss": 7.1672, "step": 374 }, { "epoch": 0.61, "learning_rate": 1.9581397008680715e-06, "loss": 7.0433, "step": 376 }, { "epoch": 0.61, "learning_rate": 1.957537367837191e-06, "loss": 7.467, "step": 378 }, { "epoch": 0.61, "learning_rate": 1.9569308261658788e-06, "loss": 7.6979, "step": 380 }, { "epoch": 0.62, "learning_rate": 1.9563200785200524e-06, "loss": 7.4041, "step": 382 }, { "epoch": 0.62, "learning_rate": 1.955705127584117e-06, "loss": 7.1015, "step": 384 }, { "epoch": 0.62, "learning_rate": 1.95508597606095e-06, "loss": 7.3307, "step": 386 }, { "epoch": 0.63, "learning_rate": 1.954462626671894e-06, "loss": 7.3478, "step": 388 }, { "epoch": 0.63, "learning_rate": 1.9538350821567403e-06, "loss": 7.2176, "step": 390 }, { "epoch": 0.63, "learning_rate": 1.9532033452737203e-06, "loss": 7.6799, "step": 392 }, { "epoch": 0.64, "learning_rate": 1.952567418799492e-06, "loss": 7.2302, "step": 394 }, { "epoch": 0.64, "learning_rate": 1.9519273055291264e-06, "loss": 7.2812, "step": 396 }, { "epoch": 0.64, "learning_rate": 1.9512830082760985e-06, "loss": 7.0137, "step": 398 }, { "epoch": 0.65, "learning_rate": 1.9506345298722714e-06, "loss": 7.6786, "step": 400 }, { "epoch": 0.65, "learning_rate": 1.949981873167887e-06, "loss": 7.0638, "step": 402 }, { "epoch": 0.65, "learning_rate": 1.9493250410315505e-06, "loss": 6.877, "step": 404 }, { "epoch": 0.66, "learning_rate": 1.948664036350221e-06, "loss": 7.1874, "step": 406 }, { "epoch": 0.66, "learning_rate": 1.9479988620291952e-06, "loss": 7.5073, "step": 408 }, { "epoch": 0.66, "learning_rate": 1.9473295209920983e-06, "loss": 6.605, "step": 410 }, { "epoch": 0.67, "learning_rate": 1.946656016180867e-06, "loss": 7.1983, "step": 412 }, { "epoch": 0.67, "learning_rate": 1.9459783505557422e-06, "loss": 7.3831, "step": 414 }, { "epoch": 0.67, "learning_rate": 1.9452965270952497e-06, "loss": 7.1163, "step": 416 }, { "epoch": 0.68, "learning_rate": 1.9446105487961925e-06, "loss": 7.4892, "step": 418 }, { "epoch": 0.68, "learning_rate": 1.943920418673633e-06, "loss": 7.138, "step": 420 }, { "epoch": 0.68, "learning_rate": 1.9432261397608833e-06, "loss": 7.21, "step": 422 }, { "epoch": 0.69, "learning_rate": 1.942527715109491e-06, "loss": 6.8101, "step": 424 }, { "epoch": 0.69, "learning_rate": 1.941825147789225e-06, "loss": 7.1995, "step": 426 }, { "epoch": 0.69, "learning_rate": 1.941118440888061e-06, "loss": 7.3979, "step": 428 }, { "epoch": 0.69, "learning_rate": 1.9404075975121716e-06, "loss": 6.9963, "step": 430 }, { "epoch": 0.7, "learning_rate": 1.9396926207859082e-06, "loss": 6.8627, "step": 432 }, { "epoch": 0.7, "learning_rate": 1.9389735138517915e-06, "loss": 7.1982, "step": 434 }, { "epoch": 0.7, "learning_rate": 1.9382502798704935e-06, "loss": 7.0028, "step": 436 }, { "epoch": 0.71, "learning_rate": 1.9375229220208273e-06, "loss": 7.3183, "step": 438 }, { "epoch": 0.71, "learning_rate": 1.936791443499731e-06, "loss": 7.2714, "step": 440 }, { "epoch": 0.71, "learning_rate": 1.936055847522254e-06, "loss": 6.7713, "step": 442 }, { "epoch": 0.72, "learning_rate": 1.935316137321543e-06, "loss": 6.9329, "step": 444 }, { "epoch": 0.72, "learning_rate": 1.934572316148828e-06, "loss": 7.0025, "step": 446 }, { "epoch": 0.72, "learning_rate": 1.9338243872734083e-06, "loss": 6.9061, "step": 448 }, { "epoch": 0.73, "learning_rate": 1.933072353982637e-06, "loss": 7.4755, "step": 450 }, { "epoch": 0.73, "learning_rate": 1.932316219581908e-06, "loss": 7.4846, "step": 452 }, { "epoch": 0.73, "learning_rate": 1.93155598739464e-06, "loss": 7.2087, "step": 454 }, { "epoch": 0.74, "learning_rate": 1.930791660762262e-06, "loss": 7.2197, "step": 456 }, { "epoch": 0.74, "learning_rate": 1.930023243044201e-06, "loss": 7.2201, "step": 458 }, { "epoch": 0.74, "learning_rate": 1.929250737617864e-06, "loss": 6.7586, "step": 460 }, { "epoch": 0.75, "learning_rate": 1.9284741478786258e-06, "loss": 7.1637, "step": 462 }, { "epoch": 0.75, "learning_rate": 1.9276934772398113e-06, "loss": 6.8489, "step": 464 }, { "epoch": 0.75, "learning_rate": 1.926908729132683e-06, "loss": 7.0103, "step": 466 }, { "epoch": 0.76, "learning_rate": 1.926119907006426e-06, "loss": 6.7451, "step": 468 }, { "epoch": 0.76, "learning_rate": 1.9253270143281295e-06, "loss": 6.8193, "step": 470 }, { "epoch": 0.76, "learning_rate": 1.924530054582776e-06, "loss": 6.8033, "step": 472 }, { "epoch": 0.77, "learning_rate": 1.923729031273222e-06, "loss": 6.7076, "step": 474 }, { "epoch": 0.77, "learning_rate": 1.9229239479201874e-06, "loss": 7.2125, "step": 476 }, { "epoch": 0.77, "learning_rate": 1.9221148080622338e-06, "loss": 6.7966, "step": 478 }, { "epoch": 0.78, "learning_rate": 1.921301615255754e-06, "loss": 6.6421, "step": 480 }, { "epoch": 0.78, "learning_rate": 1.9204843730749544e-06, "loss": 6.8844, "step": 482 }, { "epoch": 0.78, "learning_rate": 1.9196630851118395e-06, "loss": 6.1133, "step": 484 }, { "epoch": 0.79, "learning_rate": 1.918837754976196e-06, "loss": 7.1229, "step": 486 }, { "epoch": 0.79, "learning_rate": 1.918008386295577e-06, "loss": 7.2234, "step": 488 }, { "epoch": 0.79, "learning_rate": 1.917174982715287e-06, "loss": 6.9052, "step": 490 }, { "epoch": 0.8, "learning_rate": 1.916337547898363e-06, "loss": 7.2216, "step": 492 }, { "epoch": 0.8, "learning_rate": 1.9154960855255626e-06, "loss": 7.1983, "step": 494 }, { "epoch": 0.8, "learning_rate": 1.9146505992953444e-06, "loss": 7.0274, "step": 496 }, { "epoch": 0.8, "learning_rate": 1.9138010929238533e-06, "loss": 7.0934, "step": 498 }, { "epoch": 0.81, "learning_rate": 1.9129475701449035e-06, "loss": 6.565, "step": 500 }, { "epoch": 0.81, "learning_rate": 1.912090034709963e-06, "loss": 6.7059, "step": 502 }, { "epoch": 0.81, "learning_rate": 1.9112284903881357e-06, "loss": 6.2099, "step": 504 }, { "epoch": 0.82, "learning_rate": 1.9103629409661467e-06, "loss": 6.7425, "step": 506 }, { "epoch": 0.82, "learning_rate": 1.909493390248324e-06, "loss": 6.9777, "step": 508 }, { "epoch": 0.82, "learning_rate": 1.908619842056582e-06, "loss": 6.9865, "step": 510 }, { "epoch": 0.83, "learning_rate": 1.9077423002304058e-06, "loss": 6.3805, "step": 512 }, { "epoch": 0.83, "learning_rate": 1.906860768626834e-06, "loss": 7.2092, "step": 514 }, { "epoch": 0.83, "learning_rate": 1.9059752511204396e-06, "loss": 7.2097, "step": 516 }, { "epoch": 0.84, "learning_rate": 1.9050857516033173e-06, "loss": 6.7913, "step": 518 }, { "epoch": 0.84, "learning_rate": 1.9041922739850614e-06, "loss": 6.771, "step": 520 }, { "epoch": 0.84, "learning_rate": 1.9032948221927522e-06, "loss": 6.8683, "step": 522 }, { "epoch": 0.85, "learning_rate": 1.902393400170938e-06, "loss": 6.8623, "step": 524 }, { "epoch": 0.85, "learning_rate": 1.9014880118816162e-06, "loss": 7.1279, "step": 526 }, { "epoch": 0.85, "learning_rate": 1.9005786613042183e-06, "loss": 7.1608, "step": 528 }, { "epoch": 0.86, "learning_rate": 1.8996653524355902e-06, "loss": 7.095, "step": 530 }, { "epoch": 0.86, "learning_rate": 1.8987480892899756e-06, "loss": 6.5173, "step": 532 }, { "epoch": 0.86, "learning_rate": 1.897826875898999e-06, "loss": 6.8354, "step": 534 }, { "epoch": 0.87, "learning_rate": 1.896901716311647e-06, "loss": 6.5117, "step": 536 }, { "epoch": 0.87, "learning_rate": 1.8959726145942505e-06, "loss": 6.8697, "step": 538 }, { "epoch": 0.87, "learning_rate": 1.8950395748304678e-06, "loss": 7.0196, "step": 540 }, { "epoch": 0.88, "learning_rate": 1.8941026011212653e-06, "loss": 7.1429, "step": 542 }, { "epoch": 0.88, "learning_rate": 1.8931616975849006e-06, "loss": 6.4905, "step": 544 }, { "epoch": 0.88, "learning_rate": 1.8922168683569037e-06, "loss": 6.5658, "step": 546 }, { "epoch": 0.89, "learning_rate": 1.8912681175900595e-06, "loss": 6.7222, "step": 548 }, { "epoch": 0.89, "learning_rate": 1.8903154494543887e-06, "loss": 6.8789, "step": 550 }, { "epoch": 0.89, "learning_rate": 1.8893588681371301e-06, "loss": 6.9062, "step": 552 }, { "epoch": 0.9, "learning_rate": 1.8883983778427223e-06, "loss": 6.5114, "step": 554 }, { "epoch": 0.9, "learning_rate": 1.8874339827927845e-06, "loss": 6.5592, "step": 556 }, { "epoch": 0.9, "learning_rate": 1.8864656872260985e-06, "loss": 6.8886, "step": 558 }, { "epoch": 0.91, "learning_rate": 1.88549349539859e-06, "loss": 6.9465, "step": 560 }, { "epoch": 0.91, "learning_rate": 1.8845174115833097e-06, "loss": 6.8442, "step": 562 }, { "epoch": 0.91, "learning_rate": 1.8835374400704152e-06, "loss": 6.618, "step": 564 }, { "epoch": 0.91, "learning_rate": 1.8825535851671506e-06, "loss": 6.4021, "step": 566 }, { "epoch": 0.92, "learning_rate": 1.8815658511978296e-06, "loss": 6.799, "step": 568 }, { "epoch": 0.92, "learning_rate": 1.8805742425038146e-06, "loss": 6.785, "step": 570 }, { "epoch": 0.92, "learning_rate": 1.8795787634434992e-06, "loss": 7.0524, "step": 572 }, { "epoch": 0.93, "learning_rate": 1.878579418392288e-06, "loss": 6.8829, "step": 574 }, { "epoch": 0.93, "learning_rate": 1.8775762117425775e-06, "loss": 6.8318, "step": 576 }, { "epoch": 0.93, "learning_rate": 1.8765691479037375e-06, "loss": 6.8338, "step": 578 }, { "epoch": 0.94, "learning_rate": 1.8755582313020908e-06, "loss": 6.2545, "step": 580 }, { "epoch": 0.94, "learning_rate": 1.8745434663808941e-06, "loss": 6.8133, "step": 582 }, { "epoch": 0.94, "learning_rate": 1.873524857600319e-06, "loss": 6.8556, "step": 584 }, { "epoch": 0.95, "learning_rate": 1.8725024094374313e-06, "loss": 6.6332, "step": 586 }, { "epoch": 0.95, "learning_rate": 1.8714761263861725e-06, "loss": 6.8382, "step": 588 }, { "epoch": 0.95, "learning_rate": 1.870446012957339e-06, "loss": 6.6768, "step": 590 }, { "epoch": 0.96, "learning_rate": 1.869412073678563e-06, "loss": 6.8513, "step": 592 }, { "epoch": 0.96, "learning_rate": 1.8683743130942926e-06, "loss": 6.4011, "step": 594 }, { "epoch": 0.96, "learning_rate": 1.8673327357657713e-06, "loss": 6.8065, "step": 596 }, { "epoch": 0.97, "learning_rate": 1.8662873462710182e-06, "loss": 6.6735, "step": 598 }, { "epoch": 0.97, "learning_rate": 1.865238149204808e-06, "loss": 6.1697, "step": 600 }, { "epoch": 0.97, "learning_rate": 1.8641851491786512e-06, "loss": 6.3478, "step": 602 }, { "epoch": 0.98, "learning_rate": 1.8631283508207723e-06, "loss": 6.9435, "step": 604 }, { "epoch": 0.98, "learning_rate": 1.8620677587760913e-06, "loss": 6.771, "step": 606 }, { "epoch": 0.98, "learning_rate": 1.8610033777062024e-06, "loss": 5.8656, "step": 608 }, { "epoch": 0.99, "learning_rate": 1.8599352122893537e-06, "loss": 6.3271, "step": 610 }, { "epoch": 0.99, "learning_rate": 1.858863267220426e-06, "loss": 7.3395, "step": 612 }, { "epoch": 0.99, "learning_rate": 1.8577875472109132e-06, "loss": 6.7548, "step": 614 }, { "epoch": 1.0, "learning_rate": 1.8567080569889014e-06, "loss": 6.9543, "step": 616 }, { "epoch": 1.0, "learning_rate": 1.8556248012990466e-06, "loss": 7.0467, "step": 618 }, { "epoch": 1.0, "learning_rate": 1.8545377849025564e-06, "loss": 7.1106, "step": 620 }, { "epoch": 1.01, "learning_rate": 1.8534470125771674e-06, "loss": 6.4241, "step": 622 }, { "epoch": 1.01, "learning_rate": 1.8523524891171239e-06, "loss": 6.951, "step": 624 }, { "epoch": 1.01, "learning_rate": 1.8512542193331582e-06, "loss": 6.6515, "step": 626 }, { "epoch": 1.01, "learning_rate": 1.8501522080524686e-06, "loss": 6.6356, "step": 628 }, { "epoch": 1.02, "learning_rate": 1.8490464601186977e-06, "loss": 6.1777, "step": 630 }, { "epoch": 1.02, "learning_rate": 1.8479369803919126e-06, "loss": 6.5791, "step": 632 }, { "epoch": 1.02, "learning_rate": 1.846823773748582e-06, "loss": 6.7036, "step": 634 }, { "epoch": 1.03, "learning_rate": 1.845706845081556e-06, "loss": 6.3115, "step": 636 }, { "epoch": 1.03, "learning_rate": 1.8445861993000433e-06, "loss": 6.8141, "step": 638 }, { "epoch": 1.03, "learning_rate": 1.8434618413295909e-06, "loss": 6.6293, "step": 640 }, { "epoch": 1.04, "learning_rate": 1.8423337761120617e-06, "loss": 6.9529, "step": 642 }, { "epoch": 1.04, "learning_rate": 1.841202008605613e-06, "loss": 6.8302, "step": 644 }, { "epoch": 1.04, "learning_rate": 1.8400665437846746e-06, "loss": 6.8518, "step": 646 }, { "epoch": 1.05, "learning_rate": 1.8389273866399274e-06, "loss": 6.4488, "step": 648 }, { "epoch": 1.05, "learning_rate": 1.8377845421782802e-06, "loss": 6.3835, "step": 650 }, { "epoch": 1.05, "learning_rate": 1.8366380154228497e-06, "loss": 6.829, "step": 652 }, { "epoch": 1.06, "learning_rate": 1.8354878114129364e-06, "loss": 6.9445, "step": 654 }, { "epoch": 1.06, "learning_rate": 1.834333935204004e-06, "loss": 6.8705, "step": 656 }, { "epoch": 1.06, "learning_rate": 1.8331763918676555e-06, "loss": 6.3082, "step": 658 }, { "epoch": 1.07, "learning_rate": 1.8320151864916133e-06, "loss": 6.7029, "step": 660 }, { "epoch": 1.07, "learning_rate": 1.8308503241796948e-06, "loss": 6.5772, "step": 662 }, { "epoch": 1.07, "learning_rate": 1.8296818100517909e-06, "loss": 6.7506, "step": 664 }, { "epoch": 1.08, "learning_rate": 1.828509649243842e-06, "loss": 6.8545, "step": 666 }, { "epoch": 1.08, "learning_rate": 1.8273338469078186e-06, "loss": 5.9733, "step": 668 }, { "epoch": 1.08, "learning_rate": 1.8261544082116953e-06, "loss": 6.3656, "step": 670 }, { "epoch": 1.09, "learning_rate": 1.8249713383394301e-06, "loss": 6.7071, "step": 672 }, { "epoch": 1.09, "learning_rate": 1.8237846424909411e-06, "loss": 6.6789, "step": 674 }, { "epoch": 1.09, "learning_rate": 1.8225943258820832e-06, "loss": 7.0611, "step": 676 }, { "epoch": 1.1, "learning_rate": 1.8214003937446253e-06, "loss": 6.8078, "step": 678 }, { "epoch": 1.1, "learning_rate": 1.8202028513262286e-06, "loss": 6.7964, "step": 680 }, { "epoch": 1.1, "learning_rate": 1.8190017038904214e-06, "loss": 6.4661, "step": 682 }, { "epoch": 1.11, "learning_rate": 1.8177969567165778e-06, "loss": 5.7196, "step": 684 }, { "epoch": 1.11, "learning_rate": 1.8165886150998931e-06, "loss": 6.6864, "step": 686 }, { "epoch": 1.11, "learning_rate": 1.8153766843513617e-06, "loss": 6.7598, "step": 688 }, { "epoch": 1.12, "learning_rate": 1.8141611697977526e-06, "loss": 5.9103, "step": 690 }, { "epoch": 1.12, "learning_rate": 1.8129420767815878e-06, "loss": 6.5564, "step": 692 }, { "epoch": 1.12, "learning_rate": 1.811719410661116e-06, "loss": 6.6198, "step": 694 }, { "epoch": 1.12, "learning_rate": 1.8104931768102917e-06, "loss": 6.3773, "step": 696 }, { "epoch": 1.13, "learning_rate": 1.8092633806187511e-06, "loss": 6.596, "step": 698 }, { "epoch": 1.13, "learning_rate": 1.8080300274917861e-06, "loss": 6.5877, "step": 700 }, { "epoch": 1.13, "learning_rate": 1.8067931228503243e-06, "loss": 6.7652, "step": 702 }, { "epoch": 1.14, "learning_rate": 1.8055526721309013e-06, "loss": 6.6674, "step": 704 }, { "epoch": 1.14, "learning_rate": 1.8043086807856402e-06, "loss": 6.6695, "step": 706 }, { "epoch": 1.14, "learning_rate": 1.8030611542822255e-06, "loss": 6.9025, "step": 708 }, { "epoch": 1.15, "learning_rate": 1.8018100981038797e-06, "loss": 6.5107, "step": 710 }, { "epoch": 1.15, "learning_rate": 1.8005555177493392e-06, "loss": 6.2374, "step": 712 }, { "epoch": 1.15, "learning_rate": 1.7992974187328303e-06, "loss": 6.8202, "step": 714 }, { "epoch": 1.16, "learning_rate": 1.798035806584044e-06, "loss": 6.5293, "step": 716 }, { "epoch": 1.16, "learning_rate": 1.7967706868481143e-06, "loss": 6.7172, "step": 718 }, { "epoch": 1.16, "learning_rate": 1.79550206508559e-06, "loss": 6.9248, "step": 720 }, { "epoch": 1.17, "learning_rate": 1.7942299468724131e-06, "loss": 6.3225, "step": 722 }, { "epoch": 1.17, "learning_rate": 1.792954337799894e-06, "loss": 6.6829, "step": 724 }, { "epoch": 1.17, "learning_rate": 1.7916752434746853e-06, "loss": 6.7468, "step": 726 }, { "epoch": 1.18, "learning_rate": 1.7903926695187592e-06, "loss": 6.6429, "step": 728 }, { "epoch": 1.18, "learning_rate": 1.7891066215693816e-06, "loss": 6.5349, "step": 730 }, { "epoch": 1.18, "learning_rate": 1.7878171052790867e-06, "loss": 6.288, "step": 732 }, { "epoch": 1.19, "learning_rate": 1.7865241263156543e-06, "loss": 6.5498, "step": 734 }, { "epoch": 1.19, "learning_rate": 1.7852276903620828e-06, "loss": 6.605, "step": 736 }, { "epoch": 1.19, "learning_rate": 1.7839278031165656e-06, "loss": 6.2279, "step": 738 }, { "epoch": 1.2, "learning_rate": 1.7826244702924647e-06, "loss": 6.3011, "step": 740 }, { "epoch": 1.2, "learning_rate": 1.7813176976182872e-06, "loss": 6.4668, "step": 742 }, { "epoch": 1.2, "learning_rate": 1.7800074908376584e-06, "loss": 6.3857, "step": 744 }, { "epoch": 1.21, "learning_rate": 1.778693855709298e-06, "loss": 6.5626, "step": 746 }, { "epoch": 1.21, "learning_rate": 1.7773767980069944e-06, "loss": 6.5039, "step": 748 }, { "epoch": 1.21, "learning_rate": 1.776056323519579e-06, "loss": 6.5606, "step": 750 }, { "epoch": 1.22, "learning_rate": 1.7747324380509006e-06, "loss": 6.3044, "step": 752 }, { "epoch": 1.22, "learning_rate": 1.7734051474198e-06, "loss": 6.7221, "step": 754 }, { "epoch": 1.22, "learning_rate": 1.7720744574600862e-06, "loss": 6.2063, "step": 756 }, { "epoch": 1.23, "learning_rate": 1.7707403740205068e-06, "loss": 6.3192, "step": 758 }, { "epoch": 1.23, "learning_rate": 1.7694029029647268e-06, "loss": 6.1671, "step": 760 }, { "epoch": 1.23, "learning_rate": 1.7680620501712995e-06, "loss": 6.6476, "step": 762 }, { "epoch": 1.23, "learning_rate": 1.7667178215336423e-06, "loss": 6.5156, "step": 764 }, { "epoch": 1.24, "learning_rate": 1.7653702229600098e-06, "loss": 5.8151, "step": 766 }, { "epoch": 1.24, "learning_rate": 1.764019260373469e-06, "loss": 6.6839, "step": 768 }, { "epoch": 1.24, "learning_rate": 1.7626649397118732e-06, "loss": 6.4756, "step": 770 }, { "epoch": 1.25, "learning_rate": 1.7613072669278342e-06, "loss": 6.3091, "step": 772 }, { "epoch": 1.25, "learning_rate": 1.7599462479886973e-06, "loss": 6.0635, "step": 774 }, { "epoch": 1.25, "learning_rate": 1.7585818888765164e-06, "loss": 6.3993, "step": 776 }, { "epoch": 1.26, "learning_rate": 1.7572141955880251e-06, "loss": 6.5637, "step": 778 }, { "epoch": 1.26, "learning_rate": 1.755843174134612e-06, "loss": 5.9163, "step": 780 }, { "epoch": 1.26, "learning_rate": 1.7544688305422941e-06, "loss": 6.687, "step": 782 }, { "epoch": 1.27, "learning_rate": 1.75309117085169e-06, "loss": 6.4887, "step": 784 }, { "epoch": 1.27, "learning_rate": 1.7517102011179931e-06, "loss": 6.8291, "step": 786 }, { "epoch": 1.27, "learning_rate": 1.7503259274109462e-06, "loss": 6.5768, "step": 788 }, { "epoch": 1.28, "learning_rate": 1.7489383558148135e-06, "loss": 6.4883, "step": 790 }, { "epoch": 1.28, "learning_rate": 1.7475474924283536e-06, "loss": 6.5535, "step": 792 }, { "epoch": 1.28, "learning_rate": 1.7461533433647946e-06, "loss": 6.4518, "step": 794 }, { "epoch": 1.29, "learning_rate": 1.7447559147518054e-06, "loss": 6.5409, "step": 796 }, { "epoch": 1.29, "learning_rate": 1.74335521273147e-06, "loss": 6.6906, "step": 798 }, { "epoch": 1.29, "learning_rate": 1.7419512434602591e-06, "loss": 6.6106, "step": 800 }, { "epoch": 1.3, "learning_rate": 1.7405440131090047e-06, "loss": 6.1888, "step": 802 }, { "epoch": 1.3, "learning_rate": 1.739133527862871e-06, "loss": 6.4989, "step": 804 }, { "epoch": 1.3, "learning_rate": 1.7377197939213298e-06, "loss": 6.6128, "step": 806 }, { "epoch": 1.31, "learning_rate": 1.736302817498131e-06, "loss": 6.5109, "step": 808 }, { "epoch": 1.31, "learning_rate": 1.7348826048212758e-06, "loss": 6.394, "step": 810 }, { "epoch": 1.31, "learning_rate": 1.7334591621329904e-06, "loss": 6.5515, "step": 812 }, { "epoch": 1.32, "learning_rate": 1.7320324956896973e-06, "loss": 6.7792, "step": 814 }, { "epoch": 1.32, "learning_rate": 1.7306026117619888e-06, "loss": 6.4862, "step": 816 }, { "epoch": 1.32, "learning_rate": 1.7291695166345978e-06, "loss": 6.4119, "step": 818 }, { "epoch": 1.33, "learning_rate": 1.7277332166063725e-06, "loss": 6.4111, "step": 820 }, { "epoch": 1.33, "learning_rate": 1.726293717990247e-06, "loss": 5.8316, "step": 822 }, { "epoch": 1.33, "learning_rate": 1.7248510271132141e-06, "loss": 6.5505, "step": 824 }, { "epoch": 1.33, "learning_rate": 1.7234051503162977e-06, "loss": 6.5862, "step": 826 }, { "epoch": 1.34, "learning_rate": 1.7219560939545242e-06, "loss": 6.556, "step": 828 }, { "epoch": 1.34, "learning_rate": 1.7205038643968958e-06, "loss": 6.0874, "step": 830 }, { "epoch": 1.34, "learning_rate": 1.719048468026361e-06, "loss": 6.6705, "step": 832 }, { "epoch": 1.35, "learning_rate": 1.7175899112397878e-06, "loss": 6.604, "step": 834 }, { "epoch": 1.35, "learning_rate": 1.716128200447935e-06, "loss": 6.1485, "step": 836 }, { "epoch": 1.35, "learning_rate": 1.7146633420754237e-06, "loss": 6.4457, "step": 838 }, { "epoch": 1.36, "learning_rate": 1.7131953425607102e-06, "loss": 6.1297, "step": 840 }, { "epoch": 1.36, "learning_rate": 1.7117242083560566e-06, "loss": 6.4114, "step": 842 }, { "epoch": 1.36, "learning_rate": 1.7102499459275025e-06, "loss": 6.1976, "step": 844 }, { "epoch": 1.37, "learning_rate": 1.7087725617548382e-06, "loss": 6.5855, "step": 846 }, { "epoch": 1.37, "learning_rate": 1.7072920623315731e-06, "loss": 6.6856, "step": 848 }, { "epoch": 1.37, "learning_rate": 1.7058084541649103e-06, "loss": 5.9883, "step": 850 }, { "epoch": 1.38, "learning_rate": 1.7043217437757163e-06, "loss": 6.4898, "step": 852 }, { "epoch": 1.38, "learning_rate": 1.7028319376984925e-06, "loss": 6.2324, "step": 854 }, { "epoch": 1.38, "learning_rate": 1.7013390424813467e-06, "loss": 5.9353, "step": 856 }, { "epoch": 1.39, "learning_rate": 1.6998430646859653e-06, "loss": 6.0376, "step": 858 }, { "epoch": 1.39, "learning_rate": 1.698344010887582e-06, "loss": 6.5051, "step": 860 }, { "epoch": 1.39, "learning_rate": 1.6968418876749508e-06, "loss": 6.2818, "step": 862 }, { "epoch": 1.4, "learning_rate": 1.695336701650318e-06, "loss": 6.4458, "step": 864 }, { "epoch": 1.4, "learning_rate": 1.6938284594293894e-06, "loss": 6.0794, "step": 866 }, { "epoch": 1.4, "learning_rate": 1.6923171676413062e-06, "loss": 6.1156, "step": 868 }, { "epoch": 1.41, "learning_rate": 1.690802832928611e-06, "loss": 6.4838, "step": 870 }, { "epoch": 1.41, "learning_rate": 1.689285461947222e-06, "loss": 6.5291, "step": 872 }, { "epoch": 1.41, "learning_rate": 1.6877650613664031e-06, "loss": 6.6217, "step": 874 }, { "epoch": 1.42, "learning_rate": 1.6862416378687337e-06, "loss": 6.3847, "step": 876 }, { "epoch": 1.42, "learning_rate": 1.6847151981500786e-06, "loss": 6.4331, "step": 878 }, { "epoch": 1.42, "learning_rate": 1.6831857489195616e-06, "loss": 6.302, "step": 880 }, { "epoch": 1.43, "learning_rate": 1.6816532968995328e-06, "loss": 6.2486, "step": 882 }, { "epoch": 1.43, "learning_rate": 1.6801178488255411e-06, "loss": 6.2877, "step": 884 }, { "epoch": 1.43, "learning_rate": 1.6785794114463036e-06, "loss": 6.2413, "step": 886 }, { "epoch": 1.44, "learning_rate": 1.6770379915236763e-06, "loss": 6.011, "step": 888 }, { "epoch": 1.44, "learning_rate": 1.6754935958326241e-06, "loss": 5.8661, "step": 890 }, { "epoch": 1.44, "learning_rate": 1.6739462311611915e-06, "loss": 6.2365, "step": 892 }, { "epoch": 1.44, "learning_rate": 1.6723959043104726e-06, "loss": 6.0454, "step": 894 }, { "epoch": 1.45, "learning_rate": 1.67084262209458e-06, "loss": 6.686, "step": 896 }, { "epoch": 1.45, "learning_rate": 1.669286391340618e-06, "loss": 6.2254, "step": 898 }, { "epoch": 1.45, "learning_rate": 1.6677272188886483e-06, "loss": 6.3332, "step": 900 }, { "epoch": 1.46, "learning_rate": 1.666165111591664e-06, "loss": 6.2119, "step": 902 }, { "epoch": 1.46, "learning_rate": 1.6646000763155566e-06, "loss": 6.0778, "step": 904 }, { "epoch": 1.46, "learning_rate": 1.6630321199390867e-06, "loss": 6.3578, "step": 906 }, { "epoch": 1.47, "learning_rate": 1.6614612493538548e-06, "loss": 6.6655, "step": 908 }, { "epoch": 1.47, "learning_rate": 1.6598874714642697e-06, "loss": 6.2806, "step": 910 }, { "epoch": 1.47, "learning_rate": 1.6583107931875188e-06, "loss": 6.1602, "step": 912 }, { "epoch": 1.48, "learning_rate": 1.656731221453537e-06, "loss": 6.1191, "step": 914 }, { "epoch": 1.48, "learning_rate": 1.655148763204977e-06, "loss": 5.7852, "step": 916 }, { "epoch": 1.48, "learning_rate": 1.6535634253971794e-06, "loss": 6.5334, "step": 918 }, { "epoch": 1.49, "learning_rate": 1.6519752149981395e-06, "loss": 6.5009, "step": 920 }, { "epoch": 1.49, "learning_rate": 1.6503841389884796e-06, "loss": 6.5337, "step": 922 }, { "epoch": 1.49, "learning_rate": 1.648790204361417e-06, "loss": 6.3533, "step": 924 }, { "epoch": 1.5, "learning_rate": 1.6471934181227337e-06, "loss": 6.333, "step": 926 }, { "epoch": 1.5, "learning_rate": 1.6455937872907449e-06, "loss": 6.3058, "step": 928 }, { "epoch": 1.5, "learning_rate": 1.6439913188962681e-06, "loss": 6.0227, "step": 930 }, { "epoch": 1.51, "learning_rate": 1.642386019982594e-06, "loss": 6.3999, "step": 932 }, { "epoch": 1.51, "learning_rate": 1.6407778976054526e-06, "loss": 6.3557, "step": 934 }, { "epoch": 1.51, "learning_rate": 1.6391669588329848e-06, "loss": 6.5799, "step": 936 }, { "epoch": 1.52, "learning_rate": 1.6375532107457105e-06, "loss": 5.9422, "step": 938 }, { "epoch": 1.52, "learning_rate": 1.6359366604364971e-06, "loss": 6.6235, "step": 940 }, { "epoch": 1.52, "learning_rate": 1.6343173150105275e-06, "loss": 6.0267, "step": 942 }, { "epoch": 1.53, "learning_rate": 1.6326951815852719e-06, "loss": 6.327, "step": 944 }, { "epoch": 1.53, "learning_rate": 1.6310702672904526e-06, "loss": 6.473, "step": 946 }, { "epoch": 1.53, "learning_rate": 1.6294425792680158e-06, "loss": 6.4169, "step": 948 }, { "epoch": 1.54, "learning_rate": 1.6278121246720987e-06, "loss": 5.8404, "step": 950 }, { "epoch": 1.54, "learning_rate": 1.6261789106689978e-06, "loss": 5.8924, "step": 952 }, { "epoch": 1.54, "learning_rate": 1.624542944437139e-06, "loss": 6.2631, "step": 954 }, { "epoch": 1.55, "learning_rate": 1.622904233167044e-06, "loss": 6.4961, "step": 956 }, { "epoch": 1.55, "learning_rate": 1.6212627840613001e-06, "loss": 6.3894, "step": 958 }, { "epoch": 1.55, "learning_rate": 1.6196186043345285e-06, "loss": 6.5512, "step": 960 }, { "epoch": 1.55, "learning_rate": 1.617971701213352e-06, "loss": 6.1691, "step": 962 }, { "epoch": 1.56, "learning_rate": 1.6163220819363628e-06, "loss": 6.3324, "step": 964 }, { "epoch": 1.56, "learning_rate": 1.6146697537540923e-06, "loss": 6.6786, "step": 966 }, { "epoch": 1.56, "learning_rate": 1.6130147239289777e-06, "loss": 6.5543, "step": 968 }, { "epoch": 1.57, "learning_rate": 1.611356999735331e-06, "loss": 6.3715, "step": 970 }, { "epoch": 1.57, "learning_rate": 1.609696588459307e-06, "loss": 6.5001, "step": 972 }, { "epoch": 1.57, "learning_rate": 1.6080334973988695e-06, "loss": 5.6829, "step": 974 }, { "epoch": 1.58, "learning_rate": 1.6063677338637626e-06, "loss": 6.1467, "step": 976 }, { "epoch": 1.58, "learning_rate": 1.6046993051754754e-06, "loss": 6.1594, "step": 978 }, { "epoch": 1.58, "learning_rate": 1.6030282186672115e-06, "loss": 5.9862, "step": 980 }, { "epoch": 1.59, "learning_rate": 1.6013544816838563e-06, "loss": 6.5831, "step": 982 }, { "epoch": 1.59, "learning_rate": 1.5996781015819447e-06, "loss": 6.3911, "step": 984 }, { "epoch": 1.59, "learning_rate": 1.5979990857296293e-06, "loss": 6.5145, "step": 986 }, { "epoch": 1.6, "learning_rate": 1.5963174415066467e-06, "loss": 6.3745, "step": 988 }, { "epoch": 1.6, "learning_rate": 1.5946331763042866e-06, "loss": 6.4732, "step": 990 }, { "epoch": 1.6, "learning_rate": 1.5929462975253586e-06, "loss": 6.4846, "step": 992 }, { "epoch": 1.61, "learning_rate": 1.5912568125841588e-06, "loss": 5.7701, "step": 994 }, { "epoch": 1.61, "learning_rate": 1.5895647289064393e-06, "loss": 6.1163, "step": 996 }, { "epoch": 1.61, "learning_rate": 1.5878700539293738e-06, "loss": 6.1529, "step": 998 }, { "epoch": 1.62, "learning_rate": 1.5861727951015258e-06, "loss": 6.2204, "step": 1000 }, { "epoch": 1.62, "learning_rate": 1.5844729598828149e-06, "loss": 6.5347, "step": 1002 }, { "epoch": 1.62, "learning_rate": 1.5827705557444851e-06, "loss": 6.0563, "step": 1004 }, { "epoch": 1.63, "learning_rate": 1.5810655901690713e-06, "loss": 6.4918, "step": 1006 }, { "epoch": 1.63, "learning_rate": 1.579358070650367e-06, "loss": 6.1495, "step": 1008 }, { "epoch": 1.63, "learning_rate": 1.5776480046933905e-06, "loss": 6.4362, "step": 1010 }, { "epoch": 1.64, "learning_rate": 1.5759353998143525e-06, "loss": 5.8431, "step": 1012 }, { "epoch": 1.64, "learning_rate": 1.5742202635406235e-06, "loss": 5.7574, "step": 1014 }, { "epoch": 1.64, "learning_rate": 1.5725026034106996e-06, "loss": 6.4673, "step": 1016 }, { "epoch": 1.65, "learning_rate": 1.57078242697417e-06, "loss": 6.2053, "step": 1018 }, { "epoch": 1.65, "learning_rate": 1.5690597417916837e-06, "loss": 6.4113, "step": 1020 }, { "epoch": 1.65, "learning_rate": 1.5673345554349168e-06, "loss": 6.1851, "step": 1022 }, { "epoch": 1.65, "learning_rate": 1.5656068754865386e-06, "loss": 6.2666, "step": 1024 }, { "epoch": 1.66, "learning_rate": 1.5638767095401778e-06, "loss": 6.4074, "step": 1026 }, { "epoch": 1.66, "learning_rate": 1.5621440652003905e-06, "loss": 6.0503, "step": 1028 }, { "epoch": 1.66, "learning_rate": 1.5604089500826257e-06, "loss": 6.428, "step": 1030 }, { "epoch": 1.67, "learning_rate": 1.558671371813192e-06, "loss": 6.1145, "step": 1032 }, { "epoch": 1.67, "learning_rate": 1.5569313380292246e-06, "loss": 6.4294, "step": 1034 }, { "epoch": 1.67, "learning_rate": 1.5551888563786512e-06, "loss": 6.4701, "step": 1036 }, { "epoch": 1.68, "learning_rate": 1.5534439345201586e-06, "loss": 6.3885, "step": 1038 }, { "epoch": 1.68, "learning_rate": 1.5516965801231586e-06, "loss": 6.2604, "step": 1040 }, { "epoch": 1.68, "learning_rate": 1.5499468008677549e-06, "loss": 5.6512, "step": 1042 }, { "epoch": 1.69, "learning_rate": 1.5481946044447098e-06, "loss": 6.3669, "step": 1044 }, { "epoch": 1.69, "learning_rate": 1.5464399985554088e-06, "loss": 6.3416, "step": 1046 }, { "epoch": 1.69, "learning_rate": 1.5446829909118275e-06, "loss": 6.5978, "step": 1048 }, { "epoch": 1.7, "learning_rate": 1.5429235892364994e-06, "loss": 6.3573, "step": 1050 }, { "epoch": 1.7, "learning_rate": 1.5411618012624786e-06, "loss": 5.7089, "step": 1052 }, { "epoch": 1.7, "learning_rate": 1.5393976347333085e-06, "loss": 5.6281, "step": 1054 }, { "epoch": 1.71, "learning_rate": 1.537631097402987e-06, "loss": 6.2042, "step": 1056 }, { "epoch": 1.71, "learning_rate": 1.5358621970359324e-06, "loss": 6.112, "step": 1058 }, { "epoch": 1.71, "learning_rate": 1.5340909414069486e-06, "loss": 6.4812, "step": 1060 }, { "epoch": 1.72, "learning_rate": 1.5323173383011921e-06, "loss": 6.0084, "step": 1062 }, { "epoch": 1.72, "learning_rate": 1.5305413955141364e-06, "loss": 6.715, "step": 1064 }, { "epoch": 1.72, "learning_rate": 1.5287631208515404e-06, "loss": 6.163, "step": 1066 }, { "epoch": 1.73, "learning_rate": 1.5269825221294098e-06, "loss": 5.9242, "step": 1068 }, { "epoch": 1.73, "learning_rate": 1.5251996071739664e-06, "loss": 6.2658, "step": 1070 }, { "epoch": 1.73, "learning_rate": 1.523414383821613e-06, "loss": 6.4945, "step": 1072 }, { "epoch": 1.74, "learning_rate": 1.5216268599188976e-06, "loss": 6.1643, "step": 1074 }, { "epoch": 1.74, "learning_rate": 1.5198370433224805e-06, "loss": 6.0366, "step": 1076 }, { "epoch": 1.74, "learning_rate": 1.5180449418990975e-06, "loss": 6.4008, "step": 1078 }, { "epoch": 1.75, "learning_rate": 1.5162505635255288e-06, "loss": 6.2841, "step": 1080 }, { "epoch": 1.75, "learning_rate": 1.514453916088561e-06, "loss": 6.1791, "step": 1082 }, { "epoch": 1.75, "learning_rate": 1.5126550074849548e-06, "loss": 6.6, "step": 1084 }, { "epoch": 1.76, "learning_rate": 1.5108538456214085e-06, "loss": 6.4791, "step": 1086 }, { "epoch": 1.76, "learning_rate": 1.5090504384145249e-06, "loss": 5.9182, "step": 1088 }, { "epoch": 1.76, "learning_rate": 1.507244793790775e-06, "loss": 6.0225, "step": 1090 }, { "epoch": 1.76, "learning_rate": 1.5054369196864643e-06, "loss": 6.2072, "step": 1092 }, { "epoch": 1.77, "learning_rate": 1.5036268240476977e-06, "loss": 6.5093, "step": 1094 }, { "epoch": 1.77, "learning_rate": 1.5018145148303435e-06, "loss": 6.098, "step": 1096 }, { "epoch": 1.77, "learning_rate": 1.5e-06, "loss": 5.8986, "step": 1098 }, { "epoch": 1.78, "learning_rate": 1.4981832875319595e-06, "loss": 6.381, "step": 1100 }, { "epoch": 1.78, "learning_rate": 1.4963643854111738e-06, "loss": 6.0649, "step": 1102 }, { "epoch": 1.78, "learning_rate": 1.494543301632219e-06, "loss": 5.5922, "step": 1104 }, { "epoch": 1.79, "learning_rate": 1.4927200441992588e-06, "loss": 6.24, "step": 1106 }, { "epoch": 1.79, "learning_rate": 1.4908946211260123e-06, "loss": 5.5124, "step": 1108 }, { "epoch": 1.79, "learning_rate": 1.4890670404357169e-06, "loss": 6.4337, "step": 1110 }, { "epoch": 1.8, "learning_rate": 1.4872373101610927e-06, "loss": 6.3064, "step": 1112 }, { "epoch": 1.8, "learning_rate": 1.485405438344308e-06, "loss": 6.1087, "step": 1114 }, { "epoch": 1.8, "learning_rate": 1.4835714330369445e-06, "loss": 6.2753, "step": 1116 }, { "epoch": 1.81, "learning_rate": 1.4817353022999599e-06, "loss": 5.8132, "step": 1118 }, { "epoch": 1.81, "learning_rate": 1.4798970542036548e-06, "loss": 6.4297, "step": 1120 }, { "epoch": 1.81, "learning_rate": 1.478056696827636e-06, "loss": 6.4729, "step": 1122 }, { "epoch": 1.82, "learning_rate": 1.4762142382607808e-06, "loss": 6.0052, "step": 1124 }, { "epoch": 1.82, "learning_rate": 1.474369686601202e-06, "loss": 5.8651, "step": 1126 }, { "epoch": 1.82, "learning_rate": 1.4725230499562118e-06, "loss": 6.3222, "step": 1128 }, { "epoch": 1.83, "learning_rate": 1.4706743364422876e-06, "loss": 5.9397, "step": 1130 }, { "epoch": 1.83, "learning_rate": 1.4688235541850336e-06, "loss": 6.3193, "step": 1132 }, { "epoch": 1.83, "learning_rate": 1.466970711319148e-06, "loss": 6.3106, "step": 1134 }, { "epoch": 1.84, "learning_rate": 1.4651158159883854e-06, "loss": 6.2976, "step": 1136 }, { "epoch": 1.84, "learning_rate": 1.4632588763455212e-06, "loss": 6.1376, "step": 1138 }, { "epoch": 1.84, "learning_rate": 1.4613999005523173e-06, "loss": 5.8214, "step": 1140 }, { "epoch": 1.85, "learning_rate": 1.4595388967794836e-06, "loss": 6.134, "step": 1142 }, { "epoch": 1.85, "learning_rate": 1.4576758732066441e-06, "loss": 6.3464, "step": 1144 }, { "epoch": 1.85, "learning_rate": 1.4558108380223011e-06, "loss": 6.193, "step": 1146 }, { "epoch": 1.86, "learning_rate": 1.4539437994237976e-06, "loss": 6.3255, "step": 1148 }, { "epoch": 1.86, "learning_rate": 1.4520747656172823e-06, "loss": 6.3425, "step": 1150 }, { "epoch": 1.86, "learning_rate": 1.4502037448176732e-06, "loss": 6.1904, "step": 1152 }, { "epoch": 1.87, "learning_rate": 1.4483307452486226e-06, "loss": 6.2422, "step": 1154 }, { "epoch": 1.87, "learning_rate": 1.446455775142479e-06, "loss": 6.2154, "step": 1156 }, { "epoch": 1.87, "learning_rate": 1.4445788427402526e-06, "loss": 6.0472, "step": 1158 }, { "epoch": 1.87, "learning_rate": 1.442699956291578e-06, "loss": 6.1858, "step": 1160 }, { "epoch": 1.88, "learning_rate": 1.4408191240546787e-06, "loss": 5.3012, "step": 1162 }, { "epoch": 1.88, "learning_rate": 1.4389363542963305e-06, "loss": 6.4031, "step": 1164 }, { "epoch": 1.88, "learning_rate": 1.437051655291825e-06, "loss": 6.4019, "step": 1166 }, { "epoch": 1.89, "learning_rate": 1.4351650353249328e-06, "loss": 5.6293, "step": 1168 }, { "epoch": 1.89, "learning_rate": 1.4332765026878687e-06, "loss": 6.4613, "step": 1170 }, { "epoch": 1.89, "learning_rate": 1.4313860656812535e-06, "loss": 6.2912, "step": 1172 }, { "epoch": 1.9, "learning_rate": 1.4294937326140785e-06, "loss": 6.2582, "step": 1174 }, { "epoch": 1.9, "learning_rate": 1.4275995118036692e-06, "loss": 6.5683, "step": 1176 }, { "epoch": 1.9, "learning_rate": 1.425703411575647e-06, "loss": 6.2619, "step": 1178 }, { "epoch": 1.91, "learning_rate": 1.4238054402638949e-06, "loss": 6.0469, "step": 1180 }, { "epoch": 1.91, "learning_rate": 1.4219056062105193e-06, "loss": 6.4802, "step": 1182 }, { "epoch": 1.91, "learning_rate": 1.4200039177658142e-06, "loss": 6.3938, "step": 1184 }, { "epoch": 1.92, "learning_rate": 1.4181003832882246e-06, "loss": 6.073, "step": 1186 }, { "epoch": 1.92, "learning_rate": 1.4161950111443074e-06, "loss": 6.2202, "step": 1188 }, { "epoch": 1.92, "learning_rate": 1.4142878097086994e-06, "loss": 6.0532, "step": 1190 }, { "epoch": 1.93, "learning_rate": 1.4123787873640751e-06, "loss": 5.8988, "step": 1192 }, { "epoch": 1.93, "learning_rate": 1.4104679525011139e-06, "loss": 6.513, "step": 1194 }, { "epoch": 1.93, "learning_rate": 1.408555313518461e-06, "loss": 6.1953, "step": 1196 }, { "epoch": 1.94, "learning_rate": 1.4066408788226918e-06, "loss": 6.177, "step": 1198 }, { "epoch": 1.94, "learning_rate": 1.4047246568282737e-06, "loss": 6.1422, "step": 1200 }, { "epoch": 1.94, "learning_rate": 1.40280665595753e-06, "loss": 6.4582, "step": 1202 }, { "epoch": 1.95, "learning_rate": 1.400886884640603e-06, "loss": 6.086, "step": 1204 }, { "epoch": 1.95, "learning_rate": 1.3989653513154163e-06, "loss": 6.3604, "step": 1206 }, { "epoch": 1.95, "learning_rate": 1.3970420644276382e-06, "loss": 6.3485, "step": 1208 }, { "epoch": 1.96, "learning_rate": 1.3951170324306434e-06, "loss": 5.7222, "step": 1210 }, { "epoch": 1.96, "learning_rate": 1.3931902637854788e-06, "loss": 5.0449, "step": 1212 }, { "epoch": 1.96, "learning_rate": 1.3912617669608229e-06, "loss": 6.1346, "step": 1214 }, { "epoch": 1.97, "learning_rate": 1.3893315504329497e-06, "loss": 6.0923, "step": 1216 }, { "epoch": 1.97, "learning_rate": 1.387399622685693e-06, "loss": 6.1759, "step": 1218 }, { "epoch": 1.97, "learning_rate": 1.385465992210407e-06, "loss": 6.4627, "step": 1220 }, { "epoch": 1.97, "learning_rate": 1.3835306675059308e-06, "loss": 5.8395, "step": 1222 }, { "epoch": 1.98, "learning_rate": 1.3815936570785485e-06, "loss": 6.0278, "step": 1224 }, { "epoch": 1.98, "learning_rate": 1.3796549694419548e-06, "loss": 6.3969, "step": 1226 }, { "epoch": 1.98, "learning_rate": 1.377714613117216e-06, "loss": 6.5435, "step": 1228 }, { "epoch": 1.99, "learning_rate": 1.375772596632732e-06, "loss": 6.0634, "step": 1230 }, { "epoch": 1.99, "learning_rate": 1.3738289285242012e-06, "loss": 5.8032, "step": 1232 }, { "epoch": 1.99, "learning_rate": 1.3718836173345783e-06, "loss": 6.3641, "step": 1234 }, { "epoch": 2.0, "learning_rate": 1.3699366716140433e-06, "loss": 6.4425, "step": 1236 }, { "epoch": 2.0, "learning_rate": 1.367988099919958e-06, "loss": 5.8852, "step": 1238 }, { "epoch": 2.0, "learning_rate": 1.3660379108168322e-06, "loss": 6.2662, "step": 1240 }, { "epoch": 2.01, "learning_rate": 1.3640861128762838e-06, "loss": 6.3743, "step": 1242 }, { "epoch": 2.01, "learning_rate": 1.3621327146770022e-06, "loss": 6.2544, "step": 1244 }, { "epoch": 2.01, "learning_rate": 1.3601777248047103e-06, "loss": 6.1536, "step": 1246 }, { "epoch": 2.02, "learning_rate": 1.3582211518521272e-06, "loss": 6.2739, "step": 1248 }, { "epoch": 2.02, "learning_rate": 1.3562630044189303e-06, "loss": 5.7099, "step": 1250 }, { "epoch": 2.02, "learning_rate": 1.354303291111716e-06, "loss": 6.1313, "step": 1252 }, { "epoch": 2.03, "learning_rate": 1.3523420205439645e-06, "loss": 6.2576, "step": 1254 }, { "epoch": 2.03, "learning_rate": 1.3503792013359996e-06, "loss": 5.8142, "step": 1256 }, { "epoch": 2.03, "learning_rate": 1.3484148421149527e-06, "loss": 6.1916, "step": 1258 }, { "epoch": 2.04, "learning_rate": 1.3464489515147237e-06, "loss": 5.6573, "step": 1260 }, { "epoch": 2.04, "learning_rate": 1.3444815381759425e-06, "loss": 6.4434, "step": 1262 }, { "epoch": 2.04, "learning_rate": 1.342512610745933e-06, "loss": 6.3341, "step": 1264 }, { "epoch": 2.05, "learning_rate": 1.3405421778786736e-06, "loss": 5.9855, "step": 1266 }, { "epoch": 2.05, "learning_rate": 1.3385702482347593e-06, "loss": 5.7618, "step": 1268 }, { "epoch": 2.05, "learning_rate": 1.3365968304813637e-06, "loss": 6.0913, "step": 1270 }, { "epoch": 2.06, "learning_rate": 1.3346219332922015e-06, "loss": 5.9369, "step": 1272 }, { "epoch": 2.06, "learning_rate": 1.3326455653474895e-06, "loss": 6.0815, "step": 1274 }, { "epoch": 2.06, "learning_rate": 1.3306677353339095e-06, "loss": 6.1302, "step": 1276 }, { "epoch": 2.07, "learning_rate": 1.328688451944569e-06, "loss": 6.2322, "step": 1278 }, { "epoch": 2.07, "learning_rate": 1.3267077238789632e-06, "loss": 6.0083, "step": 1280 }, { "epoch": 2.07, "learning_rate": 1.3247255598429376e-06, "loss": 6.3264, "step": 1282 }, { "epoch": 2.08, "learning_rate": 1.3227419685486491e-06, "loss": 5.502, "step": 1284 }, { "epoch": 2.08, "learning_rate": 1.320756958714528e-06, "loss": 6.0936, "step": 1286 }, { "epoch": 2.08, "learning_rate": 1.3187705390652388e-06, "loss": 6.2401, "step": 1288 }, { "epoch": 2.08, "learning_rate": 1.3167827183316429e-06, "loss": 6.3647, "step": 1290 }, { "epoch": 2.09, "learning_rate": 1.3147935052507598e-06, "loss": 6.2787, "step": 1292 }, { "epoch": 2.09, "learning_rate": 1.3128029085657288e-06, "loss": 5.6309, "step": 1294 }, { "epoch": 2.09, "learning_rate": 1.3108109370257712e-06, "loss": 5.9382, "step": 1296 }, { "epoch": 2.1, "learning_rate": 1.30881759938615e-06, "loss": 5.8292, "step": 1298 }, { "epoch": 2.1, "learning_rate": 1.3068229044081322e-06, "loss": 6.2961, "step": 1300 }, { "epoch": 2.1, "learning_rate": 1.3048268608589533e-06, "loss": 6.4635, "step": 1302 }, { "epoch": 2.11, "learning_rate": 1.302829477511773e-06, "loss": 6.06, "step": 1304 }, { "epoch": 2.11, "learning_rate": 1.3008307631456418e-06, "loss": 5.8465, "step": 1306 }, { "epoch": 2.11, "learning_rate": 1.2988307265454596e-06, "loss": 5.9795, "step": 1308 }, { "epoch": 2.12, "learning_rate": 1.2968293765019382e-06, "loss": 6.2658, "step": 1310 }, { "epoch": 2.12, "learning_rate": 1.2948267218115623e-06, "loss": 6.1564, "step": 1312 }, { "epoch": 2.12, "learning_rate": 1.2928227712765502e-06, "loss": 6.4266, "step": 1314 }, { "epoch": 2.13, "learning_rate": 1.2908175337048173e-06, "loss": 5.7499, "step": 1316 }, { "epoch": 2.13, "learning_rate": 1.288811017909934e-06, "loss": 6.2519, "step": 1318 }, { "epoch": 2.13, "learning_rate": 1.2868032327110903e-06, "loss": 5.8981, "step": 1320 }, { "epoch": 2.14, "learning_rate": 1.284794186933055e-06, "loss": 6.3804, "step": 1322 }, { "epoch": 2.14, "learning_rate": 1.2827838894061376e-06, "loss": 5.8079, "step": 1324 }, { "epoch": 2.14, "learning_rate": 1.2807723489661494e-06, "loss": 6.4474, "step": 1326 }, { "epoch": 2.15, "learning_rate": 1.2787595744543644e-06, "loss": 6.367, "step": 1328 }, { "epoch": 2.15, "learning_rate": 1.2767455747174809e-06, "loss": 6.1068, "step": 1330 }, { "epoch": 2.15, "learning_rate": 1.2747303586075831e-06, "loss": 5.7674, "step": 1332 }, { "epoch": 2.16, "learning_rate": 1.2727139349820996e-06, "loss": 6.1313, "step": 1334 }, { "epoch": 2.16, "learning_rate": 1.2706963127037683e-06, "loss": 5.6983, "step": 1336 }, { "epoch": 2.16, "learning_rate": 1.2686775006405944e-06, "loss": 6.3627, "step": 1338 }, { "epoch": 2.17, "learning_rate": 1.2666575076658132e-06, "loss": 5.8205, "step": 1340 }, { "epoch": 2.17, "learning_rate": 1.2646363426578503e-06, "loss": 6.2026, "step": 1342 }, { "epoch": 2.17, "learning_rate": 1.262614014500282e-06, "loss": 6.375, "step": 1344 }, { "epoch": 2.18, "learning_rate": 1.2605905320817976e-06, "loss": 6.4086, "step": 1346 }, { "epoch": 2.18, "learning_rate": 1.2585659042961597e-06, "loss": 6.1262, "step": 1348 }, { "epoch": 2.18, "learning_rate": 1.2565401400421652e-06, "loss": 5.8055, "step": 1350 }, { "epoch": 2.19, "learning_rate": 1.2545132482236055e-06, "loss": 6.595, "step": 1352 }, { "epoch": 2.19, "learning_rate": 1.2524852377492284e-06, "loss": 5.9699, "step": 1354 }, { "epoch": 2.19, "learning_rate": 1.2504561175326985e-06, "loss": 5.7263, "step": 1356 }, { "epoch": 2.19, "learning_rate": 1.248425896492558e-06, "loss": 6.1658, "step": 1358 }, { "epoch": 2.2, "learning_rate": 1.2463945835521875e-06, "loss": 6.5257, "step": 1360 }, { "epoch": 2.2, "learning_rate": 1.244362187639767e-06, "loss": 6.1039, "step": 1362 }, { "epoch": 2.2, "learning_rate": 1.2423287176882357e-06, "loss": 6.1913, "step": 1364 }, { "epoch": 2.21, "learning_rate": 1.2402941826352545e-06, "loss": 5.79, "step": 1366 }, { "epoch": 2.21, "learning_rate": 1.2382585914231648e-06, "loss": 6.0318, "step": 1368 }, { "epoch": 2.21, "learning_rate": 1.2362219529989512e-06, "loss": 6.3702, "step": 1370 }, { "epoch": 2.22, "learning_rate": 1.2341842763142002e-06, "loss": 5.7685, "step": 1372 }, { "epoch": 2.22, "learning_rate": 1.2321455703250613e-06, "loss": 6.1761, "step": 1374 }, { "epoch": 2.22, "learning_rate": 1.2301058439922102e-06, "loss": 5.8961, "step": 1376 }, { "epoch": 2.23, "learning_rate": 1.2280651062808045e-06, "loss": 6.2063, "step": 1378 }, { "epoch": 2.23, "learning_rate": 1.226023366160449e-06, "loss": 5.9086, "step": 1380 }, { "epoch": 2.23, "learning_rate": 1.223980632605154e-06, "loss": 6.1152, "step": 1382 }, { "epoch": 2.24, "learning_rate": 1.2219369145932955e-06, "loss": 6.1352, "step": 1384 }, { "epoch": 2.24, "learning_rate": 1.2198922211075776e-06, "loss": 6.1588, "step": 1386 }, { "epoch": 2.24, "learning_rate": 1.217846561134991e-06, "loss": 6.2379, "step": 1388 }, { "epoch": 2.25, "learning_rate": 1.2157999436667746e-06, "loss": 6.0753, "step": 1390 }, { "epoch": 2.25, "learning_rate": 1.2137523776983756e-06, "loss": 6.3966, "step": 1392 }, { "epoch": 2.25, "learning_rate": 1.2117038722294109e-06, "loss": 5.8844, "step": 1394 }, { "epoch": 2.26, "learning_rate": 1.2096544362636254e-06, "loss": 6.2611, "step": 1396 }, { "epoch": 2.26, "learning_rate": 1.2076040788088553e-06, "loss": 6.1557, "step": 1398 }, { "epoch": 2.26, "learning_rate": 1.205552808876986e-06, "loss": 6.2408, "step": 1400 }, { "epoch": 2.27, "learning_rate": 1.2035006354839132e-06, "loss": 5.8395, "step": 1402 }, { "epoch": 2.27, "learning_rate": 1.201447567649505e-06, "loss": 5.8175, "step": 1404 }, { "epoch": 2.27, "learning_rate": 1.1993936143975598e-06, "loss": 5.4642, "step": 1406 }, { "epoch": 2.28, "learning_rate": 1.1973387847557675e-06, "loss": 5.88, "step": 1408 }, { "epoch": 2.28, "learning_rate": 1.1952830877556698e-06, "loss": 6.2455, "step": 1410 }, { "epoch": 2.28, "learning_rate": 1.193226532432622e-06, "loss": 6.1702, "step": 1412 }, { "epoch": 2.29, "learning_rate": 1.1911691278257509e-06, "loss": 6.2456, "step": 1414 }, { "epoch": 2.29, "learning_rate": 1.1891108829779163e-06, "loss": 6.2208, "step": 1416 }, { "epoch": 2.29, "learning_rate": 1.1870518069356708e-06, "loss": 6.455, "step": 1418 }, { "epoch": 2.29, "learning_rate": 1.1849919087492211e-06, "loss": 6.2341, "step": 1420 }, { "epoch": 2.3, "learning_rate": 1.1829311974723866e-06, "loss": 6.2769, "step": 1422 }, { "epoch": 2.3, "learning_rate": 1.1808696821625612e-06, "loss": 5.7287, "step": 1424 }, { "epoch": 2.3, "learning_rate": 1.1788073718806724e-06, "loss": 5.9381, "step": 1426 }, { "epoch": 2.31, "learning_rate": 1.1767442756911417e-06, "loss": 5.663, "step": 1428 }, { "epoch": 2.31, "learning_rate": 1.174680402661845e-06, "loss": 6.0789, "step": 1430 }, { "epoch": 2.31, "learning_rate": 1.1726157618640726e-06, "loss": 6.2935, "step": 1432 }, { "epoch": 2.32, "learning_rate": 1.1705503623724897e-06, "loss": 6.5166, "step": 1434 }, { "epoch": 2.32, "learning_rate": 1.1684842132650956e-06, "loss": 5.4518, "step": 1436 }, { "epoch": 2.32, "learning_rate": 1.1664173236231846e-06, "loss": 6.2656, "step": 1438 }, { "epoch": 2.33, "learning_rate": 1.164349702531306e-06, "loss": 5.5944, "step": 1440 }, { "epoch": 2.33, "learning_rate": 1.1622813590772243e-06, "loss": 6.3351, "step": 1442 }, { "epoch": 2.33, "learning_rate": 1.1602123023518777e-06, "loss": 6.4678, "step": 1444 }, { "epoch": 2.34, "learning_rate": 1.1581425414493408e-06, "loss": 5.9154, "step": 1446 }, { "epoch": 2.34, "learning_rate": 1.156072085466783e-06, "loss": 6.3876, "step": 1448 }, { "epoch": 2.34, "learning_rate": 1.154000943504428e-06, "loss": 6.0515, "step": 1450 }, { "epoch": 2.35, "learning_rate": 1.1519291246655158e-06, "loss": 6.2604, "step": 1452 }, { "epoch": 2.35, "learning_rate": 1.14985663805626e-06, "loss": 5.7749, "step": 1454 }, { "epoch": 2.35, "learning_rate": 1.1477834927858103e-06, "loss": 6.0236, "step": 1456 }, { "epoch": 2.36, "learning_rate": 1.1457096979662113e-06, "loss": 6.4244, "step": 1458 }, { "epoch": 2.36, "learning_rate": 1.1436352627123623e-06, "loss": 6.2735, "step": 1460 }, { "epoch": 2.36, "learning_rate": 1.1415601961419774e-06, "loss": 6.3662, "step": 1462 }, { "epoch": 2.37, "learning_rate": 1.1394845073755455e-06, "loss": 6.2688, "step": 1464 }, { "epoch": 2.37, "learning_rate": 1.1374082055362908e-06, "loss": 5.4211, "step": 1466 }, { "epoch": 2.37, "learning_rate": 1.1353312997501312e-06, "loss": 5.9967, "step": 1468 }, { "epoch": 2.38, "learning_rate": 1.1332537991456397e-06, "loss": 6.2811, "step": 1470 }, { "epoch": 2.38, "learning_rate": 1.1311757128540039e-06, "loss": 6.0461, "step": 1472 }, { "epoch": 2.38, "learning_rate": 1.129097050008985e-06, "loss": 6.3685, "step": 1474 }, { "epoch": 2.39, "learning_rate": 1.1270178197468786e-06, "loss": 5.7119, "step": 1476 }, { "epoch": 2.39, "learning_rate": 1.1249380312064749e-06, "loss": 5.9253, "step": 1478 }, { "epoch": 2.39, "learning_rate": 1.1228576935290168e-06, "loss": 6.2371, "step": 1480 }, { "epoch": 2.4, "learning_rate": 1.1207768158581613e-06, "loss": 6.2198, "step": 1482 }, { "epoch": 2.4, "learning_rate": 1.1186954073399386e-06, "loss": 5.9667, "step": 1484 }, { "epoch": 2.4, "learning_rate": 1.116613477122713e-06, "loss": 5.8026, "step": 1486 }, { "epoch": 2.4, "learning_rate": 1.114531034357141e-06, "loss": 5.9698, "step": 1488 }, { "epoch": 2.41, "learning_rate": 1.1124480881961318e-06, "loss": 6.2043, "step": 1490 }, { "epoch": 2.41, "learning_rate": 1.110364647794807e-06, "loss": 5.9397, "step": 1492 }, { "epoch": 2.41, "learning_rate": 1.1082807223104617e-06, "loss": 6.0089, "step": 1494 }, { "epoch": 2.42, "learning_rate": 1.1061963209025222e-06, "loss": 5.7651, "step": 1496 }, { "epoch": 2.42, "learning_rate": 1.1041114527325064e-06, "loss": 6.2575, "step": 1498 }, { "epoch": 2.42, "learning_rate": 1.1020261269639842e-06, "loss": 6.3937, "step": 1500 }, { "epoch": 2.43, "learning_rate": 1.0999403527625366e-06, "loss": 6.2106, "step": 1502 }, { "epoch": 2.43, "learning_rate": 1.0978541392957154e-06, "loss": 5.7234, "step": 1504 }, { "epoch": 2.43, "learning_rate": 1.095767495733004e-06, "loss": 6.2505, "step": 1506 }, { "epoch": 2.44, "learning_rate": 1.0936804312457747e-06, "loss": 5.6241, "step": 1508 }, { "epoch": 2.44, "learning_rate": 1.0915929550072515e-06, "loss": 5.4477, "step": 1510 }, { "epoch": 2.44, "learning_rate": 1.0895050761924667e-06, "loss": 6.3568, "step": 1512 }, { "epoch": 2.45, "learning_rate": 1.0874168039782225e-06, "loss": 6.2436, "step": 1514 }, { "epoch": 2.45, "learning_rate": 1.0853281475430516e-06, "loss": 6.265, "step": 1516 }, { "epoch": 2.45, "learning_rate": 1.0832391160671728e-06, "loss": 5.6497, "step": 1518 }, { "epoch": 2.46, "learning_rate": 1.0811497187324555e-06, "loss": 5.7196, "step": 1520 }, { "epoch": 2.46, "learning_rate": 1.0790599647223762e-06, "loss": 6.0961, "step": 1522 }, { "epoch": 2.46, "learning_rate": 1.0769698632219792e-06, "loss": 5.7752, "step": 1524 }, { "epoch": 2.47, "learning_rate": 1.074879423417837e-06, "loss": 5.9801, "step": 1526 }, { "epoch": 2.47, "learning_rate": 1.0727886544980067e-06, "loss": 5.9149, "step": 1528 }, { "epoch": 2.47, "learning_rate": 1.0706975656519944e-06, "loss": 6.2367, "step": 1530 }, { "epoch": 2.48, "learning_rate": 1.068606166070712e-06, "loss": 6.3122, "step": 1532 }, { "epoch": 2.48, "learning_rate": 1.0665144649464355e-06, "loss": 6.1117, "step": 1534 }, { "epoch": 2.48, "learning_rate": 1.064422471472768e-06, "loss": 6.0872, "step": 1536 }, { "epoch": 2.49, "learning_rate": 1.0623301948445971e-06, "loss": 6.1226, "step": 1538 }, { "epoch": 2.49, "learning_rate": 1.0602376442580543e-06, "loss": 5.5424, "step": 1540 }, { "epoch": 2.49, "learning_rate": 1.0581448289104758e-06, "loss": 5.9785, "step": 1542 }, { "epoch": 2.5, "learning_rate": 1.0560517580003615e-06, "loss": 6.0885, "step": 1544 }, { "epoch": 2.5, "learning_rate": 1.0539584407273347e-06, "loss": 6.1934, "step": 1546 }, { "epoch": 2.5, "learning_rate": 1.0518648862921012e-06, "loss": 6.1035, "step": 1548 }, { "epoch": 2.51, "learning_rate": 1.0497711038964086e-06, "loss": 5.9217, "step": 1550 }, { "epoch": 2.51, "learning_rate": 1.0476771027430085e-06, "loss": 6.2234, "step": 1552 }, { "epoch": 2.51, "learning_rate": 1.0455828920356114e-06, "loss": 6.2327, "step": 1554 }, { "epoch": 2.51, "learning_rate": 1.0434884809788507e-06, "loss": 6.4939, "step": 1556 }, { "epoch": 2.52, "learning_rate": 1.0413938787782392e-06, "loss": 5.9045, "step": 1558 }, { "epoch": 2.52, "learning_rate": 1.0392990946401312e-06, "loss": 6.3292, "step": 1560 }, { "epoch": 2.52, "learning_rate": 1.03720413777168e-06, "loss": 6.0232, "step": 1562 }, { "epoch": 2.53, "learning_rate": 1.0351090173807968e-06, "loss": 6.2403, "step": 1564 }, { "epoch": 2.53, "learning_rate": 1.0330137426761133e-06, "loss": 6.1885, "step": 1566 }, { "epoch": 2.53, "learning_rate": 1.0309183228669396e-06, "loss": 5.8844, "step": 1568 }, { "epoch": 2.54, "learning_rate": 1.0288227671632219e-06, "loss": 6.4348, "step": 1570 }, { "epoch": 2.54, "learning_rate": 1.0267270847755047e-06, "loss": 6.5083, "step": 1572 }, { "epoch": 2.54, "learning_rate": 1.0246312849148897e-06, "loss": 6.0011, "step": 1574 }, { "epoch": 2.55, "learning_rate": 1.0225353767929943e-06, "loss": 5.9635, "step": 1576 }, { "epoch": 2.55, "learning_rate": 1.0204393696219115e-06, "loss": 6.3792, "step": 1578 }, { "epoch": 2.55, "learning_rate": 1.0183432726141706e-06, "loss": 6.174, "step": 1580 }, { "epoch": 2.56, "learning_rate": 1.0162470949826946e-06, "loss": 6.2007, "step": 1582 }, { "epoch": 2.56, "learning_rate": 1.014150845940762e-06, "loss": 5.8667, "step": 1584 }, { "epoch": 2.56, "learning_rate": 1.0120545347019646e-06, "loss": 6.1047, "step": 1586 }, { "epoch": 2.57, "learning_rate": 1.0099581704801673e-06, "loss": 6.2524, "step": 1588 }, { "epoch": 2.57, "learning_rate": 1.0078617624894683e-06, "loss": 5.9776, "step": 1590 }, { "epoch": 2.57, "learning_rate": 1.005765319944158e-06, "loss": 6.3895, "step": 1592 }, { "epoch": 2.58, "learning_rate": 1.0036688520586787e-06, "loss": 6.2257, "step": 1594 }, { "epoch": 2.58, "learning_rate": 1.0015723680475844e-06, "loss": 6.3385, "step": 1596 }, { "epoch": 2.58, "learning_rate": 9.994758771254996e-07, "loss": 6.0314, "step": 1598 }, { "epoch": 2.59, "learning_rate": 9.973793885070792e-07, "loss": 6.1063, "step": 1600 }, { "epoch": 2.59, "learning_rate": 9.952829114069678e-07, "loss": 6.002, "step": 1602 }, { "epoch": 2.59, "learning_rate": 9.9318645503976e-07, "loss": 5.6794, "step": 1604 }, { "epoch": 2.6, "learning_rate": 9.910900286199586e-07, "loss": 6.2951, "step": 1606 }, { "epoch": 2.6, "learning_rate": 9.889936413619356e-07, "loss": 5.8184, "step": 1608 }, { "epoch": 2.6, "learning_rate": 9.868973024798895e-07, "loss": 5.4467, "step": 1610 }, { "epoch": 2.61, "learning_rate": 9.848010211878072e-07, "loss": 6.0832, "step": 1612 }, { "epoch": 2.61, "learning_rate": 9.827048066994224e-07, "loss": 6.3475, "step": 1614 }, { "epoch": 2.61, "learning_rate": 9.806086682281757e-07, "loss": 6.4013, "step": 1616 }, { "epoch": 2.61, "learning_rate": 9.78512614987172e-07, "loss": 4.5245, "step": 1618 }, { "epoch": 2.62, "learning_rate": 9.764166561891432e-07, "loss": 6.2644, "step": 1620 }, { "epoch": 2.62, "learning_rate": 9.743208010464048e-07, "loss": 6.0541, "step": 1622 }, { "epoch": 2.62, "learning_rate": 9.72225058770818e-07, "loss": 5.9177, "step": 1624 }, { "epoch": 2.63, "learning_rate": 9.70129438573747e-07, "loss": 6.2789, "step": 1626 }, { "epoch": 2.63, "learning_rate": 9.680339496660191e-07, "loss": 6.0265, "step": 1628 }, { "epoch": 2.63, "learning_rate": 9.659386012578863e-07, "loss": 6.5556, "step": 1630 }, { "epoch": 2.64, "learning_rate": 9.638434025589809e-07, "loss": 6.4456, "step": 1632 }, { "epoch": 2.64, "learning_rate": 9.617483627782786e-07, "loss": 6.3286, "step": 1634 }, { "epoch": 2.64, "learning_rate": 9.596534911240565e-07, "loss": 6.1645, "step": 1636 }, { "epoch": 2.65, "learning_rate": 9.575587968038518e-07, "loss": 5.7545, "step": 1638 }, { "epoch": 2.65, "learning_rate": 9.554642890244233e-07, "loss": 6.0317, "step": 1640 }, { "epoch": 2.65, "learning_rate": 9.53369976991709e-07, "loss": 5.4503, "step": 1642 }, { "epoch": 2.66, "learning_rate": 9.512758699107878e-07, "loss": 5.8737, "step": 1644 }, { "epoch": 2.66, "learning_rate": 9.491819769858366e-07, "loss": 6.0376, "step": 1646 }, { "epoch": 2.66, "learning_rate": 9.470883074200915e-07, "loss": 6.2006, "step": 1648 }, { "epoch": 2.67, "learning_rate": 9.44994870415807e-07, "loss": 6.078, "step": 1650 }, { "epoch": 2.67, "learning_rate": 9.429016751742149e-07, "loss": 5.8481, "step": 1652 }, { "epoch": 2.67, "learning_rate": 9.408087308954852e-07, "loss": 6.0188, "step": 1654 }, { "epoch": 2.68, "learning_rate": 9.387160467786838e-07, "loss": 5.8085, "step": 1656 }, { "epoch": 2.68, "learning_rate": 9.366236320217337e-07, "loss": 5.838, "step": 1658 }, { "epoch": 2.68, "learning_rate": 9.34531495821375e-07, "loss": 5.7474, "step": 1660 }, { "epoch": 2.69, "learning_rate": 9.324396473731216e-07, "loss": 6.3246, "step": 1662 }, { "epoch": 2.69, "learning_rate": 9.303480958712238e-07, "loss": 6.1301, "step": 1664 }, { "epoch": 2.69, "learning_rate": 9.282568505086262e-07, "loss": 6.2999, "step": 1666 }, { "epoch": 2.7, "learning_rate": 9.261659204769282e-07, "loss": 5.5501, "step": 1668 }, { "epoch": 2.7, "learning_rate": 9.240753149663431e-07, "loss": 6.2039, "step": 1670 }, { "epoch": 2.7, "learning_rate": 9.219850431656578e-07, "loss": 6.3152, "step": 1672 }, { "epoch": 2.71, "learning_rate": 9.198951142621928e-07, "loss": 5.5462, "step": 1674 }, { "epoch": 2.71, "learning_rate": 9.17805537441761e-07, "loss": 5.8592, "step": 1676 }, { "epoch": 2.71, "learning_rate": 9.15716321888628e-07, "loss": 5.5209, "step": 1678 }, { "epoch": 2.72, "learning_rate": 9.136274767854716e-07, "loss": 5.9594, "step": 1680 }, { "epoch": 2.72, "learning_rate": 9.115390113133413e-07, "loss": 6.2646, "step": 1682 }, { "epoch": 2.72, "learning_rate": 9.094509346516177e-07, "loss": 5.9554, "step": 1684 }, { "epoch": 2.72, "learning_rate": 9.073632559779729e-07, "loss": 5.8664, "step": 1686 }, { "epoch": 2.73, "learning_rate": 9.052759844683294e-07, "loss": 6.1002, "step": 1688 }, { "epoch": 2.73, "learning_rate": 9.031891292968209e-07, "loss": 6.1675, "step": 1690 }, { "epoch": 2.73, "learning_rate": 9.011026996357502e-07, "loss": 6.0664, "step": 1692 }, { "epoch": 2.74, "learning_rate": 8.990167046555504e-07, "loss": 6.2686, "step": 1694 }, { "epoch": 2.74, "learning_rate": 8.969311535247437e-07, "loss": 6.0531, "step": 1696 }, { "epoch": 2.74, "learning_rate": 8.948460554099018e-07, "loss": 6.0652, "step": 1698 }, { "epoch": 2.75, "learning_rate": 8.927614194756052e-07, "loss": 6.0999, "step": 1700 }, { "epoch": 2.75, "learning_rate": 8.906772548844025e-07, "loss": 6.1437, "step": 1702 }, { "epoch": 2.75, "learning_rate": 8.885935707967715e-07, "loss": 6.1177, "step": 1704 }, { "epoch": 2.76, "learning_rate": 8.865103763710778e-07, "loss": 6.1103, "step": 1706 }, { "epoch": 2.76, "learning_rate": 8.844276807635342e-07, "loss": 5.8831, "step": 1708 }, { "epoch": 2.76, "learning_rate": 8.823454931281616e-07, "loss": 5.9109, "step": 1710 }, { "epoch": 2.77, "learning_rate": 8.802638226167478e-07, "loss": 6.5164, "step": 1712 }, { "epoch": 2.77, "learning_rate": 8.781826783788083e-07, "loss": 6.0546, "step": 1714 }, { "epoch": 2.77, "learning_rate": 8.761020695615449e-07, "loss": 6.1878, "step": 1716 }, { "epoch": 2.78, "learning_rate": 8.740220053098066e-07, "loss": 6.2062, "step": 1718 }, { "epoch": 2.78, "learning_rate": 8.719424947660485e-07, "loss": 5.1807, "step": 1720 }, { "epoch": 2.78, "learning_rate": 8.698635470702923e-07, "loss": 6.3489, "step": 1722 }, { "epoch": 2.79, "learning_rate": 8.677851713600854e-07, "loss": 6.1953, "step": 1724 }, { "epoch": 2.79, "learning_rate": 8.657073767704615e-07, "loss": 6.0458, "step": 1726 }, { "epoch": 2.79, "learning_rate": 8.636301724339003e-07, "loss": 6.2063, "step": 1728 }, { "epoch": 2.8, "learning_rate": 8.615535674802864e-07, "loss": 6.0141, "step": 1730 }, { "epoch": 2.8, "learning_rate": 8.594775710368703e-07, "loss": 6.3073, "step": 1732 }, { "epoch": 2.8, "learning_rate": 8.574021922282292e-07, "loss": 6.0074, "step": 1734 }, { "epoch": 2.81, "learning_rate": 8.553274401762236e-07, "loss": 6.3685, "step": 1736 }, { "epoch": 2.81, "learning_rate": 8.532533239999602e-07, "loss": 5.7029, "step": 1738 }, { "epoch": 2.81, "learning_rate": 8.511798528157511e-07, "loss": 6.1316, "step": 1740 }, { "epoch": 2.82, "learning_rate": 8.491070357370729e-07, "loss": 5.805, "step": 1742 }, { "epoch": 2.82, "learning_rate": 8.470348818745276e-07, "loss": 6.0199, "step": 1744 }, { "epoch": 2.82, "learning_rate": 8.44963400335802e-07, "loss": 5.8261, "step": 1746 }, { "epoch": 2.83, "learning_rate": 8.428926002256282e-07, "loss": 6.3244, "step": 1748 }, { "epoch": 2.83, "learning_rate": 8.408224906457429e-07, "loss": 5.5886, "step": 1750 }, { "epoch": 2.83, "learning_rate": 8.387530806948477e-07, "loss": 5.9277, "step": 1752 }, { "epoch": 2.83, "learning_rate": 8.366843794685694e-07, "loss": 6.1806, "step": 1754 }, { "epoch": 2.84, "learning_rate": 8.346163960594192e-07, "loss": 6.1859, "step": 1756 }, { "epoch": 2.84, "learning_rate": 8.325491395567539e-07, "loss": 6.0103, "step": 1758 }, { "epoch": 2.84, "learning_rate": 8.304826190467349e-07, "loss": 5.8642, "step": 1760 }, { "epoch": 2.85, "learning_rate": 8.284168436122897e-07, "loss": 6.0721, "step": 1762 }, { "epoch": 2.85, "learning_rate": 8.263518223330696e-07, "loss": 5.2597, "step": 1764 }, { "epoch": 2.85, "learning_rate": 8.24287564285412e-07, "loss": 5.9716, "step": 1766 }, { "epoch": 2.86, "learning_rate": 8.222240785422995e-07, "loss": 6.1662, "step": 1768 }, { "epoch": 2.86, "learning_rate": 8.201613741733202e-07, "loss": 6.4284, "step": 1770 }, { "epoch": 2.86, "learning_rate": 8.180994602446279e-07, "loss": 5.5088, "step": 1772 }, { "epoch": 2.87, "learning_rate": 8.160383458189022e-07, "loss": 6.2377, "step": 1774 }, { "epoch": 2.87, "learning_rate": 8.139780399553079e-07, "loss": 6.2402, "step": 1776 }, { "epoch": 2.87, "learning_rate": 8.119185517094577e-07, "loss": 6.4094, "step": 1778 }, { "epoch": 2.88, "learning_rate": 8.098598901333692e-07, "loss": 5.8309, "step": 1780 }, { "epoch": 2.88, "learning_rate": 8.078020642754273e-07, "loss": 5.8735, "step": 1782 }, { "epoch": 2.88, "learning_rate": 8.057450831803427e-07, "loss": 6.2882, "step": 1784 }, { "epoch": 2.89, "learning_rate": 8.036889558891142e-07, "loss": 6.0149, "step": 1786 }, { "epoch": 2.89, "learning_rate": 8.016336914389873e-07, "loss": 6.2319, "step": 1788 }, { "epoch": 2.89, "learning_rate": 7.995792988634151e-07, "loss": 5.6783, "step": 1790 }, { "epoch": 2.9, "learning_rate": 7.975257871920193e-07, "loss": 5.5368, "step": 1792 }, { "epoch": 2.9, "learning_rate": 7.954731654505491e-07, "loss": 5.7339, "step": 1794 }, { "epoch": 2.9, "learning_rate": 7.93421442660842e-07, "loss": 5.8137, "step": 1796 }, { "epoch": 2.91, "learning_rate": 7.913706278407849e-07, "loss": 6.0696, "step": 1798 }, { "epoch": 2.91, "learning_rate": 7.89320730004274e-07, "loss": 6.0298, "step": 1800 }, { "epoch": 2.91, "learning_rate": 7.872717581611741e-07, "loss": 5.9934, "step": 1802 }, { "epoch": 2.92, "learning_rate": 7.852237213172811e-07, "loss": 5.8865, "step": 1804 }, { "epoch": 2.92, "learning_rate": 7.831766284742806e-07, "loss": 6.481, "step": 1806 }, { "epoch": 2.92, "learning_rate": 7.811304886297104e-07, "loss": 6.1973, "step": 1808 }, { "epoch": 2.93, "learning_rate": 7.790853107769178e-07, "loss": 6.3481, "step": 1810 }, { "epoch": 2.93, "learning_rate": 7.770411039050229e-07, "loss": 6.235, "step": 1812 }, { "epoch": 2.93, "learning_rate": 7.749978769988777e-07, "loss": 6.1487, "step": 1814 }, { "epoch": 2.93, "learning_rate": 7.729556390390275e-07, "loss": 5.6685, "step": 1816 }, { "epoch": 2.94, "learning_rate": 7.709143990016701e-07, "loss": 6.3257, "step": 1818 }, { "epoch": 2.94, "learning_rate": 7.688741658586178e-07, "loss": 6.0141, "step": 1820 }, { "epoch": 2.94, "learning_rate": 7.668349485772571e-07, "loss": 6.3748, "step": 1822 }, { "epoch": 2.95, "learning_rate": 7.6479675612051e-07, "loss": 5.9491, "step": 1824 }, { "epoch": 2.95, "learning_rate": 7.627595974467929e-07, "loss": 6.2564, "step": 1826 }, { "epoch": 2.95, "learning_rate": 7.607234815099801e-07, "loss": 6.2111, "step": 1828 }, { "epoch": 2.96, "learning_rate": 7.586884172593608e-07, "loss": 5.5946, "step": 1830 }, { "epoch": 2.96, "learning_rate": 7.566544136396036e-07, "loss": 6.3797, "step": 1832 }, { "epoch": 2.96, "learning_rate": 7.546214795907139e-07, "loss": 5.0831, "step": 1834 }, { "epoch": 2.97, "learning_rate": 7.525896240479976e-07, "loss": 6.0609, "step": 1836 }, { "epoch": 2.97, "learning_rate": 7.505588559420187e-07, "loss": 6.1916, "step": 1838 }, { "epoch": 2.97, "learning_rate": 7.485291841985626e-07, "loss": 6.0715, "step": 1840 }, { "epoch": 2.98, "learning_rate": 7.465006177385952e-07, "loss": 6.2253, "step": 1842 }, { "epoch": 2.98, "learning_rate": 7.444731654782253e-07, "loss": 5.6089, "step": 1844 }, { "epoch": 2.98, "learning_rate": 7.424468363286633e-07, "loss": 5.9458, "step": 1846 }, { "epoch": 2.99, "learning_rate": 7.404216391961847e-07, "loss": 6.1995, "step": 1848 }, { "epoch": 2.99, "learning_rate": 7.383975829820873e-07, "loss": 5.889, "step": 1850 }, { "epoch": 2.99, "learning_rate": 7.363746765826568e-07, "loss": 5.7558, "step": 1852 }, { "epoch": 3.0, "learning_rate": 7.343529288891239e-07, "loss": 6.2806, "step": 1854 }, { "epoch": 3.0, "learning_rate": 7.323323487876256e-07, "loss": 6.117, "step": 1856 }, { "epoch": 3.0, "learning_rate": 7.303129451591686e-07, "loss": 5.9999, "step": 1858 }, { "epoch": 3.01, "learning_rate": 7.282947268795876e-07, "loss": 6.1178, "step": 1860 }, { "epoch": 3.01, "learning_rate": 7.262777028195079e-07, "loss": 6.1407, "step": 1862 }, { "epoch": 3.01, "learning_rate": 7.242618818443056e-07, "loss": 6.3491, "step": 1864 }, { "epoch": 3.02, "learning_rate": 7.222472728140694e-07, "loss": 6.2381, "step": 1866 }, { "epoch": 3.02, "learning_rate": 7.202338845835605e-07, "loss": 5.9172, "step": 1868 }, { "epoch": 3.02, "learning_rate": 7.182217260021748e-07, "loss": 6.3349, "step": 1870 }, { "epoch": 3.03, "learning_rate": 7.162108059139032e-07, "loss": 5.8465, "step": 1872 }, { "epoch": 3.03, "learning_rate": 7.142011331572936e-07, "loss": 6.0377, "step": 1874 }, { "epoch": 3.03, "learning_rate": 7.121927165654108e-07, "loss": 5.9383, "step": 1876 }, { "epoch": 3.04, "learning_rate": 7.101855649657991e-07, "loss": 6.3187, "step": 1878 }, { "epoch": 3.04, "learning_rate": 7.08179687180442e-07, "loss": 6.2913, "step": 1880 }, { "epoch": 3.04, "learning_rate": 7.061750920257258e-07, "loss": 5.9955, "step": 1882 }, { "epoch": 3.04, "learning_rate": 7.041717883123976e-07, "loss": 6.126, "step": 1884 }, { "epoch": 3.05, "learning_rate": 7.02169784845529e-07, "loss": 6.1044, "step": 1886 }, { "epoch": 3.05, "learning_rate": 7.001690904244766e-07, "loss": 6.1142, "step": 1888 }, { "epoch": 3.05, "learning_rate": 6.981697138428433e-07, "loss": 6.0607, "step": 1890 }, { "epoch": 3.06, "learning_rate": 6.961716638884399e-07, "loss": 6.1981, "step": 1892 }, { "epoch": 3.06, "learning_rate": 6.94174949343246e-07, "loss": 6.3634, "step": 1894 }, { "epoch": 3.06, "learning_rate": 6.921795789833722e-07, "loss": 5.6222, "step": 1896 }, { "epoch": 3.07, "learning_rate": 6.901855615790205e-07, "loss": 6.1772, "step": 1898 }, { "epoch": 3.07, "learning_rate": 6.881929058944469e-07, "loss": 5.7795, "step": 1900 }, { "epoch": 3.07, "learning_rate": 6.862016206879216e-07, "loss": 6.1229, "step": 1902 }, { "epoch": 3.08, "learning_rate": 6.842117147116913e-07, "loss": 5.6223, "step": 1904 }, { "epoch": 3.08, "learning_rate": 6.822231967119409e-07, "loss": 5.7032, "step": 1906 }, { "epoch": 3.08, "learning_rate": 6.802360754287547e-07, "loss": 5.5354, "step": 1908 }, { "epoch": 3.09, "learning_rate": 6.782503595960781e-07, "loss": 6.0374, "step": 1910 }, { "epoch": 3.09, "learning_rate": 6.76266057941679e-07, "loss": 6.3156, "step": 1912 }, { "epoch": 3.09, "learning_rate": 6.742831791871095e-07, "loss": 6.0218, "step": 1914 }, { "epoch": 3.1, "learning_rate": 6.723017320476678e-07, "loss": 5.9377, "step": 1916 }, { "epoch": 3.1, "learning_rate": 6.7032172523236e-07, "loss": 5.8099, "step": 1918 }, { "epoch": 3.1, "learning_rate": 6.683431674438612e-07, "loss": 5.9519, "step": 1920 }, { "epoch": 3.11, "learning_rate": 6.663660673784775e-07, "loss": 5.8745, "step": 1922 }, { "epoch": 3.11, "learning_rate": 6.643904337261082e-07, "loss": 6.1688, "step": 1924 }, { "epoch": 3.11, "learning_rate": 6.624162751702076e-07, "loss": 5.8773, "step": 1926 }, { "epoch": 3.12, "learning_rate": 6.604436003877464e-07, "loss": 6.0271, "step": 1928 }, { "epoch": 3.12, "learning_rate": 6.584724180491729e-07, "loss": 5.7793, "step": 1930 }, { "epoch": 3.12, "learning_rate": 6.565027368183768e-07, "loss": 6.1935, "step": 1932 }, { "epoch": 3.13, "learning_rate": 6.545345653526495e-07, "loss": 6.0947, "step": 1934 }, { "epoch": 3.13, "learning_rate": 6.525679123026463e-07, "loss": 5.8929, "step": 1936 }, { "epoch": 3.13, "learning_rate": 6.506027863123491e-07, "loss": 6.1234, "step": 1938 }, { "epoch": 3.14, "learning_rate": 6.48639196019028e-07, "loss": 5.3506, "step": 1940 }, { "epoch": 3.14, "learning_rate": 6.466771500532029e-07, "loss": 5.9521, "step": 1942 }, { "epoch": 3.14, "learning_rate": 6.447166570386063e-07, "loss": 6.1885, "step": 1944 }, { "epoch": 3.15, "learning_rate": 6.427577255921449e-07, "loss": 5.851, "step": 1946 }, { "epoch": 3.15, "learning_rate": 6.40800364323862e-07, "loss": 5.8766, "step": 1948 }, { "epoch": 3.15, "learning_rate": 6.38844581836899e-07, "loss": 5.7743, "step": 1950 }, { "epoch": 3.15, "learning_rate": 6.368903867274584e-07, "loss": 6.2211, "step": 1952 }, { "epoch": 3.16, "learning_rate": 6.34937787584767e-07, "loss": 6.1324, "step": 1954 }, { "epoch": 3.16, "learning_rate": 6.329867929910347e-07, "loss": 6.384, "step": 1956 }, { "epoch": 3.16, "learning_rate": 6.310374115214203e-07, "loss": 5.9948, "step": 1958 }, { "epoch": 3.17, "learning_rate": 6.290896517439924e-07, "loss": 5.6496, "step": 1960 }, { "epoch": 3.17, "learning_rate": 6.271435222196914e-07, "loss": 6.2404, "step": 1962 }, { "epoch": 3.17, "learning_rate": 6.251990315022927e-07, "loss": 5.3699, "step": 1964 }, { "epoch": 3.18, "learning_rate": 6.232561881383686e-07, "loss": 5.9554, "step": 1966 }, { "epoch": 3.18, "learning_rate": 6.213150006672499e-07, "loss": 6.4513, "step": 1968 }, { "epoch": 3.18, "learning_rate": 6.193754776209911e-07, "loss": 6.2884, "step": 1970 }, { "epoch": 3.19, "learning_rate": 6.174376275243298e-07, "loss": 6.1467, "step": 1972 }, { "epoch": 3.19, "learning_rate": 6.15501458894651e-07, "loss": 5.8498, "step": 1974 }, { "epoch": 3.19, "learning_rate": 6.135669802419487e-07, "loss": 5.7347, "step": 1976 }, { "epoch": 3.2, "learning_rate": 6.116342000687896e-07, "loss": 5.9732, "step": 1978 }, { "epoch": 3.2, "learning_rate": 6.097031268702745e-07, "loss": 6.2086, "step": 1980 }, { "epoch": 3.2, "learning_rate": 6.077737691340023e-07, "loss": 5.7324, "step": 1982 }, { "epoch": 3.21, "learning_rate": 6.058461353400314e-07, "loss": 5.7416, "step": 1984 }, { "epoch": 3.21, "learning_rate": 6.039202339608431e-07, "loss": 5.3508, "step": 1986 }, { "epoch": 3.21, "learning_rate": 6.019960734613047e-07, "loss": 6.0552, "step": 1988 }, { "epoch": 3.22, "learning_rate": 6.000736622986311e-07, "loss": 6.3086, "step": 1990 }, { "epoch": 3.22, "learning_rate": 5.981530089223488e-07, "loss": 6.1699, "step": 1992 }, { "epoch": 3.22, "learning_rate": 5.962341217742588e-07, "loss": 6.2947, "step": 1994 }, { "epoch": 3.23, "learning_rate": 5.94317009288398e-07, "loss": 6.3022, "step": 1996 }, { "epoch": 3.23, "learning_rate": 5.924016798910037e-07, "loss": 6.2368, "step": 1998 }, { "epoch": 3.23, "learning_rate": 5.904881420004767e-07, "loss": 5.5054, "step": 2000 }, { "epoch": 3.24, "learning_rate": 5.885764040273426e-07, "loss": 6.0251, "step": 2002 }, { "epoch": 3.24, "learning_rate": 5.866664743742162e-07, "loss": 5.7491, "step": 2004 }, { "epoch": 3.24, "learning_rate": 5.847583614357643e-07, "loss": 6.2488, "step": 2006 }, { "epoch": 3.25, "learning_rate": 5.828520735986693e-07, "loss": 6.3708, "step": 2008 }, { "epoch": 3.25, "learning_rate": 5.809476192415904e-07, "loss": 5.9273, "step": 2010 }, { "epoch": 3.25, "learning_rate": 5.79045006735129e-07, "loss": 6.2291, "step": 2012 }, { "epoch": 3.25, "learning_rate": 5.771442444417918e-07, "loss": 5.2932, "step": 2014 }, { "epoch": 3.26, "learning_rate": 5.752453407159521e-07, "loss": 5.9299, "step": 2016 }, { "epoch": 3.26, "learning_rate": 5.733483039038148e-07, "loss": 6.2873, "step": 2018 }, { "epoch": 3.26, "learning_rate": 5.71453142343379e-07, "loss": 5.9898, "step": 2020 }, { "epoch": 3.27, "learning_rate": 5.69559864364402e-07, "loss": 6.3883, "step": 2022 }, { "epoch": 3.27, "learning_rate": 5.676684782883614e-07, "loss": 5.7538, "step": 2024 }, { "epoch": 3.27, "learning_rate": 5.657789924284202e-07, "loss": 6.2761, "step": 2026 }, { "epoch": 3.28, "learning_rate": 5.63891415089389e-07, "loss": 6.3131, "step": 2028 }, { "epoch": 3.28, "learning_rate": 5.6200575456769e-07, "loss": 5.9519, "step": 2030 }, { "epoch": 3.28, "learning_rate": 5.601220191513207e-07, "loss": 5.4945, "step": 2032 }, { "epoch": 3.29, "learning_rate": 5.582402171198168e-07, "loss": 5.6255, "step": 2034 }, { "epoch": 3.29, "learning_rate": 5.563603567442168e-07, "loss": 6.5243, "step": 2036 }, { "epoch": 3.29, "learning_rate": 5.544824462870243e-07, "loss": 6.1559, "step": 2038 }, { "epoch": 3.3, "learning_rate": 5.526064940021732e-07, "loss": 5.7072, "step": 2040 }, { "epoch": 3.3, "learning_rate": 5.507325081349903e-07, "loss": 5.9227, "step": 2042 }, { "epoch": 3.3, "learning_rate": 5.488604969221596e-07, "loss": 6.2318, "step": 2044 }, { "epoch": 3.31, "learning_rate": 5.46990468591686e-07, "loss": 6.0336, "step": 2046 }, { "epoch": 3.31, "learning_rate": 5.451224313628591e-07, "loss": 6.2192, "step": 2048 }, { "epoch": 3.31, "learning_rate": 5.432563934462166e-07, "loss": 6.1455, "step": 2050 }, { "epoch": 3.32, "learning_rate": 5.413923630435093e-07, "loss": 5.8435, "step": 2052 }, { "epoch": 3.32, "learning_rate": 5.395303483476641e-07, "loss": 5.8657, "step": 2054 }, { "epoch": 3.32, "learning_rate": 5.37670357542748e-07, "loss": 5.9615, "step": 2056 }, { "epoch": 3.33, "learning_rate": 5.358123988039337e-07, "loss": 6.0928, "step": 2058 }, { "epoch": 3.33, "learning_rate": 5.339564802974614e-07, "loss": 5.6679, "step": 2060 }, { "epoch": 3.33, "learning_rate": 5.321026101806032e-07, "loss": 5.9366, "step": 2062 }, { "epoch": 3.34, "learning_rate": 5.302507966016295e-07, "loss": 5.9127, "step": 2064 }, { "epoch": 3.34, "learning_rate": 5.284010476997704e-07, "loss": 6.1515, "step": 2066 }, { "epoch": 3.34, "learning_rate": 5.265533716051824e-07, "loss": 6.135, "step": 2068 }, { "epoch": 3.35, "learning_rate": 5.247077764389099e-07, "loss": 6.117, "step": 2070 }, { "epoch": 3.35, "learning_rate": 5.22864270312853e-07, "loss": 5.5347, "step": 2072 }, { "epoch": 3.35, "learning_rate": 5.210228613297281e-07, "loss": 5.9705, "step": 2074 }, { "epoch": 3.36, "learning_rate": 5.191835575830351e-07, "loss": 6.0285, "step": 2076 }, { "epoch": 3.36, "learning_rate": 5.173463671570204e-07, "loss": 5.7762, "step": 2078 }, { "epoch": 3.36, "learning_rate": 5.155112981266422e-07, "loss": 6.1704, "step": 2080 }, { "epoch": 3.36, "learning_rate": 5.136783585575335e-07, "loss": 6.1445, "step": 2082 }, { "epoch": 3.37, "learning_rate": 5.11847556505969e-07, "loss": 5.7711, "step": 2084 }, { "epoch": 3.37, "learning_rate": 5.100189000188273e-07, "loss": 6.0446, "step": 2086 }, { "epoch": 3.37, "learning_rate": 5.081923971335583e-07, "loss": 5.7676, "step": 2088 }, { "epoch": 3.38, "learning_rate": 5.063680558781445e-07, "loss": 6.422, "step": 2090 }, { "epoch": 3.38, "learning_rate": 5.045458842710683e-07, "loss": 6.1722, "step": 2092 }, { "epoch": 3.38, "learning_rate": 5.027258903212759e-07, "loss": 5.9113, "step": 2094 }, { "epoch": 3.39, "learning_rate": 5.009080820281415e-07, "loss": 5.7517, "step": 2096 }, { "epoch": 3.39, "learning_rate": 4.990924673814336e-07, "loss": 5.7869, "step": 2098 }, { "epoch": 3.39, "learning_rate": 4.972790543612782e-07, "loss": 6.1813, "step": 2100 }, { "epoch": 3.4, "learning_rate": 4.954678509381253e-07, "loss": 5.8249, "step": 2102 }, { "epoch": 3.4, "learning_rate": 4.936588650727124e-07, "loss": 5.9902, "step": 2104 }, { "epoch": 3.4, "learning_rate": 4.918521047160307e-07, "loss": 6.2357, "step": 2106 }, { "epoch": 3.41, "learning_rate": 4.900475778092897e-07, "loss": 6.0328, "step": 2108 }, { "epoch": 3.41, "learning_rate": 4.882452922838817e-07, "loss": 6.0947, "step": 2110 }, { "epoch": 3.41, "learning_rate": 4.864452560613484e-07, "loss": 5.9981, "step": 2112 }, { "epoch": 3.42, "learning_rate": 4.846474770533445e-07, "loss": 6.0417, "step": 2114 }, { "epoch": 3.42, "learning_rate": 4.828519631616037e-07, "loss": 5.6535, "step": 2116 }, { "epoch": 3.42, "learning_rate": 4.819550581009024e-07, "loss": 6.1429, "step": 2118 }, { "epoch": 3.43, "learning_rate": 4.801629566775196e-07, "loss": 5.8783, "step": 2120 }, { "epoch": 3.43, "learning_rate": 4.783731400811022e-07, "loss": 5.8331, "step": 2122 }, { "epoch": 3.43, "learning_rate": 4.7658561617838677e-07, "loss": 6.1839, "step": 2124 }, { "epoch": 3.44, "learning_rate": 4.7480039282603345e-07, "loss": 6.1042, "step": 2126 }, { "epoch": 3.44, "learning_rate": 4.730174778705908e-07, "loss": 5.8208, "step": 2128 }, { "epoch": 3.44, "learning_rate": 4.7123687914845966e-07, "loss": 6.2347, "step": 2130 }, { "epoch": 3.45, "learning_rate": 4.694586044858633e-07, "loss": 6.0339, "step": 2132 }, { "epoch": 3.45, "learning_rate": 4.67682661698808e-07, "loss": 5.8287, "step": 2134 }, { "epoch": 3.45, "learning_rate": 4.659090585930513e-07, "loss": 5.9094, "step": 2136 }, { "epoch": 3.46, "learning_rate": 4.641378029640676e-07, "loss": 5.9343, "step": 2138 }, { "epoch": 3.46, "learning_rate": 4.6236890259701277e-07, "loss": 6.2003, "step": 2140 }, { "epoch": 3.46, "learning_rate": 4.606023652666915e-07, "loss": 6.2596, "step": 2142 }, { "epoch": 3.47, "learning_rate": 4.588381987375215e-07, "loss": 6.1376, "step": 2144 }, { "epoch": 3.47, "learning_rate": 4.570764107635007e-07, "loss": 6.2819, "step": 2146 }, { "epoch": 3.47, "learning_rate": 4.553170090881724e-07, "loss": 5.7263, "step": 2148 }, { "epoch": 3.47, "learning_rate": 4.535600014445914e-07, "loss": 5.8772, "step": 2150 }, { "epoch": 3.48, "learning_rate": 4.518053955552903e-07, "loss": 6.1401, "step": 2152 }, { "epoch": 3.48, "learning_rate": 4.50053199132245e-07, "loss": 5.937, "step": 2154 }, { "epoch": 3.48, "learning_rate": 4.483034198768416e-07, "loss": 6.1582, "step": 2156 }, { "epoch": 3.49, "learning_rate": 4.465560654798416e-07, "loss": 5.2896, "step": 2158 }, { "epoch": 3.49, "learning_rate": 4.448111436213485e-07, "loss": 5.7746, "step": 2160 }, { "epoch": 3.49, "learning_rate": 4.4306866197077543e-07, "loss": 5.535, "step": 2162 }, { "epoch": 3.5, "learning_rate": 4.4132862818680803e-07, "loss": 6.0889, "step": 2164 }, { "epoch": 3.5, "learning_rate": 4.395910499173745e-07, "loss": 5.7546, "step": 2166 }, { "epoch": 3.5, "learning_rate": 4.378559347996096e-07, "loss": 6.0215, "step": 2168 }, { "epoch": 3.51, "learning_rate": 4.361232904598223e-07, "loss": 5.8593, "step": 2170 }, { "epoch": 3.51, "learning_rate": 4.3439312451346154e-07, "loss": 6.0951, "step": 2172 }, { "epoch": 3.51, "learning_rate": 4.3266544456508327e-07, "loss": 6.0411, "step": 2174 }, { "epoch": 3.52, "learning_rate": 4.3094025820831606e-07, "loss": 6.2975, "step": 2176 }, { "epoch": 3.52, "learning_rate": 4.2921757302582996e-07, "loss": 5.9875, "step": 2178 }, { "epoch": 3.52, "learning_rate": 4.2749739658930026e-07, "loss": 6.2231, "step": 2180 }, { "epoch": 3.53, "learning_rate": 4.257797364593767e-07, "loss": 6.0196, "step": 2182 }, { "epoch": 3.53, "learning_rate": 4.240646001856476e-07, "loss": 5.7214, "step": 2184 }, { "epoch": 3.53, "learning_rate": 4.223519953066098e-07, "loss": 5.6502, "step": 2186 }, { "epoch": 3.54, "learning_rate": 4.206419293496333e-07, "loss": 5.6593, "step": 2188 }, { "epoch": 3.54, "learning_rate": 4.1893440983092853e-07, "loss": 6.1549, "step": 2190 }, { "epoch": 3.54, "learning_rate": 4.172294442555148e-07, "loss": 6.1882, "step": 2192 }, { "epoch": 3.55, "learning_rate": 4.1552704011718497e-07, "loss": 6.3202, "step": 2194 }, { "epoch": 3.55, "learning_rate": 4.13827204898474e-07, "loss": 6.2742, "step": 2196 }, { "epoch": 3.55, "learning_rate": 4.121299460706259e-07, "loss": 5.9497, "step": 2198 }, { "epoch": 3.56, "learning_rate": 4.104352710935609e-07, "loss": 6.2062, "step": 2200 }, { "epoch": 3.56, "learning_rate": 4.087431874158416e-07, "loss": 6.0651, "step": 2202 }, { "epoch": 3.56, "learning_rate": 4.0705370247464155e-07, "loss": 6.12, "step": 2204 }, { "epoch": 3.57, "learning_rate": 4.053668236957134e-07, "loss": 6.2323, "step": 2206 }, { "epoch": 3.57, "learning_rate": 4.036825584933533e-07, "loss": 6.018, "step": 2208 }, { "epoch": 3.57, "learning_rate": 4.0200091427037075e-07, "loss": 6.148, "step": 2210 }, { "epoch": 3.57, "learning_rate": 4.003218984180552e-07, "loss": 5.937, "step": 2212 }, { "epoch": 3.58, "learning_rate": 3.986455183161437e-07, "loss": 6.1027, "step": 2214 }, { "epoch": 3.58, "learning_rate": 3.9697178133278854e-07, "loss": 5.4278, "step": 2216 }, { "epoch": 3.58, "learning_rate": 3.9530069482452466e-07, "loss": 5.784, "step": 2218 }, { "epoch": 3.59, "learning_rate": 3.9363226613623733e-07, "loss": 6.0116, "step": 2220 }, { "epoch": 3.59, "learning_rate": 3.919665026011304e-07, "loss": 5.9013, "step": 2222 }, { "epoch": 3.59, "learning_rate": 3.9030341154069314e-07, "loss": 5.626, "step": 2224 }, { "epoch": 3.6, "learning_rate": 3.886430002646688e-07, "loss": 5.9606, "step": 2226 }, { "epoch": 3.6, "learning_rate": 3.8698527607102214e-07, "loss": 5.9847, "step": 2228 }, { "epoch": 3.6, "learning_rate": 3.853302462459077e-07, "loss": 6.2028, "step": 2230 }, { "epoch": 3.61, "learning_rate": 3.8367791806363724e-07, "loss": 6.0791, "step": 2232 }, { "epoch": 3.61, "learning_rate": 3.820282987866481e-07, "loss": 6.0794, "step": 2234 }, { "epoch": 3.61, "learning_rate": 3.8038139566547144e-07, "loss": 5.8957, "step": 2236 }, { "epoch": 3.62, "learning_rate": 3.787372159386999e-07, "loss": 5.7328, "step": 2238 }, { "epoch": 3.62, "learning_rate": 3.7709576683295616e-07, "loss": 6.1582, "step": 2240 }, { "epoch": 3.62, "learning_rate": 3.7545705556286124e-07, "loss": 5.8639, "step": 2242 }, { "epoch": 3.63, "learning_rate": 3.738210893310023e-07, "loss": 5.8205, "step": 2244 }, { "epoch": 3.63, "learning_rate": 3.721878753279016e-07, "loss": 6.0627, "step": 2246 }, { "epoch": 3.63, "learning_rate": 3.705574207319844e-07, "loss": 5.7041, "step": 2248 }, { "epoch": 3.64, "learning_rate": 3.689297327095472e-07, "loss": 6.1213, "step": 2250 }, { "epoch": 3.64, "learning_rate": 3.6730481841472805e-07, "loss": 6.2363, "step": 2252 }, { "epoch": 3.64, "learning_rate": 3.656826849894725e-07, "loss": 5.768, "step": 2254 }, { "epoch": 3.65, "learning_rate": 3.640633395635032e-07, "loss": 5.9267, "step": 2256 }, { "epoch": 3.65, "learning_rate": 3.624467892542895e-07, "loss": 6.0327, "step": 2258 }, { "epoch": 3.65, "learning_rate": 3.608330411670153e-07, "loss": 6.1895, "step": 2260 }, { "epoch": 3.66, "learning_rate": 3.5922210239454764e-07, "loss": 6.1527, "step": 2262 }, { "epoch": 3.66, "learning_rate": 3.5761398001740597e-07, "loss": 6.0396, "step": 2264 }, { "epoch": 3.66, "learning_rate": 3.5600868110373163e-07, "loss": 5.818, "step": 2266 }, { "epoch": 3.67, "learning_rate": 3.5440621270925497e-07, "loss": 6.3069, "step": 2268 }, { "epoch": 3.67, "learning_rate": 3.5280658187726597e-07, "loss": 6.5033, "step": 2270 }, { "epoch": 3.67, "learning_rate": 3.5120979563858267e-07, "loss": 5.7382, "step": 2272 }, { "epoch": 3.68, "learning_rate": 3.4961586101152065e-07, "loss": 5.9984, "step": 2274 }, { "epoch": 3.68, "learning_rate": 3.4802478500186094e-07, "loss": 5.9253, "step": 2276 }, { "epoch": 3.68, "learning_rate": 3.4643657460282073e-07, "loss": 6.0152, "step": 2278 }, { "epoch": 3.68, "learning_rate": 3.448512367950227e-07, "loss": 6.2795, "step": 2280 }, { "epoch": 3.69, "learning_rate": 3.43268778546463e-07, "loss": 5.8249, "step": 2282 }, { "epoch": 3.69, "learning_rate": 3.4168920681248117e-07, "loss": 6.0889, "step": 2284 }, { "epoch": 3.69, "learning_rate": 3.4011252853573013e-07, "loss": 5.3738, "step": 2286 }, { "epoch": 3.7, "learning_rate": 3.3853875064614514e-07, "loss": 5.7754, "step": 2288 }, { "epoch": 3.7, "learning_rate": 3.369678800609134e-07, "loss": 6.0388, "step": 2290 }, { "epoch": 3.7, "learning_rate": 3.353999236844436e-07, "loss": 6.1277, "step": 2292 }, { "epoch": 3.71, "learning_rate": 3.3383488840833596e-07, "loss": 6.1172, "step": 2294 }, { "epoch": 3.71, "learning_rate": 3.322727811113516e-07, "loss": 5.9489, "step": 2296 }, { "epoch": 3.71, "learning_rate": 3.3071360865938205e-07, "loss": 6.1349, "step": 2298 }, { "epoch": 3.72, "learning_rate": 3.2915737790541986e-07, "loss": 5.6581, "step": 2300 }, { "epoch": 3.72, "learning_rate": 3.276040956895276e-07, "loss": 5.9057, "step": 2302 }, { "epoch": 3.72, "learning_rate": 3.260537688388085e-07, "loss": 6.2865, "step": 2304 }, { "epoch": 3.73, "learning_rate": 3.2450640416737595e-07, "loss": 5.987, "step": 2306 }, { "epoch": 3.73, "learning_rate": 3.229620084763237e-07, "loss": 6.0143, "step": 2308 }, { "epoch": 3.73, "learning_rate": 3.214205885536965e-07, "loss": 5.7721, "step": 2310 }, { "epoch": 3.74, "learning_rate": 3.198821511744589e-07, "loss": 5.3875, "step": 2312 }, { "epoch": 3.74, "learning_rate": 3.183467031004673e-07, "loss": 6.1836, "step": 2314 }, { "epoch": 3.74, "learning_rate": 3.168142510804386e-07, "loss": 5.9135, "step": 2316 }, { "epoch": 3.75, "learning_rate": 3.1528480184992144e-07, "loss": 5.8897, "step": 2318 }, { "epoch": 3.75, "learning_rate": 3.137583621312665e-07, "loss": 6.1612, "step": 2320 }, { "epoch": 3.75, "learning_rate": 3.122349386335964e-07, "loss": 6.0063, "step": 2322 }, { "epoch": 3.76, "learning_rate": 3.1071453805277757e-07, "loss": 5.8636, "step": 2324 }, { "epoch": 3.76, "learning_rate": 3.0919716707138887e-07, "loss": 5.9821, "step": 2326 }, { "epoch": 3.76, "learning_rate": 3.0768283235869406e-07, "loss": 5.9865, "step": 2328 }, { "epoch": 3.77, "learning_rate": 3.0617154057061054e-07, "loss": 6.0986, "step": 2330 }, { "epoch": 3.77, "learning_rate": 3.046632983496823e-07, "loss": 5.807, "step": 2332 }, { "epoch": 3.77, "learning_rate": 3.0315811232504916e-07, "loss": 6.2453, "step": 2334 }, { "epoch": 3.78, "learning_rate": 3.016559891124183e-07, "loss": 6.0405, "step": 2336 }, { "epoch": 3.78, "learning_rate": 3.001569353140346e-07, "loss": 6.1985, "step": 2338 }, { "epoch": 3.78, "learning_rate": 2.9866095751865297e-07, "loss": 5.9151, "step": 2340 }, { "epoch": 3.79, "learning_rate": 2.971680623015074e-07, "loss": 6.1751, "step": 2342 }, { "epoch": 3.79, "learning_rate": 2.9567825622428356e-07, "loss": 6.2544, "step": 2344 }, { "epoch": 3.79, "learning_rate": 2.9419154583508974e-07, "loss": 5.9883, "step": 2346 }, { "epoch": 3.79, "learning_rate": 2.9270793766842696e-07, "loss": 6.0128, "step": 2348 }, { "epoch": 3.8, "learning_rate": 2.9122743824516195e-07, "loss": 6.0721, "step": 2350 }, { "epoch": 3.8, "learning_rate": 2.897500540724972e-07, "loss": 5.7388, "step": 2352 }, { "epoch": 3.8, "learning_rate": 2.882757916439434e-07, "loss": 6.0928, "step": 2354 }, { "epoch": 3.81, "learning_rate": 2.868046574392898e-07, "loss": 6.2217, "step": 2356 }, { "epoch": 3.81, "learning_rate": 2.853366579245764e-07, "loss": 6.2321, "step": 2358 }, { "epoch": 3.81, "learning_rate": 2.838717995520652e-07, "loss": 6.0278, "step": 2360 }, { "epoch": 3.82, "learning_rate": 2.824100887602121e-07, "loss": 6.1201, "step": 2362 }, { "epoch": 3.82, "learning_rate": 2.8095153197363884e-07, "loss": 5.7258, "step": 2364 }, { "epoch": 3.82, "learning_rate": 2.794961356031044e-07, "loss": 5.6672, "step": 2366 }, { "epoch": 3.83, "learning_rate": 2.7804390604547556e-07, "loss": 6.2415, "step": 2368 }, { "epoch": 3.83, "learning_rate": 2.7659484968370216e-07, "loss": 6.2554, "step": 2370 }, { "epoch": 3.83, "learning_rate": 2.7514897288678574e-07, "loss": 6.202, "step": 2372 }, { "epoch": 3.84, "learning_rate": 2.73706282009753e-07, "loss": 6.0814, "step": 2374 }, { "epoch": 3.84, "learning_rate": 2.722667833936275e-07, "loss": 6.1788, "step": 2376 }, { "epoch": 3.84, "learning_rate": 2.708304833654023e-07, "loss": 6.1503, "step": 2378 }, { "epoch": 3.85, "learning_rate": 2.693973882380114e-07, "loss": 5.7419, "step": 2380 }, { "epoch": 3.85, "learning_rate": 2.6796750431030256e-07, "loss": 6.1539, "step": 2382 }, { "epoch": 3.85, "learning_rate": 2.6654083786700955e-07, "loss": 5.6726, "step": 2384 }, { "epoch": 3.86, "learning_rate": 2.651173951787242e-07, "loss": 6.1883, "step": 2386 }, { "epoch": 3.86, "learning_rate": 2.6369718250186915e-07, "loss": 6.0832, "step": 2388 }, { "epoch": 3.86, "learning_rate": 2.622802060786702e-07, "loss": 6.3862, "step": 2390 }, { "epoch": 3.87, "learning_rate": 2.6157293340899857e-07, "loss": 5.9917, "step": 2392 }, { "epoch": 3.87, "learning_rate": 2.601608230393345e-07, "loss": 6.4018, "step": 2394 }, { "epoch": 3.87, "learning_rate": 2.587519644666001e-07, "loss": 5.7595, "step": 2396 }, { "epoch": 3.88, "learning_rate": 2.573463638831166e-07, "loss": 5.9134, "step": 2398 }, { "epoch": 3.88, "learning_rate": 2.5594402746688636e-07, "loss": 6.1006, "step": 2400 }, { "epoch": 3.88, "learning_rate": 2.545449613815639e-07, "loss": 6.3605, "step": 2402 }, { "epoch": 3.89, "learning_rate": 2.531491717764297e-07, "loss": 6.207, "step": 2404 }, { "epoch": 3.89, "learning_rate": 2.517566647863637e-07, "loss": 6.2167, "step": 2406 }, { "epoch": 3.89, "learning_rate": 2.503674465318175e-07, "loss": 5.9579, "step": 2408 }, { "epoch": 3.89, "learning_rate": 2.4898152311878797e-07, "loss": 5.4079, "step": 2410 }, { "epoch": 3.9, "learning_rate": 2.4759890063879006e-07, "loss": 5.4221, "step": 2412 }, { "epoch": 3.9, "learning_rate": 2.462195851688306e-07, "loss": 6.1915, "step": 2414 }, { "epoch": 3.9, "learning_rate": 2.448435827713806e-07, "loss": 6.202, "step": 2416 }, { "epoch": 3.91, "learning_rate": 2.4347089949434984e-07, "loss": 5.7824, "step": 2418 }, { "epoch": 3.91, "learning_rate": 2.421015413710591e-07, "loss": 6.23, "step": 2420 }, { "epoch": 3.91, "learning_rate": 2.407355144202147e-07, "loss": 6.0515, "step": 2422 }, { "epoch": 3.92, "learning_rate": 2.39372824645881e-07, "loss": 6.0654, "step": 2424 }, { "epoch": 3.92, "learning_rate": 2.380134780374551e-07, "loss": 5.7883, "step": 2426 }, { "epoch": 3.92, "learning_rate": 2.3665748056963952e-07, "loss": 5.7154, "step": 2428 }, { "epoch": 3.93, "learning_rate": 2.3530483820241655e-07, "loss": 5.7811, "step": 2430 }, { "epoch": 3.93, "learning_rate": 2.339555568810221e-07, "loss": 6.1055, "step": 2432 }, { "epoch": 3.93, "learning_rate": 2.3260964253591898e-07, "loss": 6.0643, "step": 2434 }, { "epoch": 3.94, "learning_rate": 2.3126710108277148e-07, "loss": 5.8521, "step": 2436 }, { "epoch": 3.94, "learning_rate": 2.29927938422419e-07, "loss": 6.3257, "step": 2438 }, { "epoch": 3.94, "learning_rate": 2.2859216044085017e-07, "loss": 6.2957, "step": 2440 }, { "epoch": 3.95, "learning_rate": 2.2725977300917687e-07, "loss": 6.2473, "step": 2442 }, { "epoch": 3.95, "learning_rate": 2.2593078198360927e-07, "loss": 5.7792, "step": 2444 }, { "epoch": 3.95, "learning_rate": 2.2460519320542881e-07, "loss": 5.0003, "step": 2446 }, { "epoch": 3.96, "learning_rate": 2.2328301250096326e-07, "loss": 5.6349, "step": 2448 }, { "epoch": 3.96, "learning_rate": 2.219642456815607e-07, "loss": 5.7349, "step": 2450 }, { "epoch": 3.96, "learning_rate": 2.206488985435645e-07, "loss": 5.8819, "step": 2452 }, { "epoch": 3.97, "learning_rate": 2.1933697686828767e-07, "loss": 5.5308, "step": 2454 }, { "epoch": 3.97, "learning_rate": 2.180284864219869e-07, "loss": 5.892, "step": 2456 }, { "epoch": 3.97, "learning_rate": 2.1672343295583873e-07, "loss": 5.7391, "step": 2458 }, { "epoch": 3.98, "learning_rate": 2.154218222059122e-07, "loss": 5.812, "step": 2460 }, { "epoch": 3.98, "learning_rate": 2.1412365989314508e-07, "loss": 5.4554, "step": 2462 }, { "epoch": 3.98, "learning_rate": 2.1282895172331816e-07, "loss": 5.7584, "step": 2464 }, { "epoch": 3.99, "learning_rate": 2.1153770338703048e-07, "loss": 5.6391, "step": 2466 }, { "epoch": 3.99, "learning_rate": 2.102499205596743e-07, "loss": 6.1335, "step": 2468 }, { "epoch": 3.99, "learning_rate": 2.0896560890140913e-07, "loss": 6.1353, "step": 2470 }, { "epoch": 4.0, "learning_rate": 2.076847740571387e-07, "loss": 5.8624, "step": 2472 }, { "epoch": 4.0, "learning_rate": 2.0640742165648518e-07, "loss": 5.3207, "step": 2474 }, { "epoch": 4.0, "learning_rate": 2.0513355731376392e-07, "loss": 5.5816, "step": 2476 }, { "epoch": 4.0, "learning_rate": 2.0386318662795954e-07, "loss": 6.1561, "step": 2478 }, { "epoch": 4.01, "learning_rate": 2.0259631518270104e-07, "loss": 6.2594, "step": 2480 }, { "epoch": 4.01, "learning_rate": 2.013329485462374e-07, "loss": 5.812, "step": 2482 }, { "epoch": 4.01, "learning_rate": 2.000730922714128e-07, "loss": 6.2848, "step": 2484 }, { "epoch": 4.02, "learning_rate": 1.988167518956425e-07, "loss": 6.1702, "step": 2486 }, { "epoch": 4.02, "learning_rate": 1.975639329408887e-07, "loss": 5.9088, "step": 2488 }, { "epoch": 4.02, "learning_rate": 1.9631464091363537e-07, "loss": 5.8951, "step": 2490 }, { "epoch": 4.03, "learning_rate": 1.950688813048652e-07, "loss": 5.9474, "step": 2492 }, { "epoch": 4.03, "learning_rate": 1.9382665959003475e-07, "loss": 6.1231, "step": 2494 }, { "epoch": 4.03, "learning_rate": 1.9258798122905061e-07, "loss": 5.8997, "step": 2496 }, { "epoch": 4.04, "learning_rate": 1.9135285166624514e-07, "loss": 5.191, "step": 2498 }, { "epoch": 4.04, "learning_rate": 1.9012127633035302e-07, "loss": 6.1011, "step": 2500 }, { "epoch": 4.04, "learning_rate": 1.8889326063448696e-07, "loss": 6.0884, "step": 2502 }, { "epoch": 4.05, "learning_rate": 1.8766880997611424e-07, "loss": 5.8573, "step": 2504 }, { "epoch": 4.05, "learning_rate": 1.864479297370325e-07, "loss": 5.6426, "step": 2506 }, { "epoch": 4.05, "learning_rate": 1.8523062528334688e-07, "loss": 5.9523, "step": 2508 }, { "epoch": 4.06, "learning_rate": 1.840169019654455e-07, "loss": 6.0296, "step": 2510 }, { "epoch": 4.06, "learning_rate": 1.8280676511797665e-07, "loss": 5.7186, "step": 2512 }, { "epoch": 4.06, "learning_rate": 1.816002200598251e-07, "loss": 5.8323, "step": 2514 }, { "epoch": 4.07, "learning_rate": 1.803972720940884e-07, "loss": 5.6306, "step": 2516 }, { "epoch": 4.07, "learning_rate": 1.7919792650805455e-07, "loss": 5.6633, "step": 2518 }, { "epoch": 4.07, "learning_rate": 1.780021885731774e-07, "loss": 6.2717, "step": 2520 }, { "epoch": 4.08, "learning_rate": 1.768100635450549e-07, "loss": 6.0222, "step": 2522 }, { "epoch": 4.08, "learning_rate": 1.756215566634043e-07, "loss": 6.0548, "step": 2524 }, { "epoch": 4.08, "learning_rate": 1.744366731520408e-07, "loss": 5.9844, "step": 2526 }, { "epoch": 4.09, "learning_rate": 1.732554182188538e-07, "loss": 6.0736, "step": 2528 }, { "epoch": 4.09, "learning_rate": 1.7207779705578373e-07, "loss": 6.1283, "step": 2530 }, { "epoch": 4.09, "learning_rate": 1.7090381483880068e-07, "loss": 6.0131, "step": 2532 }, { "epoch": 4.1, "learning_rate": 1.697334767278792e-07, "loss": 5.7743, "step": 2534 }, { "epoch": 4.1, "learning_rate": 1.6856678786697777e-07, "loss": 5.8014, "step": 2536 }, { "epoch": 4.1, "learning_rate": 1.6740375338401524e-07, "loss": 5.9811, "step": 2538 }, { "epoch": 4.11, "learning_rate": 1.662443783908486e-07, "loss": 6.236, "step": 2540 }, { "epoch": 4.11, "learning_rate": 1.6508866798324983e-07, "loss": 5.6924, "step": 2542 }, { "epoch": 4.11, "learning_rate": 1.6393662724088475e-07, "loss": 5.6433, "step": 2544 }, { "epoch": 4.11, "learning_rate": 1.6278826122728928e-07, "loss": 6.1821, "step": 2546 }, { "epoch": 4.12, "learning_rate": 1.6164357498984893e-07, "loss": 6.1518, "step": 2548 }, { "epoch": 4.12, "learning_rate": 1.605025735597746e-07, "loss": 6.4069, "step": 2550 }, { "epoch": 4.12, "learning_rate": 1.5936526195208189e-07, "loss": 6.1081, "step": 2552 }, { "epoch": 4.13, "learning_rate": 1.582316451655684e-07, "loss": 6.1456, "step": 2554 }, { "epoch": 4.13, "learning_rate": 1.5710172818279222e-07, "loss": 5.9343, "step": 2556 }, { "epoch": 4.13, "learning_rate": 1.5597551597004964e-07, "loss": 6.0851, "step": 2558 }, { "epoch": 4.14, "learning_rate": 1.5485301347735348e-07, "loss": 5.8427, "step": 2560 }, { "epoch": 4.14, "learning_rate": 1.5373422563841131e-07, "loss": 6.1268, "step": 2562 }, { "epoch": 4.14, "learning_rate": 1.5261915737060382e-07, "loss": 6.5334, "step": 2564 }, { "epoch": 4.15, "learning_rate": 1.5150781357496312e-07, "loss": 5.6949, "step": 2566 }, { "epoch": 4.15, "learning_rate": 1.504001991361512e-07, "loss": 6.3782, "step": 2568 }, { "epoch": 4.15, "learning_rate": 1.4929631892243855e-07, "loss": 6.0292, "step": 2570 }, { "epoch": 4.16, "learning_rate": 1.4819617778568282e-07, "loss": 6.235, "step": 2572 }, { "epoch": 4.16, "learning_rate": 1.4709978056130712e-07, "loss": 6.035, "step": 2574 }, { "epoch": 4.16, "learning_rate": 1.460071320682793e-07, "loss": 5.9589, "step": 2576 }, { "epoch": 4.17, "learning_rate": 1.4491823710909045e-07, "loss": 6.0054, "step": 2578 }, { "epoch": 4.17, "learning_rate": 1.4383310046973362e-07, "loss": 6.1784, "step": 2580 }, { "epoch": 4.17, "learning_rate": 1.427517269196833e-07, "loss": 5.0114, "step": 2582 }, { "epoch": 4.18, "learning_rate": 1.4167412121187406e-07, "loss": 5.903, "step": 2584 }, { "epoch": 4.18, "learning_rate": 1.4060028808267964e-07, "loss": 5.6401, "step": 2586 }, { "epoch": 4.18, "learning_rate": 1.3953023225189243e-07, "loss": 5.4205, "step": 2588 }, { "epoch": 4.19, "learning_rate": 1.384639584227023e-07, "loss": 6.2168, "step": 2590 }, { "epoch": 4.19, "learning_rate": 1.3740147128167677e-07, "loss": 6.3063, "step": 2592 }, { "epoch": 4.19, "learning_rate": 1.363427754987395e-07, "loss": 6.0987, "step": 2594 }, { "epoch": 4.2, "learning_rate": 1.352878757271495e-07, "loss": 5.9746, "step": 2596 }, { "epoch": 4.2, "learning_rate": 1.342367766034821e-07, "loss": 5.6164, "step": 2598 }, { "epoch": 4.2, "learning_rate": 1.3318948274760734e-07, "loss": 5.333, "step": 2600 }, { "epoch": 4.21, "learning_rate": 1.3214599876266996e-07, "loss": 5.9316, "step": 2602 }, { "epoch": 4.21, "learning_rate": 1.311063292350696e-07, "loss": 5.9, "step": 2604 }, { "epoch": 4.21, "learning_rate": 1.3007047873444034e-07, "loss": 5.6936, "step": 2606 }, { "epoch": 4.21, "learning_rate": 1.2903845181363017e-07, "loss": 5.7382, "step": 2608 }, { "epoch": 4.22, "learning_rate": 1.2801025300868162e-07, "loss": 5.9571, "step": 2610 }, { "epoch": 4.22, "learning_rate": 1.2698588683881184e-07, "loss": 5.3818, "step": 2612 }, { "epoch": 4.22, "learning_rate": 1.2596535780639218e-07, "loss": 6.2568, "step": 2614 }, { "epoch": 4.23, "learning_rate": 1.2494867039692846e-07, "loss": 5.8648, "step": 2616 }, { "epoch": 4.23, "learning_rate": 1.2393582907904199e-07, "loss": 6.2207, "step": 2618 }, { "epoch": 4.23, "learning_rate": 1.2292683830444915e-07, "loss": 5.7402, "step": 2620 }, { "epoch": 4.24, "learning_rate": 1.2192170250794276e-07, "loss": 5.8071, "step": 2622 }, { "epoch": 4.24, "learning_rate": 1.2092042610737107e-07, "loss": 6.3271, "step": 2624 }, { "epoch": 4.24, "learning_rate": 1.1992301350361977e-07, "loss": 6.1244, "step": 2626 }, { "epoch": 4.25, "learning_rate": 1.1892946908059188e-07, "loss": 5.9335, "step": 2628 }, { "epoch": 4.25, "learning_rate": 1.1793979720518865e-07, "loss": 6.1779, "step": 2630 }, { "epoch": 4.25, "learning_rate": 1.1695400222729057e-07, "loss": 5.7196, "step": 2632 }, { "epoch": 4.26, "learning_rate": 1.1597208847973816e-07, "loss": 5.7874, "step": 2634 }, { "epoch": 4.26, "learning_rate": 1.149940602783126e-07, "loss": 6.3751, "step": 2636 }, { "epoch": 4.26, "learning_rate": 1.1401992192171739e-07, "loss": 5.3379, "step": 2638 }, { "epoch": 4.27, "learning_rate": 1.130496776915586e-07, "loss": 5.9969, "step": 2640 }, { "epoch": 4.27, "learning_rate": 1.120833318523271e-07, "loss": 5.9765, "step": 2642 }, { "epoch": 4.27, "learning_rate": 1.111208886513787e-07, "loss": 5.4064, "step": 2644 }, { "epoch": 4.28, "learning_rate": 1.1016235231891657e-07, "loss": 6.1345, "step": 2646 }, { "epoch": 4.28, "learning_rate": 1.0920772706797165e-07, "loss": 6.2301, "step": 2648 }, { "epoch": 4.28, "learning_rate": 1.0825701709438506e-07, "loss": 6.2183, "step": 2650 }, { "epoch": 4.29, "learning_rate": 1.0731022657678867e-07, "loss": 6.2923, "step": 2652 }, { "epoch": 4.29, "learning_rate": 1.0636735967658784e-07, "loss": 5.9882, "step": 2654 }, { "epoch": 4.29, "learning_rate": 1.0542842053794198e-07, "loss": 6.1671, "step": 2656 }, { "epoch": 4.3, "learning_rate": 1.0449341328774741e-07, "loss": 6.11, "step": 2658 }, { "epoch": 4.3, "learning_rate": 1.0356234203561831e-07, "loss": 5.9462, "step": 2660 }, { "epoch": 4.3, "learning_rate": 1.026352108738694e-07, "loss": 6.0572, "step": 2662 }, { "epoch": 4.31, "learning_rate": 1.0171202387749722e-07, "loss": 6.0026, "step": 2664 }, { "epoch": 4.31, "learning_rate": 1.0079278510416312e-07, "loss": 5.2601, "step": 2666 }, { "epoch": 4.31, "learning_rate": 9.987749859417483e-08, "loss": 6.1281, "step": 2668 }, { "epoch": 4.32, "learning_rate": 9.896616837046811e-08, "loss": 5.5855, "step": 2670 }, { "epoch": 4.32, "learning_rate": 9.805879843859055e-08, "loss": 6.2328, "step": 2672 }, { "epoch": 4.32, "learning_rate": 9.715539278668283e-08, "loss": 6.3162, "step": 2674 }, { "epoch": 4.32, "learning_rate": 9.625595538546171e-08, "loss": 5.8745, "step": 2676 }, { "epoch": 4.33, "learning_rate": 9.536049018820191e-08, "loss": 6.216, "step": 2678 }, { "epoch": 4.33, "learning_rate": 9.446900113071999e-08, "loss": 6.1148, "step": 2680 }, { "epoch": 4.33, "learning_rate": 9.358149213135569e-08, "loss": 6.1663, "step": 2682 }, { "epoch": 4.34, "learning_rate": 9.269796709095556e-08, "loss": 6.1012, "step": 2684 }, { "epoch": 4.34, "learning_rate": 9.181842989285559e-08, "loss": 5.7841, "step": 2686 }, { "epoch": 4.34, "learning_rate": 9.094288440286368e-08, "loss": 6.1725, "step": 2688 }, { "epoch": 4.35, "learning_rate": 9.007133446924342e-08, "loss": 6.0184, "step": 2690 }, { "epoch": 4.35, "learning_rate": 8.92037839226969e-08, "loss": 6.0421, "step": 2692 }, { "epoch": 4.35, "learning_rate": 8.834023657634737e-08, "loss": 5.9721, "step": 2694 }, { "epoch": 4.36, "learning_rate": 8.748069622572385e-08, "loss": 6.104, "step": 2696 }, { "epoch": 4.36, "learning_rate": 8.662516664874254e-08, "loss": 6.0882, "step": 2698 }, { "epoch": 4.36, "learning_rate": 8.57736516056915e-08, "loss": 5.938, "step": 2700 }, { "epoch": 4.37, "learning_rate": 8.492615483921395e-08, "loss": 6.0638, "step": 2702 }, { "epoch": 4.37, "learning_rate": 8.408268007429153e-08, "loss": 6.2967, "step": 2704 }, { "epoch": 4.37, "learning_rate": 8.324323101822827e-08, "loss": 5.7991, "step": 2706 }, { "epoch": 4.38, "learning_rate": 8.240781136063346e-08, "loss": 5.8908, "step": 2708 }, { "epoch": 4.38, "learning_rate": 8.157642477340709e-08, "loss": 5.9115, "step": 2710 }, { "epoch": 4.38, "learning_rate": 8.074907491072202e-08, "loss": 6.4553, "step": 2712 }, { "epoch": 4.39, "learning_rate": 7.992576540900875e-08, "loss": 6.1312, "step": 2714 }, { "epoch": 4.39, "learning_rate": 7.910649988693907e-08, "loss": 5.9191, "step": 2716 }, { "epoch": 4.39, "learning_rate": 7.82912819454109e-08, "loss": 6.352, "step": 2718 }, { "epoch": 4.4, "learning_rate": 7.748011516753139e-08, "loss": 5.7628, "step": 2720 }, { "epoch": 4.4, "learning_rate": 7.667300311860192e-08, "loss": 5.9204, "step": 2722 }, { "epoch": 4.4, "learning_rate": 7.586994934610225e-08, "loss": 5.8677, "step": 2724 }, { "epoch": 4.41, "learning_rate": 7.507095737967495e-08, "loss": 6.1465, "step": 2726 }, { "epoch": 4.41, "learning_rate": 7.427603073110966e-08, "loss": 5.5993, "step": 2728 }, { "epoch": 4.41, "learning_rate": 7.348517289432799e-08, "loss": 6.0467, "step": 2730 }, { "epoch": 4.42, "learning_rate": 7.269838734536771e-08, "loss": 5.6803, "step": 2732 }, { "epoch": 4.42, "learning_rate": 7.191567754236827e-08, "loss": 5.7175, "step": 2734 }, { "epoch": 4.42, "learning_rate": 7.113704692555467e-08, "loss": 6.099, "step": 2736 }, { "epoch": 4.43, "learning_rate": 7.03624989172228e-08, "loss": 5.8287, "step": 2738 }, { "epoch": 4.43, "learning_rate": 6.959203692172489e-08, "loss": 6.0513, "step": 2740 }, { "epoch": 4.43, "learning_rate": 6.8825664325453e-08, "loss": 5.8603, "step": 2742 }, { "epoch": 4.43, "learning_rate": 6.806338449682614e-08, "loss": 5.7971, "step": 2744 }, { "epoch": 4.44, "learning_rate": 6.7305200786274e-08, "loss": 6.3585, "step": 2746 }, { "epoch": 4.44, "learning_rate": 6.65511165262227e-08, "loss": 5.9156, "step": 2748 }, { "epoch": 4.44, "learning_rate": 6.580113503108031e-08, "loss": 6.1018, "step": 2750 }, { "epoch": 4.45, "learning_rate": 6.50552595972218e-08, "loss": 6.4561, "step": 2752 }, { "epoch": 4.45, "learning_rate": 6.431349350297555e-08, "loss": 6.1506, "step": 2754 }, { "epoch": 4.45, "learning_rate": 6.35758400086076e-08, "loss": 6.0168, "step": 2756 }, { "epoch": 4.46, "learning_rate": 6.284230235630827e-08, "loss": 5.9695, "step": 2758 }, { "epoch": 4.46, "learning_rate": 6.211288377017754e-08, "loss": 6.1156, "step": 2760 }, { "epoch": 4.46, "learning_rate": 6.138758745621086e-08, "loss": 6.0857, "step": 2762 }, { "epoch": 4.47, "learning_rate": 6.066641660228522e-08, "loss": 6.2212, "step": 2764 }, { "epoch": 4.47, "learning_rate": 5.994937437814518e-08, "loss": 5.5856, "step": 2766 }, { "epoch": 4.47, "learning_rate": 5.923646393538906e-08, "loss": 5.8842, "step": 2768 }, { "epoch": 4.48, "learning_rate": 5.8527688407454254e-08, "loss": 6.084, "step": 2770 }, { "epoch": 4.48, "learning_rate": 5.78230509096046e-08, "loss": 6.1464, "step": 2772 }, { "epoch": 4.48, "learning_rate": 5.712255453891579e-08, "loss": 5.7051, "step": 2774 }, { "epoch": 4.49, "learning_rate": 5.642620237426243e-08, "loss": 6.2802, "step": 2776 }, { "epoch": 4.49, "learning_rate": 5.573399747630403e-08, "loss": 5.8334, "step": 2778 }, { "epoch": 4.49, "learning_rate": 5.5045942887471885e-08, "loss": 6.1747, "step": 2780 }, { "epoch": 4.5, "learning_rate": 5.436204163195479e-08, "loss": 6.1003, "step": 2782 }, { "epoch": 4.5, "learning_rate": 5.36822967156878e-08, "loss": 6.2131, "step": 2784 }, { "epoch": 4.5, "learning_rate": 5.30067111263367e-08, "loss": 5.2912, "step": 2786 }, { "epoch": 4.51, "learning_rate": 5.233528783328634e-08, "loss": 5.8937, "step": 2788 }, { "epoch": 4.51, "learning_rate": 5.166802978762696e-08, "loss": 5.1261, "step": 2790 }, { "epoch": 4.51, "learning_rate": 5.1004939922141274e-08, "loss": 6.0996, "step": 2792 }, { "epoch": 4.52, "learning_rate": 5.034602115129205e-08, "loss": 5.7728, "step": 2794 }, { "epoch": 4.52, "learning_rate": 4.969127637120862e-08, "loss": 6.1683, "step": 2796 }, { "epoch": 4.52, "learning_rate": 4.904070845967467e-08, "loss": 5.7688, "step": 2798 }, { "epoch": 4.53, "learning_rate": 4.839432027611534e-08, "loss": 6.2196, "step": 2800 }, { "epoch": 4.53, "learning_rate": 4.7752114661584685e-08, "loss": 5.9887, "step": 2802 }, { "epoch": 4.53, "learning_rate": 4.711409443875325e-08, "loss": 6.1383, "step": 2804 }, { "epoch": 4.53, "learning_rate": 4.648026241189562e-08, "loss": 5.973, "step": 2806 }, { "epoch": 4.54, "learning_rate": 4.585062136687812e-08, "loss": 5.6179, "step": 2808 }, { "epoch": 4.54, "learning_rate": 4.522517407114645e-08, "loss": 6.1717, "step": 2810 }, { "epoch": 4.54, "learning_rate": 4.460392327371376e-08, "loss": 5.5463, "step": 2812 }, { "epoch": 4.55, "learning_rate": 4.3986871705148586e-08, "loss": 5.1993, "step": 2814 }, { "epoch": 4.55, "learning_rate": 4.337402207756235e-08, "loss": 6.0538, "step": 2816 }, { "epoch": 4.55, "learning_rate": 4.276537708459782e-08, "loss": 5.9359, "step": 2818 }, { "epoch": 4.56, "learning_rate": 4.2160939401417516e-08, "loss": 6.2784, "step": 2820 }, { "epoch": 4.56, "learning_rate": 4.156071168469144e-08, "loss": 6.0201, "step": 2822 }, { "epoch": 4.56, "learning_rate": 4.096469657258572e-08, "loss": 6.1371, "step": 2824 }, { "epoch": 4.57, "learning_rate": 4.037289668475086e-08, "loss": 5.6743, "step": 2826 }, { "epoch": 4.57, "learning_rate": 3.97853146223105e-08, "loss": 6.4075, "step": 2828 }, { "epoch": 4.57, "learning_rate": 3.920195296784956e-08, "loss": 5.8316, "step": 2830 }, { "epoch": 4.58, "learning_rate": 3.862281428540315e-08, "loss": 5.4858, "step": 2832 }, { "epoch": 4.58, "learning_rate": 3.8047901120445315e-08, "loss": 5.7017, "step": 2834 }, { "epoch": 4.58, "learning_rate": 3.747721599987763e-08, "loss": 5.648, "step": 2836 }, { "epoch": 4.59, "learning_rate": 3.691076143201832e-08, "loss": 5.9246, "step": 2838 }, { "epoch": 4.59, "learning_rate": 3.634853990659126e-08, "loss": 6.1155, "step": 2840 }, { "epoch": 4.59, "learning_rate": 3.579055389471508e-08, "loss": 6.0291, "step": 2842 }, { "epoch": 4.6, "learning_rate": 3.523680584889188e-08, "loss": 5.8402, "step": 2844 }, { "epoch": 4.6, "learning_rate": 3.4687298202996654e-08, "loss": 6.3115, "step": 2846 }, { "epoch": 4.6, "learning_rate": 3.414203337226695e-08, "loss": 5.715, "step": 2848 }, { "epoch": 4.61, "learning_rate": 3.360101375329194e-08, "loss": 5.9762, "step": 2850 }, { "epoch": 4.61, "learning_rate": 3.3064241724001794e-08, "loss": 5.6715, "step": 2852 }, { "epoch": 4.61, "learning_rate": 3.253171964365731e-08, "loss": 6.0563, "step": 2854 }, { "epoch": 4.62, "learning_rate": 3.200344985283965e-08, "loss": 6.2123, "step": 2856 }, { "epoch": 4.62, "learning_rate": 3.147943467344016e-08, "loss": 5.653, "step": 2858 }, { "epoch": 4.62, "learning_rate": 3.0959676408649824e-08, "loss": 5.8813, "step": 2860 }, { "epoch": 4.63, "learning_rate": 3.0444177342949464e-08, "loss": 5.839, "step": 2862 }, { "epoch": 4.63, "learning_rate": 2.993293974209921e-08, "loss": 6.2093, "step": 2864 }, { "epoch": 4.63, "learning_rate": 2.9425965853129285e-08, "loss": 6.016, "step": 2866 }, { "epoch": 4.64, "learning_rate": 2.8923257904329478e-08, "loss": 6.3201, "step": 2868 }, { "epoch": 4.64, "learning_rate": 2.8424818105239777e-08, "loss": 6.1385, "step": 2870 }, { "epoch": 4.64, "learning_rate": 2.7930648646640186e-08, "loss": 5.7025, "step": 2872 }, { "epoch": 4.64, "learning_rate": 2.7440751700541607e-08, "loss": 6.1556, "step": 2874 }, { "epoch": 4.65, "learning_rate": 2.6955129420176193e-08, "loss": 6.0627, "step": 2876 }, { "epoch": 4.65, "learning_rate": 2.6473783939987448e-08, "loss": 6.0179, "step": 2878 }, { "epoch": 4.65, "learning_rate": 2.599671737562137e-08, "loss": 5.8187, "step": 2880 }, { "epoch": 4.66, "learning_rate": 2.5523931823916768e-08, "loss": 6.3774, "step": 2882 }, { "epoch": 4.66, "learning_rate": 2.505542936289651e-08, "loss": 5.7561, "step": 2884 }, { "epoch": 4.66, "learning_rate": 2.4591212051757958e-08, "loss": 5.9467, "step": 2886 }, { "epoch": 4.67, "learning_rate": 2.4131281930864e-08, "loss": 5.6536, "step": 2888 }, { "epoch": 4.67, "learning_rate": 2.3675641021734026e-08, "loss": 5.851, "step": 2890 }, { "epoch": 4.67, "learning_rate": 2.3224291327035404e-08, "loss": 6.0569, "step": 2892 }, { "epoch": 4.68, "learning_rate": 2.2777234830574476e-08, "loss": 6.06, "step": 2894 }, { "epoch": 4.68, "learning_rate": 2.2334473497287453e-08, "loss": 6.3221, "step": 2896 }, { "epoch": 4.68, "learning_rate": 2.189600927323243e-08, "loss": 5.5794, "step": 2898 }, { "epoch": 4.69, "learning_rate": 2.146184408558038e-08, "loss": 6.3434, "step": 2900 }, { "epoch": 4.69, "learning_rate": 2.1031979842606852e-08, "loss": 5.804, "step": 2902 }, { "epoch": 4.69, "learning_rate": 2.0606418433683824e-08, "loss": 5.9346, "step": 2904 }, { "epoch": 4.7, "learning_rate": 2.018516172927065e-08, "loss": 5.8539, "step": 2906 }, { "epoch": 4.7, "learning_rate": 1.9768211580906468e-08, "loss": 6.2849, "step": 2908 }, { "epoch": 4.7, "learning_rate": 1.9355569821202234e-08, "loss": 6.0772, "step": 2910 }, { "epoch": 4.71, "learning_rate": 1.8947238263832043e-08, "loss": 5.8683, "step": 2912 }, { "epoch": 4.71, "learning_rate": 1.8543218703525376e-08, "loss": 5.4705, "step": 2914 }, { "epoch": 4.71, "learning_rate": 1.8143512916059644e-08, "loss": 5.9988, "step": 2916 }, { "epoch": 4.72, "learning_rate": 1.7748122658251872e-08, "loss": 5.9572, "step": 2918 }, { "epoch": 4.72, "learning_rate": 1.735704966795104e-08, "loss": 6.1466, "step": 2920 }, { "epoch": 4.72, "learning_rate": 1.697029566403074e-08, "loss": 5.9623, "step": 2922 }, { "epoch": 4.73, "learning_rate": 1.658786234638132e-08, "loss": 5.9641, "step": 2924 }, { "epoch": 4.73, "learning_rate": 1.6209751395902416e-08, "loss": 6.1721, "step": 2926 }, { "epoch": 4.73, "learning_rate": 1.5835964474495865e-08, "loss": 6.0639, "step": 2928 }, { "epoch": 4.74, "learning_rate": 1.5466503225058046e-08, "loss": 6.2786, "step": 2930 }, { "epoch": 4.74, "learning_rate": 1.5101369271472987e-08, "loss": 5.4821, "step": 2932 }, { "epoch": 4.74, "learning_rate": 1.4740564218605034e-08, "loss": 5.7989, "step": 2934 }, { "epoch": 4.75, "learning_rate": 1.4384089652291543e-08, "loss": 6.0321, "step": 2936 }, { "epoch": 4.75, "learning_rate": 1.4031947139336641e-08, "loss": 5.9969, "step": 2938 }, { "epoch": 4.75, "learning_rate": 1.3684138227503472e-08, "loss": 6.1363, "step": 2940 }, { "epoch": 4.75, "learning_rate": 1.3340664445507966e-08, "loss": 6.1536, "step": 2942 }, { "epoch": 4.76, "learning_rate": 1.3001527303012183e-08, "loss": 6.0459, "step": 2944 }, { "epoch": 4.76, "learning_rate": 1.2666728290617212e-08, "loss": 5.8207, "step": 2946 }, { "epoch": 4.76, "learning_rate": 1.2336268879856726e-08, "loss": 6.0452, "step": 2948 }, { "epoch": 4.77, "learning_rate": 1.2010150523190988e-08, "loss": 5.9519, "step": 2950 }, { "epoch": 4.77, "learning_rate": 1.1688374654000076e-08, "loss": 5.9094, "step": 2952 }, { "epoch": 4.77, "learning_rate": 1.1370942686577345e-08, "loss": 5.9469, "step": 2954 }, { "epoch": 4.78, "learning_rate": 1.1057856016123857e-08, "loss": 5.5568, "step": 2956 }, { "epoch": 4.78, "learning_rate": 1.0749116018741621e-08, "loss": 6.0991, "step": 2958 }, { "epoch": 4.78, "learning_rate": 1.0444724051428155e-08, "loss": 6.1865, "step": 2960 }, { "epoch": 4.79, "learning_rate": 1.0144681452069703e-08, "loss": 5.8771, "step": 2962 }, { "epoch": 4.79, "learning_rate": 9.84898953943636e-09, "loss": 6.2052, "step": 2964 }, { "epoch": 4.79, "learning_rate": 9.5576496131754e-09, "loss": 5.171, "step": 2966 }, { "epoch": 4.8, "learning_rate": 9.270662953806186e-09, "loss": 6.3066, "step": 2968 }, { "epoch": 4.8, "learning_rate": 8.988030822713821e-09, "loss": 5.9792, "step": 2970 }, { "epoch": 4.8, "learning_rate": 8.709754462144615e-09, "loss": 5.3831, "step": 2972 }, { "epoch": 4.81, "learning_rate": 8.435835095199628e-09, "loss": 5.7783, "step": 2974 }, { "epoch": 4.81, "learning_rate": 8.166273925830135e-09, "loss": 6.1661, "step": 2976 }, { "epoch": 4.81, "learning_rate": 7.90107213883151e-09, "loss": 5.8294, "step": 2978 }, { "epoch": 4.82, "learning_rate": 7.640230899838784e-09, "loss": 5.9972, "step": 2980 }, { "epoch": 4.82, "learning_rate": 7.3837513553209885e-09, "loss": 6.2389, "step": 2982 }, { "epoch": 4.82, "learning_rate": 7.131634632576267e-09, "loss": 5.7572, "step": 2984 }, { "epoch": 4.83, "learning_rate": 6.883881839727101e-09, "loss": 5.4765, "step": 2986 }, { "epoch": 4.83, "learning_rate": 6.640494065715207e-09, "loss": 5.7902, "step": 2988 }, { "epoch": 4.83, "learning_rate": 6.40147238029709e-09, "loss": 6.2533, "step": 2990 }, { "epoch": 4.84, "learning_rate": 6.166817834038607e-09, "loss": 5.929, "step": 2992 }, { "epoch": 4.84, "learning_rate": 5.936531458311189e-09, "loss": 6.0872, "step": 2994 }, { "epoch": 4.84, "learning_rate": 5.710614265287073e-09, "loss": 5.8763, "step": 2996 }, { "epoch": 4.85, "learning_rate": 5.489067247934298e-09, "loss": 6.1908, "step": 2998 }, { "epoch": 4.85, "learning_rate": 5.27189138001316e-09, "loss": 5.9865, "step": 3000 }, { "epoch": 4.85, "learning_rate": 5.059087616071212e-09, "loss": 5.8444, "step": 3002 }, { "epoch": 4.85, "learning_rate": 4.850656891439819e-09, "loss": 5.8512, "step": 3004 }, { "epoch": 4.86, "learning_rate": 4.646600122229283e-09, "loss": 6.4206, "step": 3006 }, { "epoch": 4.86, "learning_rate": 4.446918205325389e-09, "loss": 6.4128, "step": 3008 }, { "epoch": 4.86, "learning_rate": 4.251612018385087e-09, "loss": 5.4091, "step": 3010 }, { "epoch": 4.87, "learning_rate": 4.060682419832928e-09, "loss": 6.5193, "step": 3012 }, { "epoch": 4.87, "learning_rate": 3.874130248857077e-09, "loss": 6.1928, "step": 3014 }, { "epoch": 4.87, "learning_rate": 3.691956325405643e-09, "loss": 5.8905, "step": 3016 }, { "epoch": 4.88, "learning_rate": 3.5141614501831285e-09, "loss": 6.2318, "step": 3018 }, { "epoch": 4.88, "learning_rate": 3.340746404647099e-09, "loss": 5.7658, "step": 3020 }, { "epoch": 4.88, "learning_rate": 3.1717119510044076e-09, "loss": 5.5051, "step": 3022 }, { "epoch": 4.89, "learning_rate": 3.007058832207976e-09, "loss": 5.8644, "step": 3024 }, { "epoch": 4.89, "learning_rate": 2.8467877719535736e-09, "loss": 6.4457, "step": 3026 }, { "epoch": 4.89, "learning_rate": 2.690899474676822e-09, "loss": 5.5942, "step": 3028 }, { "epoch": 4.9, "learning_rate": 2.5393946255495293e-09, "loss": 6.0458, "step": 3030 }, { "epoch": 4.9, "learning_rate": 2.3922738904773587e-09, "loss": 6.0967, "step": 3032 }, { "epoch": 4.9, "learning_rate": 2.249537916096389e-09, "loss": 6.1345, "step": 3034 }, { "epoch": 4.91, "learning_rate": 2.1111873297706695e-09, "loss": 5.7031, "step": 3036 }, { "epoch": 4.91, "learning_rate": 1.9772227395888905e-09, "loss": 6.0675, "step": 3038 }, { "epoch": 4.91, "learning_rate": 1.8476447343624968e-09, "loss": 5.5652, "step": 3040 }, { "epoch": 4.92, "learning_rate": 1.7224538836223545e-09, "loss": 6.0337, "step": 3042 }, { "epoch": 4.92, "learning_rate": 1.6016507376169774e-09, "loss": 6.1635, "step": 3044 }, { "epoch": 4.92, "learning_rate": 1.4852358273091947e-09, "loss": 5.2145, "step": 3046 }, { "epoch": 4.93, "learning_rate": 1.3732096643747082e-09, "loss": 5.837, "step": 3048 }, { "epoch": 4.93, "learning_rate": 1.2655727411994276e-09, "loss": 6.4039, "step": 3050 }, { "epoch": 4.93, "learning_rate": 1.1623255308772507e-09, "loss": 6.0581, "step": 3052 }, { "epoch": 4.94, "learning_rate": 1.0634684872079525e-09, "loss": 5.8881, "step": 3054 }, { "epoch": 4.94, "learning_rate": 9.690020446956327e-10, "loss": 5.9365, "step": 3056 }, { "epoch": 4.94, "learning_rate": 8.789266185461608e-10, "loss": 6.265, "step": 3058 }, { "epoch": 4.95, "learning_rate": 7.932426046660667e-10, "loss": 6.209, "step": 3060 }, { "epoch": 4.95, "learning_rate": 7.119503796599868e-10, "loss": 5.9455, "step": 3062 }, { "epoch": 4.95, "learning_rate": 6.350503008296648e-10, "loss": 6.1457, "step": 3064 }, { "epoch": 4.96, "learning_rate": 5.625427061722865e-10, "loss": 5.8044, "step": 3066 }, { "epoch": 4.96, "learning_rate": 4.944279143784813e-10, "loss": 6.093, "step": 3068 }, { "epoch": 4.96, "learning_rate": 4.3070622483165617e-10, "loss": 6.2533, "step": 3070 }, { "epoch": 4.96, "learning_rate": 3.7137791760610824e-10, "loss": 6.0355, "step": 3072 }, { "epoch": 4.97, "learning_rate": 3.1644325346624757e-10, "loss": 5.6227, "step": 3074 }, { "epoch": 4.97, "learning_rate": 2.659024738648208e-10, "loss": 6.157, "step": 3076 }, { "epoch": 4.97, "learning_rate": 2.1975580094257818e-10, "loss": 5.9123, "step": 3078 }, { "epoch": 4.98, "learning_rate": 1.7800343752683021e-10, "loss": 5.2241, "step": 3080 }, { "epoch": 4.98, "learning_rate": 1.406455671308926e-10, "loss": 6.0805, "step": 3082 }, { "epoch": 4.98, "learning_rate": 1.076823539526428e-10, "loss": 5.9766, "step": 3084 }, { "epoch": 4.99, "learning_rate": 7.911394287452022e-11, "loss": 5.9986, "step": 3086 }, { "epoch": 4.99, "learning_rate": 5.494045946263792e-11, "loss": 6.1283, "step": 3088 }, { "epoch": 4.99, "learning_rate": 3.5162009966227535e-11, "loss": 5.881, "step": 3090 }, { "epoch": 4.99, "step": 3090, "total_flos": 1.1758946324001587e+17, "train_loss": 6.616625037085277, "train_runtime": 22066.2449, "train_samples_per_second": 8.973, "train_steps_per_second": 0.14 } ], "logging_steps": 2, "max_steps": 3090, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 4000, "total_flos": 1.1758946324001587e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }