TienAnh's picture
Model save
41648f1 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.993939393939394,
"eval_steps": 500,
"global_step": 3090,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.0,
"loss": 15.9424,
"step": 2
},
{
"epoch": 0.01,
"learning_rate": 0.0,
"loss": 16.295,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 4.3010752688172045e-08,
"loss": 15.4463,
"step": 6
},
{
"epoch": 0.01,
"learning_rate": 8.602150537634409e-08,
"loss": 16.1327,
"step": 8
},
{
"epoch": 0.02,
"learning_rate": 1.0752688172043011e-07,
"loss": 14.9217,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 1.5053763440860215e-07,
"loss": 15.1591,
"step": 12
},
{
"epoch": 0.02,
"learning_rate": 1.9354838709677418e-07,
"loss": 15.6472,
"step": 14
},
{
"epoch": 0.03,
"learning_rate": 2.3655913978494625e-07,
"loss": 16.3197,
"step": 16
},
{
"epoch": 0.03,
"learning_rate": 2.7956989247311823e-07,
"loss": 16.4411,
"step": 18
},
{
"epoch": 0.03,
"learning_rate": 3.225806451612903e-07,
"loss": 16.096,
"step": 20
},
{
"epoch": 0.04,
"learning_rate": 3.6559139784946236e-07,
"loss": 15.8512,
"step": 22
},
{
"epoch": 0.04,
"learning_rate": 4.0860215053763443e-07,
"loss": 15.4195,
"step": 24
},
{
"epoch": 0.04,
"learning_rate": 4.5161290322580644e-07,
"loss": 15.002,
"step": 26
},
{
"epoch": 0.05,
"learning_rate": 4.946236559139784e-07,
"loss": 15.2072,
"step": 28
},
{
"epoch": 0.05,
"learning_rate": 5.376344086021505e-07,
"loss": 15.1285,
"step": 30
},
{
"epoch": 0.05,
"learning_rate": 5.806451612903226e-07,
"loss": 15.4741,
"step": 32
},
{
"epoch": 0.05,
"learning_rate": 6.236559139784946e-07,
"loss": 15.239,
"step": 34
},
{
"epoch": 0.06,
"learning_rate": 6.666666666666666e-07,
"loss": 15.074,
"step": 36
},
{
"epoch": 0.06,
"learning_rate": 7.096774193548387e-07,
"loss": 14.9462,
"step": 38
},
{
"epoch": 0.06,
"learning_rate": 7.526881720430107e-07,
"loss": 15.4049,
"step": 40
},
{
"epoch": 0.07,
"learning_rate": 7.956989247311827e-07,
"loss": 15.2125,
"step": 42
},
{
"epoch": 0.07,
"learning_rate": 8.387096774193549e-07,
"loss": 14.8559,
"step": 44
},
{
"epoch": 0.07,
"learning_rate": 8.817204301075269e-07,
"loss": 14.7259,
"step": 46
},
{
"epoch": 0.08,
"learning_rate": 9.247311827956989e-07,
"loss": 15.2511,
"step": 48
},
{
"epoch": 0.08,
"learning_rate": 9.67741935483871e-07,
"loss": 14.5694,
"step": 50
},
{
"epoch": 0.08,
"learning_rate": 1.010752688172043e-06,
"loss": 14.0111,
"step": 52
},
{
"epoch": 0.09,
"learning_rate": 1.053763440860215e-06,
"loss": 14.6142,
"step": 54
},
{
"epoch": 0.09,
"learning_rate": 1.096774193548387e-06,
"loss": 13.0768,
"step": 56
},
{
"epoch": 0.09,
"learning_rate": 1.1397849462365593e-06,
"loss": 13.8173,
"step": 58
},
{
"epoch": 0.1,
"learning_rate": 1.1827956989247313e-06,
"loss": 13.8097,
"step": 60
},
{
"epoch": 0.1,
"learning_rate": 1.2258064516129033e-06,
"loss": 14.053,
"step": 62
},
{
"epoch": 0.1,
"learning_rate": 1.2688172043010753e-06,
"loss": 14.0456,
"step": 64
},
{
"epoch": 0.11,
"learning_rate": 1.3118279569892473e-06,
"loss": 13.4909,
"step": 66
},
{
"epoch": 0.11,
"learning_rate": 1.354838709677419e-06,
"loss": 14.0707,
"step": 68
},
{
"epoch": 0.11,
"learning_rate": 1.3978494623655913e-06,
"loss": 12.4678,
"step": 70
},
{
"epoch": 0.12,
"learning_rate": 1.4408602150537633e-06,
"loss": 12.8635,
"step": 72
},
{
"epoch": 0.12,
"learning_rate": 1.4838709677419353e-06,
"loss": 12.8816,
"step": 74
},
{
"epoch": 0.12,
"learning_rate": 1.5268817204301074e-06,
"loss": 11.6928,
"step": 76
},
{
"epoch": 0.13,
"learning_rate": 1.5698924731182794e-06,
"loss": 12.3063,
"step": 78
},
{
"epoch": 0.13,
"learning_rate": 1.6129032258064514e-06,
"loss": 11.9676,
"step": 80
},
{
"epoch": 0.13,
"learning_rate": 1.6559139784946236e-06,
"loss": 12.5602,
"step": 82
},
{
"epoch": 0.14,
"learning_rate": 1.6989247311827956e-06,
"loss": 11.1249,
"step": 84
},
{
"epoch": 0.14,
"learning_rate": 1.7419354838709676e-06,
"loss": 12.8276,
"step": 86
},
{
"epoch": 0.14,
"learning_rate": 1.7849462365591396e-06,
"loss": 11.7053,
"step": 88
},
{
"epoch": 0.15,
"learning_rate": 1.8279569892473117e-06,
"loss": 11.9643,
"step": 90
},
{
"epoch": 0.15,
"learning_rate": 1.8709677419354837e-06,
"loss": 11.5187,
"step": 92
},
{
"epoch": 0.15,
"learning_rate": 1.913978494623656e-06,
"loss": 10.6268,
"step": 94
},
{
"epoch": 0.16,
"learning_rate": 1.956989247311828e-06,
"loss": 10.5529,
"step": 96
},
{
"epoch": 0.16,
"learning_rate": 1.978494623655914e-06,
"loss": 10.9569,
"step": 98
},
{
"epoch": 0.16,
"learning_rate": 1.9999994505904246e-06,
"loss": 10.389,
"step": 100
},
{
"epoch": 0.16,
"learning_rate": 1.999995055317446e-06,
"loss": 11.2734,
"step": 102
},
{
"epoch": 0.17,
"learning_rate": 1.9999862647908065e-06,
"loss": 10.9547,
"step": 104
},
{
"epoch": 0.17,
"learning_rate": 1.999973079049143e-06,
"loss": 10.8081,
"step": 106
},
{
"epoch": 0.17,
"learning_rate": 1.999955498150411e-06,
"loss": 9.9185,
"step": 108
},
{
"epoch": 0.18,
"learning_rate": 1.999933522171883e-06,
"loss": 10.0234,
"step": 110
},
{
"epoch": 0.18,
"learning_rate": 1.9999071512101496e-06,
"loss": 9.7738,
"step": 112
},
{
"epoch": 0.18,
"learning_rate": 1.999876385381118e-06,
"loss": 10.5572,
"step": 114
},
{
"epoch": 0.19,
"learning_rate": 1.999841224820014e-06,
"loss": 10.809,
"step": 116
},
{
"epoch": 0.19,
"learning_rate": 1.9998016696813757e-06,
"loss": 9.3994,
"step": 118
},
{
"epoch": 0.19,
"learning_rate": 1.9997577201390606e-06,
"loss": 10.4461,
"step": 120
},
{
"epoch": 0.2,
"learning_rate": 1.9997093763862384e-06,
"loss": 10.3454,
"step": 122
},
{
"epoch": 0.2,
"learning_rate": 1.999656638635393e-06,
"loss": 9.2653,
"step": 124
},
{
"epoch": 0.2,
"learning_rate": 1.9995995071183215e-06,
"loss": 8.1009,
"step": 126
},
{
"epoch": 0.21,
"learning_rate": 1.999537982086133e-06,
"loss": 8.6041,
"step": 128
},
{
"epoch": 0.21,
"learning_rate": 1.9994720638092465e-06,
"loss": 9.7334,
"step": 130
},
{
"epoch": 0.21,
"learning_rate": 1.9994017525773913e-06,
"loss": 10.2584,
"step": 132
},
{
"epoch": 0.22,
"learning_rate": 1.9993270486996043e-06,
"loss": 10.2385,
"step": 134
},
{
"epoch": 0.22,
"learning_rate": 1.99924795250423e-06,
"loss": 9.471,
"step": 136
},
{
"epoch": 0.22,
"learning_rate": 1.999164464338918e-06,
"loss": 9.7777,
"step": 138
},
{
"epoch": 0.23,
"learning_rate": 1.999076584570621e-06,
"loss": 9.7583,
"step": 140
},
{
"epoch": 0.23,
"learning_rate": 1.9989843135855954e-06,
"loss": 9.3903,
"step": 142
},
{
"epoch": 0.23,
"learning_rate": 1.9988876517893977e-06,
"loss": 9.416,
"step": 144
},
{
"epoch": 0.24,
"learning_rate": 1.998786599606883e-06,
"loss": 9.2708,
"step": 146
},
{
"epoch": 0.24,
"learning_rate": 1.9986811574822033e-06,
"loss": 9.844,
"step": 148
},
{
"epoch": 0.24,
"learning_rate": 1.998571325878806e-06,
"loss": 9.6433,
"step": 150
},
{
"epoch": 0.25,
"learning_rate": 1.998457105279431e-06,
"loss": 9.2691,
"step": 152
},
{
"epoch": 0.25,
"learning_rate": 1.9983384961861095e-06,
"loss": 9.4914,
"step": 154
},
{
"epoch": 0.25,
"learning_rate": 1.9982154991201607e-06,
"loss": 9.2666,
"step": 156
},
{
"epoch": 0.26,
"learning_rate": 1.9980881146221913e-06,
"loss": 8.2851,
"step": 158
},
{
"epoch": 0.26,
"learning_rate": 1.997956343252091e-06,
"loss": 9.1172,
"step": 160
},
{
"epoch": 0.26,
"learning_rate": 1.9978201855890305e-06,
"loss": 9.1622,
"step": 162
},
{
"epoch": 0.27,
"learning_rate": 1.9976796422314615e-06,
"loss": 9.1079,
"step": 164
},
{
"epoch": 0.27,
"learning_rate": 1.9975347137971096e-06,
"loss": 9.4297,
"step": 166
},
{
"epoch": 0.27,
"learning_rate": 1.997385400922976e-06,
"loss": 8.043,
"step": 168
},
{
"epoch": 0.27,
"learning_rate": 1.9972317042653317e-06,
"loss": 8.6009,
"step": 170
},
{
"epoch": 0.28,
"learning_rate": 1.997073624499716e-06,
"loss": 9.0201,
"step": 172
},
{
"epoch": 0.28,
"learning_rate": 1.9969111623209322e-06,
"loss": 8.6173,
"step": 174
},
{
"epoch": 0.28,
"learning_rate": 1.9967443184430464e-06,
"loss": 9.2188,
"step": 176
},
{
"epoch": 0.29,
"learning_rate": 1.996573093599385e-06,
"loss": 8.8459,
"step": 178
},
{
"epoch": 0.29,
"learning_rate": 1.996397488542526e-06,
"loss": 8.8776,
"step": 180
},
{
"epoch": 0.29,
"learning_rate": 1.996217504044304e-06,
"loss": 8.771,
"step": 182
},
{
"epoch": 0.3,
"learning_rate": 1.9960331408957996e-06,
"loss": 9.1059,
"step": 184
},
{
"epoch": 0.3,
"learning_rate": 1.9958443999073394e-06,
"loss": 8.9898,
"step": 186
},
{
"epoch": 0.3,
"learning_rate": 1.9956512819084924e-06,
"loss": 8.3461,
"step": 188
},
{
"epoch": 0.31,
"learning_rate": 1.995453787748065e-06,
"loss": 8.4594,
"step": 190
},
{
"epoch": 0.31,
"learning_rate": 1.995251918294099e-06,
"loss": 8.8926,
"step": 192
},
{
"epoch": 0.31,
"learning_rate": 1.9950456744338658e-06,
"loss": 9.1989,
"step": 194
},
{
"epoch": 0.32,
"learning_rate": 1.9948350570738638e-06,
"loss": 7.9779,
"step": 196
},
{
"epoch": 0.32,
"learning_rate": 1.9946200671398146e-06,
"loss": 9.0904,
"step": 198
},
{
"epoch": 0.32,
"learning_rate": 1.9944007055766586e-06,
"loss": 8.3997,
"step": 200
},
{
"epoch": 0.33,
"learning_rate": 1.994176973348549e-06,
"loss": 8.5416,
"step": 202
},
{
"epoch": 0.33,
"learning_rate": 1.993948871438852e-06,
"loss": 8.6818,
"step": 204
},
{
"epoch": 0.33,
"learning_rate": 1.9937164008501376e-06,
"loss": 8.6807,
"step": 206
},
{
"epoch": 0.34,
"learning_rate": 1.993479562604178e-06,
"loss": 8.596,
"step": 208
},
{
"epoch": 0.34,
"learning_rate": 1.9932383577419428e-06,
"loss": 8.1365,
"step": 210
},
{
"epoch": 0.34,
"learning_rate": 1.9929927873235937e-06,
"loss": 8.8869,
"step": 212
},
{
"epoch": 0.35,
"learning_rate": 1.99274285242848e-06,
"loss": 8.7621,
"step": 214
},
{
"epoch": 0.35,
"learning_rate": 1.9924885541551347e-06,
"loss": 8.4504,
"step": 216
},
{
"epoch": 0.35,
"learning_rate": 1.992229893621269e-06,
"loss": 8.7773,
"step": 218
},
{
"epoch": 0.36,
"learning_rate": 1.9919668719637667e-06,
"loss": 8.6132,
"step": 220
},
{
"epoch": 0.36,
"learning_rate": 1.9916994903386806e-06,
"loss": 8.4847,
"step": 222
},
{
"epoch": 0.36,
"learning_rate": 1.9914277499212265e-06,
"loss": 8.4073,
"step": 224
},
{
"epoch": 0.37,
"learning_rate": 1.9911516519057786e-06,
"loss": 8.1962,
"step": 226
},
{
"epoch": 0.37,
"learning_rate": 1.9908711975058636e-06,
"loss": 8.3719,
"step": 228
},
{
"epoch": 0.37,
"learning_rate": 1.990586387954156e-06,
"loss": 8.566,
"step": 230
},
{
"epoch": 0.37,
"learning_rate": 1.9902972245024713e-06,
"loss": 8.2501,
"step": 232
},
{
"epoch": 0.38,
"learning_rate": 1.9900037084217634e-06,
"loss": 8.4245,
"step": 234
},
{
"epoch": 0.38,
"learning_rate": 1.9897058410021164e-06,
"loss": 8.5001,
"step": 236
},
{
"epoch": 0.38,
"learning_rate": 1.9894036235527395e-06,
"loss": 8.553,
"step": 238
},
{
"epoch": 0.39,
"learning_rate": 1.9890970574019616e-06,
"loss": 8.2188,
"step": 240
},
{
"epoch": 0.39,
"learning_rate": 1.9887861438972245e-06,
"loss": 8.5532,
"step": 242
},
{
"epoch": 0.39,
"learning_rate": 1.98847088440508e-06,
"loss": 8.3859,
"step": 244
},
{
"epoch": 0.4,
"learning_rate": 1.9881512803111793e-06,
"loss": 8.3388,
"step": 246
},
{
"epoch": 0.4,
"learning_rate": 1.9878273330202714e-06,
"loss": 8.2367,
"step": 248
},
{
"epoch": 0.4,
"learning_rate": 1.987499043956193e-06,
"loss": 8.383,
"step": 250
},
{
"epoch": 0.41,
"learning_rate": 1.9871664145618655e-06,
"loss": 8.6529,
"step": 252
},
{
"epoch": 0.41,
"learning_rate": 1.9868294462992865e-06,
"loss": 7.9699,
"step": 254
},
{
"epoch": 0.41,
"learning_rate": 1.9864881406495245e-06,
"loss": 7.9447,
"step": 256
},
{
"epoch": 0.42,
"learning_rate": 1.9861424991127113e-06,
"loss": 7.0923,
"step": 258
},
{
"epoch": 0.42,
"learning_rate": 1.9857925232080374e-06,
"loss": 8.0928,
"step": 260
},
{
"epoch": 0.42,
"learning_rate": 1.9854382144737426e-06,
"loss": 7.5764,
"step": 262
},
{
"epoch": 0.43,
"learning_rate": 1.9850795744671112e-06,
"loss": 7.6531,
"step": 264
},
{
"epoch": 0.43,
"learning_rate": 1.9847166047644658e-06,
"loss": 8.1521,
"step": 266
},
{
"epoch": 0.43,
"learning_rate": 1.9843493069611576e-06,
"loss": 7.9413,
"step": 268
},
{
"epoch": 0.44,
"learning_rate": 1.9839776826715613e-06,
"loss": 7.9654,
"step": 270
},
{
"epoch": 0.44,
"learning_rate": 1.983601733529069e-06,
"loss": 7.659,
"step": 272
},
{
"epoch": 0.44,
"learning_rate": 1.9832214611860793e-06,
"loss": 8.2267,
"step": 274
},
{
"epoch": 0.45,
"learning_rate": 1.9828368673139946e-06,
"loss": 7.1799,
"step": 276
},
{
"epoch": 0.45,
"learning_rate": 1.982447953603211e-06,
"loss": 8.1048,
"step": 278
},
{
"epoch": 0.45,
"learning_rate": 1.9820547217631115e-06,
"loss": 7.9811,
"step": 280
},
{
"epoch": 0.46,
"learning_rate": 1.981657173522058e-06,
"loss": 8.4555,
"step": 282
},
{
"epoch": 0.46,
"learning_rate": 1.9812553106273845e-06,
"loss": 7.7934,
"step": 284
},
{
"epoch": 0.46,
"learning_rate": 1.980849134845389e-06,
"loss": 8.1038,
"step": 286
},
{
"epoch": 0.47,
"learning_rate": 1.9804386479613267e-06,
"loss": 7.4383,
"step": 288
},
{
"epoch": 0.47,
"learning_rate": 1.9800238517793994e-06,
"loss": 7.7877,
"step": 290
},
{
"epoch": 0.47,
"learning_rate": 1.9796047481227515e-06,
"loss": 7.5177,
"step": 292
},
{
"epoch": 0.48,
"learning_rate": 1.979181338833458e-06,
"loss": 7.2789,
"step": 294
},
{
"epoch": 0.48,
"learning_rate": 1.97875362577252e-06,
"loss": 7.0517,
"step": 296
},
{
"epoch": 0.48,
"learning_rate": 1.9783216108198542e-06,
"loss": 7.9153,
"step": 298
},
{
"epoch": 0.48,
"learning_rate": 1.9778852958742852e-06,
"loss": 7.9618,
"step": 300
},
{
"epoch": 0.49,
"learning_rate": 1.977444682853537e-06,
"loss": 7.2271,
"step": 302
},
{
"epoch": 0.49,
"learning_rate": 1.9769997736942255e-06,
"loss": 7.6914,
"step": 304
},
{
"epoch": 0.49,
"learning_rate": 1.9765505703518493e-06,
"loss": 8.0209,
"step": 306
},
{
"epoch": 0.5,
"learning_rate": 1.97609707480078e-06,
"loss": 7.2783,
"step": 308
},
{
"epoch": 0.5,
"learning_rate": 1.975639289034256e-06,
"loss": 7.7316,
"step": 310
},
{
"epoch": 0.5,
"learning_rate": 1.975177215064372e-06,
"loss": 7.4457,
"step": 312
},
{
"epoch": 0.51,
"learning_rate": 1.97471085492207e-06,
"loss": 7.9042,
"step": 314
},
{
"epoch": 0.51,
"learning_rate": 1.974240210657131e-06,
"loss": 7.4322,
"step": 316
},
{
"epoch": 0.51,
"learning_rate": 1.973765284338167e-06,
"loss": 7.2353,
"step": 318
},
{
"epoch": 0.52,
"learning_rate": 1.9732860780526087e-06,
"loss": 7.9842,
"step": 320
},
{
"epoch": 0.52,
"learning_rate": 1.972802593906701e-06,
"loss": 7.2809,
"step": 322
},
{
"epoch": 0.52,
"learning_rate": 1.972314834025489e-06,
"loss": 7.7988,
"step": 324
},
{
"epoch": 0.53,
"learning_rate": 1.971822800552812e-06,
"loss": 7.7482,
"step": 326
},
{
"epoch": 0.53,
"learning_rate": 1.9713264956512927e-06,
"loss": 7.3617,
"step": 328
},
{
"epoch": 0.53,
"learning_rate": 1.9708259215023275e-06,
"loss": 7.4253,
"step": 330
},
{
"epoch": 0.54,
"learning_rate": 1.970321080306078e-06,
"loss": 7.5589,
"step": 332
},
{
"epoch": 0.54,
"learning_rate": 1.9698119742814604e-06,
"loss": 7.4905,
"step": 334
},
{
"epoch": 0.54,
"learning_rate": 1.9692986056661354e-06,
"loss": 7.4516,
"step": 336
},
{
"epoch": 0.55,
"learning_rate": 1.9687809767165e-06,
"loss": 7.555,
"step": 338
},
{
"epoch": 0.55,
"learning_rate": 1.9682590897076753e-06,
"loss": 7.7357,
"step": 340
},
{
"epoch": 0.55,
"learning_rate": 1.967732946933499e-06,
"loss": 7.6685,
"step": 342
},
{
"epoch": 0.56,
"learning_rate": 1.9672025507065133e-06,
"loss": 7.6611,
"step": 344
},
{
"epoch": 0.56,
"learning_rate": 1.966667903357955e-06,
"loss": 7.7605,
"step": 346
},
{
"epoch": 0.56,
"learning_rate": 1.9661290072377477e-06,
"loss": 7.9287,
"step": 348
},
{
"epoch": 0.57,
"learning_rate": 1.965585864714488e-06,
"loss": 7.5854,
"step": 350
},
{
"epoch": 0.57,
"learning_rate": 1.965038478175436e-06,
"loss": 6.5967,
"step": 352
},
{
"epoch": 0.57,
"learning_rate": 1.964486850026507e-06,
"loss": 7.6664,
"step": 354
},
{
"epoch": 0.58,
"learning_rate": 1.9639309826922583e-06,
"loss": 6.9259,
"step": 356
},
{
"epoch": 0.58,
"learning_rate": 1.9633708786158803e-06,
"loss": 7.3177,
"step": 358
},
{
"epoch": 0.58,
"learning_rate": 1.962806540259184e-06,
"loss": 7.5336,
"step": 360
},
{
"epoch": 0.59,
"learning_rate": 1.962237970102593e-06,
"loss": 7.4839,
"step": 362
},
{
"epoch": 0.59,
"learning_rate": 1.9616651706451285e-06,
"loss": 7.3067,
"step": 364
},
{
"epoch": 0.59,
"learning_rate": 1.9610881444044027e-06,
"loss": 7.4986,
"step": 366
},
{
"epoch": 0.59,
"learning_rate": 1.9605068939166045e-06,
"loss": 7.3467,
"step": 368
},
{
"epoch": 0.6,
"learning_rate": 1.95992142173649e-06,
"loss": 7.7502,
"step": 370
},
{
"epoch": 0.6,
"learning_rate": 1.9593317304373703e-06,
"loss": 7.4302,
"step": 372
},
{
"epoch": 0.6,
"learning_rate": 1.958737822611101e-06,
"loss": 7.1672,
"step": 374
},
{
"epoch": 0.61,
"learning_rate": 1.9581397008680715e-06,
"loss": 7.0433,
"step": 376
},
{
"epoch": 0.61,
"learning_rate": 1.957537367837191e-06,
"loss": 7.467,
"step": 378
},
{
"epoch": 0.61,
"learning_rate": 1.9569308261658788e-06,
"loss": 7.6979,
"step": 380
},
{
"epoch": 0.62,
"learning_rate": 1.9563200785200524e-06,
"loss": 7.4041,
"step": 382
},
{
"epoch": 0.62,
"learning_rate": 1.955705127584117e-06,
"loss": 7.1015,
"step": 384
},
{
"epoch": 0.62,
"learning_rate": 1.95508597606095e-06,
"loss": 7.3307,
"step": 386
},
{
"epoch": 0.63,
"learning_rate": 1.954462626671894e-06,
"loss": 7.3478,
"step": 388
},
{
"epoch": 0.63,
"learning_rate": 1.9538350821567403e-06,
"loss": 7.2176,
"step": 390
},
{
"epoch": 0.63,
"learning_rate": 1.9532033452737203e-06,
"loss": 7.6799,
"step": 392
},
{
"epoch": 0.64,
"learning_rate": 1.952567418799492e-06,
"loss": 7.2302,
"step": 394
},
{
"epoch": 0.64,
"learning_rate": 1.9519273055291264e-06,
"loss": 7.2812,
"step": 396
},
{
"epoch": 0.64,
"learning_rate": 1.9512830082760985e-06,
"loss": 7.0137,
"step": 398
},
{
"epoch": 0.65,
"learning_rate": 1.9506345298722714e-06,
"loss": 7.6786,
"step": 400
},
{
"epoch": 0.65,
"learning_rate": 1.949981873167887e-06,
"loss": 7.0638,
"step": 402
},
{
"epoch": 0.65,
"learning_rate": 1.9493250410315505e-06,
"loss": 6.877,
"step": 404
},
{
"epoch": 0.66,
"learning_rate": 1.948664036350221e-06,
"loss": 7.1874,
"step": 406
},
{
"epoch": 0.66,
"learning_rate": 1.9479988620291952e-06,
"loss": 7.5073,
"step": 408
},
{
"epoch": 0.66,
"learning_rate": 1.9473295209920983e-06,
"loss": 6.605,
"step": 410
},
{
"epoch": 0.67,
"learning_rate": 1.946656016180867e-06,
"loss": 7.1983,
"step": 412
},
{
"epoch": 0.67,
"learning_rate": 1.9459783505557422e-06,
"loss": 7.3831,
"step": 414
},
{
"epoch": 0.67,
"learning_rate": 1.9452965270952497e-06,
"loss": 7.1163,
"step": 416
},
{
"epoch": 0.68,
"learning_rate": 1.9446105487961925e-06,
"loss": 7.4892,
"step": 418
},
{
"epoch": 0.68,
"learning_rate": 1.943920418673633e-06,
"loss": 7.138,
"step": 420
},
{
"epoch": 0.68,
"learning_rate": 1.9432261397608833e-06,
"loss": 7.21,
"step": 422
},
{
"epoch": 0.69,
"learning_rate": 1.942527715109491e-06,
"loss": 6.8101,
"step": 424
},
{
"epoch": 0.69,
"learning_rate": 1.941825147789225e-06,
"loss": 7.1995,
"step": 426
},
{
"epoch": 0.69,
"learning_rate": 1.941118440888061e-06,
"loss": 7.3979,
"step": 428
},
{
"epoch": 0.69,
"learning_rate": 1.9404075975121716e-06,
"loss": 6.9963,
"step": 430
},
{
"epoch": 0.7,
"learning_rate": 1.9396926207859082e-06,
"loss": 6.8627,
"step": 432
},
{
"epoch": 0.7,
"learning_rate": 1.9389735138517915e-06,
"loss": 7.1982,
"step": 434
},
{
"epoch": 0.7,
"learning_rate": 1.9382502798704935e-06,
"loss": 7.0028,
"step": 436
},
{
"epoch": 0.71,
"learning_rate": 1.9375229220208273e-06,
"loss": 7.3183,
"step": 438
},
{
"epoch": 0.71,
"learning_rate": 1.936791443499731e-06,
"loss": 7.2714,
"step": 440
},
{
"epoch": 0.71,
"learning_rate": 1.936055847522254e-06,
"loss": 6.7713,
"step": 442
},
{
"epoch": 0.72,
"learning_rate": 1.935316137321543e-06,
"loss": 6.9329,
"step": 444
},
{
"epoch": 0.72,
"learning_rate": 1.934572316148828e-06,
"loss": 7.0025,
"step": 446
},
{
"epoch": 0.72,
"learning_rate": 1.9338243872734083e-06,
"loss": 6.9061,
"step": 448
},
{
"epoch": 0.73,
"learning_rate": 1.933072353982637e-06,
"loss": 7.4755,
"step": 450
},
{
"epoch": 0.73,
"learning_rate": 1.932316219581908e-06,
"loss": 7.4846,
"step": 452
},
{
"epoch": 0.73,
"learning_rate": 1.93155598739464e-06,
"loss": 7.2087,
"step": 454
},
{
"epoch": 0.74,
"learning_rate": 1.930791660762262e-06,
"loss": 7.2197,
"step": 456
},
{
"epoch": 0.74,
"learning_rate": 1.930023243044201e-06,
"loss": 7.2201,
"step": 458
},
{
"epoch": 0.74,
"learning_rate": 1.929250737617864e-06,
"loss": 6.7586,
"step": 460
},
{
"epoch": 0.75,
"learning_rate": 1.9284741478786258e-06,
"loss": 7.1637,
"step": 462
},
{
"epoch": 0.75,
"learning_rate": 1.9276934772398113e-06,
"loss": 6.8489,
"step": 464
},
{
"epoch": 0.75,
"learning_rate": 1.926908729132683e-06,
"loss": 7.0103,
"step": 466
},
{
"epoch": 0.76,
"learning_rate": 1.926119907006426e-06,
"loss": 6.7451,
"step": 468
},
{
"epoch": 0.76,
"learning_rate": 1.9253270143281295e-06,
"loss": 6.8193,
"step": 470
},
{
"epoch": 0.76,
"learning_rate": 1.924530054582776e-06,
"loss": 6.8033,
"step": 472
},
{
"epoch": 0.77,
"learning_rate": 1.923729031273222e-06,
"loss": 6.7076,
"step": 474
},
{
"epoch": 0.77,
"learning_rate": 1.9229239479201874e-06,
"loss": 7.2125,
"step": 476
},
{
"epoch": 0.77,
"learning_rate": 1.9221148080622338e-06,
"loss": 6.7966,
"step": 478
},
{
"epoch": 0.78,
"learning_rate": 1.921301615255754e-06,
"loss": 6.6421,
"step": 480
},
{
"epoch": 0.78,
"learning_rate": 1.9204843730749544e-06,
"loss": 6.8844,
"step": 482
},
{
"epoch": 0.78,
"learning_rate": 1.9196630851118395e-06,
"loss": 6.1133,
"step": 484
},
{
"epoch": 0.79,
"learning_rate": 1.918837754976196e-06,
"loss": 7.1229,
"step": 486
},
{
"epoch": 0.79,
"learning_rate": 1.918008386295577e-06,
"loss": 7.2234,
"step": 488
},
{
"epoch": 0.79,
"learning_rate": 1.917174982715287e-06,
"loss": 6.9052,
"step": 490
},
{
"epoch": 0.8,
"learning_rate": 1.916337547898363e-06,
"loss": 7.2216,
"step": 492
},
{
"epoch": 0.8,
"learning_rate": 1.9154960855255626e-06,
"loss": 7.1983,
"step": 494
},
{
"epoch": 0.8,
"learning_rate": 1.9146505992953444e-06,
"loss": 7.0274,
"step": 496
},
{
"epoch": 0.8,
"learning_rate": 1.9138010929238533e-06,
"loss": 7.0934,
"step": 498
},
{
"epoch": 0.81,
"learning_rate": 1.9129475701449035e-06,
"loss": 6.565,
"step": 500
},
{
"epoch": 0.81,
"learning_rate": 1.912090034709963e-06,
"loss": 6.7059,
"step": 502
},
{
"epoch": 0.81,
"learning_rate": 1.9112284903881357e-06,
"loss": 6.2099,
"step": 504
},
{
"epoch": 0.82,
"learning_rate": 1.9103629409661467e-06,
"loss": 6.7425,
"step": 506
},
{
"epoch": 0.82,
"learning_rate": 1.909493390248324e-06,
"loss": 6.9777,
"step": 508
},
{
"epoch": 0.82,
"learning_rate": 1.908619842056582e-06,
"loss": 6.9865,
"step": 510
},
{
"epoch": 0.83,
"learning_rate": 1.9077423002304058e-06,
"loss": 6.3805,
"step": 512
},
{
"epoch": 0.83,
"learning_rate": 1.906860768626834e-06,
"loss": 7.2092,
"step": 514
},
{
"epoch": 0.83,
"learning_rate": 1.9059752511204396e-06,
"loss": 7.2097,
"step": 516
},
{
"epoch": 0.84,
"learning_rate": 1.9050857516033173e-06,
"loss": 6.7913,
"step": 518
},
{
"epoch": 0.84,
"learning_rate": 1.9041922739850614e-06,
"loss": 6.771,
"step": 520
},
{
"epoch": 0.84,
"learning_rate": 1.9032948221927522e-06,
"loss": 6.8683,
"step": 522
},
{
"epoch": 0.85,
"learning_rate": 1.902393400170938e-06,
"loss": 6.8623,
"step": 524
},
{
"epoch": 0.85,
"learning_rate": 1.9014880118816162e-06,
"loss": 7.1279,
"step": 526
},
{
"epoch": 0.85,
"learning_rate": 1.9005786613042183e-06,
"loss": 7.1608,
"step": 528
},
{
"epoch": 0.86,
"learning_rate": 1.8996653524355902e-06,
"loss": 7.095,
"step": 530
},
{
"epoch": 0.86,
"learning_rate": 1.8987480892899756e-06,
"loss": 6.5173,
"step": 532
},
{
"epoch": 0.86,
"learning_rate": 1.897826875898999e-06,
"loss": 6.8354,
"step": 534
},
{
"epoch": 0.87,
"learning_rate": 1.896901716311647e-06,
"loss": 6.5117,
"step": 536
},
{
"epoch": 0.87,
"learning_rate": 1.8959726145942505e-06,
"loss": 6.8697,
"step": 538
},
{
"epoch": 0.87,
"learning_rate": 1.8950395748304678e-06,
"loss": 7.0196,
"step": 540
},
{
"epoch": 0.88,
"learning_rate": 1.8941026011212653e-06,
"loss": 7.1429,
"step": 542
},
{
"epoch": 0.88,
"learning_rate": 1.8931616975849006e-06,
"loss": 6.4905,
"step": 544
},
{
"epoch": 0.88,
"learning_rate": 1.8922168683569037e-06,
"loss": 6.5658,
"step": 546
},
{
"epoch": 0.89,
"learning_rate": 1.8912681175900595e-06,
"loss": 6.7222,
"step": 548
},
{
"epoch": 0.89,
"learning_rate": 1.8903154494543887e-06,
"loss": 6.8789,
"step": 550
},
{
"epoch": 0.89,
"learning_rate": 1.8893588681371301e-06,
"loss": 6.9062,
"step": 552
},
{
"epoch": 0.9,
"learning_rate": 1.8883983778427223e-06,
"loss": 6.5114,
"step": 554
},
{
"epoch": 0.9,
"learning_rate": 1.8874339827927845e-06,
"loss": 6.5592,
"step": 556
},
{
"epoch": 0.9,
"learning_rate": 1.8864656872260985e-06,
"loss": 6.8886,
"step": 558
},
{
"epoch": 0.91,
"learning_rate": 1.88549349539859e-06,
"loss": 6.9465,
"step": 560
},
{
"epoch": 0.91,
"learning_rate": 1.8845174115833097e-06,
"loss": 6.8442,
"step": 562
},
{
"epoch": 0.91,
"learning_rate": 1.8835374400704152e-06,
"loss": 6.618,
"step": 564
},
{
"epoch": 0.91,
"learning_rate": 1.8825535851671506e-06,
"loss": 6.4021,
"step": 566
},
{
"epoch": 0.92,
"learning_rate": 1.8815658511978296e-06,
"loss": 6.799,
"step": 568
},
{
"epoch": 0.92,
"learning_rate": 1.8805742425038146e-06,
"loss": 6.785,
"step": 570
},
{
"epoch": 0.92,
"learning_rate": 1.8795787634434992e-06,
"loss": 7.0524,
"step": 572
},
{
"epoch": 0.93,
"learning_rate": 1.878579418392288e-06,
"loss": 6.8829,
"step": 574
},
{
"epoch": 0.93,
"learning_rate": 1.8775762117425775e-06,
"loss": 6.8318,
"step": 576
},
{
"epoch": 0.93,
"learning_rate": 1.8765691479037375e-06,
"loss": 6.8338,
"step": 578
},
{
"epoch": 0.94,
"learning_rate": 1.8755582313020908e-06,
"loss": 6.2545,
"step": 580
},
{
"epoch": 0.94,
"learning_rate": 1.8745434663808941e-06,
"loss": 6.8133,
"step": 582
},
{
"epoch": 0.94,
"learning_rate": 1.873524857600319e-06,
"loss": 6.8556,
"step": 584
},
{
"epoch": 0.95,
"learning_rate": 1.8725024094374313e-06,
"loss": 6.6332,
"step": 586
},
{
"epoch": 0.95,
"learning_rate": 1.8714761263861725e-06,
"loss": 6.8382,
"step": 588
},
{
"epoch": 0.95,
"learning_rate": 1.870446012957339e-06,
"loss": 6.6768,
"step": 590
},
{
"epoch": 0.96,
"learning_rate": 1.869412073678563e-06,
"loss": 6.8513,
"step": 592
},
{
"epoch": 0.96,
"learning_rate": 1.8683743130942926e-06,
"loss": 6.4011,
"step": 594
},
{
"epoch": 0.96,
"learning_rate": 1.8673327357657713e-06,
"loss": 6.8065,
"step": 596
},
{
"epoch": 0.97,
"learning_rate": 1.8662873462710182e-06,
"loss": 6.6735,
"step": 598
},
{
"epoch": 0.97,
"learning_rate": 1.865238149204808e-06,
"loss": 6.1697,
"step": 600
},
{
"epoch": 0.97,
"learning_rate": 1.8641851491786512e-06,
"loss": 6.3478,
"step": 602
},
{
"epoch": 0.98,
"learning_rate": 1.8631283508207723e-06,
"loss": 6.9435,
"step": 604
},
{
"epoch": 0.98,
"learning_rate": 1.8620677587760913e-06,
"loss": 6.771,
"step": 606
},
{
"epoch": 0.98,
"learning_rate": 1.8610033777062024e-06,
"loss": 5.8656,
"step": 608
},
{
"epoch": 0.99,
"learning_rate": 1.8599352122893537e-06,
"loss": 6.3271,
"step": 610
},
{
"epoch": 0.99,
"learning_rate": 1.858863267220426e-06,
"loss": 7.3395,
"step": 612
},
{
"epoch": 0.99,
"learning_rate": 1.8577875472109132e-06,
"loss": 6.7548,
"step": 614
},
{
"epoch": 1.0,
"learning_rate": 1.8567080569889014e-06,
"loss": 6.9543,
"step": 616
},
{
"epoch": 1.0,
"learning_rate": 1.8556248012990466e-06,
"loss": 7.0467,
"step": 618
},
{
"epoch": 1.0,
"learning_rate": 1.8545377849025564e-06,
"loss": 7.1106,
"step": 620
},
{
"epoch": 1.01,
"learning_rate": 1.8534470125771674e-06,
"loss": 6.4241,
"step": 622
},
{
"epoch": 1.01,
"learning_rate": 1.8523524891171239e-06,
"loss": 6.951,
"step": 624
},
{
"epoch": 1.01,
"learning_rate": 1.8512542193331582e-06,
"loss": 6.6515,
"step": 626
},
{
"epoch": 1.01,
"learning_rate": 1.8501522080524686e-06,
"loss": 6.6356,
"step": 628
},
{
"epoch": 1.02,
"learning_rate": 1.8490464601186977e-06,
"loss": 6.1777,
"step": 630
},
{
"epoch": 1.02,
"learning_rate": 1.8479369803919126e-06,
"loss": 6.5791,
"step": 632
},
{
"epoch": 1.02,
"learning_rate": 1.846823773748582e-06,
"loss": 6.7036,
"step": 634
},
{
"epoch": 1.03,
"learning_rate": 1.845706845081556e-06,
"loss": 6.3115,
"step": 636
},
{
"epoch": 1.03,
"learning_rate": 1.8445861993000433e-06,
"loss": 6.8141,
"step": 638
},
{
"epoch": 1.03,
"learning_rate": 1.8434618413295909e-06,
"loss": 6.6293,
"step": 640
},
{
"epoch": 1.04,
"learning_rate": 1.8423337761120617e-06,
"loss": 6.9529,
"step": 642
},
{
"epoch": 1.04,
"learning_rate": 1.841202008605613e-06,
"loss": 6.8302,
"step": 644
},
{
"epoch": 1.04,
"learning_rate": 1.8400665437846746e-06,
"loss": 6.8518,
"step": 646
},
{
"epoch": 1.05,
"learning_rate": 1.8389273866399274e-06,
"loss": 6.4488,
"step": 648
},
{
"epoch": 1.05,
"learning_rate": 1.8377845421782802e-06,
"loss": 6.3835,
"step": 650
},
{
"epoch": 1.05,
"learning_rate": 1.8366380154228497e-06,
"loss": 6.829,
"step": 652
},
{
"epoch": 1.06,
"learning_rate": 1.8354878114129364e-06,
"loss": 6.9445,
"step": 654
},
{
"epoch": 1.06,
"learning_rate": 1.834333935204004e-06,
"loss": 6.8705,
"step": 656
},
{
"epoch": 1.06,
"learning_rate": 1.8331763918676555e-06,
"loss": 6.3082,
"step": 658
},
{
"epoch": 1.07,
"learning_rate": 1.8320151864916133e-06,
"loss": 6.7029,
"step": 660
},
{
"epoch": 1.07,
"learning_rate": 1.8308503241796948e-06,
"loss": 6.5772,
"step": 662
},
{
"epoch": 1.07,
"learning_rate": 1.8296818100517909e-06,
"loss": 6.7506,
"step": 664
},
{
"epoch": 1.08,
"learning_rate": 1.828509649243842e-06,
"loss": 6.8545,
"step": 666
},
{
"epoch": 1.08,
"learning_rate": 1.8273338469078186e-06,
"loss": 5.9733,
"step": 668
},
{
"epoch": 1.08,
"learning_rate": 1.8261544082116953e-06,
"loss": 6.3656,
"step": 670
},
{
"epoch": 1.09,
"learning_rate": 1.8249713383394301e-06,
"loss": 6.7071,
"step": 672
},
{
"epoch": 1.09,
"learning_rate": 1.8237846424909411e-06,
"loss": 6.6789,
"step": 674
},
{
"epoch": 1.09,
"learning_rate": 1.8225943258820832e-06,
"loss": 7.0611,
"step": 676
},
{
"epoch": 1.1,
"learning_rate": 1.8214003937446253e-06,
"loss": 6.8078,
"step": 678
},
{
"epoch": 1.1,
"learning_rate": 1.8202028513262286e-06,
"loss": 6.7964,
"step": 680
},
{
"epoch": 1.1,
"learning_rate": 1.8190017038904214e-06,
"loss": 6.4661,
"step": 682
},
{
"epoch": 1.11,
"learning_rate": 1.8177969567165778e-06,
"loss": 5.7196,
"step": 684
},
{
"epoch": 1.11,
"learning_rate": 1.8165886150998931e-06,
"loss": 6.6864,
"step": 686
},
{
"epoch": 1.11,
"learning_rate": 1.8153766843513617e-06,
"loss": 6.7598,
"step": 688
},
{
"epoch": 1.12,
"learning_rate": 1.8141611697977526e-06,
"loss": 5.9103,
"step": 690
},
{
"epoch": 1.12,
"learning_rate": 1.8129420767815878e-06,
"loss": 6.5564,
"step": 692
},
{
"epoch": 1.12,
"learning_rate": 1.811719410661116e-06,
"loss": 6.6198,
"step": 694
},
{
"epoch": 1.12,
"learning_rate": 1.8104931768102917e-06,
"loss": 6.3773,
"step": 696
},
{
"epoch": 1.13,
"learning_rate": 1.8092633806187511e-06,
"loss": 6.596,
"step": 698
},
{
"epoch": 1.13,
"learning_rate": 1.8080300274917861e-06,
"loss": 6.5877,
"step": 700
},
{
"epoch": 1.13,
"learning_rate": 1.8067931228503243e-06,
"loss": 6.7652,
"step": 702
},
{
"epoch": 1.14,
"learning_rate": 1.8055526721309013e-06,
"loss": 6.6674,
"step": 704
},
{
"epoch": 1.14,
"learning_rate": 1.8043086807856402e-06,
"loss": 6.6695,
"step": 706
},
{
"epoch": 1.14,
"learning_rate": 1.8030611542822255e-06,
"loss": 6.9025,
"step": 708
},
{
"epoch": 1.15,
"learning_rate": 1.8018100981038797e-06,
"loss": 6.5107,
"step": 710
},
{
"epoch": 1.15,
"learning_rate": 1.8005555177493392e-06,
"loss": 6.2374,
"step": 712
},
{
"epoch": 1.15,
"learning_rate": 1.7992974187328303e-06,
"loss": 6.8202,
"step": 714
},
{
"epoch": 1.16,
"learning_rate": 1.798035806584044e-06,
"loss": 6.5293,
"step": 716
},
{
"epoch": 1.16,
"learning_rate": 1.7967706868481143e-06,
"loss": 6.7172,
"step": 718
},
{
"epoch": 1.16,
"learning_rate": 1.79550206508559e-06,
"loss": 6.9248,
"step": 720
},
{
"epoch": 1.17,
"learning_rate": 1.7942299468724131e-06,
"loss": 6.3225,
"step": 722
},
{
"epoch": 1.17,
"learning_rate": 1.792954337799894e-06,
"loss": 6.6829,
"step": 724
},
{
"epoch": 1.17,
"learning_rate": 1.7916752434746853e-06,
"loss": 6.7468,
"step": 726
},
{
"epoch": 1.18,
"learning_rate": 1.7903926695187592e-06,
"loss": 6.6429,
"step": 728
},
{
"epoch": 1.18,
"learning_rate": 1.7891066215693816e-06,
"loss": 6.5349,
"step": 730
},
{
"epoch": 1.18,
"learning_rate": 1.7878171052790867e-06,
"loss": 6.288,
"step": 732
},
{
"epoch": 1.19,
"learning_rate": 1.7865241263156543e-06,
"loss": 6.5498,
"step": 734
},
{
"epoch": 1.19,
"learning_rate": 1.7852276903620828e-06,
"loss": 6.605,
"step": 736
},
{
"epoch": 1.19,
"learning_rate": 1.7839278031165656e-06,
"loss": 6.2279,
"step": 738
},
{
"epoch": 1.2,
"learning_rate": 1.7826244702924647e-06,
"loss": 6.3011,
"step": 740
},
{
"epoch": 1.2,
"learning_rate": 1.7813176976182872e-06,
"loss": 6.4668,
"step": 742
},
{
"epoch": 1.2,
"learning_rate": 1.7800074908376584e-06,
"loss": 6.3857,
"step": 744
},
{
"epoch": 1.21,
"learning_rate": 1.778693855709298e-06,
"loss": 6.5626,
"step": 746
},
{
"epoch": 1.21,
"learning_rate": 1.7773767980069944e-06,
"loss": 6.5039,
"step": 748
},
{
"epoch": 1.21,
"learning_rate": 1.776056323519579e-06,
"loss": 6.5606,
"step": 750
},
{
"epoch": 1.22,
"learning_rate": 1.7747324380509006e-06,
"loss": 6.3044,
"step": 752
},
{
"epoch": 1.22,
"learning_rate": 1.7734051474198e-06,
"loss": 6.7221,
"step": 754
},
{
"epoch": 1.22,
"learning_rate": 1.7720744574600862e-06,
"loss": 6.2063,
"step": 756
},
{
"epoch": 1.23,
"learning_rate": 1.7707403740205068e-06,
"loss": 6.3192,
"step": 758
},
{
"epoch": 1.23,
"learning_rate": 1.7694029029647268e-06,
"loss": 6.1671,
"step": 760
},
{
"epoch": 1.23,
"learning_rate": 1.7680620501712995e-06,
"loss": 6.6476,
"step": 762
},
{
"epoch": 1.23,
"learning_rate": 1.7667178215336423e-06,
"loss": 6.5156,
"step": 764
},
{
"epoch": 1.24,
"learning_rate": 1.7653702229600098e-06,
"loss": 5.8151,
"step": 766
},
{
"epoch": 1.24,
"learning_rate": 1.764019260373469e-06,
"loss": 6.6839,
"step": 768
},
{
"epoch": 1.24,
"learning_rate": 1.7626649397118732e-06,
"loss": 6.4756,
"step": 770
},
{
"epoch": 1.25,
"learning_rate": 1.7613072669278342e-06,
"loss": 6.3091,
"step": 772
},
{
"epoch": 1.25,
"learning_rate": 1.7599462479886973e-06,
"loss": 6.0635,
"step": 774
},
{
"epoch": 1.25,
"learning_rate": 1.7585818888765164e-06,
"loss": 6.3993,
"step": 776
},
{
"epoch": 1.26,
"learning_rate": 1.7572141955880251e-06,
"loss": 6.5637,
"step": 778
},
{
"epoch": 1.26,
"learning_rate": 1.755843174134612e-06,
"loss": 5.9163,
"step": 780
},
{
"epoch": 1.26,
"learning_rate": 1.7544688305422941e-06,
"loss": 6.687,
"step": 782
},
{
"epoch": 1.27,
"learning_rate": 1.75309117085169e-06,
"loss": 6.4887,
"step": 784
},
{
"epoch": 1.27,
"learning_rate": 1.7517102011179931e-06,
"loss": 6.8291,
"step": 786
},
{
"epoch": 1.27,
"learning_rate": 1.7503259274109462e-06,
"loss": 6.5768,
"step": 788
},
{
"epoch": 1.28,
"learning_rate": 1.7489383558148135e-06,
"loss": 6.4883,
"step": 790
},
{
"epoch": 1.28,
"learning_rate": 1.7475474924283536e-06,
"loss": 6.5535,
"step": 792
},
{
"epoch": 1.28,
"learning_rate": 1.7461533433647946e-06,
"loss": 6.4518,
"step": 794
},
{
"epoch": 1.29,
"learning_rate": 1.7447559147518054e-06,
"loss": 6.5409,
"step": 796
},
{
"epoch": 1.29,
"learning_rate": 1.74335521273147e-06,
"loss": 6.6906,
"step": 798
},
{
"epoch": 1.29,
"learning_rate": 1.7419512434602591e-06,
"loss": 6.6106,
"step": 800
},
{
"epoch": 1.3,
"learning_rate": 1.7405440131090047e-06,
"loss": 6.1888,
"step": 802
},
{
"epoch": 1.3,
"learning_rate": 1.739133527862871e-06,
"loss": 6.4989,
"step": 804
},
{
"epoch": 1.3,
"learning_rate": 1.7377197939213298e-06,
"loss": 6.6128,
"step": 806
},
{
"epoch": 1.31,
"learning_rate": 1.736302817498131e-06,
"loss": 6.5109,
"step": 808
},
{
"epoch": 1.31,
"learning_rate": 1.7348826048212758e-06,
"loss": 6.394,
"step": 810
},
{
"epoch": 1.31,
"learning_rate": 1.7334591621329904e-06,
"loss": 6.5515,
"step": 812
},
{
"epoch": 1.32,
"learning_rate": 1.7320324956896973e-06,
"loss": 6.7792,
"step": 814
},
{
"epoch": 1.32,
"learning_rate": 1.7306026117619888e-06,
"loss": 6.4862,
"step": 816
},
{
"epoch": 1.32,
"learning_rate": 1.7291695166345978e-06,
"loss": 6.4119,
"step": 818
},
{
"epoch": 1.33,
"learning_rate": 1.7277332166063725e-06,
"loss": 6.4111,
"step": 820
},
{
"epoch": 1.33,
"learning_rate": 1.726293717990247e-06,
"loss": 5.8316,
"step": 822
},
{
"epoch": 1.33,
"learning_rate": 1.7248510271132141e-06,
"loss": 6.5505,
"step": 824
},
{
"epoch": 1.33,
"learning_rate": 1.7234051503162977e-06,
"loss": 6.5862,
"step": 826
},
{
"epoch": 1.34,
"learning_rate": 1.7219560939545242e-06,
"loss": 6.556,
"step": 828
},
{
"epoch": 1.34,
"learning_rate": 1.7205038643968958e-06,
"loss": 6.0874,
"step": 830
},
{
"epoch": 1.34,
"learning_rate": 1.719048468026361e-06,
"loss": 6.6705,
"step": 832
},
{
"epoch": 1.35,
"learning_rate": 1.7175899112397878e-06,
"loss": 6.604,
"step": 834
},
{
"epoch": 1.35,
"learning_rate": 1.716128200447935e-06,
"loss": 6.1485,
"step": 836
},
{
"epoch": 1.35,
"learning_rate": 1.7146633420754237e-06,
"loss": 6.4457,
"step": 838
},
{
"epoch": 1.36,
"learning_rate": 1.7131953425607102e-06,
"loss": 6.1297,
"step": 840
},
{
"epoch": 1.36,
"learning_rate": 1.7117242083560566e-06,
"loss": 6.4114,
"step": 842
},
{
"epoch": 1.36,
"learning_rate": 1.7102499459275025e-06,
"loss": 6.1976,
"step": 844
},
{
"epoch": 1.37,
"learning_rate": 1.7087725617548382e-06,
"loss": 6.5855,
"step": 846
},
{
"epoch": 1.37,
"learning_rate": 1.7072920623315731e-06,
"loss": 6.6856,
"step": 848
},
{
"epoch": 1.37,
"learning_rate": 1.7058084541649103e-06,
"loss": 5.9883,
"step": 850
},
{
"epoch": 1.38,
"learning_rate": 1.7043217437757163e-06,
"loss": 6.4898,
"step": 852
},
{
"epoch": 1.38,
"learning_rate": 1.7028319376984925e-06,
"loss": 6.2324,
"step": 854
},
{
"epoch": 1.38,
"learning_rate": 1.7013390424813467e-06,
"loss": 5.9353,
"step": 856
},
{
"epoch": 1.39,
"learning_rate": 1.6998430646859653e-06,
"loss": 6.0376,
"step": 858
},
{
"epoch": 1.39,
"learning_rate": 1.698344010887582e-06,
"loss": 6.5051,
"step": 860
},
{
"epoch": 1.39,
"learning_rate": 1.6968418876749508e-06,
"loss": 6.2818,
"step": 862
},
{
"epoch": 1.4,
"learning_rate": 1.695336701650318e-06,
"loss": 6.4458,
"step": 864
},
{
"epoch": 1.4,
"learning_rate": 1.6938284594293894e-06,
"loss": 6.0794,
"step": 866
},
{
"epoch": 1.4,
"learning_rate": 1.6923171676413062e-06,
"loss": 6.1156,
"step": 868
},
{
"epoch": 1.41,
"learning_rate": 1.690802832928611e-06,
"loss": 6.4838,
"step": 870
},
{
"epoch": 1.41,
"learning_rate": 1.689285461947222e-06,
"loss": 6.5291,
"step": 872
},
{
"epoch": 1.41,
"learning_rate": 1.6877650613664031e-06,
"loss": 6.6217,
"step": 874
},
{
"epoch": 1.42,
"learning_rate": 1.6862416378687337e-06,
"loss": 6.3847,
"step": 876
},
{
"epoch": 1.42,
"learning_rate": 1.6847151981500786e-06,
"loss": 6.4331,
"step": 878
},
{
"epoch": 1.42,
"learning_rate": 1.6831857489195616e-06,
"loss": 6.302,
"step": 880
},
{
"epoch": 1.43,
"learning_rate": 1.6816532968995328e-06,
"loss": 6.2486,
"step": 882
},
{
"epoch": 1.43,
"learning_rate": 1.6801178488255411e-06,
"loss": 6.2877,
"step": 884
},
{
"epoch": 1.43,
"learning_rate": 1.6785794114463036e-06,
"loss": 6.2413,
"step": 886
},
{
"epoch": 1.44,
"learning_rate": 1.6770379915236763e-06,
"loss": 6.011,
"step": 888
},
{
"epoch": 1.44,
"learning_rate": 1.6754935958326241e-06,
"loss": 5.8661,
"step": 890
},
{
"epoch": 1.44,
"learning_rate": 1.6739462311611915e-06,
"loss": 6.2365,
"step": 892
},
{
"epoch": 1.44,
"learning_rate": 1.6723959043104726e-06,
"loss": 6.0454,
"step": 894
},
{
"epoch": 1.45,
"learning_rate": 1.67084262209458e-06,
"loss": 6.686,
"step": 896
},
{
"epoch": 1.45,
"learning_rate": 1.669286391340618e-06,
"loss": 6.2254,
"step": 898
},
{
"epoch": 1.45,
"learning_rate": 1.6677272188886483e-06,
"loss": 6.3332,
"step": 900
},
{
"epoch": 1.46,
"learning_rate": 1.666165111591664e-06,
"loss": 6.2119,
"step": 902
},
{
"epoch": 1.46,
"learning_rate": 1.6646000763155566e-06,
"loss": 6.0778,
"step": 904
},
{
"epoch": 1.46,
"learning_rate": 1.6630321199390867e-06,
"loss": 6.3578,
"step": 906
},
{
"epoch": 1.47,
"learning_rate": 1.6614612493538548e-06,
"loss": 6.6655,
"step": 908
},
{
"epoch": 1.47,
"learning_rate": 1.6598874714642697e-06,
"loss": 6.2806,
"step": 910
},
{
"epoch": 1.47,
"learning_rate": 1.6583107931875188e-06,
"loss": 6.1602,
"step": 912
},
{
"epoch": 1.48,
"learning_rate": 1.656731221453537e-06,
"loss": 6.1191,
"step": 914
},
{
"epoch": 1.48,
"learning_rate": 1.655148763204977e-06,
"loss": 5.7852,
"step": 916
},
{
"epoch": 1.48,
"learning_rate": 1.6535634253971794e-06,
"loss": 6.5334,
"step": 918
},
{
"epoch": 1.49,
"learning_rate": 1.6519752149981395e-06,
"loss": 6.5009,
"step": 920
},
{
"epoch": 1.49,
"learning_rate": 1.6503841389884796e-06,
"loss": 6.5337,
"step": 922
},
{
"epoch": 1.49,
"learning_rate": 1.648790204361417e-06,
"loss": 6.3533,
"step": 924
},
{
"epoch": 1.5,
"learning_rate": 1.6471934181227337e-06,
"loss": 6.333,
"step": 926
},
{
"epoch": 1.5,
"learning_rate": 1.6455937872907449e-06,
"loss": 6.3058,
"step": 928
},
{
"epoch": 1.5,
"learning_rate": 1.6439913188962681e-06,
"loss": 6.0227,
"step": 930
},
{
"epoch": 1.51,
"learning_rate": 1.642386019982594e-06,
"loss": 6.3999,
"step": 932
},
{
"epoch": 1.51,
"learning_rate": 1.6407778976054526e-06,
"loss": 6.3557,
"step": 934
},
{
"epoch": 1.51,
"learning_rate": 1.6391669588329848e-06,
"loss": 6.5799,
"step": 936
},
{
"epoch": 1.52,
"learning_rate": 1.6375532107457105e-06,
"loss": 5.9422,
"step": 938
},
{
"epoch": 1.52,
"learning_rate": 1.6359366604364971e-06,
"loss": 6.6235,
"step": 940
},
{
"epoch": 1.52,
"learning_rate": 1.6343173150105275e-06,
"loss": 6.0267,
"step": 942
},
{
"epoch": 1.53,
"learning_rate": 1.6326951815852719e-06,
"loss": 6.327,
"step": 944
},
{
"epoch": 1.53,
"learning_rate": 1.6310702672904526e-06,
"loss": 6.473,
"step": 946
},
{
"epoch": 1.53,
"learning_rate": 1.6294425792680158e-06,
"loss": 6.4169,
"step": 948
},
{
"epoch": 1.54,
"learning_rate": 1.6278121246720987e-06,
"loss": 5.8404,
"step": 950
},
{
"epoch": 1.54,
"learning_rate": 1.6261789106689978e-06,
"loss": 5.8924,
"step": 952
},
{
"epoch": 1.54,
"learning_rate": 1.624542944437139e-06,
"loss": 6.2631,
"step": 954
},
{
"epoch": 1.55,
"learning_rate": 1.622904233167044e-06,
"loss": 6.4961,
"step": 956
},
{
"epoch": 1.55,
"learning_rate": 1.6212627840613001e-06,
"loss": 6.3894,
"step": 958
},
{
"epoch": 1.55,
"learning_rate": 1.6196186043345285e-06,
"loss": 6.5512,
"step": 960
},
{
"epoch": 1.55,
"learning_rate": 1.617971701213352e-06,
"loss": 6.1691,
"step": 962
},
{
"epoch": 1.56,
"learning_rate": 1.6163220819363628e-06,
"loss": 6.3324,
"step": 964
},
{
"epoch": 1.56,
"learning_rate": 1.6146697537540923e-06,
"loss": 6.6786,
"step": 966
},
{
"epoch": 1.56,
"learning_rate": 1.6130147239289777e-06,
"loss": 6.5543,
"step": 968
},
{
"epoch": 1.57,
"learning_rate": 1.611356999735331e-06,
"loss": 6.3715,
"step": 970
},
{
"epoch": 1.57,
"learning_rate": 1.609696588459307e-06,
"loss": 6.5001,
"step": 972
},
{
"epoch": 1.57,
"learning_rate": 1.6080334973988695e-06,
"loss": 5.6829,
"step": 974
},
{
"epoch": 1.58,
"learning_rate": 1.6063677338637626e-06,
"loss": 6.1467,
"step": 976
},
{
"epoch": 1.58,
"learning_rate": 1.6046993051754754e-06,
"loss": 6.1594,
"step": 978
},
{
"epoch": 1.58,
"learning_rate": 1.6030282186672115e-06,
"loss": 5.9862,
"step": 980
},
{
"epoch": 1.59,
"learning_rate": 1.6013544816838563e-06,
"loss": 6.5831,
"step": 982
},
{
"epoch": 1.59,
"learning_rate": 1.5996781015819447e-06,
"loss": 6.3911,
"step": 984
},
{
"epoch": 1.59,
"learning_rate": 1.5979990857296293e-06,
"loss": 6.5145,
"step": 986
},
{
"epoch": 1.6,
"learning_rate": 1.5963174415066467e-06,
"loss": 6.3745,
"step": 988
},
{
"epoch": 1.6,
"learning_rate": 1.5946331763042866e-06,
"loss": 6.4732,
"step": 990
},
{
"epoch": 1.6,
"learning_rate": 1.5929462975253586e-06,
"loss": 6.4846,
"step": 992
},
{
"epoch": 1.61,
"learning_rate": 1.5912568125841588e-06,
"loss": 5.7701,
"step": 994
},
{
"epoch": 1.61,
"learning_rate": 1.5895647289064393e-06,
"loss": 6.1163,
"step": 996
},
{
"epoch": 1.61,
"learning_rate": 1.5878700539293738e-06,
"loss": 6.1529,
"step": 998
},
{
"epoch": 1.62,
"learning_rate": 1.5861727951015258e-06,
"loss": 6.2204,
"step": 1000
},
{
"epoch": 1.62,
"learning_rate": 1.5844729598828149e-06,
"loss": 6.5347,
"step": 1002
},
{
"epoch": 1.62,
"learning_rate": 1.5827705557444851e-06,
"loss": 6.0563,
"step": 1004
},
{
"epoch": 1.63,
"learning_rate": 1.5810655901690713e-06,
"loss": 6.4918,
"step": 1006
},
{
"epoch": 1.63,
"learning_rate": 1.579358070650367e-06,
"loss": 6.1495,
"step": 1008
},
{
"epoch": 1.63,
"learning_rate": 1.5776480046933905e-06,
"loss": 6.4362,
"step": 1010
},
{
"epoch": 1.64,
"learning_rate": 1.5759353998143525e-06,
"loss": 5.8431,
"step": 1012
},
{
"epoch": 1.64,
"learning_rate": 1.5742202635406235e-06,
"loss": 5.7574,
"step": 1014
},
{
"epoch": 1.64,
"learning_rate": 1.5725026034106996e-06,
"loss": 6.4673,
"step": 1016
},
{
"epoch": 1.65,
"learning_rate": 1.57078242697417e-06,
"loss": 6.2053,
"step": 1018
},
{
"epoch": 1.65,
"learning_rate": 1.5690597417916837e-06,
"loss": 6.4113,
"step": 1020
},
{
"epoch": 1.65,
"learning_rate": 1.5673345554349168e-06,
"loss": 6.1851,
"step": 1022
},
{
"epoch": 1.65,
"learning_rate": 1.5656068754865386e-06,
"loss": 6.2666,
"step": 1024
},
{
"epoch": 1.66,
"learning_rate": 1.5638767095401778e-06,
"loss": 6.4074,
"step": 1026
},
{
"epoch": 1.66,
"learning_rate": 1.5621440652003905e-06,
"loss": 6.0503,
"step": 1028
},
{
"epoch": 1.66,
"learning_rate": 1.5604089500826257e-06,
"loss": 6.428,
"step": 1030
},
{
"epoch": 1.67,
"learning_rate": 1.558671371813192e-06,
"loss": 6.1145,
"step": 1032
},
{
"epoch": 1.67,
"learning_rate": 1.5569313380292246e-06,
"loss": 6.4294,
"step": 1034
},
{
"epoch": 1.67,
"learning_rate": 1.5551888563786512e-06,
"loss": 6.4701,
"step": 1036
},
{
"epoch": 1.68,
"learning_rate": 1.5534439345201586e-06,
"loss": 6.3885,
"step": 1038
},
{
"epoch": 1.68,
"learning_rate": 1.5516965801231586e-06,
"loss": 6.2604,
"step": 1040
},
{
"epoch": 1.68,
"learning_rate": 1.5499468008677549e-06,
"loss": 5.6512,
"step": 1042
},
{
"epoch": 1.69,
"learning_rate": 1.5481946044447098e-06,
"loss": 6.3669,
"step": 1044
},
{
"epoch": 1.69,
"learning_rate": 1.5464399985554088e-06,
"loss": 6.3416,
"step": 1046
},
{
"epoch": 1.69,
"learning_rate": 1.5446829909118275e-06,
"loss": 6.5978,
"step": 1048
},
{
"epoch": 1.7,
"learning_rate": 1.5429235892364994e-06,
"loss": 6.3573,
"step": 1050
},
{
"epoch": 1.7,
"learning_rate": 1.5411618012624786e-06,
"loss": 5.7089,
"step": 1052
},
{
"epoch": 1.7,
"learning_rate": 1.5393976347333085e-06,
"loss": 5.6281,
"step": 1054
},
{
"epoch": 1.71,
"learning_rate": 1.537631097402987e-06,
"loss": 6.2042,
"step": 1056
},
{
"epoch": 1.71,
"learning_rate": 1.5358621970359324e-06,
"loss": 6.112,
"step": 1058
},
{
"epoch": 1.71,
"learning_rate": 1.5340909414069486e-06,
"loss": 6.4812,
"step": 1060
},
{
"epoch": 1.72,
"learning_rate": 1.5323173383011921e-06,
"loss": 6.0084,
"step": 1062
},
{
"epoch": 1.72,
"learning_rate": 1.5305413955141364e-06,
"loss": 6.715,
"step": 1064
},
{
"epoch": 1.72,
"learning_rate": 1.5287631208515404e-06,
"loss": 6.163,
"step": 1066
},
{
"epoch": 1.73,
"learning_rate": 1.5269825221294098e-06,
"loss": 5.9242,
"step": 1068
},
{
"epoch": 1.73,
"learning_rate": 1.5251996071739664e-06,
"loss": 6.2658,
"step": 1070
},
{
"epoch": 1.73,
"learning_rate": 1.523414383821613e-06,
"loss": 6.4945,
"step": 1072
},
{
"epoch": 1.74,
"learning_rate": 1.5216268599188976e-06,
"loss": 6.1643,
"step": 1074
},
{
"epoch": 1.74,
"learning_rate": 1.5198370433224805e-06,
"loss": 6.0366,
"step": 1076
},
{
"epoch": 1.74,
"learning_rate": 1.5180449418990975e-06,
"loss": 6.4008,
"step": 1078
},
{
"epoch": 1.75,
"learning_rate": 1.5162505635255288e-06,
"loss": 6.2841,
"step": 1080
},
{
"epoch": 1.75,
"learning_rate": 1.514453916088561e-06,
"loss": 6.1791,
"step": 1082
},
{
"epoch": 1.75,
"learning_rate": 1.5126550074849548e-06,
"loss": 6.6,
"step": 1084
},
{
"epoch": 1.76,
"learning_rate": 1.5108538456214085e-06,
"loss": 6.4791,
"step": 1086
},
{
"epoch": 1.76,
"learning_rate": 1.5090504384145249e-06,
"loss": 5.9182,
"step": 1088
},
{
"epoch": 1.76,
"learning_rate": 1.507244793790775e-06,
"loss": 6.0225,
"step": 1090
},
{
"epoch": 1.76,
"learning_rate": 1.5054369196864643e-06,
"loss": 6.2072,
"step": 1092
},
{
"epoch": 1.77,
"learning_rate": 1.5036268240476977e-06,
"loss": 6.5093,
"step": 1094
},
{
"epoch": 1.77,
"learning_rate": 1.5018145148303435e-06,
"loss": 6.098,
"step": 1096
},
{
"epoch": 1.77,
"learning_rate": 1.5e-06,
"loss": 5.8986,
"step": 1098
},
{
"epoch": 1.78,
"learning_rate": 1.4981832875319595e-06,
"loss": 6.381,
"step": 1100
},
{
"epoch": 1.78,
"learning_rate": 1.4963643854111738e-06,
"loss": 6.0649,
"step": 1102
},
{
"epoch": 1.78,
"learning_rate": 1.494543301632219e-06,
"loss": 5.5922,
"step": 1104
},
{
"epoch": 1.79,
"learning_rate": 1.4927200441992588e-06,
"loss": 6.24,
"step": 1106
},
{
"epoch": 1.79,
"learning_rate": 1.4908946211260123e-06,
"loss": 5.5124,
"step": 1108
},
{
"epoch": 1.79,
"learning_rate": 1.4890670404357169e-06,
"loss": 6.4337,
"step": 1110
},
{
"epoch": 1.8,
"learning_rate": 1.4872373101610927e-06,
"loss": 6.3064,
"step": 1112
},
{
"epoch": 1.8,
"learning_rate": 1.485405438344308e-06,
"loss": 6.1087,
"step": 1114
},
{
"epoch": 1.8,
"learning_rate": 1.4835714330369445e-06,
"loss": 6.2753,
"step": 1116
},
{
"epoch": 1.81,
"learning_rate": 1.4817353022999599e-06,
"loss": 5.8132,
"step": 1118
},
{
"epoch": 1.81,
"learning_rate": 1.4798970542036548e-06,
"loss": 6.4297,
"step": 1120
},
{
"epoch": 1.81,
"learning_rate": 1.478056696827636e-06,
"loss": 6.4729,
"step": 1122
},
{
"epoch": 1.82,
"learning_rate": 1.4762142382607808e-06,
"loss": 6.0052,
"step": 1124
},
{
"epoch": 1.82,
"learning_rate": 1.474369686601202e-06,
"loss": 5.8651,
"step": 1126
},
{
"epoch": 1.82,
"learning_rate": 1.4725230499562118e-06,
"loss": 6.3222,
"step": 1128
},
{
"epoch": 1.83,
"learning_rate": 1.4706743364422876e-06,
"loss": 5.9397,
"step": 1130
},
{
"epoch": 1.83,
"learning_rate": 1.4688235541850336e-06,
"loss": 6.3193,
"step": 1132
},
{
"epoch": 1.83,
"learning_rate": 1.466970711319148e-06,
"loss": 6.3106,
"step": 1134
},
{
"epoch": 1.84,
"learning_rate": 1.4651158159883854e-06,
"loss": 6.2976,
"step": 1136
},
{
"epoch": 1.84,
"learning_rate": 1.4632588763455212e-06,
"loss": 6.1376,
"step": 1138
},
{
"epoch": 1.84,
"learning_rate": 1.4613999005523173e-06,
"loss": 5.8214,
"step": 1140
},
{
"epoch": 1.85,
"learning_rate": 1.4595388967794836e-06,
"loss": 6.134,
"step": 1142
},
{
"epoch": 1.85,
"learning_rate": 1.4576758732066441e-06,
"loss": 6.3464,
"step": 1144
},
{
"epoch": 1.85,
"learning_rate": 1.4558108380223011e-06,
"loss": 6.193,
"step": 1146
},
{
"epoch": 1.86,
"learning_rate": 1.4539437994237976e-06,
"loss": 6.3255,
"step": 1148
},
{
"epoch": 1.86,
"learning_rate": 1.4520747656172823e-06,
"loss": 6.3425,
"step": 1150
},
{
"epoch": 1.86,
"learning_rate": 1.4502037448176732e-06,
"loss": 6.1904,
"step": 1152
},
{
"epoch": 1.87,
"learning_rate": 1.4483307452486226e-06,
"loss": 6.2422,
"step": 1154
},
{
"epoch": 1.87,
"learning_rate": 1.446455775142479e-06,
"loss": 6.2154,
"step": 1156
},
{
"epoch": 1.87,
"learning_rate": 1.4445788427402526e-06,
"loss": 6.0472,
"step": 1158
},
{
"epoch": 1.87,
"learning_rate": 1.442699956291578e-06,
"loss": 6.1858,
"step": 1160
},
{
"epoch": 1.88,
"learning_rate": 1.4408191240546787e-06,
"loss": 5.3012,
"step": 1162
},
{
"epoch": 1.88,
"learning_rate": 1.4389363542963305e-06,
"loss": 6.4031,
"step": 1164
},
{
"epoch": 1.88,
"learning_rate": 1.437051655291825e-06,
"loss": 6.4019,
"step": 1166
},
{
"epoch": 1.89,
"learning_rate": 1.4351650353249328e-06,
"loss": 5.6293,
"step": 1168
},
{
"epoch": 1.89,
"learning_rate": 1.4332765026878687e-06,
"loss": 6.4613,
"step": 1170
},
{
"epoch": 1.89,
"learning_rate": 1.4313860656812535e-06,
"loss": 6.2912,
"step": 1172
},
{
"epoch": 1.9,
"learning_rate": 1.4294937326140785e-06,
"loss": 6.2582,
"step": 1174
},
{
"epoch": 1.9,
"learning_rate": 1.4275995118036692e-06,
"loss": 6.5683,
"step": 1176
},
{
"epoch": 1.9,
"learning_rate": 1.425703411575647e-06,
"loss": 6.2619,
"step": 1178
},
{
"epoch": 1.91,
"learning_rate": 1.4238054402638949e-06,
"loss": 6.0469,
"step": 1180
},
{
"epoch": 1.91,
"learning_rate": 1.4219056062105193e-06,
"loss": 6.4802,
"step": 1182
},
{
"epoch": 1.91,
"learning_rate": 1.4200039177658142e-06,
"loss": 6.3938,
"step": 1184
},
{
"epoch": 1.92,
"learning_rate": 1.4181003832882246e-06,
"loss": 6.073,
"step": 1186
},
{
"epoch": 1.92,
"learning_rate": 1.4161950111443074e-06,
"loss": 6.2202,
"step": 1188
},
{
"epoch": 1.92,
"learning_rate": 1.4142878097086994e-06,
"loss": 6.0532,
"step": 1190
},
{
"epoch": 1.93,
"learning_rate": 1.4123787873640751e-06,
"loss": 5.8988,
"step": 1192
},
{
"epoch": 1.93,
"learning_rate": 1.4104679525011139e-06,
"loss": 6.513,
"step": 1194
},
{
"epoch": 1.93,
"learning_rate": 1.408555313518461e-06,
"loss": 6.1953,
"step": 1196
},
{
"epoch": 1.94,
"learning_rate": 1.4066408788226918e-06,
"loss": 6.177,
"step": 1198
},
{
"epoch": 1.94,
"learning_rate": 1.4047246568282737e-06,
"loss": 6.1422,
"step": 1200
},
{
"epoch": 1.94,
"learning_rate": 1.40280665595753e-06,
"loss": 6.4582,
"step": 1202
},
{
"epoch": 1.95,
"learning_rate": 1.400886884640603e-06,
"loss": 6.086,
"step": 1204
},
{
"epoch": 1.95,
"learning_rate": 1.3989653513154163e-06,
"loss": 6.3604,
"step": 1206
},
{
"epoch": 1.95,
"learning_rate": 1.3970420644276382e-06,
"loss": 6.3485,
"step": 1208
},
{
"epoch": 1.96,
"learning_rate": 1.3951170324306434e-06,
"loss": 5.7222,
"step": 1210
},
{
"epoch": 1.96,
"learning_rate": 1.3931902637854788e-06,
"loss": 5.0449,
"step": 1212
},
{
"epoch": 1.96,
"learning_rate": 1.3912617669608229e-06,
"loss": 6.1346,
"step": 1214
},
{
"epoch": 1.97,
"learning_rate": 1.3893315504329497e-06,
"loss": 6.0923,
"step": 1216
},
{
"epoch": 1.97,
"learning_rate": 1.387399622685693e-06,
"loss": 6.1759,
"step": 1218
},
{
"epoch": 1.97,
"learning_rate": 1.385465992210407e-06,
"loss": 6.4627,
"step": 1220
},
{
"epoch": 1.97,
"learning_rate": 1.3835306675059308e-06,
"loss": 5.8395,
"step": 1222
},
{
"epoch": 1.98,
"learning_rate": 1.3815936570785485e-06,
"loss": 6.0278,
"step": 1224
},
{
"epoch": 1.98,
"learning_rate": 1.3796549694419548e-06,
"loss": 6.3969,
"step": 1226
},
{
"epoch": 1.98,
"learning_rate": 1.377714613117216e-06,
"loss": 6.5435,
"step": 1228
},
{
"epoch": 1.99,
"learning_rate": 1.375772596632732e-06,
"loss": 6.0634,
"step": 1230
},
{
"epoch": 1.99,
"learning_rate": 1.3738289285242012e-06,
"loss": 5.8032,
"step": 1232
},
{
"epoch": 1.99,
"learning_rate": 1.3718836173345783e-06,
"loss": 6.3641,
"step": 1234
},
{
"epoch": 2.0,
"learning_rate": 1.3699366716140433e-06,
"loss": 6.4425,
"step": 1236
},
{
"epoch": 2.0,
"learning_rate": 1.367988099919958e-06,
"loss": 5.8852,
"step": 1238
},
{
"epoch": 2.0,
"learning_rate": 1.3660379108168322e-06,
"loss": 6.2662,
"step": 1240
},
{
"epoch": 2.01,
"learning_rate": 1.3640861128762838e-06,
"loss": 6.3743,
"step": 1242
},
{
"epoch": 2.01,
"learning_rate": 1.3621327146770022e-06,
"loss": 6.2544,
"step": 1244
},
{
"epoch": 2.01,
"learning_rate": 1.3601777248047103e-06,
"loss": 6.1536,
"step": 1246
},
{
"epoch": 2.02,
"learning_rate": 1.3582211518521272e-06,
"loss": 6.2739,
"step": 1248
},
{
"epoch": 2.02,
"learning_rate": 1.3562630044189303e-06,
"loss": 5.7099,
"step": 1250
},
{
"epoch": 2.02,
"learning_rate": 1.354303291111716e-06,
"loss": 6.1313,
"step": 1252
},
{
"epoch": 2.03,
"learning_rate": 1.3523420205439645e-06,
"loss": 6.2576,
"step": 1254
},
{
"epoch": 2.03,
"learning_rate": 1.3503792013359996e-06,
"loss": 5.8142,
"step": 1256
},
{
"epoch": 2.03,
"learning_rate": 1.3484148421149527e-06,
"loss": 6.1916,
"step": 1258
},
{
"epoch": 2.04,
"learning_rate": 1.3464489515147237e-06,
"loss": 5.6573,
"step": 1260
},
{
"epoch": 2.04,
"learning_rate": 1.3444815381759425e-06,
"loss": 6.4434,
"step": 1262
},
{
"epoch": 2.04,
"learning_rate": 1.342512610745933e-06,
"loss": 6.3341,
"step": 1264
},
{
"epoch": 2.05,
"learning_rate": 1.3405421778786736e-06,
"loss": 5.9855,
"step": 1266
},
{
"epoch": 2.05,
"learning_rate": 1.3385702482347593e-06,
"loss": 5.7618,
"step": 1268
},
{
"epoch": 2.05,
"learning_rate": 1.3365968304813637e-06,
"loss": 6.0913,
"step": 1270
},
{
"epoch": 2.06,
"learning_rate": 1.3346219332922015e-06,
"loss": 5.9369,
"step": 1272
},
{
"epoch": 2.06,
"learning_rate": 1.3326455653474895e-06,
"loss": 6.0815,
"step": 1274
},
{
"epoch": 2.06,
"learning_rate": 1.3306677353339095e-06,
"loss": 6.1302,
"step": 1276
},
{
"epoch": 2.07,
"learning_rate": 1.328688451944569e-06,
"loss": 6.2322,
"step": 1278
},
{
"epoch": 2.07,
"learning_rate": 1.3267077238789632e-06,
"loss": 6.0083,
"step": 1280
},
{
"epoch": 2.07,
"learning_rate": 1.3247255598429376e-06,
"loss": 6.3264,
"step": 1282
},
{
"epoch": 2.08,
"learning_rate": 1.3227419685486491e-06,
"loss": 5.502,
"step": 1284
},
{
"epoch": 2.08,
"learning_rate": 1.320756958714528e-06,
"loss": 6.0936,
"step": 1286
},
{
"epoch": 2.08,
"learning_rate": 1.3187705390652388e-06,
"loss": 6.2401,
"step": 1288
},
{
"epoch": 2.08,
"learning_rate": 1.3167827183316429e-06,
"loss": 6.3647,
"step": 1290
},
{
"epoch": 2.09,
"learning_rate": 1.3147935052507598e-06,
"loss": 6.2787,
"step": 1292
},
{
"epoch": 2.09,
"learning_rate": 1.3128029085657288e-06,
"loss": 5.6309,
"step": 1294
},
{
"epoch": 2.09,
"learning_rate": 1.3108109370257712e-06,
"loss": 5.9382,
"step": 1296
},
{
"epoch": 2.1,
"learning_rate": 1.30881759938615e-06,
"loss": 5.8292,
"step": 1298
},
{
"epoch": 2.1,
"learning_rate": 1.3068229044081322e-06,
"loss": 6.2961,
"step": 1300
},
{
"epoch": 2.1,
"learning_rate": 1.3048268608589533e-06,
"loss": 6.4635,
"step": 1302
},
{
"epoch": 2.11,
"learning_rate": 1.302829477511773e-06,
"loss": 6.06,
"step": 1304
},
{
"epoch": 2.11,
"learning_rate": 1.3008307631456418e-06,
"loss": 5.8465,
"step": 1306
},
{
"epoch": 2.11,
"learning_rate": 1.2988307265454596e-06,
"loss": 5.9795,
"step": 1308
},
{
"epoch": 2.12,
"learning_rate": 1.2968293765019382e-06,
"loss": 6.2658,
"step": 1310
},
{
"epoch": 2.12,
"learning_rate": 1.2948267218115623e-06,
"loss": 6.1564,
"step": 1312
},
{
"epoch": 2.12,
"learning_rate": 1.2928227712765502e-06,
"loss": 6.4266,
"step": 1314
},
{
"epoch": 2.13,
"learning_rate": 1.2908175337048173e-06,
"loss": 5.7499,
"step": 1316
},
{
"epoch": 2.13,
"learning_rate": 1.288811017909934e-06,
"loss": 6.2519,
"step": 1318
},
{
"epoch": 2.13,
"learning_rate": 1.2868032327110903e-06,
"loss": 5.8981,
"step": 1320
},
{
"epoch": 2.14,
"learning_rate": 1.284794186933055e-06,
"loss": 6.3804,
"step": 1322
},
{
"epoch": 2.14,
"learning_rate": 1.2827838894061376e-06,
"loss": 5.8079,
"step": 1324
},
{
"epoch": 2.14,
"learning_rate": 1.2807723489661494e-06,
"loss": 6.4474,
"step": 1326
},
{
"epoch": 2.15,
"learning_rate": 1.2787595744543644e-06,
"loss": 6.367,
"step": 1328
},
{
"epoch": 2.15,
"learning_rate": 1.2767455747174809e-06,
"loss": 6.1068,
"step": 1330
},
{
"epoch": 2.15,
"learning_rate": 1.2747303586075831e-06,
"loss": 5.7674,
"step": 1332
},
{
"epoch": 2.16,
"learning_rate": 1.2727139349820996e-06,
"loss": 6.1313,
"step": 1334
},
{
"epoch": 2.16,
"learning_rate": 1.2706963127037683e-06,
"loss": 5.6983,
"step": 1336
},
{
"epoch": 2.16,
"learning_rate": 1.2686775006405944e-06,
"loss": 6.3627,
"step": 1338
},
{
"epoch": 2.17,
"learning_rate": 1.2666575076658132e-06,
"loss": 5.8205,
"step": 1340
},
{
"epoch": 2.17,
"learning_rate": 1.2646363426578503e-06,
"loss": 6.2026,
"step": 1342
},
{
"epoch": 2.17,
"learning_rate": 1.262614014500282e-06,
"loss": 6.375,
"step": 1344
},
{
"epoch": 2.18,
"learning_rate": 1.2605905320817976e-06,
"loss": 6.4086,
"step": 1346
},
{
"epoch": 2.18,
"learning_rate": 1.2585659042961597e-06,
"loss": 6.1262,
"step": 1348
},
{
"epoch": 2.18,
"learning_rate": 1.2565401400421652e-06,
"loss": 5.8055,
"step": 1350
},
{
"epoch": 2.19,
"learning_rate": 1.2545132482236055e-06,
"loss": 6.595,
"step": 1352
},
{
"epoch": 2.19,
"learning_rate": 1.2524852377492284e-06,
"loss": 5.9699,
"step": 1354
},
{
"epoch": 2.19,
"learning_rate": 1.2504561175326985e-06,
"loss": 5.7263,
"step": 1356
},
{
"epoch": 2.19,
"learning_rate": 1.248425896492558e-06,
"loss": 6.1658,
"step": 1358
},
{
"epoch": 2.2,
"learning_rate": 1.2463945835521875e-06,
"loss": 6.5257,
"step": 1360
},
{
"epoch": 2.2,
"learning_rate": 1.244362187639767e-06,
"loss": 6.1039,
"step": 1362
},
{
"epoch": 2.2,
"learning_rate": 1.2423287176882357e-06,
"loss": 6.1913,
"step": 1364
},
{
"epoch": 2.21,
"learning_rate": 1.2402941826352545e-06,
"loss": 5.79,
"step": 1366
},
{
"epoch": 2.21,
"learning_rate": 1.2382585914231648e-06,
"loss": 6.0318,
"step": 1368
},
{
"epoch": 2.21,
"learning_rate": 1.2362219529989512e-06,
"loss": 6.3702,
"step": 1370
},
{
"epoch": 2.22,
"learning_rate": 1.2341842763142002e-06,
"loss": 5.7685,
"step": 1372
},
{
"epoch": 2.22,
"learning_rate": 1.2321455703250613e-06,
"loss": 6.1761,
"step": 1374
},
{
"epoch": 2.22,
"learning_rate": 1.2301058439922102e-06,
"loss": 5.8961,
"step": 1376
},
{
"epoch": 2.23,
"learning_rate": 1.2280651062808045e-06,
"loss": 6.2063,
"step": 1378
},
{
"epoch": 2.23,
"learning_rate": 1.226023366160449e-06,
"loss": 5.9086,
"step": 1380
},
{
"epoch": 2.23,
"learning_rate": 1.223980632605154e-06,
"loss": 6.1152,
"step": 1382
},
{
"epoch": 2.24,
"learning_rate": 1.2219369145932955e-06,
"loss": 6.1352,
"step": 1384
},
{
"epoch": 2.24,
"learning_rate": 1.2198922211075776e-06,
"loss": 6.1588,
"step": 1386
},
{
"epoch": 2.24,
"learning_rate": 1.217846561134991e-06,
"loss": 6.2379,
"step": 1388
},
{
"epoch": 2.25,
"learning_rate": 1.2157999436667746e-06,
"loss": 6.0753,
"step": 1390
},
{
"epoch": 2.25,
"learning_rate": 1.2137523776983756e-06,
"loss": 6.3966,
"step": 1392
},
{
"epoch": 2.25,
"learning_rate": 1.2117038722294109e-06,
"loss": 5.8844,
"step": 1394
},
{
"epoch": 2.26,
"learning_rate": 1.2096544362636254e-06,
"loss": 6.2611,
"step": 1396
},
{
"epoch": 2.26,
"learning_rate": 1.2076040788088553e-06,
"loss": 6.1557,
"step": 1398
},
{
"epoch": 2.26,
"learning_rate": 1.205552808876986e-06,
"loss": 6.2408,
"step": 1400
},
{
"epoch": 2.27,
"learning_rate": 1.2035006354839132e-06,
"loss": 5.8395,
"step": 1402
},
{
"epoch": 2.27,
"learning_rate": 1.201447567649505e-06,
"loss": 5.8175,
"step": 1404
},
{
"epoch": 2.27,
"learning_rate": 1.1993936143975598e-06,
"loss": 5.4642,
"step": 1406
},
{
"epoch": 2.28,
"learning_rate": 1.1973387847557675e-06,
"loss": 5.88,
"step": 1408
},
{
"epoch": 2.28,
"learning_rate": 1.1952830877556698e-06,
"loss": 6.2455,
"step": 1410
},
{
"epoch": 2.28,
"learning_rate": 1.193226532432622e-06,
"loss": 6.1702,
"step": 1412
},
{
"epoch": 2.29,
"learning_rate": 1.1911691278257509e-06,
"loss": 6.2456,
"step": 1414
},
{
"epoch": 2.29,
"learning_rate": 1.1891108829779163e-06,
"loss": 6.2208,
"step": 1416
},
{
"epoch": 2.29,
"learning_rate": 1.1870518069356708e-06,
"loss": 6.455,
"step": 1418
},
{
"epoch": 2.29,
"learning_rate": 1.1849919087492211e-06,
"loss": 6.2341,
"step": 1420
},
{
"epoch": 2.3,
"learning_rate": 1.1829311974723866e-06,
"loss": 6.2769,
"step": 1422
},
{
"epoch": 2.3,
"learning_rate": 1.1808696821625612e-06,
"loss": 5.7287,
"step": 1424
},
{
"epoch": 2.3,
"learning_rate": 1.1788073718806724e-06,
"loss": 5.9381,
"step": 1426
},
{
"epoch": 2.31,
"learning_rate": 1.1767442756911417e-06,
"loss": 5.663,
"step": 1428
},
{
"epoch": 2.31,
"learning_rate": 1.174680402661845e-06,
"loss": 6.0789,
"step": 1430
},
{
"epoch": 2.31,
"learning_rate": 1.1726157618640726e-06,
"loss": 6.2935,
"step": 1432
},
{
"epoch": 2.32,
"learning_rate": 1.1705503623724897e-06,
"loss": 6.5166,
"step": 1434
},
{
"epoch": 2.32,
"learning_rate": 1.1684842132650956e-06,
"loss": 5.4518,
"step": 1436
},
{
"epoch": 2.32,
"learning_rate": 1.1664173236231846e-06,
"loss": 6.2656,
"step": 1438
},
{
"epoch": 2.33,
"learning_rate": 1.164349702531306e-06,
"loss": 5.5944,
"step": 1440
},
{
"epoch": 2.33,
"learning_rate": 1.1622813590772243e-06,
"loss": 6.3351,
"step": 1442
},
{
"epoch": 2.33,
"learning_rate": 1.1602123023518777e-06,
"loss": 6.4678,
"step": 1444
},
{
"epoch": 2.34,
"learning_rate": 1.1581425414493408e-06,
"loss": 5.9154,
"step": 1446
},
{
"epoch": 2.34,
"learning_rate": 1.156072085466783e-06,
"loss": 6.3876,
"step": 1448
},
{
"epoch": 2.34,
"learning_rate": 1.154000943504428e-06,
"loss": 6.0515,
"step": 1450
},
{
"epoch": 2.35,
"learning_rate": 1.1519291246655158e-06,
"loss": 6.2604,
"step": 1452
},
{
"epoch": 2.35,
"learning_rate": 1.14985663805626e-06,
"loss": 5.7749,
"step": 1454
},
{
"epoch": 2.35,
"learning_rate": 1.1477834927858103e-06,
"loss": 6.0236,
"step": 1456
},
{
"epoch": 2.36,
"learning_rate": 1.1457096979662113e-06,
"loss": 6.4244,
"step": 1458
},
{
"epoch": 2.36,
"learning_rate": 1.1436352627123623e-06,
"loss": 6.2735,
"step": 1460
},
{
"epoch": 2.36,
"learning_rate": 1.1415601961419774e-06,
"loss": 6.3662,
"step": 1462
},
{
"epoch": 2.37,
"learning_rate": 1.1394845073755455e-06,
"loss": 6.2688,
"step": 1464
},
{
"epoch": 2.37,
"learning_rate": 1.1374082055362908e-06,
"loss": 5.4211,
"step": 1466
},
{
"epoch": 2.37,
"learning_rate": 1.1353312997501312e-06,
"loss": 5.9967,
"step": 1468
},
{
"epoch": 2.38,
"learning_rate": 1.1332537991456397e-06,
"loss": 6.2811,
"step": 1470
},
{
"epoch": 2.38,
"learning_rate": 1.1311757128540039e-06,
"loss": 6.0461,
"step": 1472
},
{
"epoch": 2.38,
"learning_rate": 1.129097050008985e-06,
"loss": 6.3685,
"step": 1474
},
{
"epoch": 2.39,
"learning_rate": 1.1270178197468786e-06,
"loss": 5.7119,
"step": 1476
},
{
"epoch": 2.39,
"learning_rate": 1.1249380312064749e-06,
"loss": 5.9253,
"step": 1478
},
{
"epoch": 2.39,
"learning_rate": 1.1228576935290168e-06,
"loss": 6.2371,
"step": 1480
},
{
"epoch": 2.4,
"learning_rate": 1.1207768158581613e-06,
"loss": 6.2198,
"step": 1482
},
{
"epoch": 2.4,
"learning_rate": 1.1186954073399386e-06,
"loss": 5.9667,
"step": 1484
},
{
"epoch": 2.4,
"learning_rate": 1.116613477122713e-06,
"loss": 5.8026,
"step": 1486
},
{
"epoch": 2.4,
"learning_rate": 1.114531034357141e-06,
"loss": 5.9698,
"step": 1488
},
{
"epoch": 2.41,
"learning_rate": 1.1124480881961318e-06,
"loss": 6.2043,
"step": 1490
},
{
"epoch": 2.41,
"learning_rate": 1.110364647794807e-06,
"loss": 5.9397,
"step": 1492
},
{
"epoch": 2.41,
"learning_rate": 1.1082807223104617e-06,
"loss": 6.0089,
"step": 1494
},
{
"epoch": 2.42,
"learning_rate": 1.1061963209025222e-06,
"loss": 5.7651,
"step": 1496
},
{
"epoch": 2.42,
"learning_rate": 1.1041114527325064e-06,
"loss": 6.2575,
"step": 1498
},
{
"epoch": 2.42,
"learning_rate": 1.1020261269639842e-06,
"loss": 6.3937,
"step": 1500
},
{
"epoch": 2.43,
"learning_rate": 1.0999403527625366e-06,
"loss": 6.2106,
"step": 1502
},
{
"epoch": 2.43,
"learning_rate": 1.0978541392957154e-06,
"loss": 5.7234,
"step": 1504
},
{
"epoch": 2.43,
"learning_rate": 1.095767495733004e-06,
"loss": 6.2505,
"step": 1506
},
{
"epoch": 2.44,
"learning_rate": 1.0936804312457747e-06,
"loss": 5.6241,
"step": 1508
},
{
"epoch": 2.44,
"learning_rate": 1.0915929550072515e-06,
"loss": 5.4477,
"step": 1510
},
{
"epoch": 2.44,
"learning_rate": 1.0895050761924667e-06,
"loss": 6.3568,
"step": 1512
},
{
"epoch": 2.45,
"learning_rate": 1.0874168039782225e-06,
"loss": 6.2436,
"step": 1514
},
{
"epoch": 2.45,
"learning_rate": 1.0853281475430516e-06,
"loss": 6.265,
"step": 1516
},
{
"epoch": 2.45,
"learning_rate": 1.0832391160671728e-06,
"loss": 5.6497,
"step": 1518
},
{
"epoch": 2.46,
"learning_rate": 1.0811497187324555e-06,
"loss": 5.7196,
"step": 1520
},
{
"epoch": 2.46,
"learning_rate": 1.0790599647223762e-06,
"loss": 6.0961,
"step": 1522
},
{
"epoch": 2.46,
"learning_rate": 1.0769698632219792e-06,
"loss": 5.7752,
"step": 1524
},
{
"epoch": 2.47,
"learning_rate": 1.074879423417837e-06,
"loss": 5.9801,
"step": 1526
},
{
"epoch": 2.47,
"learning_rate": 1.0727886544980067e-06,
"loss": 5.9149,
"step": 1528
},
{
"epoch": 2.47,
"learning_rate": 1.0706975656519944e-06,
"loss": 6.2367,
"step": 1530
},
{
"epoch": 2.48,
"learning_rate": 1.068606166070712e-06,
"loss": 6.3122,
"step": 1532
},
{
"epoch": 2.48,
"learning_rate": 1.0665144649464355e-06,
"loss": 6.1117,
"step": 1534
},
{
"epoch": 2.48,
"learning_rate": 1.064422471472768e-06,
"loss": 6.0872,
"step": 1536
},
{
"epoch": 2.49,
"learning_rate": 1.0623301948445971e-06,
"loss": 6.1226,
"step": 1538
},
{
"epoch": 2.49,
"learning_rate": 1.0602376442580543e-06,
"loss": 5.5424,
"step": 1540
},
{
"epoch": 2.49,
"learning_rate": 1.0581448289104758e-06,
"loss": 5.9785,
"step": 1542
},
{
"epoch": 2.5,
"learning_rate": 1.0560517580003615e-06,
"loss": 6.0885,
"step": 1544
},
{
"epoch": 2.5,
"learning_rate": 1.0539584407273347e-06,
"loss": 6.1934,
"step": 1546
},
{
"epoch": 2.5,
"learning_rate": 1.0518648862921012e-06,
"loss": 6.1035,
"step": 1548
},
{
"epoch": 2.51,
"learning_rate": 1.0497711038964086e-06,
"loss": 5.9217,
"step": 1550
},
{
"epoch": 2.51,
"learning_rate": 1.0476771027430085e-06,
"loss": 6.2234,
"step": 1552
},
{
"epoch": 2.51,
"learning_rate": 1.0455828920356114e-06,
"loss": 6.2327,
"step": 1554
},
{
"epoch": 2.51,
"learning_rate": 1.0434884809788507e-06,
"loss": 6.4939,
"step": 1556
},
{
"epoch": 2.52,
"learning_rate": 1.0413938787782392e-06,
"loss": 5.9045,
"step": 1558
},
{
"epoch": 2.52,
"learning_rate": 1.0392990946401312e-06,
"loss": 6.3292,
"step": 1560
},
{
"epoch": 2.52,
"learning_rate": 1.03720413777168e-06,
"loss": 6.0232,
"step": 1562
},
{
"epoch": 2.53,
"learning_rate": 1.0351090173807968e-06,
"loss": 6.2403,
"step": 1564
},
{
"epoch": 2.53,
"learning_rate": 1.0330137426761133e-06,
"loss": 6.1885,
"step": 1566
},
{
"epoch": 2.53,
"learning_rate": 1.0309183228669396e-06,
"loss": 5.8844,
"step": 1568
},
{
"epoch": 2.54,
"learning_rate": 1.0288227671632219e-06,
"loss": 6.4348,
"step": 1570
},
{
"epoch": 2.54,
"learning_rate": 1.0267270847755047e-06,
"loss": 6.5083,
"step": 1572
},
{
"epoch": 2.54,
"learning_rate": 1.0246312849148897e-06,
"loss": 6.0011,
"step": 1574
},
{
"epoch": 2.55,
"learning_rate": 1.0225353767929943e-06,
"loss": 5.9635,
"step": 1576
},
{
"epoch": 2.55,
"learning_rate": 1.0204393696219115e-06,
"loss": 6.3792,
"step": 1578
},
{
"epoch": 2.55,
"learning_rate": 1.0183432726141706e-06,
"loss": 6.174,
"step": 1580
},
{
"epoch": 2.56,
"learning_rate": 1.0162470949826946e-06,
"loss": 6.2007,
"step": 1582
},
{
"epoch": 2.56,
"learning_rate": 1.014150845940762e-06,
"loss": 5.8667,
"step": 1584
},
{
"epoch": 2.56,
"learning_rate": 1.0120545347019646e-06,
"loss": 6.1047,
"step": 1586
},
{
"epoch": 2.57,
"learning_rate": 1.0099581704801673e-06,
"loss": 6.2524,
"step": 1588
},
{
"epoch": 2.57,
"learning_rate": 1.0078617624894683e-06,
"loss": 5.9776,
"step": 1590
},
{
"epoch": 2.57,
"learning_rate": 1.005765319944158e-06,
"loss": 6.3895,
"step": 1592
},
{
"epoch": 2.58,
"learning_rate": 1.0036688520586787e-06,
"loss": 6.2257,
"step": 1594
},
{
"epoch": 2.58,
"learning_rate": 1.0015723680475844e-06,
"loss": 6.3385,
"step": 1596
},
{
"epoch": 2.58,
"learning_rate": 9.994758771254996e-07,
"loss": 6.0314,
"step": 1598
},
{
"epoch": 2.59,
"learning_rate": 9.973793885070792e-07,
"loss": 6.1063,
"step": 1600
},
{
"epoch": 2.59,
"learning_rate": 9.952829114069678e-07,
"loss": 6.002,
"step": 1602
},
{
"epoch": 2.59,
"learning_rate": 9.9318645503976e-07,
"loss": 5.6794,
"step": 1604
},
{
"epoch": 2.6,
"learning_rate": 9.910900286199586e-07,
"loss": 6.2951,
"step": 1606
},
{
"epoch": 2.6,
"learning_rate": 9.889936413619356e-07,
"loss": 5.8184,
"step": 1608
},
{
"epoch": 2.6,
"learning_rate": 9.868973024798895e-07,
"loss": 5.4467,
"step": 1610
},
{
"epoch": 2.61,
"learning_rate": 9.848010211878072e-07,
"loss": 6.0832,
"step": 1612
},
{
"epoch": 2.61,
"learning_rate": 9.827048066994224e-07,
"loss": 6.3475,
"step": 1614
},
{
"epoch": 2.61,
"learning_rate": 9.806086682281757e-07,
"loss": 6.4013,
"step": 1616
},
{
"epoch": 2.61,
"learning_rate": 9.78512614987172e-07,
"loss": 4.5245,
"step": 1618
},
{
"epoch": 2.62,
"learning_rate": 9.764166561891432e-07,
"loss": 6.2644,
"step": 1620
},
{
"epoch": 2.62,
"learning_rate": 9.743208010464048e-07,
"loss": 6.0541,
"step": 1622
},
{
"epoch": 2.62,
"learning_rate": 9.72225058770818e-07,
"loss": 5.9177,
"step": 1624
},
{
"epoch": 2.63,
"learning_rate": 9.70129438573747e-07,
"loss": 6.2789,
"step": 1626
},
{
"epoch": 2.63,
"learning_rate": 9.680339496660191e-07,
"loss": 6.0265,
"step": 1628
},
{
"epoch": 2.63,
"learning_rate": 9.659386012578863e-07,
"loss": 6.5556,
"step": 1630
},
{
"epoch": 2.64,
"learning_rate": 9.638434025589809e-07,
"loss": 6.4456,
"step": 1632
},
{
"epoch": 2.64,
"learning_rate": 9.617483627782786e-07,
"loss": 6.3286,
"step": 1634
},
{
"epoch": 2.64,
"learning_rate": 9.596534911240565e-07,
"loss": 6.1645,
"step": 1636
},
{
"epoch": 2.65,
"learning_rate": 9.575587968038518e-07,
"loss": 5.7545,
"step": 1638
},
{
"epoch": 2.65,
"learning_rate": 9.554642890244233e-07,
"loss": 6.0317,
"step": 1640
},
{
"epoch": 2.65,
"learning_rate": 9.53369976991709e-07,
"loss": 5.4503,
"step": 1642
},
{
"epoch": 2.66,
"learning_rate": 9.512758699107878e-07,
"loss": 5.8737,
"step": 1644
},
{
"epoch": 2.66,
"learning_rate": 9.491819769858366e-07,
"loss": 6.0376,
"step": 1646
},
{
"epoch": 2.66,
"learning_rate": 9.470883074200915e-07,
"loss": 6.2006,
"step": 1648
},
{
"epoch": 2.67,
"learning_rate": 9.44994870415807e-07,
"loss": 6.078,
"step": 1650
},
{
"epoch": 2.67,
"learning_rate": 9.429016751742149e-07,
"loss": 5.8481,
"step": 1652
},
{
"epoch": 2.67,
"learning_rate": 9.408087308954852e-07,
"loss": 6.0188,
"step": 1654
},
{
"epoch": 2.68,
"learning_rate": 9.387160467786838e-07,
"loss": 5.8085,
"step": 1656
},
{
"epoch": 2.68,
"learning_rate": 9.366236320217337e-07,
"loss": 5.838,
"step": 1658
},
{
"epoch": 2.68,
"learning_rate": 9.34531495821375e-07,
"loss": 5.7474,
"step": 1660
},
{
"epoch": 2.69,
"learning_rate": 9.324396473731216e-07,
"loss": 6.3246,
"step": 1662
},
{
"epoch": 2.69,
"learning_rate": 9.303480958712238e-07,
"loss": 6.1301,
"step": 1664
},
{
"epoch": 2.69,
"learning_rate": 9.282568505086262e-07,
"loss": 6.2999,
"step": 1666
},
{
"epoch": 2.7,
"learning_rate": 9.261659204769282e-07,
"loss": 5.5501,
"step": 1668
},
{
"epoch": 2.7,
"learning_rate": 9.240753149663431e-07,
"loss": 6.2039,
"step": 1670
},
{
"epoch": 2.7,
"learning_rate": 9.219850431656578e-07,
"loss": 6.3152,
"step": 1672
},
{
"epoch": 2.71,
"learning_rate": 9.198951142621928e-07,
"loss": 5.5462,
"step": 1674
},
{
"epoch": 2.71,
"learning_rate": 9.17805537441761e-07,
"loss": 5.8592,
"step": 1676
},
{
"epoch": 2.71,
"learning_rate": 9.15716321888628e-07,
"loss": 5.5209,
"step": 1678
},
{
"epoch": 2.72,
"learning_rate": 9.136274767854716e-07,
"loss": 5.9594,
"step": 1680
},
{
"epoch": 2.72,
"learning_rate": 9.115390113133413e-07,
"loss": 6.2646,
"step": 1682
},
{
"epoch": 2.72,
"learning_rate": 9.094509346516177e-07,
"loss": 5.9554,
"step": 1684
},
{
"epoch": 2.72,
"learning_rate": 9.073632559779729e-07,
"loss": 5.8664,
"step": 1686
},
{
"epoch": 2.73,
"learning_rate": 9.052759844683294e-07,
"loss": 6.1002,
"step": 1688
},
{
"epoch": 2.73,
"learning_rate": 9.031891292968209e-07,
"loss": 6.1675,
"step": 1690
},
{
"epoch": 2.73,
"learning_rate": 9.011026996357502e-07,
"loss": 6.0664,
"step": 1692
},
{
"epoch": 2.74,
"learning_rate": 8.990167046555504e-07,
"loss": 6.2686,
"step": 1694
},
{
"epoch": 2.74,
"learning_rate": 8.969311535247437e-07,
"loss": 6.0531,
"step": 1696
},
{
"epoch": 2.74,
"learning_rate": 8.948460554099018e-07,
"loss": 6.0652,
"step": 1698
},
{
"epoch": 2.75,
"learning_rate": 8.927614194756052e-07,
"loss": 6.0999,
"step": 1700
},
{
"epoch": 2.75,
"learning_rate": 8.906772548844025e-07,
"loss": 6.1437,
"step": 1702
},
{
"epoch": 2.75,
"learning_rate": 8.885935707967715e-07,
"loss": 6.1177,
"step": 1704
},
{
"epoch": 2.76,
"learning_rate": 8.865103763710778e-07,
"loss": 6.1103,
"step": 1706
},
{
"epoch": 2.76,
"learning_rate": 8.844276807635342e-07,
"loss": 5.8831,
"step": 1708
},
{
"epoch": 2.76,
"learning_rate": 8.823454931281616e-07,
"loss": 5.9109,
"step": 1710
},
{
"epoch": 2.77,
"learning_rate": 8.802638226167478e-07,
"loss": 6.5164,
"step": 1712
},
{
"epoch": 2.77,
"learning_rate": 8.781826783788083e-07,
"loss": 6.0546,
"step": 1714
},
{
"epoch": 2.77,
"learning_rate": 8.761020695615449e-07,
"loss": 6.1878,
"step": 1716
},
{
"epoch": 2.78,
"learning_rate": 8.740220053098066e-07,
"loss": 6.2062,
"step": 1718
},
{
"epoch": 2.78,
"learning_rate": 8.719424947660485e-07,
"loss": 5.1807,
"step": 1720
},
{
"epoch": 2.78,
"learning_rate": 8.698635470702923e-07,
"loss": 6.3489,
"step": 1722
},
{
"epoch": 2.79,
"learning_rate": 8.677851713600854e-07,
"loss": 6.1953,
"step": 1724
},
{
"epoch": 2.79,
"learning_rate": 8.657073767704615e-07,
"loss": 6.0458,
"step": 1726
},
{
"epoch": 2.79,
"learning_rate": 8.636301724339003e-07,
"loss": 6.2063,
"step": 1728
},
{
"epoch": 2.8,
"learning_rate": 8.615535674802864e-07,
"loss": 6.0141,
"step": 1730
},
{
"epoch": 2.8,
"learning_rate": 8.594775710368703e-07,
"loss": 6.3073,
"step": 1732
},
{
"epoch": 2.8,
"learning_rate": 8.574021922282292e-07,
"loss": 6.0074,
"step": 1734
},
{
"epoch": 2.81,
"learning_rate": 8.553274401762236e-07,
"loss": 6.3685,
"step": 1736
},
{
"epoch": 2.81,
"learning_rate": 8.532533239999602e-07,
"loss": 5.7029,
"step": 1738
},
{
"epoch": 2.81,
"learning_rate": 8.511798528157511e-07,
"loss": 6.1316,
"step": 1740
},
{
"epoch": 2.82,
"learning_rate": 8.491070357370729e-07,
"loss": 5.805,
"step": 1742
},
{
"epoch": 2.82,
"learning_rate": 8.470348818745276e-07,
"loss": 6.0199,
"step": 1744
},
{
"epoch": 2.82,
"learning_rate": 8.44963400335802e-07,
"loss": 5.8261,
"step": 1746
},
{
"epoch": 2.83,
"learning_rate": 8.428926002256282e-07,
"loss": 6.3244,
"step": 1748
},
{
"epoch": 2.83,
"learning_rate": 8.408224906457429e-07,
"loss": 5.5886,
"step": 1750
},
{
"epoch": 2.83,
"learning_rate": 8.387530806948477e-07,
"loss": 5.9277,
"step": 1752
},
{
"epoch": 2.83,
"learning_rate": 8.366843794685694e-07,
"loss": 6.1806,
"step": 1754
},
{
"epoch": 2.84,
"learning_rate": 8.346163960594192e-07,
"loss": 6.1859,
"step": 1756
},
{
"epoch": 2.84,
"learning_rate": 8.325491395567539e-07,
"loss": 6.0103,
"step": 1758
},
{
"epoch": 2.84,
"learning_rate": 8.304826190467349e-07,
"loss": 5.8642,
"step": 1760
},
{
"epoch": 2.85,
"learning_rate": 8.284168436122897e-07,
"loss": 6.0721,
"step": 1762
},
{
"epoch": 2.85,
"learning_rate": 8.263518223330696e-07,
"loss": 5.2597,
"step": 1764
},
{
"epoch": 2.85,
"learning_rate": 8.24287564285412e-07,
"loss": 5.9716,
"step": 1766
},
{
"epoch": 2.86,
"learning_rate": 8.222240785422995e-07,
"loss": 6.1662,
"step": 1768
},
{
"epoch": 2.86,
"learning_rate": 8.201613741733202e-07,
"loss": 6.4284,
"step": 1770
},
{
"epoch": 2.86,
"learning_rate": 8.180994602446279e-07,
"loss": 5.5088,
"step": 1772
},
{
"epoch": 2.87,
"learning_rate": 8.160383458189022e-07,
"loss": 6.2377,
"step": 1774
},
{
"epoch": 2.87,
"learning_rate": 8.139780399553079e-07,
"loss": 6.2402,
"step": 1776
},
{
"epoch": 2.87,
"learning_rate": 8.119185517094577e-07,
"loss": 6.4094,
"step": 1778
},
{
"epoch": 2.88,
"learning_rate": 8.098598901333692e-07,
"loss": 5.8309,
"step": 1780
},
{
"epoch": 2.88,
"learning_rate": 8.078020642754273e-07,
"loss": 5.8735,
"step": 1782
},
{
"epoch": 2.88,
"learning_rate": 8.057450831803427e-07,
"loss": 6.2882,
"step": 1784
},
{
"epoch": 2.89,
"learning_rate": 8.036889558891142e-07,
"loss": 6.0149,
"step": 1786
},
{
"epoch": 2.89,
"learning_rate": 8.016336914389873e-07,
"loss": 6.2319,
"step": 1788
},
{
"epoch": 2.89,
"learning_rate": 7.995792988634151e-07,
"loss": 5.6783,
"step": 1790
},
{
"epoch": 2.9,
"learning_rate": 7.975257871920193e-07,
"loss": 5.5368,
"step": 1792
},
{
"epoch": 2.9,
"learning_rate": 7.954731654505491e-07,
"loss": 5.7339,
"step": 1794
},
{
"epoch": 2.9,
"learning_rate": 7.93421442660842e-07,
"loss": 5.8137,
"step": 1796
},
{
"epoch": 2.91,
"learning_rate": 7.913706278407849e-07,
"loss": 6.0696,
"step": 1798
},
{
"epoch": 2.91,
"learning_rate": 7.89320730004274e-07,
"loss": 6.0298,
"step": 1800
},
{
"epoch": 2.91,
"learning_rate": 7.872717581611741e-07,
"loss": 5.9934,
"step": 1802
},
{
"epoch": 2.92,
"learning_rate": 7.852237213172811e-07,
"loss": 5.8865,
"step": 1804
},
{
"epoch": 2.92,
"learning_rate": 7.831766284742806e-07,
"loss": 6.481,
"step": 1806
},
{
"epoch": 2.92,
"learning_rate": 7.811304886297104e-07,
"loss": 6.1973,
"step": 1808
},
{
"epoch": 2.93,
"learning_rate": 7.790853107769178e-07,
"loss": 6.3481,
"step": 1810
},
{
"epoch": 2.93,
"learning_rate": 7.770411039050229e-07,
"loss": 6.235,
"step": 1812
},
{
"epoch": 2.93,
"learning_rate": 7.749978769988777e-07,
"loss": 6.1487,
"step": 1814
},
{
"epoch": 2.93,
"learning_rate": 7.729556390390275e-07,
"loss": 5.6685,
"step": 1816
},
{
"epoch": 2.94,
"learning_rate": 7.709143990016701e-07,
"loss": 6.3257,
"step": 1818
},
{
"epoch": 2.94,
"learning_rate": 7.688741658586178e-07,
"loss": 6.0141,
"step": 1820
},
{
"epoch": 2.94,
"learning_rate": 7.668349485772571e-07,
"loss": 6.3748,
"step": 1822
},
{
"epoch": 2.95,
"learning_rate": 7.6479675612051e-07,
"loss": 5.9491,
"step": 1824
},
{
"epoch": 2.95,
"learning_rate": 7.627595974467929e-07,
"loss": 6.2564,
"step": 1826
},
{
"epoch": 2.95,
"learning_rate": 7.607234815099801e-07,
"loss": 6.2111,
"step": 1828
},
{
"epoch": 2.96,
"learning_rate": 7.586884172593608e-07,
"loss": 5.5946,
"step": 1830
},
{
"epoch": 2.96,
"learning_rate": 7.566544136396036e-07,
"loss": 6.3797,
"step": 1832
},
{
"epoch": 2.96,
"learning_rate": 7.546214795907139e-07,
"loss": 5.0831,
"step": 1834
},
{
"epoch": 2.97,
"learning_rate": 7.525896240479976e-07,
"loss": 6.0609,
"step": 1836
},
{
"epoch": 2.97,
"learning_rate": 7.505588559420187e-07,
"loss": 6.1916,
"step": 1838
},
{
"epoch": 2.97,
"learning_rate": 7.485291841985626e-07,
"loss": 6.0715,
"step": 1840
},
{
"epoch": 2.98,
"learning_rate": 7.465006177385952e-07,
"loss": 6.2253,
"step": 1842
},
{
"epoch": 2.98,
"learning_rate": 7.444731654782253e-07,
"loss": 5.6089,
"step": 1844
},
{
"epoch": 2.98,
"learning_rate": 7.424468363286633e-07,
"loss": 5.9458,
"step": 1846
},
{
"epoch": 2.99,
"learning_rate": 7.404216391961847e-07,
"loss": 6.1995,
"step": 1848
},
{
"epoch": 2.99,
"learning_rate": 7.383975829820873e-07,
"loss": 5.889,
"step": 1850
},
{
"epoch": 2.99,
"learning_rate": 7.363746765826568e-07,
"loss": 5.7558,
"step": 1852
},
{
"epoch": 3.0,
"learning_rate": 7.343529288891239e-07,
"loss": 6.2806,
"step": 1854
},
{
"epoch": 3.0,
"learning_rate": 7.323323487876256e-07,
"loss": 6.117,
"step": 1856
},
{
"epoch": 3.0,
"learning_rate": 7.303129451591686e-07,
"loss": 5.9999,
"step": 1858
},
{
"epoch": 3.01,
"learning_rate": 7.282947268795876e-07,
"loss": 6.1178,
"step": 1860
},
{
"epoch": 3.01,
"learning_rate": 7.262777028195079e-07,
"loss": 6.1407,
"step": 1862
},
{
"epoch": 3.01,
"learning_rate": 7.242618818443056e-07,
"loss": 6.3491,
"step": 1864
},
{
"epoch": 3.02,
"learning_rate": 7.222472728140694e-07,
"loss": 6.2381,
"step": 1866
},
{
"epoch": 3.02,
"learning_rate": 7.202338845835605e-07,
"loss": 5.9172,
"step": 1868
},
{
"epoch": 3.02,
"learning_rate": 7.182217260021748e-07,
"loss": 6.3349,
"step": 1870
},
{
"epoch": 3.03,
"learning_rate": 7.162108059139032e-07,
"loss": 5.8465,
"step": 1872
},
{
"epoch": 3.03,
"learning_rate": 7.142011331572936e-07,
"loss": 6.0377,
"step": 1874
},
{
"epoch": 3.03,
"learning_rate": 7.121927165654108e-07,
"loss": 5.9383,
"step": 1876
},
{
"epoch": 3.04,
"learning_rate": 7.101855649657991e-07,
"loss": 6.3187,
"step": 1878
},
{
"epoch": 3.04,
"learning_rate": 7.08179687180442e-07,
"loss": 6.2913,
"step": 1880
},
{
"epoch": 3.04,
"learning_rate": 7.061750920257258e-07,
"loss": 5.9955,
"step": 1882
},
{
"epoch": 3.04,
"learning_rate": 7.041717883123976e-07,
"loss": 6.126,
"step": 1884
},
{
"epoch": 3.05,
"learning_rate": 7.02169784845529e-07,
"loss": 6.1044,
"step": 1886
},
{
"epoch": 3.05,
"learning_rate": 7.001690904244766e-07,
"loss": 6.1142,
"step": 1888
},
{
"epoch": 3.05,
"learning_rate": 6.981697138428433e-07,
"loss": 6.0607,
"step": 1890
},
{
"epoch": 3.06,
"learning_rate": 6.961716638884399e-07,
"loss": 6.1981,
"step": 1892
},
{
"epoch": 3.06,
"learning_rate": 6.94174949343246e-07,
"loss": 6.3634,
"step": 1894
},
{
"epoch": 3.06,
"learning_rate": 6.921795789833722e-07,
"loss": 5.6222,
"step": 1896
},
{
"epoch": 3.07,
"learning_rate": 6.901855615790205e-07,
"loss": 6.1772,
"step": 1898
},
{
"epoch": 3.07,
"learning_rate": 6.881929058944469e-07,
"loss": 5.7795,
"step": 1900
},
{
"epoch": 3.07,
"learning_rate": 6.862016206879216e-07,
"loss": 6.1229,
"step": 1902
},
{
"epoch": 3.08,
"learning_rate": 6.842117147116913e-07,
"loss": 5.6223,
"step": 1904
},
{
"epoch": 3.08,
"learning_rate": 6.822231967119409e-07,
"loss": 5.7032,
"step": 1906
},
{
"epoch": 3.08,
"learning_rate": 6.802360754287547e-07,
"loss": 5.5354,
"step": 1908
},
{
"epoch": 3.09,
"learning_rate": 6.782503595960781e-07,
"loss": 6.0374,
"step": 1910
},
{
"epoch": 3.09,
"learning_rate": 6.76266057941679e-07,
"loss": 6.3156,
"step": 1912
},
{
"epoch": 3.09,
"learning_rate": 6.742831791871095e-07,
"loss": 6.0218,
"step": 1914
},
{
"epoch": 3.1,
"learning_rate": 6.723017320476678e-07,
"loss": 5.9377,
"step": 1916
},
{
"epoch": 3.1,
"learning_rate": 6.7032172523236e-07,
"loss": 5.8099,
"step": 1918
},
{
"epoch": 3.1,
"learning_rate": 6.683431674438612e-07,
"loss": 5.9519,
"step": 1920
},
{
"epoch": 3.11,
"learning_rate": 6.663660673784775e-07,
"loss": 5.8745,
"step": 1922
},
{
"epoch": 3.11,
"learning_rate": 6.643904337261082e-07,
"loss": 6.1688,
"step": 1924
},
{
"epoch": 3.11,
"learning_rate": 6.624162751702076e-07,
"loss": 5.8773,
"step": 1926
},
{
"epoch": 3.12,
"learning_rate": 6.604436003877464e-07,
"loss": 6.0271,
"step": 1928
},
{
"epoch": 3.12,
"learning_rate": 6.584724180491729e-07,
"loss": 5.7793,
"step": 1930
},
{
"epoch": 3.12,
"learning_rate": 6.565027368183768e-07,
"loss": 6.1935,
"step": 1932
},
{
"epoch": 3.13,
"learning_rate": 6.545345653526495e-07,
"loss": 6.0947,
"step": 1934
},
{
"epoch": 3.13,
"learning_rate": 6.525679123026463e-07,
"loss": 5.8929,
"step": 1936
},
{
"epoch": 3.13,
"learning_rate": 6.506027863123491e-07,
"loss": 6.1234,
"step": 1938
},
{
"epoch": 3.14,
"learning_rate": 6.48639196019028e-07,
"loss": 5.3506,
"step": 1940
},
{
"epoch": 3.14,
"learning_rate": 6.466771500532029e-07,
"loss": 5.9521,
"step": 1942
},
{
"epoch": 3.14,
"learning_rate": 6.447166570386063e-07,
"loss": 6.1885,
"step": 1944
},
{
"epoch": 3.15,
"learning_rate": 6.427577255921449e-07,
"loss": 5.851,
"step": 1946
},
{
"epoch": 3.15,
"learning_rate": 6.40800364323862e-07,
"loss": 5.8766,
"step": 1948
},
{
"epoch": 3.15,
"learning_rate": 6.38844581836899e-07,
"loss": 5.7743,
"step": 1950
},
{
"epoch": 3.15,
"learning_rate": 6.368903867274584e-07,
"loss": 6.2211,
"step": 1952
},
{
"epoch": 3.16,
"learning_rate": 6.34937787584767e-07,
"loss": 6.1324,
"step": 1954
},
{
"epoch": 3.16,
"learning_rate": 6.329867929910347e-07,
"loss": 6.384,
"step": 1956
},
{
"epoch": 3.16,
"learning_rate": 6.310374115214203e-07,
"loss": 5.9948,
"step": 1958
},
{
"epoch": 3.17,
"learning_rate": 6.290896517439924e-07,
"loss": 5.6496,
"step": 1960
},
{
"epoch": 3.17,
"learning_rate": 6.271435222196914e-07,
"loss": 6.2404,
"step": 1962
},
{
"epoch": 3.17,
"learning_rate": 6.251990315022927e-07,
"loss": 5.3699,
"step": 1964
},
{
"epoch": 3.18,
"learning_rate": 6.232561881383686e-07,
"loss": 5.9554,
"step": 1966
},
{
"epoch": 3.18,
"learning_rate": 6.213150006672499e-07,
"loss": 6.4513,
"step": 1968
},
{
"epoch": 3.18,
"learning_rate": 6.193754776209911e-07,
"loss": 6.2884,
"step": 1970
},
{
"epoch": 3.19,
"learning_rate": 6.174376275243298e-07,
"loss": 6.1467,
"step": 1972
},
{
"epoch": 3.19,
"learning_rate": 6.15501458894651e-07,
"loss": 5.8498,
"step": 1974
},
{
"epoch": 3.19,
"learning_rate": 6.135669802419487e-07,
"loss": 5.7347,
"step": 1976
},
{
"epoch": 3.2,
"learning_rate": 6.116342000687896e-07,
"loss": 5.9732,
"step": 1978
},
{
"epoch": 3.2,
"learning_rate": 6.097031268702745e-07,
"loss": 6.2086,
"step": 1980
},
{
"epoch": 3.2,
"learning_rate": 6.077737691340023e-07,
"loss": 5.7324,
"step": 1982
},
{
"epoch": 3.21,
"learning_rate": 6.058461353400314e-07,
"loss": 5.7416,
"step": 1984
},
{
"epoch": 3.21,
"learning_rate": 6.039202339608431e-07,
"loss": 5.3508,
"step": 1986
},
{
"epoch": 3.21,
"learning_rate": 6.019960734613047e-07,
"loss": 6.0552,
"step": 1988
},
{
"epoch": 3.22,
"learning_rate": 6.000736622986311e-07,
"loss": 6.3086,
"step": 1990
},
{
"epoch": 3.22,
"learning_rate": 5.981530089223488e-07,
"loss": 6.1699,
"step": 1992
},
{
"epoch": 3.22,
"learning_rate": 5.962341217742588e-07,
"loss": 6.2947,
"step": 1994
},
{
"epoch": 3.23,
"learning_rate": 5.94317009288398e-07,
"loss": 6.3022,
"step": 1996
},
{
"epoch": 3.23,
"learning_rate": 5.924016798910037e-07,
"loss": 6.2368,
"step": 1998
},
{
"epoch": 3.23,
"learning_rate": 5.904881420004767e-07,
"loss": 5.5054,
"step": 2000
},
{
"epoch": 3.24,
"learning_rate": 5.885764040273426e-07,
"loss": 6.0251,
"step": 2002
},
{
"epoch": 3.24,
"learning_rate": 5.866664743742162e-07,
"loss": 5.7491,
"step": 2004
},
{
"epoch": 3.24,
"learning_rate": 5.847583614357643e-07,
"loss": 6.2488,
"step": 2006
},
{
"epoch": 3.25,
"learning_rate": 5.828520735986693e-07,
"loss": 6.3708,
"step": 2008
},
{
"epoch": 3.25,
"learning_rate": 5.809476192415904e-07,
"loss": 5.9273,
"step": 2010
},
{
"epoch": 3.25,
"learning_rate": 5.79045006735129e-07,
"loss": 6.2291,
"step": 2012
},
{
"epoch": 3.25,
"learning_rate": 5.771442444417918e-07,
"loss": 5.2932,
"step": 2014
},
{
"epoch": 3.26,
"learning_rate": 5.752453407159521e-07,
"loss": 5.9299,
"step": 2016
},
{
"epoch": 3.26,
"learning_rate": 5.733483039038148e-07,
"loss": 6.2873,
"step": 2018
},
{
"epoch": 3.26,
"learning_rate": 5.71453142343379e-07,
"loss": 5.9898,
"step": 2020
},
{
"epoch": 3.27,
"learning_rate": 5.69559864364402e-07,
"loss": 6.3883,
"step": 2022
},
{
"epoch": 3.27,
"learning_rate": 5.676684782883614e-07,
"loss": 5.7538,
"step": 2024
},
{
"epoch": 3.27,
"learning_rate": 5.657789924284202e-07,
"loss": 6.2761,
"step": 2026
},
{
"epoch": 3.28,
"learning_rate": 5.63891415089389e-07,
"loss": 6.3131,
"step": 2028
},
{
"epoch": 3.28,
"learning_rate": 5.6200575456769e-07,
"loss": 5.9519,
"step": 2030
},
{
"epoch": 3.28,
"learning_rate": 5.601220191513207e-07,
"loss": 5.4945,
"step": 2032
},
{
"epoch": 3.29,
"learning_rate": 5.582402171198168e-07,
"loss": 5.6255,
"step": 2034
},
{
"epoch": 3.29,
"learning_rate": 5.563603567442168e-07,
"loss": 6.5243,
"step": 2036
},
{
"epoch": 3.29,
"learning_rate": 5.544824462870243e-07,
"loss": 6.1559,
"step": 2038
},
{
"epoch": 3.3,
"learning_rate": 5.526064940021732e-07,
"loss": 5.7072,
"step": 2040
},
{
"epoch": 3.3,
"learning_rate": 5.507325081349903e-07,
"loss": 5.9227,
"step": 2042
},
{
"epoch": 3.3,
"learning_rate": 5.488604969221596e-07,
"loss": 6.2318,
"step": 2044
},
{
"epoch": 3.31,
"learning_rate": 5.46990468591686e-07,
"loss": 6.0336,
"step": 2046
},
{
"epoch": 3.31,
"learning_rate": 5.451224313628591e-07,
"loss": 6.2192,
"step": 2048
},
{
"epoch": 3.31,
"learning_rate": 5.432563934462166e-07,
"loss": 6.1455,
"step": 2050
},
{
"epoch": 3.32,
"learning_rate": 5.413923630435093e-07,
"loss": 5.8435,
"step": 2052
},
{
"epoch": 3.32,
"learning_rate": 5.395303483476641e-07,
"loss": 5.8657,
"step": 2054
},
{
"epoch": 3.32,
"learning_rate": 5.37670357542748e-07,
"loss": 5.9615,
"step": 2056
},
{
"epoch": 3.33,
"learning_rate": 5.358123988039337e-07,
"loss": 6.0928,
"step": 2058
},
{
"epoch": 3.33,
"learning_rate": 5.339564802974614e-07,
"loss": 5.6679,
"step": 2060
},
{
"epoch": 3.33,
"learning_rate": 5.321026101806032e-07,
"loss": 5.9366,
"step": 2062
},
{
"epoch": 3.34,
"learning_rate": 5.302507966016295e-07,
"loss": 5.9127,
"step": 2064
},
{
"epoch": 3.34,
"learning_rate": 5.284010476997704e-07,
"loss": 6.1515,
"step": 2066
},
{
"epoch": 3.34,
"learning_rate": 5.265533716051824e-07,
"loss": 6.135,
"step": 2068
},
{
"epoch": 3.35,
"learning_rate": 5.247077764389099e-07,
"loss": 6.117,
"step": 2070
},
{
"epoch": 3.35,
"learning_rate": 5.22864270312853e-07,
"loss": 5.5347,
"step": 2072
},
{
"epoch": 3.35,
"learning_rate": 5.210228613297281e-07,
"loss": 5.9705,
"step": 2074
},
{
"epoch": 3.36,
"learning_rate": 5.191835575830351e-07,
"loss": 6.0285,
"step": 2076
},
{
"epoch": 3.36,
"learning_rate": 5.173463671570204e-07,
"loss": 5.7762,
"step": 2078
},
{
"epoch": 3.36,
"learning_rate": 5.155112981266422e-07,
"loss": 6.1704,
"step": 2080
},
{
"epoch": 3.36,
"learning_rate": 5.136783585575335e-07,
"loss": 6.1445,
"step": 2082
},
{
"epoch": 3.37,
"learning_rate": 5.11847556505969e-07,
"loss": 5.7711,
"step": 2084
},
{
"epoch": 3.37,
"learning_rate": 5.100189000188273e-07,
"loss": 6.0446,
"step": 2086
},
{
"epoch": 3.37,
"learning_rate": 5.081923971335583e-07,
"loss": 5.7676,
"step": 2088
},
{
"epoch": 3.38,
"learning_rate": 5.063680558781445e-07,
"loss": 6.422,
"step": 2090
},
{
"epoch": 3.38,
"learning_rate": 5.045458842710683e-07,
"loss": 6.1722,
"step": 2092
},
{
"epoch": 3.38,
"learning_rate": 5.027258903212759e-07,
"loss": 5.9113,
"step": 2094
},
{
"epoch": 3.39,
"learning_rate": 5.009080820281415e-07,
"loss": 5.7517,
"step": 2096
},
{
"epoch": 3.39,
"learning_rate": 4.990924673814336e-07,
"loss": 5.7869,
"step": 2098
},
{
"epoch": 3.39,
"learning_rate": 4.972790543612782e-07,
"loss": 6.1813,
"step": 2100
},
{
"epoch": 3.4,
"learning_rate": 4.954678509381253e-07,
"loss": 5.8249,
"step": 2102
},
{
"epoch": 3.4,
"learning_rate": 4.936588650727124e-07,
"loss": 5.9902,
"step": 2104
},
{
"epoch": 3.4,
"learning_rate": 4.918521047160307e-07,
"loss": 6.2357,
"step": 2106
},
{
"epoch": 3.41,
"learning_rate": 4.900475778092897e-07,
"loss": 6.0328,
"step": 2108
},
{
"epoch": 3.41,
"learning_rate": 4.882452922838817e-07,
"loss": 6.0947,
"step": 2110
},
{
"epoch": 3.41,
"learning_rate": 4.864452560613484e-07,
"loss": 5.9981,
"step": 2112
},
{
"epoch": 3.42,
"learning_rate": 4.846474770533445e-07,
"loss": 6.0417,
"step": 2114
},
{
"epoch": 3.42,
"learning_rate": 4.828519631616037e-07,
"loss": 5.6535,
"step": 2116
},
{
"epoch": 3.42,
"learning_rate": 4.819550581009024e-07,
"loss": 6.1429,
"step": 2118
},
{
"epoch": 3.43,
"learning_rate": 4.801629566775196e-07,
"loss": 5.8783,
"step": 2120
},
{
"epoch": 3.43,
"learning_rate": 4.783731400811022e-07,
"loss": 5.8331,
"step": 2122
},
{
"epoch": 3.43,
"learning_rate": 4.7658561617838677e-07,
"loss": 6.1839,
"step": 2124
},
{
"epoch": 3.44,
"learning_rate": 4.7480039282603345e-07,
"loss": 6.1042,
"step": 2126
},
{
"epoch": 3.44,
"learning_rate": 4.730174778705908e-07,
"loss": 5.8208,
"step": 2128
},
{
"epoch": 3.44,
"learning_rate": 4.7123687914845966e-07,
"loss": 6.2347,
"step": 2130
},
{
"epoch": 3.45,
"learning_rate": 4.694586044858633e-07,
"loss": 6.0339,
"step": 2132
},
{
"epoch": 3.45,
"learning_rate": 4.67682661698808e-07,
"loss": 5.8287,
"step": 2134
},
{
"epoch": 3.45,
"learning_rate": 4.659090585930513e-07,
"loss": 5.9094,
"step": 2136
},
{
"epoch": 3.46,
"learning_rate": 4.641378029640676e-07,
"loss": 5.9343,
"step": 2138
},
{
"epoch": 3.46,
"learning_rate": 4.6236890259701277e-07,
"loss": 6.2003,
"step": 2140
},
{
"epoch": 3.46,
"learning_rate": 4.606023652666915e-07,
"loss": 6.2596,
"step": 2142
},
{
"epoch": 3.47,
"learning_rate": 4.588381987375215e-07,
"loss": 6.1376,
"step": 2144
},
{
"epoch": 3.47,
"learning_rate": 4.570764107635007e-07,
"loss": 6.2819,
"step": 2146
},
{
"epoch": 3.47,
"learning_rate": 4.553170090881724e-07,
"loss": 5.7263,
"step": 2148
},
{
"epoch": 3.47,
"learning_rate": 4.535600014445914e-07,
"loss": 5.8772,
"step": 2150
},
{
"epoch": 3.48,
"learning_rate": 4.518053955552903e-07,
"loss": 6.1401,
"step": 2152
},
{
"epoch": 3.48,
"learning_rate": 4.50053199132245e-07,
"loss": 5.937,
"step": 2154
},
{
"epoch": 3.48,
"learning_rate": 4.483034198768416e-07,
"loss": 6.1582,
"step": 2156
},
{
"epoch": 3.49,
"learning_rate": 4.465560654798416e-07,
"loss": 5.2896,
"step": 2158
},
{
"epoch": 3.49,
"learning_rate": 4.448111436213485e-07,
"loss": 5.7746,
"step": 2160
},
{
"epoch": 3.49,
"learning_rate": 4.4306866197077543e-07,
"loss": 5.535,
"step": 2162
},
{
"epoch": 3.5,
"learning_rate": 4.4132862818680803e-07,
"loss": 6.0889,
"step": 2164
},
{
"epoch": 3.5,
"learning_rate": 4.395910499173745e-07,
"loss": 5.7546,
"step": 2166
},
{
"epoch": 3.5,
"learning_rate": 4.378559347996096e-07,
"loss": 6.0215,
"step": 2168
},
{
"epoch": 3.51,
"learning_rate": 4.361232904598223e-07,
"loss": 5.8593,
"step": 2170
},
{
"epoch": 3.51,
"learning_rate": 4.3439312451346154e-07,
"loss": 6.0951,
"step": 2172
},
{
"epoch": 3.51,
"learning_rate": 4.3266544456508327e-07,
"loss": 6.0411,
"step": 2174
},
{
"epoch": 3.52,
"learning_rate": 4.3094025820831606e-07,
"loss": 6.2975,
"step": 2176
},
{
"epoch": 3.52,
"learning_rate": 4.2921757302582996e-07,
"loss": 5.9875,
"step": 2178
},
{
"epoch": 3.52,
"learning_rate": 4.2749739658930026e-07,
"loss": 6.2231,
"step": 2180
},
{
"epoch": 3.53,
"learning_rate": 4.257797364593767e-07,
"loss": 6.0196,
"step": 2182
},
{
"epoch": 3.53,
"learning_rate": 4.240646001856476e-07,
"loss": 5.7214,
"step": 2184
},
{
"epoch": 3.53,
"learning_rate": 4.223519953066098e-07,
"loss": 5.6502,
"step": 2186
},
{
"epoch": 3.54,
"learning_rate": 4.206419293496333e-07,
"loss": 5.6593,
"step": 2188
},
{
"epoch": 3.54,
"learning_rate": 4.1893440983092853e-07,
"loss": 6.1549,
"step": 2190
},
{
"epoch": 3.54,
"learning_rate": 4.172294442555148e-07,
"loss": 6.1882,
"step": 2192
},
{
"epoch": 3.55,
"learning_rate": 4.1552704011718497e-07,
"loss": 6.3202,
"step": 2194
},
{
"epoch": 3.55,
"learning_rate": 4.13827204898474e-07,
"loss": 6.2742,
"step": 2196
},
{
"epoch": 3.55,
"learning_rate": 4.121299460706259e-07,
"loss": 5.9497,
"step": 2198
},
{
"epoch": 3.56,
"learning_rate": 4.104352710935609e-07,
"loss": 6.2062,
"step": 2200
},
{
"epoch": 3.56,
"learning_rate": 4.087431874158416e-07,
"loss": 6.0651,
"step": 2202
},
{
"epoch": 3.56,
"learning_rate": 4.0705370247464155e-07,
"loss": 6.12,
"step": 2204
},
{
"epoch": 3.57,
"learning_rate": 4.053668236957134e-07,
"loss": 6.2323,
"step": 2206
},
{
"epoch": 3.57,
"learning_rate": 4.036825584933533e-07,
"loss": 6.018,
"step": 2208
},
{
"epoch": 3.57,
"learning_rate": 4.0200091427037075e-07,
"loss": 6.148,
"step": 2210
},
{
"epoch": 3.57,
"learning_rate": 4.003218984180552e-07,
"loss": 5.937,
"step": 2212
},
{
"epoch": 3.58,
"learning_rate": 3.986455183161437e-07,
"loss": 6.1027,
"step": 2214
},
{
"epoch": 3.58,
"learning_rate": 3.9697178133278854e-07,
"loss": 5.4278,
"step": 2216
},
{
"epoch": 3.58,
"learning_rate": 3.9530069482452466e-07,
"loss": 5.784,
"step": 2218
},
{
"epoch": 3.59,
"learning_rate": 3.9363226613623733e-07,
"loss": 6.0116,
"step": 2220
},
{
"epoch": 3.59,
"learning_rate": 3.919665026011304e-07,
"loss": 5.9013,
"step": 2222
},
{
"epoch": 3.59,
"learning_rate": 3.9030341154069314e-07,
"loss": 5.626,
"step": 2224
},
{
"epoch": 3.6,
"learning_rate": 3.886430002646688e-07,
"loss": 5.9606,
"step": 2226
},
{
"epoch": 3.6,
"learning_rate": 3.8698527607102214e-07,
"loss": 5.9847,
"step": 2228
},
{
"epoch": 3.6,
"learning_rate": 3.853302462459077e-07,
"loss": 6.2028,
"step": 2230
},
{
"epoch": 3.61,
"learning_rate": 3.8367791806363724e-07,
"loss": 6.0791,
"step": 2232
},
{
"epoch": 3.61,
"learning_rate": 3.820282987866481e-07,
"loss": 6.0794,
"step": 2234
},
{
"epoch": 3.61,
"learning_rate": 3.8038139566547144e-07,
"loss": 5.8957,
"step": 2236
},
{
"epoch": 3.62,
"learning_rate": 3.787372159386999e-07,
"loss": 5.7328,
"step": 2238
},
{
"epoch": 3.62,
"learning_rate": 3.7709576683295616e-07,
"loss": 6.1582,
"step": 2240
},
{
"epoch": 3.62,
"learning_rate": 3.7545705556286124e-07,
"loss": 5.8639,
"step": 2242
},
{
"epoch": 3.63,
"learning_rate": 3.738210893310023e-07,
"loss": 5.8205,
"step": 2244
},
{
"epoch": 3.63,
"learning_rate": 3.721878753279016e-07,
"loss": 6.0627,
"step": 2246
},
{
"epoch": 3.63,
"learning_rate": 3.705574207319844e-07,
"loss": 5.7041,
"step": 2248
},
{
"epoch": 3.64,
"learning_rate": 3.689297327095472e-07,
"loss": 6.1213,
"step": 2250
},
{
"epoch": 3.64,
"learning_rate": 3.6730481841472805e-07,
"loss": 6.2363,
"step": 2252
},
{
"epoch": 3.64,
"learning_rate": 3.656826849894725e-07,
"loss": 5.768,
"step": 2254
},
{
"epoch": 3.65,
"learning_rate": 3.640633395635032e-07,
"loss": 5.9267,
"step": 2256
},
{
"epoch": 3.65,
"learning_rate": 3.624467892542895e-07,
"loss": 6.0327,
"step": 2258
},
{
"epoch": 3.65,
"learning_rate": 3.608330411670153e-07,
"loss": 6.1895,
"step": 2260
},
{
"epoch": 3.66,
"learning_rate": 3.5922210239454764e-07,
"loss": 6.1527,
"step": 2262
},
{
"epoch": 3.66,
"learning_rate": 3.5761398001740597e-07,
"loss": 6.0396,
"step": 2264
},
{
"epoch": 3.66,
"learning_rate": 3.5600868110373163e-07,
"loss": 5.818,
"step": 2266
},
{
"epoch": 3.67,
"learning_rate": 3.5440621270925497e-07,
"loss": 6.3069,
"step": 2268
},
{
"epoch": 3.67,
"learning_rate": 3.5280658187726597e-07,
"loss": 6.5033,
"step": 2270
},
{
"epoch": 3.67,
"learning_rate": 3.5120979563858267e-07,
"loss": 5.7382,
"step": 2272
},
{
"epoch": 3.68,
"learning_rate": 3.4961586101152065e-07,
"loss": 5.9984,
"step": 2274
},
{
"epoch": 3.68,
"learning_rate": 3.4802478500186094e-07,
"loss": 5.9253,
"step": 2276
},
{
"epoch": 3.68,
"learning_rate": 3.4643657460282073e-07,
"loss": 6.0152,
"step": 2278
},
{
"epoch": 3.68,
"learning_rate": 3.448512367950227e-07,
"loss": 6.2795,
"step": 2280
},
{
"epoch": 3.69,
"learning_rate": 3.43268778546463e-07,
"loss": 5.8249,
"step": 2282
},
{
"epoch": 3.69,
"learning_rate": 3.4168920681248117e-07,
"loss": 6.0889,
"step": 2284
},
{
"epoch": 3.69,
"learning_rate": 3.4011252853573013e-07,
"loss": 5.3738,
"step": 2286
},
{
"epoch": 3.7,
"learning_rate": 3.3853875064614514e-07,
"loss": 5.7754,
"step": 2288
},
{
"epoch": 3.7,
"learning_rate": 3.369678800609134e-07,
"loss": 6.0388,
"step": 2290
},
{
"epoch": 3.7,
"learning_rate": 3.353999236844436e-07,
"loss": 6.1277,
"step": 2292
},
{
"epoch": 3.71,
"learning_rate": 3.3383488840833596e-07,
"loss": 6.1172,
"step": 2294
},
{
"epoch": 3.71,
"learning_rate": 3.322727811113516e-07,
"loss": 5.9489,
"step": 2296
},
{
"epoch": 3.71,
"learning_rate": 3.3071360865938205e-07,
"loss": 6.1349,
"step": 2298
},
{
"epoch": 3.72,
"learning_rate": 3.2915737790541986e-07,
"loss": 5.6581,
"step": 2300
},
{
"epoch": 3.72,
"learning_rate": 3.276040956895276e-07,
"loss": 5.9057,
"step": 2302
},
{
"epoch": 3.72,
"learning_rate": 3.260537688388085e-07,
"loss": 6.2865,
"step": 2304
},
{
"epoch": 3.73,
"learning_rate": 3.2450640416737595e-07,
"loss": 5.987,
"step": 2306
},
{
"epoch": 3.73,
"learning_rate": 3.229620084763237e-07,
"loss": 6.0143,
"step": 2308
},
{
"epoch": 3.73,
"learning_rate": 3.214205885536965e-07,
"loss": 5.7721,
"step": 2310
},
{
"epoch": 3.74,
"learning_rate": 3.198821511744589e-07,
"loss": 5.3875,
"step": 2312
},
{
"epoch": 3.74,
"learning_rate": 3.183467031004673e-07,
"loss": 6.1836,
"step": 2314
},
{
"epoch": 3.74,
"learning_rate": 3.168142510804386e-07,
"loss": 5.9135,
"step": 2316
},
{
"epoch": 3.75,
"learning_rate": 3.1528480184992144e-07,
"loss": 5.8897,
"step": 2318
},
{
"epoch": 3.75,
"learning_rate": 3.137583621312665e-07,
"loss": 6.1612,
"step": 2320
},
{
"epoch": 3.75,
"learning_rate": 3.122349386335964e-07,
"loss": 6.0063,
"step": 2322
},
{
"epoch": 3.76,
"learning_rate": 3.1071453805277757e-07,
"loss": 5.8636,
"step": 2324
},
{
"epoch": 3.76,
"learning_rate": 3.0919716707138887e-07,
"loss": 5.9821,
"step": 2326
},
{
"epoch": 3.76,
"learning_rate": 3.0768283235869406e-07,
"loss": 5.9865,
"step": 2328
},
{
"epoch": 3.77,
"learning_rate": 3.0617154057061054e-07,
"loss": 6.0986,
"step": 2330
},
{
"epoch": 3.77,
"learning_rate": 3.046632983496823e-07,
"loss": 5.807,
"step": 2332
},
{
"epoch": 3.77,
"learning_rate": 3.0315811232504916e-07,
"loss": 6.2453,
"step": 2334
},
{
"epoch": 3.78,
"learning_rate": 3.016559891124183e-07,
"loss": 6.0405,
"step": 2336
},
{
"epoch": 3.78,
"learning_rate": 3.001569353140346e-07,
"loss": 6.1985,
"step": 2338
},
{
"epoch": 3.78,
"learning_rate": 2.9866095751865297e-07,
"loss": 5.9151,
"step": 2340
},
{
"epoch": 3.79,
"learning_rate": 2.971680623015074e-07,
"loss": 6.1751,
"step": 2342
},
{
"epoch": 3.79,
"learning_rate": 2.9567825622428356e-07,
"loss": 6.2544,
"step": 2344
},
{
"epoch": 3.79,
"learning_rate": 2.9419154583508974e-07,
"loss": 5.9883,
"step": 2346
},
{
"epoch": 3.79,
"learning_rate": 2.9270793766842696e-07,
"loss": 6.0128,
"step": 2348
},
{
"epoch": 3.8,
"learning_rate": 2.9122743824516195e-07,
"loss": 6.0721,
"step": 2350
},
{
"epoch": 3.8,
"learning_rate": 2.897500540724972e-07,
"loss": 5.7388,
"step": 2352
},
{
"epoch": 3.8,
"learning_rate": 2.882757916439434e-07,
"loss": 6.0928,
"step": 2354
},
{
"epoch": 3.81,
"learning_rate": 2.868046574392898e-07,
"loss": 6.2217,
"step": 2356
},
{
"epoch": 3.81,
"learning_rate": 2.853366579245764e-07,
"loss": 6.2321,
"step": 2358
},
{
"epoch": 3.81,
"learning_rate": 2.838717995520652e-07,
"loss": 6.0278,
"step": 2360
},
{
"epoch": 3.82,
"learning_rate": 2.824100887602121e-07,
"loss": 6.1201,
"step": 2362
},
{
"epoch": 3.82,
"learning_rate": 2.8095153197363884e-07,
"loss": 5.7258,
"step": 2364
},
{
"epoch": 3.82,
"learning_rate": 2.794961356031044e-07,
"loss": 5.6672,
"step": 2366
},
{
"epoch": 3.83,
"learning_rate": 2.7804390604547556e-07,
"loss": 6.2415,
"step": 2368
},
{
"epoch": 3.83,
"learning_rate": 2.7659484968370216e-07,
"loss": 6.2554,
"step": 2370
},
{
"epoch": 3.83,
"learning_rate": 2.7514897288678574e-07,
"loss": 6.202,
"step": 2372
},
{
"epoch": 3.84,
"learning_rate": 2.73706282009753e-07,
"loss": 6.0814,
"step": 2374
},
{
"epoch": 3.84,
"learning_rate": 2.722667833936275e-07,
"loss": 6.1788,
"step": 2376
},
{
"epoch": 3.84,
"learning_rate": 2.708304833654023e-07,
"loss": 6.1503,
"step": 2378
},
{
"epoch": 3.85,
"learning_rate": 2.693973882380114e-07,
"loss": 5.7419,
"step": 2380
},
{
"epoch": 3.85,
"learning_rate": 2.6796750431030256e-07,
"loss": 6.1539,
"step": 2382
},
{
"epoch": 3.85,
"learning_rate": 2.6654083786700955e-07,
"loss": 5.6726,
"step": 2384
},
{
"epoch": 3.86,
"learning_rate": 2.651173951787242e-07,
"loss": 6.1883,
"step": 2386
},
{
"epoch": 3.86,
"learning_rate": 2.6369718250186915e-07,
"loss": 6.0832,
"step": 2388
},
{
"epoch": 3.86,
"learning_rate": 2.622802060786702e-07,
"loss": 6.3862,
"step": 2390
},
{
"epoch": 3.87,
"learning_rate": 2.6157293340899857e-07,
"loss": 5.9917,
"step": 2392
},
{
"epoch": 3.87,
"learning_rate": 2.601608230393345e-07,
"loss": 6.4018,
"step": 2394
},
{
"epoch": 3.87,
"learning_rate": 2.587519644666001e-07,
"loss": 5.7595,
"step": 2396
},
{
"epoch": 3.88,
"learning_rate": 2.573463638831166e-07,
"loss": 5.9134,
"step": 2398
},
{
"epoch": 3.88,
"learning_rate": 2.5594402746688636e-07,
"loss": 6.1006,
"step": 2400
},
{
"epoch": 3.88,
"learning_rate": 2.545449613815639e-07,
"loss": 6.3605,
"step": 2402
},
{
"epoch": 3.89,
"learning_rate": 2.531491717764297e-07,
"loss": 6.207,
"step": 2404
},
{
"epoch": 3.89,
"learning_rate": 2.517566647863637e-07,
"loss": 6.2167,
"step": 2406
},
{
"epoch": 3.89,
"learning_rate": 2.503674465318175e-07,
"loss": 5.9579,
"step": 2408
},
{
"epoch": 3.89,
"learning_rate": 2.4898152311878797e-07,
"loss": 5.4079,
"step": 2410
},
{
"epoch": 3.9,
"learning_rate": 2.4759890063879006e-07,
"loss": 5.4221,
"step": 2412
},
{
"epoch": 3.9,
"learning_rate": 2.462195851688306e-07,
"loss": 6.1915,
"step": 2414
},
{
"epoch": 3.9,
"learning_rate": 2.448435827713806e-07,
"loss": 6.202,
"step": 2416
},
{
"epoch": 3.91,
"learning_rate": 2.4347089949434984e-07,
"loss": 5.7824,
"step": 2418
},
{
"epoch": 3.91,
"learning_rate": 2.421015413710591e-07,
"loss": 6.23,
"step": 2420
},
{
"epoch": 3.91,
"learning_rate": 2.407355144202147e-07,
"loss": 6.0515,
"step": 2422
},
{
"epoch": 3.92,
"learning_rate": 2.39372824645881e-07,
"loss": 6.0654,
"step": 2424
},
{
"epoch": 3.92,
"learning_rate": 2.380134780374551e-07,
"loss": 5.7883,
"step": 2426
},
{
"epoch": 3.92,
"learning_rate": 2.3665748056963952e-07,
"loss": 5.7154,
"step": 2428
},
{
"epoch": 3.93,
"learning_rate": 2.3530483820241655e-07,
"loss": 5.7811,
"step": 2430
},
{
"epoch": 3.93,
"learning_rate": 2.339555568810221e-07,
"loss": 6.1055,
"step": 2432
},
{
"epoch": 3.93,
"learning_rate": 2.3260964253591898e-07,
"loss": 6.0643,
"step": 2434
},
{
"epoch": 3.94,
"learning_rate": 2.3126710108277148e-07,
"loss": 5.8521,
"step": 2436
},
{
"epoch": 3.94,
"learning_rate": 2.29927938422419e-07,
"loss": 6.3257,
"step": 2438
},
{
"epoch": 3.94,
"learning_rate": 2.2859216044085017e-07,
"loss": 6.2957,
"step": 2440
},
{
"epoch": 3.95,
"learning_rate": 2.2725977300917687e-07,
"loss": 6.2473,
"step": 2442
},
{
"epoch": 3.95,
"learning_rate": 2.2593078198360927e-07,
"loss": 5.7792,
"step": 2444
},
{
"epoch": 3.95,
"learning_rate": 2.2460519320542881e-07,
"loss": 5.0003,
"step": 2446
},
{
"epoch": 3.96,
"learning_rate": 2.2328301250096326e-07,
"loss": 5.6349,
"step": 2448
},
{
"epoch": 3.96,
"learning_rate": 2.219642456815607e-07,
"loss": 5.7349,
"step": 2450
},
{
"epoch": 3.96,
"learning_rate": 2.206488985435645e-07,
"loss": 5.8819,
"step": 2452
},
{
"epoch": 3.97,
"learning_rate": 2.1933697686828767e-07,
"loss": 5.5308,
"step": 2454
},
{
"epoch": 3.97,
"learning_rate": 2.180284864219869e-07,
"loss": 5.892,
"step": 2456
},
{
"epoch": 3.97,
"learning_rate": 2.1672343295583873e-07,
"loss": 5.7391,
"step": 2458
},
{
"epoch": 3.98,
"learning_rate": 2.154218222059122e-07,
"loss": 5.812,
"step": 2460
},
{
"epoch": 3.98,
"learning_rate": 2.1412365989314508e-07,
"loss": 5.4554,
"step": 2462
},
{
"epoch": 3.98,
"learning_rate": 2.1282895172331816e-07,
"loss": 5.7584,
"step": 2464
},
{
"epoch": 3.99,
"learning_rate": 2.1153770338703048e-07,
"loss": 5.6391,
"step": 2466
},
{
"epoch": 3.99,
"learning_rate": 2.102499205596743e-07,
"loss": 6.1335,
"step": 2468
},
{
"epoch": 3.99,
"learning_rate": 2.0896560890140913e-07,
"loss": 6.1353,
"step": 2470
},
{
"epoch": 4.0,
"learning_rate": 2.076847740571387e-07,
"loss": 5.8624,
"step": 2472
},
{
"epoch": 4.0,
"learning_rate": 2.0640742165648518e-07,
"loss": 5.3207,
"step": 2474
},
{
"epoch": 4.0,
"learning_rate": 2.0513355731376392e-07,
"loss": 5.5816,
"step": 2476
},
{
"epoch": 4.0,
"learning_rate": 2.0386318662795954e-07,
"loss": 6.1561,
"step": 2478
},
{
"epoch": 4.01,
"learning_rate": 2.0259631518270104e-07,
"loss": 6.2594,
"step": 2480
},
{
"epoch": 4.01,
"learning_rate": 2.013329485462374e-07,
"loss": 5.812,
"step": 2482
},
{
"epoch": 4.01,
"learning_rate": 2.000730922714128e-07,
"loss": 6.2848,
"step": 2484
},
{
"epoch": 4.02,
"learning_rate": 1.988167518956425e-07,
"loss": 6.1702,
"step": 2486
},
{
"epoch": 4.02,
"learning_rate": 1.975639329408887e-07,
"loss": 5.9088,
"step": 2488
},
{
"epoch": 4.02,
"learning_rate": 1.9631464091363537e-07,
"loss": 5.8951,
"step": 2490
},
{
"epoch": 4.03,
"learning_rate": 1.950688813048652e-07,
"loss": 5.9474,
"step": 2492
},
{
"epoch": 4.03,
"learning_rate": 1.9382665959003475e-07,
"loss": 6.1231,
"step": 2494
},
{
"epoch": 4.03,
"learning_rate": 1.9258798122905061e-07,
"loss": 5.8997,
"step": 2496
},
{
"epoch": 4.04,
"learning_rate": 1.9135285166624514e-07,
"loss": 5.191,
"step": 2498
},
{
"epoch": 4.04,
"learning_rate": 1.9012127633035302e-07,
"loss": 6.1011,
"step": 2500
},
{
"epoch": 4.04,
"learning_rate": 1.8889326063448696e-07,
"loss": 6.0884,
"step": 2502
},
{
"epoch": 4.05,
"learning_rate": 1.8766880997611424e-07,
"loss": 5.8573,
"step": 2504
},
{
"epoch": 4.05,
"learning_rate": 1.864479297370325e-07,
"loss": 5.6426,
"step": 2506
},
{
"epoch": 4.05,
"learning_rate": 1.8523062528334688e-07,
"loss": 5.9523,
"step": 2508
},
{
"epoch": 4.06,
"learning_rate": 1.840169019654455e-07,
"loss": 6.0296,
"step": 2510
},
{
"epoch": 4.06,
"learning_rate": 1.8280676511797665e-07,
"loss": 5.7186,
"step": 2512
},
{
"epoch": 4.06,
"learning_rate": 1.816002200598251e-07,
"loss": 5.8323,
"step": 2514
},
{
"epoch": 4.07,
"learning_rate": 1.803972720940884e-07,
"loss": 5.6306,
"step": 2516
},
{
"epoch": 4.07,
"learning_rate": 1.7919792650805455e-07,
"loss": 5.6633,
"step": 2518
},
{
"epoch": 4.07,
"learning_rate": 1.780021885731774e-07,
"loss": 6.2717,
"step": 2520
},
{
"epoch": 4.08,
"learning_rate": 1.768100635450549e-07,
"loss": 6.0222,
"step": 2522
},
{
"epoch": 4.08,
"learning_rate": 1.756215566634043e-07,
"loss": 6.0548,
"step": 2524
},
{
"epoch": 4.08,
"learning_rate": 1.744366731520408e-07,
"loss": 5.9844,
"step": 2526
},
{
"epoch": 4.09,
"learning_rate": 1.732554182188538e-07,
"loss": 6.0736,
"step": 2528
},
{
"epoch": 4.09,
"learning_rate": 1.7207779705578373e-07,
"loss": 6.1283,
"step": 2530
},
{
"epoch": 4.09,
"learning_rate": 1.7090381483880068e-07,
"loss": 6.0131,
"step": 2532
},
{
"epoch": 4.1,
"learning_rate": 1.697334767278792e-07,
"loss": 5.7743,
"step": 2534
},
{
"epoch": 4.1,
"learning_rate": 1.6856678786697777e-07,
"loss": 5.8014,
"step": 2536
},
{
"epoch": 4.1,
"learning_rate": 1.6740375338401524e-07,
"loss": 5.9811,
"step": 2538
},
{
"epoch": 4.11,
"learning_rate": 1.662443783908486e-07,
"loss": 6.236,
"step": 2540
},
{
"epoch": 4.11,
"learning_rate": 1.6508866798324983e-07,
"loss": 5.6924,
"step": 2542
},
{
"epoch": 4.11,
"learning_rate": 1.6393662724088475e-07,
"loss": 5.6433,
"step": 2544
},
{
"epoch": 4.11,
"learning_rate": 1.6278826122728928e-07,
"loss": 6.1821,
"step": 2546
},
{
"epoch": 4.12,
"learning_rate": 1.6164357498984893e-07,
"loss": 6.1518,
"step": 2548
},
{
"epoch": 4.12,
"learning_rate": 1.605025735597746e-07,
"loss": 6.4069,
"step": 2550
},
{
"epoch": 4.12,
"learning_rate": 1.5936526195208189e-07,
"loss": 6.1081,
"step": 2552
},
{
"epoch": 4.13,
"learning_rate": 1.582316451655684e-07,
"loss": 6.1456,
"step": 2554
},
{
"epoch": 4.13,
"learning_rate": 1.5710172818279222e-07,
"loss": 5.9343,
"step": 2556
},
{
"epoch": 4.13,
"learning_rate": 1.5597551597004964e-07,
"loss": 6.0851,
"step": 2558
},
{
"epoch": 4.14,
"learning_rate": 1.5485301347735348e-07,
"loss": 5.8427,
"step": 2560
},
{
"epoch": 4.14,
"learning_rate": 1.5373422563841131e-07,
"loss": 6.1268,
"step": 2562
},
{
"epoch": 4.14,
"learning_rate": 1.5261915737060382e-07,
"loss": 6.5334,
"step": 2564
},
{
"epoch": 4.15,
"learning_rate": 1.5150781357496312e-07,
"loss": 5.6949,
"step": 2566
},
{
"epoch": 4.15,
"learning_rate": 1.504001991361512e-07,
"loss": 6.3782,
"step": 2568
},
{
"epoch": 4.15,
"learning_rate": 1.4929631892243855e-07,
"loss": 6.0292,
"step": 2570
},
{
"epoch": 4.16,
"learning_rate": 1.4819617778568282e-07,
"loss": 6.235,
"step": 2572
},
{
"epoch": 4.16,
"learning_rate": 1.4709978056130712e-07,
"loss": 6.035,
"step": 2574
},
{
"epoch": 4.16,
"learning_rate": 1.460071320682793e-07,
"loss": 5.9589,
"step": 2576
},
{
"epoch": 4.17,
"learning_rate": 1.4491823710909045e-07,
"loss": 6.0054,
"step": 2578
},
{
"epoch": 4.17,
"learning_rate": 1.4383310046973362e-07,
"loss": 6.1784,
"step": 2580
},
{
"epoch": 4.17,
"learning_rate": 1.427517269196833e-07,
"loss": 5.0114,
"step": 2582
},
{
"epoch": 4.18,
"learning_rate": 1.4167412121187406e-07,
"loss": 5.903,
"step": 2584
},
{
"epoch": 4.18,
"learning_rate": 1.4060028808267964e-07,
"loss": 5.6401,
"step": 2586
},
{
"epoch": 4.18,
"learning_rate": 1.3953023225189243e-07,
"loss": 5.4205,
"step": 2588
},
{
"epoch": 4.19,
"learning_rate": 1.384639584227023e-07,
"loss": 6.2168,
"step": 2590
},
{
"epoch": 4.19,
"learning_rate": 1.3740147128167677e-07,
"loss": 6.3063,
"step": 2592
},
{
"epoch": 4.19,
"learning_rate": 1.363427754987395e-07,
"loss": 6.0987,
"step": 2594
},
{
"epoch": 4.2,
"learning_rate": 1.352878757271495e-07,
"loss": 5.9746,
"step": 2596
},
{
"epoch": 4.2,
"learning_rate": 1.342367766034821e-07,
"loss": 5.6164,
"step": 2598
},
{
"epoch": 4.2,
"learning_rate": 1.3318948274760734e-07,
"loss": 5.333,
"step": 2600
},
{
"epoch": 4.21,
"learning_rate": 1.3214599876266996e-07,
"loss": 5.9316,
"step": 2602
},
{
"epoch": 4.21,
"learning_rate": 1.311063292350696e-07,
"loss": 5.9,
"step": 2604
},
{
"epoch": 4.21,
"learning_rate": 1.3007047873444034e-07,
"loss": 5.6936,
"step": 2606
},
{
"epoch": 4.21,
"learning_rate": 1.2903845181363017e-07,
"loss": 5.7382,
"step": 2608
},
{
"epoch": 4.22,
"learning_rate": 1.2801025300868162e-07,
"loss": 5.9571,
"step": 2610
},
{
"epoch": 4.22,
"learning_rate": 1.2698588683881184e-07,
"loss": 5.3818,
"step": 2612
},
{
"epoch": 4.22,
"learning_rate": 1.2596535780639218e-07,
"loss": 6.2568,
"step": 2614
},
{
"epoch": 4.23,
"learning_rate": 1.2494867039692846e-07,
"loss": 5.8648,
"step": 2616
},
{
"epoch": 4.23,
"learning_rate": 1.2393582907904199e-07,
"loss": 6.2207,
"step": 2618
},
{
"epoch": 4.23,
"learning_rate": 1.2292683830444915e-07,
"loss": 5.7402,
"step": 2620
},
{
"epoch": 4.24,
"learning_rate": 1.2192170250794276e-07,
"loss": 5.8071,
"step": 2622
},
{
"epoch": 4.24,
"learning_rate": 1.2092042610737107e-07,
"loss": 6.3271,
"step": 2624
},
{
"epoch": 4.24,
"learning_rate": 1.1992301350361977e-07,
"loss": 6.1244,
"step": 2626
},
{
"epoch": 4.25,
"learning_rate": 1.1892946908059188e-07,
"loss": 5.9335,
"step": 2628
},
{
"epoch": 4.25,
"learning_rate": 1.1793979720518865e-07,
"loss": 6.1779,
"step": 2630
},
{
"epoch": 4.25,
"learning_rate": 1.1695400222729057e-07,
"loss": 5.7196,
"step": 2632
},
{
"epoch": 4.26,
"learning_rate": 1.1597208847973816e-07,
"loss": 5.7874,
"step": 2634
},
{
"epoch": 4.26,
"learning_rate": 1.149940602783126e-07,
"loss": 6.3751,
"step": 2636
},
{
"epoch": 4.26,
"learning_rate": 1.1401992192171739e-07,
"loss": 5.3379,
"step": 2638
},
{
"epoch": 4.27,
"learning_rate": 1.130496776915586e-07,
"loss": 5.9969,
"step": 2640
},
{
"epoch": 4.27,
"learning_rate": 1.120833318523271e-07,
"loss": 5.9765,
"step": 2642
},
{
"epoch": 4.27,
"learning_rate": 1.111208886513787e-07,
"loss": 5.4064,
"step": 2644
},
{
"epoch": 4.28,
"learning_rate": 1.1016235231891657e-07,
"loss": 6.1345,
"step": 2646
},
{
"epoch": 4.28,
"learning_rate": 1.0920772706797165e-07,
"loss": 6.2301,
"step": 2648
},
{
"epoch": 4.28,
"learning_rate": 1.0825701709438506e-07,
"loss": 6.2183,
"step": 2650
},
{
"epoch": 4.29,
"learning_rate": 1.0731022657678867e-07,
"loss": 6.2923,
"step": 2652
},
{
"epoch": 4.29,
"learning_rate": 1.0636735967658784e-07,
"loss": 5.9882,
"step": 2654
},
{
"epoch": 4.29,
"learning_rate": 1.0542842053794198e-07,
"loss": 6.1671,
"step": 2656
},
{
"epoch": 4.3,
"learning_rate": 1.0449341328774741e-07,
"loss": 6.11,
"step": 2658
},
{
"epoch": 4.3,
"learning_rate": 1.0356234203561831e-07,
"loss": 5.9462,
"step": 2660
},
{
"epoch": 4.3,
"learning_rate": 1.026352108738694e-07,
"loss": 6.0572,
"step": 2662
},
{
"epoch": 4.31,
"learning_rate": 1.0171202387749722e-07,
"loss": 6.0026,
"step": 2664
},
{
"epoch": 4.31,
"learning_rate": 1.0079278510416312e-07,
"loss": 5.2601,
"step": 2666
},
{
"epoch": 4.31,
"learning_rate": 9.987749859417483e-08,
"loss": 6.1281,
"step": 2668
},
{
"epoch": 4.32,
"learning_rate": 9.896616837046811e-08,
"loss": 5.5855,
"step": 2670
},
{
"epoch": 4.32,
"learning_rate": 9.805879843859055e-08,
"loss": 6.2328,
"step": 2672
},
{
"epoch": 4.32,
"learning_rate": 9.715539278668283e-08,
"loss": 6.3162,
"step": 2674
},
{
"epoch": 4.32,
"learning_rate": 9.625595538546171e-08,
"loss": 5.8745,
"step": 2676
},
{
"epoch": 4.33,
"learning_rate": 9.536049018820191e-08,
"loss": 6.216,
"step": 2678
},
{
"epoch": 4.33,
"learning_rate": 9.446900113071999e-08,
"loss": 6.1148,
"step": 2680
},
{
"epoch": 4.33,
"learning_rate": 9.358149213135569e-08,
"loss": 6.1663,
"step": 2682
},
{
"epoch": 4.34,
"learning_rate": 9.269796709095556e-08,
"loss": 6.1012,
"step": 2684
},
{
"epoch": 4.34,
"learning_rate": 9.181842989285559e-08,
"loss": 5.7841,
"step": 2686
},
{
"epoch": 4.34,
"learning_rate": 9.094288440286368e-08,
"loss": 6.1725,
"step": 2688
},
{
"epoch": 4.35,
"learning_rate": 9.007133446924342e-08,
"loss": 6.0184,
"step": 2690
},
{
"epoch": 4.35,
"learning_rate": 8.92037839226969e-08,
"loss": 6.0421,
"step": 2692
},
{
"epoch": 4.35,
"learning_rate": 8.834023657634737e-08,
"loss": 5.9721,
"step": 2694
},
{
"epoch": 4.36,
"learning_rate": 8.748069622572385e-08,
"loss": 6.104,
"step": 2696
},
{
"epoch": 4.36,
"learning_rate": 8.662516664874254e-08,
"loss": 6.0882,
"step": 2698
},
{
"epoch": 4.36,
"learning_rate": 8.57736516056915e-08,
"loss": 5.938,
"step": 2700
},
{
"epoch": 4.37,
"learning_rate": 8.492615483921395e-08,
"loss": 6.0638,
"step": 2702
},
{
"epoch": 4.37,
"learning_rate": 8.408268007429153e-08,
"loss": 6.2967,
"step": 2704
},
{
"epoch": 4.37,
"learning_rate": 8.324323101822827e-08,
"loss": 5.7991,
"step": 2706
},
{
"epoch": 4.38,
"learning_rate": 8.240781136063346e-08,
"loss": 5.8908,
"step": 2708
},
{
"epoch": 4.38,
"learning_rate": 8.157642477340709e-08,
"loss": 5.9115,
"step": 2710
},
{
"epoch": 4.38,
"learning_rate": 8.074907491072202e-08,
"loss": 6.4553,
"step": 2712
},
{
"epoch": 4.39,
"learning_rate": 7.992576540900875e-08,
"loss": 6.1312,
"step": 2714
},
{
"epoch": 4.39,
"learning_rate": 7.910649988693907e-08,
"loss": 5.9191,
"step": 2716
},
{
"epoch": 4.39,
"learning_rate": 7.82912819454109e-08,
"loss": 6.352,
"step": 2718
},
{
"epoch": 4.4,
"learning_rate": 7.748011516753139e-08,
"loss": 5.7628,
"step": 2720
},
{
"epoch": 4.4,
"learning_rate": 7.667300311860192e-08,
"loss": 5.9204,
"step": 2722
},
{
"epoch": 4.4,
"learning_rate": 7.586994934610225e-08,
"loss": 5.8677,
"step": 2724
},
{
"epoch": 4.41,
"learning_rate": 7.507095737967495e-08,
"loss": 6.1465,
"step": 2726
},
{
"epoch": 4.41,
"learning_rate": 7.427603073110966e-08,
"loss": 5.5993,
"step": 2728
},
{
"epoch": 4.41,
"learning_rate": 7.348517289432799e-08,
"loss": 6.0467,
"step": 2730
},
{
"epoch": 4.42,
"learning_rate": 7.269838734536771e-08,
"loss": 5.6803,
"step": 2732
},
{
"epoch": 4.42,
"learning_rate": 7.191567754236827e-08,
"loss": 5.7175,
"step": 2734
},
{
"epoch": 4.42,
"learning_rate": 7.113704692555467e-08,
"loss": 6.099,
"step": 2736
},
{
"epoch": 4.43,
"learning_rate": 7.03624989172228e-08,
"loss": 5.8287,
"step": 2738
},
{
"epoch": 4.43,
"learning_rate": 6.959203692172489e-08,
"loss": 6.0513,
"step": 2740
},
{
"epoch": 4.43,
"learning_rate": 6.8825664325453e-08,
"loss": 5.8603,
"step": 2742
},
{
"epoch": 4.43,
"learning_rate": 6.806338449682614e-08,
"loss": 5.7971,
"step": 2744
},
{
"epoch": 4.44,
"learning_rate": 6.7305200786274e-08,
"loss": 6.3585,
"step": 2746
},
{
"epoch": 4.44,
"learning_rate": 6.65511165262227e-08,
"loss": 5.9156,
"step": 2748
},
{
"epoch": 4.44,
"learning_rate": 6.580113503108031e-08,
"loss": 6.1018,
"step": 2750
},
{
"epoch": 4.45,
"learning_rate": 6.50552595972218e-08,
"loss": 6.4561,
"step": 2752
},
{
"epoch": 4.45,
"learning_rate": 6.431349350297555e-08,
"loss": 6.1506,
"step": 2754
},
{
"epoch": 4.45,
"learning_rate": 6.35758400086076e-08,
"loss": 6.0168,
"step": 2756
},
{
"epoch": 4.46,
"learning_rate": 6.284230235630827e-08,
"loss": 5.9695,
"step": 2758
},
{
"epoch": 4.46,
"learning_rate": 6.211288377017754e-08,
"loss": 6.1156,
"step": 2760
},
{
"epoch": 4.46,
"learning_rate": 6.138758745621086e-08,
"loss": 6.0857,
"step": 2762
},
{
"epoch": 4.47,
"learning_rate": 6.066641660228522e-08,
"loss": 6.2212,
"step": 2764
},
{
"epoch": 4.47,
"learning_rate": 5.994937437814518e-08,
"loss": 5.5856,
"step": 2766
},
{
"epoch": 4.47,
"learning_rate": 5.923646393538906e-08,
"loss": 5.8842,
"step": 2768
},
{
"epoch": 4.48,
"learning_rate": 5.8527688407454254e-08,
"loss": 6.084,
"step": 2770
},
{
"epoch": 4.48,
"learning_rate": 5.78230509096046e-08,
"loss": 6.1464,
"step": 2772
},
{
"epoch": 4.48,
"learning_rate": 5.712255453891579e-08,
"loss": 5.7051,
"step": 2774
},
{
"epoch": 4.49,
"learning_rate": 5.642620237426243e-08,
"loss": 6.2802,
"step": 2776
},
{
"epoch": 4.49,
"learning_rate": 5.573399747630403e-08,
"loss": 5.8334,
"step": 2778
},
{
"epoch": 4.49,
"learning_rate": 5.5045942887471885e-08,
"loss": 6.1747,
"step": 2780
},
{
"epoch": 4.5,
"learning_rate": 5.436204163195479e-08,
"loss": 6.1003,
"step": 2782
},
{
"epoch": 4.5,
"learning_rate": 5.36822967156878e-08,
"loss": 6.2131,
"step": 2784
},
{
"epoch": 4.5,
"learning_rate": 5.30067111263367e-08,
"loss": 5.2912,
"step": 2786
},
{
"epoch": 4.51,
"learning_rate": 5.233528783328634e-08,
"loss": 5.8937,
"step": 2788
},
{
"epoch": 4.51,
"learning_rate": 5.166802978762696e-08,
"loss": 5.1261,
"step": 2790
},
{
"epoch": 4.51,
"learning_rate": 5.1004939922141274e-08,
"loss": 6.0996,
"step": 2792
},
{
"epoch": 4.52,
"learning_rate": 5.034602115129205e-08,
"loss": 5.7728,
"step": 2794
},
{
"epoch": 4.52,
"learning_rate": 4.969127637120862e-08,
"loss": 6.1683,
"step": 2796
},
{
"epoch": 4.52,
"learning_rate": 4.904070845967467e-08,
"loss": 5.7688,
"step": 2798
},
{
"epoch": 4.53,
"learning_rate": 4.839432027611534e-08,
"loss": 6.2196,
"step": 2800
},
{
"epoch": 4.53,
"learning_rate": 4.7752114661584685e-08,
"loss": 5.9887,
"step": 2802
},
{
"epoch": 4.53,
"learning_rate": 4.711409443875325e-08,
"loss": 6.1383,
"step": 2804
},
{
"epoch": 4.53,
"learning_rate": 4.648026241189562e-08,
"loss": 5.973,
"step": 2806
},
{
"epoch": 4.54,
"learning_rate": 4.585062136687812e-08,
"loss": 5.6179,
"step": 2808
},
{
"epoch": 4.54,
"learning_rate": 4.522517407114645e-08,
"loss": 6.1717,
"step": 2810
},
{
"epoch": 4.54,
"learning_rate": 4.460392327371376e-08,
"loss": 5.5463,
"step": 2812
},
{
"epoch": 4.55,
"learning_rate": 4.3986871705148586e-08,
"loss": 5.1993,
"step": 2814
},
{
"epoch": 4.55,
"learning_rate": 4.337402207756235e-08,
"loss": 6.0538,
"step": 2816
},
{
"epoch": 4.55,
"learning_rate": 4.276537708459782e-08,
"loss": 5.9359,
"step": 2818
},
{
"epoch": 4.56,
"learning_rate": 4.2160939401417516e-08,
"loss": 6.2784,
"step": 2820
},
{
"epoch": 4.56,
"learning_rate": 4.156071168469144e-08,
"loss": 6.0201,
"step": 2822
},
{
"epoch": 4.56,
"learning_rate": 4.096469657258572e-08,
"loss": 6.1371,
"step": 2824
},
{
"epoch": 4.57,
"learning_rate": 4.037289668475086e-08,
"loss": 5.6743,
"step": 2826
},
{
"epoch": 4.57,
"learning_rate": 3.97853146223105e-08,
"loss": 6.4075,
"step": 2828
},
{
"epoch": 4.57,
"learning_rate": 3.920195296784956e-08,
"loss": 5.8316,
"step": 2830
},
{
"epoch": 4.58,
"learning_rate": 3.862281428540315e-08,
"loss": 5.4858,
"step": 2832
},
{
"epoch": 4.58,
"learning_rate": 3.8047901120445315e-08,
"loss": 5.7017,
"step": 2834
},
{
"epoch": 4.58,
"learning_rate": 3.747721599987763e-08,
"loss": 5.648,
"step": 2836
},
{
"epoch": 4.59,
"learning_rate": 3.691076143201832e-08,
"loss": 5.9246,
"step": 2838
},
{
"epoch": 4.59,
"learning_rate": 3.634853990659126e-08,
"loss": 6.1155,
"step": 2840
},
{
"epoch": 4.59,
"learning_rate": 3.579055389471508e-08,
"loss": 6.0291,
"step": 2842
},
{
"epoch": 4.6,
"learning_rate": 3.523680584889188e-08,
"loss": 5.8402,
"step": 2844
},
{
"epoch": 4.6,
"learning_rate": 3.4687298202996654e-08,
"loss": 6.3115,
"step": 2846
},
{
"epoch": 4.6,
"learning_rate": 3.414203337226695e-08,
"loss": 5.715,
"step": 2848
},
{
"epoch": 4.61,
"learning_rate": 3.360101375329194e-08,
"loss": 5.9762,
"step": 2850
},
{
"epoch": 4.61,
"learning_rate": 3.3064241724001794e-08,
"loss": 5.6715,
"step": 2852
},
{
"epoch": 4.61,
"learning_rate": 3.253171964365731e-08,
"loss": 6.0563,
"step": 2854
},
{
"epoch": 4.62,
"learning_rate": 3.200344985283965e-08,
"loss": 6.2123,
"step": 2856
},
{
"epoch": 4.62,
"learning_rate": 3.147943467344016e-08,
"loss": 5.653,
"step": 2858
},
{
"epoch": 4.62,
"learning_rate": 3.0959676408649824e-08,
"loss": 5.8813,
"step": 2860
},
{
"epoch": 4.63,
"learning_rate": 3.0444177342949464e-08,
"loss": 5.839,
"step": 2862
},
{
"epoch": 4.63,
"learning_rate": 2.993293974209921e-08,
"loss": 6.2093,
"step": 2864
},
{
"epoch": 4.63,
"learning_rate": 2.9425965853129285e-08,
"loss": 6.016,
"step": 2866
},
{
"epoch": 4.64,
"learning_rate": 2.8923257904329478e-08,
"loss": 6.3201,
"step": 2868
},
{
"epoch": 4.64,
"learning_rate": 2.8424818105239777e-08,
"loss": 6.1385,
"step": 2870
},
{
"epoch": 4.64,
"learning_rate": 2.7930648646640186e-08,
"loss": 5.7025,
"step": 2872
},
{
"epoch": 4.64,
"learning_rate": 2.7440751700541607e-08,
"loss": 6.1556,
"step": 2874
},
{
"epoch": 4.65,
"learning_rate": 2.6955129420176193e-08,
"loss": 6.0627,
"step": 2876
},
{
"epoch": 4.65,
"learning_rate": 2.6473783939987448e-08,
"loss": 6.0179,
"step": 2878
},
{
"epoch": 4.65,
"learning_rate": 2.599671737562137e-08,
"loss": 5.8187,
"step": 2880
},
{
"epoch": 4.66,
"learning_rate": 2.5523931823916768e-08,
"loss": 6.3774,
"step": 2882
},
{
"epoch": 4.66,
"learning_rate": 2.505542936289651e-08,
"loss": 5.7561,
"step": 2884
},
{
"epoch": 4.66,
"learning_rate": 2.4591212051757958e-08,
"loss": 5.9467,
"step": 2886
},
{
"epoch": 4.67,
"learning_rate": 2.4131281930864e-08,
"loss": 5.6536,
"step": 2888
},
{
"epoch": 4.67,
"learning_rate": 2.3675641021734026e-08,
"loss": 5.851,
"step": 2890
},
{
"epoch": 4.67,
"learning_rate": 2.3224291327035404e-08,
"loss": 6.0569,
"step": 2892
},
{
"epoch": 4.68,
"learning_rate": 2.2777234830574476e-08,
"loss": 6.06,
"step": 2894
},
{
"epoch": 4.68,
"learning_rate": 2.2334473497287453e-08,
"loss": 6.3221,
"step": 2896
},
{
"epoch": 4.68,
"learning_rate": 2.189600927323243e-08,
"loss": 5.5794,
"step": 2898
},
{
"epoch": 4.69,
"learning_rate": 2.146184408558038e-08,
"loss": 6.3434,
"step": 2900
},
{
"epoch": 4.69,
"learning_rate": 2.1031979842606852e-08,
"loss": 5.804,
"step": 2902
},
{
"epoch": 4.69,
"learning_rate": 2.0606418433683824e-08,
"loss": 5.9346,
"step": 2904
},
{
"epoch": 4.7,
"learning_rate": 2.018516172927065e-08,
"loss": 5.8539,
"step": 2906
},
{
"epoch": 4.7,
"learning_rate": 1.9768211580906468e-08,
"loss": 6.2849,
"step": 2908
},
{
"epoch": 4.7,
"learning_rate": 1.9355569821202234e-08,
"loss": 6.0772,
"step": 2910
},
{
"epoch": 4.71,
"learning_rate": 1.8947238263832043e-08,
"loss": 5.8683,
"step": 2912
},
{
"epoch": 4.71,
"learning_rate": 1.8543218703525376e-08,
"loss": 5.4705,
"step": 2914
},
{
"epoch": 4.71,
"learning_rate": 1.8143512916059644e-08,
"loss": 5.9988,
"step": 2916
},
{
"epoch": 4.72,
"learning_rate": 1.7748122658251872e-08,
"loss": 5.9572,
"step": 2918
},
{
"epoch": 4.72,
"learning_rate": 1.735704966795104e-08,
"loss": 6.1466,
"step": 2920
},
{
"epoch": 4.72,
"learning_rate": 1.697029566403074e-08,
"loss": 5.9623,
"step": 2922
},
{
"epoch": 4.73,
"learning_rate": 1.658786234638132e-08,
"loss": 5.9641,
"step": 2924
},
{
"epoch": 4.73,
"learning_rate": 1.6209751395902416e-08,
"loss": 6.1721,
"step": 2926
},
{
"epoch": 4.73,
"learning_rate": 1.5835964474495865e-08,
"loss": 6.0639,
"step": 2928
},
{
"epoch": 4.74,
"learning_rate": 1.5466503225058046e-08,
"loss": 6.2786,
"step": 2930
},
{
"epoch": 4.74,
"learning_rate": 1.5101369271472987e-08,
"loss": 5.4821,
"step": 2932
},
{
"epoch": 4.74,
"learning_rate": 1.4740564218605034e-08,
"loss": 5.7989,
"step": 2934
},
{
"epoch": 4.75,
"learning_rate": 1.4384089652291543e-08,
"loss": 6.0321,
"step": 2936
},
{
"epoch": 4.75,
"learning_rate": 1.4031947139336641e-08,
"loss": 5.9969,
"step": 2938
},
{
"epoch": 4.75,
"learning_rate": 1.3684138227503472e-08,
"loss": 6.1363,
"step": 2940
},
{
"epoch": 4.75,
"learning_rate": 1.3340664445507966e-08,
"loss": 6.1536,
"step": 2942
},
{
"epoch": 4.76,
"learning_rate": 1.3001527303012183e-08,
"loss": 6.0459,
"step": 2944
},
{
"epoch": 4.76,
"learning_rate": 1.2666728290617212e-08,
"loss": 5.8207,
"step": 2946
},
{
"epoch": 4.76,
"learning_rate": 1.2336268879856726e-08,
"loss": 6.0452,
"step": 2948
},
{
"epoch": 4.77,
"learning_rate": 1.2010150523190988e-08,
"loss": 5.9519,
"step": 2950
},
{
"epoch": 4.77,
"learning_rate": 1.1688374654000076e-08,
"loss": 5.9094,
"step": 2952
},
{
"epoch": 4.77,
"learning_rate": 1.1370942686577345e-08,
"loss": 5.9469,
"step": 2954
},
{
"epoch": 4.78,
"learning_rate": 1.1057856016123857e-08,
"loss": 5.5568,
"step": 2956
},
{
"epoch": 4.78,
"learning_rate": 1.0749116018741621e-08,
"loss": 6.0991,
"step": 2958
},
{
"epoch": 4.78,
"learning_rate": 1.0444724051428155e-08,
"loss": 6.1865,
"step": 2960
},
{
"epoch": 4.79,
"learning_rate": 1.0144681452069703e-08,
"loss": 5.8771,
"step": 2962
},
{
"epoch": 4.79,
"learning_rate": 9.84898953943636e-09,
"loss": 6.2052,
"step": 2964
},
{
"epoch": 4.79,
"learning_rate": 9.5576496131754e-09,
"loss": 5.171,
"step": 2966
},
{
"epoch": 4.8,
"learning_rate": 9.270662953806186e-09,
"loss": 6.3066,
"step": 2968
},
{
"epoch": 4.8,
"learning_rate": 8.988030822713821e-09,
"loss": 5.9792,
"step": 2970
},
{
"epoch": 4.8,
"learning_rate": 8.709754462144615e-09,
"loss": 5.3831,
"step": 2972
},
{
"epoch": 4.81,
"learning_rate": 8.435835095199628e-09,
"loss": 5.7783,
"step": 2974
},
{
"epoch": 4.81,
"learning_rate": 8.166273925830135e-09,
"loss": 6.1661,
"step": 2976
},
{
"epoch": 4.81,
"learning_rate": 7.90107213883151e-09,
"loss": 5.8294,
"step": 2978
},
{
"epoch": 4.82,
"learning_rate": 7.640230899838784e-09,
"loss": 5.9972,
"step": 2980
},
{
"epoch": 4.82,
"learning_rate": 7.3837513553209885e-09,
"loss": 6.2389,
"step": 2982
},
{
"epoch": 4.82,
"learning_rate": 7.131634632576267e-09,
"loss": 5.7572,
"step": 2984
},
{
"epoch": 4.83,
"learning_rate": 6.883881839727101e-09,
"loss": 5.4765,
"step": 2986
},
{
"epoch": 4.83,
"learning_rate": 6.640494065715207e-09,
"loss": 5.7902,
"step": 2988
},
{
"epoch": 4.83,
"learning_rate": 6.40147238029709e-09,
"loss": 6.2533,
"step": 2990
},
{
"epoch": 4.84,
"learning_rate": 6.166817834038607e-09,
"loss": 5.929,
"step": 2992
},
{
"epoch": 4.84,
"learning_rate": 5.936531458311189e-09,
"loss": 6.0872,
"step": 2994
},
{
"epoch": 4.84,
"learning_rate": 5.710614265287073e-09,
"loss": 5.8763,
"step": 2996
},
{
"epoch": 4.85,
"learning_rate": 5.489067247934298e-09,
"loss": 6.1908,
"step": 2998
},
{
"epoch": 4.85,
"learning_rate": 5.27189138001316e-09,
"loss": 5.9865,
"step": 3000
},
{
"epoch": 4.85,
"learning_rate": 5.059087616071212e-09,
"loss": 5.8444,
"step": 3002
},
{
"epoch": 4.85,
"learning_rate": 4.850656891439819e-09,
"loss": 5.8512,
"step": 3004
},
{
"epoch": 4.86,
"learning_rate": 4.646600122229283e-09,
"loss": 6.4206,
"step": 3006
},
{
"epoch": 4.86,
"learning_rate": 4.446918205325389e-09,
"loss": 6.4128,
"step": 3008
},
{
"epoch": 4.86,
"learning_rate": 4.251612018385087e-09,
"loss": 5.4091,
"step": 3010
},
{
"epoch": 4.87,
"learning_rate": 4.060682419832928e-09,
"loss": 6.5193,
"step": 3012
},
{
"epoch": 4.87,
"learning_rate": 3.874130248857077e-09,
"loss": 6.1928,
"step": 3014
},
{
"epoch": 4.87,
"learning_rate": 3.691956325405643e-09,
"loss": 5.8905,
"step": 3016
},
{
"epoch": 4.88,
"learning_rate": 3.5141614501831285e-09,
"loss": 6.2318,
"step": 3018
},
{
"epoch": 4.88,
"learning_rate": 3.340746404647099e-09,
"loss": 5.7658,
"step": 3020
},
{
"epoch": 4.88,
"learning_rate": 3.1717119510044076e-09,
"loss": 5.5051,
"step": 3022
},
{
"epoch": 4.89,
"learning_rate": 3.007058832207976e-09,
"loss": 5.8644,
"step": 3024
},
{
"epoch": 4.89,
"learning_rate": 2.8467877719535736e-09,
"loss": 6.4457,
"step": 3026
},
{
"epoch": 4.89,
"learning_rate": 2.690899474676822e-09,
"loss": 5.5942,
"step": 3028
},
{
"epoch": 4.9,
"learning_rate": 2.5393946255495293e-09,
"loss": 6.0458,
"step": 3030
},
{
"epoch": 4.9,
"learning_rate": 2.3922738904773587e-09,
"loss": 6.0967,
"step": 3032
},
{
"epoch": 4.9,
"learning_rate": 2.249537916096389e-09,
"loss": 6.1345,
"step": 3034
},
{
"epoch": 4.91,
"learning_rate": 2.1111873297706695e-09,
"loss": 5.7031,
"step": 3036
},
{
"epoch": 4.91,
"learning_rate": 1.9772227395888905e-09,
"loss": 6.0675,
"step": 3038
},
{
"epoch": 4.91,
"learning_rate": 1.8476447343624968e-09,
"loss": 5.5652,
"step": 3040
},
{
"epoch": 4.92,
"learning_rate": 1.7224538836223545e-09,
"loss": 6.0337,
"step": 3042
},
{
"epoch": 4.92,
"learning_rate": 1.6016507376169774e-09,
"loss": 6.1635,
"step": 3044
},
{
"epoch": 4.92,
"learning_rate": 1.4852358273091947e-09,
"loss": 5.2145,
"step": 3046
},
{
"epoch": 4.93,
"learning_rate": 1.3732096643747082e-09,
"loss": 5.837,
"step": 3048
},
{
"epoch": 4.93,
"learning_rate": 1.2655727411994276e-09,
"loss": 6.4039,
"step": 3050
},
{
"epoch": 4.93,
"learning_rate": 1.1623255308772507e-09,
"loss": 6.0581,
"step": 3052
},
{
"epoch": 4.94,
"learning_rate": 1.0634684872079525e-09,
"loss": 5.8881,
"step": 3054
},
{
"epoch": 4.94,
"learning_rate": 9.690020446956327e-10,
"loss": 5.9365,
"step": 3056
},
{
"epoch": 4.94,
"learning_rate": 8.789266185461608e-10,
"loss": 6.265,
"step": 3058
},
{
"epoch": 4.95,
"learning_rate": 7.932426046660667e-10,
"loss": 6.209,
"step": 3060
},
{
"epoch": 4.95,
"learning_rate": 7.119503796599868e-10,
"loss": 5.9455,
"step": 3062
},
{
"epoch": 4.95,
"learning_rate": 6.350503008296648e-10,
"loss": 6.1457,
"step": 3064
},
{
"epoch": 4.96,
"learning_rate": 5.625427061722865e-10,
"loss": 5.8044,
"step": 3066
},
{
"epoch": 4.96,
"learning_rate": 4.944279143784813e-10,
"loss": 6.093,
"step": 3068
},
{
"epoch": 4.96,
"learning_rate": 4.3070622483165617e-10,
"loss": 6.2533,
"step": 3070
},
{
"epoch": 4.96,
"learning_rate": 3.7137791760610824e-10,
"loss": 6.0355,
"step": 3072
},
{
"epoch": 4.97,
"learning_rate": 3.1644325346624757e-10,
"loss": 5.6227,
"step": 3074
},
{
"epoch": 4.97,
"learning_rate": 2.659024738648208e-10,
"loss": 6.157,
"step": 3076
},
{
"epoch": 4.97,
"learning_rate": 2.1975580094257818e-10,
"loss": 5.9123,
"step": 3078
},
{
"epoch": 4.98,
"learning_rate": 1.7800343752683021e-10,
"loss": 5.2241,
"step": 3080
},
{
"epoch": 4.98,
"learning_rate": 1.406455671308926e-10,
"loss": 6.0805,
"step": 3082
},
{
"epoch": 4.98,
"learning_rate": 1.076823539526428e-10,
"loss": 5.9766,
"step": 3084
},
{
"epoch": 4.99,
"learning_rate": 7.911394287452022e-11,
"loss": 5.9986,
"step": 3086
},
{
"epoch": 4.99,
"learning_rate": 5.494045946263792e-11,
"loss": 6.1283,
"step": 3088
},
{
"epoch": 4.99,
"learning_rate": 3.5162009966227535e-11,
"loss": 5.881,
"step": 3090
},
{
"epoch": 4.99,
"step": 3090,
"total_flos": 1.1758946324001587e+17,
"train_loss": 6.616625037085277,
"train_runtime": 22066.2449,
"train_samples_per_second": 8.973,
"train_steps_per_second": 0.14
}
],
"logging_steps": 2,
"max_steps": 3090,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 4000,
"total_flos": 1.1758946324001587e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}