diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,50191 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 44.123389301054274, + "eval_steps": 500, + "global_step": 4181000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 4.999736167248858e-05, + "loss": 5.8079, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 4.999472334497716e-05, + "loss": 4.48, + "step": 1000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999208501746573e-05, + "loss": 4.1412, + "step": 1500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989446689954304e-05, + "loss": 3.8521, + "step": 2000 + }, + { + "epoch": 0.03, + "learning_rate": 4.998680836244288e-05, + "loss": 3.7444, + "step": 2500 + }, + { + "epoch": 0.03, + "learning_rate": 4.998417003493146e-05, + "loss": 3.547, + "step": 3000 + }, + { + "epoch": 0.04, + "learning_rate": 4.998153170742004e-05, + "loss": 3.3731, + "step": 3500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9978893379908606e-05, + "loss": 3.2642, + "step": 4000 + }, + { + "epoch": 0.05, + "learning_rate": 4.997625505239719e-05, + "loss": 3.1457, + "step": 4500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9973616724885764e-05, + "loss": 3.0872, + "step": 5000 + }, + { + "epoch": 0.06, + "learning_rate": 4.997097839737434e-05, + "loss": 2.9484, + "step": 5500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9968340069862915e-05, + "loss": 2.8687, + "step": 6000 + }, + { + "epoch": 0.07, + "learning_rate": 4.996570174235149e-05, + "loss": 2.8459, + "step": 6500 + }, + { + "epoch": 0.07, + "learning_rate": 4.9963063414840066e-05, + "loss": 2.8518, + "step": 7000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996042508732864e-05, + "loss": 2.7294, + "step": 7500 + }, + { + "epoch": 0.08, + "learning_rate": 4.9957786759817224e-05, + "loss": 2.6635, + "step": 8000 + }, + { + "epoch": 0.09, + "learning_rate": 4.995514843230579e-05, + "loss": 2.6407, + "step": 8500 + }, + { + "epoch": 0.09, + "learning_rate": 4.995251010479437e-05, + "loss": 2.6268, + "step": 9000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994987177728295e-05, + "loss": 2.6036, + "step": 9500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9947233449771526e-05, + "loss": 2.5329, + "step": 10000 + }, + { + "epoch": 0.11, + "learning_rate": 4.99445951222601e-05, + "loss": 2.6029, + "step": 10500 + }, + { + "epoch": 0.12, + "learning_rate": 4.994195679474868e-05, + "loss": 2.506, + "step": 11000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993931846723725e-05, + "loss": 2.4599, + "step": 11500 + }, + { + "epoch": 0.13, + "learning_rate": 4.993668013972583e-05, + "loss": 2.5108, + "step": 12000 + }, + { + "epoch": 0.13, + "learning_rate": 4.99340418122144e-05, + "loss": 2.4693, + "step": 12500 + }, + { + "epoch": 0.14, + "learning_rate": 4.9931403484702985e-05, + "loss": 2.4383, + "step": 13000 + }, + { + "epoch": 0.14, + "learning_rate": 4.9928765157191554e-05, + "loss": 2.4124, + "step": 13500 + }, + { + "epoch": 0.15, + "learning_rate": 4.992612682968013e-05, + "loss": 2.3671, + "step": 14000 + }, + { + "epoch": 0.15, + "learning_rate": 4.9923488502168705e-05, + "loss": 2.3858, + "step": 14500 + }, + { + "epoch": 0.16, + "learning_rate": 4.992085017465729e-05, + "loss": 2.3952, + "step": 15000 + }, + { + "epoch": 0.16, + "learning_rate": 4.9918211847145856e-05, + "loss": 2.3724, + "step": 15500 + }, + { + "epoch": 0.17, + "learning_rate": 4.991557351963443e-05, + "loss": 2.4513, + "step": 16000 + }, + { + "epoch": 0.17, + "learning_rate": 4.9912935192123014e-05, + "loss": 2.2696, + "step": 16500 + }, + { + "epoch": 0.18, + "learning_rate": 4.991029686461159e-05, + "loss": 2.3472, + "step": 17000 + }, + { + "epoch": 0.18, + "learning_rate": 4.9907658537100165e-05, + "loss": 2.3475, + "step": 17500 + }, + { + "epoch": 0.19, + "learning_rate": 4.990502020958874e-05, + "loss": 2.3289, + "step": 18000 + }, + { + "epoch": 0.2, + "learning_rate": 4.9902381882077316e-05, + "loss": 2.2573, + "step": 18500 + }, + { + "epoch": 0.2, + "learning_rate": 4.989974355456589e-05, + "loss": 2.2714, + "step": 19000 + }, + { + "epoch": 0.21, + "learning_rate": 4.989710522705447e-05, + "loss": 2.3415, + "step": 19500 + }, + { + "epoch": 0.21, + "learning_rate": 4.989446689954305e-05, + "loss": 2.2987, + "step": 20000 + }, + { + "epoch": 0.22, + "learning_rate": 4.989182857203162e-05, + "loss": 2.285, + "step": 20500 + }, + { + "epoch": 0.22, + "learning_rate": 4.988919024452019e-05, + "loss": 2.2427, + "step": 21000 + }, + { + "epoch": 0.23, + "learning_rate": 4.9886551917008775e-05, + "loss": 2.2315, + "step": 21500 + }, + { + "epoch": 0.23, + "learning_rate": 4.988391358949735e-05, + "loss": 2.2075, + "step": 22000 + }, + { + "epoch": 0.24, + "learning_rate": 4.9881275261985926e-05, + "loss": 2.2379, + "step": 22500 + }, + { + "epoch": 0.24, + "learning_rate": 4.98786369344745e-05, + "loss": 2.2669, + "step": 23000 + }, + { + "epoch": 0.25, + "learning_rate": 4.987599860696308e-05, + "loss": 2.1572, + "step": 23500 + }, + { + "epoch": 0.25, + "learning_rate": 4.987336027945165e-05, + "loss": 2.2218, + "step": 24000 + }, + { + "epoch": 0.26, + "learning_rate": 4.987072195194023e-05, + "loss": 2.2017, + "step": 24500 + }, + { + "epoch": 0.26, + "learning_rate": 4.9868083624428804e-05, + "loss": 2.1509, + "step": 25000 + }, + { + "epoch": 0.27, + "learning_rate": 4.986544529691738e-05, + "loss": 2.2906, + "step": 25500 + }, + { + "epoch": 0.27, + "learning_rate": 4.9862806969405955e-05, + "loss": 2.1208, + "step": 26000 + }, + { + "epoch": 0.28, + "learning_rate": 4.986016864189453e-05, + "loss": 2.2166, + "step": 26500 + }, + { + "epoch": 0.28, + "learning_rate": 4.985753031438311e-05, + "loss": 2.1473, + "step": 27000 + }, + { + "epoch": 0.29, + "learning_rate": 4.985489198687168e-05, + "loss": 2.1183, + "step": 27500 + }, + { + "epoch": 0.3, + "learning_rate": 4.985225365936026e-05, + "loss": 2.1408, + "step": 28000 + }, + { + "epoch": 0.3, + "learning_rate": 4.984961533184884e-05, + "loss": 2.1855, + "step": 28500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9846977004337415e-05, + "loss": 2.1993, + "step": 29000 + }, + { + "epoch": 0.31, + "learning_rate": 4.984433867682599e-05, + "loss": 2.1275, + "step": 29500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9841700349314566e-05, + "loss": 2.0987, + "step": 30000 + }, + { + "epoch": 0.32, + "learning_rate": 4.983906202180314e-05, + "loss": 2.1306, + "step": 30500 + }, + { + "epoch": 0.33, + "learning_rate": 4.9836423694291716e-05, + "loss": 2.1015, + "step": 31000 + }, + { + "epoch": 0.33, + "learning_rate": 4.983378536678029e-05, + "loss": 2.1981, + "step": 31500 + }, + { + "epoch": 0.34, + "learning_rate": 4.9831147039268874e-05, + "loss": 2.1527, + "step": 32000 + }, + { + "epoch": 0.34, + "learning_rate": 4.982850871175744e-05, + "loss": 2.1114, + "step": 32500 + }, + { + "epoch": 0.35, + "learning_rate": 4.982587038424602e-05, + "loss": 2.0971, + "step": 33000 + }, + { + "epoch": 0.35, + "learning_rate": 4.98232320567346e-05, + "loss": 2.0855, + "step": 33500 + }, + { + "epoch": 0.36, + "learning_rate": 4.9820593729223176e-05, + "loss": 2.102, + "step": 34000 + }, + { + "epoch": 0.36, + "learning_rate": 4.9817955401711745e-05, + "loss": 2.1159, + "step": 34500 + }, + { + "epoch": 0.37, + "learning_rate": 4.981531707420033e-05, + "loss": 2.0279, + "step": 35000 + }, + { + "epoch": 0.37, + "learning_rate": 4.98126787466889e-05, + "loss": 2.0742, + "step": 35500 + }, + { + "epoch": 0.38, + "learning_rate": 4.981004041917748e-05, + "loss": 2.1082, + "step": 36000 + }, + { + "epoch": 0.39, + "learning_rate": 4.9807402091666054e-05, + "loss": 2.1118, + "step": 36500 + }, + { + "epoch": 0.39, + "learning_rate": 4.980476376415463e-05, + "loss": 2.0661, + "step": 37000 + }, + { + "epoch": 0.4, + "learning_rate": 4.9802125436643205e-05, + "loss": 2.0396, + "step": 37500 + }, + { + "epoch": 0.4, + "learning_rate": 4.979948710913178e-05, + "loss": 2.0375, + "step": 38000 + }, + { + "epoch": 0.41, + "learning_rate": 4.9796848781620356e-05, + "loss": 2.053, + "step": 38500 + }, + { + "epoch": 0.41, + "learning_rate": 4.979421045410894e-05, + "loss": 2.0394, + "step": 39000 + }, + { + "epoch": 0.42, + "learning_rate": 4.9791572126597507e-05, + "loss": 2.0003, + "step": 39500 + }, + { + "epoch": 0.42, + "learning_rate": 4.978893379908608e-05, + "loss": 2.0158, + "step": 40000 + }, + { + "epoch": 0.43, + "learning_rate": 4.9786295471574664e-05, + "loss": 2.0421, + "step": 40500 + }, + { + "epoch": 0.43, + "learning_rate": 4.978365714406324e-05, + "loss": 2.1111, + "step": 41000 + }, + { + "epoch": 0.44, + "learning_rate": 4.9781018816551815e-05, + "loss": 2.0702, + "step": 41500 + }, + { + "epoch": 0.44, + "learning_rate": 4.977838048904039e-05, + "loss": 2.0634, + "step": 42000 + }, + { + "epoch": 0.45, + "learning_rate": 4.9775742161528966e-05, + "loss": 2.0276, + "step": 42500 + }, + { + "epoch": 0.45, + "learning_rate": 4.977310383401754e-05, + "loss": 2.0368, + "step": 43000 + }, + { + "epoch": 0.46, + "learning_rate": 4.977046550650612e-05, + "loss": 1.9591, + "step": 43500 + }, + { + "epoch": 0.46, + "learning_rate": 4.976782717899469e-05, + "loss": 2.0182, + "step": 44000 + }, + { + "epoch": 0.47, + "learning_rate": 4.976518885148327e-05, + "loss": 2.0129, + "step": 44500 + }, + { + "epoch": 0.47, + "learning_rate": 4.9762550523971844e-05, + "loss": 2.0446, + "step": 45000 + }, + { + "epoch": 0.48, + "learning_rate": 4.9759912196460426e-05, + "loss": 2.0224, + "step": 45500 + }, + { + "epoch": 0.49, + "learning_rate": 4.9757273868949e-05, + "loss": 1.9851, + "step": 46000 + }, + { + "epoch": 0.49, + "learning_rate": 4.975463554143757e-05, + "loss": 1.9947, + "step": 46500 + }, + { + "epoch": 0.5, + "learning_rate": 4.975199721392615e-05, + "loss": 1.9868, + "step": 47000 + }, + { + "epoch": 0.5, + "learning_rate": 4.974935888641473e-05, + "loss": 1.9678, + "step": 47500 + }, + { + "epoch": 0.51, + "learning_rate": 4.97467205589033e-05, + "loss": 1.9906, + "step": 48000 + }, + { + "epoch": 0.51, + "learning_rate": 4.974408223139188e-05, + "loss": 1.9736, + "step": 48500 + }, + { + "epoch": 0.52, + "learning_rate": 4.9741443903880454e-05, + "loss": 1.9457, + "step": 49000 + }, + { + "epoch": 0.52, + "learning_rate": 4.973880557636903e-05, + "loss": 1.9758, + "step": 49500 + }, + { + "epoch": 0.53, + "learning_rate": 4.9736167248857605e-05, + "loss": 2.031, + "step": 50000 + }, + { + "epoch": 0.53, + "learning_rate": 4.973352892134618e-05, + "loss": 1.9567, + "step": 50500 + }, + { + "epoch": 0.54, + "learning_rate": 4.973089059383476e-05, + "loss": 2.0051, + "step": 51000 + }, + { + "epoch": 0.54, + "learning_rate": 4.972825226632333e-05, + "loss": 1.9309, + "step": 51500 + }, + { + "epoch": 0.55, + "learning_rate": 4.972561393881191e-05, + "loss": 1.9041, + "step": 52000 + }, + { + "epoch": 0.55, + "learning_rate": 4.972297561130049e-05, + "loss": 1.9507, + "step": 52500 + }, + { + "epoch": 0.56, + "learning_rate": 4.9720337283789065e-05, + "loss": 1.935, + "step": 53000 + }, + { + "epoch": 0.56, + "learning_rate": 4.9717698956277634e-05, + "loss": 1.938, + "step": 53500 + }, + { + "epoch": 0.57, + "learning_rate": 4.9715060628766216e-05, + "loss": 2.0293, + "step": 54000 + }, + { + "epoch": 0.58, + "learning_rate": 4.971242230125479e-05, + "loss": 1.9879, + "step": 54500 + }, + { + "epoch": 0.58, + "learning_rate": 4.970978397374337e-05, + "loss": 1.9516, + "step": 55000 + }, + { + "epoch": 0.59, + "learning_rate": 4.970714564623194e-05, + "loss": 1.986, + "step": 55500 + }, + { + "epoch": 0.59, + "learning_rate": 4.970450731872052e-05, + "loss": 1.9432, + "step": 56000 + }, + { + "epoch": 0.6, + "learning_rate": 4.9701868991209093e-05, + "loss": 1.964, + "step": 56500 + }, + { + "epoch": 0.6, + "learning_rate": 4.969923066369767e-05, + "loss": 1.9411, + "step": 57000 + }, + { + "epoch": 0.61, + "learning_rate": 4.969659233618625e-05, + "loss": 1.924, + "step": 57500 + }, + { + "epoch": 0.61, + "learning_rate": 4.969395400867483e-05, + "loss": 1.929, + "step": 58000 + }, + { + "epoch": 0.62, + "learning_rate": 4.9691315681163395e-05, + "loss": 1.9136, + "step": 58500 + }, + { + "epoch": 0.62, + "learning_rate": 4.968867735365198e-05, + "loss": 1.9355, + "step": 59000 + }, + { + "epoch": 0.63, + "learning_rate": 4.968603902614055e-05, + "loss": 1.9159, + "step": 59500 + }, + { + "epoch": 0.63, + "learning_rate": 4.968340069862913e-05, + "loss": 1.9798, + "step": 60000 + }, + { + "epoch": 0.64, + "learning_rate": 4.9680762371117704e-05, + "loss": 1.8963, + "step": 60500 + }, + { + "epoch": 0.64, + "learning_rate": 4.967812404360628e-05, + "loss": 1.9586, + "step": 61000 + }, + { + "epoch": 0.65, + "learning_rate": 4.9675485716094855e-05, + "loss": 1.8789, + "step": 61500 + }, + { + "epoch": 0.65, + "learning_rate": 4.967284738858343e-05, + "loss": 1.955, + "step": 62000 + }, + { + "epoch": 0.66, + "learning_rate": 4.9670209061072006e-05, + "loss": 1.9076, + "step": 62500 + }, + { + "epoch": 0.66, + "learning_rate": 4.966757073356058e-05, + "loss": 1.8856, + "step": 63000 + }, + { + "epoch": 0.67, + "learning_rate": 4.966493240604916e-05, + "loss": 1.9284, + "step": 63500 + }, + { + "epoch": 0.68, + "learning_rate": 4.966229407853773e-05, + "loss": 1.9121, + "step": 64000 + }, + { + "epoch": 0.68, + "learning_rate": 4.9659655751026315e-05, + "loss": 1.899, + "step": 64500 + }, + { + "epoch": 0.69, + "learning_rate": 4.965701742351489e-05, + "loss": 1.9383, + "step": 65000 + }, + { + "epoch": 0.69, + "learning_rate": 4.965437909600346e-05, + "loss": 1.9472, + "step": 65500 + }, + { + "epoch": 0.7, + "learning_rate": 4.965174076849204e-05, + "loss": 2.0012, + "step": 66000 + }, + { + "epoch": 0.7, + "learning_rate": 4.964910244098062e-05, + "loss": 1.9008, + "step": 66500 + }, + { + "epoch": 0.71, + "learning_rate": 4.964646411346919e-05, + "loss": 1.968, + "step": 67000 + }, + { + "epoch": 0.71, + "learning_rate": 4.964382578595777e-05, + "loss": 1.8978, + "step": 67500 + }, + { + "epoch": 0.72, + "learning_rate": 4.964118745844634e-05, + "loss": 1.9119, + "step": 68000 + }, + { + "epoch": 0.72, + "learning_rate": 4.963854913093492e-05, + "loss": 1.9171, + "step": 68500 + }, + { + "epoch": 0.73, + "learning_rate": 4.9635910803423494e-05, + "loss": 1.86, + "step": 69000 + }, + { + "epoch": 0.73, + "learning_rate": 4.9633272475912076e-05, + "loss": 1.9395, + "step": 69500 + }, + { + "epoch": 0.74, + "learning_rate": 4.963063414840065e-05, + "loss": 1.9457, + "step": 70000 + }, + { + "epoch": 0.74, + "learning_rate": 4.962799582088922e-05, + "loss": 1.9378, + "step": 70500 + }, + { + "epoch": 0.75, + "learning_rate": 4.96253574933778e-05, + "loss": 1.9074, + "step": 71000 + }, + { + "epoch": 0.75, + "learning_rate": 4.962271916586638e-05, + "loss": 1.8827, + "step": 71500 + }, + { + "epoch": 0.76, + "learning_rate": 4.9620080838354954e-05, + "loss": 1.8431, + "step": 72000 + }, + { + "epoch": 0.77, + "learning_rate": 4.961744251084352e-05, + "loss": 1.8881, + "step": 72500 + }, + { + "epoch": 0.77, + "learning_rate": 4.9614804183332105e-05, + "loss": 1.9483, + "step": 73000 + }, + { + "epoch": 0.78, + "learning_rate": 4.961216585582068e-05, + "loss": 1.8593, + "step": 73500 + }, + { + "epoch": 0.78, + "learning_rate": 4.9609527528309256e-05, + "loss": 1.9339, + "step": 74000 + }, + { + "epoch": 0.79, + "learning_rate": 4.960688920079784e-05, + "loss": 1.8562, + "step": 74500 + }, + { + "epoch": 0.79, + "learning_rate": 4.960425087328641e-05, + "loss": 1.8673, + "step": 75000 + }, + { + "epoch": 0.8, + "learning_rate": 4.960161254577498e-05, + "loss": 1.8887, + "step": 75500 + }, + { + "epoch": 0.8, + "learning_rate": 4.959897421826356e-05, + "loss": 1.9011, + "step": 76000 + }, + { + "epoch": 0.81, + "learning_rate": 4.959633589075214e-05, + "loss": 1.931, + "step": 76500 + }, + { + "epoch": 0.81, + "learning_rate": 4.9593697563240716e-05, + "loss": 1.8714, + "step": 77000 + }, + { + "epoch": 0.82, + "learning_rate": 4.9591059235729284e-05, + "loss": 1.929, + "step": 77500 + }, + { + "epoch": 0.82, + "learning_rate": 4.9588420908217866e-05, + "loss": 1.8629, + "step": 78000 + }, + { + "epoch": 0.83, + "learning_rate": 4.958578258070644e-05, + "loss": 1.9195, + "step": 78500 + }, + { + "epoch": 0.83, + "learning_rate": 4.958314425319502e-05, + "loss": 1.8173, + "step": 79000 + }, + { + "epoch": 0.84, + "learning_rate": 4.958050592568359e-05, + "loss": 1.8651, + "step": 79500 + }, + { + "epoch": 0.84, + "learning_rate": 4.957786759817217e-05, + "loss": 1.8549, + "step": 80000 + }, + { + "epoch": 0.85, + "learning_rate": 4.9575229270660744e-05, + "loss": 1.8392, + "step": 80500 + }, + { + "epoch": 0.85, + "learning_rate": 4.957259094314932e-05, + "loss": 1.8546, + "step": 81000 + }, + { + "epoch": 0.86, + "learning_rate": 4.95699526156379e-05, + "loss": 1.9343, + "step": 81500 + }, + { + "epoch": 0.87, + "learning_rate": 4.956731428812647e-05, + "loss": 1.851, + "step": 82000 + }, + { + "epoch": 0.87, + "learning_rate": 4.9564675960615046e-05, + "loss": 1.8565, + "step": 82500 + }, + { + "epoch": 0.88, + "learning_rate": 4.956203763310363e-05, + "loss": 1.831, + "step": 83000 + }, + { + "epoch": 0.88, + "learning_rate": 4.9559399305592204e-05, + "loss": 1.8616, + "step": 83500 + }, + { + "epoch": 0.89, + "learning_rate": 4.955676097808078e-05, + "loss": 1.8053, + "step": 84000 + }, + { + "epoch": 0.89, + "learning_rate": 4.955412265056935e-05, + "loss": 1.8284, + "step": 84500 + }, + { + "epoch": 0.9, + "learning_rate": 4.955148432305793e-05, + "loss": 1.8462, + "step": 85000 + }, + { + "epoch": 0.9, + "learning_rate": 4.9548845995546506e-05, + "loss": 1.797, + "step": 85500 + }, + { + "epoch": 0.91, + "learning_rate": 4.954620766803508e-05, + "loss": 1.8318, + "step": 86000 + }, + { + "epoch": 0.91, + "learning_rate": 4.954356934052366e-05, + "loss": 1.8532, + "step": 86500 + }, + { + "epoch": 0.92, + "learning_rate": 4.954093101301223e-05, + "loss": 1.9005, + "step": 87000 + }, + { + "epoch": 0.92, + "learning_rate": 4.953829268550081e-05, + "loss": 1.8816, + "step": 87500 + }, + { + "epoch": 0.93, + "learning_rate": 4.953565435798938e-05, + "loss": 1.826, + "step": 88000 + }, + { + "epoch": 0.93, + "learning_rate": 4.9533016030477965e-05, + "loss": 1.8047, + "step": 88500 + }, + { + "epoch": 0.94, + "learning_rate": 4.953037770296654e-05, + "loss": 1.8725, + "step": 89000 + }, + { + "epoch": 0.94, + "learning_rate": 4.952773937545511e-05, + "loss": 1.9063, + "step": 89500 + }, + { + "epoch": 0.95, + "learning_rate": 4.952510104794369e-05, + "loss": 1.8768, + "step": 90000 + }, + { + "epoch": 0.96, + "learning_rate": 4.952246272043227e-05, + "loss": 1.8589, + "step": 90500 + }, + { + "epoch": 0.96, + "learning_rate": 4.951982439292084e-05, + "loss": 1.9075, + "step": 91000 + }, + { + "epoch": 0.97, + "learning_rate": 4.951718606540942e-05, + "loss": 1.7886, + "step": 91500 + }, + { + "epoch": 0.97, + "learning_rate": 4.9514547737897994e-05, + "loss": 1.8383, + "step": 92000 + }, + { + "epoch": 0.98, + "learning_rate": 4.951190941038657e-05, + "loss": 1.8656, + "step": 92500 + }, + { + "epoch": 0.98, + "learning_rate": 4.9509271082875145e-05, + "loss": 1.8166, + "step": 93000 + }, + { + "epoch": 0.99, + "learning_rate": 4.950663275536373e-05, + "loss": 1.8027, + "step": 93500 + }, + { + "epoch": 0.99, + "learning_rate": 4.9503994427852296e-05, + "loss": 1.8244, + "step": 94000 + }, + { + "epoch": 1.0, + "learning_rate": 4.950135610034087e-05, + "loss": 1.8403, + "step": 94500 + }, + { + "epoch": 1.0, + "learning_rate": 4.9498717772829453e-05, + "loss": 1.7837, + "step": 95000 + }, + { + "epoch": 1.01, + "learning_rate": 4.949607944531803e-05, + "loss": 1.8076, + "step": 95500 + }, + { + "epoch": 1.01, + "learning_rate": 4.9493441117806604e-05, + "loss": 1.753, + "step": 96000 + }, + { + "epoch": 1.02, + "learning_rate": 4.949080279029518e-05, + "loss": 1.7972, + "step": 96500 + }, + { + "epoch": 1.02, + "learning_rate": 4.9488164462783755e-05, + "loss": 1.802, + "step": 97000 + }, + { + "epoch": 1.03, + "learning_rate": 4.948552613527233e-05, + "loss": 1.8633, + "step": 97500 + }, + { + "epoch": 1.03, + "learning_rate": 4.9482887807760906e-05, + "loss": 1.828, + "step": 98000 + }, + { + "epoch": 1.04, + "learning_rate": 4.948024948024949e-05, + "loss": 1.801, + "step": 98500 + }, + { + "epoch": 1.04, + "learning_rate": 4.947761115273806e-05, + "loss": 1.8275, + "step": 99000 + }, + { + "epoch": 1.05, + "learning_rate": 4.947497282522663e-05, + "loss": 1.6852, + "step": 99500 + }, + { + "epoch": 1.06, + "learning_rate": 4.947233449771521e-05, + "loss": 1.8134, + "step": 100000 + }, + { + "epoch": 1.06, + "learning_rate": 4.946969617020379e-05, + "loss": 1.8473, + "step": 100500 + }, + { + "epoch": 1.07, + "learning_rate": 4.946705784269236e-05, + "loss": 1.7716, + "step": 101000 + }, + { + "epoch": 1.07, + "learning_rate": 4.9464419515180935e-05, + "loss": 1.8282, + "step": 101500 + }, + { + "epoch": 1.08, + "learning_rate": 4.946178118766952e-05, + "loss": 1.8222, + "step": 102000 + }, + { + "epoch": 1.08, + "learning_rate": 4.945914286015809e-05, + "loss": 1.7972, + "step": 102500 + }, + { + "epoch": 1.09, + "learning_rate": 4.945650453264667e-05, + "loss": 1.7718, + "step": 103000 + }, + { + "epoch": 1.09, + "learning_rate": 4.9453866205135243e-05, + "loss": 1.7732, + "step": 103500 + }, + { + "epoch": 1.1, + "learning_rate": 4.945122787762382e-05, + "loss": 1.8121, + "step": 104000 + }, + { + "epoch": 1.1, + "learning_rate": 4.9448589550112394e-05, + "loss": 1.838, + "step": 104500 + }, + { + "epoch": 1.11, + "learning_rate": 4.944595122260097e-05, + "loss": 1.745, + "step": 105000 + }, + { + "epoch": 1.11, + "learning_rate": 4.944331289508955e-05, + "loss": 1.8298, + "step": 105500 + }, + { + "epoch": 1.12, + "learning_rate": 4.944067456757812e-05, + "loss": 1.7644, + "step": 106000 + }, + { + "epoch": 1.12, + "learning_rate": 4.9438036240066696e-05, + "loss": 1.7628, + "step": 106500 + }, + { + "epoch": 1.13, + "learning_rate": 4.943539791255528e-05, + "loss": 1.7666, + "step": 107000 + }, + { + "epoch": 1.13, + "learning_rate": 4.9432759585043854e-05, + "loss": 1.8087, + "step": 107500 + }, + { + "epoch": 1.14, + "learning_rate": 4.943012125753243e-05, + "loss": 1.8171, + "step": 108000 + }, + { + "epoch": 1.15, + "learning_rate": 4.9427482930021005e-05, + "loss": 1.7637, + "step": 108500 + }, + { + "epoch": 1.15, + "learning_rate": 4.942484460250958e-05, + "loss": 1.8473, + "step": 109000 + }, + { + "epoch": 1.16, + "learning_rate": 4.9422206274998156e-05, + "loss": 1.7809, + "step": 109500 + }, + { + "epoch": 1.16, + "learning_rate": 4.941956794748673e-05, + "loss": 1.8197, + "step": 110000 + }, + { + "epoch": 1.17, + "learning_rate": 4.941692961997531e-05, + "loss": 1.8043, + "step": 110500 + }, + { + "epoch": 1.17, + "learning_rate": 4.941429129246388e-05, + "loss": 1.7968, + "step": 111000 + }, + { + "epoch": 1.18, + "learning_rate": 4.941165296495246e-05, + "loss": 1.7791, + "step": 111500 + }, + { + "epoch": 1.18, + "learning_rate": 4.9409014637441034e-05, + "loss": 1.8318, + "step": 112000 + }, + { + "epoch": 1.19, + "learning_rate": 4.9406376309929616e-05, + "loss": 1.8194, + "step": 112500 + }, + { + "epoch": 1.19, + "learning_rate": 4.9403737982418184e-05, + "loss": 1.8207, + "step": 113000 + }, + { + "epoch": 1.2, + "learning_rate": 4.940109965490676e-05, + "loss": 1.7426, + "step": 113500 + }, + { + "epoch": 1.2, + "learning_rate": 4.939846132739534e-05, + "loss": 1.8495, + "step": 114000 + }, + { + "epoch": 1.21, + "learning_rate": 4.939582299988392e-05, + "loss": 1.7921, + "step": 114500 + }, + { + "epoch": 1.21, + "learning_rate": 4.939318467237249e-05, + "loss": 1.855, + "step": 115000 + }, + { + "epoch": 1.22, + "learning_rate": 4.939054634486107e-05, + "loss": 1.7774, + "step": 115500 + }, + { + "epoch": 1.22, + "learning_rate": 4.9387908017349644e-05, + "loss": 1.7221, + "step": 116000 + }, + { + "epoch": 1.23, + "learning_rate": 4.938526968983822e-05, + "loss": 1.7624, + "step": 116500 + }, + { + "epoch": 1.23, + "learning_rate": 4.9382631362326795e-05, + "loss": 1.7415, + "step": 117000 + }, + { + "epoch": 1.24, + "learning_rate": 4.937999303481538e-05, + "loss": 1.7585, + "step": 117500 + }, + { + "epoch": 1.25, + "learning_rate": 4.9377354707303946e-05, + "loss": 1.7917, + "step": 118000 + }, + { + "epoch": 1.25, + "learning_rate": 4.937471637979252e-05, + "loss": 1.7487, + "step": 118500 + }, + { + "epoch": 1.26, + "learning_rate": 4.9372078052281104e-05, + "loss": 1.8211, + "step": 119000 + }, + { + "epoch": 1.26, + "learning_rate": 4.936943972476968e-05, + "loss": 1.783, + "step": 119500 + }, + { + "epoch": 1.27, + "learning_rate": 4.936680139725825e-05, + "loss": 1.7488, + "step": 120000 + }, + { + "epoch": 1.27, + "learning_rate": 4.936416306974683e-05, + "loss": 1.8472, + "step": 120500 + }, + { + "epoch": 1.28, + "learning_rate": 4.9361524742235406e-05, + "loss": 1.7733, + "step": 121000 + }, + { + "epoch": 1.28, + "learning_rate": 4.935888641472398e-05, + "loss": 1.772, + "step": 121500 + }, + { + "epoch": 1.29, + "learning_rate": 4.935624808721256e-05, + "loss": 1.7718, + "step": 122000 + }, + { + "epoch": 1.29, + "learning_rate": 4.935360975970113e-05, + "loss": 1.7641, + "step": 122500 + }, + { + "epoch": 1.3, + "learning_rate": 4.935097143218971e-05, + "loss": 1.7613, + "step": 123000 + }, + { + "epoch": 1.3, + "learning_rate": 4.934833310467828e-05, + "loss": 1.7668, + "step": 123500 + }, + { + "epoch": 1.31, + "learning_rate": 4.934569477716686e-05, + "loss": 1.766, + "step": 124000 + }, + { + "epoch": 1.31, + "learning_rate": 4.934305644965544e-05, + "loss": 1.8101, + "step": 124500 + }, + { + "epoch": 1.32, + "learning_rate": 4.934041812214401e-05, + "loss": 1.7556, + "step": 125000 + }, + { + "epoch": 1.32, + "learning_rate": 4.9337779794632585e-05, + "loss": 1.8068, + "step": 125500 + }, + { + "epoch": 1.33, + "learning_rate": 4.933514146712117e-05, + "loss": 1.7675, + "step": 126000 + }, + { + "epoch": 1.33, + "learning_rate": 4.933250313960974e-05, + "loss": 1.7478, + "step": 126500 + }, + { + "epoch": 1.34, + "learning_rate": 4.932986481209831e-05, + "loss": 1.7629, + "step": 127000 + }, + { + "epoch": 1.35, + "learning_rate": 4.9327226484586894e-05, + "loss": 1.7598, + "step": 127500 + }, + { + "epoch": 1.35, + "learning_rate": 4.932458815707547e-05, + "loss": 1.7768, + "step": 128000 + }, + { + "epoch": 1.36, + "learning_rate": 4.9321949829564045e-05, + "loss": 1.7503, + "step": 128500 + }, + { + "epoch": 1.36, + "learning_rate": 4.931931150205262e-05, + "loss": 1.8354, + "step": 129000 + }, + { + "epoch": 1.37, + "learning_rate": 4.9316673174541196e-05, + "loss": 1.7779, + "step": 129500 + }, + { + "epoch": 1.37, + "learning_rate": 4.931403484702977e-05, + "loss": 1.8496, + "step": 130000 + }, + { + "epoch": 1.38, + "learning_rate": 4.931139651951835e-05, + "loss": 1.7351, + "step": 130500 + }, + { + "epoch": 1.38, + "learning_rate": 4.930875819200693e-05, + "loss": 1.7485, + "step": 131000 + }, + { + "epoch": 1.39, + "learning_rate": 4.9306119864495505e-05, + "loss": 1.7645, + "step": 131500 + }, + { + "epoch": 1.39, + "learning_rate": 4.930348153698407e-05, + "loss": 1.7865, + "step": 132000 + }, + { + "epoch": 1.4, + "learning_rate": 4.9300843209472656e-05, + "loss": 1.8111, + "step": 132500 + }, + { + "epoch": 1.4, + "learning_rate": 4.929820488196123e-05, + "loss": 1.8053, + "step": 133000 + }, + { + "epoch": 1.41, + "learning_rate": 4.9295566554449807e-05, + "loss": 1.7417, + "step": 133500 + }, + { + "epoch": 1.41, + "learning_rate": 4.929292822693838e-05, + "loss": 1.7713, + "step": 134000 + }, + { + "epoch": 1.42, + "learning_rate": 4.929028989942696e-05, + "loss": 1.7865, + "step": 134500 + }, + { + "epoch": 1.42, + "learning_rate": 4.928765157191553e-05, + "loss": 1.7825, + "step": 135000 + }, + { + "epoch": 1.43, + "learning_rate": 4.928501324440411e-05, + "loss": 1.7241, + "step": 135500 + }, + { + "epoch": 1.44, + "learning_rate": 4.9282374916892684e-05, + "loss": 1.7344, + "step": 136000 + }, + { + "epoch": 1.44, + "learning_rate": 4.927973658938126e-05, + "loss": 1.7488, + "step": 136500 + }, + { + "epoch": 1.45, + "learning_rate": 4.9277098261869835e-05, + "loss": 1.7841, + "step": 137000 + }, + { + "epoch": 1.45, + "learning_rate": 4.927445993435841e-05, + "loss": 1.7839, + "step": 137500 + }, + { + "epoch": 1.46, + "learning_rate": 4.927182160684699e-05, + "loss": 1.7906, + "step": 138000 + }, + { + "epoch": 1.46, + "learning_rate": 4.926918327933557e-05, + "loss": 1.6839, + "step": 138500 + }, + { + "epoch": 1.47, + "learning_rate": 4.926654495182414e-05, + "loss": 1.8357, + "step": 139000 + }, + { + "epoch": 1.47, + "learning_rate": 4.926390662431272e-05, + "loss": 1.7853, + "step": 139500 + }, + { + "epoch": 1.48, + "learning_rate": 4.9261268296801295e-05, + "loss": 1.7617, + "step": 140000 + }, + { + "epoch": 1.48, + "learning_rate": 4.925862996928987e-05, + "loss": 1.7382, + "step": 140500 + }, + { + "epoch": 1.49, + "learning_rate": 4.9255991641778446e-05, + "loss": 1.7745, + "step": 141000 + }, + { + "epoch": 1.49, + "learning_rate": 4.925335331426702e-05, + "loss": 1.8369, + "step": 141500 + }, + { + "epoch": 1.5, + "learning_rate": 4.92507149867556e-05, + "loss": 1.8001, + "step": 142000 + }, + { + "epoch": 1.5, + "learning_rate": 4.924807665924417e-05, + "loss": 1.752, + "step": 142500 + }, + { + "epoch": 1.51, + "learning_rate": 4.9245438331732754e-05, + "loss": 1.7505, + "step": 143000 + }, + { + "epoch": 1.51, + "learning_rate": 4.924280000422133e-05, + "loss": 1.7562, + "step": 143500 + }, + { + "epoch": 1.52, + "learning_rate": 4.92401616767099e-05, + "loss": 1.7219, + "step": 144000 + }, + { + "epoch": 1.52, + "learning_rate": 4.923752334919848e-05, + "loss": 1.8104, + "step": 144500 + }, + { + "epoch": 1.53, + "learning_rate": 4.9234885021687056e-05, + "loss": 1.7915, + "step": 145000 + }, + { + "epoch": 1.54, + "learning_rate": 4.923224669417563e-05, + "loss": 1.6853, + "step": 145500 + }, + { + "epoch": 1.54, + "learning_rate": 4.92296083666642e-05, + "loss": 1.7633, + "step": 146000 + }, + { + "epoch": 1.55, + "learning_rate": 4.922697003915278e-05, + "loss": 1.7715, + "step": 146500 + }, + { + "epoch": 1.55, + "learning_rate": 4.922433171164136e-05, + "loss": 1.7615, + "step": 147000 + }, + { + "epoch": 1.56, + "learning_rate": 4.9221693384129934e-05, + "loss": 1.764, + "step": 147500 + }, + { + "epoch": 1.56, + "learning_rate": 4.9219055056618516e-05, + "loss": 1.6518, + "step": 148000 + }, + { + "epoch": 1.57, + "learning_rate": 4.9216416729107085e-05, + "loss": 1.7363, + "step": 148500 + }, + { + "epoch": 1.57, + "learning_rate": 4.921377840159566e-05, + "loss": 1.7355, + "step": 149000 + }, + { + "epoch": 1.58, + "learning_rate": 4.9211140074084236e-05, + "loss": 1.7463, + "step": 149500 + }, + { + "epoch": 1.58, + "learning_rate": 4.920850174657282e-05, + "loss": 1.7872, + "step": 150000 + }, + { + "epoch": 1.59, + "learning_rate": 4.9205863419061393e-05, + "loss": 1.7646, + "step": 150500 + }, + { + "epoch": 1.59, + "learning_rate": 4.920322509154996e-05, + "loss": 1.8, + "step": 151000 + }, + { + "epoch": 1.6, + "learning_rate": 4.9200586764038544e-05, + "loss": 1.7694, + "step": 151500 + }, + { + "epoch": 1.6, + "learning_rate": 4.919794843652712e-05, + "loss": 1.6817, + "step": 152000 + }, + { + "epoch": 1.61, + "learning_rate": 4.9195310109015695e-05, + "loss": 1.696, + "step": 152500 + }, + { + "epoch": 1.61, + "learning_rate": 4.919267178150427e-05, + "loss": 1.655, + "step": 153000 + }, + { + "epoch": 1.62, + "learning_rate": 4.9190033453992846e-05, + "loss": 1.739, + "step": 153500 + }, + { + "epoch": 1.63, + "learning_rate": 4.918739512648142e-05, + "loss": 1.7529, + "step": 154000 + }, + { + "epoch": 1.63, + "learning_rate": 4.918475679897e-05, + "loss": 1.7408, + "step": 154500 + }, + { + "epoch": 1.64, + "learning_rate": 4.918211847145858e-05, + "loss": 1.7453, + "step": 155000 + }, + { + "epoch": 1.64, + "learning_rate": 4.917948014394715e-05, + "loss": 1.6618, + "step": 155500 + }, + { + "epoch": 1.65, + "learning_rate": 4.9176841816435724e-05, + "loss": 1.7424, + "step": 156000 + }, + { + "epoch": 1.65, + "learning_rate": 4.9174203488924306e-05, + "loss": 1.6764, + "step": 156500 + }, + { + "epoch": 1.66, + "learning_rate": 4.917156516141288e-05, + "loss": 1.7322, + "step": 157000 + }, + { + "epoch": 1.66, + "learning_rate": 4.916892683390146e-05, + "loss": 1.729, + "step": 157500 + }, + { + "epoch": 1.67, + "learning_rate": 4.9166288506390026e-05, + "loss": 1.7404, + "step": 158000 + }, + { + "epoch": 1.67, + "learning_rate": 4.916365017887861e-05, + "loss": 1.7711, + "step": 158500 + }, + { + "epoch": 1.68, + "learning_rate": 4.9161011851367184e-05, + "loss": 1.7441, + "step": 159000 + }, + { + "epoch": 1.68, + "learning_rate": 4.915837352385576e-05, + "loss": 1.7461, + "step": 159500 + }, + { + "epoch": 1.69, + "learning_rate": 4.915573519634434e-05, + "loss": 1.8016, + "step": 160000 + }, + { + "epoch": 1.69, + "learning_rate": 4.915309686883291e-05, + "loss": 1.8012, + "step": 160500 + }, + { + "epoch": 1.7, + "learning_rate": 4.9150458541321485e-05, + "loss": 1.7191, + "step": 161000 + }, + { + "epoch": 1.7, + "learning_rate": 4.914782021381006e-05, + "loss": 1.7023, + "step": 161500 + }, + { + "epoch": 1.71, + "learning_rate": 4.914518188629864e-05, + "loss": 1.7468, + "step": 162000 + }, + { + "epoch": 1.71, + "learning_rate": 4.914254355878722e-05, + "loss": 1.7401, + "step": 162500 + }, + { + "epoch": 1.72, + "learning_rate": 4.913990523127579e-05, + "loss": 1.6992, + "step": 163000 + }, + { + "epoch": 1.73, + "learning_rate": 4.913726690376437e-05, + "loss": 1.7466, + "step": 163500 + }, + { + "epoch": 1.73, + "learning_rate": 4.9134628576252945e-05, + "loss": 1.7413, + "step": 164000 + }, + { + "epoch": 1.74, + "learning_rate": 4.913199024874152e-05, + "loss": 1.6929, + "step": 164500 + }, + { + "epoch": 1.74, + "learning_rate": 4.9129351921230096e-05, + "loss": 1.7348, + "step": 165000 + }, + { + "epoch": 1.75, + "learning_rate": 4.912671359371867e-05, + "loss": 1.7522, + "step": 165500 + }, + { + "epoch": 1.75, + "learning_rate": 4.912407526620725e-05, + "loss": 1.7337, + "step": 166000 + }, + { + "epoch": 1.76, + "learning_rate": 4.912143693869582e-05, + "loss": 1.7149, + "step": 166500 + }, + { + "epoch": 1.76, + "learning_rate": 4.9118798611184405e-05, + "loss": 1.7649, + "step": 167000 + }, + { + "epoch": 1.77, + "learning_rate": 4.9116160283672974e-05, + "loss": 1.7287, + "step": 167500 + }, + { + "epoch": 1.77, + "learning_rate": 4.911352195616155e-05, + "loss": 1.7685, + "step": 168000 + }, + { + "epoch": 1.78, + "learning_rate": 4.911088362865013e-05, + "loss": 1.7232, + "step": 168500 + }, + { + "epoch": 1.78, + "learning_rate": 4.910824530113871e-05, + "loss": 1.6898, + "step": 169000 + }, + { + "epoch": 1.79, + "learning_rate": 4.910560697362728e-05, + "loss": 1.7203, + "step": 169500 + }, + { + "epoch": 1.79, + "learning_rate": 4.910296864611586e-05, + "loss": 1.704, + "step": 170000 + }, + { + "epoch": 1.8, + "learning_rate": 4.910033031860443e-05, + "loss": 1.7518, + "step": 170500 + }, + { + "epoch": 1.8, + "learning_rate": 4.909769199109301e-05, + "loss": 1.7617, + "step": 171000 + }, + { + "epoch": 1.81, + "learning_rate": 4.9095053663581584e-05, + "loss": 1.7215, + "step": 171500 + }, + { + "epoch": 1.82, + "learning_rate": 4.9092415336070167e-05, + "loss": 1.686, + "step": 172000 + }, + { + "epoch": 1.82, + "learning_rate": 4.9089777008558735e-05, + "loss": 1.697, + "step": 172500 + }, + { + "epoch": 1.83, + "learning_rate": 4.908713868104731e-05, + "loss": 1.7945, + "step": 173000 + }, + { + "epoch": 1.83, + "learning_rate": 4.9084500353535886e-05, + "loss": 1.7217, + "step": 173500 + }, + { + "epoch": 1.84, + "learning_rate": 4.908186202602447e-05, + "loss": 1.7539, + "step": 174000 + }, + { + "epoch": 1.84, + "learning_rate": 4.907922369851304e-05, + "loss": 1.7645, + "step": 174500 + }, + { + "epoch": 1.85, + "learning_rate": 4.907658537100161e-05, + "loss": 1.7937, + "step": 175000 + }, + { + "epoch": 1.85, + "learning_rate": 4.9073947043490195e-05, + "loss": 1.75, + "step": 175500 + }, + { + "epoch": 1.86, + "learning_rate": 4.907130871597877e-05, + "loss": 1.7107, + "step": 176000 + }, + { + "epoch": 1.86, + "learning_rate": 4.9068670388467346e-05, + "loss": 1.7503, + "step": 176500 + }, + { + "epoch": 1.87, + "learning_rate": 4.906603206095592e-05, + "loss": 1.7788, + "step": 177000 + }, + { + "epoch": 1.87, + "learning_rate": 4.90633937334445e-05, + "loss": 1.7617, + "step": 177500 + }, + { + "epoch": 1.88, + "learning_rate": 4.906075540593307e-05, + "loss": 1.7401, + "step": 178000 + }, + { + "epoch": 1.88, + "learning_rate": 4.905811707842165e-05, + "loss": 1.712, + "step": 178500 + }, + { + "epoch": 1.89, + "learning_rate": 4.905547875091023e-05, + "loss": 1.6923, + "step": 179000 + }, + { + "epoch": 1.89, + "learning_rate": 4.90528404233988e-05, + "loss": 1.674, + "step": 179500 + }, + { + "epoch": 1.9, + "learning_rate": 4.9050202095887374e-05, + "loss": 1.7781, + "step": 180000 + }, + { + "epoch": 1.9, + "learning_rate": 4.9047563768375957e-05, + "loss": 1.7068, + "step": 180500 + }, + { + "epoch": 1.91, + "learning_rate": 4.904492544086453e-05, + "loss": 1.7466, + "step": 181000 + }, + { + "epoch": 1.92, + "learning_rate": 4.904228711335311e-05, + "loss": 1.7442, + "step": 181500 + }, + { + "epoch": 1.92, + "learning_rate": 4.903964878584168e-05, + "loss": 1.7456, + "step": 182000 + }, + { + "epoch": 1.93, + "learning_rate": 4.903701045833026e-05, + "loss": 1.6894, + "step": 182500 + }, + { + "epoch": 1.93, + "learning_rate": 4.9034372130818834e-05, + "loss": 1.7349, + "step": 183000 + }, + { + "epoch": 1.94, + "learning_rate": 4.903173380330741e-05, + "loss": 1.7361, + "step": 183500 + }, + { + "epoch": 1.94, + "learning_rate": 4.9029095475795985e-05, + "loss": 1.7298, + "step": 184000 + }, + { + "epoch": 1.95, + "learning_rate": 4.902645714828456e-05, + "loss": 1.709, + "step": 184500 + }, + { + "epoch": 1.95, + "learning_rate": 4.9023818820773136e-05, + "loss": 1.7666, + "step": 185000 + }, + { + "epoch": 1.96, + "learning_rate": 4.902118049326171e-05, + "loss": 1.6987, + "step": 185500 + }, + { + "epoch": 1.96, + "learning_rate": 4.9018542165750294e-05, + "loss": 1.6872, + "step": 186000 + }, + { + "epoch": 1.97, + "learning_rate": 4.901590383823886e-05, + "loss": 1.7145, + "step": 186500 + }, + { + "epoch": 1.97, + "learning_rate": 4.901326551072744e-05, + "loss": 1.7244, + "step": 187000 + }, + { + "epoch": 1.98, + "learning_rate": 4.901062718321602e-05, + "loss": 1.7201, + "step": 187500 + }, + { + "epoch": 1.98, + "learning_rate": 4.9007988855704596e-05, + "loss": 1.7063, + "step": 188000 + }, + { + "epoch": 1.99, + "learning_rate": 4.900535052819317e-05, + "loss": 1.7023, + "step": 188500 + }, + { + "epoch": 1.99, + "learning_rate": 4.900271220068175e-05, + "loss": 1.6943, + "step": 189000 + }, + { + "epoch": 2.0, + "learning_rate": 4.900007387317032e-05, + "loss": 1.7301, + "step": 189500 + }, + { + "epoch": 2.01, + "learning_rate": 4.89974355456589e-05, + "loss": 1.7021, + "step": 190000 + }, + { + "epoch": 2.01, + "learning_rate": 4.899479721814747e-05, + "loss": 1.7012, + "step": 190500 + }, + { + "epoch": 2.02, + "learning_rate": 4.8992158890636055e-05, + "loss": 1.7209, + "step": 191000 + }, + { + "epoch": 2.02, + "learning_rate": 4.8989520563124624e-05, + "loss": 1.7103, + "step": 191500 + }, + { + "epoch": 2.03, + "learning_rate": 4.89868822356132e-05, + "loss": 1.769, + "step": 192000 + }, + { + "epoch": 2.03, + "learning_rate": 4.898424390810178e-05, + "loss": 1.724, + "step": 192500 + }, + { + "epoch": 2.04, + "learning_rate": 4.898160558059036e-05, + "loss": 1.7372, + "step": 193000 + }, + { + "epoch": 2.04, + "learning_rate": 4.8978967253078926e-05, + "loss": 1.6769, + "step": 193500 + }, + { + "epoch": 2.05, + "learning_rate": 4.897632892556751e-05, + "loss": 1.7192, + "step": 194000 + }, + { + "epoch": 2.05, + "learning_rate": 4.8973690598056084e-05, + "loss": 1.6837, + "step": 194500 + }, + { + "epoch": 2.06, + "learning_rate": 4.897105227054466e-05, + "loss": 1.7341, + "step": 195000 + }, + { + "epoch": 2.06, + "learning_rate": 4.8968413943033235e-05, + "loss": 1.6831, + "step": 195500 + }, + { + "epoch": 2.07, + "learning_rate": 4.896577561552181e-05, + "loss": 1.6526, + "step": 196000 + }, + { + "epoch": 2.07, + "learning_rate": 4.8963137288010386e-05, + "loss": 1.7261, + "step": 196500 + }, + { + "epoch": 2.08, + "learning_rate": 4.896049896049896e-05, + "loss": 1.7118, + "step": 197000 + }, + { + "epoch": 2.08, + "learning_rate": 4.895786063298754e-05, + "loss": 1.6494, + "step": 197500 + }, + { + "epoch": 2.09, + "learning_rate": 4.895522230547612e-05, + "loss": 1.6473, + "step": 198000 + }, + { + "epoch": 2.09, + "learning_rate": 4.895258397796469e-05, + "loss": 1.7002, + "step": 198500 + }, + { + "epoch": 2.1, + "learning_rate": 4.894994565045326e-05, + "loss": 1.7142, + "step": 199000 + }, + { + "epoch": 2.11, + "learning_rate": 4.8947307322941845e-05, + "loss": 1.6836, + "step": 199500 + }, + { + "epoch": 2.11, + "learning_rate": 4.894466899543042e-05, + "loss": 1.7262, + "step": 200000 + }, + { + "epoch": 2.12, + "learning_rate": 4.8942030667918996e-05, + "loss": 1.6797, + "step": 200500 + }, + { + "epoch": 2.12, + "learning_rate": 4.893939234040757e-05, + "loss": 1.757, + "step": 201000 + }, + { + "epoch": 2.13, + "learning_rate": 4.893675401289615e-05, + "loss": 1.6898, + "step": 201500 + }, + { + "epoch": 2.13, + "learning_rate": 4.893411568538472e-05, + "loss": 1.7077, + "step": 202000 + }, + { + "epoch": 2.14, + "learning_rate": 4.89314773578733e-05, + "loss": 1.7184, + "step": 202500 + }, + { + "epoch": 2.14, + "learning_rate": 4.8928839030361874e-05, + "loss": 1.7181, + "step": 203000 + }, + { + "epoch": 2.15, + "learning_rate": 4.892620070285045e-05, + "loss": 1.7161, + "step": 203500 + }, + { + "epoch": 2.15, + "learning_rate": 4.8923562375339025e-05, + "loss": 1.6628, + "step": 204000 + }, + { + "epoch": 2.16, + "learning_rate": 4.892092404782761e-05, + "loss": 1.6527, + "step": 204500 + }, + { + "epoch": 2.16, + "learning_rate": 4.891828572031618e-05, + "loss": 1.7463, + "step": 205000 + }, + { + "epoch": 2.17, + "learning_rate": 4.891564739280475e-05, + "loss": 1.743, + "step": 205500 + }, + { + "epoch": 2.17, + "learning_rate": 4.8913009065293334e-05, + "loss": 1.6885, + "step": 206000 + }, + { + "epoch": 2.18, + "learning_rate": 4.891037073778191e-05, + "loss": 1.6991, + "step": 206500 + }, + { + "epoch": 2.18, + "learning_rate": 4.8907732410270485e-05, + "loss": 1.6554, + "step": 207000 + }, + { + "epoch": 2.19, + "learning_rate": 4.890509408275906e-05, + "loss": 1.7406, + "step": 207500 + }, + { + "epoch": 2.2, + "learning_rate": 4.8902455755247635e-05, + "loss": 1.6958, + "step": 208000 + }, + { + "epoch": 2.2, + "learning_rate": 4.889981742773621e-05, + "loss": 1.7102, + "step": 208500 + }, + { + "epoch": 2.21, + "learning_rate": 4.8897179100224786e-05, + "loss": 1.6647, + "step": 209000 + }, + { + "epoch": 2.21, + "learning_rate": 4.889454077271336e-05, + "loss": 1.6753, + "step": 209500 + }, + { + "epoch": 2.22, + "learning_rate": 4.8891902445201944e-05, + "loss": 1.6846, + "step": 210000 + }, + { + "epoch": 2.22, + "learning_rate": 4.888926411769051e-05, + "loss": 1.6655, + "step": 210500 + }, + { + "epoch": 2.23, + "learning_rate": 4.888662579017909e-05, + "loss": 1.7068, + "step": 211000 + }, + { + "epoch": 2.23, + "learning_rate": 4.888398746266767e-05, + "loss": 1.666, + "step": 211500 + }, + { + "epoch": 2.24, + "learning_rate": 4.8881349135156246e-05, + "loss": 1.7239, + "step": 212000 + }, + { + "epoch": 2.24, + "learning_rate": 4.8878710807644815e-05, + "loss": 1.6347, + "step": 212500 + }, + { + "epoch": 2.25, + "learning_rate": 4.88760724801334e-05, + "loss": 1.693, + "step": 213000 + }, + { + "epoch": 2.25, + "learning_rate": 4.887343415262197e-05, + "loss": 1.693, + "step": 213500 + }, + { + "epoch": 2.26, + "learning_rate": 4.887079582511055e-05, + "loss": 1.698, + "step": 214000 + }, + { + "epoch": 2.26, + "learning_rate": 4.8868157497599124e-05, + "loss": 1.7336, + "step": 214500 + }, + { + "epoch": 2.27, + "learning_rate": 4.88655191700877e-05, + "loss": 1.7195, + "step": 215000 + }, + { + "epoch": 2.27, + "learning_rate": 4.8862880842576275e-05, + "loss": 1.6858, + "step": 215500 + }, + { + "epoch": 2.28, + "learning_rate": 4.886024251506485e-05, + "loss": 1.6581, + "step": 216000 + }, + { + "epoch": 2.28, + "learning_rate": 4.885760418755343e-05, + "loss": 1.7193, + "step": 216500 + }, + { + "epoch": 2.29, + "learning_rate": 4.885496586004201e-05, + "loss": 1.7095, + "step": 217000 + }, + { + "epoch": 2.3, + "learning_rate": 4.8852327532530577e-05, + "loss": 1.7284, + "step": 217500 + }, + { + "epoch": 2.3, + "learning_rate": 4.884968920501916e-05, + "loss": 1.6482, + "step": 218000 + }, + { + "epoch": 2.31, + "learning_rate": 4.8847050877507734e-05, + "loss": 1.7369, + "step": 218500 + }, + { + "epoch": 2.31, + "learning_rate": 4.884441254999631e-05, + "loss": 1.7034, + "step": 219000 + }, + { + "epoch": 2.32, + "learning_rate": 4.8841774222484885e-05, + "loss": 1.6569, + "step": 219500 + }, + { + "epoch": 2.32, + "learning_rate": 4.883913589497346e-05, + "loss": 1.68, + "step": 220000 + }, + { + "epoch": 2.33, + "learning_rate": 4.8836497567462036e-05, + "loss": 1.7285, + "step": 220500 + }, + { + "epoch": 2.33, + "learning_rate": 4.883385923995061e-05, + "loss": 1.688, + "step": 221000 + }, + { + "epoch": 2.34, + "learning_rate": 4.8831220912439194e-05, + "loss": 1.7384, + "step": 221500 + }, + { + "epoch": 2.34, + "learning_rate": 4.882858258492776e-05, + "loss": 1.7264, + "step": 222000 + }, + { + "epoch": 2.35, + "learning_rate": 4.882594425741634e-05, + "loss": 1.7002, + "step": 222500 + }, + { + "epoch": 2.35, + "learning_rate": 4.8823305929904914e-05, + "loss": 1.672, + "step": 223000 + }, + { + "epoch": 2.36, + "learning_rate": 4.8820667602393496e-05, + "loss": 1.6912, + "step": 223500 + }, + { + "epoch": 2.36, + "learning_rate": 4.881802927488207e-05, + "loss": 1.739, + "step": 224000 + }, + { + "epoch": 2.37, + "learning_rate": 4.881539094737064e-05, + "loss": 1.7253, + "step": 224500 + }, + { + "epoch": 2.37, + "learning_rate": 4.881275261985922e-05, + "loss": 1.7489, + "step": 225000 + }, + { + "epoch": 2.38, + "learning_rate": 4.88101142923478e-05, + "loss": 1.6896, + "step": 225500 + }, + { + "epoch": 2.39, + "learning_rate": 4.880747596483637e-05, + "loss": 1.7235, + "step": 226000 + }, + { + "epoch": 2.39, + "learning_rate": 4.880483763732495e-05, + "loss": 1.7274, + "step": 226500 + }, + { + "epoch": 2.4, + "learning_rate": 4.8802199309813524e-05, + "loss": 1.6662, + "step": 227000 + }, + { + "epoch": 2.4, + "learning_rate": 4.87995609823021e-05, + "loss": 1.6915, + "step": 227500 + }, + { + "epoch": 2.41, + "learning_rate": 4.8796922654790675e-05, + "loss": 1.7037, + "step": 228000 + }, + { + "epoch": 2.41, + "learning_rate": 4.879428432727926e-05, + "loss": 1.6538, + "step": 228500 + }, + { + "epoch": 2.42, + "learning_rate": 4.879164599976783e-05, + "loss": 1.7442, + "step": 229000 + }, + { + "epoch": 2.42, + "learning_rate": 4.87890076722564e-05, + "loss": 1.7138, + "step": 229500 + }, + { + "epoch": 2.43, + "learning_rate": 4.8786369344744984e-05, + "loss": 1.6833, + "step": 230000 + }, + { + "epoch": 2.43, + "learning_rate": 4.878373101723356e-05, + "loss": 1.7533, + "step": 230500 + }, + { + "epoch": 2.44, + "learning_rate": 4.8781092689722135e-05, + "loss": 1.6502, + "step": 231000 + }, + { + "epoch": 2.44, + "learning_rate": 4.8778454362210704e-05, + "loss": 1.6832, + "step": 231500 + }, + { + "epoch": 2.45, + "learning_rate": 4.8775816034699286e-05, + "loss": 1.7059, + "step": 232000 + }, + { + "epoch": 2.45, + "learning_rate": 4.877317770718786e-05, + "loss": 1.7332, + "step": 232500 + }, + { + "epoch": 2.46, + "learning_rate": 4.877053937967644e-05, + "loss": 1.6688, + "step": 233000 + }, + { + "epoch": 2.46, + "learning_rate": 4.876790105216502e-05, + "loss": 1.7518, + "step": 233500 + }, + { + "epoch": 2.47, + "learning_rate": 4.876526272465359e-05, + "loss": 1.7212, + "step": 234000 + }, + { + "epoch": 2.47, + "learning_rate": 4.8762624397142163e-05, + "loss": 1.6932, + "step": 234500 + }, + { + "epoch": 2.48, + "learning_rate": 4.875998606963074e-05, + "loss": 1.7022, + "step": 235000 + }, + { + "epoch": 2.49, + "learning_rate": 4.875734774211932e-05, + "loss": 1.6285, + "step": 235500 + }, + { + "epoch": 2.49, + "learning_rate": 4.87547094146079e-05, + "loss": 1.7442, + "step": 236000 + }, + { + "epoch": 2.5, + "learning_rate": 4.8752071087096465e-05, + "loss": 1.6937, + "step": 236500 + }, + { + "epoch": 2.5, + "learning_rate": 4.874943275958505e-05, + "loss": 1.7169, + "step": 237000 + }, + { + "epoch": 2.51, + "learning_rate": 4.874679443207362e-05, + "loss": 1.7334, + "step": 237500 + }, + { + "epoch": 2.51, + "learning_rate": 4.87441561045622e-05, + "loss": 1.6249, + "step": 238000 + }, + { + "epoch": 2.52, + "learning_rate": 4.8741517777050774e-05, + "loss": 1.7606, + "step": 238500 + }, + { + "epoch": 2.52, + "learning_rate": 4.873887944953935e-05, + "loss": 1.703, + "step": 239000 + }, + { + "epoch": 2.53, + "learning_rate": 4.8736241122027925e-05, + "loss": 1.7377, + "step": 239500 + }, + { + "epoch": 2.53, + "learning_rate": 4.87336027945165e-05, + "loss": 1.6979, + "step": 240000 + }, + { + "epoch": 2.54, + "learning_rate": 4.873096446700508e-05, + "loss": 1.6847, + "step": 240500 + }, + { + "epoch": 2.54, + "learning_rate": 4.872832613949365e-05, + "loss": 1.6939, + "step": 241000 + }, + { + "epoch": 2.55, + "learning_rate": 4.872568781198223e-05, + "loss": 1.7648, + "step": 241500 + }, + { + "epoch": 2.55, + "learning_rate": 4.872304948447081e-05, + "loss": 1.6578, + "step": 242000 + }, + { + "epoch": 2.56, + "learning_rate": 4.8720411156959385e-05, + "loss": 1.7277, + "step": 242500 + }, + { + "epoch": 2.56, + "learning_rate": 4.871777282944796e-05, + "loss": 1.7014, + "step": 243000 + }, + { + "epoch": 2.57, + "learning_rate": 4.8715134501936536e-05, + "loss": 1.8071, + "step": 243500 + }, + { + "epoch": 2.58, + "learning_rate": 4.871249617442511e-05, + "loss": 1.6826, + "step": 244000 + }, + { + "epoch": 2.58, + "learning_rate": 4.870985784691369e-05, + "loss": 1.721, + "step": 244500 + }, + { + "epoch": 2.59, + "learning_rate": 4.870721951940226e-05, + "loss": 1.6965, + "step": 245000 + }, + { + "epoch": 2.59, + "learning_rate": 4.8704581191890844e-05, + "loss": 1.7585, + "step": 245500 + }, + { + "epoch": 2.6, + "learning_rate": 4.870194286437941e-05, + "loss": 1.6792, + "step": 246000 + }, + { + "epoch": 2.6, + "learning_rate": 4.869930453686799e-05, + "loss": 1.6543, + "step": 246500 + }, + { + "epoch": 2.61, + "learning_rate": 4.8696666209356564e-05, + "loss": 1.7448, + "step": 247000 + }, + { + "epoch": 2.61, + "learning_rate": 4.8694027881845146e-05, + "loss": 1.691, + "step": 247500 + }, + { + "epoch": 2.62, + "learning_rate": 4.869138955433372e-05, + "loss": 1.6794, + "step": 248000 + }, + { + "epoch": 2.62, + "learning_rate": 4.868875122682229e-05, + "loss": 1.7113, + "step": 248500 + }, + { + "epoch": 2.63, + "learning_rate": 4.868611289931087e-05, + "loss": 1.7411, + "step": 249000 + }, + { + "epoch": 2.63, + "learning_rate": 4.868347457179945e-05, + "loss": 1.7102, + "step": 249500 + }, + { + "epoch": 2.64, + "learning_rate": 4.8680836244288024e-05, + "loss": 1.6756, + "step": 250000 + }, + { + "epoch": 2.64, + "learning_rate": 4.86781979167766e-05, + "loss": 1.6735, + "step": 250500 + }, + { + "epoch": 2.65, + "learning_rate": 4.8675559589265175e-05, + "loss": 1.7092, + "step": 251000 + }, + { + "epoch": 2.65, + "learning_rate": 4.867292126175375e-05, + "loss": 1.7197, + "step": 251500 + }, + { + "epoch": 2.66, + "learning_rate": 4.8670282934242326e-05, + "loss": 1.7064, + "step": 252000 + }, + { + "epoch": 2.66, + "learning_rate": 4.866764460673091e-05, + "loss": 1.7369, + "step": 252500 + }, + { + "epoch": 2.67, + "learning_rate": 4.866500627921948e-05, + "loss": 1.7038, + "step": 253000 + }, + { + "epoch": 2.68, + "learning_rate": 4.866236795170805e-05, + "loss": 1.7508, + "step": 253500 + }, + { + "epoch": 2.68, + "learning_rate": 4.8659729624196635e-05, + "loss": 1.7227, + "step": 254000 + }, + { + "epoch": 2.69, + "learning_rate": 4.865709129668521e-05, + "loss": 1.6917, + "step": 254500 + }, + { + "epoch": 2.69, + "learning_rate": 4.8654452969173785e-05, + "loss": 1.7066, + "step": 255000 + }, + { + "epoch": 2.7, + "learning_rate": 4.865181464166236e-05, + "loss": 1.7158, + "step": 255500 + }, + { + "epoch": 2.7, + "learning_rate": 4.8649176314150936e-05, + "loss": 1.6913, + "step": 256000 + }, + { + "epoch": 2.71, + "learning_rate": 4.864653798663951e-05, + "loss": 1.7248, + "step": 256500 + }, + { + "epoch": 2.71, + "learning_rate": 4.864389965912809e-05, + "loss": 1.6812, + "step": 257000 + }, + { + "epoch": 2.72, + "learning_rate": 4.864126133161667e-05, + "loss": 1.7347, + "step": 257500 + }, + { + "epoch": 2.72, + "learning_rate": 4.863862300410524e-05, + "loss": 1.7114, + "step": 258000 + }, + { + "epoch": 2.73, + "learning_rate": 4.8635984676593814e-05, + "loss": 1.7453, + "step": 258500 + }, + { + "epoch": 2.73, + "learning_rate": 4.863334634908239e-05, + "loss": 1.7243, + "step": 259000 + }, + { + "epoch": 2.74, + "learning_rate": 4.863070802157097e-05, + "loss": 1.7015, + "step": 259500 + }, + { + "epoch": 2.74, + "learning_rate": 4.862806969405954e-05, + "loss": 1.7205, + "step": 260000 + }, + { + "epoch": 2.75, + "learning_rate": 4.8625431366548116e-05, + "loss": 1.7205, + "step": 260500 + }, + { + "epoch": 2.75, + "learning_rate": 4.86227930390367e-05, + "loss": 1.7309, + "step": 261000 + }, + { + "epoch": 2.76, + "learning_rate": 4.8620154711525274e-05, + "loss": 1.7358, + "step": 261500 + }, + { + "epoch": 2.76, + "learning_rate": 4.861751638401385e-05, + "loss": 1.7191, + "step": 262000 + }, + { + "epoch": 2.77, + "learning_rate": 4.8614878056502425e-05, + "loss": 1.6687, + "step": 262500 + }, + { + "epoch": 2.78, + "learning_rate": 4.8612239728991e-05, + "loss": 1.6839, + "step": 263000 + }, + { + "epoch": 2.78, + "learning_rate": 4.8609601401479576e-05, + "loss": 1.7014, + "step": 263500 + }, + { + "epoch": 2.79, + "learning_rate": 4.860696307396815e-05, + "loss": 1.749, + "step": 264000 + }, + { + "epoch": 2.79, + "learning_rate": 4.860432474645673e-05, + "loss": 1.7306, + "step": 264500 + }, + { + "epoch": 2.8, + "learning_rate": 4.86016864189453e-05, + "loss": 1.6477, + "step": 265000 + }, + { + "epoch": 2.8, + "learning_rate": 4.859904809143388e-05, + "loss": 1.7868, + "step": 265500 + }, + { + "epoch": 2.81, + "learning_rate": 4.859640976392246e-05, + "loss": 1.7169, + "step": 266000 + }, + { + "epoch": 2.81, + "learning_rate": 4.8593771436411035e-05, + "loss": 1.7109, + "step": 266500 + }, + { + "epoch": 2.82, + "learning_rate": 4.859113310889961e-05, + "loss": 1.7196, + "step": 267000 + }, + { + "epoch": 2.82, + "learning_rate": 4.8588494781388186e-05, + "loss": 1.6625, + "step": 267500 + }, + { + "epoch": 2.83, + "learning_rate": 4.858585645387676e-05, + "loss": 1.6963, + "step": 268000 + }, + { + "epoch": 2.83, + "learning_rate": 4.858321812636534e-05, + "loss": 1.7503, + "step": 268500 + }, + { + "epoch": 2.84, + "learning_rate": 4.858057979885391e-05, + "loss": 1.7355, + "step": 269000 + }, + { + "epoch": 2.84, + "learning_rate": 4.857794147134249e-05, + "loss": 1.655, + "step": 269500 + }, + { + "epoch": 2.85, + "learning_rate": 4.8575303143831064e-05, + "loss": 1.7101, + "step": 270000 + }, + { + "epoch": 2.85, + "learning_rate": 4.857266481631964e-05, + "loss": 1.7334, + "step": 270500 + }, + { + "epoch": 2.86, + "learning_rate": 4.8570026488808215e-05, + "loss": 1.6939, + "step": 271000 + }, + { + "epoch": 2.87, + "learning_rate": 4.85673881612968e-05, + "loss": 1.7089, + "step": 271500 + }, + { + "epoch": 2.87, + "learning_rate": 4.8564749833785366e-05, + "loss": 1.6987, + "step": 272000 + }, + { + "epoch": 2.88, + "learning_rate": 4.856211150627394e-05, + "loss": 1.6851, + "step": 272500 + }, + { + "epoch": 2.88, + "learning_rate": 4.855947317876252e-05, + "loss": 1.688, + "step": 273000 + }, + { + "epoch": 2.89, + "learning_rate": 4.85568348512511e-05, + "loss": 1.6986, + "step": 273500 + }, + { + "epoch": 2.89, + "learning_rate": 4.8554196523739674e-05, + "loss": 1.6905, + "step": 274000 + }, + { + "epoch": 2.9, + "learning_rate": 4.855155819622825e-05, + "loss": 1.7356, + "step": 274500 + }, + { + "epoch": 2.9, + "learning_rate": 4.8548919868716825e-05, + "loss": 1.6631, + "step": 275000 + }, + { + "epoch": 2.91, + "learning_rate": 4.85462815412054e-05, + "loss": 1.7263, + "step": 275500 + }, + { + "epoch": 2.91, + "learning_rate": 4.8543643213693976e-05, + "loss": 1.6505, + "step": 276000 + }, + { + "epoch": 2.92, + "learning_rate": 4.854100488618256e-05, + "loss": 1.6988, + "step": 276500 + }, + { + "epoch": 2.92, + "learning_rate": 4.853836655867113e-05, + "loss": 1.6654, + "step": 277000 + }, + { + "epoch": 2.93, + "learning_rate": 4.85357282311597e-05, + "loss": 1.6675, + "step": 277500 + }, + { + "epoch": 2.93, + "learning_rate": 4.8533089903648285e-05, + "loss": 1.6286, + "step": 278000 + }, + { + "epoch": 2.94, + "learning_rate": 4.853045157613686e-05, + "loss": 1.6634, + "step": 278500 + }, + { + "epoch": 2.94, + "learning_rate": 4.852781324862543e-05, + "loss": 1.7349, + "step": 279000 + }, + { + "epoch": 2.95, + "learning_rate": 4.852517492111401e-05, + "loss": 1.6749, + "step": 279500 + }, + { + "epoch": 2.95, + "learning_rate": 4.852253659360259e-05, + "loss": 1.6642, + "step": 280000 + }, + { + "epoch": 2.96, + "learning_rate": 4.851989826609116e-05, + "loss": 1.6811, + "step": 280500 + }, + { + "epoch": 2.97, + "learning_rate": 4.851725993857974e-05, + "loss": 1.7147, + "step": 281000 + }, + { + "epoch": 2.97, + "learning_rate": 4.8514621611068313e-05, + "loss": 1.6932, + "step": 281500 + }, + { + "epoch": 2.98, + "learning_rate": 4.851198328355689e-05, + "loss": 1.6603, + "step": 282000 + }, + { + "epoch": 2.98, + "learning_rate": 4.8509344956045464e-05, + "loss": 1.7445, + "step": 282500 + }, + { + "epoch": 2.99, + "learning_rate": 4.850670662853404e-05, + "loss": 1.7289, + "step": 283000 + }, + { + "epoch": 2.99, + "learning_rate": 4.850406830102262e-05, + "loss": 1.7037, + "step": 283500 + }, + { + "epoch": 3.0, + "learning_rate": 4.850142997351119e-05, + "loss": 1.7119, + "step": 284000 + }, + { + "epoch": 3.0, + "learning_rate": 4.8498791645999766e-05, + "loss": 1.7227, + "step": 284500 + }, + { + "epoch": 3.01, + "learning_rate": 4.849615331848835e-05, + "loss": 1.7518, + "step": 285000 + }, + { + "epoch": 3.01, + "learning_rate": 4.8493514990976924e-05, + "loss": 1.7073, + "step": 285500 + }, + { + "epoch": 3.02, + "learning_rate": 4.84908766634655e-05, + "loss": 1.631, + "step": 286000 + }, + { + "epoch": 3.02, + "learning_rate": 4.8488238335954075e-05, + "loss": 1.7416, + "step": 286500 + }, + { + "epoch": 3.03, + "learning_rate": 4.848560000844265e-05, + "loss": 1.6357, + "step": 287000 + }, + { + "epoch": 3.03, + "learning_rate": 4.8482961680931226e-05, + "loss": 1.6751, + "step": 287500 + }, + { + "epoch": 3.04, + "learning_rate": 4.84803233534198e-05, + "loss": 1.6612, + "step": 288000 + }, + { + "epoch": 3.04, + "learning_rate": 4.847768502590838e-05, + "loss": 1.6609, + "step": 288500 + }, + { + "epoch": 3.05, + "learning_rate": 4.847504669839695e-05, + "loss": 1.6773, + "step": 289000 + }, + { + "epoch": 3.06, + "learning_rate": 4.847240837088553e-05, + "loss": 1.6682, + "step": 289500 + }, + { + "epoch": 3.06, + "learning_rate": 4.846977004337411e-05, + "loss": 1.7214, + "step": 290000 + }, + { + "epoch": 3.07, + "learning_rate": 4.8467131715862686e-05, + "loss": 1.6761, + "step": 290500 + }, + { + "epoch": 3.07, + "learning_rate": 4.8464493388351254e-05, + "loss": 1.6674, + "step": 291000 + }, + { + "epoch": 3.08, + "learning_rate": 4.846185506083984e-05, + "loss": 1.6683, + "step": 291500 + }, + { + "epoch": 3.08, + "learning_rate": 4.845921673332841e-05, + "loss": 1.7131, + "step": 292000 + }, + { + "epoch": 3.09, + "learning_rate": 4.845657840581699e-05, + "loss": 1.7381, + "step": 292500 + }, + { + "epoch": 3.09, + "learning_rate": 4.845394007830556e-05, + "loss": 1.6669, + "step": 293000 + }, + { + "epoch": 3.1, + "learning_rate": 4.845130175079414e-05, + "loss": 1.6997, + "step": 293500 + }, + { + "epoch": 3.1, + "learning_rate": 4.8448663423282714e-05, + "loss": 1.6871, + "step": 294000 + }, + { + "epoch": 3.11, + "learning_rate": 4.844602509577129e-05, + "loss": 1.6939, + "step": 294500 + }, + { + "epoch": 3.11, + "learning_rate": 4.844338676825987e-05, + "loss": 1.6947, + "step": 295000 + }, + { + "epoch": 3.12, + "learning_rate": 4.844074844074845e-05, + "loss": 1.6562, + "step": 295500 + }, + { + "epoch": 3.12, + "learning_rate": 4.8438110113237016e-05, + "loss": 1.6445, + "step": 296000 + }, + { + "epoch": 3.13, + "learning_rate": 4.843547178572559e-05, + "loss": 1.7058, + "step": 296500 + }, + { + "epoch": 3.13, + "learning_rate": 4.8432833458214174e-05, + "loss": 1.7239, + "step": 297000 + }, + { + "epoch": 3.14, + "learning_rate": 4.843019513070275e-05, + "loss": 1.6551, + "step": 297500 + }, + { + "epoch": 3.14, + "learning_rate": 4.842755680319132e-05, + "loss": 1.7141, + "step": 298000 + }, + { + "epoch": 3.15, + "learning_rate": 4.84249184756799e-05, + "loss": 1.6708, + "step": 298500 + }, + { + "epoch": 3.16, + "learning_rate": 4.8422280148168476e-05, + "loss": 1.6953, + "step": 299000 + }, + { + "epoch": 3.16, + "learning_rate": 4.841964182065705e-05, + "loss": 1.671, + "step": 299500 + }, + { + "epoch": 3.17, + "learning_rate": 4.841700349314563e-05, + "loss": 1.6486, + "step": 300000 + }, + { + "epoch": 3.17, + "learning_rate": 4.84143651656342e-05, + "loss": 1.706, + "step": 300500 + }, + { + "epoch": 3.18, + "learning_rate": 4.841172683812278e-05, + "loss": 1.6429, + "step": 301000 + }, + { + "epoch": 3.18, + "learning_rate": 4.840908851061135e-05, + "loss": 1.7043, + "step": 301500 + }, + { + "epoch": 3.19, + "learning_rate": 4.8406450183099936e-05, + "loss": 1.6538, + "step": 302000 + }, + { + "epoch": 3.19, + "learning_rate": 4.840381185558851e-05, + "loss": 1.7288, + "step": 302500 + }, + { + "epoch": 3.2, + "learning_rate": 4.840117352807708e-05, + "loss": 1.7037, + "step": 303000 + }, + { + "epoch": 3.2, + "learning_rate": 4.839853520056566e-05, + "loss": 1.6921, + "step": 303500 + }, + { + "epoch": 3.21, + "learning_rate": 4.839589687305424e-05, + "loss": 1.6823, + "step": 304000 + }, + { + "epoch": 3.21, + "learning_rate": 4.839325854554281e-05, + "loss": 1.6628, + "step": 304500 + }, + { + "epoch": 3.22, + "learning_rate": 4.839062021803139e-05, + "loss": 1.663, + "step": 305000 + }, + { + "epoch": 3.22, + "learning_rate": 4.8387981890519964e-05, + "loss": 1.6735, + "step": 305500 + }, + { + "epoch": 3.23, + "learning_rate": 4.838534356300854e-05, + "loss": 1.6844, + "step": 306000 + }, + { + "epoch": 3.23, + "learning_rate": 4.8382705235497115e-05, + "loss": 1.6927, + "step": 306500 + }, + { + "epoch": 3.24, + "learning_rate": 4.83800669079857e-05, + "loss": 1.6823, + "step": 307000 + }, + { + "epoch": 3.25, + "learning_rate": 4.8377428580474266e-05, + "loss": 1.6781, + "step": 307500 + }, + { + "epoch": 3.25, + "learning_rate": 4.837479025296284e-05, + "loss": 1.7053, + "step": 308000 + }, + { + "epoch": 3.26, + "learning_rate": 4.837215192545142e-05, + "loss": 1.6733, + "step": 308500 + }, + { + "epoch": 3.26, + "learning_rate": 4.836951359794e-05, + "loss": 1.6161, + "step": 309000 + }, + { + "epoch": 3.27, + "learning_rate": 4.8366875270428575e-05, + "loss": 1.6848, + "step": 309500 + }, + { + "epoch": 3.27, + "learning_rate": 4.836423694291714e-05, + "loss": 1.6697, + "step": 310000 + }, + { + "epoch": 3.28, + "learning_rate": 4.8361598615405726e-05, + "loss": 1.6078, + "step": 310500 + }, + { + "epoch": 3.28, + "learning_rate": 4.83589602878943e-05, + "loss": 1.6819, + "step": 311000 + }, + { + "epoch": 3.29, + "learning_rate": 4.8356321960382877e-05, + "loss": 1.6648, + "step": 311500 + }, + { + "epoch": 3.29, + "learning_rate": 4.835368363287145e-05, + "loss": 1.6675, + "step": 312000 + }, + { + "epoch": 3.3, + "learning_rate": 4.835104530536003e-05, + "loss": 1.6451, + "step": 312500 + }, + { + "epoch": 3.3, + "learning_rate": 4.83484069778486e-05, + "loss": 1.751, + "step": 313000 + }, + { + "epoch": 3.31, + "learning_rate": 4.834576865033718e-05, + "loss": 1.6985, + "step": 313500 + }, + { + "epoch": 3.31, + "learning_rate": 4.834313032282576e-05, + "loss": 1.6553, + "step": 314000 + }, + { + "epoch": 3.32, + "learning_rate": 4.8340491995314336e-05, + "loss": 1.676, + "step": 314500 + }, + { + "epoch": 3.32, + "learning_rate": 4.8337853667802905e-05, + "loss": 1.7431, + "step": 315000 + }, + { + "epoch": 3.33, + "learning_rate": 4.833521534029149e-05, + "loss": 1.6853, + "step": 315500 + }, + { + "epoch": 3.33, + "learning_rate": 4.833257701278006e-05, + "loss": 1.6736, + "step": 316000 + }, + { + "epoch": 3.34, + "learning_rate": 4.832993868526864e-05, + "loss": 1.6953, + "step": 316500 + }, + { + "epoch": 3.35, + "learning_rate": 4.8327300357757214e-05, + "loss": 1.7401, + "step": 317000 + }, + { + "epoch": 3.35, + "learning_rate": 4.832466203024579e-05, + "loss": 1.6518, + "step": 317500 + }, + { + "epoch": 3.36, + "learning_rate": 4.8322023702734365e-05, + "loss": 1.7008, + "step": 318000 + }, + { + "epoch": 3.36, + "learning_rate": 4.831938537522294e-05, + "loss": 1.6562, + "step": 318500 + }, + { + "epoch": 3.37, + "learning_rate": 4.831674704771152e-05, + "loss": 1.6616, + "step": 319000 + }, + { + "epoch": 3.37, + "learning_rate": 4.831410872020009e-05, + "loss": 1.6658, + "step": 319500 + }, + { + "epoch": 3.38, + "learning_rate": 4.8311470392688667e-05, + "loss": 1.7119, + "step": 320000 + }, + { + "epoch": 3.38, + "learning_rate": 4.830883206517724e-05, + "loss": 1.7166, + "step": 320500 + }, + { + "epoch": 3.39, + "learning_rate": 4.8306193737665824e-05, + "loss": 1.6877, + "step": 321000 + }, + { + "epoch": 3.39, + "learning_rate": 4.83035554101544e-05, + "loss": 1.7004, + "step": 321500 + }, + { + "epoch": 3.4, + "learning_rate": 4.830091708264297e-05, + "loss": 1.6745, + "step": 322000 + }, + { + "epoch": 3.4, + "learning_rate": 4.829827875513155e-05, + "loss": 1.7037, + "step": 322500 + }, + { + "epoch": 3.41, + "learning_rate": 4.8295640427620126e-05, + "loss": 1.6333, + "step": 323000 + }, + { + "epoch": 3.41, + "learning_rate": 4.82930021001087e-05, + "loss": 1.6723, + "step": 323500 + }, + { + "epoch": 3.42, + "learning_rate": 4.829036377259728e-05, + "loss": 1.6591, + "step": 324000 + }, + { + "epoch": 3.42, + "learning_rate": 4.828772544508585e-05, + "loss": 1.6465, + "step": 324500 + }, + { + "epoch": 3.43, + "learning_rate": 4.828508711757443e-05, + "loss": 1.7052, + "step": 325000 + }, + { + "epoch": 3.44, + "learning_rate": 4.8282448790063004e-05, + "loss": 1.7242, + "step": 325500 + }, + { + "epoch": 3.44, + "learning_rate": 4.8279810462551586e-05, + "loss": 1.6665, + "step": 326000 + }, + { + "epoch": 3.45, + "learning_rate": 4.8277172135040155e-05, + "loss": 1.7022, + "step": 326500 + }, + { + "epoch": 3.45, + "learning_rate": 4.827453380752873e-05, + "loss": 1.6468, + "step": 327000 + }, + { + "epoch": 3.46, + "learning_rate": 4.827189548001731e-05, + "loss": 1.6509, + "step": 327500 + }, + { + "epoch": 3.46, + "learning_rate": 4.826925715250589e-05, + "loss": 1.6806, + "step": 328000 + }, + { + "epoch": 3.47, + "learning_rate": 4.8266618824994463e-05, + "loss": 1.6762, + "step": 328500 + }, + { + "epoch": 3.47, + "learning_rate": 4.826398049748304e-05, + "loss": 1.6799, + "step": 329000 + }, + { + "epoch": 3.48, + "learning_rate": 4.8261342169971614e-05, + "loss": 1.6667, + "step": 329500 + }, + { + "epoch": 3.48, + "learning_rate": 4.825870384246019e-05, + "loss": 1.717, + "step": 330000 + }, + { + "epoch": 3.49, + "learning_rate": 4.8256065514948765e-05, + "loss": 1.6508, + "step": 330500 + }, + { + "epoch": 3.49, + "learning_rate": 4.825342718743735e-05, + "loss": 1.6641, + "step": 331000 + }, + { + "epoch": 3.5, + "learning_rate": 4.8250788859925916e-05, + "loss": 1.6673, + "step": 331500 + }, + { + "epoch": 3.5, + "learning_rate": 4.824815053241449e-05, + "loss": 1.6973, + "step": 332000 + }, + { + "epoch": 3.51, + "learning_rate": 4.824551220490307e-05, + "loss": 1.6409, + "step": 332500 + }, + { + "epoch": 3.51, + "learning_rate": 4.824287387739165e-05, + "loss": 1.6891, + "step": 333000 + }, + { + "epoch": 3.52, + "learning_rate": 4.8240235549880225e-05, + "loss": 1.6648, + "step": 333500 + }, + { + "epoch": 3.52, + "learning_rate": 4.8237597222368794e-05, + "loss": 1.6737, + "step": 334000 + }, + { + "epoch": 3.53, + "learning_rate": 4.8234958894857376e-05, + "loss": 1.6215, + "step": 334500 + }, + { + "epoch": 3.54, + "learning_rate": 4.823232056734595e-05, + "loss": 1.6787, + "step": 335000 + }, + { + "epoch": 3.54, + "learning_rate": 4.822968223983453e-05, + "loss": 1.6445, + "step": 335500 + }, + { + "epoch": 3.55, + "learning_rate": 4.82270439123231e-05, + "loss": 1.6563, + "step": 336000 + }, + { + "epoch": 3.55, + "learning_rate": 4.822440558481168e-05, + "loss": 1.755, + "step": 336500 + }, + { + "epoch": 3.56, + "learning_rate": 4.8221767257300253e-05, + "loss": 1.6341, + "step": 337000 + }, + { + "epoch": 3.56, + "learning_rate": 4.821912892978883e-05, + "loss": 1.5877, + "step": 337500 + }, + { + "epoch": 3.57, + "learning_rate": 4.821649060227741e-05, + "loss": 1.6857, + "step": 338000 + }, + { + "epoch": 3.57, + "learning_rate": 4.821385227476598e-05, + "loss": 1.6568, + "step": 338500 + }, + { + "epoch": 3.58, + "learning_rate": 4.8211213947254555e-05, + "loss": 1.6517, + "step": 339000 + }, + { + "epoch": 3.58, + "learning_rate": 4.820857561974314e-05, + "loss": 1.653, + "step": 339500 + }, + { + "epoch": 3.59, + "learning_rate": 4.820593729223171e-05, + "loss": 1.6615, + "step": 340000 + }, + { + "epoch": 3.59, + "learning_rate": 4.820329896472029e-05, + "loss": 1.606, + "step": 340500 + }, + { + "epoch": 3.6, + "learning_rate": 4.8200660637208864e-05, + "loss": 1.6582, + "step": 341000 + }, + { + "epoch": 3.6, + "learning_rate": 4.819802230969744e-05, + "loss": 1.6914, + "step": 341500 + }, + { + "epoch": 3.61, + "learning_rate": 4.8195383982186015e-05, + "loss": 1.657, + "step": 342000 + }, + { + "epoch": 3.61, + "learning_rate": 4.819274565467459e-05, + "loss": 1.7014, + "step": 342500 + }, + { + "epoch": 3.62, + "learning_rate": 4.819010732716317e-05, + "loss": 1.665, + "step": 343000 + }, + { + "epoch": 3.63, + "learning_rate": 4.818746899965174e-05, + "loss": 1.5978, + "step": 343500 + }, + { + "epoch": 3.63, + "learning_rate": 4.818483067214032e-05, + "loss": 1.6981, + "step": 344000 + }, + { + "epoch": 3.64, + "learning_rate": 4.818219234462889e-05, + "loss": 1.6625, + "step": 344500 + }, + { + "epoch": 3.64, + "learning_rate": 4.8179554017117475e-05, + "loss": 1.6385, + "step": 345000 + }, + { + "epoch": 3.65, + "learning_rate": 4.8176915689606044e-05, + "loss": 1.6216, + "step": 345500 + }, + { + "epoch": 3.65, + "learning_rate": 4.817427736209462e-05, + "loss": 1.7118, + "step": 346000 + }, + { + "epoch": 3.66, + "learning_rate": 4.81716390345832e-05, + "loss": 1.6888, + "step": 346500 + }, + { + "epoch": 3.66, + "learning_rate": 4.816900070707178e-05, + "loss": 1.6905, + "step": 347000 + }, + { + "epoch": 3.67, + "learning_rate": 4.816636237956035e-05, + "loss": 1.7083, + "step": 347500 + }, + { + "epoch": 3.67, + "learning_rate": 4.816372405204893e-05, + "loss": 1.6536, + "step": 348000 + }, + { + "epoch": 3.68, + "learning_rate": 4.81610857245375e-05, + "loss": 1.648, + "step": 348500 + }, + { + "epoch": 3.68, + "learning_rate": 4.815844739702608e-05, + "loss": 1.7008, + "step": 349000 + }, + { + "epoch": 3.69, + "learning_rate": 4.8155809069514654e-05, + "loss": 1.6465, + "step": 349500 + }, + { + "epoch": 3.69, + "learning_rate": 4.8153170742003236e-05, + "loss": 1.6125, + "step": 350000 + }, + { + "epoch": 3.7, + "learning_rate": 4.8150532414491805e-05, + "loss": 1.6014, + "step": 350500 + }, + { + "epoch": 3.7, + "learning_rate": 4.814789408698038e-05, + "loss": 1.6055, + "step": 351000 + }, + { + "epoch": 3.71, + "learning_rate": 4.814525575946896e-05, + "loss": 1.7267, + "step": 351500 + }, + { + "epoch": 3.71, + "learning_rate": 4.814261743195754e-05, + "loss": 1.7071, + "step": 352000 + }, + { + "epoch": 3.72, + "learning_rate": 4.8139979104446114e-05, + "loss": 1.656, + "step": 352500 + }, + { + "epoch": 3.73, + "learning_rate": 4.813734077693469e-05, + "loss": 1.7256, + "step": 353000 + }, + { + "epoch": 3.73, + "learning_rate": 4.8134702449423265e-05, + "loss": 1.6698, + "step": 353500 + }, + { + "epoch": 3.74, + "learning_rate": 4.813206412191184e-05, + "loss": 1.6392, + "step": 354000 + }, + { + "epoch": 3.74, + "learning_rate": 4.8129425794400416e-05, + "loss": 1.6474, + "step": 354500 + }, + { + "epoch": 3.75, + "learning_rate": 4.812678746688899e-05, + "loss": 1.6977, + "step": 355000 + }, + { + "epoch": 3.75, + "learning_rate": 4.812414913937757e-05, + "loss": 1.6203, + "step": 355500 + }, + { + "epoch": 3.76, + "learning_rate": 4.812151081186614e-05, + "loss": 1.7067, + "step": 356000 + }, + { + "epoch": 3.76, + "learning_rate": 4.811887248435472e-05, + "loss": 1.6473, + "step": 356500 + }, + { + "epoch": 3.77, + "learning_rate": 4.81162341568433e-05, + "loss": 1.661, + "step": 357000 + }, + { + "epoch": 3.77, + "learning_rate": 4.811359582933187e-05, + "loss": 1.694, + "step": 357500 + }, + { + "epoch": 3.78, + "learning_rate": 4.8110957501820444e-05, + "loss": 1.6565, + "step": 358000 + }, + { + "epoch": 3.78, + "learning_rate": 4.8108319174309027e-05, + "loss": 1.6908, + "step": 358500 + }, + { + "epoch": 3.79, + "learning_rate": 4.81056808467976e-05, + "loss": 1.6365, + "step": 359000 + }, + { + "epoch": 3.79, + "learning_rate": 4.810304251928618e-05, + "loss": 1.6668, + "step": 359500 + }, + { + "epoch": 3.8, + "learning_rate": 4.810040419177475e-05, + "loss": 1.6407, + "step": 360000 + }, + { + "epoch": 3.8, + "learning_rate": 4.809776586426333e-05, + "loss": 1.7032, + "step": 360500 + }, + { + "epoch": 3.81, + "learning_rate": 4.8095127536751904e-05, + "loss": 1.6689, + "step": 361000 + }, + { + "epoch": 3.82, + "learning_rate": 4.809248920924048e-05, + "loss": 1.6702, + "step": 361500 + }, + { + "epoch": 3.82, + "learning_rate": 4.808985088172906e-05, + "loss": 1.6768, + "step": 362000 + }, + { + "epoch": 3.83, + "learning_rate": 4.808721255421763e-05, + "loss": 1.6351, + "step": 362500 + }, + { + "epoch": 3.83, + "learning_rate": 4.8084574226706206e-05, + "loss": 1.6318, + "step": 363000 + }, + { + "epoch": 3.84, + "learning_rate": 4.808193589919479e-05, + "loss": 1.6762, + "step": 363500 + }, + { + "epoch": 3.84, + "learning_rate": 4.8079297571683364e-05, + "loss": 1.6904, + "step": 364000 + }, + { + "epoch": 3.85, + "learning_rate": 4.807665924417193e-05, + "loss": 1.6479, + "step": 364500 + }, + { + "epoch": 3.85, + "learning_rate": 4.8074020916660515e-05, + "loss": 1.685, + "step": 365000 + }, + { + "epoch": 3.86, + "learning_rate": 4.807138258914909e-05, + "loss": 1.6978, + "step": 365500 + }, + { + "epoch": 3.86, + "learning_rate": 4.8068744261637666e-05, + "loss": 1.7268, + "step": 366000 + }, + { + "epoch": 3.87, + "learning_rate": 4.806610593412624e-05, + "loss": 1.5727, + "step": 366500 + }, + { + "epoch": 3.87, + "learning_rate": 4.8063467606614817e-05, + "loss": 1.6757, + "step": 367000 + }, + { + "epoch": 3.88, + "learning_rate": 4.806082927910339e-05, + "loss": 1.6584, + "step": 367500 + }, + { + "epoch": 3.88, + "learning_rate": 4.805819095159197e-05, + "loss": 1.6526, + "step": 368000 + }, + { + "epoch": 3.89, + "learning_rate": 4.805555262408055e-05, + "loss": 1.6811, + "step": 368500 + }, + { + "epoch": 3.89, + "learning_rate": 4.8052914296569125e-05, + "loss": 1.6566, + "step": 369000 + }, + { + "epoch": 3.9, + "learning_rate": 4.8050275969057694e-05, + "loss": 1.6137, + "step": 369500 + }, + { + "epoch": 3.9, + "learning_rate": 4.804763764154627e-05, + "loss": 1.6556, + "step": 370000 + }, + { + "epoch": 3.91, + "learning_rate": 4.804499931403485e-05, + "loss": 1.6395, + "step": 370500 + }, + { + "epoch": 3.92, + "learning_rate": 4.804236098652343e-05, + "loss": 1.7093, + "step": 371000 + }, + { + "epoch": 3.92, + "learning_rate": 4.8039722659011996e-05, + "loss": 1.6868, + "step": 371500 + }, + { + "epoch": 3.93, + "learning_rate": 4.803708433150058e-05, + "loss": 1.6831, + "step": 372000 + }, + { + "epoch": 3.93, + "learning_rate": 4.8034446003989154e-05, + "loss": 1.6136, + "step": 372500 + }, + { + "epoch": 3.94, + "learning_rate": 4.803180767647773e-05, + "loss": 1.6435, + "step": 373000 + }, + { + "epoch": 3.94, + "learning_rate": 4.8029169348966305e-05, + "loss": 1.6391, + "step": 373500 + }, + { + "epoch": 3.95, + "learning_rate": 4.802653102145488e-05, + "loss": 1.6854, + "step": 374000 + }, + { + "epoch": 3.95, + "learning_rate": 4.8023892693943456e-05, + "loss": 1.6383, + "step": 374500 + }, + { + "epoch": 3.96, + "learning_rate": 4.802125436643203e-05, + "loss": 1.6545, + "step": 375000 + }, + { + "epoch": 3.96, + "learning_rate": 4.8018616038920613e-05, + "loss": 1.6104, + "step": 375500 + }, + { + "epoch": 3.97, + "learning_rate": 4.801597771140919e-05, + "loss": 1.6323, + "step": 376000 + }, + { + "epoch": 3.97, + "learning_rate": 4.801333938389776e-05, + "loss": 1.623, + "step": 376500 + }, + { + "epoch": 3.98, + "learning_rate": 4.801070105638634e-05, + "loss": 1.7045, + "step": 377000 + }, + { + "epoch": 3.98, + "learning_rate": 4.8008062728874915e-05, + "loss": 1.6174, + "step": 377500 + }, + { + "epoch": 3.99, + "learning_rate": 4.800542440136349e-05, + "loss": 1.6552, + "step": 378000 + }, + { + "epoch": 3.99, + "learning_rate": 4.8002786073852066e-05, + "loss": 1.6537, + "step": 378500 + }, + { + "epoch": 4.0, + "learning_rate": 4.800014774634064e-05, + "loss": 1.6549, + "step": 379000 + }, + { + "epoch": 4.0, + "learning_rate": 4.799750941882922e-05, + "loss": 1.6834, + "step": 379500 + }, + { + "epoch": 4.01, + "learning_rate": 4.799487109131779e-05, + "loss": 1.6471, + "step": 380000 + }, + { + "epoch": 4.02, + "learning_rate": 4.7992232763806375e-05, + "loss": 1.6518, + "step": 380500 + }, + { + "epoch": 4.02, + "learning_rate": 4.7989594436294944e-05, + "loss": 1.6697, + "step": 381000 + }, + { + "epoch": 4.03, + "learning_rate": 4.798695610878352e-05, + "loss": 1.6707, + "step": 381500 + }, + { + "epoch": 4.03, + "learning_rate": 4.7984317781272095e-05, + "loss": 1.6655, + "step": 382000 + }, + { + "epoch": 4.04, + "learning_rate": 4.798167945376068e-05, + "loss": 1.6576, + "step": 382500 + }, + { + "epoch": 4.04, + "learning_rate": 4.797904112624925e-05, + "loss": 1.6724, + "step": 383000 + }, + { + "epoch": 4.05, + "learning_rate": 4.797640279873782e-05, + "loss": 1.725, + "step": 383500 + }, + { + "epoch": 4.05, + "learning_rate": 4.7973764471226404e-05, + "loss": 1.6289, + "step": 384000 + }, + { + "epoch": 4.06, + "learning_rate": 4.797112614371498e-05, + "loss": 1.6394, + "step": 384500 + }, + { + "epoch": 4.06, + "learning_rate": 4.7968487816203554e-05, + "loss": 1.5837, + "step": 385000 + }, + { + "epoch": 4.07, + "learning_rate": 4.796584948869213e-05, + "loss": 1.64, + "step": 385500 + }, + { + "epoch": 4.07, + "learning_rate": 4.7963211161180705e-05, + "loss": 1.6711, + "step": 386000 + }, + { + "epoch": 4.08, + "learning_rate": 4.796057283366928e-05, + "loss": 1.6392, + "step": 386500 + }, + { + "epoch": 4.08, + "learning_rate": 4.7957934506157856e-05, + "loss": 1.6572, + "step": 387000 + }, + { + "epoch": 4.09, + "learning_rate": 4.795529617864644e-05, + "loss": 1.6576, + "step": 387500 + }, + { + "epoch": 4.09, + "learning_rate": 4.7952657851135014e-05, + "loss": 1.6883, + "step": 388000 + }, + { + "epoch": 4.1, + "learning_rate": 4.795001952362358e-05, + "loss": 1.6942, + "step": 388500 + }, + { + "epoch": 4.11, + "learning_rate": 4.7947381196112165e-05, + "loss": 1.6218, + "step": 389000 + }, + { + "epoch": 4.11, + "learning_rate": 4.794474286860074e-05, + "loss": 1.6295, + "step": 389500 + }, + { + "epoch": 4.12, + "learning_rate": 4.7942104541089316e-05, + "loss": 1.6481, + "step": 390000 + }, + { + "epoch": 4.12, + "learning_rate": 4.793946621357789e-05, + "loss": 1.6542, + "step": 390500 + }, + { + "epoch": 4.13, + "learning_rate": 4.793682788606647e-05, + "loss": 1.6281, + "step": 391000 + }, + { + "epoch": 4.13, + "learning_rate": 4.793418955855504e-05, + "loss": 1.7108, + "step": 391500 + }, + { + "epoch": 4.14, + "learning_rate": 4.793155123104362e-05, + "loss": 1.596, + "step": 392000 + }, + { + "epoch": 4.14, + "learning_rate": 4.79289129035322e-05, + "loss": 1.5914, + "step": 392500 + }, + { + "epoch": 4.15, + "learning_rate": 4.792627457602077e-05, + "loss": 1.6497, + "step": 393000 + }, + { + "epoch": 4.15, + "learning_rate": 4.7923636248509345e-05, + "loss": 1.6767, + "step": 393500 + }, + { + "epoch": 4.16, + "learning_rate": 4.792099792099792e-05, + "loss": 1.6309, + "step": 394000 + }, + { + "epoch": 4.16, + "learning_rate": 4.79183595934865e-05, + "loss": 1.6643, + "step": 394500 + }, + { + "epoch": 4.17, + "learning_rate": 4.791572126597508e-05, + "loss": 1.736, + "step": 395000 + }, + { + "epoch": 4.17, + "learning_rate": 4.7913082938463646e-05, + "loss": 1.6474, + "step": 395500 + }, + { + "epoch": 4.18, + "learning_rate": 4.791044461095223e-05, + "loss": 1.6526, + "step": 396000 + }, + { + "epoch": 4.18, + "learning_rate": 4.7907806283440804e-05, + "loss": 1.6095, + "step": 396500 + }, + { + "epoch": 4.19, + "learning_rate": 4.790516795592938e-05, + "loss": 1.5997, + "step": 397000 + }, + { + "epoch": 4.19, + "learning_rate": 4.7902529628417955e-05, + "loss": 1.6006, + "step": 397500 + }, + { + "epoch": 4.2, + "learning_rate": 4.789989130090653e-05, + "loss": 1.6454, + "step": 398000 + }, + { + "epoch": 4.21, + "learning_rate": 4.7897252973395106e-05, + "loss": 1.6686, + "step": 398500 + }, + { + "epoch": 4.21, + "learning_rate": 4.789461464588368e-05, + "loss": 1.6563, + "step": 399000 + }, + { + "epoch": 4.22, + "learning_rate": 4.7891976318372264e-05, + "loss": 1.6452, + "step": 399500 + }, + { + "epoch": 4.22, + "learning_rate": 4.788933799086083e-05, + "loss": 1.5825, + "step": 400000 + }, + { + "epoch": 4.23, + "learning_rate": 4.788669966334941e-05, + "loss": 1.6478, + "step": 400500 + }, + { + "epoch": 4.23, + "learning_rate": 4.788406133583799e-05, + "loss": 1.6919, + "step": 401000 + }, + { + "epoch": 4.24, + "learning_rate": 4.7881423008326566e-05, + "loss": 1.6677, + "step": 401500 + }, + { + "epoch": 4.24, + "learning_rate": 4.787878468081514e-05, + "loss": 1.5854, + "step": 402000 + }, + { + "epoch": 4.25, + "learning_rate": 4.787614635330372e-05, + "loss": 1.6133, + "step": 402500 + }, + { + "epoch": 4.25, + "learning_rate": 4.787350802579229e-05, + "loss": 1.6278, + "step": 403000 + }, + { + "epoch": 4.26, + "learning_rate": 4.787086969828087e-05, + "loss": 1.6411, + "step": 403500 + }, + { + "epoch": 4.26, + "learning_rate": 4.786823137076944e-05, + "loss": 1.618, + "step": 404000 + }, + { + "epoch": 4.27, + "learning_rate": 4.7865593043258026e-05, + "loss": 1.6237, + "step": 404500 + }, + { + "epoch": 4.27, + "learning_rate": 4.7862954715746594e-05, + "loss": 1.6506, + "step": 405000 + }, + { + "epoch": 4.28, + "learning_rate": 4.786031638823517e-05, + "loss": 1.6734, + "step": 405500 + }, + { + "epoch": 4.28, + "learning_rate": 4.7857678060723745e-05, + "loss": 1.594, + "step": 406000 + }, + { + "epoch": 4.29, + "learning_rate": 4.785503973321233e-05, + "loss": 1.6166, + "step": 406500 + }, + { + "epoch": 4.3, + "learning_rate": 4.78524014057009e-05, + "loss": 1.6727, + "step": 407000 + }, + { + "epoch": 4.3, + "learning_rate": 4.784976307818947e-05, + "loss": 1.6498, + "step": 407500 + }, + { + "epoch": 4.31, + "learning_rate": 4.7847124750678054e-05, + "loss": 1.6661, + "step": 408000 + }, + { + "epoch": 4.31, + "learning_rate": 4.784448642316663e-05, + "loss": 1.6566, + "step": 408500 + }, + { + "epoch": 4.32, + "learning_rate": 4.7841848095655205e-05, + "loss": 1.6268, + "step": 409000 + }, + { + "epoch": 4.32, + "learning_rate": 4.783920976814378e-05, + "loss": 1.6362, + "step": 409500 + }, + { + "epoch": 4.33, + "learning_rate": 4.7836571440632356e-05, + "loss": 1.6837, + "step": 410000 + }, + { + "epoch": 4.33, + "learning_rate": 4.783393311312093e-05, + "loss": 1.6431, + "step": 410500 + }, + { + "epoch": 4.34, + "learning_rate": 4.783129478560951e-05, + "loss": 1.6104, + "step": 411000 + }, + { + "epoch": 4.34, + "learning_rate": 4.782865645809809e-05, + "loss": 1.6288, + "step": 411500 + }, + { + "epoch": 4.35, + "learning_rate": 4.782601813058666e-05, + "loss": 1.6331, + "step": 412000 + }, + { + "epoch": 4.35, + "learning_rate": 4.782337980307523e-05, + "loss": 1.6347, + "step": 412500 + }, + { + "epoch": 4.36, + "learning_rate": 4.7820741475563816e-05, + "loss": 1.6032, + "step": 413000 + }, + { + "epoch": 4.36, + "learning_rate": 4.781810314805239e-05, + "loss": 1.625, + "step": 413500 + }, + { + "epoch": 4.37, + "learning_rate": 4.781546482054097e-05, + "loss": 1.6519, + "step": 414000 + }, + { + "epoch": 4.37, + "learning_rate": 4.781282649302954e-05, + "loss": 1.6903, + "step": 414500 + }, + { + "epoch": 4.38, + "learning_rate": 4.781018816551812e-05, + "loss": 1.6407, + "step": 415000 + }, + { + "epoch": 4.38, + "learning_rate": 4.780754983800669e-05, + "loss": 1.5923, + "step": 415500 + }, + { + "epoch": 4.39, + "learning_rate": 4.780491151049527e-05, + "loss": 1.641, + "step": 416000 + }, + { + "epoch": 4.4, + "learning_rate": 4.780227318298385e-05, + "loss": 1.6768, + "step": 416500 + }, + { + "epoch": 4.4, + "learning_rate": 4.779963485547242e-05, + "loss": 1.6736, + "step": 417000 + }, + { + "epoch": 4.41, + "learning_rate": 4.7796996527960995e-05, + "loss": 1.6425, + "step": 417500 + }, + { + "epoch": 4.41, + "learning_rate": 4.779435820044957e-05, + "loss": 1.6744, + "step": 418000 + }, + { + "epoch": 4.42, + "learning_rate": 4.779171987293815e-05, + "loss": 1.6647, + "step": 418500 + }, + { + "epoch": 4.42, + "learning_rate": 4.778908154542672e-05, + "loss": 1.6718, + "step": 419000 + }, + { + "epoch": 4.43, + "learning_rate": 4.77864432179153e-05, + "loss": 1.6699, + "step": 419500 + }, + { + "epoch": 4.43, + "learning_rate": 4.778380489040388e-05, + "loss": 1.6802, + "step": 420000 + }, + { + "epoch": 4.44, + "learning_rate": 4.7781166562892455e-05, + "loss": 1.6153, + "step": 420500 + }, + { + "epoch": 4.44, + "learning_rate": 4.777852823538103e-05, + "loss": 1.6693, + "step": 421000 + }, + { + "epoch": 4.45, + "learning_rate": 4.7775889907869606e-05, + "loss": 1.5998, + "step": 421500 + }, + { + "epoch": 4.45, + "learning_rate": 4.777325158035818e-05, + "loss": 1.638, + "step": 422000 + }, + { + "epoch": 4.46, + "learning_rate": 4.777061325284676e-05, + "loss": 1.6581, + "step": 422500 + }, + { + "epoch": 4.46, + "learning_rate": 4.776797492533533e-05, + "loss": 1.6443, + "step": 423000 + }, + { + "epoch": 4.47, + "learning_rate": 4.7765336597823914e-05, + "loss": 1.5833, + "step": 423500 + }, + { + "epoch": 4.47, + "learning_rate": 4.776269827031248e-05, + "loss": 1.6991, + "step": 424000 + }, + { + "epoch": 4.48, + "learning_rate": 4.776005994280106e-05, + "loss": 1.6339, + "step": 424500 + }, + { + "epoch": 4.49, + "learning_rate": 4.775742161528964e-05, + "loss": 1.6509, + "step": 425000 + }, + { + "epoch": 4.49, + "learning_rate": 4.7754783287778216e-05, + "loss": 1.6765, + "step": 425500 + }, + { + "epoch": 4.5, + "learning_rate": 4.775214496026679e-05, + "loss": 1.5962, + "step": 426000 + }, + { + "epoch": 4.5, + "learning_rate": 4.774950663275537e-05, + "loss": 1.6448, + "step": 426500 + }, + { + "epoch": 4.51, + "learning_rate": 4.774686830524394e-05, + "loss": 1.6225, + "step": 427000 + }, + { + "epoch": 4.51, + "learning_rate": 4.774422997773252e-05, + "loss": 1.5847, + "step": 427500 + }, + { + "epoch": 4.52, + "learning_rate": 4.7741591650221094e-05, + "loss": 1.6683, + "step": 428000 + }, + { + "epoch": 4.52, + "learning_rate": 4.773895332270967e-05, + "loss": 1.6106, + "step": 428500 + }, + { + "epoch": 4.53, + "learning_rate": 4.7736314995198245e-05, + "loss": 1.6308, + "step": 429000 + }, + { + "epoch": 4.53, + "learning_rate": 4.773367666768682e-05, + "loss": 1.6596, + "step": 429500 + }, + { + "epoch": 4.54, + "learning_rate": 4.7731038340175396e-05, + "loss": 1.6046, + "step": 430000 + }, + { + "epoch": 4.54, + "learning_rate": 4.772840001266398e-05, + "loss": 1.7039, + "step": 430500 + }, + { + "epoch": 4.55, + "learning_rate": 4.772576168515255e-05, + "loss": 1.6439, + "step": 431000 + }, + { + "epoch": 4.55, + "learning_rate": 4.772312335764112e-05, + "loss": 1.6718, + "step": 431500 + }, + { + "epoch": 4.56, + "learning_rate": 4.7720485030129704e-05, + "loss": 1.6171, + "step": 432000 + }, + { + "epoch": 4.56, + "learning_rate": 4.771784670261828e-05, + "loss": 1.6606, + "step": 432500 + }, + { + "epoch": 4.57, + "learning_rate": 4.7715208375106855e-05, + "loss": 1.6203, + "step": 433000 + }, + { + "epoch": 4.57, + "learning_rate": 4.771257004759543e-05, + "loss": 1.6291, + "step": 433500 + }, + { + "epoch": 4.58, + "learning_rate": 4.7709931720084006e-05, + "loss": 1.6647, + "step": 434000 + }, + { + "epoch": 4.59, + "learning_rate": 4.770729339257258e-05, + "loss": 1.6363, + "step": 434500 + }, + { + "epoch": 4.59, + "learning_rate": 4.770465506506116e-05, + "loss": 1.6679, + "step": 435000 + }, + { + "epoch": 4.6, + "learning_rate": 4.770201673754974e-05, + "loss": 1.611, + "step": 435500 + }, + { + "epoch": 4.6, + "learning_rate": 4.769937841003831e-05, + "loss": 1.6905, + "step": 436000 + }, + { + "epoch": 4.61, + "learning_rate": 4.7696740082526884e-05, + "loss": 1.6785, + "step": 436500 + }, + { + "epoch": 4.61, + "learning_rate": 4.7694101755015466e-05, + "loss": 1.6331, + "step": 437000 + }, + { + "epoch": 4.62, + "learning_rate": 4.769146342750404e-05, + "loss": 1.6846, + "step": 437500 + }, + { + "epoch": 4.62, + "learning_rate": 4.768882509999261e-05, + "loss": 1.5959, + "step": 438000 + }, + { + "epoch": 4.63, + "learning_rate": 4.768618677248119e-05, + "loss": 1.622, + "step": 438500 + }, + { + "epoch": 4.63, + "learning_rate": 4.768354844496977e-05, + "loss": 1.6951, + "step": 439000 + }, + { + "epoch": 4.64, + "learning_rate": 4.7680910117458344e-05, + "loss": 1.6167, + "step": 439500 + }, + { + "epoch": 4.64, + "learning_rate": 4.767827178994692e-05, + "loss": 1.6469, + "step": 440000 + }, + { + "epoch": 4.65, + "learning_rate": 4.7675633462435495e-05, + "loss": 1.6068, + "step": 440500 + }, + { + "epoch": 4.65, + "learning_rate": 4.767299513492407e-05, + "loss": 1.6256, + "step": 441000 + }, + { + "epoch": 4.66, + "learning_rate": 4.7670356807412646e-05, + "loss": 1.6095, + "step": 441500 + }, + { + "epoch": 4.66, + "learning_rate": 4.766771847990123e-05, + "loss": 1.6369, + "step": 442000 + }, + { + "epoch": 4.67, + "learning_rate": 4.76650801523898e-05, + "loss": 1.6118, + "step": 442500 + }, + { + "epoch": 4.68, + "learning_rate": 4.766244182487837e-05, + "loss": 1.6545, + "step": 443000 + }, + { + "epoch": 4.68, + "learning_rate": 4.765980349736695e-05, + "loss": 1.6059, + "step": 443500 + }, + { + "epoch": 4.69, + "learning_rate": 4.765716516985553e-05, + "loss": 1.6684, + "step": 444000 + }, + { + "epoch": 4.69, + "learning_rate": 4.7654526842344105e-05, + "loss": 1.6138, + "step": 444500 + }, + { + "epoch": 4.7, + "learning_rate": 4.765188851483268e-05, + "loss": 1.6432, + "step": 445000 + }, + { + "epoch": 4.7, + "learning_rate": 4.7649250187321256e-05, + "loss": 1.6021, + "step": 445500 + }, + { + "epoch": 4.71, + "learning_rate": 4.764661185980983e-05, + "loss": 1.6009, + "step": 446000 + }, + { + "epoch": 4.71, + "learning_rate": 4.764397353229841e-05, + "loss": 1.6686, + "step": 446500 + }, + { + "epoch": 4.72, + "learning_rate": 4.764133520478698e-05, + "loss": 1.613, + "step": 447000 + }, + { + "epoch": 4.72, + "learning_rate": 4.763869687727556e-05, + "loss": 1.5977, + "step": 447500 + }, + { + "epoch": 4.73, + "learning_rate": 4.7636058549764134e-05, + "loss": 1.6445, + "step": 448000 + }, + { + "epoch": 4.73, + "learning_rate": 4.763342022225271e-05, + "loss": 1.6953, + "step": 448500 + }, + { + "epoch": 4.74, + "learning_rate": 4.763078189474129e-05, + "loss": 1.6407, + "step": 449000 + }, + { + "epoch": 4.74, + "learning_rate": 4.762814356722987e-05, + "loss": 1.6648, + "step": 449500 + }, + { + "epoch": 4.75, + "learning_rate": 4.7625505239718436e-05, + "loss": 1.6102, + "step": 450000 + }, + { + "epoch": 4.75, + "learning_rate": 4.762286691220702e-05, + "loss": 1.6217, + "step": 450500 + }, + { + "epoch": 4.76, + "learning_rate": 4.762022858469559e-05, + "loss": 1.712, + "step": 451000 + }, + { + "epoch": 4.76, + "learning_rate": 4.761759025718417e-05, + "loss": 1.6443, + "step": 451500 + }, + { + "epoch": 4.77, + "learning_rate": 4.7614951929672744e-05, + "loss": 1.5958, + "step": 452000 + }, + { + "epoch": 4.78, + "learning_rate": 4.761231360216132e-05, + "loss": 1.646, + "step": 452500 + }, + { + "epoch": 4.78, + "learning_rate": 4.7609675274649895e-05, + "loss": 1.6038, + "step": 453000 + }, + { + "epoch": 4.79, + "learning_rate": 4.760703694713847e-05, + "loss": 1.6476, + "step": 453500 + }, + { + "epoch": 4.79, + "learning_rate": 4.760439861962705e-05, + "loss": 1.5922, + "step": 454000 + }, + { + "epoch": 4.8, + "learning_rate": 4.760176029211563e-05, + "loss": 1.6415, + "step": 454500 + }, + { + "epoch": 4.8, + "learning_rate": 4.75991219646042e-05, + "loss": 1.6584, + "step": 455000 + }, + { + "epoch": 4.81, + "learning_rate": 4.759648363709277e-05, + "loss": 1.5969, + "step": 455500 + }, + { + "epoch": 4.81, + "learning_rate": 4.7593845309581355e-05, + "loss": 1.7003, + "step": 456000 + }, + { + "epoch": 4.82, + "learning_rate": 4.759120698206993e-05, + "loss": 1.619, + "step": 456500 + }, + { + "epoch": 4.82, + "learning_rate": 4.75885686545585e-05, + "loss": 1.5629, + "step": 457000 + }, + { + "epoch": 4.83, + "learning_rate": 4.758593032704708e-05, + "loss": 1.6749, + "step": 457500 + }, + { + "epoch": 4.83, + "learning_rate": 4.758329199953566e-05, + "loss": 1.6881, + "step": 458000 + }, + { + "epoch": 4.84, + "learning_rate": 4.758065367202423e-05, + "loss": 1.5868, + "step": 458500 + }, + { + "epoch": 4.84, + "learning_rate": 4.757801534451281e-05, + "loss": 1.6644, + "step": 459000 + }, + { + "epoch": 4.85, + "learning_rate": 4.7575377017001383e-05, + "loss": 1.5954, + "step": 459500 + }, + { + "epoch": 4.85, + "learning_rate": 4.757273868948996e-05, + "loss": 1.5997, + "step": 460000 + }, + { + "epoch": 4.86, + "learning_rate": 4.7570100361978534e-05, + "loss": 1.5938, + "step": 460500 + }, + { + "epoch": 4.87, + "learning_rate": 4.756746203446712e-05, + "loss": 1.6152, + "step": 461000 + }, + { + "epoch": 4.87, + "learning_rate": 4.756482370695569e-05, + "loss": 1.6343, + "step": 461500 + }, + { + "epoch": 4.88, + "learning_rate": 4.756218537944426e-05, + "loss": 1.6591, + "step": 462000 + }, + { + "epoch": 4.88, + "learning_rate": 4.755954705193284e-05, + "loss": 1.6547, + "step": 462500 + }, + { + "epoch": 4.89, + "learning_rate": 4.755690872442142e-05, + "loss": 1.6118, + "step": 463000 + }, + { + "epoch": 4.89, + "learning_rate": 4.7554270396909994e-05, + "loss": 1.6258, + "step": 463500 + }, + { + "epoch": 4.9, + "learning_rate": 4.755163206939857e-05, + "loss": 1.6307, + "step": 464000 + }, + { + "epoch": 4.9, + "learning_rate": 4.7548993741887145e-05, + "loss": 1.6158, + "step": 464500 + }, + { + "epoch": 4.91, + "learning_rate": 4.754635541437572e-05, + "loss": 1.7294, + "step": 465000 + }, + { + "epoch": 4.91, + "learning_rate": 4.7543717086864296e-05, + "loss": 1.6328, + "step": 465500 + }, + { + "epoch": 4.92, + "learning_rate": 4.754107875935288e-05, + "loss": 1.6578, + "step": 466000 + }, + { + "epoch": 4.92, + "learning_rate": 4.753844043184145e-05, + "loss": 1.6379, + "step": 466500 + }, + { + "epoch": 4.93, + "learning_rate": 4.753580210433002e-05, + "loss": 1.6438, + "step": 467000 + }, + { + "epoch": 4.93, + "learning_rate": 4.75331637768186e-05, + "loss": 1.6156, + "step": 467500 + }, + { + "epoch": 4.94, + "learning_rate": 4.753052544930718e-05, + "loss": 1.6282, + "step": 468000 + }, + { + "epoch": 4.94, + "learning_rate": 4.7527887121795756e-05, + "loss": 1.6226, + "step": 468500 + }, + { + "epoch": 4.95, + "learning_rate": 4.7525248794284324e-05, + "loss": 1.6445, + "step": 469000 + }, + { + "epoch": 4.95, + "learning_rate": 4.752261046677291e-05, + "loss": 1.6046, + "step": 469500 + }, + { + "epoch": 4.96, + "learning_rate": 4.751997213926148e-05, + "loss": 1.6112, + "step": 470000 + }, + { + "epoch": 4.97, + "learning_rate": 4.751733381175006e-05, + "loss": 1.6427, + "step": 470500 + }, + { + "epoch": 4.97, + "learning_rate": 4.751469548423863e-05, + "loss": 1.6069, + "step": 471000 + }, + { + "epoch": 4.98, + "learning_rate": 4.751205715672721e-05, + "loss": 1.5632, + "step": 471500 + }, + { + "epoch": 4.98, + "learning_rate": 4.7509418829215784e-05, + "loss": 1.5718, + "step": 472000 + }, + { + "epoch": 4.99, + "learning_rate": 4.750678050170436e-05, + "loss": 1.6254, + "step": 472500 + }, + { + "epoch": 4.99, + "learning_rate": 4.750414217419294e-05, + "loss": 1.5766, + "step": 473000 + }, + { + "epoch": 5.0, + "learning_rate": 4.750150384668152e-05, + "loss": 1.6458, + "step": 473500 + }, + { + "epoch": 5.0, + "learning_rate": 4.7498865519170086e-05, + "loss": 1.6088, + "step": 474000 + }, + { + "epoch": 5.01, + "learning_rate": 4.749622719165867e-05, + "loss": 1.6618, + "step": 474500 + }, + { + "epoch": 5.01, + "learning_rate": 4.7493588864147244e-05, + "loss": 1.6301, + "step": 475000 + }, + { + "epoch": 5.02, + "learning_rate": 4.749095053663582e-05, + "loss": 1.5825, + "step": 475500 + }, + { + "epoch": 5.02, + "learning_rate": 4.7488312209124395e-05, + "loss": 1.5824, + "step": 476000 + }, + { + "epoch": 5.03, + "learning_rate": 4.748567388161297e-05, + "loss": 1.6586, + "step": 476500 + }, + { + "epoch": 5.03, + "learning_rate": 4.7483035554101546e-05, + "loss": 1.6123, + "step": 477000 + }, + { + "epoch": 5.04, + "learning_rate": 4.748039722659012e-05, + "loss": 1.6036, + "step": 477500 + }, + { + "epoch": 5.04, + "learning_rate": 4.7477758899078704e-05, + "loss": 1.6296, + "step": 478000 + }, + { + "epoch": 5.05, + "learning_rate": 4.747512057156727e-05, + "loss": 1.5868, + "step": 478500 + }, + { + "epoch": 5.06, + "learning_rate": 4.747248224405585e-05, + "loss": 1.5925, + "step": 479000 + }, + { + "epoch": 5.06, + "learning_rate": 4.746984391654442e-05, + "loss": 1.6261, + "step": 479500 + }, + { + "epoch": 5.07, + "learning_rate": 4.7467205589033005e-05, + "loss": 1.5992, + "step": 480000 + }, + { + "epoch": 5.07, + "learning_rate": 4.746456726152158e-05, + "loss": 1.6335, + "step": 480500 + }, + { + "epoch": 5.08, + "learning_rate": 4.746192893401015e-05, + "loss": 1.6818, + "step": 481000 + }, + { + "epoch": 5.08, + "learning_rate": 4.745929060649873e-05, + "loss": 1.62, + "step": 481500 + }, + { + "epoch": 5.09, + "learning_rate": 4.745665227898731e-05, + "loss": 1.6744, + "step": 482000 + }, + { + "epoch": 5.09, + "learning_rate": 4.745401395147588e-05, + "loss": 1.5865, + "step": 482500 + }, + { + "epoch": 5.1, + "learning_rate": 4.745137562396446e-05, + "loss": 1.6044, + "step": 483000 + }, + { + "epoch": 5.1, + "learning_rate": 4.7448737296453034e-05, + "loss": 1.5615, + "step": 483500 + }, + { + "epoch": 5.11, + "learning_rate": 4.744609896894161e-05, + "loss": 1.6365, + "step": 484000 + }, + { + "epoch": 5.11, + "learning_rate": 4.7443460641430185e-05, + "loss": 1.6027, + "step": 484500 + }, + { + "epoch": 5.12, + "learning_rate": 4.744082231391877e-05, + "loss": 1.5974, + "step": 485000 + }, + { + "epoch": 5.12, + "learning_rate": 4.7438183986407336e-05, + "loss": 1.6319, + "step": 485500 + }, + { + "epoch": 5.13, + "learning_rate": 4.743554565889591e-05, + "loss": 1.6239, + "step": 486000 + }, + { + "epoch": 5.13, + "learning_rate": 4.7432907331384494e-05, + "loss": 1.6054, + "step": 486500 + }, + { + "epoch": 5.14, + "learning_rate": 4.743026900387307e-05, + "loss": 1.5867, + "step": 487000 + }, + { + "epoch": 5.14, + "learning_rate": 4.7427630676361645e-05, + "loss": 1.6207, + "step": 487500 + }, + { + "epoch": 5.15, + "learning_rate": 4.742499234885022e-05, + "loss": 1.594, + "step": 488000 + }, + { + "epoch": 5.16, + "learning_rate": 4.7422354021338796e-05, + "loss": 1.5996, + "step": 488500 + }, + { + "epoch": 5.16, + "learning_rate": 4.741971569382737e-05, + "loss": 1.658, + "step": 489000 + }, + { + "epoch": 5.17, + "learning_rate": 4.7417077366315947e-05, + "loss": 1.6473, + "step": 489500 + }, + { + "epoch": 5.17, + "learning_rate": 4.741443903880453e-05, + "loss": 1.5704, + "step": 490000 + }, + { + "epoch": 5.18, + "learning_rate": 4.74118007112931e-05, + "loss": 1.6053, + "step": 490500 + }, + { + "epoch": 5.18, + "learning_rate": 4.740916238378167e-05, + "loss": 1.611, + "step": 491000 + }, + { + "epoch": 5.19, + "learning_rate": 4.740652405627025e-05, + "loss": 1.5511, + "step": 491500 + }, + { + "epoch": 5.19, + "learning_rate": 4.740388572875883e-05, + "loss": 1.555, + "step": 492000 + }, + { + "epoch": 5.2, + "learning_rate": 4.7401247401247406e-05, + "loss": 1.5561, + "step": 492500 + }, + { + "epoch": 5.2, + "learning_rate": 4.7398609073735975e-05, + "loss": 1.6389, + "step": 493000 + }, + { + "epoch": 5.21, + "learning_rate": 4.739597074622456e-05, + "loss": 1.6217, + "step": 493500 + }, + { + "epoch": 5.21, + "learning_rate": 4.739333241871313e-05, + "loss": 1.5734, + "step": 494000 + }, + { + "epoch": 5.22, + "learning_rate": 4.739069409120171e-05, + "loss": 1.6123, + "step": 494500 + }, + { + "epoch": 5.22, + "learning_rate": 4.7388055763690284e-05, + "loss": 1.6724, + "step": 495000 + }, + { + "epoch": 5.23, + "learning_rate": 4.738541743617886e-05, + "loss": 1.5765, + "step": 495500 + }, + { + "epoch": 5.23, + "learning_rate": 4.7382779108667435e-05, + "loss": 1.5895, + "step": 496000 + }, + { + "epoch": 5.24, + "learning_rate": 4.738014078115601e-05, + "loss": 1.6198, + "step": 496500 + }, + { + "epoch": 5.24, + "learning_rate": 4.737750245364459e-05, + "loss": 1.6433, + "step": 497000 + }, + { + "epoch": 5.25, + "learning_rate": 4.737486412613316e-05, + "loss": 1.5787, + "step": 497500 + }, + { + "epoch": 5.26, + "learning_rate": 4.7372225798621737e-05, + "loss": 1.6067, + "step": 498000 + }, + { + "epoch": 5.26, + "learning_rate": 4.736958747111032e-05, + "loss": 1.6044, + "step": 498500 + }, + { + "epoch": 5.27, + "learning_rate": 4.7366949143598894e-05, + "loss": 1.5846, + "step": 499000 + }, + { + "epoch": 5.27, + "learning_rate": 4.736431081608747e-05, + "loss": 1.546, + "step": 499500 + }, + { + "epoch": 5.28, + "learning_rate": 4.7361672488576045e-05, + "loss": 1.6272, + "step": 500000 + }, + { + "epoch": 5.28, + "learning_rate": 4.735903416106462e-05, + "loss": 1.5858, + "step": 500500 + }, + { + "epoch": 5.29, + "learning_rate": 4.7356395833553196e-05, + "loss": 1.5824, + "step": 501000 + }, + { + "epoch": 5.29, + "learning_rate": 4.735375750604177e-05, + "loss": 1.6254, + "step": 501500 + }, + { + "epoch": 5.3, + "learning_rate": 4.7351119178530354e-05, + "loss": 1.6031, + "step": 502000 + }, + { + "epoch": 5.3, + "learning_rate": 4.734848085101892e-05, + "loss": 1.58, + "step": 502500 + }, + { + "epoch": 5.31, + "learning_rate": 4.73458425235075e-05, + "loss": 1.6577, + "step": 503000 + }, + { + "epoch": 5.31, + "learning_rate": 4.7343204195996074e-05, + "loss": 1.6133, + "step": 503500 + }, + { + "epoch": 5.32, + "learning_rate": 4.7340565868484656e-05, + "loss": 1.5924, + "step": 504000 + }, + { + "epoch": 5.32, + "learning_rate": 4.7337927540973225e-05, + "loss": 1.6244, + "step": 504500 + }, + { + "epoch": 5.33, + "learning_rate": 4.73352892134618e-05, + "loss": 1.566, + "step": 505000 + }, + { + "epoch": 5.33, + "learning_rate": 4.733265088595038e-05, + "loss": 1.5824, + "step": 505500 + }, + { + "epoch": 5.34, + "learning_rate": 4.733001255843896e-05, + "loss": 1.6317, + "step": 506000 + }, + { + "epoch": 5.35, + "learning_rate": 4.7327374230927533e-05, + "loss": 1.6222, + "step": 506500 + }, + { + "epoch": 5.35, + "learning_rate": 4.732473590341611e-05, + "loss": 1.6029, + "step": 507000 + }, + { + "epoch": 5.36, + "learning_rate": 4.7322097575904684e-05, + "loss": 1.6218, + "step": 507500 + }, + { + "epoch": 5.36, + "learning_rate": 4.731945924839326e-05, + "loss": 1.65, + "step": 508000 + }, + { + "epoch": 5.37, + "learning_rate": 4.7316820920881835e-05, + "loss": 1.6045, + "step": 508500 + }, + { + "epoch": 5.37, + "learning_rate": 4.731418259337042e-05, + "loss": 1.6085, + "step": 509000 + }, + { + "epoch": 5.38, + "learning_rate": 4.7311544265858986e-05, + "loss": 1.5934, + "step": 509500 + }, + { + "epoch": 5.38, + "learning_rate": 4.730890593834756e-05, + "loss": 1.6569, + "step": 510000 + }, + { + "epoch": 5.39, + "learning_rate": 4.7306267610836144e-05, + "loss": 1.6161, + "step": 510500 + }, + { + "epoch": 5.39, + "learning_rate": 4.730362928332472e-05, + "loss": 1.6312, + "step": 511000 + }, + { + "epoch": 5.4, + "learning_rate": 4.7300990955813295e-05, + "loss": 1.5641, + "step": 511500 + }, + { + "epoch": 5.4, + "learning_rate": 4.729835262830187e-05, + "loss": 1.5925, + "step": 512000 + }, + { + "epoch": 5.41, + "learning_rate": 4.7295714300790446e-05, + "loss": 1.5405, + "step": 512500 + }, + { + "epoch": 5.41, + "learning_rate": 4.729307597327902e-05, + "loss": 1.5908, + "step": 513000 + }, + { + "epoch": 5.42, + "learning_rate": 4.72904376457676e-05, + "loss": 1.6157, + "step": 513500 + }, + { + "epoch": 5.42, + "learning_rate": 4.728779931825617e-05, + "loss": 1.599, + "step": 514000 + }, + { + "epoch": 5.43, + "learning_rate": 4.728516099074475e-05, + "loss": 1.6312, + "step": 514500 + }, + { + "epoch": 5.43, + "learning_rate": 4.7282522663233323e-05, + "loss": 1.5281, + "step": 515000 + }, + { + "epoch": 5.44, + "learning_rate": 4.7279884335721906e-05, + "loss": 1.6089, + "step": 515500 + }, + { + "epoch": 5.45, + "learning_rate": 4.727724600821048e-05, + "loss": 1.598, + "step": 516000 + }, + { + "epoch": 5.45, + "learning_rate": 4.727460768069905e-05, + "loss": 1.5862, + "step": 516500 + }, + { + "epoch": 5.46, + "learning_rate": 4.7271969353187625e-05, + "loss": 1.6163, + "step": 517000 + }, + { + "epoch": 5.46, + "learning_rate": 4.726933102567621e-05, + "loss": 1.5934, + "step": 517500 + }, + { + "epoch": 5.47, + "learning_rate": 4.726669269816478e-05, + "loss": 1.5813, + "step": 518000 + }, + { + "epoch": 5.47, + "learning_rate": 4.726405437065336e-05, + "loss": 1.6065, + "step": 518500 + }, + { + "epoch": 5.48, + "learning_rate": 4.7261416043141934e-05, + "loss": 1.5972, + "step": 519000 + }, + { + "epoch": 5.48, + "learning_rate": 4.725877771563051e-05, + "loss": 1.5867, + "step": 519500 + }, + { + "epoch": 5.49, + "learning_rate": 4.7256139388119085e-05, + "loss": 1.5534, + "step": 520000 + }, + { + "epoch": 5.49, + "learning_rate": 4.725350106060766e-05, + "loss": 1.5987, + "step": 520500 + }, + { + "epoch": 5.5, + "learning_rate": 4.725086273309624e-05, + "loss": 1.6481, + "step": 521000 + }, + { + "epoch": 5.5, + "learning_rate": 4.724822440558481e-05, + "loss": 1.6056, + "step": 521500 + }, + { + "epoch": 5.51, + "learning_rate": 4.724558607807339e-05, + "loss": 1.65, + "step": 522000 + }, + { + "epoch": 5.51, + "learning_rate": 4.724294775056197e-05, + "loss": 1.6141, + "step": 522500 + }, + { + "epoch": 5.52, + "learning_rate": 4.7240309423050545e-05, + "loss": 1.6122, + "step": 523000 + }, + { + "epoch": 5.52, + "learning_rate": 4.7237671095539114e-05, + "loss": 1.5758, + "step": 523500 + }, + { + "epoch": 5.53, + "learning_rate": 4.7235032768027696e-05, + "loss": 1.6405, + "step": 524000 + }, + { + "epoch": 5.54, + "learning_rate": 4.723239444051627e-05, + "loss": 1.6152, + "step": 524500 + }, + { + "epoch": 5.54, + "learning_rate": 4.722975611300485e-05, + "loss": 1.6285, + "step": 525000 + }, + { + "epoch": 5.55, + "learning_rate": 4.722711778549342e-05, + "loss": 1.5927, + "step": 525500 + }, + { + "epoch": 5.55, + "learning_rate": 4.7224479457982e-05, + "loss": 1.6235, + "step": 526000 + }, + { + "epoch": 5.56, + "learning_rate": 4.722184113047057e-05, + "loss": 1.6307, + "step": 526500 + }, + { + "epoch": 5.56, + "learning_rate": 4.721920280295915e-05, + "loss": 1.5789, + "step": 527000 + }, + { + "epoch": 5.57, + "learning_rate": 4.721656447544773e-05, + "loss": 1.6399, + "step": 527500 + }, + { + "epoch": 5.57, + "learning_rate": 4.7213926147936306e-05, + "loss": 1.5424, + "step": 528000 + }, + { + "epoch": 5.58, + "learning_rate": 4.7211287820424875e-05, + "loss": 1.6026, + "step": 528500 + }, + { + "epoch": 5.58, + "learning_rate": 4.720864949291345e-05, + "loss": 1.6079, + "step": 529000 + }, + { + "epoch": 5.59, + "learning_rate": 4.720601116540203e-05, + "loss": 1.5942, + "step": 529500 + }, + { + "epoch": 5.59, + "learning_rate": 4.720337283789061e-05, + "loss": 1.5539, + "step": 530000 + }, + { + "epoch": 5.6, + "learning_rate": 4.7200734510379184e-05, + "loss": 1.5898, + "step": 530500 + }, + { + "epoch": 5.6, + "learning_rate": 4.719809618286776e-05, + "loss": 1.5718, + "step": 531000 + }, + { + "epoch": 5.61, + "learning_rate": 4.7195457855356335e-05, + "loss": 1.6116, + "step": 531500 + }, + { + "epoch": 5.61, + "learning_rate": 4.719281952784491e-05, + "loss": 1.5543, + "step": 532000 + }, + { + "epoch": 5.62, + "learning_rate": 4.7190181200333486e-05, + "loss": 1.5653, + "step": 532500 + }, + { + "epoch": 5.62, + "learning_rate": 4.718754287282206e-05, + "loss": 1.595, + "step": 533000 + }, + { + "epoch": 5.63, + "learning_rate": 4.718490454531064e-05, + "loss": 1.6734, + "step": 533500 + }, + { + "epoch": 5.64, + "learning_rate": 4.718226621779921e-05, + "loss": 1.6143, + "step": 534000 + }, + { + "epoch": 5.64, + "learning_rate": 4.7179627890287795e-05, + "loss": 1.6051, + "step": 534500 + }, + { + "epoch": 5.65, + "learning_rate": 4.717698956277637e-05, + "loss": 1.597, + "step": 535000 + }, + { + "epoch": 5.65, + "learning_rate": 4.717435123526494e-05, + "loss": 1.5881, + "step": 535500 + }, + { + "epoch": 5.66, + "learning_rate": 4.717171290775352e-05, + "loss": 1.6141, + "step": 536000 + }, + { + "epoch": 5.66, + "learning_rate": 4.7169074580242097e-05, + "loss": 1.6107, + "step": 536500 + }, + { + "epoch": 5.67, + "learning_rate": 4.716643625273067e-05, + "loss": 1.6004, + "step": 537000 + }, + { + "epoch": 5.67, + "learning_rate": 4.716379792521925e-05, + "loss": 1.5663, + "step": 537500 + }, + { + "epoch": 5.68, + "learning_rate": 4.716115959770782e-05, + "loss": 1.6131, + "step": 538000 + }, + { + "epoch": 5.68, + "learning_rate": 4.71585212701964e-05, + "loss": 1.6065, + "step": 538500 + }, + { + "epoch": 5.69, + "learning_rate": 4.7155882942684974e-05, + "loss": 1.5881, + "step": 539000 + }, + { + "epoch": 5.69, + "learning_rate": 4.7153244615173556e-05, + "loss": 1.6044, + "step": 539500 + }, + { + "epoch": 5.7, + "learning_rate": 4.715060628766213e-05, + "loss": 1.6107, + "step": 540000 + }, + { + "epoch": 5.7, + "learning_rate": 4.71479679601507e-05, + "loss": 1.5912, + "step": 540500 + }, + { + "epoch": 5.71, + "learning_rate": 4.7145329632639276e-05, + "loss": 1.6306, + "step": 541000 + }, + { + "epoch": 5.71, + "learning_rate": 4.714269130512786e-05, + "loss": 1.6232, + "step": 541500 + }, + { + "epoch": 5.72, + "learning_rate": 4.7140052977616434e-05, + "loss": 1.6587, + "step": 542000 + }, + { + "epoch": 5.73, + "learning_rate": 4.7137414650105e-05, + "loss": 1.5613, + "step": 542500 + }, + { + "epoch": 5.73, + "learning_rate": 4.7134776322593585e-05, + "loss": 1.6087, + "step": 543000 + }, + { + "epoch": 5.74, + "learning_rate": 4.713213799508216e-05, + "loss": 1.6141, + "step": 543500 + }, + { + "epoch": 5.74, + "learning_rate": 4.7129499667570736e-05, + "loss": 1.5854, + "step": 544000 + }, + { + "epoch": 5.75, + "learning_rate": 4.712686134005931e-05, + "loss": 1.6353, + "step": 544500 + }, + { + "epoch": 5.75, + "learning_rate": 4.7124223012547887e-05, + "loss": 1.6345, + "step": 545000 + }, + { + "epoch": 5.76, + "learning_rate": 4.712158468503646e-05, + "loss": 1.5875, + "step": 545500 + }, + { + "epoch": 5.76, + "learning_rate": 4.711894635752504e-05, + "loss": 1.5819, + "step": 546000 + }, + { + "epoch": 5.77, + "learning_rate": 4.711630803001362e-05, + "loss": 1.5707, + "step": 546500 + }, + { + "epoch": 5.77, + "learning_rate": 4.7113669702502195e-05, + "loss": 1.6411, + "step": 547000 + }, + { + "epoch": 5.78, + "learning_rate": 4.7111031374990764e-05, + "loss": 1.5361, + "step": 547500 + }, + { + "epoch": 5.78, + "learning_rate": 4.7108393047479346e-05, + "loss": 1.6249, + "step": 548000 + }, + { + "epoch": 5.79, + "learning_rate": 4.710575471996792e-05, + "loss": 1.5848, + "step": 548500 + }, + { + "epoch": 5.79, + "learning_rate": 4.71031163924565e-05, + "loss": 1.5607, + "step": 549000 + }, + { + "epoch": 5.8, + "learning_rate": 4.710047806494507e-05, + "loss": 1.5965, + "step": 549500 + }, + { + "epoch": 5.8, + "learning_rate": 4.709783973743365e-05, + "loss": 1.5437, + "step": 550000 + }, + { + "epoch": 5.81, + "learning_rate": 4.7095201409922224e-05, + "loss": 1.6159, + "step": 550500 + }, + { + "epoch": 5.81, + "learning_rate": 4.70925630824108e-05, + "loss": 1.561, + "step": 551000 + }, + { + "epoch": 5.82, + "learning_rate": 4.708992475489938e-05, + "loss": 1.5703, + "step": 551500 + }, + { + "epoch": 5.83, + "learning_rate": 4.708728642738795e-05, + "loss": 1.5424, + "step": 552000 + }, + { + "epoch": 5.83, + "learning_rate": 4.7084648099876526e-05, + "loss": 1.5926, + "step": 552500 + }, + { + "epoch": 5.84, + "learning_rate": 4.70820097723651e-05, + "loss": 1.5715, + "step": 553000 + }, + { + "epoch": 5.84, + "learning_rate": 4.7079371444853683e-05, + "loss": 1.6255, + "step": 553500 + }, + { + "epoch": 5.85, + "learning_rate": 4.707673311734226e-05, + "loss": 1.55, + "step": 554000 + }, + { + "epoch": 5.85, + "learning_rate": 4.707409478983083e-05, + "loss": 1.5837, + "step": 554500 + }, + { + "epoch": 5.86, + "learning_rate": 4.707145646231941e-05, + "loss": 1.5679, + "step": 555000 + }, + { + "epoch": 5.86, + "learning_rate": 4.7068818134807985e-05, + "loss": 1.5952, + "step": 555500 + }, + { + "epoch": 5.87, + "learning_rate": 4.706617980729656e-05, + "loss": 1.6066, + "step": 556000 + }, + { + "epoch": 5.87, + "learning_rate": 4.7063541479785136e-05, + "loss": 1.6008, + "step": 556500 + }, + { + "epoch": 5.88, + "learning_rate": 4.706090315227371e-05, + "loss": 1.5693, + "step": 557000 + }, + { + "epoch": 5.88, + "learning_rate": 4.705826482476229e-05, + "loss": 1.6092, + "step": 557500 + }, + { + "epoch": 5.89, + "learning_rate": 4.705562649725086e-05, + "loss": 1.6026, + "step": 558000 + }, + { + "epoch": 5.89, + "learning_rate": 4.7052988169739445e-05, + "loss": 1.5661, + "step": 558500 + }, + { + "epoch": 5.9, + "learning_rate": 4.705034984222802e-05, + "loss": 1.5723, + "step": 559000 + }, + { + "epoch": 5.9, + "learning_rate": 4.704771151471659e-05, + "loss": 1.5804, + "step": 559500 + }, + { + "epoch": 5.91, + "learning_rate": 4.704507318720517e-05, + "loss": 1.6379, + "step": 560000 + }, + { + "epoch": 5.92, + "learning_rate": 4.704243485969375e-05, + "loss": 1.5815, + "step": 560500 + }, + { + "epoch": 5.92, + "learning_rate": 4.703979653218232e-05, + "loss": 1.5482, + "step": 561000 + }, + { + "epoch": 5.93, + "learning_rate": 4.70371582046709e-05, + "loss": 1.5436, + "step": 561500 + }, + { + "epoch": 5.93, + "learning_rate": 4.7034519877159473e-05, + "loss": 1.5782, + "step": 562000 + }, + { + "epoch": 5.94, + "learning_rate": 4.703188154964805e-05, + "loss": 1.6076, + "step": 562500 + }, + { + "epoch": 5.94, + "learning_rate": 4.7029243222136624e-05, + "loss": 1.5135, + "step": 563000 + }, + { + "epoch": 5.95, + "learning_rate": 4.702660489462521e-05, + "loss": 1.571, + "step": 563500 + }, + { + "epoch": 5.95, + "learning_rate": 4.7023966567113775e-05, + "loss": 1.597, + "step": 564000 + }, + { + "epoch": 5.96, + "learning_rate": 4.702132823960235e-05, + "loss": 1.5486, + "step": 564500 + }, + { + "epoch": 5.96, + "learning_rate": 4.7018689912090926e-05, + "loss": 1.5737, + "step": 565000 + }, + { + "epoch": 5.97, + "learning_rate": 4.701605158457951e-05, + "loss": 1.6054, + "step": 565500 + }, + { + "epoch": 5.97, + "learning_rate": 4.7013413257068084e-05, + "loss": 1.613, + "step": 566000 + }, + { + "epoch": 5.98, + "learning_rate": 4.701077492955665e-05, + "loss": 1.5358, + "step": 566500 + }, + { + "epoch": 5.98, + "learning_rate": 4.7008136602045235e-05, + "loss": 1.555, + "step": 567000 + }, + { + "epoch": 5.99, + "learning_rate": 4.700549827453381e-05, + "loss": 1.5899, + "step": 567500 + }, + { + "epoch": 5.99, + "learning_rate": 4.7002859947022386e-05, + "loss": 1.5689, + "step": 568000 + }, + { + "epoch": 6.0, + "learning_rate": 4.700022161951096e-05, + "loss": 1.5863, + "step": 568500 + }, + { + "epoch": 6.0, + "learning_rate": 4.699758329199954e-05, + "loss": 1.5649, + "step": 569000 + }, + { + "epoch": 6.01, + "learning_rate": 4.699494496448811e-05, + "loss": 1.5337, + "step": 569500 + }, + { + "epoch": 6.02, + "learning_rate": 4.699230663697669e-05, + "loss": 1.5289, + "step": 570000 + }, + { + "epoch": 6.02, + "learning_rate": 4.698966830946527e-05, + "loss": 1.5854, + "step": 570500 + }, + { + "epoch": 6.03, + "learning_rate": 4.698702998195384e-05, + "loss": 1.5638, + "step": 571000 + }, + { + "epoch": 6.03, + "learning_rate": 4.6984391654442415e-05, + "loss": 1.5806, + "step": 571500 + }, + { + "epoch": 6.04, + "learning_rate": 4.6981753326931e-05, + "loss": 1.5922, + "step": 572000 + }, + { + "epoch": 6.04, + "learning_rate": 4.697911499941957e-05, + "loss": 1.5267, + "step": 572500 + }, + { + "epoch": 6.05, + "learning_rate": 4.697647667190815e-05, + "loss": 1.5361, + "step": 573000 + }, + { + "epoch": 6.05, + "learning_rate": 4.697383834439672e-05, + "loss": 1.5375, + "step": 573500 + }, + { + "epoch": 6.06, + "learning_rate": 4.69712000168853e-05, + "loss": 1.5971, + "step": 574000 + }, + { + "epoch": 6.06, + "learning_rate": 4.6968561689373874e-05, + "loss": 1.5501, + "step": 574500 + }, + { + "epoch": 6.07, + "learning_rate": 4.696592336186245e-05, + "loss": 1.6159, + "step": 575000 + }, + { + "epoch": 6.07, + "learning_rate": 4.696328503435103e-05, + "loss": 1.5343, + "step": 575500 + }, + { + "epoch": 6.08, + "learning_rate": 4.69606467068396e-05, + "loss": 1.5853, + "step": 576000 + }, + { + "epoch": 6.08, + "learning_rate": 4.6958008379328176e-05, + "loss": 1.5436, + "step": 576500 + }, + { + "epoch": 6.09, + "learning_rate": 4.695537005181675e-05, + "loss": 1.5941, + "step": 577000 + }, + { + "epoch": 6.09, + "learning_rate": 4.6952731724305334e-05, + "loss": 1.5695, + "step": 577500 + }, + { + "epoch": 6.1, + "learning_rate": 4.695009339679391e-05, + "loss": 1.5933, + "step": 578000 + }, + { + "epoch": 6.11, + "learning_rate": 4.694745506928248e-05, + "loss": 1.6093, + "step": 578500 + }, + { + "epoch": 6.11, + "learning_rate": 4.694481674177106e-05, + "loss": 1.5673, + "step": 579000 + }, + { + "epoch": 6.12, + "learning_rate": 4.6942178414259636e-05, + "loss": 1.5577, + "step": 579500 + }, + { + "epoch": 6.12, + "learning_rate": 4.693954008674821e-05, + "loss": 1.5665, + "step": 580000 + }, + { + "epoch": 6.13, + "learning_rate": 4.693690175923679e-05, + "loss": 1.5725, + "step": 580500 + }, + { + "epoch": 6.13, + "learning_rate": 4.693426343172536e-05, + "loss": 1.5562, + "step": 581000 + }, + { + "epoch": 6.14, + "learning_rate": 4.693162510421394e-05, + "loss": 1.546, + "step": 581500 + }, + { + "epoch": 6.14, + "learning_rate": 4.692898677670251e-05, + "loss": 1.5711, + "step": 582000 + }, + { + "epoch": 6.15, + "learning_rate": 4.6926348449191096e-05, + "loss": 1.5826, + "step": 582500 + }, + { + "epoch": 6.15, + "learning_rate": 4.6923710121679664e-05, + "loss": 1.5373, + "step": 583000 + }, + { + "epoch": 6.16, + "learning_rate": 4.692107179416824e-05, + "loss": 1.5748, + "step": 583500 + }, + { + "epoch": 6.16, + "learning_rate": 4.691843346665682e-05, + "loss": 1.5308, + "step": 584000 + }, + { + "epoch": 6.17, + "learning_rate": 4.69157951391454e-05, + "loss": 1.6058, + "step": 584500 + }, + { + "epoch": 6.17, + "learning_rate": 4.691315681163397e-05, + "loss": 1.5243, + "step": 585000 + }, + { + "epoch": 6.18, + "learning_rate": 4.691051848412255e-05, + "loss": 1.504, + "step": 585500 + }, + { + "epoch": 6.18, + "learning_rate": 4.6907880156611124e-05, + "loss": 1.5225, + "step": 586000 + }, + { + "epoch": 6.19, + "learning_rate": 4.69052418290997e-05, + "loss": 1.5264, + "step": 586500 + }, + { + "epoch": 6.19, + "learning_rate": 4.6902603501588275e-05, + "loss": 1.5486, + "step": 587000 + }, + { + "epoch": 6.2, + "learning_rate": 4.689996517407686e-05, + "loss": 1.5715, + "step": 587500 + }, + { + "epoch": 6.21, + "learning_rate": 4.6897326846565426e-05, + "loss": 1.5754, + "step": 588000 + }, + { + "epoch": 6.21, + "learning_rate": 4.6894688519054e-05, + "loss": 1.5968, + "step": 588500 + }, + { + "epoch": 6.22, + "learning_rate": 4.689205019154258e-05, + "loss": 1.5428, + "step": 589000 + }, + { + "epoch": 6.22, + "learning_rate": 4.688941186403116e-05, + "loss": 1.5625, + "step": 589500 + }, + { + "epoch": 6.23, + "learning_rate": 4.688677353651973e-05, + "loss": 1.5252, + "step": 590000 + }, + { + "epoch": 6.23, + "learning_rate": 4.68841352090083e-05, + "loss": 1.5933, + "step": 590500 + }, + { + "epoch": 6.24, + "learning_rate": 4.6881496881496886e-05, + "loss": 1.5401, + "step": 591000 + }, + { + "epoch": 6.24, + "learning_rate": 4.687885855398546e-05, + "loss": 1.5656, + "step": 591500 + }, + { + "epoch": 6.25, + "learning_rate": 4.6876220226474037e-05, + "loss": 1.6269, + "step": 592000 + }, + { + "epoch": 6.25, + "learning_rate": 4.687358189896261e-05, + "loss": 1.5654, + "step": 592500 + }, + { + "epoch": 6.26, + "learning_rate": 4.687094357145119e-05, + "loss": 1.5573, + "step": 593000 + }, + { + "epoch": 6.26, + "learning_rate": 4.686830524393976e-05, + "loss": 1.5655, + "step": 593500 + }, + { + "epoch": 6.27, + "learning_rate": 4.686566691642834e-05, + "loss": 1.5632, + "step": 594000 + }, + { + "epoch": 6.27, + "learning_rate": 4.686302858891692e-05, + "loss": 1.609, + "step": 594500 + }, + { + "epoch": 6.28, + "learning_rate": 4.686039026140549e-05, + "loss": 1.548, + "step": 595000 + }, + { + "epoch": 6.28, + "learning_rate": 4.6857751933894065e-05, + "loss": 1.5888, + "step": 595500 + }, + { + "epoch": 6.29, + "learning_rate": 4.685511360638265e-05, + "loss": 1.5434, + "step": 596000 + }, + { + "epoch": 6.3, + "learning_rate": 4.685247527887122e-05, + "loss": 1.5855, + "step": 596500 + }, + { + "epoch": 6.3, + "learning_rate": 4.68498369513598e-05, + "loss": 1.6229, + "step": 597000 + }, + { + "epoch": 6.31, + "learning_rate": 4.6847198623848374e-05, + "loss": 1.5858, + "step": 597500 + }, + { + "epoch": 6.31, + "learning_rate": 4.684456029633695e-05, + "loss": 1.5795, + "step": 598000 + }, + { + "epoch": 6.32, + "learning_rate": 4.6841921968825525e-05, + "loss": 1.5646, + "step": 598500 + }, + { + "epoch": 6.32, + "learning_rate": 4.68392836413141e-05, + "loss": 1.5194, + "step": 599000 + }, + { + "epoch": 6.33, + "learning_rate": 4.6836645313802676e-05, + "loss": 1.5501, + "step": 599500 + }, + { + "epoch": 6.33, + "learning_rate": 4.683400698629125e-05, + "loss": 1.5627, + "step": 600000 + }, + { + "epoch": 6.34, + "learning_rate": 4.683136865877983e-05, + "loss": 1.5277, + "step": 600500 + }, + { + "epoch": 6.34, + "learning_rate": 4.682873033126841e-05, + "loss": 1.6273, + "step": 601000 + }, + { + "epoch": 6.35, + "learning_rate": 4.6826092003756984e-05, + "loss": 1.6298, + "step": 601500 + }, + { + "epoch": 6.35, + "learning_rate": 4.682345367624555e-05, + "loss": 1.5655, + "step": 602000 + }, + { + "epoch": 6.36, + "learning_rate": 4.682081534873413e-05, + "loss": 1.583, + "step": 602500 + }, + { + "epoch": 6.36, + "learning_rate": 4.681817702122271e-05, + "loss": 1.5916, + "step": 603000 + }, + { + "epoch": 6.37, + "learning_rate": 4.6815538693711286e-05, + "loss": 1.5802, + "step": 603500 + }, + { + "epoch": 6.37, + "learning_rate": 4.681290036619986e-05, + "loss": 1.592, + "step": 604000 + }, + { + "epoch": 6.38, + "learning_rate": 4.681026203868844e-05, + "loss": 1.5806, + "step": 604500 + }, + { + "epoch": 6.38, + "learning_rate": 4.680762371117701e-05, + "loss": 1.5353, + "step": 605000 + }, + { + "epoch": 6.39, + "learning_rate": 4.680498538366559e-05, + "loss": 1.6011, + "step": 605500 + }, + { + "epoch": 6.4, + "learning_rate": 4.6802347056154164e-05, + "loss": 1.6063, + "step": 606000 + }, + { + "epoch": 6.4, + "learning_rate": 4.6799708728642746e-05, + "loss": 1.565, + "step": 606500 + }, + { + "epoch": 6.41, + "learning_rate": 4.6797070401131315e-05, + "loss": 1.5239, + "step": 607000 + }, + { + "epoch": 6.41, + "learning_rate": 4.679443207361989e-05, + "loss": 1.5482, + "step": 607500 + }, + { + "epoch": 6.42, + "learning_rate": 4.679179374610847e-05, + "loss": 1.5685, + "step": 608000 + }, + { + "epoch": 6.42, + "learning_rate": 4.678915541859705e-05, + "loss": 1.6324, + "step": 608500 + }, + { + "epoch": 6.43, + "learning_rate": 4.678651709108562e-05, + "loss": 1.6039, + "step": 609000 + }, + { + "epoch": 6.43, + "learning_rate": 4.67838787635742e-05, + "loss": 1.579, + "step": 609500 + }, + { + "epoch": 6.44, + "learning_rate": 4.6781240436062774e-05, + "loss": 1.5673, + "step": 610000 + }, + { + "epoch": 6.44, + "learning_rate": 4.677860210855135e-05, + "loss": 1.5833, + "step": 610500 + }, + { + "epoch": 6.45, + "learning_rate": 4.6775963781039925e-05, + "loss": 1.5358, + "step": 611000 + }, + { + "epoch": 6.45, + "learning_rate": 4.67733254535285e-05, + "loss": 1.5704, + "step": 611500 + }, + { + "epoch": 6.46, + "learning_rate": 4.6770687126017076e-05, + "loss": 1.5536, + "step": 612000 + }, + { + "epoch": 6.46, + "learning_rate": 4.676804879850565e-05, + "loss": 1.5397, + "step": 612500 + }, + { + "epoch": 6.47, + "learning_rate": 4.6765410470994234e-05, + "loss": 1.5453, + "step": 613000 + }, + { + "epoch": 6.47, + "learning_rate": 4.676277214348281e-05, + "loss": 1.542, + "step": 613500 + }, + { + "epoch": 6.48, + "learning_rate": 4.676013381597138e-05, + "loss": 1.5655, + "step": 614000 + }, + { + "epoch": 6.49, + "learning_rate": 4.6757495488459954e-05, + "loss": 1.5586, + "step": 614500 + }, + { + "epoch": 6.49, + "learning_rate": 4.6754857160948536e-05, + "loss": 1.5729, + "step": 615000 + }, + { + "epoch": 6.5, + "learning_rate": 4.675221883343711e-05, + "loss": 1.5618, + "step": 615500 + }, + { + "epoch": 6.5, + "learning_rate": 4.674958050592568e-05, + "loss": 1.5798, + "step": 616000 + }, + { + "epoch": 6.51, + "learning_rate": 4.674694217841426e-05, + "loss": 1.5853, + "step": 616500 + }, + { + "epoch": 6.51, + "learning_rate": 4.674430385090284e-05, + "loss": 1.5419, + "step": 617000 + }, + { + "epoch": 6.52, + "learning_rate": 4.6741665523391414e-05, + "loss": 1.527, + "step": 617500 + }, + { + "epoch": 6.52, + "learning_rate": 4.673902719587999e-05, + "loss": 1.5385, + "step": 618000 + }, + { + "epoch": 6.53, + "learning_rate": 4.6736388868368565e-05, + "loss": 1.621, + "step": 618500 + }, + { + "epoch": 6.53, + "learning_rate": 4.673375054085714e-05, + "loss": 1.5795, + "step": 619000 + }, + { + "epoch": 6.54, + "learning_rate": 4.6731112213345715e-05, + "loss": 1.5566, + "step": 619500 + }, + { + "epoch": 6.54, + "learning_rate": 4.67284738858343e-05, + "loss": 1.5682, + "step": 620000 + }, + { + "epoch": 6.55, + "learning_rate": 4.672583555832287e-05, + "loss": 1.6056, + "step": 620500 + }, + { + "epoch": 6.55, + "learning_rate": 4.672319723081144e-05, + "loss": 1.5953, + "step": 621000 + }, + { + "epoch": 6.56, + "learning_rate": 4.6720558903300024e-05, + "loss": 1.5685, + "step": 621500 + }, + { + "epoch": 6.56, + "learning_rate": 4.67179205757886e-05, + "loss": 1.5738, + "step": 622000 + }, + { + "epoch": 6.57, + "learning_rate": 4.6715282248277175e-05, + "loss": 1.5062, + "step": 622500 + }, + { + "epoch": 6.57, + "learning_rate": 4.671264392076575e-05, + "loss": 1.5813, + "step": 623000 + }, + { + "epoch": 6.58, + "learning_rate": 4.6710005593254326e-05, + "loss": 1.5521, + "step": 623500 + }, + { + "epoch": 6.59, + "learning_rate": 4.67073672657429e-05, + "loss": 1.5735, + "step": 624000 + }, + { + "epoch": 6.59, + "learning_rate": 4.670472893823148e-05, + "loss": 1.5525, + "step": 624500 + }, + { + "epoch": 6.6, + "learning_rate": 4.670209061072006e-05, + "loss": 1.5548, + "step": 625000 + }, + { + "epoch": 6.6, + "learning_rate": 4.669945228320863e-05, + "loss": 1.6289, + "step": 625500 + }, + { + "epoch": 6.61, + "learning_rate": 4.6696813955697204e-05, + "loss": 1.5233, + "step": 626000 + }, + { + "epoch": 6.61, + "learning_rate": 4.669417562818578e-05, + "loss": 1.5918, + "step": 626500 + }, + { + "epoch": 6.62, + "learning_rate": 4.669153730067436e-05, + "loss": 1.566, + "step": 627000 + }, + { + "epoch": 6.62, + "learning_rate": 4.668889897316294e-05, + "loss": 1.58, + "step": 627500 + }, + { + "epoch": 6.63, + "learning_rate": 4.6686260645651506e-05, + "loss": 1.5543, + "step": 628000 + }, + { + "epoch": 6.63, + "learning_rate": 4.668362231814009e-05, + "loss": 1.5612, + "step": 628500 + }, + { + "epoch": 6.64, + "learning_rate": 4.668098399062866e-05, + "loss": 1.5311, + "step": 629000 + }, + { + "epoch": 6.64, + "learning_rate": 4.667834566311724e-05, + "loss": 1.5294, + "step": 629500 + }, + { + "epoch": 6.65, + "learning_rate": 4.6675707335605814e-05, + "loss": 1.5237, + "step": 630000 + }, + { + "epoch": 6.65, + "learning_rate": 4.667306900809439e-05, + "loss": 1.6066, + "step": 630500 + }, + { + "epoch": 6.66, + "learning_rate": 4.6670430680582965e-05, + "loss": 1.5881, + "step": 631000 + }, + { + "epoch": 6.66, + "learning_rate": 4.666779235307154e-05, + "loss": 1.507, + "step": 631500 + }, + { + "epoch": 6.67, + "learning_rate": 4.666515402556012e-05, + "loss": 1.5388, + "step": 632000 + }, + { + "epoch": 6.67, + "learning_rate": 4.66625156980487e-05, + "loss": 1.535, + "step": 632500 + }, + { + "epoch": 6.68, + "learning_rate": 4.665987737053727e-05, + "loss": 1.5537, + "step": 633000 + }, + { + "epoch": 6.69, + "learning_rate": 4.665723904302585e-05, + "loss": 1.5126, + "step": 633500 + }, + { + "epoch": 6.69, + "learning_rate": 4.6654600715514425e-05, + "loss": 1.5695, + "step": 634000 + }, + { + "epoch": 6.7, + "learning_rate": 4.6651962388003e-05, + "loss": 1.4985, + "step": 634500 + }, + { + "epoch": 6.7, + "learning_rate": 4.6649324060491576e-05, + "loss": 1.5456, + "step": 635000 + }, + { + "epoch": 6.71, + "learning_rate": 4.664668573298015e-05, + "loss": 1.5373, + "step": 635500 + }, + { + "epoch": 6.71, + "learning_rate": 4.664404740546873e-05, + "loss": 1.5359, + "step": 636000 + }, + { + "epoch": 6.72, + "learning_rate": 4.66414090779573e-05, + "loss": 1.533, + "step": 636500 + }, + { + "epoch": 6.72, + "learning_rate": 4.6638770750445885e-05, + "loss": 1.5453, + "step": 637000 + }, + { + "epoch": 6.73, + "learning_rate": 4.663613242293445e-05, + "loss": 1.5355, + "step": 637500 + }, + { + "epoch": 6.73, + "learning_rate": 4.663349409542303e-05, + "loss": 1.5728, + "step": 638000 + }, + { + "epoch": 6.74, + "learning_rate": 4.6630855767911604e-05, + "loss": 1.6012, + "step": 638500 + }, + { + "epoch": 6.74, + "learning_rate": 4.6628217440400187e-05, + "loss": 1.5804, + "step": 639000 + }, + { + "epoch": 6.75, + "learning_rate": 4.662557911288876e-05, + "loss": 1.6098, + "step": 639500 + }, + { + "epoch": 6.75, + "learning_rate": 4.662294078537733e-05, + "loss": 1.5671, + "step": 640000 + }, + { + "epoch": 6.76, + "learning_rate": 4.662030245786591e-05, + "loss": 1.5756, + "step": 640500 + }, + { + "epoch": 6.76, + "learning_rate": 4.661766413035449e-05, + "loss": 1.5261, + "step": 641000 + }, + { + "epoch": 6.77, + "learning_rate": 4.6615025802843064e-05, + "loss": 1.5968, + "step": 641500 + }, + { + "epoch": 6.78, + "learning_rate": 4.661238747533164e-05, + "loss": 1.5959, + "step": 642000 + }, + { + "epoch": 6.78, + "learning_rate": 4.6609749147820215e-05, + "loss": 1.5485, + "step": 642500 + }, + { + "epoch": 6.79, + "learning_rate": 4.660711082030879e-05, + "loss": 1.4645, + "step": 643000 + }, + { + "epoch": 6.79, + "learning_rate": 4.6604472492797366e-05, + "loss": 1.6253, + "step": 643500 + }, + { + "epoch": 6.8, + "learning_rate": 4.660183416528595e-05, + "loss": 1.542, + "step": 644000 + }, + { + "epoch": 6.8, + "learning_rate": 4.659919583777452e-05, + "loss": 1.5557, + "step": 644500 + }, + { + "epoch": 6.81, + "learning_rate": 4.659655751026309e-05, + "loss": 1.5647, + "step": 645000 + }, + { + "epoch": 6.81, + "learning_rate": 4.6593919182751675e-05, + "loss": 1.5418, + "step": 645500 + }, + { + "epoch": 6.82, + "learning_rate": 4.659128085524025e-05, + "loss": 1.5518, + "step": 646000 + }, + { + "epoch": 6.82, + "learning_rate": 4.6588642527728826e-05, + "loss": 1.5586, + "step": 646500 + }, + { + "epoch": 6.83, + "learning_rate": 4.65860042002174e-05, + "loss": 1.5456, + "step": 647000 + }, + { + "epoch": 6.83, + "learning_rate": 4.658336587270598e-05, + "loss": 1.5916, + "step": 647500 + }, + { + "epoch": 6.84, + "learning_rate": 4.658072754519455e-05, + "loss": 1.5603, + "step": 648000 + }, + { + "epoch": 6.84, + "learning_rate": 4.657808921768313e-05, + "loss": 1.553, + "step": 648500 + }, + { + "epoch": 6.85, + "learning_rate": 4.657545089017171e-05, + "loss": 1.5874, + "step": 649000 + }, + { + "epoch": 6.85, + "learning_rate": 4.657281256266028e-05, + "loss": 1.5526, + "step": 649500 + }, + { + "epoch": 6.86, + "learning_rate": 4.6570174235148854e-05, + "loss": 1.5855, + "step": 650000 + }, + { + "epoch": 6.86, + "learning_rate": 4.656753590763743e-05, + "loss": 1.5749, + "step": 650500 + }, + { + "epoch": 6.87, + "learning_rate": 4.656489758012601e-05, + "loss": 1.5072, + "step": 651000 + }, + { + "epoch": 6.88, + "learning_rate": 4.656225925261459e-05, + "loss": 1.549, + "step": 651500 + }, + { + "epoch": 6.88, + "learning_rate": 4.6559620925103156e-05, + "loss": 1.5683, + "step": 652000 + }, + { + "epoch": 6.89, + "learning_rate": 4.655698259759174e-05, + "loss": 1.6031, + "step": 652500 + }, + { + "epoch": 6.89, + "learning_rate": 4.6554344270080314e-05, + "loss": 1.584, + "step": 653000 + }, + { + "epoch": 6.9, + "learning_rate": 4.655170594256889e-05, + "loss": 1.6017, + "step": 653500 + }, + { + "epoch": 6.9, + "learning_rate": 4.6549067615057465e-05, + "loss": 1.5826, + "step": 654000 + }, + { + "epoch": 6.91, + "learning_rate": 4.654642928754604e-05, + "loss": 1.5626, + "step": 654500 + }, + { + "epoch": 6.91, + "learning_rate": 4.6543790960034616e-05, + "loss": 1.5541, + "step": 655000 + }, + { + "epoch": 6.92, + "learning_rate": 4.654115263252319e-05, + "loss": 1.5948, + "step": 655500 + }, + { + "epoch": 6.92, + "learning_rate": 4.6538514305011773e-05, + "loss": 1.5998, + "step": 656000 + }, + { + "epoch": 6.93, + "learning_rate": 4.653587597750034e-05, + "loss": 1.5461, + "step": 656500 + }, + { + "epoch": 6.93, + "learning_rate": 4.653323764998892e-05, + "loss": 1.5436, + "step": 657000 + }, + { + "epoch": 6.94, + "learning_rate": 4.65305993224775e-05, + "loss": 1.5417, + "step": 657500 + }, + { + "epoch": 6.94, + "learning_rate": 4.6527960994966075e-05, + "loss": 1.5153, + "step": 658000 + }, + { + "epoch": 6.95, + "learning_rate": 4.652532266745465e-05, + "loss": 1.5491, + "step": 658500 + }, + { + "epoch": 6.95, + "learning_rate": 4.6522684339943226e-05, + "loss": 1.5606, + "step": 659000 + }, + { + "epoch": 6.96, + "learning_rate": 4.65200460124318e-05, + "loss": 1.5137, + "step": 659500 + }, + { + "epoch": 6.97, + "learning_rate": 4.651740768492038e-05, + "loss": 1.5071, + "step": 660000 + }, + { + "epoch": 6.97, + "learning_rate": 4.651476935740895e-05, + "loss": 1.5475, + "step": 660500 + }, + { + "epoch": 6.98, + "learning_rate": 4.6512131029897535e-05, + "loss": 1.6058, + "step": 661000 + }, + { + "epoch": 6.98, + "learning_rate": 4.6509492702386104e-05, + "loss": 1.5317, + "step": 661500 + }, + { + "epoch": 6.99, + "learning_rate": 4.650685437487468e-05, + "loss": 1.5655, + "step": 662000 + }, + { + "epoch": 6.99, + "learning_rate": 4.6504216047363255e-05, + "loss": 1.5177, + "step": 662500 + }, + { + "epoch": 7.0, + "learning_rate": 4.650157771985184e-05, + "loss": 1.5608, + "step": 663000 + }, + { + "epoch": 7.0, + "learning_rate": 4.6498939392340406e-05, + "loss": 1.5513, + "step": 663500 + }, + { + "epoch": 7.01, + "learning_rate": 4.649630106482898e-05, + "loss": 1.5288, + "step": 664000 + }, + { + "epoch": 7.01, + "learning_rate": 4.6493662737317564e-05, + "loss": 1.5554, + "step": 664500 + }, + { + "epoch": 7.02, + "learning_rate": 4.649102440980614e-05, + "loss": 1.5212, + "step": 665000 + }, + { + "epoch": 7.02, + "learning_rate": 4.6488386082294715e-05, + "loss": 1.5173, + "step": 665500 + }, + { + "epoch": 7.03, + "learning_rate": 4.648574775478329e-05, + "loss": 1.583, + "step": 666000 + }, + { + "epoch": 7.03, + "learning_rate": 4.6483109427271866e-05, + "loss": 1.4853, + "step": 666500 + }, + { + "epoch": 7.04, + "learning_rate": 4.648047109976044e-05, + "loss": 1.4866, + "step": 667000 + }, + { + "epoch": 7.04, + "learning_rate": 4.6477832772249016e-05, + "loss": 1.5072, + "step": 667500 + }, + { + "epoch": 7.05, + "learning_rate": 4.64751944447376e-05, + "loss": 1.5881, + "step": 668000 + }, + { + "epoch": 7.05, + "learning_rate": 4.647255611722617e-05, + "loss": 1.5064, + "step": 668500 + }, + { + "epoch": 7.06, + "learning_rate": 4.646991778971474e-05, + "loss": 1.5156, + "step": 669000 + }, + { + "epoch": 7.07, + "learning_rate": 4.6467279462203325e-05, + "loss": 1.4971, + "step": 669500 + }, + { + "epoch": 7.07, + "learning_rate": 4.64646411346919e-05, + "loss": 1.5319, + "step": 670000 + }, + { + "epoch": 7.08, + "learning_rate": 4.6462002807180476e-05, + "loss": 1.5321, + "step": 670500 + }, + { + "epoch": 7.08, + "learning_rate": 4.645936447966905e-05, + "loss": 1.5261, + "step": 671000 + }, + { + "epoch": 7.09, + "learning_rate": 4.645672615215763e-05, + "loss": 1.4987, + "step": 671500 + }, + { + "epoch": 7.09, + "learning_rate": 4.64540878246462e-05, + "loss": 1.5254, + "step": 672000 + }, + { + "epoch": 7.1, + "learning_rate": 4.645144949713478e-05, + "loss": 1.5679, + "step": 672500 + }, + { + "epoch": 7.1, + "learning_rate": 4.6448811169623354e-05, + "loss": 1.5558, + "step": 673000 + }, + { + "epoch": 7.11, + "learning_rate": 4.644617284211193e-05, + "loss": 1.5596, + "step": 673500 + }, + { + "epoch": 7.11, + "learning_rate": 4.6443534514600505e-05, + "loss": 1.5503, + "step": 674000 + }, + { + "epoch": 7.12, + "learning_rate": 4.644089618708909e-05, + "loss": 1.5375, + "step": 674500 + }, + { + "epoch": 7.12, + "learning_rate": 4.643825785957766e-05, + "loss": 1.5078, + "step": 675000 + }, + { + "epoch": 7.13, + "learning_rate": 4.643561953206623e-05, + "loss": 1.5153, + "step": 675500 + }, + { + "epoch": 7.13, + "learning_rate": 4.6432981204554807e-05, + "loss": 1.5497, + "step": 676000 + }, + { + "epoch": 7.14, + "learning_rate": 4.643034287704339e-05, + "loss": 1.5369, + "step": 676500 + }, + { + "epoch": 7.14, + "learning_rate": 4.6427704549531964e-05, + "loss": 1.4763, + "step": 677000 + }, + { + "epoch": 7.15, + "learning_rate": 4.642506622202054e-05, + "loss": 1.5775, + "step": 677500 + }, + { + "epoch": 7.16, + "learning_rate": 4.6422427894509115e-05, + "loss": 1.5444, + "step": 678000 + }, + { + "epoch": 7.16, + "learning_rate": 4.641978956699769e-05, + "loss": 1.5402, + "step": 678500 + }, + { + "epoch": 7.17, + "learning_rate": 4.6417151239486266e-05, + "loss": 1.5212, + "step": 679000 + }, + { + "epoch": 7.17, + "learning_rate": 4.641451291197484e-05, + "loss": 1.451, + "step": 679500 + }, + { + "epoch": 7.18, + "learning_rate": 4.6411874584463424e-05, + "loss": 1.5434, + "step": 680000 + }, + { + "epoch": 7.18, + "learning_rate": 4.640923625695199e-05, + "loss": 1.5159, + "step": 680500 + }, + { + "epoch": 7.19, + "learning_rate": 4.640659792944057e-05, + "loss": 1.5445, + "step": 681000 + }, + { + "epoch": 7.19, + "learning_rate": 4.640395960192915e-05, + "loss": 1.5545, + "step": 681500 + }, + { + "epoch": 7.2, + "learning_rate": 4.6401321274417726e-05, + "loss": 1.5416, + "step": 682000 + }, + { + "epoch": 7.2, + "learning_rate": 4.6398682946906295e-05, + "loss": 1.5086, + "step": 682500 + }, + { + "epoch": 7.21, + "learning_rate": 4.639604461939488e-05, + "loss": 1.5539, + "step": 683000 + }, + { + "epoch": 7.21, + "learning_rate": 4.639340629188345e-05, + "loss": 1.5839, + "step": 683500 + }, + { + "epoch": 7.22, + "learning_rate": 4.639076796437203e-05, + "loss": 1.5511, + "step": 684000 + }, + { + "epoch": 7.22, + "learning_rate": 4.63881296368606e-05, + "loss": 1.4778, + "step": 684500 + }, + { + "epoch": 7.23, + "learning_rate": 4.638549130934918e-05, + "loss": 1.5724, + "step": 685000 + }, + { + "epoch": 7.23, + "learning_rate": 4.6382852981837754e-05, + "loss": 1.5511, + "step": 685500 + }, + { + "epoch": 7.24, + "learning_rate": 4.638021465432633e-05, + "loss": 1.4857, + "step": 686000 + }, + { + "epoch": 7.24, + "learning_rate": 4.637757632681491e-05, + "loss": 1.5569, + "step": 686500 + }, + { + "epoch": 7.25, + "learning_rate": 4.637493799930349e-05, + "loss": 1.5369, + "step": 687000 + }, + { + "epoch": 7.26, + "learning_rate": 4.6372299671792056e-05, + "loss": 1.4937, + "step": 687500 + }, + { + "epoch": 7.26, + "learning_rate": 4.636966134428063e-05, + "loss": 1.5551, + "step": 688000 + }, + { + "epoch": 7.27, + "learning_rate": 4.6367023016769214e-05, + "loss": 1.5419, + "step": 688500 + }, + { + "epoch": 7.27, + "learning_rate": 4.636438468925779e-05, + "loss": 1.6045, + "step": 689000 + }, + { + "epoch": 7.28, + "learning_rate": 4.6361746361746365e-05, + "loss": 1.5637, + "step": 689500 + }, + { + "epoch": 7.28, + "learning_rate": 4.635910803423494e-05, + "loss": 1.564, + "step": 690000 + }, + { + "epoch": 7.29, + "learning_rate": 4.6356469706723516e-05, + "loss": 1.5386, + "step": 690500 + }, + { + "epoch": 7.29, + "learning_rate": 4.635383137921209e-05, + "loss": 1.5636, + "step": 691000 + }, + { + "epoch": 7.3, + "learning_rate": 4.635119305170067e-05, + "loss": 1.5504, + "step": 691500 + }, + { + "epoch": 7.3, + "learning_rate": 4.634855472418924e-05, + "loss": 1.5228, + "step": 692000 + }, + { + "epoch": 7.31, + "learning_rate": 4.634591639667782e-05, + "loss": 1.5299, + "step": 692500 + }, + { + "epoch": 7.31, + "learning_rate": 4.6343278069166393e-05, + "loss": 1.5078, + "step": 693000 + }, + { + "epoch": 7.32, + "learning_rate": 4.6340639741654976e-05, + "loss": 1.5406, + "step": 693500 + }, + { + "epoch": 7.32, + "learning_rate": 4.633800141414355e-05, + "loss": 1.5699, + "step": 694000 + }, + { + "epoch": 7.33, + "learning_rate": 4.633536308663212e-05, + "loss": 1.5153, + "step": 694500 + }, + { + "epoch": 7.33, + "learning_rate": 4.63327247591207e-05, + "loss": 1.5585, + "step": 695000 + }, + { + "epoch": 7.34, + "learning_rate": 4.633008643160928e-05, + "loss": 1.5119, + "step": 695500 + }, + { + "epoch": 7.35, + "learning_rate": 4.632744810409785e-05, + "loss": 1.525, + "step": 696000 + }, + { + "epoch": 7.35, + "learning_rate": 4.632480977658643e-05, + "loss": 1.4776, + "step": 696500 + }, + { + "epoch": 7.36, + "learning_rate": 4.6322171449075004e-05, + "loss": 1.5534, + "step": 697000 + }, + { + "epoch": 7.36, + "learning_rate": 4.631953312156358e-05, + "loss": 1.4884, + "step": 697500 + }, + { + "epoch": 7.37, + "learning_rate": 4.6316894794052155e-05, + "loss": 1.5275, + "step": 698000 + }, + { + "epoch": 7.37, + "learning_rate": 4.631425646654074e-05, + "loss": 1.5659, + "step": 698500 + }, + { + "epoch": 7.38, + "learning_rate": 4.631161813902931e-05, + "loss": 1.5743, + "step": 699000 + }, + { + "epoch": 7.38, + "learning_rate": 4.630897981151788e-05, + "loss": 1.5234, + "step": 699500 + }, + { + "epoch": 7.39, + "learning_rate": 4.630634148400646e-05, + "loss": 1.5538, + "step": 700000 + }, + { + "epoch": 7.39, + "learning_rate": 4.630370315649504e-05, + "loss": 1.5643, + "step": 700500 + }, + { + "epoch": 7.4, + "learning_rate": 4.6301064828983615e-05, + "loss": 1.5388, + "step": 701000 + }, + { + "epoch": 7.4, + "learning_rate": 4.6298426501472183e-05, + "loss": 1.5441, + "step": 701500 + }, + { + "epoch": 7.41, + "learning_rate": 4.6295788173960766e-05, + "loss": 1.5417, + "step": 702000 + }, + { + "epoch": 7.41, + "learning_rate": 4.629314984644934e-05, + "loss": 1.5365, + "step": 702500 + }, + { + "epoch": 7.42, + "learning_rate": 4.629051151893792e-05, + "loss": 1.5394, + "step": 703000 + }, + { + "epoch": 7.42, + "learning_rate": 4.628787319142649e-05, + "loss": 1.5298, + "step": 703500 + }, + { + "epoch": 7.43, + "learning_rate": 4.628523486391507e-05, + "loss": 1.5469, + "step": 704000 + }, + { + "epoch": 7.43, + "learning_rate": 4.628259653640364e-05, + "loss": 1.5533, + "step": 704500 + }, + { + "epoch": 7.44, + "learning_rate": 4.627995820889222e-05, + "loss": 1.616, + "step": 705000 + }, + { + "epoch": 7.45, + "learning_rate": 4.62773198813808e-05, + "loss": 1.4938, + "step": 705500 + }, + { + "epoch": 7.45, + "learning_rate": 4.6274681553869376e-05, + "loss": 1.5798, + "step": 706000 + }, + { + "epoch": 7.46, + "learning_rate": 4.6272043226357945e-05, + "loss": 1.4972, + "step": 706500 + }, + { + "epoch": 7.46, + "learning_rate": 4.626940489884653e-05, + "loss": 1.4848, + "step": 707000 + }, + { + "epoch": 7.47, + "learning_rate": 4.62667665713351e-05, + "loss": 1.5263, + "step": 707500 + }, + { + "epoch": 7.47, + "learning_rate": 4.626412824382368e-05, + "loss": 1.4994, + "step": 708000 + }, + { + "epoch": 7.48, + "learning_rate": 4.6261489916312254e-05, + "loss": 1.4977, + "step": 708500 + }, + { + "epoch": 7.48, + "learning_rate": 4.625885158880083e-05, + "loss": 1.5651, + "step": 709000 + }, + { + "epoch": 7.49, + "learning_rate": 4.6256213261289405e-05, + "loss": 1.5206, + "step": 709500 + }, + { + "epoch": 7.49, + "learning_rate": 4.625357493377798e-05, + "loss": 1.5526, + "step": 710000 + }, + { + "epoch": 7.5, + "learning_rate": 4.625093660626656e-05, + "loss": 1.5453, + "step": 710500 + }, + { + "epoch": 7.5, + "learning_rate": 4.624829827875513e-05, + "loss": 1.5197, + "step": 711000 + }, + { + "epoch": 7.51, + "learning_rate": 4.624565995124371e-05, + "loss": 1.4714, + "step": 711500 + }, + { + "epoch": 7.51, + "learning_rate": 4.624302162373228e-05, + "loss": 1.605, + "step": 712000 + }, + { + "epoch": 7.52, + "learning_rate": 4.6240383296220865e-05, + "loss": 1.5809, + "step": 712500 + }, + { + "epoch": 7.52, + "learning_rate": 4.623774496870944e-05, + "loss": 1.5645, + "step": 713000 + }, + { + "epoch": 7.53, + "learning_rate": 4.623510664119801e-05, + "loss": 1.5674, + "step": 713500 + }, + { + "epoch": 7.54, + "learning_rate": 4.623246831368659e-05, + "loss": 1.4627, + "step": 714000 + }, + { + "epoch": 7.54, + "learning_rate": 4.6229829986175166e-05, + "loss": 1.5293, + "step": 714500 + }, + { + "epoch": 7.55, + "learning_rate": 4.622719165866374e-05, + "loss": 1.5384, + "step": 715000 + }, + { + "epoch": 7.55, + "learning_rate": 4.622455333115232e-05, + "loss": 1.5014, + "step": 715500 + }, + { + "epoch": 7.56, + "learning_rate": 4.622191500364089e-05, + "loss": 1.5375, + "step": 716000 + }, + { + "epoch": 7.56, + "learning_rate": 4.621927667612947e-05, + "loss": 1.4813, + "step": 716500 + }, + { + "epoch": 7.57, + "learning_rate": 4.6216638348618044e-05, + "loss": 1.5681, + "step": 717000 + }, + { + "epoch": 7.57, + "learning_rate": 4.6214000021106626e-05, + "loss": 1.4718, + "step": 717500 + }, + { + "epoch": 7.58, + "learning_rate": 4.62113616935952e-05, + "loss": 1.5164, + "step": 718000 + }, + { + "epoch": 7.58, + "learning_rate": 4.620872336608377e-05, + "loss": 1.5006, + "step": 718500 + }, + { + "epoch": 7.59, + "learning_rate": 4.620608503857235e-05, + "loss": 1.4957, + "step": 719000 + }, + { + "epoch": 7.59, + "learning_rate": 4.620344671106093e-05, + "loss": 1.5103, + "step": 719500 + }, + { + "epoch": 7.6, + "learning_rate": 4.6200808383549504e-05, + "loss": 1.5042, + "step": 720000 + }, + { + "epoch": 7.6, + "learning_rate": 4.619817005603808e-05, + "loss": 1.5286, + "step": 720500 + }, + { + "epoch": 7.61, + "learning_rate": 4.6195531728526655e-05, + "loss": 1.4908, + "step": 721000 + }, + { + "epoch": 7.61, + "learning_rate": 4.619289340101523e-05, + "loss": 1.5493, + "step": 721500 + }, + { + "epoch": 7.62, + "learning_rate": 4.6190255073503806e-05, + "loss": 1.4931, + "step": 722000 + }, + { + "epoch": 7.62, + "learning_rate": 4.618761674599239e-05, + "loss": 1.5632, + "step": 722500 + }, + { + "epoch": 7.63, + "learning_rate": 4.6184978418480957e-05, + "loss": 1.5382, + "step": 723000 + }, + { + "epoch": 7.64, + "learning_rate": 4.618234009096953e-05, + "loss": 1.466, + "step": 723500 + }, + { + "epoch": 7.64, + "learning_rate": 4.617970176345811e-05, + "loss": 1.5274, + "step": 724000 + }, + { + "epoch": 7.65, + "learning_rate": 4.617706343594669e-05, + "loss": 1.5177, + "step": 724500 + }, + { + "epoch": 7.65, + "learning_rate": 4.6174425108435265e-05, + "loss": 1.5112, + "step": 725000 + }, + { + "epoch": 7.66, + "learning_rate": 4.6171786780923834e-05, + "loss": 1.5546, + "step": 725500 + }, + { + "epoch": 7.66, + "learning_rate": 4.6169148453412416e-05, + "loss": 1.5209, + "step": 726000 + }, + { + "epoch": 7.67, + "learning_rate": 4.616651012590099e-05, + "loss": 1.5249, + "step": 726500 + }, + { + "epoch": 7.67, + "learning_rate": 4.616387179838957e-05, + "loss": 1.5214, + "step": 727000 + }, + { + "epoch": 7.68, + "learning_rate": 4.616123347087814e-05, + "loss": 1.5383, + "step": 727500 + }, + { + "epoch": 7.68, + "learning_rate": 4.615859514336672e-05, + "loss": 1.5324, + "step": 728000 + }, + { + "epoch": 7.69, + "learning_rate": 4.6155956815855294e-05, + "loss": 1.5218, + "step": 728500 + }, + { + "epoch": 7.69, + "learning_rate": 4.615331848834387e-05, + "loss": 1.536, + "step": 729000 + }, + { + "epoch": 7.7, + "learning_rate": 4.615068016083245e-05, + "loss": 1.5354, + "step": 729500 + }, + { + "epoch": 7.7, + "learning_rate": 4.614804183332102e-05, + "loss": 1.515, + "step": 730000 + }, + { + "epoch": 7.71, + "learning_rate": 4.6145403505809596e-05, + "loss": 1.5044, + "step": 730500 + }, + { + "epoch": 7.71, + "learning_rate": 4.614276517829818e-05, + "loss": 1.5297, + "step": 731000 + }, + { + "epoch": 7.72, + "learning_rate": 4.614012685078675e-05, + "loss": 1.5114, + "step": 731500 + }, + { + "epoch": 7.73, + "learning_rate": 4.613748852327533e-05, + "loss": 1.5458, + "step": 732000 + }, + { + "epoch": 7.73, + "learning_rate": 4.6134850195763904e-05, + "loss": 1.4726, + "step": 732500 + }, + { + "epoch": 7.74, + "learning_rate": 4.613221186825248e-05, + "loss": 1.4841, + "step": 733000 + }, + { + "epoch": 7.74, + "learning_rate": 4.6129573540741055e-05, + "loss": 1.5312, + "step": 733500 + }, + { + "epoch": 7.75, + "learning_rate": 4.612693521322963e-05, + "loss": 1.4909, + "step": 734000 + }, + { + "epoch": 7.75, + "learning_rate": 4.612429688571821e-05, + "loss": 1.5605, + "step": 734500 + }, + { + "epoch": 7.76, + "learning_rate": 4.612165855820678e-05, + "loss": 1.5075, + "step": 735000 + }, + { + "epoch": 7.76, + "learning_rate": 4.611902023069536e-05, + "loss": 1.5562, + "step": 735500 + }, + { + "epoch": 7.77, + "learning_rate": 4.611638190318393e-05, + "loss": 1.58, + "step": 736000 + }, + { + "epoch": 7.77, + "learning_rate": 4.6113743575672515e-05, + "loss": 1.5121, + "step": 736500 + }, + { + "epoch": 7.78, + "learning_rate": 4.611110524816109e-05, + "loss": 1.5274, + "step": 737000 + }, + { + "epoch": 7.78, + "learning_rate": 4.610846692064966e-05, + "loss": 1.5839, + "step": 737500 + }, + { + "epoch": 7.79, + "learning_rate": 4.610582859313824e-05, + "loss": 1.5267, + "step": 738000 + }, + { + "epoch": 7.79, + "learning_rate": 4.610319026562682e-05, + "loss": 1.5886, + "step": 738500 + }, + { + "epoch": 7.8, + "learning_rate": 4.610055193811539e-05, + "loss": 1.5151, + "step": 739000 + }, + { + "epoch": 7.8, + "learning_rate": 4.609791361060397e-05, + "loss": 1.5348, + "step": 739500 + }, + { + "epoch": 7.81, + "learning_rate": 4.6095275283092543e-05, + "loss": 1.5032, + "step": 740000 + }, + { + "epoch": 7.81, + "learning_rate": 4.609263695558112e-05, + "loss": 1.5589, + "step": 740500 + }, + { + "epoch": 7.82, + "learning_rate": 4.6089998628069694e-05, + "loss": 1.5095, + "step": 741000 + }, + { + "epoch": 7.83, + "learning_rate": 4.608736030055828e-05, + "loss": 1.5131, + "step": 741500 + }, + { + "epoch": 7.83, + "learning_rate": 4.6084721973046845e-05, + "loss": 1.528, + "step": 742000 + }, + { + "epoch": 7.84, + "learning_rate": 4.608208364553542e-05, + "loss": 1.4847, + "step": 742500 + }, + { + "epoch": 7.84, + "learning_rate": 4.6079445318024e-05, + "loss": 1.5035, + "step": 743000 + }, + { + "epoch": 7.85, + "learning_rate": 4.607680699051258e-05, + "loss": 1.5074, + "step": 743500 + }, + { + "epoch": 7.85, + "learning_rate": 4.6074168663001154e-05, + "loss": 1.5308, + "step": 744000 + }, + { + "epoch": 7.86, + "learning_rate": 4.607153033548973e-05, + "loss": 1.5442, + "step": 744500 + }, + { + "epoch": 7.86, + "learning_rate": 4.6068892007978305e-05, + "loss": 1.5365, + "step": 745000 + }, + { + "epoch": 7.87, + "learning_rate": 4.606625368046688e-05, + "loss": 1.5338, + "step": 745500 + }, + { + "epoch": 7.87, + "learning_rate": 4.6063615352955456e-05, + "loss": 1.4638, + "step": 746000 + }, + { + "epoch": 7.88, + "learning_rate": 4.606097702544404e-05, + "loss": 1.5363, + "step": 746500 + }, + { + "epoch": 7.88, + "learning_rate": 4.605833869793261e-05, + "loss": 1.4783, + "step": 747000 + }, + { + "epoch": 7.89, + "learning_rate": 4.605570037042118e-05, + "loss": 1.5154, + "step": 747500 + }, + { + "epoch": 7.89, + "learning_rate": 4.6053062042909765e-05, + "loss": 1.5383, + "step": 748000 + }, + { + "epoch": 7.9, + "learning_rate": 4.605042371539834e-05, + "loss": 1.5622, + "step": 748500 + }, + { + "epoch": 7.9, + "learning_rate": 4.604778538788691e-05, + "loss": 1.4818, + "step": 749000 + }, + { + "epoch": 7.91, + "learning_rate": 4.6045147060375484e-05, + "loss": 1.5289, + "step": 749500 + }, + { + "epoch": 7.91, + "learning_rate": 4.604250873286407e-05, + "loss": 1.5275, + "step": 750000 + }, + { + "epoch": 7.92, + "learning_rate": 4.603987040535264e-05, + "loss": 1.5334, + "step": 750500 + }, + { + "epoch": 7.93, + "learning_rate": 4.603723207784122e-05, + "loss": 1.5717, + "step": 751000 + }, + { + "epoch": 7.93, + "learning_rate": 4.603459375032979e-05, + "loss": 1.5339, + "step": 751500 + }, + { + "epoch": 7.94, + "learning_rate": 4.603195542281837e-05, + "loss": 1.4903, + "step": 752000 + }, + { + "epoch": 7.94, + "learning_rate": 4.6029317095306944e-05, + "loss": 1.541, + "step": 752500 + }, + { + "epoch": 7.95, + "learning_rate": 4.602667876779552e-05, + "loss": 1.4812, + "step": 753000 + }, + { + "epoch": 7.95, + "learning_rate": 4.60240404402841e-05, + "loss": 1.5262, + "step": 753500 + }, + { + "epoch": 7.96, + "learning_rate": 4.602140211277267e-05, + "loss": 1.5397, + "step": 754000 + }, + { + "epoch": 7.96, + "learning_rate": 4.6018763785261246e-05, + "loss": 1.5148, + "step": 754500 + }, + { + "epoch": 7.97, + "learning_rate": 4.601612545774983e-05, + "loss": 1.5317, + "step": 755000 + }, + { + "epoch": 7.97, + "learning_rate": 4.6013487130238404e-05, + "loss": 1.5226, + "step": 755500 + }, + { + "epoch": 7.98, + "learning_rate": 4.601084880272698e-05, + "loss": 1.5377, + "step": 756000 + }, + { + "epoch": 7.98, + "learning_rate": 4.6008210475215555e-05, + "loss": 1.5145, + "step": 756500 + }, + { + "epoch": 7.99, + "learning_rate": 4.600557214770413e-05, + "loss": 1.5717, + "step": 757000 + }, + { + "epoch": 7.99, + "learning_rate": 4.6002933820192706e-05, + "loss": 1.4813, + "step": 757500 + }, + { + "epoch": 8.0, + "learning_rate": 4.600029549268128e-05, + "loss": 1.5353, + "step": 758000 + }, + { + "epoch": 8.0, + "learning_rate": 4.599765716516986e-05, + "loss": 1.4844, + "step": 758500 + }, + { + "epoch": 8.01, + "learning_rate": 4.599501883765843e-05, + "loss": 1.4703, + "step": 759000 + }, + { + "epoch": 8.02, + "learning_rate": 4.599238051014701e-05, + "loss": 1.5227, + "step": 759500 + }, + { + "epoch": 8.02, + "learning_rate": 4.598974218263559e-05, + "loss": 1.4999, + "step": 760000 + }, + { + "epoch": 8.03, + "learning_rate": 4.5987103855124166e-05, + "loss": 1.5236, + "step": 760500 + }, + { + "epoch": 8.03, + "learning_rate": 4.5984465527612734e-05, + "loss": 1.5229, + "step": 761000 + }, + { + "epoch": 8.04, + "learning_rate": 4.598182720010131e-05, + "loss": 1.4932, + "step": 761500 + }, + { + "epoch": 8.04, + "learning_rate": 4.597918887258989e-05, + "loss": 1.5068, + "step": 762000 + }, + { + "epoch": 8.05, + "learning_rate": 4.597655054507847e-05, + "loss": 1.5057, + "step": 762500 + }, + { + "epoch": 8.05, + "learning_rate": 4.597391221756704e-05, + "loss": 1.526, + "step": 763000 + }, + { + "epoch": 8.06, + "learning_rate": 4.597127389005562e-05, + "loss": 1.5663, + "step": 763500 + }, + { + "epoch": 8.06, + "learning_rate": 4.5968635562544194e-05, + "loss": 1.5341, + "step": 764000 + }, + { + "epoch": 8.07, + "learning_rate": 4.596599723503277e-05, + "loss": 1.5351, + "step": 764500 + }, + { + "epoch": 8.07, + "learning_rate": 4.5963358907521345e-05, + "loss": 1.4832, + "step": 765000 + }, + { + "epoch": 8.08, + "learning_rate": 4.596072058000993e-05, + "loss": 1.5022, + "step": 765500 + }, + { + "epoch": 8.08, + "learning_rate": 4.5958082252498496e-05, + "loss": 1.5413, + "step": 766000 + }, + { + "epoch": 8.09, + "learning_rate": 4.595544392498707e-05, + "loss": 1.5498, + "step": 766500 + }, + { + "epoch": 8.09, + "learning_rate": 4.5952805597475654e-05, + "loss": 1.525, + "step": 767000 + }, + { + "epoch": 8.1, + "learning_rate": 4.595016726996423e-05, + "loss": 1.5338, + "step": 767500 + }, + { + "epoch": 8.1, + "learning_rate": 4.59475289424528e-05, + "loss": 1.5164, + "step": 768000 + }, + { + "epoch": 8.11, + "learning_rate": 4.594489061494138e-05, + "loss": 1.4618, + "step": 768500 + }, + { + "epoch": 8.12, + "learning_rate": 4.5942252287429956e-05, + "loss": 1.5494, + "step": 769000 + }, + { + "epoch": 8.12, + "learning_rate": 4.593961395991853e-05, + "loss": 1.5027, + "step": 769500 + }, + { + "epoch": 8.13, + "learning_rate": 4.5936975632407107e-05, + "loss": 1.4816, + "step": 770000 + }, + { + "epoch": 8.13, + "learning_rate": 4.593433730489568e-05, + "loss": 1.4805, + "step": 770500 + }, + { + "epoch": 8.14, + "learning_rate": 4.593169897738426e-05, + "loss": 1.5015, + "step": 771000 + }, + { + "epoch": 8.14, + "learning_rate": 4.592906064987283e-05, + "loss": 1.4868, + "step": 771500 + }, + { + "epoch": 8.15, + "learning_rate": 4.5926422322361415e-05, + "loss": 1.53, + "step": 772000 + }, + { + "epoch": 8.15, + "learning_rate": 4.592378399484999e-05, + "loss": 1.5063, + "step": 772500 + }, + { + "epoch": 8.16, + "learning_rate": 4.592114566733856e-05, + "loss": 1.498, + "step": 773000 + }, + { + "epoch": 8.16, + "learning_rate": 4.5918507339827135e-05, + "loss": 1.5027, + "step": 773500 + }, + { + "epoch": 8.17, + "learning_rate": 4.591586901231572e-05, + "loss": 1.523, + "step": 774000 + }, + { + "epoch": 8.17, + "learning_rate": 4.591323068480429e-05, + "loss": 1.5508, + "step": 774500 + }, + { + "epoch": 8.18, + "learning_rate": 4.591059235729287e-05, + "loss": 1.4233, + "step": 775000 + }, + { + "epoch": 8.18, + "learning_rate": 4.5907954029781444e-05, + "loss": 1.4985, + "step": 775500 + }, + { + "epoch": 8.19, + "learning_rate": 4.590531570227002e-05, + "loss": 1.5265, + "step": 776000 + }, + { + "epoch": 8.19, + "learning_rate": 4.5902677374758595e-05, + "loss": 1.5287, + "step": 776500 + }, + { + "epoch": 8.2, + "learning_rate": 4.590003904724717e-05, + "loss": 1.5233, + "step": 777000 + }, + { + "epoch": 8.21, + "learning_rate": 4.5897400719735746e-05, + "loss": 1.5272, + "step": 777500 + }, + { + "epoch": 8.21, + "learning_rate": 4.589476239222432e-05, + "loss": 1.5692, + "step": 778000 + }, + { + "epoch": 8.22, + "learning_rate": 4.58921240647129e-05, + "loss": 1.4921, + "step": 778500 + }, + { + "epoch": 8.22, + "learning_rate": 4.588948573720148e-05, + "loss": 1.4837, + "step": 779000 + }, + { + "epoch": 8.23, + "learning_rate": 4.5886847409690054e-05, + "loss": 1.5291, + "step": 779500 + }, + { + "epoch": 8.23, + "learning_rate": 4.588420908217862e-05, + "loss": 1.5614, + "step": 780000 + }, + { + "epoch": 8.24, + "learning_rate": 4.5881570754667205e-05, + "loss": 1.5763, + "step": 780500 + }, + { + "epoch": 8.24, + "learning_rate": 4.587893242715578e-05, + "loss": 1.5123, + "step": 781000 + }, + { + "epoch": 8.25, + "learning_rate": 4.5876294099644356e-05, + "loss": 1.5518, + "step": 781500 + }, + { + "epoch": 8.25, + "learning_rate": 4.587365577213293e-05, + "loss": 1.4835, + "step": 782000 + }, + { + "epoch": 8.26, + "learning_rate": 4.587101744462151e-05, + "loss": 1.4936, + "step": 782500 + }, + { + "epoch": 8.26, + "learning_rate": 4.586837911711008e-05, + "loss": 1.489, + "step": 783000 + }, + { + "epoch": 8.27, + "learning_rate": 4.586574078959866e-05, + "loss": 1.4575, + "step": 783500 + }, + { + "epoch": 8.27, + "learning_rate": 4.586310246208724e-05, + "loss": 1.5163, + "step": 784000 + }, + { + "epoch": 8.28, + "learning_rate": 4.5860464134575816e-05, + "loss": 1.536, + "step": 784500 + }, + { + "epoch": 8.28, + "learning_rate": 4.5857825807064385e-05, + "loss": 1.4468, + "step": 785000 + }, + { + "epoch": 8.29, + "learning_rate": 4.585518747955296e-05, + "loss": 1.4916, + "step": 785500 + }, + { + "epoch": 8.29, + "learning_rate": 4.585254915204154e-05, + "loss": 1.5535, + "step": 786000 + }, + { + "epoch": 8.3, + "learning_rate": 4.584991082453012e-05, + "loss": 1.5481, + "step": 786500 + }, + { + "epoch": 8.31, + "learning_rate": 4.584727249701869e-05, + "loss": 1.4677, + "step": 787000 + }, + { + "epoch": 8.31, + "learning_rate": 4.584463416950727e-05, + "loss": 1.518, + "step": 787500 + }, + { + "epoch": 8.32, + "learning_rate": 4.5841995841995844e-05, + "loss": 1.55, + "step": 788000 + }, + { + "epoch": 8.32, + "learning_rate": 4.583935751448442e-05, + "loss": 1.5275, + "step": 788500 + }, + { + "epoch": 8.33, + "learning_rate": 4.5836719186972995e-05, + "loss": 1.4966, + "step": 789000 + }, + { + "epoch": 8.33, + "learning_rate": 4.583408085946157e-05, + "loss": 1.498, + "step": 789500 + }, + { + "epoch": 8.34, + "learning_rate": 4.5831442531950146e-05, + "loss": 1.4838, + "step": 790000 + }, + { + "epoch": 8.34, + "learning_rate": 4.582880420443872e-05, + "loss": 1.4418, + "step": 790500 + }, + { + "epoch": 8.35, + "learning_rate": 4.5826165876927304e-05, + "loss": 1.4914, + "step": 791000 + }, + { + "epoch": 8.35, + "learning_rate": 4.582352754941588e-05, + "loss": 1.5284, + "step": 791500 + }, + { + "epoch": 8.36, + "learning_rate": 4.582088922190445e-05, + "loss": 1.4825, + "step": 792000 + }, + { + "epoch": 8.36, + "learning_rate": 4.581825089439303e-05, + "loss": 1.5273, + "step": 792500 + }, + { + "epoch": 8.37, + "learning_rate": 4.5815612566881606e-05, + "loss": 1.5519, + "step": 793000 + }, + { + "epoch": 8.37, + "learning_rate": 4.581297423937018e-05, + "loss": 1.4948, + "step": 793500 + }, + { + "epoch": 8.38, + "learning_rate": 4.581033591185876e-05, + "loss": 1.4513, + "step": 794000 + }, + { + "epoch": 8.38, + "learning_rate": 4.580769758434733e-05, + "loss": 1.4713, + "step": 794500 + }, + { + "epoch": 8.39, + "learning_rate": 4.580505925683591e-05, + "loss": 1.4987, + "step": 795000 + }, + { + "epoch": 8.4, + "learning_rate": 4.5802420929324484e-05, + "loss": 1.5318, + "step": 795500 + }, + { + "epoch": 8.4, + "learning_rate": 4.5799782601813066e-05, + "loss": 1.5051, + "step": 796000 + }, + { + "epoch": 8.41, + "learning_rate": 4.5797144274301634e-05, + "loss": 1.4995, + "step": 796500 + }, + { + "epoch": 8.41, + "learning_rate": 4.579450594679021e-05, + "loss": 1.4304, + "step": 797000 + }, + { + "epoch": 8.42, + "learning_rate": 4.5791867619278785e-05, + "loss": 1.5333, + "step": 797500 + }, + { + "epoch": 8.42, + "learning_rate": 4.578922929176737e-05, + "loss": 1.5254, + "step": 798000 + }, + { + "epoch": 8.43, + "learning_rate": 4.578659096425594e-05, + "loss": 1.5061, + "step": 798500 + }, + { + "epoch": 8.43, + "learning_rate": 4.578395263674451e-05, + "loss": 1.4652, + "step": 799000 + }, + { + "epoch": 8.44, + "learning_rate": 4.5781314309233094e-05, + "loss": 1.4926, + "step": 799500 + }, + { + "epoch": 8.44, + "learning_rate": 4.577867598172167e-05, + "loss": 1.4471, + "step": 800000 + }, + { + "epoch": 8.45, + "learning_rate": 4.5776037654210245e-05, + "loss": 1.5457, + "step": 800500 + }, + { + "epoch": 8.45, + "learning_rate": 4.577339932669882e-05, + "loss": 1.4899, + "step": 801000 + }, + { + "epoch": 8.46, + "learning_rate": 4.5770760999187396e-05, + "loss": 1.5145, + "step": 801500 + }, + { + "epoch": 8.46, + "learning_rate": 4.576812267167597e-05, + "loss": 1.45, + "step": 802000 + }, + { + "epoch": 8.47, + "learning_rate": 4.576548434416455e-05, + "loss": 1.5025, + "step": 802500 + }, + { + "epoch": 8.47, + "learning_rate": 4.576284601665313e-05, + "loss": 1.4812, + "step": 803000 + }, + { + "epoch": 8.48, + "learning_rate": 4.5760207689141705e-05, + "loss": 1.4892, + "step": 803500 + }, + { + "epoch": 8.48, + "learning_rate": 4.5757569361630274e-05, + "loss": 1.5799, + "step": 804000 + }, + { + "epoch": 8.49, + "learning_rate": 4.5754931034118856e-05, + "loss": 1.4934, + "step": 804500 + }, + { + "epoch": 8.5, + "learning_rate": 4.575229270660743e-05, + "loss": 1.5279, + "step": 805000 + }, + { + "epoch": 8.5, + "learning_rate": 4.574965437909601e-05, + "loss": 1.4973, + "step": 805500 + }, + { + "epoch": 8.51, + "learning_rate": 4.574701605158458e-05, + "loss": 1.4551, + "step": 806000 + }, + { + "epoch": 8.51, + "learning_rate": 4.574437772407316e-05, + "loss": 1.5139, + "step": 806500 + }, + { + "epoch": 8.52, + "learning_rate": 4.574173939656173e-05, + "loss": 1.5575, + "step": 807000 + }, + { + "epoch": 8.52, + "learning_rate": 4.573910106905031e-05, + "loss": 1.4928, + "step": 807500 + }, + { + "epoch": 8.53, + "learning_rate": 4.573646274153889e-05, + "loss": 1.5196, + "step": 808000 + }, + { + "epoch": 8.53, + "learning_rate": 4.573382441402746e-05, + "loss": 1.4923, + "step": 808500 + }, + { + "epoch": 8.54, + "learning_rate": 4.5731186086516035e-05, + "loss": 1.4987, + "step": 809000 + }, + { + "epoch": 8.54, + "learning_rate": 4.572854775900461e-05, + "loss": 1.6131, + "step": 809500 + }, + { + "epoch": 8.55, + "learning_rate": 4.572590943149319e-05, + "loss": 1.4986, + "step": 810000 + }, + { + "epoch": 8.55, + "learning_rate": 4.572327110398177e-05, + "loss": 1.5385, + "step": 810500 + }, + { + "epoch": 8.56, + "learning_rate": 4.572063277647034e-05, + "loss": 1.4832, + "step": 811000 + }, + { + "epoch": 8.56, + "learning_rate": 4.571799444895892e-05, + "loss": 1.5388, + "step": 811500 + }, + { + "epoch": 8.57, + "learning_rate": 4.5715356121447495e-05, + "loss": 1.5117, + "step": 812000 + }, + { + "epoch": 8.57, + "learning_rate": 4.571271779393607e-05, + "loss": 1.5383, + "step": 812500 + }, + { + "epoch": 8.58, + "learning_rate": 4.5710079466424646e-05, + "loss": 1.476, + "step": 813000 + }, + { + "epoch": 8.59, + "learning_rate": 4.570744113891322e-05, + "loss": 1.4776, + "step": 813500 + }, + { + "epoch": 8.59, + "learning_rate": 4.57048028114018e-05, + "loss": 1.4802, + "step": 814000 + }, + { + "epoch": 8.6, + "learning_rate": 4.570216448389037e-05, + "loss": 1.5292, + "step": 814500 + }, + { + "epoch": 8.6, + "learning_rate": 4.5699526156378955e-05, + "loss": 1.5119, + "step": 815000 + }, + { + "epoch": 8.61, + "learning_rate": 4.569688782886752e-05, + "loss": 1.5454, + "step": 815500 + }, + { + "epoch": 8.61, + "learning_rate": 4.56942495013561e-05, + "loss": 1.4798, + "step": 816000 + }, + { + "epoch": 8.62, + "learning_rate": 4.569161117384468e-05, + "loss": 1.485, + "step": 816500 + }, + { + "epoch": 8.62, + "learning_rate": 4.5688972846333257e-05, + "loss": 1.4885, + "step": 817000 + }, + { + "epoch": 8.63, + "learning_rate": 4.568633451882183e-05, + "loss": 1.4986, + "step": 817500 + }, + { + "epoch": 8.63, + "learning_rate": 4.568369619131041e-05, + "loss": 1.4396, + "step": 818000 + }, + { + "epoch": 8.64, + "learning_rate": 4.568105786379898e-05, + "loss": 1.5205, + "step": 818500 + }, + { + "epoch": 8.64, + "learning_rate": 4.567841953628756e-05, + "loss": 1.5677, + "step": 819000 + }, + { + "epoch": 8.65, + "learning_rate": 4.5675781208776134e-05, + "loss": 1.4936, + "step": 819500 + }, + { + "epoch": 8.65, + "learning_rate": 4.5673142881264716e-05, + "loss": 1.4856, + "step": 820000 + }, + { + "epoch": 8.66, + "learning_rate": 4.5670504553753285e-05, + "loss": 1.4669, + "step": 820500 + }, + { + "epoch": 8.66, + "learning_rate": 4.566786622624186e-05, + "loss": 1.5214, + "step": 821000 + }, + { + "epoch": 8.67, + "learning_rate": 4.566522789873044e-05, + "loss": 1.5361, + "step": 821500 + }, + { + "epoch": 8.67, + "learning_rate": 4.566258957121902e-05, + "loss": 1.5234, + "step": 822000 + }, + { + "epoch": 8.68, + "learning_rate": 4.5659951243707594e-05, + "loss": 1.4918, + "step": 822500 + }, + { + "epoch": 8.69, + "learning_rate": 4.565731291619616e-05, + "loss": 1.4677, + "step": 823000 + }, + { + "epoch": 8.69, + "learning_rate": 4.5654674588684745e-05, + "loss": 1.5137, + "step": 823500 + }, + { + "epoch": 8.7, + "learning_rate": 4.565203626117332e-05, + "loss": 1.5054, + "step": 824000 + }, + { + "epoch": 8.7, + "learning_rate": 4.5649397933661896e-05, + "loss": 1.5198, + "step": 824500 + }, + { + "epoch": 8.71, + "learning_rate": 4.564675960615047e-05, + "loss": 1.5457, + "step": 825000 + }, + { + "epoch": 8.71, + "learning_rate": 4.564412127863905e-05, + "loss": 1.4964, + "step": 825500 + }, + { + "epoch": 8.72, + "learning_rate": 4.564148295112762e-05, + "loss": 1.5181, + "step": 826000 + }, + { + "epoch": 8.72, + "learning_rate": 4.56388446236162e-05, + "loss": 1.5402, + "step": 826500 + }, + { + "epoch": 8.73, + "learning_rate": 4.563620629610478e-05, + "loss": 1.4371, + "step": 827000 + }, + { + "epoch": 8.73, + "learning_rate": 4.563356796859335e-05, + "loss": 1.5457, + "step": 827500 + }, + { + "epoch": 8.74, + "learning_rate": 4.5630929641081924e-05, + "loss": 1.5171, + "step": 828000 + }, + { + "epoch": 8.74, + "learning_rate": 4.5628291313570506e-05, + "loss": 1.5143, + "step": 828500 + }, + { + "epoch": 8.75, + "learning_rate": 4.562565298605908e-05, + "loss": 1.4715, + "step": 829000 + }, + { + "epoch": 8.75, + "learning_rate": 4.562301465854766e-05, + "loss": 1.5141, + "step": 829500 + }, + { + "epoch": 8.76, + "learning_rate": 4.562037633103623e-05, + "loss": 1.4906, + "step": 830000 + }, + { + "epoch": 8.76, + "learning_rate": 4.561773800352481e-05, + "loss": 1.5788, + "step": 830500 + }, + { + "epoch": 8.77, + "learning_rate": 4.5615099676013384e-05, + "loss": 1.5097, + "step": 831000 + }, + { + "epoch": 8.78, + "learning_rate": 4.561246134850196e-05, + "loss": 1.4957, + "step": 831500 + }, + { + "epoch": 8.78, + "learning_rate": 4.560982302099054e-05, + "loss": 1.5315, + "step": 832000 + }, + { + "epoch": 8.79, + "learning_rate": 4.560718469347911e-05, + "loss": 1.4576, + "step": 832500 + }, + { + "epoch": 8.79, + "learning_rate": 4.5604546365967686e-05, + "loss": 1.4825, + "step": 833000 + }, + { + "epoch": 8.8, + "learning_rate": 4.560190803845627e-05, + "loss": 1.4553, + "step": 833500 + }, + { + "epoch": 8.8, + "learning_rate": 4.5599269710944843e-05, + "loss": 1.5057, + "step": 834000 + }, + { + "epoch": 8.81, + "learning_rate": 4.559663138343341e-05, + "loss": 1.521, + "step": 834500 + }, + { + "epoch": 8.81, + "learning_rate": 4.559399305592199e-05, + "loss": 1.4966, + "step": 835000 + }, + { + "epoch": 8.82, + "learning_rate": 4.559135472841057e-05, + "loss": 1.4842, + "step": 835500 + }, + { + "epoch": 8.82, + "learning_rate": 4.5588716400899145e-05, + "loss": 1.5414, + "step": 836000 + }, + { + "epoch": 8.83, + "learning_rate": 4.558607807338772e-05, + "loss": 1.5532, + "step": 836500 + }, + { + "epoch": 8.83, + "learning_rate": 4.5583439745876296e-05, + "loss": 1.457, + "step": 837000 + }, + { + "epoch": 8.84, + "learning_rate": 4.558080141836487e-05, + "loss": 1.5209, + "step": 837500 + }, + { + "epoch": 8.84, + "learning_rate": 4.557816309085345e-05, + "loss": 1.5159, + "step": 838000 + }, + { + "epoch": 8.85, + "learning_rate": 4.557552476334202e-05, + "loss": 1.448, + "step": 838500 + }, + { + "epoch": 8.85, + "learning_rate": 4.5572886435830605e-05, + "loss": 1.5197, + "step": 839000 + }, + { + "epoch": 8.86, + "learning_rate": 4.5570248108319174e-05, + "loss": 1.5156, + "step": 839500 + }, + { + "epoch": 8.86, + "learning_rate": 4.556760978080775e-05, + "loss": 1.4897, + "step": 840000 + }, + { + "epoch": 8.87, + "learning_rate": 4.556497145329633e-05, + "loss": 1.5486, + "step": 840500 + }, + { + "epoch": 8.88, + "learning_rate": 4.556233312578491e-05, + "loss": 1.5172, + "step": 841000 + }, + { + "epoch": 8.88, + "learning_rate": 4.555969479827348e-05, + "loss": 1.4715, + "step": 841500 + }, + { + "epoch": 8.89, + "learning_rate": 4.555705647076206e-05, + "loss": 1.5004, + "step": 842000 + }, + { + "epoch": 8.89, + "learning_rate": 4.5554418143250634e-05, + "loss": 1.513, + "step": 842500 + }, + { + "epoch": 8.9, + "learning_rate": 4.555177981573921e-05, + "loss": 1.5132, + "step": 843000 + }, + { + "epoch": 8.9, + "learning_rate": 4.5549141488227785e-05, + "loss": 1.5247, + "step": 843500 + }, + { + "epoch": 8.91, + "learning_rate": 4.554650316071636e-05, + "loss": 1.4759, + "step": 844000 + }, + { + "epoch": 8.91, + "learning_rate": 4.5543864833204935e-05, + "loss": 1.4496, + "step": 844500 + }, + { + "epoch": 8.92, + "learning_rate": 4.554122650569351e-05, + "loss": 1.5219, + "step": 845000 + }, + { + "epoch": 8.92, + "learning_rate": 4.553858817818209e-05, + "loss": 1.4615, + "step": 845500 + }, + { + "epoch": 8.93, + "learning_rate": 4.553594985067067e-05, + "loss": 1.5516, + "step": 846000 + }, + { + "epoch": 8.93, + "learning_rate": 4.553331152315924e-05, + "loss": 1.5273, + "step": 846500 + }, + { + "epoch": 8.94, + "learning_rate": 4.553067319564781e-05, + "loss": 1.4877, + "step": 847000 + }, + { + "epoch": 8.94, + "learning_rate": 4.5528034868136395e-05, + "loss": 1.4884, + "step": 847500 + }, + { + "epoch": 8.95, + "learning_rate": 4.552539654062497e-05, + "loss": 1.4771, + "step": 848000 + }, + { + "epoch": 8.95, + "learning_rate": 4.5522758213113546e-05, + "loss": 1.5481, + "step": 848500 + }, + { + "epoch": 8.96, + "learning_rate": 4.552011988560212e-05, + "loss": 1.5059, + "step": 849000 + }, + { + "epoch": 8.97, + "learning_rate": 4.55174815580907e-05, + "loss": 1.4753, + "step": 849500 + }, + { + "epoch": 8.97, + "learning_rate": 4.551484323057927e-05, + "loss": 1.4916, + "step": 850000 + }, + { + "epoch": 8.98, + "learning_rate": 4.551220490306785e-05, + "loss": 1.5208, + "step": 850500 + }, + { + "epoch": 8.98, + "learning_rate": 4.550956657555643e-05, + "loss": 1.4954, + "step": 851000 + }, + { + "epoch": 8.99, + "learning_rate": 4.5506928248045e-05, + "loss": 1.5149, + "step": 851500 + }, + { + "epoch": 8.99, + "learning_rate": 4.5504289920533575e-05, + "loss": 1.5112, + "step": 852000 + }, + { + "epoch": 9.0, + "learning_rate": 4.550165159302216e-05, + "loss": 1.5326, + "step": 852500 + }, + { + "epoch": 9.0, + "learning_rate": 4.549901326551073e-05, + "loss": 1.4933, + "step": 853000 + }, + { + "epoch": 9.01, + "learning_rate": 4.54963749379993e-05, + "loss": 1.487, + "step": 853500 + }, + { + "epoch": 9.01, + "learning_rate": 4.549373661048788e-05, + "loss": 1.5004, + "step": 854000 + }, + { + "epoch": 9.02, + "learning_rate": 4.549109828297646e-05, + "loss": 1.5143, + "step": 854500 + }, + { + "epoch": 9.02, + "learning_rate": 4.5488459955465034e-05, + "loss": 1.5013, + "step": 855000 + }, + { + "epoch": 9.03, + "learning_rate": 4.548582162795361e-05, + "loss": 1.4674, + "step": 855500 + }, + { + "epoch": 9.03, + "learning_rate": 4.5483183300442185e-05, + "loss": 1.4734, + "step": 856000 + }, + { + "epoch": 9.04, + "learning_rate": 4.548054497293076e-05, + "loss": 1.4547, + "step": 856500 + }, + { + "epoch": 9.04, + "learning_rate": 4.5477906645419336e-05, + "loss": 1.5009, + "step": 857000 + }, + { + "epoch": 9.05, + "learning_rate": 4.547526831790792e-05, + "loss": 1.5013, + "step": 857500 + }, + { + "epoch": 9.05, + "learning_rate": 4.5472629990396494e-05, + "loss": 1.5221, + "step": 858000 + }, + { + "epoch": 9.06, + "learning_rate": 4.546999166288506e-05, + "loss": 1.5439, + "step": 858500 + }, + { + "epoch": 9.07, + "learning_rate": 4.546735333537364e-05, + "loss": 1.4816, + "step": 859000 + }, + { + "epoch": 9.07, + "learning_rate": 4.546471500786222e-05, + "loss": 1.4365, + "step": 859500 + }, + { + "epoch": 9.08, + "learning_rate": 4.5462076680350796e-05, + "loss": 1.476, + "step": 860000 + }, + { + "epoch": 9.08, + "learning_rate": 4.5459438352839365e-05, + "loss": 1.5484, + "step": 860500 + }, + { + "epoch": 9.09, + "learning_rate": 4.545680002532795e-05, + "loss": 1.5225, + "step": 861000 + }, + { + "epoch": 9.09, + "learning_rate": 4.545416169781652e-05, + "loss": 1.5144, + "step": 861500 + }, + { + "epoch": 9.1, + "learning_rate": 4.54515233703051e-05, + "loss": 1.4876, + "step": 862000 + }, + { + "epoch": 9.1, + "learning_rate": 4.544888504279367e-05, + "loss": 1.488, + "step": 862500 + }, + { + "epoch": 9.11, + "learning_rate": 4.544624671528225e-05, + "loss": 1.5019, + "step": 863000 + }, + { + "epoch": 9.11, + "learning_rate": 4.5443608387770824e-05, + "loss": 1.4586, + "step": 863500 + }, + { + "epoch": 9.12, + "learning_rate": 4.54409700602594e-05, + "loss": 1.4882, + "step": 864000 + }, + { + "epoch": 9.12, + "learning_rate": 4.543833173274798e-05, + "loss": 1.4721, + "step": 864500 + }, + { + "epoch": 9.13, + "learning_rate": 4.543569340523656e-05, + "loss": 1.5136, + "step": 865000 + }, + { + "epoch": 9.13, + "learning_rate": 4.5433055077725126e-05, + "loss": 1.4903, + "step": 865500 + }, + { + "epoch": 9.14, + "learning_rate": 4.543041675021371e-05, + "loss": 1.5404, + "step": 866000 + }, + { + "epoch": 9.14, + "learning_rate": 4.5427778422702284e-05, + "loss": 1.4911, + "step": 866500 + }, + { + "epoch": 9.15, + "learning_rate": 4.542514009519086e-05, + "loss": 1.4516, + "step": 867000 + }, + { + "epoch": 9.15, + "learning_rate": 4.5422501767679435e-05, + "loss": 1.4905, + "step": 867500 + }, + { + "epoch": 9.16, + "learning_rate": 4.541986344016801e-05, + "loss": 1.4924, + "step": 868000 + }, + { + "epoch": 9.17, + "learning_rate": 4.5417225112656586e-05, + "loss": 1.5063, + "step": 868500 + }, + { + "epoch": 9.17, + "learning_rate": 4.541458678514516e-05, + "loss": 1.4862, + "step": 869000 + }, + { + "epoch": 9.18, + "learning_rate": 4.5411948457633744e-05, + "loss": 1.4828, + "step": 869500 + }, + { + "epoch": 9.18, + "learning_rate": 4.540931013012231e-05, + "loss": 1.4989, + "step": 870000 + }, + { + "epoch": 9.19, + "learning_rate": 4.540667180261089e-05, + "loss": 1.533, + "step": 870500 + }, + { + "epoch": 9.19, + "learning_rate": 4.5404033475099463e-05, + "loss": 1.5392, + "step": 871000 + }, + { + "epoch": 9.2, + "learning_rate": 4.5401395147588046e-05, + "loss": 1.5212, + "step": 871500 + }, + { + "epoch": 9.2, + "learning_rate": 4.539875682007662e-05, + "loss": 1.5228, + "step": 872000 + }, + { + "epoch": 9.21, + "learning_rate": 4.539611849256519e-05, + "loss": 1.5116, + "step": 872500 + }, + { + "epoch": 9.21, + "learning_rate": 4.539348016505377e-05, + "loss": 1.519, + "step": 873000 + }, + { + "epoch": 9.22, + "learning_rate": 4.539084183754235e-05, + "loss": 1.5352, + "step": 873500 + }, + { + "epoch": 9.22, + "learning_rate": 4.538820351003092e-05, + "loss": 1.5112, + "step": 874000 + }, + { + "epoch": 9.23, + "learning_rate": 4.53855651825195e-05, + "loss": 1.5095, + "step": 874500 + }, + { + "epoch": 9.23, + "learning_rate": 4.5382926855008074e-05, + "loss": 1.4485, + "step": 875000 + }, + { + "epoch": 9.24, + "learning_rate": 4.538028852749665e-05, + "loss": 1.4753, + "step": 875500 + }, + { + "epoch": 9.24, + "learning_rate": 4.5377650199985225e-05, + "loss": 1.5099, + "step": 876000 + }, + { + "epoch": 9.25, + "learning_rate": 4.537501187247381e-05, + "loss": 1.5292, + "step": 876500 + }, + { + "epoch": 9.26, + "learning_rate": 4.537237354496238e-05, + "loss": 1.4996, + "step": 877000 + }, + { + "epoch": 9.26, + "learning_rate": 4.536973521745095e-05, + "loss": 1.525, + "step": 877500 + }, + { + "epoch": 9.27, + "learning_rate": 4.5367096889939534e-05, + "loss": 1.5394, + "step": 878000 + }, + { + "epoch": 9.27, + "learning_rate": 4.536445856242811e-05, + "loss": 1.5309, + "step": 878500 + }, + { + "epoch": 9.28, + "learning_rate": 4.5361820234916685e-05, + "loss": 1.4884, + "step": 879000 + }, + { + "epoch": 9.28, + "learning_rate": 4.535918190740526e-05, + "loss": 1.512, + "step": 879500 + }, + { + "epoch": 9.29, + "learning_rate": 4.5356543579893836e-05, + "loss": 1.4659, + "step": 880000 + }, + { + "epoch": 9.29, + "learning_rate": 4.535390525238241e-05, + "loss": 1.4314, + "step": 880500 + }, + { + "epoch": 9.3, + "learning_rate": 4.535126692487099e-05, + "loss": 1.5154, + "step": 881000 + }, + { + "epoch": 9.3, + "learning_rate": 4.534862859735957e-05, + "loss": 1.4589, + "step": 881500 + }, + { + "epoch": 9.31, + "learning_rate": 4.534599026984814e-05, + "loss": 1.4929, + "step": 882000 + }, + { + "epoch": 9.31, + "learning_rate": 4.534335194233671e-05, + "loss": 1.4657, + "step": 882500 + }, + { + "epoch": 9.32, + "learning_rate": 4.534071361482529e-05, + "loss": 1.5184, + "step": 883000 + }, + { + "epoch": 9.32, + "learning_rate": 4.533807528731387e-05, + "loss": 1.5129, + "step": 883500 + }, + { + "epoch": 9.33, + "learning_rate": 4.5335436959802446e-05, + "loss": 1.4943, + "step": 884000 + }, + { + "epoch": 9.33, + "learning_rate": 4.5332798632291015e-05, + "loss": 1.5047, + "step": 884500 + }, + { + "epoch": 9.34, + "learning_rate": 4.53301603047796e-05, + "loss": 1.5267, + "step": 885000 + }, + { + "epoch": 9.34, + "learning_rate": 4.532752197726817e-05, + "loss": 1.4905, + "step": 885500 + }, + { + "epoch": 9.35, + "learning_rate": 4.532488364975675e-05, + "loss": 1.501, + "step": 886000 + }, + { + "epoch": 9.36, + "learning_rate": 4.5322245322245324e-05, + "loss": 1.4997, + "step": 886500 + }, + { + "epoch": 9.36, + "learning_rate": 4.53196069947339e-05, + "loss": 1.4772, + "step": 887000 + }, + { + "epoch": 9.37, + "learning_rate": 4.5316968667222475e-05, + "loss": 1.4075, + "step": 887500 + }, + { + "epoch": 9.37, + "learning_rate": 4.531433033971105e-05, + "loss": 1.4817, + "step": 888000 + }, + { + "epoch": 9.38, + "learning_rate": 4.531169201219963e-05, + "loss": 1.5309, + "step": 888500 + }, + { + "epoch": 9.38, + "learning_rate": 4.53090536846882e-05, + "loss": 1.4818, + "step": 889000 + }, + { + "epoch": 9.39, + "learning_rate": 4.530641535717678e-05, + "loss": 1.5279, + "step": 889500 + }, + { + "epoch": 9.39, + "learning_rate": 4.530377702966536e-05, + "loss": 1.4995, + "step": 890000 + }, + { + "epoch": 9.4, + "learning_rate": 4.5301138702153935e-05, + "loss": 1.5277, + "step": 890500 + }, + { + "epoch": 9.4, + "learning_rate": 4.529850037464251e-05, + "loss": 1.4601, + "step": 891000 + }, + { + "epoch": 9.41, + "learning_rate": 4.5295862047131085e-05, + "loss": 1.4825, + "step": 891500 + }, + { + "epoch": 9.41, + "learning_rate": 4.529322371961966e-05, + "loss": 1.5565, + "step": 892000 + }, + { + "epoch": 9.42, + "learning_rate": 4.5290585392108236e-05, + "loss": 1.4455, + "step": 892500 + }, + { + "epoch": 9.42, + "learning_rate": 4.528794706459681e-05, + "loss": 1.5124, + "step": 893000 + }, + { + "epoch": 9.43, + "learning_rate": 4.5285308737085394e-05, + "loss": 1.4756, + "step": 893500 + }, + { + "epoch": 9.43, + "learning_rate": 4.528267040957396e-05, + "loss": 1.4438, + "step": 894000 + }, + { + "epoch": 9.44, + "learning_rate": 4.528003208206254e-05, + "loss": 1.4551, + "step": 894500 + }, + { + "epoch": 9.45, + "learning_rate": 4.527739375455112e-05, + "loss": 1.4604, + "step": 895000 + }, + { + "epoch": 9.45, + "learning_rate": 4.5274755427039696e-05, + "loss": 1.4646, + "step": 895500 + }, + { + "epoch": 9.46, + "learning_rate": 4.527211709952827e-05, + "loss": 1.4837, + "step": 896000 + }, + { + "epoch": 9.46, + "learning_rate": 4.526947877201684e-05, + "loss": 1.4403, + "step": 896500 + }, + { + "epoch": 9.47, + "learning_rate": 4.526684044450542e-05, + "loss": 1.4495, + "step": 897000 + }, + { + "epoch": 9.47, + "learning_rate": 4.5264202116994e-05, + "loss": 1.4795, + "step": 897500 + }, + { + "epoch": 9.48, + "learning_rate": 4.5261563789482574e-05, + "loss": 1.4648, + "step": 898000 + }, + { + "epoch": 9.48, + "learning_rate": 4.525892546197115e-05, + "loss": 1.4478, + "step": 898500 + }, + { + "epoch": 9.49, + "learning_rate": 4.5256287134459725e-05, + "loss": 1.446, + "step": 899000 + }, + { + "epoch": 9.49, + "learning_rate": 4.52536488069483e-05, + "loss": 1.4738, + "step": 899500 + }, + { + "epoch": 9.5, + "learning_rate": 4.5251010479436876e-05, + "loss": 1.4616, + "step": 900000 + }, + { + "epoch": 9.5, + "learning_rate": 4.524837215192546e-05, + "loss": 1.5362, + "step": 900500 + }, + { + "epoch": 9.51, + "learning_rate": 4.5245733824414027e-05, + "loss": 1.4643, + "step": 901000 + }, + { + "epoch": 9.51, + "learning_rate": 4.52430954969026e-05, + "loss": 1.5177, + "step": 901500 + }, + { + "epoch": 9.52, + "learning_rate": 4.5240457169391184e-05, + "loss": 1.5416, + "step": 902000 + }, + { + "epoch": 9.52, + "learning_rate": 4.523781884187976e-05, + "loss": 1.4862, + "step": 902500 + }, + { + "epoch": 9.53, + "learning_rate": 4.5235180514368335e-05, + "loss": 1.5007, + "step": 903000 + }, + { + "epoch": 9.53, + "learning_rate": 4.523254218685691e-05, + "loss": 1.5211, + "step": 903500 + }, + { + "epoch": 9.54, + "learning_rate": 4.5229903859345486e-05, + "loss": 1.5028, + "step": 904000 + }, + { + "epoch": 9.55, + "learning_rate": 4.522726553183406e-05, + "loss": 1.4976, + "step": 904500 + }, + { + "epoch": 9.55, + "learning_rate": 4.522462720432264e-05, + "loss": 1.4339, + "step": 905000 + }, + { + "epoch": 9.56, + "learning_rate": 4.522198887681122e-05, + "loss": 1.4719, + "step": 905500 + }, + { + "epoch": 9.56, + "learning_rate": 4.521935054929979e-05, + "loss": 1.5127, + "step": 906000 + }, + { + "epoch": 9.57, + "learning_rate": 4.5216712221788364e-05, + "loss": 1.5213, + "step": 906500 + }, + { + "epoch": 9.57, + "learning_rate": 4.5214073894276946e-05, + "loss": 1.4638, + "step": 907000 + }, + { + "epoch": 9.58, + "learning_rate": 4.521143556676552e-05, + "loss": 1.4422, + "step": 907500 + }, + { + "epoch": 9.58, + "learning_rate": 4.520879723925409e-05, + "loss": 1.4453, + "step": 908000 + }, + { + "epoch": 9.59, + "learning_rate": 4.5206158911742666e-05, + "loss": 1.475, + "step": 908500 + }, + { + "epoch": 9.59, + "learning_rate": 4.520352058423125e-05, + "loss": 1.4721, + "step": 909000 + }, + { + "epoch": 9.6, + "learning_rate": 4.520088225671982e-05, + "loss": 1.51, + "step": 909500 + }, + { + "epoch": 9.6, + "learning_rate": 4.51982439292084e-05, + "loss": 1.4479, + "step": 910000 + }, + { + "epoch": 9.61, + "learning_rate": 4.5195605601696974e-05, + "loss": 1.5073, + "step": 910500 + }, + { + "epoch": 9.61, + "learning_rate": 4.519296727418555e-05, + "loss": 1.491, + "step": 911000 + }, + { + "epoch": 9.62, + "learning_rate": 4.5190328946674125e-05, + "loss": 1.5191, + "step": 911500 + }, + { + "epoch": 9.62, + "learning_rate": 4.51876906191627e-05, + "loss": 1.5183, + "step": 912000 + }, + { + "epoch": 9.63, + "learning_rate": 4.518505229165128e-05, + "loss": 1.5053, + "step": 912500 + }, + { + "epoch": 9.64, + "learning_rate": 4.518241396413985e-05, + "loss": 1.51, + "step": 913000 + }, + { + "epoch": 9.64, + "learning_rate": 4.517977563662843e-05, + "loss": 1.4287, + "step": 913500 + }, + { + "epoch": 9.65, + "learning_rate": 4.517713730911701e-05, + "loss": 1.5434, + "step": 914000 + }, + { + "epoch": 9.65, + "learning_rate": 4.5174498981605585e-05, + "loss": 1.4698, + "step": 914500 + }, + { + "epoch": 9.66, + "learning_rate": 4.517186065409416e-05, + "loss": 1.5364, + "step": 915000 + }, + { + "epoch": 9.66, + "learning_rate": 4.5169222326582736e-05, + "loss": 1.4931, + "step": 915500 + }, + { + "epoch": 9.67, + "learning_rate": 4.516658399907131e-05, + "loss": 1.4778, + "step": 916000 + }, + { + "epoch": 9.67, + "learning_rate": 4.516394567155989e-05, + "loss": 1.5369, + "step": 916500 + }, + { + "epoch": 9.68, + "learning_rate": 4.516130734404846e-05, + "loss": 1.5657, + "step": 917000 + }, + { + "epoch": 9.68, + "learning_rate": 4.515866901653704e-05, + "loss": 1.5032, + "step": 917500 + }, + { + "epoch": 9.69, + "learning_rate": 4.5156030689025613e-05, + "loss": 1.4626, + "step": 918000 + }, + { + "epoch": 9.69, + "learning_rate": 4.515339236151419e-05, + "loss": 1.4944, + "step": 918500 + }, + { + "epoch": 9.7, + "learning_rate": 4.515075403400277e-05, + "loss": 1.5235, + "step": 919000 + }, + { + "epoch": 9.7, + "learning_rate": 4.514811570649135e-05, + "loss": 1.4774, + "step": 919500 + }, + { + "epoch": 9.71, + "learning_rate": 4.5145477378979915e-05, + "loss": 1.5623, + "step": 920000 + }, + { + "epoch": 9.71, + "learning_rate": 4.514283905146849e-05, + "loss": 1.4622, + "step": 920500 + }, + { + "epoch": 9.72, + "learning_rate": 4.514020072395707e-05, + "loss": 1.4604, + "step": 921000 + }, + { + "epoch": 9.72, + "learning_rate": 4.513756239644565e-05, + "loss": 1.4849, + "step": 921500 + }, + { + "epoch": 9.73, + "learning_rate": 4.5134924068934224e-05, + "loss": 1.4958, + "step": 922000 + }, + { + "epoch": 9.74, + "learning_rate": 4.51322857414228e-05, + "loss": 1.4507, + "step": 922500 + }, + { + "epoch": 9.74, + "learning_rate": 4.5129647413911375e-05, + "loss": 1.4955, + "step": 923000 + }, + { + "epoch": 9.75, + "learning_rate": 4.512700908639995e-05, + "loss": 1.4541, + "step": 923500 + }, + { + "epoch": 9.75, + "learning_rate": 4.5124370758888526e-05, + "loss": 1.4939, + "step": 924000 + }, + { + "epoch": 9.76, + "learning_rate": 4.512173243137711e-05, + "loss": 1.4765, + "step": 924500 + }, + { + "epoch": 9.76, + "learning_rate": 4.511909410386568e-05, + "loss": 1.5421, + "step": 925000 + }, + { + "epoch": 9.77, + "learning_rate": 4.511645577635425e-05, + "loss": 1.4423, + "step": 925500 + }, + { + "epoch": 9.77, + "learning_rate": 4.5113817448842835e-05, + "loss": 1.4601, + "step": 926000 + }, + { + "epoch": 9.78, + "learning_rate": 4.511117912133141e-05, + "loss": 1.4704, + "step": 926500 + }, + { + "epoch": 9.78, + "learning_rate": 4.510854079381998e-05, + "loss": 1.5334, + "step": 927000 + }, + { + "epoch": 9.79, + "learning_rate": 4.510590246630856e-05, + "loss": 1.4531, + "step": 927500 + }, + { + "epoch": 9.79, + "learning_rate": 4.510326413879714e-05, + "loss": 1.5642, + "step": 928000 + }, + { + "epoch": 9.8, + "learning_rate": 4.510062581128571e-05, + "loss": 1.4868, + "step": 928500 + }, + { + "epoch": 9.8, + "learning_rate": 4.509798748377429e-05, + "loss": 1.4767, + "step": 929000 + }, + { + "epoch": 9.81, + "learning_rate": 4.509534915626286e-05, + "loss": 1.5022, + "step": 929500 + }, + { + "epoch": 9.81, + "learning_rate": 4.509271082875144e-05, + "loss": 1.4662, + "step": 930000 + }, + { + "epoch": 9.82, + "learning_rate": 4.5090072501240014e-05, + "loss": 1.5083, + "step": 930500 + }, + { + "epoch": 9.83, + "learning_rate": 4.5087434173728596e-05, + "loss": 1.5323, + "step": 931000 + }, + { + "epoch": 9.83, + "learning_rate": 4.508479584621717e-05, + "loss": 1.5259, + "step": 931500 + }, + { + "epoch": 9.84, + "learning_rate": 4.508215751870574e-05, + "loss": 1.4552, + "step": 932000 + }, + { + "epoch": 9.84, + "learning_rate": 4.5079519191194316e-05, + "loss": 1.5096, + "step": 932500 + }, + { + "epoch": 9.85, + "learning_rate": 4.50768808636829e-05, + "loss": 1.5349, + "step": 933000 + }, + { + "epoch": 9.85, + "learning_rate": 4.5074242536171474e-05, + "loss": 1.5324, + "step": 933500 + }, + { + "epoch": 9.86, + "learning_rate": 4.507160420866005e-05, + "loss": 1.4674, + "step": 934000 + }, + { + "epoch": 9.86, + "learning_rate": 4.5068965881148625e-05, + "loss": 1.5186, + "step": 934500 + }, + { + "epoch": 9.87, + "learning_rate": 4.50663275536372e-05, + "loss": 1.4904, + "step": 935000 + }, + { + "epoch": 9.87, + "learning_rate": 4.5063689226125776e-05, + "loss": 1.4751, + "step": 935500 + }, + { + "epoch": 9.88, + "learning_rate": 4.506105089861435e-05, + "loss": 1.4166, + "step": 936000 + }, + { + "epoch": 9.88, + "learning_rate": 4.505841257110293e-05, + "loss": 1.4835, + "step": 936500 + }, + { + "epoch": 9.89, + "learning_rate": 4.50557742435915e-05, + "loss": 1.4622, + "step": 937000 + }, + { + "epoch": 9.89, + "learning_rate": 4.505313591608008e-05, + "loss": 1.4672, + "step": 937500 + }, + { + "epoch": 9.9, + "learning_rate": 4.505049758856866e-05, + "loss": 1.4824, + "step": 938000 + }, + { + "epoch": 9.9, + "learning_rate": 4.5047859261057235e-05, + "loss": 1.4678, + "step": 938500 + }, + { + "epoch": 9.91, + "learning_rate": 4.5045220933545804e-05, + "loss": 1.4639, + "step": 939000 + }, + { + "epoch": 9.91, + "learning_rate": 4.5042582606034386e-05, + "loss": 1.4757, + "step": 939500 + }, + { + "epoch": 9.92, + "learning_rate": 4.503994427852296e-05, + "loss": 1.4152, + "step": 940000 + }, + { + "epoch": 9.93, + "learning_rate": 4.503730595101154e-05, + "loss": 1.5075, + "step": 940500 + }, + { + "epoch": 9.93, + "learning_rate": 4.503466762350011e-05, + "loss": 1.4309, + "step": 941000 + }, + { + "epoch": 9.94, + "learning_rate": 4.503202929598869e-05, + "loss": 1.5002, + "step": 941500 + }, + { + "epoch": 9.94, + "learning_rate": 4.5029390968477264e-05, + "loss": 1.4672, + "step": 942000 + }, + { + "epoch": 9.95, + "learning_rate": 4.502675264096584e-05, + "loss": 1.4766, + "step": 942500 + }, + { + "epoch": 9.95, + "learning_rate": 4.502411431345442e-05, + "loss": 1.4786, + "step": 943000 + }, + { + "epoch": 9.96, + "learning_rate": 4.5021475985943e-05, + "loss": 1.509, + "step": 943500 + }, + { + "epoch": 9.96, + "learning_rate": 4.5018837658431566e-05, + "loss": 1.4909, + "step": 944000 + }, + { + "epoch": 9.97, + "learning_rate": 4.501619933092014e-05, + "loss": 1.4568, + "step": 944500 + }, + { + "epoch": 9.97, + "learning_rate": 4.5013561003408724e-05, + "loss": 1.4634, + "step": 945000 + }, + { + "epoch": 9.98, + "learning_rate": 4.50109226758973e-05, + "loss": 1.498, + "step": 945500 + }, + { + "epoch": 9.98, + "learning_rate": 4.500828434838587e-05, + "loss": 1.4812, + "step": 946000 + }, + { + "epoch": 9.99, + "learning_rate": 4.500564602087445e-05, + "loss": 1.5238, + "step": 946500 + }, + { + "epoch": 9.99, + "learning_rate": 4.5003007693363026e-05, + "loss": 1.4968, + "step": 947000 + }, + { + "epoch": 10.0, + "learning_rate": 4.50003693658516e-05, + "loss": 1.5074, + "step": 947500 + }, + { + "epoch": 10.0, + "learning_rate": 4.4997731038340177e-05, + "loss": 1.5028, + "step": 948000 + }, + { + "epoch": 10.01, + "learning_rate": 4.499509271082875e-05, + "loss": 1.4614, + "step": 948500 + }, + { + "epoch": 10.02, + "learning_rate": 4.499245438331733e-05, + "loss": 1.5029, + "step": 949000 + }, + { + "epoch": 10.02, + "learning_rate": 4.49898160558059e-05, + "loss": 1.5074, + "step": 949500 + }, + { + "epoch": 10.03, + "learning_rate": 4.4987177728294485e-05, + "loss": 1.519, + "step": 950000 + }, + { + "epoch": 10.03, + "learning_rate": 4.498453940078306e-05, + "loss": 1.4205, + "step": 950500 + }, + { + "epoch": 10.04, + "learning_rate": 4.498190107327163e-05, + "loss": 1.4628, + "step": 951000 + }, + { + "epoch": 10.04, + "learning_rate": 4.497926274576021e-05, + "loss": 1.4497, + "step": 951500 + }, + { + "epoch": 10.05, + "learning_rate": 4.497662441824879e-05, + "loss": 1.5096, + "step": 952000 + }, + { + "epoch": 10.05, + "learning_rate": 4.497398609073736e-05, + "loss": 1.4989, + "step": 952500 + }, + { + "epoch": 10.06, + "learning_rate": 4.497134776322594e-05, + "loss": 1.4959, + "step": 953000 + }, + { + "epoch": 10.06, + "learning_rate": 4.4968709435714514e-05, + "loss": 1.4446, + "step": 953500 + }, + { + "epoch": 10.07, + "learning_rate": 4.496607110820309e-05, + "loss": 1.4954, + "step": 954000 + }, + { + "epoch": 10.07, + "learning_rate": 4.4963432780691665e-05, + "loss": 1.4781, + "step": 954500 + }, + { + "epoch": 10.08, + "learning_rate": 4.496079445318025e-05, + "loss": 1.4616, + "step": 955000 + }, + { + "epoch": 10.08, + "learning_rate": 4.4958156125668816e-05, + "loss": 1.4417, + "step": 955500 + }, + { + "epoch": 10.09, + "learning_rate": 4.495551779815739e-05, + "loss": 1.5022, + "step": 956000 + }, + { + "epoch": 10.09, + "learning_rate": 4.4952879470645967e-05, + "loss": 1.4272, + "step": 956500 + }, + { + "epoch": 10.1, + "learning_rate": 4.495024114313455e-05, + "loss": 1.4717, + "step": 957000 + }, + { + "epoch": 10.1, + "learning_rate": 4.4947602815623124e-05, + "loss": 1.5145, + "step": 957500 + }, + { + "epoch": 10.11, + "learning_rate": 4.494496448811169e-05, + "loss": 1.4569, + "step": 958000 + }, + { + "epoch": 10.12, + "learning_rate": 4.4942326160600275e-05, + "loss": 1.5077, + "step": 958500 + }, + { + "epoch": 10.12, + "learning_rate": 4.493968783308885e-05, + "loss": 1.4501, + "step": 959000 + }, + { + "epoch": 10.13, + "learning_rate": 4.4937049505577426e-05, + "loss": 1.4879, + "step": 959500 + }, + { + "epoch": 10.13, + "learning_rate": 4.4934411178066e-05, + "loss": 1.5376, + "step": 960000 + }, + { + "epoch": 10.14, + "learning_rate": 4.493177285055458e-05, + "loss": 1.473, + "step": 960500 + }, + { + "epoch": 10.14, + "learning_rate": 4.492913452304315e-05, + "loss": 1.5191, + "step": 961000 + }, + { + "epoch": 10.15, + "learning_rate": 4.492649619553173e-05, + "loss": 1.4843, + "step": 961500 + }, + { + "epoch": 10.15, + "learning_rate": 4.492385786802031e-05, + "loss": 1.4281, + "step": 962000 + }, + { + "epoch": 10.16, + "learning_rate": 4.4921219540508886e-05, + "loss": 1.4868, + "step": 962500 + }, + { + "epoch": 10.16, + "learning_rate": 4.4918581212997455e-05, + "loss": 1.4922, + "step": 963000 + }, + { + "epoch": 10.17, + "learning_rate": 4.491594288548604e-05, + "loss": 1.4366, + "step": 963500 + }, + { + "epoch": 10.17, + "learning_rate": 4.491330455797461e-05, + "loss": 1.4608, + "step": 964000 + }, + { + "epoch": 10.18, + "learning_rate": 4.491066623046319e-05, + "loss": 1.4923, + "step": 964500 + }, + { + "epoch": 10.18, + "learning_rate": 4.4908027902951763e-05, + "loss": 1.4819, + "step": 965000 + }, + { + "epoch": 10.19, + "learning_rate": 4.490538957544034e-05, + "loss": 1.3943, + "step": 965500 + }, + { + "epoch": 10.19, + "learning_rate": 4.4902751247928914e-05, + "loss": 1.4837, + "step": 966000 + }, + { + "epoch": 10.2, + "learning_rate": 4.490011292041749e-05, + "loss": 1.5072, + "step": 966500 + }, + { + "epoch": 10.21, + "learning_rate": 4.489747459290607e-05, + "loss": 1.4958, + "step": 967000 + }, + { + "epoch": 10.21, + "learning_rate": 4.489483626539464e-05, + "loss": 1.4854, + "step": 967500 + }, + { + "epoch": 10.22, + "learning_rate": 4.4892197937883216e-05, + "loss": 1.4557, + "step": 968000 + }, + { + "epoch": 10.22, + "learning_rate": 4.48895596103718e-05, + "loss": 1.4132, + "step": 968500 + }, + { + "epoch": 10.23, + "learning_rate": 4.4886921282860374e-05, + "loss": 1.4359, + "step": 969000 + }, + { + "epoch": 10.23, + "learning_rate": 4.488428295534895e-05, + "loss": 1.4939, + "step": 969500 + }, + { + "epoch": 10.24, + "learning_rate": 4.488164462783752e-05, + "loss": 1.5083, + "step": 970000 + }, + { + "epoch": 10.24, + "learning_rate": 4.48790063003261e-05, + "loss": 1.466, + "step": 970500 + }, + { + "epoch": 10.25, + "learning_rate": 4.4876367972814676e-05, + "loss": 1.4175, + "step": 971000 + }, + { + "epoch": 10.25, + "learning_rate": 4.487372964530325e-05, + "loss": 1.4706, + "step": 971500 + }, + { + "epoch": 10.26, + "learning_rate": 4.487109131779183e-05, + "loss": 1.4759, + "step": 972000 + }, + { + "epoch": 10.26, + "learning_rate": 4.48684529902804e-05, + "loss": 1.4587, + "step": 972500 + }, + { + "epoch": 10.27, + "learning_rate": 4.486581466276898e-05, + "loss": 1.4934, + "step": 973000 + }, + { + "epoch": 10.27, + "learning_rate": 4.4863176335257553e-05, + "loss": 1.5089, + "step": 973500 + }, + { + "epoch": 10.28, + "learning_rate": 4.4860538007746136e-05, + "loss": 1.4623, + "step": 974000 + }, + { + "epoch": 10.28, + "learning_rate": 4.4857899680234704e-05, + "loss": 1.4125, + "step": 974500 + }, + { + "epoch": 10.29, + "learning_rate": 4.485526135272328e-05, + "loss": 1.4723, + "step": 975000 + }, + { + "epoch": 10.29, + "learning_rate": 4.485262302521186e-05, + "loss": 1.512, + "step": 975500 + }, + { + "epoch": 10.3, + "learning_rate": 4.484998469770044e-05, + "loss": 1.4719, + "step": 976000 + }, + { + "epoch": 10.31, + "learning_rate": 4.484734637018901e-05, + "loss": 1.5121, + "step": 976500 + }, + { + "epoch": 10.31, + "learning_rate": 4.484470804267759e-05, + "loss": 1.4502, + "step": 977000 + }, + { + "epoch": 10.32, + "learning_rate": 4.4842069715166164e-05, + "loss": 1.4706, + "step": 977500 + }, + { + "epoch": 10.32, + "learning_rate": 4.483943138765474e-05, + "loss": 1.4827, + "step": 978000 + }, + { + "epoch": 10.33, + "learning_rate": 4.4836793060143315e-05, + "loss": 1.451, + "step": 978500 + }, + { + "epoch": 10.33, + "learning_rate": 4.48341547326319e-05, + "loss": 1.4546, + "step": 979000 + }, + { + "epoch": 10.34, + "learning_rate": 4.4831516405120466e-05, + "loss": 1.4421, + "step": 979500 + }, + { + "epoch": 10.34, + "learning_rate": 4.482887807760904e-05, + "loss": 1.5474, + "step": 980000 + }, + { + "epoch": 10.35, + "learning_rate": 4.4826239750097624e-05, + "loss": 1.4536, + "step": 980500 + }, + { + "epoch": 10.35, + "learning_rate": 4.48236014225862e-05, + "loss": 1.4571, + "step": 981000 + }, + { + "epoch": 10.36, + "learning_rate": 4.4820963095074775e-05, + "loss": 1.5258, + "step": 981500 + }, + { + "epoch": 10.36, + "learning_rate": 4.4818324767563344e-05, + "loss": 1.4907, + "step": 982000 + }, + { + "epoch": 10.37, + "learning_rate": 4.4815686440051926e-05, + "loss": 1.5235, + "step": 982500 + }, + { + "epoch": 10.37, + "learning_rate": 4.48130481125405e-05, + "loss": 1.5357, + "step": 983000 + }, + { + "epoch": 10.38, + "learning_rate": 4.481040978502908e-05, + "loss": 1.4476, + "step": 983500 + }, + { + "epoch": 10.38, + "learning_rate": 4.480777145751765e-05, + "loss": 1.4688, + "step": 984000 + }, + { + "epoch": 10.39, + "learning_rate": 4.480513313000623e-05, + "loss": 1.4813, + "step": 984500 + }, + { + "epoch": 10.4, + "learning_rate": 4.48024948024948e-05, + "loss": 1.5097, + "step": 985000 + }, + { + "epoch": 10.4, + "learning_rate": 4.479985647498338e-05, + "loss": 1.4923, + "step": 985500 + }, + { + "epoch": 10.41, + "learning_rate": 4.479721814747196e-05, + "loss": 1.472, + "step": 986000 + }, + { + "epoch": 10.41, + "learning_rate": 4.479457981996053e-05, + "loss": 1.4826, + "step": 986500 + }, + { + "epoch": 10.42, + "learning_rate": 4.4791941492449105e-05, + "loss": 1.4857, + "step": 987000 + }, + { + "epoch": 10.42, + "learning_rate": 4.478930316493769e-05, + "loss": 1.4517, + "step": 987500 + }, + { + "epoch": 10.43, + "learning_rate": 4.478666483742626e-05, + "loss": 1.4957, + "step": 988000 + }, + { + "epoch": 10.43, + "learning_rate": 4.478402650991484e-05, + "loss": 1.4694, + "step": 988500 + }, + { + "epoch": 10.44, + "learning_rate": 4.4781388182403414e-05, + "loss": 1.4693, + "step": 989000 + }, + { + "epoch": 10.44, + "learning_rate": 4.477874985489199e-05, + "loss": 1.4784, + "step": 989500 + }, + { + "epoch": 10.45, + "learning_rate": 4.4776111527380565e-05, + "loss": 1.4867, + "step": 990000 + }, + { + "epoch": 10.45, + "learning_rate": 4.477347319986914e-05, + "loss": 1.4665, + "step": 990500 + }, + { + "epoch": 10.46, + "learning_rate": 4.477083487235772e-05, + "loss": 1.4847, + "step": 991000 + }, + { + "epoch": 10.46, + "learning_rate": 4.476819654484629e-05, + "loss": 1.4726, + "step": 991500 + }, + { + "epoch": 10.47, + "learning_rate": 4.476555821733487e-05, + "loss": 1.4515, + "step": 992000 + }, + { + "epoch": 10.47, + "learning_rate": 4.476291988982345e-05, + "loss": 1.4694, + "step": 992500 + }, + { + "epoch": 10.48, + "learning_rate": 4.4760281562312025e-05, + "loss": 1.4844, + "step": 993000 + }, + { + "epoch": 10.48, + "learning_rate": 4.475764323480059e-05, + "loss": 1.4635, + "step": 993500 + }, + { + "epoch": 10.49, + "learning_rate": 4.475500490728917e-05, + "loss": 1.4775, + "step": 994000 + }, + { + "epoch": 10.5, + "learning_rate": 4.475236657977775e-05, + "loss": 1.4765, + "step": 994500 + }, + { + "epoch": 10.5, + "learning_rate": 4.4749728252266327e-05, + "loss": 1.4752, + "step": 995000 + }, + { + "epoch": 10.51, + "learning_rate": 4.47470899247549e-05, + "loss": 1.5024, + "step": 995500 + }, + { + "epoch": 10.51, + "learning_rate": 4.474445159724348e-05, + "loss": 1.4487, + "step": 996000 + }, + { + "epoch": 10.52, + "learning_rate": 4.474181326973205e-05, + "loss": 1.4695, + "step": 996500 + }, + { + "epoch": 10.52, + "learning_rate": 4.473917494222063e-05, + "loss": 1.4563, + "step": 997000 + }, + { + "epoch": 10.53, + "learning_rate": 4.4736536614709204e-05, + "loss": 1.533, + "step": 997500 + }, + { + "epoch": 10.53, + "learning_rate": 4.4733898287197786e-05, + "loss": 1.4621, + "step": 998000 + }, + { + "epoch": 10.54, + "learning_rate": 4.4731259959686355e-05, + "loss": 1.4063, + "step": 998500 + }, + { + "epoch": 10.54, + "learning_rate": 4.472862163217493e-05, + "loss": 1.4481, + "step": 999000 + }, + { + "epoch": 10.55, + "learning_rate": 4.472598330466351e-05, + "loss": 1.4998, + "step": 999500 + }, + { + "epoch": 10.55, + "learning_rate": 4.472334497715209e-05, + "loss": 1.4728, + "step": 1000000 + }, + { + "epoch": 10.56, + "learning_rate": 4.4720706649640664e-05, + "loss": 1.4496, + "step": 1000500 + }, + { + "epoch": 10.56, + "learning_rate": 4.471806832212924e-05, + "loss": 1.5025, + "step": 1001000 + }, + { + "epoch": 10.57, + "learning_rate": 4.4715429994617815e-05, + "loss": 1.4753, + "step": 1001500 + }, + { + "epoch": 10.57, + "learning_rate": 4.471279166710639e-05, + "loss": 1.4395, + "step": 1002000 + }, + { + "epoch": 10.58, + "learning_rate": 4.4710153339594966e-05, + "loss": 1.4607, + "step": 1002500 + }, + { + "epoch": 10.58, + "learning_rate": 4.470751501208354e-05, + "loss": 1.5016, + "step": 1003000 + }, + { + "epoch": 10.59, + "learning_rate": 4.4704876684572117e-05, + "loss": 1.4951, + "step": 1003500 + }, + { + "epoch": 10.6, + "learning_rate": 4.470223835706069e-05, + "loss": 1.4987, + "step": 1004000 + }, + { + "epoch": 10.6, + "learning_rate": 4.4699600029549274e-05, + "loss": 1.4285, + "step": 1004500 + }, + { + "epoch": 10.61, + "learning_rate": 4.469696170203785e-05, + "loss": 1.4291, + "step": 1005000 + }, + { + "epoch": 10.61, + "learning_rate": 4.469432337452642e-05, + "loss": 1.4817, + "step": 1005500 + }, + { + "epoch": 10.62, + "learning_rate": 4.4691685047014994e-05, + "loss": 1.4571, + "step": 1006000 + }, + { + "epoch": 10.62, + "learning_rate": 4.4689046719503576e-05, + "loss": 1.46, + "step": 1006500 + }, + { + "epoch": 10.63, + "learning_rate": 4.468640839199215e-05, + "loss": 1.5045, + "step": 1007000 + }, + { + "epoch": 10.63, + "learning_rate": 4.468377006448073e-05, + "loss": 1.5132, + "step": 1007500 + }, + { + "epoch": 10.64, + "learning_rate": 4.46811317369693e-05, + "loss": 1.4437, + "step": 1008000 + }, + { + "epoch": 10.64, + "learning_rate": 4.467849340945788e-05, + "loss": 1.4863, + "step": 1008500 + }, + { + "epoch": 10.65, + "learning_rate": 4.4675855081946454e-05, + "loss": 1.4778, + "step": 1009000 + }, + { + "epoch": 10.65, + "learning_rate": 4.467321675443503e-05, + "loss": 1.4645, + "step": 1009500 + }, + { + "epoch": 10.66, + "learning_rate": 4.467057842692361e-05, + "loss": 1.4974, + "step": 1010000 + }, + { + "epoch": 10.66, + "learning_rate": 4.466794009941218e-05, + "loss": 1.4646, + "step": 1010500 + }, + { + "epoch": 10.67, + "learning_rate": 4.4665301771900756e-05, + "loss": 1.5104, + "step": 1011000 + }, + { + "epoch": 10.67, + "learning_rate": 4.466266344438934e-05, + "loss": 1.4706, + "step": 1011500 + }, + { + "epoch": 10.68, + "learning_rate": 4.4660025116877913e-05, + "loss": 1.4921, + "step": 1012000 + }, + { + "epoch": 10.69, + "learning_rate": 4.465738678936648e-05, + "loss": 1.4999, + "step": 1012500 + }, + { + "epoch": 10.69, + "learning_rate": 4.4654748461855064e-05, + "loss": 1.5034, + "step": 1013000 + }, + { + "epoch": 10.7, + "learning_rate": 4.465211013434364e-05, + "loss": 1.4542, + "step": 1013500 + }, + { + "epoch": 10.7, + "learning_rate": 4.4649471806832215e-05, + "loss": 1.4814, + "step": 1014000 + }, + { + "epoch": 10.71, + "learning_rate": 4.464683347932079e-05, + "loss": 1.471, + "step": 1014500 + }, + { + "epoch": 10.71, + "learning_rate": 4.4644195151809366e-05, + "loss": 1.4532, + "step": 1015000 + }, + { + "epoch": 10.72, + "learning_rate": 4.464155682429794e-05, + "loss": 1.4373, + "step": 1015500 + }, + { + "epoch": 10.72, + "learning_rate": 4.463891849678652e-05, + "loss": 1.4787, + "step": 1016000 + }, + { + "epoch": 10.73, + "learning_rate": 4.46362801692751e-05, + "loss": 1.4601, + "step": 1016500 + }, + { + "epoch": 10.73, + "learning_rate": 4.4633641841763675e-05, + "loss": 1.4594, + "step": 1017000 + }, + { + "epoch": 10.74, + "learning_rate": 4.4631003514252244e-05, + "loss": 1.5043, + "step": 1017500 + }, + { + "epoch": 10.74, + "learning_rate": 4.462836518674082e-05, + "loss": 1.4808, + "step": 1018000 + }, + { + "epoch": 10.75, + "learning_rate": 4.46257268592294e-05, + "loss": 1.477, + "step": 1018500 + }, + { + "epoch": 10.75, + "learning_rate": 4.462308853171798e-05, + "loss": 1.5432, + "step": 1019000 + }, + { + "epoch": 10.76, + "learning_rate": 4.462045020420655e-05, + "loss": 1.4557, + "step": 1019500 + }, + { + "epoch": 10.76, + "learning_rate": 4.461781187669513e-05, + "loss": 1.4537, + "step": 1020000 + }, + { + "epoch": 10.77, + "learning_rate": 4.4615173549183703e-05, + "loss": 1.4945, + "step": 1020500 + }, + { + "epoch": 10.77, + "learning_rate": 4.461253522167228e-05, + "loss": 1.471, + "step": 1021000 + }, + { + "epoch": 10.78, + "learning_rate": 4.4609896894160854e-05, + "loss": 1.4408, + "step": 1021500 + }, + { + "epoch": 10.79, + "learning_rate": 4.460725856664943e-05, + "loss": 1.4655, + "step": 1022000 + }, + { + "epoch": 10.79, + "learning_rate": 4.4604620239138005e-05, + "loss": 1.4721, + "step": 1022500 + }, + { + "epoch": 10.8, + "learning_rate": 4.460198191162658e-05, + "loss": 1.4475, + "step": 1023000 + }, + { + "epoch": 10.8, + "learning_rate": 4.459934358411516e-05, + "loss": 1.4831, + "step": 1023500 + }, + { + "epoch": 10.81, + "learning_rate": 4.459670525660374e-05, + "loss": 1.4868, + "step": 1024000 + }, + { + "epoch": 10.81, + "learning_rate": 4.459406692909231e-05, + "loss": 1.492, + "step": 1024500 + }, + { + "epoch": 10.82, + "learning_rate": 4.459142860158089e-05, + "loss": 1.498, + "step": 1025000 + }, + { + "epoch": 10.82, + "learning_rate": 4.4588790274069465e-05, + "loss": 1.4162, + "step": 1025500 + }, + { + "epoch": 10.83, + "learning_rate": 4.458615194655804e-05, + "loss": 1.4832, + "step": 1026000 + }, + { + "epoch": 10.83, + "learning_rate": 4.4583513619046616e-05, + "loss": 1.5313, + "step": 1026500 + }, + { + "epoch": 10.84, + "learning_rate": 4.458087529153519e-05, + "loss": 1.4502, + "step": 1027000 + }, + { + "epoch": 10.84, + "learning_rate": 4.457823696402377e-05, + "loss": 1.47, + "step": 1027500 + }, + { + "epoch": 10.85, + "learning_rate": 4.457559863651234e-05, + "loss": 1.5197, + "step": 1028000 + }, + { + "epoch": 10.85, + "learning_rate": 4.4572960309000925e-05, + "loss": 1.4838, + "step": 1028500 + }, + { + "epoch": 10.86, + "learning_rate": 4.45703219814895e-05, + "loss": 1.4788, + "step": 1029000 + }, + { + "epoch": 10.86, + "learning_rate": 4.456768365397807e-05, + "loss": 1.4449, + "step": 1029500 + }, + { + "epoch": 10.87, + "learning_rate": 4.4565045326466645e-05, + "loss": 1.4606, + "step": 1030000 + }, + { + "epoch": 10.88, + "learning_rate": 4.456240699895523e-05, + "loss": 1.5465, + "step": 1030500 + }, + { + "epoch": 10.88, + "learning_rate": 4.45597686714438e-05, + "loss": 1.4758, + "step": 1031000 + }, + { + "epoch": 10.89, + "learning_rate": 4.455713034393237e-05, + "loss": 1.5108, + "step": 1031500 + }, + { + "epoch": 10.89, + "learning_rate": 4.455449201642095e-05, + "loss": 1.4862, + "step": 1032000 + }, + { + "epoch": 10.9, + "learning_rate": 4.455185368890953e-05, + "loss": 1.4871, + "step": 1032500 + }, + { + "epoch": 10.9, + "learning_rate": 4.4549215361398104e-05, + "loss": 1.4167, + "step": 1033000 + }, + { + "epoch": 10.91, + "learning_rate": 4.454657703388668e-05, + "loss": 1.4649, + "step": 1033500 + }, + { + "epoch": 10.91, + "learning_rate": 4.4543938706375255e-05, + "loss": 1.4926, + "step": 1034000 + }, + { + "epoch": 10.92, + "learning_rate": 4.454130037886383e-05, + "loss": 1.5074, + "step": 1034500 + }, + { + "epoch": 10.92, + "learning_rate": 4.4538662051352406e-05, + "loss": 1.5181, + "step": 1035000 + }, + { + "epoch": 10.93, + "learning_rate": 4.453602372384099e-05, + "loss": 1.4328, + "step": 1035500 + }, + { + "epoch": 10.93, + "learning_rate": 4.4533385396329564e-05, + "loss": 1.4495, + "step": 1036000 + }, + { + "epoch": 10.94, + "learning_rate": 4.453074706881813e-05, + "loss": 1.4775, + "step": 1036500 + }, + { + "epoch": 10.94, + "learning_rate": 4.4528108741306715e-05, + "loss": 1.4632, + "step": 1037000 + }, + { + "epoch": 10.95, + "learning_rate": 4.452547041379529e-05, + "loss": 1.4614, + "step": 1037500 + }, + { + "epoch": 10.95, + "learning_rate": 4.4522832086283866e-05, + "loss": 1.4344, + "step": 1038000 + }, + { + "epoch": 10.96, + "learning_rate": 4.452019375877244e-05, + "loss": 1.4329, + "step": 1038500 + }, + { + "epoch": 10.96, + "learning_rate": 4.451755543126102e-05, + "loss": 1.4505, + "step": 1039000 + }, + { + "epoch": 10.97, + "learning_rate": 4.451491710374959e-05, + "loss": 1.4591, + "step": 1039500 + }, + { + "epoch": 10.98, + "learning_rate": 4.451227877623817e-05, + "loss": 1.4828, + "step": 1040000 + }, + { + "epoch": 10.98, + "learning_rate": 4.450964044872675e-05, + "loss": 1.4477, + "step": 1040500 + }, + { + "epoch": 10.99, + "learning_rate": 4.450700212121532e-05, + "loss": 1.5263, + "step": 1041000 + }, + { + "epoch": 10.99, + "learning_rate": 4.4504363793703894e-05, + "loss": 1.4474, + "step": 1041500 + }, + { + "epoch": 11.0, + "learning_rate": 4.4501725466192477e-05, + "loss": 1.4488, + "step": 1042000 + }, + { + "epoch": 11.0, + "learning_rate": 4.449908713868105e-05, + "loss": 1.5118, + "step": 1042500 + }, + { + "epoch": 11.01, + "learning_rate": 4.449644881116963e-05, + "loss": 1.4807, + "step": 1043000 + }, + { + "epoch": 11.01, + "learning_rate": 4.4493810483658196e-05, + "loss": 1.4302, + "step": 1043500 + }, + { + "epoch": 11.02, + "learning_rate": 4.449117215614678e-05, + "loss": 1.4364, + "step": 1044000 + }, + { + "epoch": 11.02, + "learning_rate": 4.4488533828635354e-05, + "loss": 1.4693, + "step": 1044500 + }, + { + "epoch": 11.03, + "learning_rate": 4.448589550112393e-05, + "loss": 1.4561, + "step": 1045000 + }, + { + "epoch": 11.03, + "learning_rate": 4.4483257173612505e-05, + "loss": 1.458, + "step": 1045500 + }, + { + "epoch": 11.04, + "learning_rate": 4.448061884610108e-05, + "loss": 1.4763, + "step": 1046000 + }, + { + "epoch": 11.04, + "learning_rate": 4.4477980518589656e-05, + "loss": 1.443, + "step": 1046500 + }, + { + "epoch": 11.05, + "learning_rate": 4.447534219107823e-05, + "loss": 1.4537, + "step": 1047000 + }, + { + "epoch": 11.05, + "learning_rate": 4.4472703863566814e-05, + "loss": 1.4717, + "step": 1047500 + }, + { + "epoch": 11.06, + "learning_rate": 4.447006553605539e-05, + "loss": 1.3994, + "step": 1048000 + }, + { + "epoch": 11.07, + "learning_rate": 4.446742720854396e-05, + "loss": 1.4524, + "step": 1048500 + }, + { + "epoch": 11.07, + "learning_rate": 4.446478888103254e-05, + "loss": 1.4808, + "step": 1049000 + }, + { + "epoch": 11.08, + "learning_rate": 4.4462150553521116e-05, + "loss": 1.4701, + "step": 1049500 + }, + { + "epoch": 11.08, + "learning_rate": 4.445951222600969e-05, + "loss": 1.414, + "step": 1050000 + }, + { + "epoch": 11.09, + "learning_rate": 4.445687389849827e-05, + "loss": 1.5012, + "step": 1050500 + }, + { + "epoch": 11.09, + "learning_rate": 4.445423557098684e-05, + "loss": 1.4941, + "step": 1051000 + }, + { + "epoch": 11.1, + "learning_rate": 4.445159724347542e-05, + "loss": 1.4335, + "step": 1051500 + }, + { + "epoch": 11.1, + "learning_rate": 4.444895891596399e-05, + "loss": 1.3966, + "step": 1052000 + }, + { + "epoch": 11.11, + "learning_rate": 4.4446320588452575e-05, + "loss": 1.4673, + "step": 1052500 + }, + { + "epoch": 11.11, + "learning_rate": 4.4443682260941144e-05, + "loss": 1.4664, + "step": 1053000 + }, + { + "epoch": 11.12, + "learning_rate": 4.444104393342972e-05, + "loss": 1.4682, + "step": 1053500 + }, + { + "epoch": 11.12, + "learning_rate": 4.44384056059183e-05, + "loss": 1.4812, + "step": 1054000 + }, + { + "epoch": 11.13, + "learning_rate": 4.443576727840688e-05, + "loss": 1.457, + "step": 1054500 + }, + { + "epoch": 11.13, + "learning_rate": 4.443312895089545e-05, + "loss": 1.4898, + "step": 1055000 + }, + { + "epoch": 11.14, + "learning_rate": 4.443049062338402e-05, + "loss": 1.4538, + "step": 1055500 + }, + { + "epoch": 11.14, + "learning_rate": 4.4427852295872604e-05, + "loss": 1.4962, + "step": 1056000 + }, + { + "epoch": 11.15, + "learning_rate": 4.442521396836118e-05, + "loss": 1.5021, + "step": 1056500 + }, + { + "epoch": 11.15, + "learning_rate": 4.4422575640849755e-05, + "loss": 1.4588, + "step": 1057000 + }, + { + "epoch": 11.16, + "learning_rate": 4.441993731333833e-05, + "loss": 1.5175, + "step": 1057500 + }, + { + "epoch": 11.17, + "learning_rate": 4.4417298985826906e-05, + "loss": 1.4994, + "step": 1058000 + }, + { + "epoch": 11.17, + "learning_rate": 4.441466065831548e-05, + "loss": 1.4983, + "step": 1058500 + }, + { + "epoch": 11.18, + "learning_rate": 4.441202233080406e-05, + "loss": 1.4514, + "step": 1059000 + }, + { + "epoch": 11.18, + "learning_rate": 4.440938400329264e-05, + "loss": 1.4945, + "step": 1059500 + }, + { + "epoch": 11.19, + "learning_rate": 4.440674567578121e-05, + "loss": 1.4531, + "step": 1060000 + }, + { + "epoch": 11.19, + "learning_rate": 4.440410734826978e-05, + "loss": 1.4371, + "step": 1060500 + }, + { + "epoch": 11.2, + "learning_rate": 4.4401469020758365e-05, + "loss": 1.51, + "step": 1061000 + }, + { + "epoch": 11.2, + "learning_rate": 4.439883069324694e-05, + "loss": 1.4528, + "step": 1061500 + }, + { + "epoch": 11.21, + "learning_rate": 4.4396192365735516e-05, + "loss": 1.4786, + "step": 1062000 + }, + { + "epoch": 11.21, + "learning_rate": 4.439355403822409e-05, + "loss": 1.5288, + "step": 1062500 + }, + { + "epoch": 11.22, + "learning_rate": 4.439091571071267e-05, + "loss": 1.4431, + "step": 1063000 + }, + { + "epoch": 11.22, + "learning_rate": 4.438827738320124e-05, + "loss": 1.5064, + "step": 1063500 + }, + { + "epoch": 11.23, + "learning_rate": 4.438563905568982e-05, + "loss": 1.4263, + "step": 1064000 + }, + { + "epoch": 11.23, + "learning_rate": 4.43830007281784e-05, + "loss": 1.4611, + "step": 1064500 + }, + { + "epoch": 11.24, + "learning_rate": 4.438036240066697e-05, + "loss": 1.4955, + "step": 1065000 + }, + { + "epoch": 11.24, + "learning_rate": 4.4377724073155545e-05, + "loss": 1.4874, + "step": 1065500 + }, + { + "epoch": 11.25, + "learning_rate": 4.437508574564413e-05, + "loss": 1.498, + "step": 1066000 + }, + { + "epoch": 11.26, + "learning_rate": 4.43724474181327e-05, + "loss": 1.4159, + "step": 1066500 + }, + { + "epoch": 11.26, + "learning_rate": 4.436980909062128e-05, + "loss": 1.5195, + "step": 1067000 + }, + { + "epoch": 11.27, + "learning_rate": 4.436717076310985e-05, + "loss": 1.5183, + "step": 1067500 + }, + { + "epoch": 11.27, + "learning_rate": 4.436453243559843e-05, + "loss": 1.44, + "step": 1068000 + }, + { + "epoch": 11.28, + "learning_rate": 4.4361894108087004e-05, + "loss": 1.4171, + "step": 1068500 + }, + { + "epoch": 11.28, + "learning_rate": 4.435925578057558e-05, + "loss": 1.4739, + "step": 1069000 + }, + { + "epoch": 11.29, + "learning_rate": 4.4356617453064155e-05, + "loss": 1.4537, + "step": 1069500 + }, + { + "epoch": 11.29, + "learning_rate": 4.435397912555273e-05, + "loss": 1.4611, + "step": 1070000 + }, + { + "epoch": 11.3, + "learning_rate": 4.4351340798041306e-05, + "loss": 1.4715, + "step": 1070500 + }, + { + "epoch": 11.3, + "learning_rate": 4.434870247052988e-05, + "loss": 1.468, + "step": 1071000 + }, + { + "epoch": 11.31, + "learning_rate": 4.4346064143018464e-05, + "loss": 1.5055, + "step": 1071500 + }, + { + "epoch": 11.31, + "learning_rate": 4.434342581550703e-05, + "loss": 1.4133, + "step": 1072000 + }, + { + "epoch": 11.32, + "learning_rate": 4.434078748799561e-05, + "loss": 1.4769, + "step": 1072500 + }, + { + "epoch": 11.32, + "learning_rate": 4.433814916048419e-05, + "loss": 1.4183, + "step": 1073000 + }, + { + "epoch": 11.33, + "learning_rate": 4.4335510832972766e-05, + "loss": 1.4266, + "step": 1073500 + }, + { + "epoch": 11.33, + "learning_rate": 4.433287250546134e-05, + "loss": 1.4487, + "step": 1074000 + }, + { + "epoch": 11.34, + "learning_rate": 4.433023417794992e-05, + "loss": 1.4605, + "step": 1074500 + }, + { + "epoch": 11.34, + "learning_rate": 4.432759585043849e-05, + "loss": 1.4443, + "step": 1075000 + }, + { + "epoch": 11.35, + "learning_rate": 4.432495752292707e-05, + "loss": 1.4501, + "step": 1075500 + }, + { + "epoch": 11.36, + "learning_rate": 4.4322319195415644e-05, + "loss": 1.4871, + "step": 1076000 + }, + { + "epoch": 11.36, + "learning_rate": 4.4319680867904226e-05, + "loss": 1.4934, + "step": 1076500 + }, + { + "epoch": 11.37, + "learning_rate": 4.4317042540392795e-05, + "loss": 1.4151, + "step": 1077000 + }, + { + "epoch": 11.37, + "learning_rate": 4.431440421288137e-05, + "loss": 1.4155, + "step": 1077500 + }, + { + "epoch": 11.38, + "learning_rate": 4.431176588536995e-05, + "loss": 1.4655, + "step": 1078000 + }, + { + "epoch": 11.38, + "learning_rate": 4.430912755785853e-05, + "loss": 1.4146, + "step": 1078500 + }, + { + "epoch": 11.39, + "learning_rate": 4.4306489230347096e-05, + "loss": 1.499, + "step": 1079000 + }, + { + "epoch": 11.39, + "learning_rate": 4.430385090283567e-05, + "loss": 1.4797, + "step": 1079500 + }, + { + "epoch": 11.4, + "learning_rate": 4.4301212575324254e-05, + "loss": 1.457, + "step": 1080000 + }, + { + "epoch": 11.4, + "learning_rate": 4.429857424781283e-05, + "loss": 1.4104, + "step": 1080500 + }, + { + "epoch": 11.41, + "learning_rate": 4.4295935920301405e-05, + "loss": 1.479, + "step": 1081000 + }, + { + "epoch": 11.41, + "learning_rate": 4.429329759278998e-05, + "loss": 1.5106, + "step": 1081500 + }, + { + "epoch": 11.42, + "learning_rate": 4.4290659265278556e-05, + "loss": 1.4806, + "step": 1082000 + }, + { + "epoch": 11.42, + "learning_rate": 4.428802093776713e-05, + "loss": 1.5093, + "step": 1082500 + }, + { + "epoch": 11.43, + "learning_rate": 4.428538261025571e-05, + "loss": 1.4441, + "step": 1083000 + }, + { + "epoch": 11.43, + "learning_rate": 4.428274428274429e-05, + "loss": 1.4705, + "step": 1083500 + }, + { + "epoch": 11.44, + "learning_rate": 4.428010595523286e-05, + "loss": 1.4475, + "step": 1084000 + }, + { + "epoch": 11.45, + "learning_rate": 4.4277467627721434e-05, + "loss": 1.4609, + "step": 1084500 + }, + { + "epoch": 11.45, + "learning_rate": 4.4274829300210016e-05, + "loss": 1.4598, + "step": 1085000 + }, + { + "epoch": 11.46, + "learning_rate": 4.427219097269859e-05, + "loss": 1.4494, + "step": 1085500 + }, + { + "epoch": 11.46, + "learning_rate": 4.426955264518717e-05, + "loss": 1.4967, + "step": 1086000 + }, + { + "epoch": 11.47, + "learning_rate": 4.426691431767574e-05, + "loss": 1.4804, + "step": 1086500 + }, + { + "epoch": 11.47, + "learning_rate": 4.426427599016432e-05, + "loss": 1.4461, + "step": 1087000 + }, + { + "epoch": 11.48, + "learning_rate": 4.426163766265289e-05, + "loss": 1.4279, + "step": 1087500 + }, + { + "epoch": 11.48, + "learning_rate": 4.425899933514147e-05, + "loss": 1.4353, + "step": 1088000 + }, + { + "epoch": 11.49, + "learning_rate": 4.4256361007630044e-05, + "loss": 1.4698, + "step": 1088500 + }, + { + "epoch": 11.49, + "learning_rate": 4.425372268011862e-05, + "loss": 1.5078, + "step": 1089000 + }, + { + "epoch": 11.5, + "learning_rate": 4.4251084352607195e-05, + "loss": 1.459, + "step": 1089500 + }, + { + "epoch": 11.5, + "learning_rate": 4.424844602509578e-05, + "loss": 1.4874, + "step": 1090000 + }, + { + "epoch": 11.51, + "learning_rate": 4.424580769758435e-05, + "loss": 1.4802, + "step": 1090500 + }, + { + "epoch": 11.51, + "learning_rate": 4.424316937007292e-05, + "loss": 1.5399, + "step": 1091000 + }, + { + "epoch": 11.52, + "learning_rate": 4.42405310425615e-05, + "loss": 1.526, + "step": 1091500 + }, + { + "epoch": 11.52, + "learning_rate": 4.423789271505008e-05, + "loss": 1.4809, + "step": 1092000 + }, + { + "epoch": 11.53, + "learning_rate": 4.4235254387538655e-05, + "loss": 1.433, + "step": 1092500 + }, + { + "epoch": 11.53, + "learning_rate": 4.423261606002723e-05, + "loss": 1.498, + "step": 1093000 + }, + { + "epoch": 11.54, + "learning_rate": 4.4229977732515806e-05, + "loss": 1.5211, + "step": 1093500 + }, + { + "epoch": 11.55, + "learning_rate": 4.422733940500438e-05, + "loss": 1.4117, + "step": 1094000 + }, + { + "epoch": 11.55, + "learning_rate": 4.422470107749296e-05, + "loss": 1.4596, + "step": 1094500 + }, + { + "epoch": 11.56, + "learning_rate": 4.422206274998153e-05, + "loss": 1.464, + "step": 1095000 + }, + { + "epoch": 11.56, + "learning_rate": 4.4219424422470115e-05, + "loss": 1.5075, + "step": 1095500 + }, + { + "epoch": 11.57, + "learning_rate": 4.4216786094958683e-05, + "loss": 1.4727, + "step": 1096000 + }, + { + "epoch": 11.57, + "learning_rate": 4.421414776744726e-05, + "loss": 1.5109, + "step": 1096500 + }, + { + "epoch": 11.58, + "learning_rate": 4.421150943993584e-05, + "loss": 1.4561, + "step": 1097000 + }, + { + "epoch": 11.58, + "learning_rate": 4.420887111242442e-05, + "loss": 1.5011, + "step": 1097500 + }, + { + "epoch": 11.59, + "learning_rate": 4.4206232784912985e-05, + "loss": 1.4741, + "step": 1098000 + }, + { + "epoch": 11.59, + "learning_rate": 4.420359445740157e-05, + "loss": 1.4817, + "step": 1098500 + }, + { + "epoch": 11.6, + "learning_rate": 4.420095612989014e-05, + "loss": 1.4541, + "step": 1099000 + }, + { + "epoch": 11.6, + "learning_rate": 4.419831780237872e-05, + "loss": 1.4958, + "step": 1099500 + }, + { + "epoch": 11.61, + "learning_rate": 4.4195679474867294e-05, + "loss": 1.4595, + "step": 1100000 + }, + { + "epoch": 11.61, + "learning_rate": 4.419304114735587e-05, + "loss": 1.4543, + "step": 1100500 + }, + { + "epoch": 11.62, + "learning_rate": 4.4190402819844445e-05, + "loss": 1.4858, + "step": 1101000 + }, + { + "epoch": 11.62, + "learning_rate": 4.418776449233302e-05, + "loss": 1.4688, + "step": 1101500 + }, + { + "epoch": 11.63, + "learning_rate": 4.41851261648216e-05, + "loss": 1.4833, + "step": 1102000 + }, + { + "epoch": 11.64, + "learning_rate": 4.418248783731018e-05, + "loss": 1.4594, + "step": 1102500 + }, + { + "epoch": 11.64, + "learning_rate": 4.417984950979875e-05, + "loss": 1.4759, + "step": 1103000 + }, + { + "epoch": 11.65, + "learning_rate": 4.417721118228732e-05, + "loss": 1.4695, + "step": 1103500 + }, + { + "epoch": 11.65, + "learning_rate": 4.4174572854775905e-05, + "loss": 1.4908, + "step": 1104000 + }, + { + "epoch": 11.66, + "learning_rate": 4.417193452726448e-05, + "loss": 1.4731, + "step": 1104500 + }, + { + "epoch": 11.66, + "learning_rate": 4.416929619975305e-05, + "loss": 1.4622, + "step": 1105000 + }, + { + "epoch": 11.67, + "learning_rate": 4.416665787224163e-05, + "loss": 1.4355, + "step": 1105500 + }, + { + "epoch": 11.67, + "learning_rate": 4.416401954473021e-05, + "loss": 1.4175, + "step": 1106000 + }, + { + "epoch": 11.68, + "learning_rate": 4.416138121721878e-05, + "loss": 1.4538, + "step": 1106500 + }, + { + "epoch": 11.68, + "learning_rate": 4.415874288970736e-05, + "loss": 1.495, + "step": 1107000 + }, + { + "epoch": 11.69, + "learning_rate": 4.415610456219593e-05, + "loss": 1.4078, + "step": 1107500 + }, + { + "epoch": 11.69, + "learning_rate": 4.415346623468451e-05, + "loss": 1.4585, + "step": 1108000 + }, + { + "epoch": 11.7, + "learning_rate": 4.4150827907173084e-05, + "loss": 1.4537, + "step": 1108500 + }, + { + "epoch": 11.7, + "learning_rate": 4.4148189579661666e-05, + "loss": 1.4607, + "step": 1109000 + }, + { + "epoch": 11.71, + "learning_rate": 4.414555125215024e-05, + "loss": 1.4567, + "step": 1109500 + }, + { + "epoch": 11.71, + "learning_rate": 4.414291292463881e-05, + "loss": 1.4165, + "step": 1110000 + }, + { + "epoch": 11.72, + "learning_rate": 4.414027459712739e-05, + "loss": 1.5011, + "step": 1110500 + }, + { + "epoch": 11.72, + "learning_rate": 4.413763626961597e-05, + "loss": 1.4602, + "step": 1111000 + }, + { + "epoch": 11.73, + "learning_rate": 4.4134997942104544e-05, + "loss": 1.5124, + "step": 1111500 + }, + { + "epoch": 11.74, + "learning_rate": 4.413235961459312e-05, + "loss": 1.4919, + "step": 1112000 + }, + { + "epoch": 11.74, + "learning_rate": 4.4129721287081695e-05, + "loss": 1.4609, + "step": 1112500 + }, + { + "epoch": 11.75, + "learning_rate": 4.412708295957027e-05, + "loss": 1.4683, + "step": 1113000 + }, + { + "epoch": 11.75, + "learning_rate": 4.4124444632058846e-05, + "loss": 1.4995, + "step": 1113500 + }, + { + "epoch": 11.76, + "learning_rate": 4.412180630454743e-05, + "loss": 1.4677, + "step": 1114000 + }, + { + "epoch": 11.76, + "learning_rate": 4.4119167977036e-05, + "loss": 1.4662, + "step": 1114500 + }, + { + "epoch": 11.77, + "learning_rate": 4.411652964952457e-05, + "loss": 1.4104, + "step": 1115000 + }, + { + "epoch": 11.77, + "learning_rate": 4.4113891322013154e-05, + "loss": 1.4018, + "step": 1115500 + }, + { + "epoch": 11.78, + "learning_rate": 4.411125299450173e-05, + "loss": 1.4204, + "step": 1116000 + }, + { + "epoch": 11.78, + "learning_rate": 4.4108614666990305e-05, + "loss": 1.5528, + "step": 1116500 + }, + { + "epoch": 11.79, + "learning_rate": 4.4105976339478874e-05, + "loss": 1.4344, + "step": 1117000 + }, + { + "epoch": 11.79, + "learning_rate": 4.4103338011967456e-05, + "loss": 1.4773, + "step": 1117500 + }, + { + "epoch": 11.8, + "learning_rate": 4.410069968445603e-05, + "loss": 1.4582, + "step": 1118000 + }, + { + "epoch": 11.8, + "learning_rate": 4.409806135694461e-05, + "loss": 1.4615, + "step": 1118500 + }, + { + "epoch": 11.81, + "learning_rate": 4.409542302943318e-05, + "loss": 1.4384, + "step": 1119000 + }, + { + "epoch": 11.81, + "learning_rate": 4.409278470192176e-05, + "loss": 1.4455, + "step": 1119500 + }, + { + "epoch": 11.82, + "learning_rate": 4.4090146374410334e-05, + "loss": 1.4302, + "step": 1120000 + }, + { + "epoch": 11.82, + "learning_rate": 4.408750804689891e-05, + "loss": 1.5017, + "step": 1120500 + }, + { + "epoch": 11.83, + "learning_rate": 4.408486971938749e-05, + "loss": 1.5205, + "step": 1121000 + }, + { + "epoch": 11.84, + "learning_rate": 4.408223139187607e-05, + "loss": 1.4898, + "step": 1121500 + }, + { + "epoch": 11.84, + "learning_rate": 4.4079593064364636e-05, + "loss": 1.484, + "step": 1122000 + }, + { + "epoch": 11.85, + "learning_rate": 4.407695473685322e-05, + "loss": 1.4346, + "step": 1122500 + }, + { + "epoch": 11.85, + "learning_rate": 4.4074316409341794e-05, + "loss": 1.5027, + "step": 1123000 + }, + { + "epoch": 11.86, + "learning_rate": 4.407167808183037e-05, + "loss": 1.4217, + "step": 1123500 + }, + { + "epoch": 11.86, + "learning_rate": 4.4069039754318945e-05, + "loss": 1.416, + "step": 1124000 + }, + { + "epoch": 11.87, + "learning_rate": 4.406640142680752e-05, + "loss": 1.5081, + "step": 1124500 + }, + { + "epoch": 11.87, + "learning_rate": 4.4063763099296096e-05, + "loss": 1.482, + "step": 1125000 + }, + { + "epoch": 11.88, + "learning_rate": 4.406112477178467e-05, + "loss": 1.4946, + "step": 1125500 + }, + { + "epoch": 11.88, + "learning_rate": 4.405848644427325e-05, + "loss": 1.5036, + "step": 1126000 + }, + { + "epoch": 11.89, + "learning_rate": 4.405584811676182e-05, + "loss": 1.4919, + "step": 1126500 + }, + { + "epoch": 11.89, + "learning_rate": 4.40532097892504e-05, + "loss": 1.5205, + "step": 1127000 + }, + { + "epoch": 11.9, + "learning_rate": 4.405057146173898e-05, + "loss": 1.4783, + "step": 1127500 + }, + { + "epoch": 11.9, + "learning_rate": 4.4047933134227555e-05, + "loss": 1.4587, + "step": 1128000 + }, + { + "epoch": 11.91, + "learning_rate": 4.404529480671613e-05, + "loss": 1.4918, + "step": 1128500 + }, + { + "epoch": 11.91, + "learning_rate": 4.40426564792047e-05, + "loss": 1.4808, + "step": 1129000 + }, + { + "epoch": 11.92, + "learning_rate": 4.404001815169328e-05, + "loss": 1.4798, + "step": 1129500 + }, + { + "epoch": 11.93, + "learning_rate": 4.403737982418186e-05, + "loss": 1.4413, + "step": 1130000 + }, + { + "epoch": 11.93, + "learning_rate": 4.403474149667043e-05, + "loss": 1.4266, + "step": 1130500 + }, + { + "epoch": 11.94, + "learning_rate": 4.403210316915901e-05, + "loss": 1.4803, + "step": 1131000 + }, + { + "epoch": 11.94, + "learning_rate": 4.4029464841647584e-05, + "loss": 1.4789, + "step": 1131500 + }, + { + "epoch": 11.95, + "learning_rate": 4.402682651413616e-05, + "loss": 1.4825, + "step": 1132000 + }, + { + "epoch": 11.95, + "learning_rate": 4.4024188186624735e-05, + "loss": 1.4837, + "step": 1132500 + }, + { + "epoch": 11.96, + "learning_rate": 4.402154985911332e-05, + "loss": 1.4127, + "step": 1133000 + }, + { + "epoch": 11.96, + "learning_rate": 4.4018911531601886e-05, + "loss": 1.4806, + "step": 1133500 + }, + { + "epoch": 11.97, + "learning_rate": 4.401627320409046e-05, + "loss": 1.4134, + "step": 1134000 + }, + { + "epoch": 11.97, + "learning_rate": 4.401363487657904e-05, + "loss": 1.4374, + "step": 1134500 + }, + { + "epoch": 11.98, + "learning_rate": 4.401099654906762e-05, + "loss": 1.5121, + "step": 1135000 + }, + { + "epoch": 11.98, + "learning_rate": 4.4008358221556194e-05, + "loss": 1.4637, + "step": 1135500 + }, + { + "epoch": 11.99, + "learning_rate": 4.400571989404477e-05, + "loss": 1.4861, + "step": 1136000 + }, + { + "epoch": 11.99, + "learning_rate": 4.4003081566533345e-05, + "loss": 1.4625, + "step": 1136500 + }, + { + "epoch": 12.0, + "learning_rate": 4.400044323902192e-05, + "loss": 1.4223, + "step": 1137000 + }, + { + "epoch": 12.0, + "learning_rate": 4.3997804911510496e-05, + "loss": 1.4483, + "step": 1137500 + }, + { + "epoch": 12.01, + "learning_rate": 4.399516658399908e-05, + "loss": 1.4713, + "step": 1138000 + }, + { + "epoch": 12.01, + "learning_rate": 4.399252825648765e-05, + "loss": 1.3901, + "step": 1138500 + }, + { + "epoch": 12.02, + "learning_rate": 4.398988992897622e-05, + "loss": 1.4504, + "step": 1139000 + }, + { + "epoch": 12.03, + "learning_rate": 4.3987251601464805e-05, + "loss": 1.4236, + "step": 1139500 + }, + { + "epoch": 12.03, + "learning_rate": 4.398461327395338e-05, + "loss": 1.509, + "step": 1140000 + }, + { + "epoch": 12.04, + "learning_rate": 4.3981974946441956e-05, + "loss": 1.436, + "step": 1140500 + }, + { + "epoch": 12.04, + "learning_rate": 4.3979336618930525e-05, + "loss": 1.3985, + "step": 1141000 + }, + { + "epoch": 12.05, + "learning_rate": 4.397669829141911e-05, + "loss": 1.4373, + "step": 1141500 + }, + { + "epoch": 12.05, + "learning_rate": 4.397405996390768e-05, + "loss": 1.4668, + "step": 1142000 + }, + { + "epoch": 12.06, + "learning_rate": 4.397142163639626e-05, + "loss": 1.4313, + "step": 1142500 + }, + { + "epoch": 12.06, + "learning_rate": 4.3968783308884833e-05, + "loss": 1.4952, + "step": 1143000 + }, + { + "epoch": 12.07, + "learning_rate": 4.396614498137341e-05, + "loss": 1.4409, + "step": 1143500 + }, + { + "epoch": 12.07, + "learning_rate": 4.3963506653861984e-05, + "loss": 1.4608, + "step": 1144000 + }, + { + "epoch": 12.08, + "learning_rate": 4.396086832635056e-05, + "loss": 1.4305, + "step": 1144500 + }, + { + "epoch": 12.08, + "learning_rate": 4.395822999883914e-05, + "loss": 1.4621, + "step": 1145000 + }, + { + "epoch": 12.09, + "learning_rate": 4.395559167132771e-05, + "loss": 1.429, + "step": 1145500 + }, + { + "epoch": 12.09, + "learning_rate": 4.3952953343816286e-05, + "loss": 1.4399, + "step": 1146000 + }, + { + "epoch": 12.1, + "learning_rate": 4.395031501630487e-05, + "loss": 1.5109, + "step": 1146500 + }, + { + "epoch": 12.1, + "learning_rate": 4.3947676688793444e-05, + "loss": 1.4768, + "step": 1147000 + }, + { + "epoch": 12.11, + "learning_rate": 4.394503836128202e-05, + "loss": 1.4552, + "step": 1147500 + }, + { + "epoch": 12.12, + "learning_rate": 4.3942400033770595e-05, + "loss": 1.413, + "step": 1148000 + }, + { + "epoch": 12.12, + "learning_rate": 4.393976170625917e-05, + "loss": 1.413, + "step": 1148500 + }, + { + "epoch": 12.13, + "learning_rate": 4.3937123378747746e-05, + "loss": 1.4494, + "step": 1149000 + }, + { + "epoch": 12.13, + "learning_rate": 4.393448505123632e-05, + "loss": 1.4454, + "step": 1149500 + }, + { + "epoch": 12.14, + "learning_rate": 4.3931846723724904e-05, + "loss": 1.4753, + "step": 1150000 + }, + { + "epoch": 12.14, + "learning_rate": 4.392920839621347e-05, + "loss": 1.4595, + "step": 1150500 + }, + { + "epoch": 12.15, + "learning_rate": 4.392657006870205e-05, + "loss": 1.4292, + "step": 1151000 + }, + { + "epoch": 12.15, + "learning_rate": 4.392393174119063e-05, + "loss": 1.4753, + "step": 1151500 + }, + { + "epoch": 12.16, + "learning_rate": 4.3921293413679206e-05, + "loss": 1.427, + "step": 1152000 + }, + { + "epoch": 12.16, + "learning_rate": 4.3918655086167774e-05, + "loss": 1.4129, + "step": 1152500 + }, + { + "epoch": 12.17, + "learning_rate": 4.391601675865635e-05, + "loss": 1.4367, + "step": 1153000 + }, + { + "epoch": 12.17, + "learning_rate": 4.391337843114493e-05, + "loss": 1.4695, + "step": 1153500 + }, + { + "epoch": 12.18, + "learning_rate": 4.391074010363351e-05, + "loss": 1.455, + "step": 1154000 + }, + { + "epoch": 12.18, + "learning_rate": 4.390810177612208e-05, + "loss": 1.4823, + "step": 1154500 + }, + { + "epoch": 12.19, + "learning_rate": 4.390546344861066e-05, + "loss": 1.5212, + "step": 1155000 + }, + { + "epoch": 12.19, + "learning_rate": 4.3902825121099234e-05, + "loss": 1.4614, + "step": 1155500 + }, + { + "epoch": 12.2, + "learning_rate": 4.390018679358781e-05, + "loss": 1.4647, + "step": 1156000 + }, + { + "epoch": 12.2, + "learning_rate": 4.3897548466076385e-05, + "loss": 1.4679, + "step": 1156500 + }, + { + "epoch": 12.21, + "learning_rate": 4.389491013856497e-05, + "loss": 1.4406, + "step": 1157000 + }, + { + "epoch": 12.22, + "learning_rate": 4.3892271811053536e-05, + "loss": 1.478, + "step": 1157500 + }, + { + "epoch": 12.22, + "learning_rate": 4.388963348354211e-05, + "loss": 1.4471, + "step": 1158000 + }, + { + "epoch": 12.23, + "learning_rate": 4.3886995156030694e-05, + "loss": 1.4604, + "step": 1158500 + }, + { + "epoch": 12.23, + "learning_rate": 4.388435682851927e-05, + "loss": 1.4586, + "step": 1159000 + }, + { + "epoch": 12.24, + "learning_rate": 4.3881718501007845e-05, + "loss": 1.4572, + "step": 1159500 + }, + { + "epoch": 12.24, + "learning_rate": 4.387908017349642e-05, + "loss": 1.4652, + "step": 1160000 + }, + { + "epoch": 12.25, + "learning_rate": 4.3876441845984996e-05, + "loss": 1.4128, + "step": 1160500 + }, + { + "epoch": 12.25, + "learning_rate": 4.387380351847357e-05, + "loss": 1.4277, + "step": 1161000 + }, + { + "epoch": 12.26, + "learning_rate": 4.387116519096215e-05, + "loss": 1.4966, + "step": 1161500 + }, + { + "epoch": 12.26, + "learning_rate": 4.386852686345072e-05, + "loss": 1.4655, + "step": 1162000 + }, + { + "epoch": 12.27, + "learning_rate": 4.38658885359393e-05, + "loss": 1.4591, + "step": 1162500 + }, + { + "epoch": 12.27, + "learning_rate": 4.386325020842787e-05, + "loss": 1.4534, + "step": 1163000 + }, + { + "epoch": 12.28, + "learning_rate": 4.3860611880916455e-05, + "loss": 1.4678, + "step": 1163500 + }, + { + "epoch": 12.28, + "learning_rate": 4.385797355340503e-05, + "loss": 1.4427, + "step": 1164000 + }, + { + "epoch": 12.29, + "learning_rate": 4.38553352258936e-05, + "loss": 1.4431, + "step": 1164500 + }, + { + "epoch": 12.29, + "learning_rate": 4.3852696898382175e-05, + "loss": 1.3975, + "step": 1165000 + }, + { + "epoch": 12.3, + "learning_rate": 4.385005857087076e-05, + "loss": 1.396, + "step": 1165500 + }, + { + "epoch": 12.31, + "learning_rate": 4.384742024335933e-05, + "loss": 1.4766, + "step": 1166000 + }, + { + "epoch": 12.31, + "learning_rate": 4.384478191584791e-05, + "loss": 1.4554, + "step": 1166500 + }, + { + "epoch": 12.32, + "learning_rate": 4.3842143588336484e-05, + "loss": 1.4847, + "step": 1167000 + }, + { + "epoch": 12.32, + "learning_rate": 4.383950526082506e-05, + "loss": 1.4327, + "step": 1167500 + }, + { + "epoch": 12.33, + "learning_rate": 4.3836866933313635e-05, + "loss": 1.477, + "step": 1168000 + }, + { + "epoch": 12.33, + "learning_rate": 4.383422860580221e-05, + "loss": 1.4528, + "step": 1168500 + }, + { + "epoch": 12.34, + "learning_rate": 4.383159027829079e-05, + "loss": 1.4576, + "step": 1169000 + }, + { + "epoch": 12.34, + "learning_rate": 4.382895195077936e-05, + "loss": 1.4339, + "step": 1169500 + }, + { + "epoch": 12.35, + "learning_rate": 4.382631362326794e-05, + "loss": 1.4667, + "step": 1170000 + }, + { + "epoch": 12.35, + "learning_rate": 4.382367529575652e-05, + "loss": 1.4742, + "step": 1170500 + }, + { + "epoch": 12.36, + "learning_rate": 4.3821036968245095e-05, + "loss": 1.4785, + "step": 1171000 + }, + { + "epoch": 12.36, + "learning_rate": 4.381839864073366e-05, + "loss": 1.444, + "step": 1171500 + }, + { + "epoch": 12.37, + "learning_rate": 4.3815760313222246e-05, + "loss": 1.485, + "step": 1172000 + }, + { + "epoch": 12.37, + "learning_rate": 4.381312198571082e-05, + "loss": 1.4572, + "step": 1172500 + }, + { + "epoch": 12.38, + "learning_rate": 4.3810483658199397e-05, + "loss": 1.3661, + "step": 1173000 + }, + { + "epoch": 12.38, + "learning_rate": 4.380784533068797e-05, + "loss": 1.4189, + "step": 1173500 + }, + { + "epoch": 12.39, + "learning_rate": 4.380520700317655e-05, + "loss": 1.4728, + "step": 1174000 + }, + { + "epoch": 12.39, + "learning_rate": 4.380256867566512e-05, + "loss": 1.5145, + "step": 1174500 + }, + { + "epoch": 12.4, + "learning_rate": 4.37999303481537e-05, + "loss": 1.4957, + "step": 1175000 + }, + { + "epoch": 12.41, + "learning_rate": 4.379729202064228e-05, + "loss": 1.408, + "step": 1175500 + }, + { + "epoch": 12.41, + "learning_rate": 4.3794653693130856e-05, + "loss": 1.3978, + "step": 1176000 + }, + { + "epoch": 12.42, + "learning_rate": 4.3792015365619425e-05, + "loss": 1.4065, + "step": 1176500 + }, + { + "epoch": 12.42, + "learning_rate": 4.3789377038108e-05, + "loss": 1.4917, + "step": 1177000 + }, + { + "epoch": 12.43, + "learning_rate": 4.378673871059658e-05, + "loss": 1.4383, + "step": 1177500 + }, + { + "epoch": 12.43, + "learning_rate": 4.378410038308516e-05, + "loss": 1.4241, + "step": 1178000 + }, + { + "epoch": 12.44, + "learning_rate": 4.3781462055573734e-05, + "loss": 1.4593, + "step": 1178500 + }, + { + "epoch": 12.44, + "learning_rate": 4.377882372806231e-05, + "loss": 1.4271, + "step": 1179000 + }, + { + "epoch": 12.45, + "learning_rate": 4.3776185400550885e-05, + "loss": 1.4439, + "step": 1179500 + }, + { + "epoch": 12.45, + "learning_rate": 4.377354707303946e-05, + "loss": 1.474, + "step": 1180000 + }, + { + "epoch": 12.46, + "learning_rate": 4.3770908745528036e-05, + "loss": 1.5043, + "step": 1180500 + }, + { + "epoch": 12.46, + "learning_rate": 4.376827041801661e-05, + "loss": 1.4561, + "step": 1181000 + }, + { + "epoch": 12.47, + "learning_rate": 4.3765632090505187e-05, + "loss": 1.4849, + "step": 1181500 + }, + { + "epoch": 12.47, + "learning_rate": 4.376299376299376e-05, + "loss": 1.4686, + "step": 1182000 + }, + { + "epoch": 12.48, + "learning_rate": 4.3760355435482344e-05, + "loss": 1.455, + "step": 1182500 + }, + { + "epoch": 12.48, + "learning_rate": 4.375771710797092e-05, + "loss": 1.4487, + "step": 1183000 + }, + { + "epoch": 12.49, + "learning_rate": 4.375507878045949e-05, + "loss": 1.4219, + "step": 1183500 + }, + { + "epoch": 12.5, + "learning_rate": 4.375244045294807e-05, + "loss": 1.4689, + "step": 1184000 + }, + { + "epoch": 12.5, + "learning_rate": 4.3749802125436646e-05, + "loss": 1.467, + "step": 1184500 + }, + { + "epoch": 12.51, + "learning_rate": 4.374716379792522e-05, + "loss": 1.431, + "step": 1185000 + }, + { + "epoch": 12.51, + "learning_rate": 4.37445254704138e-05, + "loss": 1.4939, + "step": 1185500 + }, + { + "epoch": 12.52, + "learning_rate": 4.374188714290237e-05, + "loss": 1.4387, + "step": 1186000 + }, + { + "epoch": 12.52, + "learning_rate": 4.373924881539095e-05, + "loss": 1.4953, + "step": 1186500 + }, + { + "epoch": 12.53, + "learning_rate": 4.3736610487879524e-05, + "loss": 1.4444, + "step": 1187000 + }, + { + "epoch": 12.53, + "learning_rate": 4.3733972160368106e-05, + "loss": 1.4969, + "step": 1187500 + }, + { + "epoch": 12.54, + "learning_rate": 4.373133383285668e-05, + "loss": 1.3882, + "step": 1188000 + }, + { + "epoch": 12.54, + "learning_rate": 4.372869550534525e-05, + "loss": 1.4494, + "step": 1188500 + }, + { + "epoch": 12.55, + "learning_rate": 4.3726057177833826e-05, + "loss": 1.428, + "step": 1189000 + }, + { + "epoch": 12.55, + "learning_rate": 4.372341885032241e-05, + "loss": 1.4502, + "step": 1189500 + }, + { + "epoch": 12.56, + "learning_rate": 4.3720780522810983e-05, + "loss": 1.4374, + "step": 1190000 + }, + { + "epoch": 12.56, + "learning_rate": 4.371814219529955e-05, + "loss": 1.4464, + "step": 1190500 + }, + { + "epoch": 12.57, + "learning_rate": 4.3715503867788134e-05, + "loss": 1.5181, + "step": 1191000 + }, + { + "epoch": 12.57, + "learning_rate": 4.371286554027671e-05, + "loss": 1.4237, + "step": 1191500 + }, + { + "epoch": 12.58, + "learning_rate": 4.3710227212765285e-05, + "loss": 1.4622, + "step": 1192000 + }, + { + "epoch": 12.58, + "learning_rate": 4.370758888525386e-05, + "loss": 1.4572, + "step": 1192500 + }, + { + "epoch": 12.59, + "learning_rate": 4.3704950557742436e-05, + "loss": 1.4835, + "step": 1193000 + }, + { + "epoch": 12.6, + "learning_rate": 4.370231223023101e-05, + "loss": 1.4487, + "step": 1193500 + }, + { + "epoch": 12.6, + "learning_rate": 4.369967390271959e-05, + "loss": 1.4445, + "step": 1194000 + }, + { + "epoch": 12.61, + "learning_rate": 4.369703557520817e-05, + "loss": 1.4279, + "step": 1194500 + }, + { + "epoch": 12.61, + "learning_rate": 4.3694397247696745e-05, + "loss": 1.4103, + "step": 1195000 + }, + { + "epoch": 12.62, + "learning_rate": 4.3691758920185314e-05, + "loss": 1.4733, + "step": 1195500 + }, + { + "epoch": 12.62, + "learning_rate": 4.3689120592673896e-05, + "loss": 1.4847, + "step": 1196000 + }, + { + "epoch": 12.63, + "learning_rate": 4.368648226516247e-05, + "loss": 1.4658, + "step": 1196500 + }, + { + "epoch": 12.63, + "learning_rate": 4.368384393765105e-05, + "loss": 1.4595, + "step": 1197000 + }, + { + "epoch": 12.64, + "learning_rate": 4.368120561013962e-05, + "loss": 1.4648, + "step": 1197500 + }, + { + "epoch": 12.64, + "learning_rate": 4.36785672826282e-05, + "loss": 1.4779, + "step": 1198000 + }, + { + "epoch": 12.65, + "learning_rate": 4.3675928955116773e-05, + "loss": 1.4552, + "step": 1198500 + }, + { + "epoch": 12.65, + "learning_rate": 4.367329062760535e-05, + "loss": 1.4943, + "step": 1199000 + }, + { + "epoch": 12.66, + "learning_rate": 4.367065230009393e-05, + "loss": 1.4869, + "step": 1199500 + }, + { + "epoch": 12.66, + "learning_rate": 4.36680139725825e-05, + "loss": 1.5111, + "step": 1200000 + }, + { + "epoch": 12.67, + "learning_rate": 4.3665375645071075e-05, + "loss": 1.4279, + "step": 1200500 + }, + { + "epoch": 12.67, + "learning_rate": 4.366273731755966e-05, + "loss": 1.463, + "step": 1201000 + }, + { + "epoch": 12.68, + "learning_rate": 4.366009899004823e-05, + "loss": 1.4492, + "step": 1201500 + }, + { + "epoch": 12.69, + "learning_rate": 4.365746066253681e-05, + "loss": 1.4818, + "step": 1202000 + }, + { + "epoch": 12.69, + "learning_rate": 4.365482233502538e-05, + "loss": 1.4574, + "step": 1202500 + }, + { + "epoch": 12.7, + "learning_rate": 4.365218400751396e-05, + "loss": 1.4155, + "step": 1203000 + }, + { + "epoch": 12.7, + "learning_rate": 4.3649545680002535e-05, + "loss": 1.4823, + "step": 1203500 + }, + { + "epoch": 12.71, + "learning_rate": 4.364690735249111e-05, + "loss": 1.4731, + "step": 1204000 + }, + { + "epoch": 12.71, + "learning_rate": 4.3644269024979686e-05, + "loss": 1.4405, + "step": 1204500 + }, + { + "epoch": 12.72, + "learning_rate": 4.364163069746826e-05, + "loss": 1.4777, + "step": 1205000 + }, + { + "epoch": 12.72, + "learning_rate": 4.363899236995684e-05, + "loss": 1.49, + "step": 1205500 + }, + { + "epoch": 12.73, + "learning_rate": 4.363635404244541e-05, + "loss": 1.4945, + "step": 1206000 + }, + { + "epoch": 12.73, + "learning_rate": 4.3633715714933995e-05, + "loss": 1.4833, + "step": 1206500 + }, + { + "epoch": 12.74, + "learning_rate": 4.363107738742257e-05, + "loss": 1.42, + "step": 1207000 + }, + { + "epoch": 12.74, + "learning_rate": 4.362843905991114e-05, + "loss": 1.4476, + "step": 1207500 + }, + { + "epoch": 12.75, + "learning_rate": 4.362580073239972e-05, + "loss": 1.4953, + "step": 1208000 + }, + { + "epoch": 12.75, + "learning_rate": 4.36231624048883e-05, + "loss": 1.5059, + "step": 1208500 + }, + { + "epoch": 12.76, + "learning_rate": 4.362052407737687e-05, + "loss": 1.4179, + "step": 1209000 + }, + { + "epoch": 12.76, + "learning_rate": 4.361788574986545e-05, + "loss": 1.4303, + "step": 1209500 + }, + { + "epoch": 12.77, + "learning_rate": 4.361524742235402e-05, + "loss": 1.4041, + "step": 1210000 + }, + { + "epoch": 12.77, + "learning_rate": 4.36126090948426e-05, + "loss": 1.459, + "step": 1210500 + }, + { + "epoch": 12.78, + "learning_rate": 4.3609970767331174e-05, + "loss": 1.4744, + "step": 1211000 + }, + { + "epoch": 12.79, + "learning_rate": 4.3607332439819756e-05, + "loss": 1.4469, + "step": 1211500 + }, + { + "epoch": 12.79, + "learning_rate": 4.3604694112308325e-05, + "loss": 1.4127, + "step": 1212000 + }, + { + "epoch": 12.8, + "learning_rate": 4.36020557847969e-05, + "loss": 1.4363, + "step": 1212500 + }, + { + "epoch": 12.8, + "learning_rate": 4.359941745728548e-05, + "loss": 1.4585, + "step": 1213000 + }, + { + "epoch": 12.81, + "learning_rate": 4.359677912977406e-05, + "loss": 1.4184, + "step": 1213500 + }, + { + "epoch": 12.81, + "learning_rate": 4.3594140802262634e-05, + "loss": 1.4055, + "step": 1214000 + }, + { + "epoch": 12.82, + "learning_rate": 4.35915024747512e-05, + "loss": 1.4556, + "step": 1214500 + }, + { + "epoch": 12.82, + "learning_rate": 4.3588864147239785e-05, + "loss": 1.4648, + "step": 1215000 + }, + { + "epoch": 12.83, + "learning_rate": 4.358622581972836e-05, + "loss": 1.4629, + "step": 1215500 + }, + { + "epoch": 12.83, + "learning_rate": 4.3583587492216936e-05, + "loss": 1.4426, + "step": 1216000 + }, + { + "epoch": 12.84, + "learning_rate": 4.358094916470551e-05, + "loss": 1.4957, + "step": 1216500 + }, + { + "epoch": 12.84, + "learning_rate": 4.357831083719409e-05, + "loss": 1.433, + "step": 1217000 + }, + { + "epoch": 12.85, + "learning_rate": 4.357567250968266e-05, + "loss": 1.4437, + "step": 1217500 + }, + { + "epoch": 12.85, + "learning_rate": 4.357303418217124e-05, + "loss": 1.4626, + "step": 1218000 + }, + { + "epoch": 12.86, + "learning_rate": 4.357039585465982e-05, + "loss": 1.4756, + "step": 1218500 + }, + { + "epoch": 12.86, + "learning_rate": 4.356775752714839e-05, + "loss": 1.4044, + "step": 1219000 + }, + { + "epoch": 12.87, + "learning_rate": 4.3565119199636964e-05, + "loss": 1.3893, + "step": 1219500 + }, + { + "epoch": 12.88, + "learning_rate": 4.3562480872125547e-05, + "loss": 1.4272, + "step": 1220000 + }, + { + "epoch": 12.88, + "learning_rate": 4.355984254461412e-05, + "loss": 1.4232, + "step": 1220500 + }, + { + "epoch": 12.89, + "learning_rate": 4.35572042171027e-05, + "loss": 1.4445, + "step": 1221000 + }, + { + "epoch": 12.89, + "learning_rate": 4.355456588959127e-05, + "loss": 1.4568, + "step": 1221500 + }, + { + "epoch": 12.9, + "learning_rate": 4.355192756207985e-05, + "loss": 1.4431, + "step": 1222000 + }, + { + "epoch": 12.9, + "learning_rate": 4.3549289234568424e-05, + "loss": 1.4647, + "step": 1222500 + }, + { + "epoch": 12.91, + "learning_rate": 4.3546650907057e-05, + "loss": 1.4608, + "step": 1223000 + }, + { + "epoch": 12.91, + "learning_rate": 4.354401257954558e-05, + "loss": 1.4092, + "step": 1223500 + }, + { + "epoch": 12.92, + "learning_rate": 4.354137425203415e-05, + "loss": 1.4511, + "step": 1224000 + }, + { + "epoch": 12.92, + "learning_rate": 4.3538735924522726e-05, + "loss": 1.4707, + "step": 1224500 + }, + { + "epoch": 12.93, + "learning_rate": 4.353609759701131e-05, + "loss": 1.4521, + "step": 1225000 + }, + { + "epoch": 12.93, + "learning_rate": 4.3533459269499884e-05, + "loss": 1.5028, + "step": 1225500 + }, + { + "epoch": 12.94, + "learning_rate": 4.353082094198846e-05, + "loss": 1.477, + "step": 1226000 + }, + { + "epoch": 12.94, + "learning_rate": 4.352818261447703e-05, + "loss": 1.4471, + "step": 1226500 + }, + { + "epoch": 12.95, + "learning_rate": 4.352554428696561e-05, + "loss": 1.4502, + "step": 1227000 + }, + { + "epoch": 12.95, + "learning_rate": 4.3522905959454186e-05, + "loss": 1.4445, + "step": 1227500 + }, + { + "epoch": 12.96, + "learning_rate": 4.352026763194276e-05, + "loss": 1.404, + "step": 1228000 + }, + { + "epoch": 12.96, + "learning_rate": 4.3517629304431337e-05, + "loss": 1.479, + "step": 1228500 + }, + { + "epoch": 12.97, + "learning_rate": 4.351499097691991e-05, + "loss": 1.4346, + "step": 1229000 + }, + { + "epoch": 12.98, + "learning_rate": 4.351235264940849e-05, + "loss": 1.4542, + "step": 1229500 + }, + { + "epoch": 12.98, + "learning_rate": 4.350971432189706e-05, + "loss": 1.4787, + "step": 1230000 + }, + { + "epoch": 12.99, + "learning_rate": 4.3507075994385645e-05, + "loss": 1.4431, + "step": 1230500 + }, + { + "epoch": 12.99, + "learning_rate": 4.3504437666874214e-05, + "loss": 1.4413, + "step": 1231000 + }, + { + "epoch": 13.0, + "learning_rate": 4.350179933936279e-05, + "loss": 1.4239, + "step": 1231500 + }, + { + "epoch": 13.0, + "learning_rate": 4.349916101185137e-05, + "loss": 1.4167, + "step": 1232000 + }, + { + "epoch": 13.01, + "learning_rate": 4.349652268433995e-05, + "loss": 1.4747, + "step": 1232500 + }, + { + "epoch": 13.01, + "learning_rate": 4.349388435682852e-05, + "loss": 1.4099, + "step": 1233000 + }, + { + "epoch": 13.02, + "learning_rate": 4.34912460293171e-05, + "loss": 1.4398, + "step": 1233500 + }, + { + "epoch": 13.02, + "learning_rate": 4.3488607701805674e-05, + "loss": 1.4277, + "step": 1234000 + }, + { + "epoch": 13.03, + "learning_rate": 4.348596937429425e-05, + "loss": 1.4655, + "step": 1234500 + }, + { + "epoch": 13.03, + "learning_rate": 4.3483331046782825e-05, + "loss": 1.4957, + "step": 1235000 + }, + { + "epoch": 13.04, + "learning_rate": 4.348069271927141e-05, + "loss": 1.4432, + "step": 1235500 + }, + { + "epoch": 13.04, + "learning_rate": 4.3478054391759976e-05, + "loss": 1.4102, + "step": 1236000 + }, + { + "epoch": 13.05, + "learning_rate": 4.347541606424855e-05, + "loss": 1.4367, + "step": 1236500 + }, + { + "epoch": 13.05, + "learning_rate": 4.3472777736737133e-05, + "loss": 1.4089, + "step": 1237000 + }, + { + "epoch": 13.06, + "learning_rate": 4.347013940922571e-05, + "loss": 1.4659, + "step": 1237500 + }, + { + "epoch": 13.06, + "learning_rate": 4.346750108171428e-05, + "loss": 1.4502, + "step": 1238000 + }, + { + "epoch": 13.07, + "learning_rate": 4.346486275420285e-05, + "loss": 1.3999, + "step": 1238500 + }, + { + "epoch": 13.08, + "learning_rate": 4.3462224426691435e-05, + "loss": 1.4476, + "step": 1239000 + }, + { + "epoch": 13.08, + "learning_rate": 4.345958609918001e-05, + "loss": 1.3991, + "step": 1239500 + }, + { + "epoch": 13.09, + "learning_rate": 4.3456947771668586e-05, + "loss": 1.4685, + "step": 1240000 + }, + { + "epoch": 13.09, + "learning_rate": 4.345430944415716e-05, + "loss": 1.4471, + "step": 1240500 + }, + { + "epoch": 13.1, + "learning_rate": 4.345167111664574e-05, + "loss": 1.4198, + "step": 1241000 + }, + { + "epoch": 13.1, + "learning_rate": 4.344903278913431e-05, + "loss": 1.4547, + "step": 1241500 + }, + { + "epoch": 13.11, + "learning_rate": 4.344639446162289e-05, + "loss": 1.4569, + "step": 1242000 + }, + { + "epoch": 13.11, + "learning_rate": 4.344375613411147e-05, + "loss": 1.4712, + "step": 1242500 + }, + { + "epoch": 13.12, + "learning_rate": 4.344111780660004e-05, + "loss": 1.4383, + "step": 1243000 + }, + { + "epoch": 13.12, + "learning_rate": 4.3438479479088615e-05, + "loss": 1.4513, + "step": 1243500 + }, + { + "epoch": 13.13, + "learning_rate": 4.34358411515772e-05, + "loss": 1.4515, + "step": 1244000 + }, + { + "epoch": 13.13, + "learning_rate": 4.343320282406577e-05, + "loss": 1.4303, + "step": 1244500 + }, + { + "epoch": 13.14, + "learning_rate": 4.343056449655435e-05, + "loss": 1.4874, + "step": 1245000 + }, + { + "epoch": 13.14, + "learning_rate": 4.3427926169042923e-05, + "loss": 1.449, + "step": 1245500 + }, + { + "epoch": 13.15, + "learning_rate": 4.34252878415315e-05, + "loss": 1.4557, + "step": 1246000 + }, + { + "epoch": 13.15, + "learning_rate": 4.3422649514020074e-05, + "loss": 1.4296, + "step": 1246500 + }, + { + "epoch": 13.16, + "learning_rate": 4.342001118650865e-05, + "loss": 1.4159, + "step": 1247000 + }, + { + "epoch": 13.17, + "learning_rate": 4.3417372858997225e-05, + "loss": 1.4998, + "step": 1247500 + }, + { + "epoch": 13.17, + "learning_rate": 4.34147345314858e-05, + "loss": 1.4832, + "step": 1248000 + }, + { + "epoch": 13.18, + "learning_rate": 4.3412096203974376e-05, + "loss": 1.4831, + "step": 1248500 + }, + { + "epoch": 13.18, + "learning_rate": 4.340945787646296e-05, + "loss": 1.4493, + "step": 1249000 + }, + { + "epoch": 13.19, + "learning_rate": 4.3406819548951534e-05, + "loss": 1.4164, + "step": 1249500 + }, + { + "epoch": 13.19, + "learning_rate": 4.34041812214401e-05, + "loss": 1.4018, + "step": 1250000 + }, + { + "epoch": 13.2, + "learning_rate": 4.340154289392868e-05, + "loss": 1.3922, + "step": 1250500 + }, + { + "epoch": 13.2, + "learning_rate": 4.339890456641726e-05, + "loss": 1.4464, + "step": 1251000 + }, + { + "epoch": 13.21, + "learning_rate": 4.3396266238905836e-05, + "loss": 1.4674, + "step": 1251500 + }, + { + "epoch": 13.21, + "learning_rate": 4.339362791139441e-05, + "loss": 1.4167, + "step": 1252000 + }, + { + "epoch": 13.22, + "learning_rate": 4.339098958388299e-05, + "loss": 1.4554, + "step": 1252500 + }, + { + "epoch": 13.22, + "learning_rate": 4.338835125637156e-05, + "loss": 1.4652, + "step": 1253000 + }, + { + "epoch": 13.23, + "learning_rate": 4.338571292886014e-05, + "loss": 1.471, + "step": 1253500 + }, + { + "epoch": 13.23, + "learning_rate": 4.3383074601348714e-05, + "loss": 1.4448, + "step": 1254000 + }, + { + "epoch": 13.24, + "learning_rate": 4.3380436273837296e-05, + "loss": 1.4048, + "step": 1254500 + }, + { + "epoch": 13.24, + "learning_rate": 4.3377797946325865e-05, + "loss": 1.4865, + "step": 1255000 + }, + { + "epoch": 13.25, + "learning_rate": 4.337515961881444e-05, + "loss": 1.4579, + "step": 1255500 + }, + { + "epoch": 13.25, + "learning_rate": 4.337252129130302e-05, + "loss": 1.4155, + "step": 1256000 + }, + { + "epoch": 13.26, + "learning_rate": 4.33698829637916e-05, + "loss": 1.4135, + "step": 1256500 + }, + { + "epoch": 13.27, + "learning_rate": 4.3367244636280166e-05, + "loss": 1.4184, + "step": 1257000 + }, + { + "epoch": 13.27, + "learning_rate": 4.336460630876875e-05, + "loss": 1.4557, + "step": 1257500 + }, + { + "epoch": 13.28, + "learning_rate": 4.3361967981257324e-05, + "loss": 1.4267, + "step": 1258000 + }, + { + "epoch": 13.28, + "learning_rate": 4.33593296537459e-05, + "loss": 1.4621, + "step": 1258500 + }, + { + "epoch": 13.29, + "learning_rate": 4.3356691326234475e-05, + "loss": 1.4572, + "step": 1259000 + }, + { + "epoch": 13.29, + "learning_rate": 4.335405299872305e-05, + "loss": 1.4824, + "step": 1259500 + }, + { + "epoch": 13.3, + "learning_rate": 4.3351414671211626e-05, + "loss": 1.4424, + "step": 1260000 + }, + { + "epoch": 13.3, + "learning_rate": 4.33487763437002e-05, + "loss": 1.3926, + "step": 1260500 + }, + { + "epoch": 13.31, + "learning_rate": 4.3346138016188784e-05, + "loss": 1.4463, + "step": 1261000 + }, + { + "epoch": 13.31, + "learning_rate": 4.334349968867736e-05, + "loss": 1.4644, + "step": 1261500 + }, + { + "epoch": 13.32, + "learning_rate": 4.334086136116593e-05, + "loss": 1.3755, + "step": 1262000 + }, + { + "epoch": 13.32, + "learning_rate": 4.3338223033654504e-05, + "loss": 1.4395, + "step": 1262500 + }, + { + "epoch": 13.33, + "learning_rate": 4.3335584706143086e-05, + "loss": 1.4614, + "step": 1263000 + }, + { + "epoch": 13.33, + "learning_rate": 4.333294637863166e-05, + "loss": 1.482, + "step": 1263500 + }, + { + "epoch": 13.34, + "learning_rate": 4.333030805112024e-05, + "loss": 1.4242, + "step": 1264000 + }, + { + "epoch": 13.34, + "learning_rate": 4.332766972360881e-05, + "loss": 1.4563, + "step": 1264500 + }, + { + "epoch": 13.35, + "learning_rate": 4.332503139609739e-05, + "loss": 1.4653, + "step": 1265000 + }, + { + "epoch": 13.36, + "learning_rate": 4.332239306858596e-05, + "loss": 1.4526, + "step": 1265500 + }, + { + "epoch": 13.36, + "learning_rate": 4.331975474107454e-05, + "loss": 1.3878, + "step": 1266000 + }, + { + "epoch": 13.37, + "learning_rate": 4.3317116413563114e-05, + "loss": 1.4203, + "step": 1266500 + }, + { + "epoch": 13.37, + "learning_rate": 4.331447808605169e-05, + "loss": 1.4502, + "step": 1267000 + }, + { + "epoch": 13.38, + "learning_rate": 4.3311839758540265e-05, + "loss": 1.4742, + "step": 1267500 + }, + { + "epoch": 13.38, + "learning_rate": 4.330920143102885e-05, + "loss": 1.4462, + "step": 1268000 + }, + { + "epoch": 13.39, + "learning_rate": 4.330656310351742e-05, + "loss": 1.4673, + "step": 1268500 + }, + { + "epoch": 13.39, + "learning_rate": 4.330392477600599e-05, + "loss": 1.4778, + "step": 1269000 + }, + { + "epoch": 13.4, + "learning_rate": 4.3301286448494574e-05, + "loss": 1.4308, + "step": 1269500 + }, + { + "epoch": 13.4, + "learning_rate": 4.329864812098315e-05, + "loss": 1.4408, + "step": 1270000 + }, + { + "epoch": 13.41, + "learning_rate": 4.3296009793471725e-05, + "loss": 1.4381, + "step": 1270500 + }, + { + "epoch": 13.41, + "learning_rate": 4.32933714659603e-05, + "loss": 1.4269, + "step": 1271000 + }, + { + "epoch": 13.42, + "learning_rate": 4.3290733138448876e-05, + "loss": 1.4325, + "step": 1271500 + }, + { + "epoch": 13.42, + "learning_rate": 4.328809481093745e-05, + "loss": 1.4718, + "step": 1272000 + }, + { + "epoch": 13.43, + "learning_rate": 4.328545648342603e-05, + "loss": 1.4378, + "step": 1272500 + }, + { + "epoch": 13.43, + "learning_rate": 4.328281815591461e-05, + "loss": 1.4205, + "step": 1273000 + }, + { + "epoch": 13.44, + "learning_rate": 4.3280179828403185e-05, + "loss": 1.4716, + "step": 1273500 + }, + { + "epoch": 13.44, + "learning_rate": 4.327754150089175e-05, + "loss": 1.4862, + "step": 1274000 + }, + { + "epoch": 13.45, + "learning_rate": 4.3274903173380336e-05, + "loss": 1.4433, + "step": 1274500 + }, + { + "epoch": 13.46, + "learning_rate": 4.327226484586891e-05, + "loss": 1.4696, + "step": 1275000 + }, + { + "epoch": 13.46, + "learning_rate": 4.3269626518357487e-05, + "loss": 1.4201, + "step": 1275500 + }, + { + "epoch": 13.47, + "learning_rate": 4.3266988190846055e-05, + "loss": 1.4277, + "step": 1276000 + }, + { + "epoch": 13.47, + "learning_rate": 4.326434986333464e-05, + "loss": 1.4317, + "step": 1276500 + }, + { + "epoch": 13.48, + "learning_rate": 4.326171153582321e-05, + "loss": 1.4573, + "step": 1277000 + }, + { + "epoch": 13.48, + "learning_rate": 4.325907320831179e-05, + "loss": 1.4342, + "step": 1277500 + }, + { + "epoch": 13.49, + "learning_rate": 4.3256434880800364e-05, + "loss": 1.4632, + "step": 1278000 + }, + { + "epoch": 13.49, + "learning_rate": 4.325379655328894e-05, + "loss": 1.3882, + "step": 1278500 + }, + { + "epoch": 13.5, + "learning_rate": 4.3251158225777515e-05, + "loss": 1.5049, + "step": 1279000 + }, + { + "epoch": 13.5, + "learning_rate": 4.324851989826609e-05, + "loss": 1.4372, + "step": 1279500 + }, + { + "epoch": 13.51, + "learning_rate": 4.324588157075467e-05, + "loss": 1.4366, + "step": 1280000 + }, + { + "epoch": 13.51, + "learning_rate": 4.324324324324325e-05, + "loss": 1.4606, + "step": 1280500 + }, + { + "epoch": 13.52, + "learning_rate": 4.324060491573182e-05, + "loss": 1.4417, + "step": 1281000 + }, + { + "epoch": 13.52, + "learning_rate": 4.32379665882204e-05, + "loss": 1.4111, + "step": 1281500 + }, + { + "epoch": 13.53, + "learning_rate": 4.3235328260708975e-05, + "loss": 1.4108, + "step": 1282000 + }, + { + "epoch": 13.53, + "learning_rate": 4.323268993319755e-05, + "loss": 1.5074, + "step": 1282500 + }, + { + "epoch": 13.54, + "learning_rate": 4.3230051605686126e-05, + "loss": 1.438, + "step": 1283000 + }, + { + "epoch": 13.55, + "learning_rate": 4.32274132781747e-05, + "loss": 1.4717, + "step": 1283500 + }, + { + "epoch": 13.55, + "learning_rate": 4.322477495066328e-05, + "loss": 1.4335, + "step": 1284000 + }, + { + "epoch": 13.56, + "learning_rate": 4.322213662315185e-05, + "loss": 1.453, + "step": 1284500 + }, + { + "epoch": 13.56, + "learning_rate": 4.3219498295640434e-05, + "loss": 1.442, + "step": 1285000 + }, + { + "epoch": 13.57, + "learning_rate": 4.3216859968129e-05, + "loss": 1.4309, + "step": 1285500 + }, + { + "epoch": 13.57, + "learning_rate": 4.321422164061758e-05, + "loss": 1.494, + "step": 1286000 + }, + { + "epoch": 13.58, + "learning_rate": 4.321158331310616e-05, + "loss": 1.4151, + "step": 1286500 + }, + { + "epoch": 13.58, + "learning_rate": 4.3208944985594736e-05, + "loss": 1.4589, + "step": 1287000 + }, + { + "epoch": 13.59, + "learning_rate": 4.320630665808331e-05, + "loss": 1.4737, + "step": 1287500 + }, + { + "epoch": 13.59, + "learning_rate": 4.320366833057188e-05, + "loss": 1.4062, + "step": 1288000 + }, + { + "epoch": 13.6, + "learning_rate": 4.320103000306046e-05, + "loss": 1.3784, + "step": 1288500 + }, + { + "epoch": 13.6, + "learning_rate": 4.319839167554904e-05, + "loss": 1.3678, + "step": 1289000 + }, + { + "epoch": 13.61, + "learning_rate": 4.3195753348037614e-05, + "loss": 1.4653, + "step": 1289500 + }, + { + "epoch": 13.61, + "learning_rate": 4.319311502052619e-05, + "loss": 1.4923, + "step": 1290000 + }, + { + "epoch": 13.62, + "learning_rate": 4.3190476693014765e-05, + "loss": 1.4175, + "step": 1290500 + }, + { + "epoch": 13.62, + "learning_rate": 4.318783836550334e-05, + "loss": 1.4265, + "step": 1291000 + }, + { + "epoch": 13.63, + "learning_rate": 4.3185200037991916e-05, + "loss": 1.4612, + "step": 1291500 + }, + { + "epoch": 13.63, + "learning_rate": 4.31825617104805e-05, + "loss": 1.4854, + "step": 1292000 + }, + { + "epoch": 13.64, + "learning_rate": 4.3179923382969073e-05, + "loss": 1.4248, + "step": 1292500 + }, + { + "epoch": 13.65, + "learning_rate": 4.317728505545764e-05, + "loss": 1.4424, + "step": 1293000 + }, + { + "epoch": 13.65, + "learning_rate": 4.3174646727946224e-05, + "loss": 1.4548, + "step": 1293500 + }, + { + "epoch": 13.66, + "learning_rate": 4.31720084004348e-05, + "loss": 1.3944, + "step": 1294000 + }, + { + "epoch": 13.66, + "learning_rate": 4.3169370072923375e-05, + "loss": 1.4502, + "step": 1294500 + }, + { + "epoch": 13.67, + "learning_rate": 4.316673174541195e-05, + "loss": 1.4486, + "step": 1295000 + }, + { + "epoch": 13.67, + "learning_rate": 4.3164093417900526e-05, + "loss": 1.4467, + "step": 1295500 + }, + { + "epoch": 13.68, + "learning_rate": 4.31614550903891e-05, + "loss": 1.3831, + "step": 1296000 + }, + { + "epoch": 13.68, + "learning_rate": 4.315881676287768e-05, + "loss": 1.4345, + "step": 1296500 + }, + { + "epoch": 13.69, + "learning_rate": 4.315617843536626e-05, + "loss": 1.4966, + "step": 1297000 + }, + { + "epoch": 13.69, + "learning_rate": 4.315354010785483e-05, + "loss": 1.4467, + "step": 1297500 + }, + { + "epoch": 13.7, + "learning_rate": 4.3150901780343404e-05, + "loss": 1.4282, + "step": 1298000 + }, + { + "epoch": 13.7, + "learning_rate": 4.3148263452831986e-05, + "loss": 1.4309, + "step": 1298500 + }, + { + "epoch": 13.71, + "learning_rate": 4.314562512532056e-05, + "loss": 1.4253, + "step": 1299000 + }, + { + "epoch": 13.71, + "learning_rate": 4.314298679780914e-05, + "loss": 1.467, + "step": 1299500 + }, + { + "epoch": 13.72, + "learning_rate": 4.3140348470297706e-05, + "loss": 1.4791, + "step": 1300000 + }, + { + "epoch": 13.72, + "learning_rate": 4.313771014278629e-05, + "loss": 1.418, + "step": 1300500 + }, + { + "epoch": 13.73, + "learning_rate": 4.3135071815274864e-05, + "loss": 1.4752, + "step": 1301000 + }, + { + "epoch": 13.74, + "learning_rate": 4.313243348776344e-05, + "loss": 1.4631, + "step": 1301500 + }, + { + "epoch": 13.74, + "learning_rate": 4.3129795160252015e-05, + "loss": 1.4475, + "step": 1302000 + }, + { + "epoch": 13.75, + "learning_rate": 4.312715683274059e-05, + "loss": 1.4576, + "step": 1302500 + }, + { + "epoch": 13.75, + "learning_rate": 4.3124518505229166e-05, + "loss": 1.4885, + "step": 1303000 + }, + { + "epoch": 13.76, + "learning_rate": 4.312188017771774e-05, + "loss": 1.4153, + "step": 1303500 + }, + { + "epoch": 13.76, + "learning_rate": 4.311924185020632e-05, + "loss": 1.4504, + "step": 1304000 + }, + { + "epoch": 13.77, + "learning_rate": 4.311660352269489e-05, + "loss": 1.4381, + "step": 1304500 + }, + { + "epoch": 13.77, + "learning_rate": 4.311396519518347e-05, + "loss": 1.4731, + "step": 1305000 + }, + { + "epoch": 13.78, + "learning_rate": 4.311132686767205e-05, + "loss": 1.4424, + "step": 1305500 + }, + { + "epoch": 13.78, + "learning_rate": 4.3108688540160625e-05, + "loss": 1.4923, + "step": 1306000 + }, + { + "epoch": 13.79, + "learning_rate": 4.31060502126492e-05, + "loss": 1.4151, + "step": 1306500 + }, + { + "epoch": 13.79, + "learning_rate": 4.3103411885137776e-05, + "loss": 1.4222, + "step": 1307000 + }, + { + "epoch": 13.8, + "learning_rate": 4.310077355762635e-05, + "loss": 1.453, + "step": 1307500 + }, + { + "epoch": 13.8, + "learning_rate": 4.309813523011493e-05, + "loss": 1.4496, + "step": 1308000 + }, + { + "epoch": 13.81, + "learning_rate": 4.30954969026035e-05, + "loss": 1.4788, + "step": 1308500 + }, + { + "epoch": 13.81, + "learning_rate": 4.3092858575092085e-05, + "loss": 1.4912, + "step": 1309000 + }, + { + "epoch": 13.82, + "learning_rate": 4.3090220247580654e-05, + "loss": 1.4788, + "step": 1309500 + }, + { + "epoch": 13.82, + "learning_rate": 4.308758192006923e-05, + "loss": 1.435, + "step": 1310000 + }, + { + "epoch": 13.83, + "learning_rate": 4.308494359255781e-05, + "loss": 1.4491, + "step": 1310500 + }, + { + "epoch": 13.84, + "learning_rate": 4.308230526504639e-05, + "loss": 1.4107, + "step": 1311000 + }, + { + "epoch": 13.84, + "learning_rate": 4.307966693753496e-05, + "loss": 1.454, + "step": 1311500 + }, + { + "epoch": 13.85, + "learning_rate": 4.307702861002353e-05, + "loss": 1.4671, + "step": 1312000 + }, + { + "epoch": 13.85, + "learning_rate": 4.307439028251211e-05, + "loss": 1.4923, + "step": 1312500 + }, + { + "epoch": 13.86, + "learning_rate": 4.307175195500069e-05, + "loss": 1.4468, + "step": 1313000 + }, + { + "epoch": 13.86, + "learning_rate": 4.3069113627489264e-05, + "loss": 1.4277, + "step": 1313500 + }, + { + "epoch": 13.87, + "learning_rate": 4.306647529997784e-05, + "loss": 1.4094, + "step": 1314000 + }, + { + "epoch": 13.87, + "learning_rate": 4.3063836972466415e-05, + "loss": 1.4575, + "step": 1314500 + }, + { + "epoch": 13.88, + "learning_rate": 4.306119864495499e-05, + "loss": 1.4778, + "step": 1315000 + }, + { + "epoch": 13.88, + "learning_rate": 4.3058560317443566e-05, + "loss": 1.4425, + "step": 1315500 + }, + { + "epoch": 13.89, + "learning_rate": 4.305592198993215e-05, + "loss": 1.392, + "step": 1316000 + }, + { + "epoch": 13.89, + "learning_rate": 4.305328366242072e-05, + "loss": 1.4104, + "step": 1316500 + }, + { + "epoch": 13.9, + "learning_rate": 4.305064533490929e-05, + "loss": 1.4181, + "step": 1317000 + }, + { + "epoch": 13.9, + "learning_rate": 4.3048007007397875e-05, + "loss": 1.4412, + "step": 1317500 + }, + { + "epoch": 13.91, + "learning_rate": 4.304536867988645e-05, + "loss": 1.4555, + "step": 1318000 + }, + { + "epoch": 13.91, + "learning_rate": 4.3042730352375026e-05, + "loss": 1.4466, + "step": 1318500 + }, + { + "epoch": 13.92, + "learning_rate": 4.30400920248636e-05, + "loss": 1.4876, + "step": 1319000 + }, + { + "epoch": 13.93, + "learning_rate": 4.303745369735218e-05, + "loss": 1.4294, + "step": 1319500 + }, + { + "epoch": 13.93, + "learning_rate": 4.303481536984075e-05, + "loss": 1.3798, + "step": 1320000 + }, + { + "epoch": 13.94, + "learning_rate": 4.303217704232933e-05, + "loss": 1.5197, + "step": 1320500 + }, + { + "epoch": 13.94, + "learning_rate": 4.302953871481791e-05, + "loss": 1.3965, + "step": 1321000 + }, + { + "epoch": 13.95, + "learning_rate": 4.302690038730648e-05, + "loss": 1.4528, + "step": 1321500 + }, + { + "epoch": 13.95, + "learning_rate": 4.3024262059795054e-05, + "loss": 1.4704, + "step": 1322000 + }, + { + "epoch": 13.96, + "learning_rate": 4.3021623732283637e-05, + "loss": 1.4118, + "step": 1322500 + }, + { + "epoch": 13.96, + "learning_rate": 4.301898540477221e-05, + "loss": 1.4696, + "step": 1323000 + }, + { + "epoch": 13.97, + "learning_rate": 4.301634707726078e-05, + "loss": 1.4651, + "step": 1323500 + }, + { + "epoch": 13.97, + "learning_rate": 4.3013708749749356e-05, + "loss": 1.4267, + "step": 1324000 + }, + { + "epoch": 13.98, + "learning_rate": 4.301107042223794e-05, + "loss": 1.4364, + "step": 1324500 + }, + { + "epoch": 13.98, + "learning_rate": 4.3008432094726514e-05, + "loss": 1.5419, + "step": 1325000 + }, + { + "epoch": 13.99, + "learning_rate": 4.300579376721509e-05, + "loss": 1.4504, + "step": 1325500 + }, + { + "epoch": 13.99, + "learning_rate": 4.3003155439703665e-05, + "loss": 1.436, + "step": 1326000 + }, + { + "epoch": 14.0, + "learning_rate": 4.300051711219224e-05, + "loss": 1.462, + "step": 1326500 + }, + { + "epoch": 14.0, + "learning_rate": 4.2997878784680816e-05, + "loss": 1.4669, + "step": 1327000 + }, + { + "epoch": 14.01, + "learning_rate": 4.299524045716939e-05, + "loss": 1.534, + "step": 1327500 + }, + { + "epoch": 14.01, + "learning_rate": 4.2992602129657974e-05, + "loss": 1.4019, + "step": 1328000 + }, + { + "epoch": 14.02, + "learning_rate": 4.298996380214654e-05, + "loss": 1.4406, + "step": 1328500 + }, + { + "epoch": 14.03, + "learning_rate": 4.298732547463512e-05, + "loss": 1.4044, + "step": 1329000 + }, + { + "epoch": 14.03, + "learning_rate": 4.29846871471237e-05, + "loss": 1.463, + "step": 1329500 + }, + { + "epoch": 14.04, + "learning_rate": 4.2982048819612276e-05, + "loss": 1.4588, + "step": 1330000 + }, + { + "epoch": 14.04, + "learning_rate": 4.297941049210085e-05, + "loss": 1.3962, + "step": 1330500 + }, + { + "epoch": 14.05, + "learning_rate": 4.297677216458943e-05, + "loss": 1.4217, + "step": 1331000 + }, + { + "epoch": 14.05, + "learning_rate": 4.2974133837078e-05, + "loss": 1.4174, + "step": 1331500 + }, + { + "epoch": 14.06, + "learning_rate": 4.297149550956658e-05, + "loss": 1.4547, + "step": 1332000 + }, + { + "epoch": 14.06, + "learning_rate": 4.296885718205515e-05, + "loss": 1.4235, + "step": 1332500 + }, + { + "epoch": 14.07, + "learning_rate": 4.296621885454373e-05, + "loss": 1.4557, + "step": 1333000 + }, + { + "epoch": 14.07, + "learning_rate": 4.2963580527032304e-05, + "loss": 1.4267, + "step": 1333500 + }, + { + "epoch": 14.08, + "learning_rate": 4.296094219952088e-05, + "loss": 1.4141, + "step": 1334000 + }, + { + "epoch": 14.08, + "learning_rate": 4.295830387200946e-05, + "loss": 1.4294, + "step": 1334500 + }, + { + "epoch": 14.09, + "learning_rate": 4.295566554449804e-05, + "loss": 1.4576, + "step": 1335000 + }, + { + "epoch": 14.09, + "learning_rate": 4.2953027216986606e-05, + "loss": 1.4481, + "step": 1335500 + }, + { + "epoch": 14.1, + "learning_rate": 4.295038888947518e-05, + "loss": 1.3587, + "step": 1336000 + }, + { + "epoch": 14.1, + "learning_rate": 4.2947750561963764e-05, + "loss": 1.4188, + "step": 1336500 + }, + { + "epoch": 14.11, + "learning_rate": 4.294511223445234e-05, + "loss": 1.4232, + "step": 1337000 + }, + { + "epoch": 14.12, + "learning_rate": 4.2942473906940915e-05, + "loss": 1.4798, + "step": 1337500 + }, + { + "epoch": 14.12, + "learning_rate": 4.293983557942949e-05, + "loss": 1.4275, + "step": 1338000 + }, + { + "epoch": 14.13, + "learning_rate": 4.2937197251918066e-05, + "loss": 1.4715, + "step": 1338500 + }, + { + "epoch": 14.13, + "learning_rate": 4.293455892440664e-05, + "loss": 1.4626, + "step": 1339000 + }, + { + "epoch": 14.14, + "learning_rate": 4.293192059689522e-05, + "loss": 1.4713, + "step": 1339500 + }, + { + "epoch": 14.14, + "learning_rate": 4.29292822693838e-05, + "loss": 1.3867, + "step": 1340000 + }, + { + "epoch": 14.15, + "learning_rate": 4.292664394187237e-05, + "loss": 1.463, + "step": 1340500 + }, + { + "epoch": 14.15, + "learning_rate": 4.292400561436094e-05, + "loss": 1.4555, + "step": 1341000 + }, + { + "epoch": 14.16, + "learning_rate": 4.2921367286849525e-05, + "loss": 1.4194, + "step": 1341500 + }, + { + "epoch": 14.16, + "learning_rate": 4.29187289593381e-05, + "loss": 1.4565, + "step": 1342000 + }, + { + "epoch": 14.17, + "learning_rate": 4.291609063182667e-05, + "loss": 1.418, + "step": 1342500 + }, + { + "epoch": 14.17, + "learning_rate": 4.291345230431525e-05, + "loss": 1.4428, + "step": 1343000 + }, + { + "epoch": 14.18, + "learning_rate": 4.291081397680383e-05, + "loss": 1.4662, + "step": 1343500 + }, + { + "epoch": 14.18, + "learning_rate": 4.29081756492924e-05, + "loss": 1.4827, + "step": 1344000 + }, + { + "epoch": 14.19, + "learning_rate": 4.290553732178098e-05, + "loss": 1.4102, + "step": 1344500 + }, + { + "epoch": 14.19, + "learning_rate": 4.2902898994269554e-05, + "loss": 1.5091, + "step": 1345000 + }, + { + "epoch": 14.2, + "learning_rate": 4.290026066675813e-05, + "loss": 1.445, + "step": 1345500 + }, + { + "epoch": 14.2, + "learning_rate": 4.2897622339246705e-05, + "loss": 1.4345, + "step": 1346000 + }, + { + "epoch": 14.21, + "learning_rate": 4.289498401173529e-05, + "loss": 1.5064, + "step": 1346500 + }, + { + "epoch": 14.22, + "learning_rate": 4.289234568422386e-05, + "loss": 1.4208, + "step": 1347000 + }, + { + "epoch": 14.22, + "learning_rate": 4.288970735671243e-05, + "loss": 1.4495, + "step": 1347500 + }, + { + "epoch": 14.23, + "learning_rate": 4.2887069029201014e-05, + "loss": 1.4242, + "step": 1348000 + }, + { + "epoch": 14.23, + "learning_rate": 4.288443070168959e-05, + "loss": 1.4316, + "step": 1348500 + }, + { + "epoch": 14.24, + "learning_rate": 4.2881792374178165e-05, + "loss": 1.5038, + "step": 1349000 + }, + { + "epoch": 14.24, + "learning_rate": 4.287915404666674e-05, + "loss": 1.4302, + "step": 1349500 + }, + { + "epoch": 14.25, + "learning_rate": 4.2876515719155316e-05, + "loss": 1.4754, + "step": 1350000 + }, + { + "epoch": 14.25, + "learning_rate": 4.287387739164389e-05, + "loss": 1.4778, + "step": 1350500 + }, + { + "epoch": 14.26, + "learning_rate": 4.2871239064132466e-05, + "loss": 1.4108, + "step": 1351000 + }, + { + "epoch": 14.26, + "learning_rate": 4.286860073662104e-05, + "loss": 1.4257, + "step": 1351500 + }, + { + "epoch": 14.27, + "learning_rate": 4.286596240910962e-05, + "loss": 1.4347, + "step": 1352000 + }, + { + "epoch": 14.27, + "learning_rate": 4.286332408159819e-05, + "loss": 1.3727, + "step": 1352500 + }, + { + "epoch": 14.28, + "learning_rate": 4.286068575408677e-05, + "loss": 1.4622, + "step": 1353000 + }, + { + "epoch": 14.28, + "learning_rate": 4.285804742657535e-05, + "loss": 1.354, + "step": 1353500 + }, + { + "epoch": 14.29, + "learning_rate": 4.2855409099063926e-05, + "loss": 1.4071, + "step": 1354000 + }, + { + "epoch": 14.29, + "learning_rate": 4.2852770771552495e-05, + "loss": 1.4403, + "step": 1354500 + }, + { + "epoch": 14.3, + "learning_rate": 4.285013244404108e-05, + "loss": 1.4118, + "step": 1355000 + }, + { + "epoch": 14.31, + "learning_rate": 4.284749411652965e-05, + "loss": 1.4641, + "step": 1355500 + }, + { + "epoch": 14.31, + "learning_rate": 4.284485578901823e-05, + "loss": 1.3872, + "step": 1356000 + }, + { + "epoch": 14.32, + "learning_rate": 4.2842217461506804e-05, + "loss": 1.4622, + "step": 1356500 + }, + { + "epoch": 14.32, + "learning_rate": 4.283957913399538e-05, + "loss": 1.4932, + "step": 1357000 + }, + { + "epoch": 14.33, + "learning_rate": 4.2836940806483955e-05, + "loss": 1.4223, + "step": 1357500 + }, + { + "epoch": 14.33, + "learning_rate": 4.283430247897253e-05, + "loss": 1.4587, + "step": 1358000 + }, + { + "epoch": 14.34, + "learning_rate": 4.283166415146111e-05, + "loss": 1.4294, + "step": 1358500 + }, + { + "epoch": 14.34, + "learning_rate": 4.282902582394968e-05, + "loss": 1.3946, + "step": 1359000 + }, + { + "epoch": 14.35, + "learning_rate": 4.2826387496438257e-05, + "loss": 1.3846, + "step": 1359500 + }, + { + "epoch": 14.35, + "learning_rate": 4.282374916892684e-05, + "loss": 1.4249, + "step": 1360000 + }, + { + "epoch": 14.36, + "learning_rate": 4.2821110841415414e-05, + "loss": 1.47, + "step": 1360500 + }, + { + "epoch": 14.36, + "learning_rate": 4.281847251390399e-05, + "loss": 1.4172, + "step": 1361000 + }, + { + "epoch": 14.37, + "learning_rate": 4.281583418639256e-05, + "loss": 1.4302, + "step": 1361500 + }, + { + "epoch": 14.37, + "learning_rate": 4.281319585888114e-05, + "loss": 1.481, + "step": 1362000 + }, + { + "epoch": 14.38, + "learning_rate": 4.2810557531369716e-05, + "loss": 1.488, + "step": 1362500 + }, + { + "epoch": 14.38, + "learning_rate": 4.280791920385829e-05, + "loss": 1.4709, + "step": 1363000 + }, + { + "epoch": 14.39, + "learning_rate": 4.280528087634687e-05, + "loss": 1.4077, + "step": 1363500 + }, + { + "epoch": 14.39, + "learning_rate": 4.280264254883544e-05, + "loss": 1.3819, + "step": 1364000 + }, + { + "epoch": 14.4, + "learning_rate": 4.280000422132402e-05, + "loss": 1.4584, + "step": 1364500 + }, + { + "epoch": 14.41, + "learning_rate": 4.2797365893812594e-05, + "loss": 1.385, + "step": 1365000 + }, + { + "epoch": 14.41, + "learning_rate": 4.2794727566301176e-05, + "loss": 1.4458, + "step": 1365500 + }, + { + "epoch": 14.42, + "learning_rate": 4.279208923878975e-05, + "loss": 1.4542, + "step": 1366000 + }, + { + "epoch": 14.42, + "learning_rate": 4.278945091127832e-05, + "loss": 1.442, + "step": 1366500 + }, + { + "epoch": 14.43, + "learning_rate": 4.27868125837669e-05, + "loss": 1.4127, + "step": 1367000 + }, + { + "epoch": 14.43, + "learning_rate": 4.278417425625548e-05, + "loss": 1.4622, + "step": 1367500 + }, + { + "epoch": 14.44, + "learning_rate": 4.278153592874405e-05, + "loss": 1.4792, + "step": 1368000 + }, + { + "epoch": 14.44, + "learning_rate": 4.277889760123263e-05, + "loss": 1.4533, + "step": 1368500 + }, + { + "epoch": 14.45, + "learning_rate": 4.2776259273721204e-05, + "loss": 1.4645, + "step": 1369000 + }, + { + "epoch": 14.45, + "learning_rate": 4.277362094620978e-05, + "loss": 1.509, + "step": 1369500 + }, + { + "epoch": 14.46, + "learning_rate": 4.2770982618698355e-05, + "loss": 1.4695, + "step": 1370000 + }, + { + "epoch": 14.46, + "learning_rate": 4.276834429118694e-05, + "loss": 1.4049, + "step": 1370500 + }, + { + "epoch": 14.47, + "learning_rate": 4.2765705963675506e-05, + "loss": 1.4159, + "step": 1371000 + }, + { + "epoch": 14.47, + "learning_rate": 4.276306763616408e-05, + "loss": 1.4437, + "step": 1371500 + }, + { + "epoch": 14.48, + "learning_rate": 4.2760429308652664e-05, + "loss": 1.4437, + "step": 1372000 + }, + { + "epoch": 14.48, + "learning_rate": 4.275779098114124e-05, + "loss": 1.4463, + "step": 1372500 + }, + { + "epoch": 14.49, + "learning_rate": 4.2755152653629815e-05, + "loss": 1.3961, + "step": 1373000 + }, + { + "epoch": 14.49, + "learning_rate": 4.2752514326118384e-05, + "loss": 1.4133, + "step": 1373500 + }, + { + "epoch": 14.5, + "learning_rate": 4.2749875998606966e-05, + "loss": 1.432, + "step": 1374000 + }, + { + "epoch": 14.51, + "learning_rate": 4.274723767109554e-05, + "loss": 1.4483, + "step": 1374500 + }, + { + "epoch": 14.51, + "learning_rate": 4.274459934358412e-05, + "loss": 1.4226, + "step": 1375000 + }, + { + "epoch": 14.52, + "learning_rate": 4.274196101607269e-05, + "loss": 1.3943, + "step": 1375500 + }, + { + "epoch": 14.52, + "learning_rate": 4.273932268856127e-05, + "loss": 1.3989, + "step": 1376000 + }, + { + "epoch": 14.53, + "learning_rate": 4.2736684361049843e-05, + "loss": 1.4344, + "step": 1376500 + }, + { + "epoch": 14.53, + "learning_rate": 4.273404603353842e-05, + "loss": 1.3787, + "step": 1377000 + }, + { + "epoch": 14.54, + "learning_rate": 4.2731407706027e-05, + "loss": 1.4472, + "step": 1377500 + }, + { + "epoch": 14.54, + "learning_rate": 4.272876937851557e-05, + "loss": 1.4903, + "step": 1378000 + }, + { + "epoch": 14.55, + "learning_rate": 4.2726131051004145e-05, + "loss": 1.4585, + "step": 1378500 + }, + { + "epoch": 14.55, + "learning_rate": 4.272349272349273e-05, + "loss": 1.4224, + "step": 1379000 + }, + { + "epoch": 14.56, + "learning_rate": 4.27208543959813e-05, + "loss": 1.4495, + "step": 1379500 + }, + { + "epoch": 14.56, + "learning_rate": 4.271821606846988e-05, + "loss": 1.4428, + "step": 1380000 + }, + { + "epoch": 14.57, + "learning_rate": 4.2715577740958454e-05, + "loss": 1.4681, + "step": 1380500 + }, + { + "epoch": 14.57, + "learning_rate": 4.271293941344703e-05, + "loss": 1.4424, + "step": 1381000 + }, + { + "epoch": 14.58, + "learning_rate": 4.2710301085935605e-05, + "loss": 1.4595, + "step": 1381500 + }, + { + "epoch": 14.58, + "learning_rate": 4.270766275842418e-05, + "loss": 1.482, + "step": 1382000 + }, + { + "epoch": 14.59, + "learning_rate": 4.270502443091276e-05, + "loss": 1.4221, + "step": 1382500 + }, + { + "epoch": 14.6, + "learning_rate": 4.270238610340133e-05, + "loss": 1.4475, + "step": 1383000 + }, + { + "epoch": 14.6, + "learning_rate": 4.269974777588991e-05, + "loss": 1.4658, + "step": 1383500 + }, + { + "epoch": 14.61, + "learning_rate": 4.269710944837849e-05, + "loss": 1.4348, + "step": 1384000 + }, + { + "epoch": 14.61, + "learning_rate": 4.2694471120867065e-05, + "loss": 1.458, + "step": 1384500 + }, + { + "epoch": 14.62, + "learning_rate": 4.269183279335564e-05, + "loss": 1.483, + "step": 1385000 + }, + { + "epoch": 14.62, + "learning_rate": 4.268919446584421e-05, + "loss": 1.4675, + "step": 1385500 + }, + { + "epoch": 14.63, + "learning_rate": 4.268655613833279e-05, + "loss": 1.4428, + "step": 1386000 + }, + { + "epoch": 14.63, + "learning_rate": 4.268391781082137e-05, + "loss": 1.3951, + "step": 1386500 + }, + { + "epoch": 14.64, + "learning_rate": 4.268127948330994e-05, + "loss": 1.4153, + "step": 1387000 + }, + { + "epoch": 14.64, + "learning_rate": 4.267864115579852e-05, + "loss": 1.4052, + "step": 1387500 + }, + { + "epoch": 14.65, + "learning_rate": 4.267600282828709e-05, + "loss": 1.3805, + "step": 1388000 + }, + { + "epoch": 14.65, + "learning_rate": 4.267336450077567e-05, + "loss": 1.4292, + "step": 1388500 + }, + { + "epoch": 14.66, + "learning_rate": 4.2670726173264244e-05, + "loss": 1.4566, + "step": 1389000 + }, + { + "epoch": 14.66, + "learning_rate": 4.2668087845752826e-05, + "loss": 1.4348, + "step": 1389500 + }, + { + "epoch": 14.67, + "learning_rate": 4.2665449518241395e-05, + "loss": 1.435, + "step": 1390000 + }, + { + "epoch": 14.67, + "learning_rate": 4.266281119072997e-05, + "loss": 1.4065, + "step": 1390500 + }, + { + "epoch": 14.68, + "learning_rate": 4.266017286321855e-05, + "loss": 1.4269, + "step": 1391000 + }, + { + "epoch": 14.68, + "learning_rate": 4.265753453570713e-05, + "loss": 1.4534, + "step": 1391500 + }, + { + "epoch": 14.69, + "learning_rate": 4.2654896208195704e-05, + "loss": 1.4202, + "step": 1392000 + }, + { + "epoch": 14.7, + "learning_rate": 4.265225788068428e-05, + "loss": 1.4484, + "step": 1392500 + }, + { + "epoch": 14.7, + "learning_rate": 4.2649619553172855e-05, + "loss": 1.4681, + "step": 1393000 + }, + { + "epoch": 14.71, + "learning_rate": 4.264698122566143e-05, + "loss": 1.4505, + "step": 1393500 + }, + { + "epoch": 14.71, + "learning_rate": 4.2644342898150006e-05, + "loss": 1.4066, + "step": 1394000 + }, + { + "epoch": 14.72, + "learning_rate": 4.264170457063859e-05, + "loss": 1.4475, + "step": 1394500 + }, + { + "epoch": 14.72, + "learning_rate": 4.263906624312716e-05, + "loss": 1.4901, + "step": 1395000 + }, + { + "epoch": 14.73, + "learning_rate": 4.263642791561573e-05, + "loss": 1.4063, + "step": 1395500 + }, + { + "epoch": 14.73, + "learning_rate": 4.2633789588104315e-05, + "loss": 1.4272, + "step": 1396000 + }, + { + "epoch": 14.74, + "learning_rate": 4.263115126059289e-05, + "loss": 1.4076, + "step": 1396500 + }, + { + "epoch": 14.74, + "learning_rate": 4.262851293308146e-05, + "loss": 1.4344, + "step": 1397000 + }, + { + "epoch": 14.75, + "learning_rate": 4.2625874605570034e-05, + "loss": 1.4635, + "step": 1397500 + }, + { + "epoch": 14.75, + "learning_rate": 4.2623236278058616e-05, + "loss": 1.4065, + "step": 1398000 + }, + { + "epoch": 14.76, + "learning_rate": 4.262059795054719e-05, + "loss": 1.3997, + "step": 1398500 + }, + { + "epoch": 14.76, + "learning_rate": 4.261795962303577e-05, + "loss": 1.4185, + "step": 1399000 + }, + { + "epoch": 14.77, + "learning_rate": 4.261532129552434e-05, + "loss": 1.4426, + "step": 1399500 + }, + { + "epoch": 14.77, + "learning_rate": 4.261268296801292e-05, + "loss": 1.4253, + "step": 1400000 + }, + { + "epoch": 14.78, + "learning_rate": 4.2610044640501494e-05, + "loss": 1.4392, + "step": 1400500 + }, + { + "epoch": 14.79, + "learning_rate": 4.260740631299007e-05, + "loss": 1.4496, + "step": 1401000 + }, + { + "epoch": 14.79, + "learning_rate": 4.260476798547865e-05, + "loss": 1.4177, + "step": 1401500 + }, + { + "epoch": 14.8, + "learning_rate": 4.260212965796722e-05, + "loss": 1.4398, + "step": 1402000 + }, + { + "epoch": 14.8, + "learning_rate": 4.2599491330455796e-05, + "loss": 1.4479, + "step": 1402500 + }, + { + "epoch": 14.81, + "learning_rate": 4.259685300294438e-05, + "loss": 1.4581, + "step": 1403000 + }, + { + "epoch": 14.81, + "learning_rate": 4.2594214675432954e-05, + "loss": 1.445, + "step": 1403500 + }, + { + "epoch": 14.82, + "learning_rate": 4.259157634792153e-05, + "loss": 1.4518, + "step": 1404000 + }, + { + "epoch": 14.82, + "learning_rate": 4.2588938020410105e-05, + "loss": 1.4496, + "step": 1404500 + }, + { + "epoch": 14.83, + "learning_rate": 4.258629969289868e-05, + "loss": 1.4541, + "step": 1405000 + }, + { + "epoch": 14.83, + "learning_rate": 4.2583661365387256e-05, + "loss": 1.4105, + "step": 1405500 + }, + { + "epoch": 14.84, + "learning_rate": 4.258102303787583e-05, + "loss": 1.4352, + "step": 1406000 + }, + { + "epoch": 14.84, + "learning_rate": 4.2578384710364407e-05, + "loss": 1.4378, + "step": 1406500 + }, + { + "epoch": 14.85, + "learning_rate": 4.257574638285298e-05, + "loss": 1.4579, + "step": 1407000 + }, + { + "epoch": 14.85, + "learning_rate": 4.257310805534156e-05, + "loss": 1.4461, + "step": 1407500 + }, + { + "epoch": 14.86, + "learning_rate": 4.257046972783014e-05, + "loss": 1.4388, + "step": 1408000 + }, + { + "epoch": 14.86, + "learning_rate": 4.2567831400318715e-05, + "loss": 1.4515, + "step": 1408500 + }, + { + "epoch": 14.87, + "learning_rate": 4.2565193072807284e-05, + "loss": 1.3891, + "step": 1409000 + }, + { + "epoch": 14.87, + "learning_rate": 4.256255474529586e-05, + "loss": 1.4344, + "step": 1409500 + }, + { + "epoch": 14.88, + "learning_rate": 4.255991641778444e-05, + "loss": 1.4216, + "step": 1410000 + }, + { + "epoch": 14.89, + "learning_rate": 4.255727809027302e-05, + "loss": 1.4002, + "step": 1410500 + }, + { + "epoch": 14.89, + "learning_rate": 4.255463976276159e-05, + "loss": 1.4354, + "step": 1411000 + }, + { + "epoch": 14.9, + "learning_rate": 4.255200143525017e-05, + "loss": 1.3862, + "step": 1411500 + }, + { + "epoch": 14.9, + "learning_rate": 4.2549363107738744e-05, + "loss": 1.4464, + "step": 1412000 + }, + { + "epoch": 14.91, + "learning_rate": 4.254672478022732e-05, + "loss": 1.4417, + "step": 1412500 + }, + { + "epoch": 14.91, + "learning_rate": 4.2544086452715895e-05, + "loss": 1.4285, + "step": 1413000 + }, + { + "epoch": 14.92, + "learning_rate": 4.254144812520448e-05, + "loss": 1.3974, + "step": 1413500 + }, + { + "epoch": 14.92, + "learning_rate": 4.2538809797693046e-05, + "loss": 1.3871, + "step": 1414000 + }, + { + "epoch": 14.93, + "learning_rate": 4.253617147018162e-05, + "loss": 1.3666, + "step": 1414500 + }, + { + "epoch": 14.93, + "learning_rate": 4.2533533142670203e-05, + "loss": 1.432, + "step": 1415000 + }, + { + "epoch": 14.94, + "learning_rate": 4.253089481515878e-05, + "loss": 1.4763, + "step": 1415500 + }, + { + "epoch": 14.94, + "learning_rate": 4.252825648764735e-05, + "loss": 1.4317, + "step": 1416000 + }, + { + "epoch": 14.95, + "learning_rate": 4.252561816013593e-05, + "loss": 1.4525, + "step": 1416500 + }, + { + "epoch": 14.95, + "learning_rate": 4.2522979832624505e-05, + "loss": 1.3896, + "step": 1417000 + }, + { + "epoch": 14.96, + "learning_rate": 4.252034150511308e-05, + "loss": 1.4424, + "step": 1417500 + }, + { + "epoch": 14.96, + "learning_rate": 4.2517703177601656e-05, + "loss": 1.449, + "step": 1418000 + }, + { + "epoch": 14.97, + "learning_rate": 4.251506485009023e-05, + "loss": 1.4416, + "step": 1418500 + }, + { + "epoch": 14.98, + "learning_rate": 4.251242652257881e-05, + "loss": 1.4354, + "step": 1419000 + }, + { + "epoch": 14.98, + "learning_rate": 4.250978819506738e-05, + "loss": 1.4101, + "step": 1419500 + }, + { + "epoch": 14.99, + "learning_rate": 4.2507149867555965e-05, + "loss": 1.4349, + "step": 1420000 + }, + { + "epoch": 14.99, + "learning_rate": 4.250451154004454e-05, + "loss": 1.386, + "step": 1420500 + }, + { + "epoch": 15.0, + "learning_rate": 4.250187321253311e-05, + "loss": 1.4987, + "step": 1421000 + }, + { + "epoch": 15.0, + "learning_rate": 4.249923488502169e-05, + "loss": 1.4536, + "step": 1421500 + }, + { + "epoch": 15.01, + "learning_rate": 4.249659655751027e-05, + "loss": 1.4279, + "step": 1422000 + }, + { + "epoch": 15.01, + "learning_rate": 4.249395822999884e-05, + "loss": 1.4002, + "step": 1422500 + }, + { + "epoch": 15.02, + "learning_rate": 4.249131990248742e-05, + "loss": 1.4103, + "step": 1423000 + }, + { + "epoch": 15.02, + "learning_rate": 4.2488681574975993e-05, + "loss": 1.4357, + "step": 1423500 + }, + { + "epoch": 15.03, + "learning_rate": 4.248604324746457e-05, + "loss": 1.3906, + "step": 1424000 + }, + { + "epoch": 15.03, + "learning_rate": 4.2483404919953144e-05, + "loss": 1.3773, + "step": 1424500 + }, + { + "epoch": 15.04, + "learning_rate": 4.248076659244172e-05, + "loss": 1.3828, + "step": 1425000 + }, + { + "epoch": 15.04, + "learning_rate": 4.2478128264930295e-05, + "loss": 1.4608, + "step": 1425500 + }, + { + "epoch": 15.05, + "learning_rate": 4.247548993741887e-05, + "loss": 1.4179, + "step": 1426000 + }, + { + "epoch": 15.05, + "learning_rate": 4.2472851609907446e-05, + "loss": 1.3724, + "step": 1426500 + }, + { + "epoch": 15.06, + "learning_rate": 4.247021328239603e-05, + "loss": 1.4252, + "step": 1427000 + }, + { + "epoch": 15.06, + "learning_rate": 4.2467574954884604e-05, + "loss": 1.4156, + "step": 1427500 + }, + { + "epoch": 15.07, + "learning_rate": 4.246493662737317e-05, + "loss": 1.4056, + "step": 1428000 + }, + { + "epoch": 15.08, + "learning_rate": 4.2462298299861755e-05, + "loss": 1.4576, + "step": 1428500 + }, + { + "epoch": 15.08, + "learning_rate": 4.245965997235033e-05, + "loss": 1.4172, + "step": 1429000 + }, + { + "epoch": 15.09, + "learning_rate": 4.2457021644838906e-05, + "loss": 1.4121, + "step": 1429500 + }, + { + "epoch": 15.09, + "learning_rate": 4.245438331732748e-05, + "loss": 1.4266, + "step": 1430000 + }, + { + "epoch": 15.1, + "learning_rate": 4.245174498981606e-05, + "loss": 1.4192, + "step": 1430500 + }, + { + "epoch": 15.1, + "learning_rate": 4.244910666230463e-05, + "loss": 1.4386, + "step": 1431000 + }, + { + "epoch": 15.11, + "learning_rate": 4.244646833479321e-05, + "loss": 1.3915, + "step": 1431500 + }, + { + "epoch": 15.11, + "learning_rate": 4.244383000728179e-05, + "loss": 1.4181, + "step": 1432000 + }, + { + "epoch": 15.12, + "learning_rate": 4.2441191679770366e-05, + "loss": 1.4722, + "step": 1432500 + }, + { + "epoch": 15.12, + "learning_rate": 4.2438553352258934e-05, + "loss": 1.4196, + "step": 1433000 + }, + { + "epoch": 15.13, + "learning_rate": 4.243591502474752e-05, + "loss": 1.4048, + "step": 1433500 + }, + { + "epoch": 15.13, + "learning_rate": 4.243327669723609e-05, + "loss": 1.4722, + "step": 1434000 + }, + { + "epoch": 15.14, + "learning_rate": 4.243063836972467e-05, + "loss": 1.4158, + "step": 1434500 + }, + { + "epoch": 15.14, + "learning_rate": 4.2428000042213236e-05, + "loss": 1.4238, + "step": 1435000 + }, + { + "epoch": 15.15, + "learning_rate": 4.242536171470182e-05, + "loss": 1.3842, + "step": 1435500 + }, + { + "epoch": 15.15, + "learning_rate": 4.2422723387190394e-05, + "loss": 1.4521, + "step": 1436000 + }, + { + "epoch": 15.16, + "learning_rate": 4.242008505967897e-05, + "loss": 1.4185, + "step": 1436500 + }, + { + "epoch": 15.17, + "learning_rate": 4.2417446732167545e-05, + "loss": 1.3849, + "step": 1437000 + }, + { + "epoch": 15.17, + "learning_rate": 4.241480840465612e-05, + "loss": 1.4584, + "step": 1437500 + }, + { + "epoch": 15.18, + "learning_rate": 4.2412170077144696e-05, + "loss": 1.3937, + "step": 1438000 + }, + { + "epoch": 15.18, + "learning_rate": 4.240953174963327e-05, + "loss": 1.4838, + "step": 1438500 + }, + { + "epoch": 15.19, + "learning_rate": 4.2406893422121854e-05, + "loss": 1.4699, + "step": 1439000 + }, + { + "epoch": 15.19, + "learning_rate": 4.240425509461043e-05, + "loss": 1.4091, + "step": 1439500 + }, + { + "epoch": 15.2, + "learning_rate": 4.2401616767099e-05, + "loss": 1.4249, + "step": 1440000 + }, + { + "epoch": 15.2, + "learning_rate": 4.239897843958758e-05, + "loss": 1.4158, + "step": 1440500 + }, + { + "epoch": 15.21, + "learning_rate": 4.2396340112076156e-05, + "loss": 1.4346, + "step": 1441000 + }, + { + "epoch": 15.21, + "learning_rate": 4.239370178456473e-05, + "loss": 1.4227, + "step": 1441500 + }, + { + "epoch": 15.22, + "learning_rate": 4.239106345705331e-05, + "loss": 1.464, + "step": 1442000 + }, + { + "epoch": 15.22, + "learning_rate": 4.238842512954188e-05, + "loss": 1.4949, + "step": 1442500 + }, + { + "epoch": 15.23, + "learning_rate": 4.238578680203046e-05, + "loss": 1.4235, + "step": 1443000 + }, + { + "epoch": 15.23, + "learning_rate": 4.238314847451903e-05, + "loss": 1.4504, + "step": 1443500 + }, + { + "epoch": 15.24, + "learning_rate": 4.2380510147007616e-05, + "loss": 1.4018, + "step": 1444000 + }, + { + "epoch": 15.24, + "learning_rate": 4.2377871819496184e-05, + "loss": 1.4772, + "step": 1444500 + }, + { + "epoch": 15.25, + "learning_rate": 4.237523349198476e-05, + "loss": 1.4501, + "step": 1445000 + }, + { + "epoch": 15.25, + "learning_rate": 4.237259516447334e-05, + "loss": 1.4477, + "step": 1445500 + }, + { + "epoch": 15.26, + "learning_rate": 4.236995683696192e-05, + "loss": 1.4051, + "step": 1446000 + }, + { + "epoch": 15.27, + "learning_rate": 4.236731850945049e-05, + "loss": 1.4647, + "step": 1446500 + }, + { + "epoch": 15.27, + "learning_rate": 4.236468018193906e-05, + "loss": 1.4474, + "step": 1447000 + }, + { + "epoch": 15.28, + "learning_rate": 4.2362041854427644e-05, + "loss": 1.4842, + "step": 1447500 + }, + { + "epoch": 15.28, + "learning_rate": 4.235940352691622e-05, + "loss": 1.3837, + "step": 1448000 + }, + { + "epoch": 15.29, + "learning_rate": 4.2356765199404795e-05, + "loss": 1.3915, + "step": 1448500 + }, + { + "epoch": 15.29, + "learning_rate": 4.235412687189337e-05, + "loss": 1.4406, + "step": 1449000 + }, + { + "epoch": 15.3, + "learning_rate": 4.2351488544381946e-05, + "loss": 1.4197, + "step": 1449500 + }, + { + "epoch": 15.3, + "learning_rate": 4.234885021687052e-05, + "loss": 1.4037, + "step": 1450000 + }, + { + "epoch": 15.31, + "learning_rate": 4.23462118893591e-05, + "loss": 1.3542, + "step": 1450500 + }, + { + "epoch": 15.31, + "learning_rate": 4.234357356184768e-05, + "loss": 1.4102, + "step": 1451000 + }, + { + "epoch": 15.32, + "learning_rate": 4.2340935234336255e-05, + "loss": 1.491, + "step": 1451500 + }, + { + "epoch": 15.32, + "learning_rate": 4.233829690682482e-05, + "loss": 1.426, + "step": 1452000 + }, + { + "epoch": 15.33, + "learning_rate": 4.2335658579313406e-05, + "loss": 1.4666, + "step": 1452500 + }, + { + "epoch": 15.33, + "learning_rate": 4.233302025180198e-05, + "loss": 1.4345, + "step": 1453000 + }, + { + "epoch": 15.34, + "learning_rate": 4.2330381924290557e-05, + "loss": 1.4434, + "step": 1453500 + }, + { + "epoch": 15.34, + "learning_rate": 4.232774359677913e-05, + "loss": 1.4456, + "step": 1454000 + }, + { + "epoch": 15.35, + "learning_rate": 4.232510526926771e-05, + "loss": 1.436, + "step": 1454500 + }, + { + "epoch": 15.36, + "learning_rate": 4.232246694175628e-05, + "loss": 1.4379, + "step": 1455000 + }, + { + "epoch": 15.36, + "learning_rate": 4.231982861424486e-05, + "loss": 1.3711, + "step": 1455500 + }, + { + "epoch": 15.37, + "learning_rate": 4.231719028673344e-05, + "loss": 1.441, + "step": 1456000 + }, + { + "epoch": 15.37, + "learning_rate": 4.231455195922201e-05, + "loss": 1.4355, + "step": 1456500 + }, + { + "epoch": 15.38, + "learning_rate": 4.2311913631710585e-05, + "loss": 1.457, + "step": 1457000 + }, + { + "epoch": 15.38, + "learning_rate": 4.230927530419917e-05, + "loss": 1.4272, + "step": 1457500 + }, + { + "epoch": 15.39, + "learning_rate": 4.230663697668774e-05, + "loss": 1.426, + "step": 1458000 + }, + { + "epoch": 15.39, + "learning_rate": 4.230399864917632e-05, + "loss": 1.3966, + "step": 1458500 + }, + { + "epoch": 15.4, + "learning_rate": 4.230136032166489e-05, + "loss": 1.3711, + "step": 1459000 + }, + { + "epoch": 15.4, + "learning_rate": 4.229872199415347e-05, + "loss": 1.4381, + "step": 1459500 + }, + { + "epoch": 15.41, + "learning_rate": 4.2296083666642045e-05, + "loss": 1.4773, + "step": 1460000 + }, + { + "epoch": 15.41, + "learning_rate": 4.229344533913062e-05, + "loss": 1.426, + "step": 1460500 + }, + { + "epoch": 15.42, + "learning_rate": 4.2290807011619196e-05, + "loss": 1.3853, + "step": 1461000 + }, + { + "epoch": 15.42, + "learning_rate": 4.228816868410777e-05, + "loss": 1.378, + "step": 1461500 + }, + { + "epoch": 15.43, + "learning_rate": 4.228553035659635e-05, + "loss": 1.4238, + "step": 1462000 + }, + { + "epoch": 15.43, + "learning_rate": 4.228289202908492e-05, + "loss": 1.4442, + "step": 1462500 + }, + { + "epoch": 15.44, + "learning_rate": 4.2280253701573504e-05, + "loss": 1.4786, + "step": 1463000 + }, + { + "epoch": 15.44, + "learning_rate": 4.227761537406207e-05, + "loss": 1.4063, + "step": 1463500 + }, + { + "epoch": 15.45, + "learning_rate": 4.227497704655065e-05, + "loss": 1.4739, + "step": 1464000 + }, + { + "epoch": 15.46, + "learning_rate": 4.227233871903923e-05, + "loss": 1.4429, + "step": 1464500 + }, + { + "epoch": 15.46, + "learning_rate": 4.2269700391527806e-05, + "loss": 1.4361, + "step": 1465000 + }, + { + "epoch": 15.47, + "learning_rate": 4.226706206401638e-05, + "loss": 1.4094, + "step": 1465500 + }, + { + "epoch": 15.47, + "learning_rate": 4.226442373650496e-05, + "loss": 1.386, + "step": 1466000 + }, + { + "epoch": 15.48, + "learning_rate": 4.226178540899353e-05, + "loss": 1.4265, + "step": 1466500 + }, + { + "epoch": 15.48, + "learning_rate": 4.225914708148211e-05, + "loss": 1.4235, + "step": 1467000 + }, + { + "epoch": 15.49, + "learning_rate": 4.2256508753970684e-05, + "loss": 1.4498, + "step": 1467500 + }, + { + "epoch": 15.49, + "learning_rate": 4.2253870426459266e-05, + "loss": 1.4086, + "step": 1468000 + }, + { + "epoch": 15.5, + "learning_rate": 4.2251232098947835e-05, + "loss": 1.4511, + "step": 1468500 + }, + { + "epoch": 15.5, + "learning_rate": 4.224859377143641e-05, + "loss": 1.4505, + "step": 1469000 + }, + { + "epoch": 15.51, + "learning_rate": 4.224595544392499e-05, + "loss": 1.3893, + "step": 1469500 + }, + { + "epoch": 15.51, + "learning_rate": 4.224331711641357e-05, + "loss": 1.3945, + "step": 1470000 + }, + { + "epoch": 15.52, + "learning_rate": 4.2240678788902143e-05, + "loss": 1.473, + "step": 1470500 + }, + { + "epoch": 15.52, + "learning_rate": 4.223804046139071e-05, + "loss": 1.4292, + "step": 1471000 + }, + { + "epoch": 15.53, + "learning_rate": 4.2235402133879294e-05, + "loss": 1.3867, + "step": 1471500 + }, + { + "epoch": 15.53, + "learning_rate": 4.223276380636787e-05, + "loss": 1.4397, + "step": 1472000 + }, + { + "epoch": 15.54, + "learning_rate": 4.2230125478856445e-05, + "loss": 1.42, + "step": 1472500 + }, + { + "epoch": 15.55, + "learning_rate": 4.222748715134502e-05, + "loss": 1.4611, + "step": 1473000 + }, + { + "epoch": 15.55, + "learning_rate": 4.2224848823833596e-05, + "loss": 1.4016, + "step": 1473500 + }, + { + "epoch": 15.56, + "learning_rate": 4.222221049632217e-05, + "loss": 1.4021, + "step": 1474000 + }, + { + "epoch": 15.56, + "learning_rate": 4.221957216881075e-05, + "loss": 1.4527, + "step": 1474500 + }, + { + "epoch": 15.57, + "learning_rate": 4.221693384129933e-05, + "loss": 1.4611, + "step": 1475000 + }, + { + "epoch": 15.57, + "learning_rate": 4.22142955137879e-05, + "loss": 1.4115, + "step": 1475500 + }, + { + "epoch": 15.58, + "learning_rate": 4.2211657186276474e-05, + "loss": 1.3728, + "step": 1476000 + }, + { + "epoch": 15.58, + "learning_rate": 4.2209018858765056e-05, + "loss": 1.4799, + "step": 1476500 + }, + { + "epoch": 15.59, + "learning_rate": 4.220638053125363e-05, + "loss": 1.4472, + "step": 1477000 + }, + { + "epoch": 15.59, + "learning_rate": 4.220374220374221e-05, + "loss": 1.4324, + "step": 1477500 + }, + { + "epoch": 15.6, + "learning_rate": 4.220110387623078e-05, + "loss": 1.4254, + "step": 1478000 + }, + { + "epoch": 15.6, + "learning_rate": 4.219846554871936e-05, + "loss": 1.4333, + "step": 1478500 + }, + { + "epoch": 15.61, + "learning_rate": 4.2195827221207934e-05, + "loss": 1.4117, + "step": 1479000 + }, + { + "epoch": 15.61, + "learning_rate": 4.219318889369651e-05, + "loss": 1.4862, + "step": 1479500 + }, + { + "epoch": 15.62, + "learning_rate": 4.219055056618509e-05, + "loss": 1.3971, + "step": 1480000 + }, + { + "epoch": 15.62, + "learning_rate": 4.218791223867366e-05, + "loss": 1.4141, + "step": 1480500 + }, + { + "epoch": 15.63, + "learning_rate": 4.2185273911162235e-05, + "loss": 1.3968, + "step": 1481000 + }, + { + "epoch": 15.63, + "learning_rate": 4.218263558365082e-05, + "loss": 1.4484, + "step": 1481500 + }, + { + "epoch": 15.64, + "learning_rate": 4.217999725613939e-05, + "loss": 1.4697, + "step": 1482000 + }, + { + "epoch": 15.65, + "learning_rate": 4.217735892862796e-05, + "loss": 1.4498, + "step": 1482500 + }, + { + "epoch": 15.65, + "learning_rate": 4.217472060111654e-05, + "loss": 1.4286, + "step": 1483000 + }, + { + "epoch": 15.66, + "learning_rate": 4.217208227360512e-05, + "loss": 1.4066, + "step": 1483500 + }, + { + "epoch": 15.66, + "learning_rate": 4.2169443946093695e-05, + "loss": 1.4835, + "step": 1484000 + }, + { + "epoch": 15.67, + "learning_rate": 4.216680561858227e-05, + "loss": 1.4679, + "step": 1484500 + }, + { + "epoch": 15.67, + "learning_rate": 4.2164167291070846e-05, + "loss": 1.3883, + "step": 1485000 + }, + { + "epoch": 15.68, + "learning_rate": 4.216152896355942e-05, + "loss": 1.3922, + "step": 1485500 + }, + { + "epoch": 15.68, + "learning_rate": 4.2158890636048e-05, + "loss": 1.4424, + "step": 1486000 + }, + { + "epoch": 15.69, + "learning_rate": 4.215625230853657e-05, + "loss": 1.4408, + "step": 1486500 + }, + { + "epoch": 15.69, + "learning_rate": 4.2153613981025155e-05, + "loss": 1.3841, + "step": 1487000 + }, + { + "epoch": 15.7, + "learning_rate": 4.2150975653513724e-05, + "loss": 1.4434, + "step": 1487500 + }, + { + "epoch": 15.7, + "learning_rate": 4.21483373260023e-05, + "loss": 1.3981, + "step": 1488000 + }, + { + "epoch": 15.71, + "learning_rate": 4.214569899849088e-05, + "loss": 1.446, + "step": 1488500 + }, + { + "epoch": 15.71, + "learning_rate": 4.214306067097946e-05, + "loss": 1.4333, + "step": 1489000 + }, + { + "epoch": 15.72, + "learning_rate": 4.214042234346803e-05, + "loss": 1.4608, + "step": 1489500 + }, + { + "epoch": 15.72, + "learning_rate": 4.213778401595661e-05, + "loss": 1.4301, + "step": 1490000 + }, + { + "epoch": 15.73, + "learning_rate": 4.213514568844518e-05, + "loss": 1.409, + "step": 1490500 + }, + { + "epoch": 15.73, + "learning_rate": 4.213250736093376e-05, + "loss": 1.4312, + "step": 1491000 + }, + { + "epoch": 15.74, + "learning_rate": 4.2129869033422334e-05, + "loss": 1.4541, + "step": 1491500 + }, + { + "epoch": 15.75, + "learning_rate": 4.212723070591091e-05, + "loss": 1.4487, + "step": 1492000 + }, + { + "epoch": 15.75, + "learning_rate": 4.2124592378399485e-05, + "loss": 1.4082, + "step": 1492500 + }, + { + "epoch": 15.76, + "learning_rate": 4.212195405088806e-05, + "loss": 1.4783, + "step": 1493000 + }, + { + "epoch": 15.76, + "learning_rate": 4.211931572337664e-05, + "loss": 1.3769, + "step": 1493500 + }, + { + "epoch": 15.77, + "learning_rate": 4.211667739586522e-05, + "loss": 1.3937, + "step": 1494000 + }, + { + "epoch": 15.77, + "learning_rate": 4.211403906835379e-05, + "loss": 1.4023, + "step": 1494500 + }, + { + "epoch": 15.78, + "learning_rate": 4.211140074084237e-05, + "loss": 1.4389, + "step": 1495000 + }, + { + "epoch": 15.78, + "learning_rate": 4.2108762413330945e-05, + "loss": 1.4852, + "step": 1495500 + }, + { + "epoch": 15.79, + "learning_rate": 4.210612408581952e-05, + "loss": 1.3463, + "step": 1496000 + }, + { + "epoch": 15.79, + "learning_rate": 4.2103485758308096e-05, + "loss": 1.4446, + "step": 1496500 + }, + { + "epoch": 15.8, + "learning_rate": 4.210084743079667e-05, + "loss": 1.4165, + "step": 1497000 + }, + { + "epoch": 15.8, + "learning_rate": 4.209820910328525e-05, + "loss": 1.4259, + "step": 1497500 + }, + { + "epoch": 15.81, + "learning_rate": 4.209557077577382e-05, + "loss": 1.413, + "step": 1498000 + }, + { + "epoch": 15.81, + "learning_rate": 4.20929324482624e-05, + "loss": 1.4454, + "step": 1498500 + }, + { + "epoch": 15.82, + "learning_rate": 4.209029412075098e-05, + "loss": 1.4524, + "step": 1499000 + }, + { + "epoch": 15.82, + "learning_rate": 4.208765579323955e-05, + "loss": 1.4086, + "step": 1499500 + }, + { + "epoch": 15.83, + "learning_rate": 4.2085017465728124e-05, + "loss": 1.4335, + "step": 1500000 + }, + { + "epoch": 15.84, + "learning_rate": 4.2082379138216707e-05, + "loss": 1.4369, + "step": 1500500 + }, + { + "epoch": 15.84, + "learning_rate": 4.207974081070528e-05, + "loss": 1.4312, + "step": 1501000 + }, + { + "epoch": 15.85, + "learning_rate": 4.207710248319385e-05, + "loss": 1.4136, + "step": 1501500 + }, + { + "epoch": 15.85, + "learning_rate": 4.207446415568243e-05, + "loss": 1.4283, + "step": 1502000 + }, + { + "epoch": 15.86, + "learning_rate": 4.207182582817101e-05, + "loss": 1.4503, + "step": 1502500 + }, + { + "epoch": 15.86, + "learning_rate": 4.2069187500659584e-05, + "loss": 1.4378, + "step": 1503000 + }, + { + "epoch": 15.87, + "learning_rate": 4.206654917314816e-05, + "loss": 1.4772, + "step": 1503500 + }, + { + "epoch": 15.87, + "learning_rate": 4.2063910845636735e-05, + "loss": 1.4235, + "step": 1504000 + }, + { + "epoch": 15.88, + "learning_rate": 4.206127251812531e-05, + "loss": 1.4441, + "step": 1504500 + }, + { + "epoch": 15.88, + "learning_rate": 4.2058634190613886e-05, + "loss": 1.4727, + "step": 1505000 + }, + { + "epoch": 15.89, + "learning_rate": 4.205599586310247e-05, + "loss": 1.4559, + "step": 1505500 + }, + { + "epoch": 15.89, + "learning_rate": 4.2053357535591044e-05, + "loss": 1.4154, + "step": 1506000 + }, + { + "epoch": 15.9, + "learning_rate": 4.205071920807961e-05, + "loss": 1.4288, + "step": 1506500 + }, + { + "epoch": 15.9, + "learning_rate": 4.2048080880568195e-05, + "loss": 1.3847, + "step": 1507000 + }, + { + "epoch": 15.91, + "learning_rate": 4.204544255305677e-05, + "loss": 1.5109, + "step": 1507500 + }, + { + "epoch": 15.91, + "learning_rate": 4.2042804225545346e-05, + "loss": 1.4625, + "step": 1508000 + }, + { + "epoch": 15.92, + "learning_rate": 4.204016589803392e-05, + "loss": 1.4261, + "step": 1508500 + }, + { + "epoch": 15.92, + "learning_rate": 4.20375275705225e-05, + "loss": 1.4479, + "step": 1509000 + }, + { + "epoch": 15.93, + "learning_rate": 4.203488924301107e-05, + "loss": 1.4176, + "step": 1509500 + }, + { + "epoch": 15.94, + "learning_rate": 4.203225091549965e-05, + "loss": 1.3624, + "step": 1510000 + }, + { + "epoch": 15.94, + "learning_rate": 4.202961258798822e-05, + "loss": 1.414, + "step": 1510500 + }, + { + "epoch": 15.95, + "learning_rate": 4.20269742604768e-05, + "loss": 1.447, + "step": 1511000 + }, + { + "epoch": 15.95, + "learning_rate": 4.2024335932965374e-05, + "loss": 1.4516, + "step": 1511500 + }, + { + "epoch": 15.96, + "learning_rate": 4.202169760545395e-05, + "loss": 1.3872, + "step": 1512000 + }, + { + "epoch": 15.96, + "learning_rate": 4.201905927794253e-05, + "loss": 1.4211, + "step": 1512500 + }, + { + "epoch": 15.97, + "learning_rate": 4.201642095043111e-05, + "loss": 1.4552, + "step": 1513000 + }, + { + "epoch": 15.97, + "learning_rate": 4.2013782622919676e-05, + "loss": 1.3732, + "step": 1513500 + }, + { + "epoch": 15.98, + "learning_rate": 4.201114429540826e-05, + "loss": 1.4778, + "step": 1514000 + }, + { + "epoch": 15.98, + "learning_rate": 4.2008505967896834e-05, + "loss": 1.3997, + "step": 1514500 + }, + { + "epoch": 15.99, + "learning_rate": 4.200586764038541e-05, + "loss": 1.4493, + "step": 1515000 + }, + { + "epoch": 15.99, + "learning_rate": 4.2003229312873985e-05, + "loss": 1.4036, + "step": 1515500 + }, + { + "epoch": 16.0, + "learning_rate": 4.200059098536256e-05, + "loss": 1.406, + "step": 1516000 + }, + { + "epoch": 16.0, + "learning_rate": 4.1997952657851136e-05, + "loss": 1.4399, + "step": 1516500 + }, + { + "epoch": 16.01, + "learning_rate": 4.199531433033971e-05, + "loss": 1.4463, + "step": 1517000 + }, + { + "epoch": 16.01, + "learning_rate": 4.1992676002828293e-05, + "loss": 1.4318, + "step": 1517500 + }, + { + "epoch": 16.02, + "learning_rate": 4.199003767531687e-05, + "loss": 1.4135, + "step": 1518000 + }, + { + "epoch": 16.03, + "learning_rate": 4.198739934780544e-05, + "loss": 1.4512, + "step": 1518500 + }, + { + "epoch": 16.03, + "learning_rate": 4.198476102029402e-05, + "loss": 1.3983, + "step": 1519000 + }, + { + "epoch": 16.04, + "learning_rate": 4.1982122692782595e-05, + "loss": 1.4161, + "step": 1519500 + }, + { + "epoch": 16.04, + "learning_rate": 4.197948436527117e-05, + "loss": 1.423, + "step": 1520000 + }, + { + "epoch": 16.05, + "learning_rate": 4.197684603775974e-05, + "loss": 1.4534, + "step": 1520500 + }, + { + "epoch": 16.05, + "learning_rate": 4.197420771024832e-05, + "loss": 1.471, + "step": 1521000 + }, + { + "epoch": 16.06, + "learning_rate": 4.19715693827369e-05, + "loss": 1.4596, + "step": 1521500 + }, + { + "epoch": 16.06, + "learning_rate": 4.196893105522547e-05, + "loss": 1.4405, + "step": 1522000 + }, + { + "epoch": 16.07, + "learning_rate": 4.196629272771405e-05, + "loss": 1.4176, + "step": 1522500 + }, + { + "epoch": 16.07, + "learning_rate": 4.1963654400202624e-05, + "loss": 1.4532, + "step": 1523000 + }, + { + "epoch": 16.08, + "learning_rate": 4.19610160726912e-05, + "loss": 1.3659, + "step": 1523500 + }, + { + "epoch": 16.08, + "learning_rate": 4.1958377745179775e-05, + "loss": 1.4079, + "step": 1524000 + }, + { + "epoch": 16.09, + "learning_rate": 4.195573941766836e-05, + "loss": 1.3446, + "step": 1524500 + }, + { + "epoch": 16.09, + "learning_rate": 4.195310109015693e-05, + "loss": 1.4858, + "step": 1525000 + }, + { + "epoch": 16.1, + "learning_rate": 4.19504627626455e-05, + "loss": 1.4956, + "step": 1525500 + }, + { + "epoch": 16.1, + "learning_rate": 4.1947824435134084e-05, + "loss": 1.4188, + "step": 1526000 + }, + { + "epoch": 16.11, + "learning_rate": 4.194518610762266e-05, + "loss": 1.415, + "step": 1526500 + }, + { + "epoch": 16.11, + "learning_rate": 4.1942547780111235e-05, + "loss": 1.4134, + "step": 1527000 + }, + { + "epoch": 16.12, + "learning_rate": 4.193990945259981e-05, + "loss": 1.414, + "step": 1527500 + }, + { + "epoch": 16.13, + "learning_rate": 4.1937271125088385e-05, + "loss": 1.3932, + "step": 1528000 + }, + { + "epoch": 16.13, + "learning_rate": 4.193463279757696e-05, + "loss": 1.3962, + "step": 1528500 + }, + { + "epoch": 16.14, + "learning_rate": 4.1931994470065536e-05, + "loss": 1.4497, + "step": 1529000 + }, + { + "epoch": 16.14, + "learning_rate": 4.192935614255412e-05, + "loss": 1.4519, + "step": 1529500 + }, + { + "epoch": 16.15, + "learning_rate": 4.192671781504269e-05, + "loss": 1.3972, + "step": 1530000 + }, + { + "epoch": 16.15, + "learning_rate": 4.192407948753126e-05, + "loss": 1.4172, + "step": 1530500 + }, + { + "epoch": 16.16, + "learning_rate": 4.1921441160019845e-05, + "loss": 1.4292, + "step": 1531000 + }, + { + "epoch": 16.16, + "learning_rate": 4.191880283250842e-05, + "loss": 1.4052, + "step": 1531500 + }, + { + "epoch": 16.17, + "learning_rate": 4.1916164504996996e-05, + "loss": 1.4259, + "step": 1532000 + }, + { + "epoch": 16.17, + "learning_rate": 4.1913526177485565e-05, + "loss": 1.3816, + "step": 1532500 + }, + { + "epoch": 16.18, + "learning_rate": 4.191088784997415e-05, + "loss": 1.3892, + "step": 1533000 + }, + { + "epoch": 16.18, + "learning_rate": 4.190824952246272e-05, + "loss": 1.4227, + "step": 1533500 + }, + { + "epoch": 16.19, + "learning_rate": 4.19056111949513e-05, + "loss": 1.4109, + "step": 1534000 + }, + { + "epoch": 16.19, + "learning_rate": 4.1902972867439874e-05, + "loss": 1.4403, + "step": 1534500 + }, + { + "epoch": 16.2, + "learning_rate": 4.190033453992845e-05, + "loss": 1.4506, + "step": 1535000 + }, + { + "epoch": 16.2, + "learning_rate": 4.1897696212417025e-05, + "loss": 1.4061, + "step": 1535500 + }, + { + "epoch": 16.21, + "learning_rate": 4.18950578849056e-05, + "loss": 1.4415, + "step": 1536000 + }, + { + "epoch": 16.22, + "learning_rate": 4.189241955739418e-05, + "loss": 1.4242, + "step": 1536500 + }, + { + "epoch": 16.22, + "learning_rate": 4.188978122988276e-05, + "loss": 1.401, + "step": 1537000 + }, + { + "epoch": 16.23, + "learning_rate": 4.1887142902371327e-05, + "loss": 1.4078, + "step": 1537500 + }, + { + "epoch": 16.23, + "learning_rate": 4.188450457485991e-05, + "loss": 1.4105, + "step": 1538000 + }, + { + "epoch": 16.24, + "learning_rate": 4.1881866247348484e-05, + "loss": 1.3957, + "step": 1538500 + }, + { + "epoch": 16.24, + "learning_rate": 4.187922791983706e-05, + "loss": 1.3706, + "step": 1539000 + }, + { + "epoch": 16.25, + "learning_rate": 4.1876589592325635e-05, + "loss": 1.4192, + "step": 1539500 + }, + { + "epoch": 16.25, + "learning_rate": 4.187395126481421e-05, + "loss": 1.403, + "step": 1540000 + }, + { + "epoch": 16.26, + "learning_rate": 4.1871312937302786e-05, + "loss": 1.4355, + "step": 1540500 + }, + { + "epoch": 16.26, + "learning_rate": 4.186867460979136e-05, + "loss": 1.3653, + "step": 1541000 + }, + { + "epoch": 16.27, + "learning_rate": 4.1866036282279944e-05, + "loss": 1.4254, + "step": 1541500 + }, + { + "epoch": 16.27, + "learning_rate": 4.186339795476851e-05, + "loss": 1.3987, + "step": 1542000 + }, + { + "epoch": 16.28, + "learning_rate": 4.186075962725709e-05, + "loss": 1.3716, + "step": 1542500 + }, + { + "epoch": 16.28, + "learning_rate": 4.185812129974567e-05, + "loss": 1.3712, + "step": 1543000 + }, + { + "epoch": 16.29, + "learning_rate": 4.1855482972234246e-05, + "loss": 1.4048, + "step": 1543500 + }, + { + "epoch": 16.29, + "learning_rate": 4.185284464472282e-05, + "loss": 1.3838, + "step": 1544000 + }, + { + "epoch": 16.3, + "learning_rate": 4.185020631721139e-05, + "loss": 1.4156, + "step": 1544500 + }, + { + "epoch": 16.3, + "learning_rate": 4.184756798969997e-05, + "loss": 1.4102, + "step": 1545000 + }, + { + "epoch": 16.31, + "learning_rate": 4.184492966218855e-05, + "loss": 1.4394, + "step": 1545500 + }, + { + "epoch": 16.32, + "learning_rate": 4.184229133467712e-05, + "loss": 1.4413, + "step": 1546000 + }, + { + "epoch": 16.32, + "learning_rate": 4.1839653007165706e-05, + "loss": 1.4344, + "step": 1546500 + }, + { + "epoch": 16.33, + "learning_rate": 4.1837014679654274e-05, + "loss": 1.3868, + "step": 1547000 + }, + { + "epoch": 16.33, + "learning_rate": 4.183437635214285e-05, + "loss": 1.4639, + "step": 1547500 + }, + { + "epoch": 16.34, + "learning_rate": 4.1831738024631425e-05, + "loss": 1.3568, + "step": 1548000 + }, + { + "epoch": 16.34, + "learning_rate": 4.182909969712001e-05, + "loss": 1.4366, + "step": 1548500 + }, + { + "epoch": 16.35, + "learning_rate": 4.1826461369608576e-05, + "loss": 1.4548, + "step": 1549000 + }, + { + "epoch": 16.35, + "learning_rate": 4.182382304209715e-05, + "loss": 1.4414, + "step": 1549500 + }, + { + "epoch": 16.36, + "learning_rate": 4.1821184714585734e-05, + "loss": 1.4497, + "step": 1550000 + }, + { + "epoch": 16.36, + "learning_rate": 4.181854638707431e-05, + "loss": 1.4126, + "step": 1550500 + }, + { + "epoch": 16.37, + "learning_rate": 4.1815908059562885e-05, + "loss": 1.4577, + "step": 1551000 + }, + { + "epoch": 16.37, + "learning_rate": 4.181326973205146e-05, + "loss": 1.3501, + "step": 1551500 + }, + { + "epoch": 16.38, + "learning_rate": 4.1810631404540036e-05, + "loss": 1.4447, + "step": 1552000 + }, + { + "epoch": 16.38, + "learning_rate": 4.180799307702861e-05, + "loss": 1.4166, + "step": 1552500 + }, + { + "epoch": 16.39, + "learning_rate": 4.180535474951719e-05, + "loss": 1.4606, + "step": 1553000 + }, + { + "epoch": 16.39, + "learning_rate": 4.180271642200577e-05, + "loss": 1.4318, + "step": 1553500 + }, + { + "epoch": 16.4, + "learning_rate": 4.180007809449434e-05, + "loss": 1.4348, + "step": 1554000 + }, + { + "epoch": 16.41, + "learning_rate": 4.1797439766982913e-05, + "loss": 1.4005, + "step": 1554500 + }, + { + "epoch": 16.41, + "learning_rate": 4.1794801439471496e-05, + "loss": 1.412, + "step": 1555000 + }, + { + "epoch": 16.42, + "learning_rate": 4.179216311196007e-05, + "loss": 1.4202, + "step": 1555500 + }, + { + "epoch": 16.42, + "learning_rate": 4.178952478444865e-05, + "loss": 1.4169, + "step": 1556000 + }, + { + "epoch": 16.43, + "learning_rate": 4.1786886456937215e-05, + "loss": 1.4006, + "step": 1556500 + }, + { + "epoch": 16.43, + "learning_rate": 4.17842481294258e-05, + "loss": 1.3605, + "step": 1557000 + }, + { + "epoch": 16.44, + "learning_rate": 4.178160980191437e-05, + "loss": 1.3829, + "step": 1557500 + }, + { + "epoch": 16.44, + "learning_rate": 4.177897147440295e-05, + "loss": 1.4698, + "step": 1558000 + }, + { + "epoch": 16.45, + "learning_rate": 4.1776333146891524e-05, + "loss": 1.4752, + "step": 1558500 + }, + { + "epoch": 16.45, + "learning_rate": 4.17736948193801e-05, + "loss": 1.4138, + "step": 1559000 + }, + { + "epoch": 16.46, + "learning_rate": 4.1771056491868675e-05, + "loss": 1.4337, + "step": 1559500 + }, + { + "epoch": 16.46, + "learning_rate": 4.176841816435725e-05, + "loss": 1.416, + "step": 1560000 + }, + { + "epoch": 16.47, + "learning_rate": 4.176577983684583e-05, + "loss": 1.4478, + "step": 1560500 + }, + { + "epoch": 16.47, + "learning_rate": 4.17631415093344e-05, + "loss": 1.382, + "step": 1561000 + }, + { + "epoch": 16.48, + "learning_rate": 4.176050318182298e-05, + "loss": 1.4176, + "step": 1561500 + }, + { + "epoch": 16.48, + "learning_rate": 4.175786485431156e-05, + "loss": 1.3608, + "step": 1562000 + }, + { + "epoch": 16.49, + "learning_rate": 4.1755226526800135e-05, + "loss": 1.4934, + "step": 1562500 + }, + { + "epoch": 16.49, + "learning_rate": 4.175258819928871e-05, + "loss": 1.4622, + "step": 1563000 + }, + { + "epoch": 16.5, + "learning_rate": 4.1749949871777286e-05, + "loss": 1.4154, + "step": 1563500 + }, + { + "epoch": 16.51, + "learning_rate": 4.174731154426586e-05, + "loss": 1.4503, + "step": 1564000 + }, + { + "epoch": 16.51, + "learning_rate": 4.174467321675444e-05, + "loss": 1.3952, + "step": 1564500 + }, + { + "epoch": 16.52, + "learning_rate": 4.174203488924301e-05, + "loss": 1.4024, + "step": 1565000 + }, + { + "epoch": 16.52, + "learning_rate": 4.1739396561731594e-05, + "loss": 1.4291, + "step": 1565500 + }, + { + "epoch": 16.53, + "learning_rate": 4.173675823422016e-05, + "loss": 1.5112, + "step": 1566000 + }, + { + "epoch": 16.53, + "learning_rate": 4.173411990670874e-05, + "loss": 1.4522, + "step": 1566500 + }, + { + "epoch": 16.54, + "learning_rate": 4.173148157919732e-05, + "loss": 1.4065, + "step": 1567000 + }, + { + "epoch": 16.54, + "learning_rate": 4.1728843251685896e-05, + "loss": 1.4686, + "step": 1567500 + }, + { + "epoch": 16.55, + "learning_rate": 4.1726204924174465e-05, + "loss": 1.4315, + "step": 1568000 + }, + { + "epoch": 16.55, + "learning_rate": 4.172356659666305e-05, + "loss": 1.4794, + "step": 1568500 + }, + { + "epoch": 16.56, + "learning_rate": 4.172092826915162e-05, + "loss": 1.4398, + "step": 1569000 + }, + { + "epoch": 16.56, + "learning_rate": 4.17182899416402e-05, + "loss": 1.4415, + "step": 1569500 + }, + { + "epoch": 16.57, + "learning_rate": 4.1715651614128774e-05, + "loss": 1.4995, + "step": 1570000 + }, + { + "epoch": 16.57, + "learning_rate": 4.171301328661735e-05, + "loss": 1.4024, + "step": 1570500 + }, + { + "epoch": 16.58, + "learning_rate": 4.1710374959105925e-05, + "loss": 1.3879, + "step": 1571000 + }, + { + "epoch": 16.58, + "learning_rate": 4.17077366315945e-05, + "loss": 1.4432, + "step": 1571500 + }, + { + "epoch": 16.59, + "learning_rate": 4.1705098304083076e-05, + "loss": 1.4236, + "step": 1572000 + }, + { + "epoch": 16.6, + "learning_rate": 4.170245997657166e-05, + "loss": 1.4061, + "step": 1572500 + }, + { + "epoch": 16.6, + "learning_rate": 4.169982164906023e-05, + "loss": 1.4371, + "step": 1573000 + }, + { + "epoch": 16.61, + "learning_rate": 4.16971833215488e-05, + "loss": 1.4286, + "step": 1573500 + }, + { + "epoch": 16.61, + "learning_rate": 4.1694544994037385e-05, + "loss": 1.4368, + "step": 1574000 + }, + { + "epoch": 16.62, + "learning_rate": 4.169190666652596e-05, + "loss": 1.3684, + "step": 1574500 + }, + { + "epoch": 16.62, + "learning_rate": 4.1689268339014535e-05, + "loss": 1.4477, + "step": 1575000 + }, + { + "epoch": 16.63, + "learning_rate": 4.168663001150311e-05, + "loss": 1.3976, + "step": 1575500 + }, + { + "epoch": 16.63, + "learning_rate": 4.1683991683991686e-05, + "loss": 1.3676, + "step": 1576000 + }, + { + "epoch": 16.64, + "learning_rate": 4.168135335648026e-05, + "loss": 1.3727, + "step": 1576500 + }, + { + "epoch": 16.64, + "learning_rate": 4.167871502896884e-05, + "loss": 1.4657, + "step": 1577000 + }, + { + "epoch": 16.65, + "learning_rate": 4.167607670145741e-05, + "loss": 1.4331, + "step": 1577500 + }, + { + "epoch": 16.65, + "learning_rate": 4.167343837394599e-05, + "loss": 1.4182, + "step": 1578000 + }, + { + "epoch": 16.66, + "learning_rate": 4.1670800046434564e-05, + "loss": 1.4205, + "step": 1578500 + }, + { + "epoch": 16.66, + "learning_rate": 4.1668161718923146e-05, + "loss": 1.3848, + "step": 1579000 + }, + { + "epoch": 16.67, + "learning_rate": 4.166552339141172e-05, + "loss": 1.4162, + "step": 1579500 + }, + { + "epoch": 16.67, + "learning_rate": 4.166288506390029e-05, + "loss": 1.4357, + "step": 1580000 + }, + { + "epoch": 16.68, + "learning_rate": 4.166024673638887e-05, + "loss": 1.4136, + "step": 1580500 + }, + { + "epoch": 16.68, + "learning_rate": 4.165760840887745e-05, + "loss": 1.4672, + "step": 1581000 + }, + { + "epoch": 16.69, + "learning_rate": 4.1654970081366024e-05, + "loss": 1.4228, + "step": 1581500 + }, + { + "epoch": 16.7, + "learning_rate": 4.16523317538546e-05, + "loss": 1.3872, + "step": 1582000 + }, + { + "epoch": 16.7, + "learning_rate": 4.1649693426343175e-05, + "loss": 1.4336, + "step": 1582500 + }, + { + "epoch": 16.71, + "learning_rate": 4.164705509883175e-05, + "loss": 1.4257, + "step": 1583000 + }, + { + "epoch": 16.71, + "learning_rate": 4.1644416771320326e-05, + "loss": 1.4025, + "step": 1583500 + }, + { + "epoch": 16.72, + "learning_rate": 4.16417784438089e-05, + "loss": 1.388, + "step": 1584000 + }, + { + "epoch": 16.72, + "learning_rate": 4.163914011629748e-05, + "loss": 1.4136, + "step": 1584500 + }, + { + "epoch": 16.73, + "learning_rate": 4.163650178878605e-05, + "loss": 1.4691, + "step": 1585000 + }, + { + "epoch": 16.73, + "learning_rate": 4.163386346127463e-05, + "loss": 1.405, + "step": 1585500 + }, + { + "epoch": 16.74, + "learning_rate": 4.163122513376321e-05, + "loss": 1.4426, + "step": 1586000 + }, + { + "epoch": 16.74, + "learning_rate": 4.1628586806251785e-05, + "loss": 1.4439, + "step": 1586500 + }, + { + "epoch": 16.75, + "learning_rate": 4.1625948478740354e-05, + "loss": 1.4723, + "step": 1587000 + }, + { + "epoch": 16.75, + "learning_rate": 4.1623310151228936e-05, + "loss": 1.3888, + "step": 1587500 + }, + { + "epoch": 16.76, + "learning_rate": 4.162067182371751e-05, + "loss": 1.3892, + "step": 1588000 + }, + { + "epoch": 16.76, + "learning_rate": 4.161803349620609e-05, + "loss": 1.4069, + "step": 1588500 + }, + { + "epoch": 16.77, + "learning_rate": 4.161539516869466e-05, + "loss": 1.3856, + "step": 1589000 + }, + { + "epoch": 16.77, + "learning_rate": 4.161275684118324e-05, + "loss": 1.3715, + "step": 1589500 + }, + { + "epoch": 16.78, + "learning_rate": 4.1610118513671814e-05, + "loss": 1.416, + "step": 1590000 + }, + { + "epoch": 16.79, + "learning_rate": 4.160748018616039e-05, + "loss": 1.4378, + "step": 1590500 + }, + { + "epoch": 16.79, + "learning_rate": 4.160484185864897e-05, + "loss": 1.4008, + "step": 1591000 + }, + { + "epoch": 16.8, + "learning_rate": 4.160220353113755e-05, + "loss": 1.4369, + "step": 1591500 + }, + { + "epoch": 16.8, + "learning_rate": 4.1599565203626116e-05, + "loss": 1.4007, + "step": 1592000 + }, + { + "epoch": 16.81, + "learning_rate": 4.15969268761147e-05, + "loss": 1.4348, + "step": 1592500 + }, + { + "epoch": 16.81, + "learning_rate": 4.159428854860327e-05, + "loss": 1.3795, + "step": 1593000 + }, + { + "epoch": 16.82, + "learning_rate": 4.159165022109185e-05, + "loss": 1.4096, + "step": 1593500 + }, + { + "epoch": 16.82, + "learning_rate": 4.1589011893580424e-05, + "loss": 1.438, + "step": 1594000 + }, + { + "epoch": 16.83, + "learning_rate": 4.1586373566069e-05, + "loss": 1.4782, + "step": 1594500 + }, + { + "epoch": 16.83, + "learning_rate": 4.1583735238557575e-05, + "loss": 1.4174, + "step": 1595000 + }, + { + "epoch": 16.84, + "learning_rate": 4.158109691104615e-05, + "loss": 1.4149, + "step": 1595500 + }, + { + "epoch": 16.84, + "learning_rate": 4.1578458583534726e-05, + "loss": 1.4173, + "step": 1596000 + }, + { + "epoch": 16.85, + "learning_rate": 4.15758202560233e-05, + "loss": 1.473, + "step": 1596500 + }, + { + "epoch": 16.85, + "learning_rate": 4.157318192851188e-05, + "loss": 1.4163, + "step": 1597000 + }, + { + "epoch": 16.86, + "learning_rate": 4.157054360100045e-05, + "loss": 1.4344, + "step": 1597500 + }, + { + "epoch": 16.86, + "learning_rate": 4.1567905273489035e-05, + "loss": 1.4571, + "step": 1598000 + }, + { + "epoch": 16.87, + "learning_rate": 4.156526694597761e-05, + "loss": 1.3993, + "step": 1598500 + }, + { + "epoch": 16.87, + "learning_rate": 4.156262861846618e-05, + "loss": 1.4791, + "step": 1599000 + }, + { + "epoch": 16.88, + "learning_rate": 4.155999029095476e-05, + "loss": 1.4622, + "step": 1599500 + }, + { + "epoch": 16.89, + "learning_rate": 4.155735196344334e-05, + "loss": 1.4753, + "step": 1600000 + }, + { + "epoch": 16.89, + "learning_rate": 4.155471363593191e-05, + "loss": 1.4155, + "step": 1600500 + }, + { + "epoch": 16.9, + "learning_rate": 4.155207530842049e-05, + "loss": 1.4762, + "step": 1601000 + }, + { + "epoch": 16.9, + "learning_rate": 4.1549436980909063e-05, + "loss": 1.4654, + "step": 1601500 + }, + { + "epoch": 16.91, + "learning_rate": 4.154679865339764e-05, + "loss": 1.4434, + "step": 1602000 + }, + { + "epoch": 16.91, + "learning_rate": 4.1544160325886214e-05, + "loss": 1.4032, + "step": 1602500 + }, + { + "epoch": 16.92, + "learning_rate": 4.15415219983748e-05, + "loss": 1.39, + "step": 1603000 + }, + { + "epoch": 16.92, + "learning_rate": 4.1538883670863365e-05, + "loss": 1.3726, + "step": 1603500 + }, + { + "epoch": 16.93, + "learning_rate": 4.153624534335194e-05, + "loss": 1.4242, + "step": 1604000 + }, + { + "epoch": 16.93, + "learning_rate": 4.153360701584052e-05, + "loss": 1.4696, + "step": 1604500 + }, + { + "epoch": 16.94, + "learning_rate": 4.15309686883291e-05, + "loss": 1.4291, + "step": 1605000 + }, + { + "epoch": 16.94, + "learning_rate": 4.1528330360817674e-05, + "loss": 1.4749, + "step": 1605500 + }, + { + "epoch": 16.95, + "learning_rate": 4.152569203330624e-05, + "loss": 1.4216, + "step": 1606000 + }, + { + "epoch": 16.95, + "learning_rate": 4.1523053705794825e-05, + "loss": 1.4855, + "step": 1606500 + }, + { + "epoch": 16.96, + "learning_rate": 4.15204153782834e-05, + "loss": 1.4221, + "step": 1607000 + }, + { + "epoch": 16.96, + "learning_rate": 4.1517777050771976e-05, + "loss": 1.4627, + "step": 1607500 + }, + { + "epoch": 16.97, + "learning_rate": 4.151513872326055e-05, + "loss": 1.4187, + "step": 1608000 + }, + { + "epoch": 16.97, + "learning_rate": 4.151250039574913e-05, + "loss": 1.4203, + "step": 1608500 + }, + { + "epoch": 16.98, + "learning_rate": 4.15098620682377e-05, + "loss": 1.3503, + "step": 1609000 + }, + { + "epoch": 16.99, + "learning_rate": 4.150722374072628e-05, + "loss": 1.4109, + "step": 1609500 + }, + { + "epoch": 16.99, + "learning_rate": 4.150458541321486e-05, + "loss": 1.4218, + "step": 1610000 + }, + { + "epoch": 17.0, + "learning_rate": 4.1501947085703436e-05, + "loss": 1.4512, + "step": 1610500 + }, + { + "epoch": 17.0, + "learning_rate": 4.1499308758192004e-05, + "loss": 1.3891, + "step": 1611000 + }, + { + "epoch": 17.01, + "learning_rate": 4.149667043068059e-05, + "loss": 1.3712, + "step": 1611500 + }, + { + "epoch": 17.01, + "learning_rate": 4.149403210316916e-05, + "loss": 1.4174, + "step": 1612000 + }, + { + "epoch": 17.02, + "learning_rate": 4.149139377565774e-05, + "loss": 1.3895, + "step": 1612500 + }, + { + "epoch": 17.02, + "learning_rate": 4.148875544814631e-05, + "loss": 1.4064, + "step": 1613000 + }, + { + "epoch": 17.03, + "learning_rate": 4.148611712063489e-05, + "loss": 1.3801, + "step": 1613500 + }, + { + "epoch": 17.03, + "learning_rate": 4.1483478793123464e-05, + "loss": 1.4441, + "step": 1614000 + }, + { + "epoch": 17.04, + "learning_rate": 4.148084046561204e-05, + "loss": 1.3741, + "step": 1614500 + }, + { + "epoch": 17.04, + "learning_rate": 4.147820213810062e-05, + "loss": 1.3954, + "step": 1615000 + }, + { + "epoch": 17.05, + "learning_rate": 4.147556381058919e-05, + "loss": 1.3902, + "step": 1615500 + }, + { + "epoch": 17.05, + "learning_rate": 4.1472925483077766e-05, + "loss": 1.3911, + "step": 1616000 + }, + { + "epoch": 17.06, + "learning_rate": 4.147028715556635e-05, + "loss": 1.4318, + "step": 1616500 + }, + { + "epoch": 17.06, + "learning_rate": 4.1467648828054924e-05, + "loss": 1.4343, + "step": 1617000 + }, + { + "epoch": 17.07, + "learning_rate": 4.14650105005435e-05, + "loss": 1.4023, + "step": 1617500 + }, + { + "epoch": 17.08, + "learning_rate": 4.146237217303207e-05, + "loss": 1.4193, + "step": 1618000 + }, + { + "epoch": 17.08, + "learning_rate": 4.145973384552065e-05, + "loss": 1.4062, + "step": 1618500 + }, + { + "epoch": 17.09, + "learning_rate": 4.1457095518009226e-05, + "loss": 1.4051, + "step": 1619000 + }, + { + "epoch": 17.09, + "learning_rate": 4.14544571904978e-05, + "loss": 1.3939, + "step": 1619500 + }, + { + "epoch": 17.1, + "learning_rate": 4.1451818862986384e-05, + "loss": 1.4426, + "step": 1620000 + }, + { + "epoch": 17.1, + "learning_rate": 4.144918053547495e-05, + "loss": 1.4619, + "step": 1620500 + }, + { + "epoch": 17.11, + "learning_rate": 4.144654220796353e-05, + "loss": 1.3402, + "step": 1621000 + }, + { + "epoch": 17.11, + "learning_rate": 4.14439038804521e-05, + "loss": 1.4035, + "step": 1621500 + }, + { + "epoch": 17.12, + "learning_rate": 4.1441265552940686e-05, + "loss": 1.4382, + "step": 1622000 + }, + { + "epoch": 17.12, + "learning_rate": 4.1438627225429254e-05, + "loss": 1.3843, + "step": 1622500 + }, + { + "epoch": 17.13, + "learning_rate": 4.143598889791783e-05, + "loss": 1.3832, + "step": 1623000 + }, + { + "epoch": 17.13, + "learning_rate": 4.143335057040641e-05, + "loss": 1.358, + "step": 1623500 + }, + { + "epoch": 17.14, + "learning_rate": 4.143071224289499e-05, + "loss": 1.4259, + "step": 1624000 + }, + { + "epoch": 17.14, + "learning_rate": 4.142807391538356e-05, + "loss": 1.3565, + "step": 1624500 + }, + { + "epoch": 17.15, + "learning_rate": 4.142543558787214e-05, + "loss": 1.4238, + "step": 1625000 + }, + { + "epoch": 17.15, + "learning_rate": 4.1422797260360714e-05, + "loss": 1.4076, + "step": 1625500 + }, + { + "epoch": 17.16, + "learning_rate": 4.142015893284929e-05, + "loss": 1.3893, + "step": 1626000 + }, + { + "epoch": 17.16, + "learning_rate": 4.1417520605337865e-05, + "loss": 1.4245, + "step": 1626500 + }, + { + "epoch": 17.17, + "learning_rate": 4.141488227782645e-05, + "loss": 1.4009, + "step": 1627000 + }, + { + "epoch": 17.18, + "learning_rate": 4.1412243950315016e-05, + "loss": 1.4113, + "step": 1627500 + }, + { + "epoch": 17.18, + "learning_rate": 4.140960562280359e-05, + "loss": 1.3989, + "step": 1628000 + }, + { + "epoch": 17.19, + "learning_rate": 4.1406967295292174e-05, + "loss": 1.389, + "step": 1628500 + }, + { + "epoch": 17.19, + "learning_rate": 4.140432896778075e-05, + "loss": 1.43, + "step": 1629000 + }, + { + "epoch": 17.2, + "learning_rate": 4.1401690640269325e-05, + "loss": 1.4441, + "step": 1629500 + }, + { + "epoch": 17.2, + "learning_rate": 4.139905231275789e-05, + "loss": 1.3752, + "step": 1630000 + }, + { + "epoch": 17.21, + "learning_rate": 4.1396413985246476e-05, + "loss": 1.3485, + "step": 1630500 + }, + { + "epoch": 17.21, + "learning_rate": 4.139377565773505e-05, + "loss": 1.4128, + "step": 1631000 + }, + { + "epoch": 17.22, + "learning_rate": 4.1391137330223627e-05, + "loss": 1.4076, + "step": 1631500 + }, + { + "epoch": 17.22, + "learning_rate": 4.13884990027122e-05, + "loss": 1.3962, + "step": 1632000 + }, + { + "epoch": 17.23, + "learning_rate": 4.138586067520078e-05, + "loss": 1.3894, + "step": 1632500 + }, + { + "epoch": 17.23, + "learning_rate": 4.138322234768935e-05, + "loss": 1.3827, + "step": 1633000 + }, + { + "epoch": 17.24, + "learning_rate": 4.138058402017793e-05, + "loss": 1.4004, + "step": 1633500 + }, + { + "epoch": 17.24, + "learning_rate": 4.137794569266651e-05, + "loss": 1.4115, + "step": 1634000 + }, + { + "epoch": 17.25, + "learning_rate": 4.137530736515508e-05, + "loss": 1.4184, + "step": 1634500 + }, + { + "epoch": 17.25, + "learning_rate": 4.1372669037643655e-05, + "loss": 1.3857, + "step": 1635000 + }, + { + "epoch": 17.26, + "learning_rate": 4.137003071013224e-05, + "loss": 1.4266, + "step": 1635500 + }, + { + "epoch": 17.27, + "learning_rate": 4.136739238262081e-05, + "loss": 1.4686, + "step": 1636000 + }, + { + "epoch": 17.27, + "learning_rate": 4.136475405510939e-05, + "loss": 1.4251, + "step": 1636500 + }, + { + "epoch": 17.28, + "learning_rate": 4.1362115727597964e-05, + "loss": 1.3737, + "step": 1637000 + }, + { + "epoch": 17.28, + "learning_rate": 4.135947740008654e-05, + "loss": 1.434, + "step": 1637500 + }, + { + "epoch": 17.29, + "learning_rate": 4.1356839072575115e-05, + "loss": 1.3759, + "step": 1638000 + }, + { + "epoch": 17.29, + "learning_rate": 4.135420074506369e-05, + "loss": 1.4274, + "step": 1638500 + }, + { + "epoch": 17.3, + "learning_rate": 4.135156241755227e-05, + "loss": 1.4217, + "step": 1639000 + }, + { + "epoch": 17.3, + "learning_rate": 4.134892409004084e-05, + "loss": 1.5034, + "step": 1639500 + }, + { + "epoch": 17.31, + "learning_rate": 4.1346285762529417e-05, + "loss": 1.427, + "step": 1640000 + }, + { + "epoch": 17.31, + "learning_rate": 4.1343647435018e-05, + "loss": 1.4253, + "step": 1640500 + }, + { + "epoch": 17.32, + "learning_rate": 4.1341009107506574e-05, + "loss": 1.3879, + "step": 1641000 + }, + { + "epoch": 17.32, + "learning_rate": 4.133837077999514e-05, + "loss": 1.4221, + "step": 1641500 + }, + { + "epoch": 17.33, + "learning_rate": 4.1335732452483725e-05, + "loss": 1.4657, + "step": 1642000 + }, + { + "epoch": 17.33, + "learning_rate": 4.13330941249723e-05, + "loss": 1.3487, + "step": 1642500 + }, + { + "epoch": 17.34, + "learning_rate": 4.1330455797460876e-05, + "loss": 1.4318, + "step": 1643000 + }, + { + "epoch": 17.34, + "learning_rate": 4.132781746994945e-05, + "loss": 1.4211, + "step": 1643500 + }, + { + "epoch": 17.35, + "learning_rate": 4.132517914243803e-05, + "loss": 1.4071, + "step": 1644000 + }, + { + "epoch": 17.35, + "learning_rate": 4.13225408149266e-05, + "loss": 1.3708, + "step": 1644500 + }, + { + "epoch": 17.36, + "learning_rate": 4.131990248741518e-05, + "loss": 1.4038, + "step": 1645000 + }, + { + "epoch": 17.37, + "learning_rate": 4.1317264159903754e-05, + "loss": 1.4369, + "step": 1645500 + }, + { + "epoch": 17.37, + "learning_rate": 4.1314625832392336e-05, + "loss": 1.4657, + "step": 1646000 + }, + { + "epoch": 17.38, + "learning_rate": 4.1311987504880905e-05, + "loss": 1.4115, + "step": 1646500 + }, + { + "epoch": 17.38, + "learning_rate": 4.130934917736948e-05, + "loss": 1.4072, + "step": 1647000 + }, + { + "epoch": 17.39, + "learning_rate": 4.130671084985806e-05, + "loss": 1.4551, + "step": 1647500 + }, + { + "epoch": 17.39, + "learning_rate": 4.130407252234664e-05, + "loss": 1.4117, + "step": 1648000 + }, + { + "epoch": 17.4, + "learning_rate": 4.1301434194835213e-05, + "loss": 1.4944, + "step": 1648500 + }, + { + "epoch": 17.4, + "learning_rate": 4.129879586732379e-05, + "loss": 1.3961, + "step": 1649000 + }, + { + "epoch": 17.41, + "learning_rate": 4.1296157539812364e-05, + "loss": 1.4155, + "step": 1649500 + }, + { + "epoch": 17.41, + "learning_rate": 4.129351921230094e-05, + "loss": 1.4082, + "step": 1650000 + }, + { + "epoch": 17.42, + "learning_rate": 4.1290880884789515e-05, + "loss": 1.3698, + "step": 1650500 + }, + { + "epoch": 17.42, + "learning_rate": 4.128824255727809e-05, + "loss": 1.3938, + "step": 1651000 + }, + { + "epoch": 17.43, + "learning_rate": 4.1285604229766666e-05, + "loss": 1.4001, + "step": 1651500 + }, + { + "epoch": 17.43, + "learning_rate": 4.128296590225524e-05, + "loss": 1.3852, + "step": 1652000 + }, + { + "epoch": 17.44, + "learning_rate": 4.1280327574743824e-05, + "loss": 1.4102, + "step": 1652500 + }, + { + "epoch": 17.44, + "learning_rate": 4.12776892472324e-05, + "loss": 1.3813, + "step": 1653000 + }, + { + "epoch": 17.45, + "learning_rate": 4.127505091972097e-05, + "loss": 1.4692, + "step": 1653500 + }, + { + "epoch": 17.46, + "learning_rate": 4.127241259220955e-05, + "loss": 1.3874, + "step": 1654000 + }, + { + "epoch": 17.46, + "learning_rate": 4.1269774264698126e-05, + "loss": 1.375, + "step": 1654500 + }, + { + "epoch": 17.47, + "learning_rate": 4.12671359371867e-05, + "loss": 1.4102, + "step": 1655000 + }, + { + "epoch": 17.47, + "learning_rate": 4.126449760967528e-05, + "loss": 1.4387, + "step": 1655500 + }, + { + "epoch": 17.48, + "learning_rate": 4.126185928216385e-05, + "loss": 1.3957, + "step": 1656000 + }, + { + "epoch": 17.48, + "learning_rate": 4.125922095465243e-05, + "loss": 1.3434, + "step": 1656500 + }, + { + "epoch": 17.49, + "learning_rate": 4.1256582627141003e-05, + "loss": 1.4455, + "step": 1657000 + }, + { + "epoch": 17.49, + "learning_rate": 4.125394429962958e-05, + "loss": 1.4251, + "step": 1657500 + }, + { + "epoch": 17.5, + "learning_rate": 4.125130597211816e-05, + "loss": 1.3911, + "step": 1658000 + }, + { + "epoch": 17.5, + "learning_rate": 4.124866764460673e-05, + "loss": 1.4198, + "step": 1658500 + }, + { + "epoch": 17.51, + "learning_rate": 4.1246029317095305e-05, + "loss": 1.3944, + "step": 1659000 + }, + { + "epoch": 17.51, + "learning_rate": 4.124339098958389e-05, + "loss": 1.409, + "step": 1659500 + }, + { + "epoch": 17.52, + "learning_rate": 4.124075266207246e-05, + "loss": 1.3751, + "step": 1660000 + }, + { + "epoch": 17.52, + "learning_rate": 4.123811433456103e-05, + "loss": 1.3848, + "step": 1660500 + }, + { + "epoch": 17.53, + "learning_rate": 4.1235476007049614e-05, + "loss": 1.4015, + "step": 1661000 + }, + { + "epoch": 17.53, + "learning_rate": 4.123283767953819e-05, + "loss": 1.4784, + "step": 1661500 + }, + { + "epoch": 17.54, + "learning_rate": 4.1230199352026765e-05, + "loss": 1.4108, + "step": 1662000 + }, + { + "epoch": 17.54, + "learning_rate": 4.122756102451534e-05, + "loss": 1.4127, + "step": 1662500 + }, + { + "epoch": 17.55, + "learning_rate": 4.1224922697003916e-05, + "loss": 1.4451, + "step": 1663000 + }, + { + "epoch": 17.56, + "learning_rate": 4.122228436949249e-05, + "loss": 1.3879, + "step": 1663500 + }, + { + "epoch": 17.56, + "learning_rate": 4.121964604198107e-05, + "loss": 1.4318, + "step": 1664000 + }, + { + "epoch": 17.57, + "learning_rate": 4.121700771446965e-05, + "loss": 1.437, + "step": 1664500 + }, + { + "epoch": 17.57, + "learning_rate": 4.1214369386958225e-05, + "loss": 1.4077, + "step": 1665000 + }, + { + "epoch": 17.58, + "learning_rate": 4.1211731059446794e-05, + "loss": 1.3917, + "step": 1665500 + }, + { + "epoch": 17.58, + "learning_rate": 4.1209092731935376e-05, + "loss": 1.4233, + "step": 1666000 + }, + { + "epoch": 17.59, + "learning_rate": 4.120645440442395e-05, + "loss": 1.392, + "step": 1666500 + }, + { + "epoch": 17.59, + "learning_rate": 4.120381607691253e-05, + "loss": 1.3881, + "step": 1667000 + }, + { + "epoch": 17.6, + "learning_rate": 4.12011777494011e-05, + "loss": 1.3747, + "step": 1667500 + }, + { + "epoch": 17.6, + "learning_rate": 4.119853942188968e-05, + "loss": 1.376, + "step": 1668000 + }, + { + "epoch": 17.61, + "learning_rate": 4.119590109437825e-05, + "loss": 1.4042, + "step": 1668500 + }, + { + "epoch": 17.61, + "learning_rate": 4.119326276686683e-05, + "loss": 1.4273, + "step": 1669000 + }, + { + "epoch": 17.62, + "learning_rate": 4.1190624439355404e-05, + "loss": 1.3918, + "step": 1669500 + }, + { + "epoch": 17.62, + "learning_rate": 4.118798611184398e-05, + "loss": 1.4206, + "step": 1670000 + }, + { + "epoch": 17.63, + "learning_rate": 4.1185347784332555e-05, + "loss": 1.4635, + "step": 1670500 + }, + { + "epoch": 17.63, + "learning_rate": 4.118270945682113e-05, + "loss": 1.4158, + "step": 1671000 + }, + { + "epoch": 17.64, + "learning_rate": 4.118007112930971e-05, + "loss": 1.3758, + "step": 1671500 + }, + { + "epoch": 17.65, + "learning_rate": 4.117743280179829e-05, + "loss": 1.4426, + "step": 1672000 + }, + { + "epoch": 17.65, + "learning_rate": 4.117479447428686e-05, + "loss": 1.3776, + "step": 1672500 + }, + { + "epoch": 17.66, + "learning_rate": 4.117215614677544e-05, + "loss": 1.4144, + "step": 1673000 + }, + { + "epoch": 17.66, + "learning_rate": 4.1169517819264015e-05, + "loss": 1.4319, + "step": 1673500 + }, + { + "epoch": 17.67, + "learning_rate": 4.116687949175259e-05, + "loss": 1.3946, + "step": 1674000 + }, + { + "epoch": 17.67, + "learning_rate": 4.1164241164241166e-05, + "loss": 1.4193, + "step": 1674500 + }, + { + "epoch": 17.68, + "learning_rate": 4.116160283672974e-05, + "loss": 1.4442, + "step": 1675000 + }, + { + "epoch": 17.68, + "learning_rate": 4.115896450921832e-05, + "loss": 1.4038, + "step": 1675500 + }, + { + "epoch": 17.69, + "learning_rate": 4.115632618170689e-05, + "loss": 1.3859, + "step": 1676000 + }, + { + "epoch": 17.69, + "learning_rate": 4.1153687854195475e-05, + "loss": 1.4027, + "step": 1676500 + }, + { + "epoch": 17.7, + "learning_rate": 4.115104952668405e-05, + "loss": 1.4258, + "step": 1677000 + }, + { + "epoch": 17.7, + "learning_rate": 4.114841119917262e-05, + "loss": 1.4403, + "step": 1677500 + }, + { + "epoch": 17.71, + "learning_rate": 4.11457728716612e-05, + "loss": 1.4458, + "step": 1678000 + }, + { + "epoch": 17.71, + "learning_rate": 4.1143134544149777e-05, + "loss": 1.372, + "step": 1678500 + }, + { + "epoch": 17.72, + "learning_rate": 4.114049621663835e-05, + "loss": 1.4563, + "step": 1679000 + }, + { + "epoch": 17.72, + "learning_rate": 4.113785788912692e-05, + "loss": 1.3458, + "step": 1679500 + }, + { + "epoch": 17.73, + "learning_rate": 4.11352195616155e-05, + "loss": 1.4158, + "step": 1680000 + }, + { + "epoch": 17.73, + "learning_rate": 4.113258123410408e-05, + "loss": 1.4461, + "step": 1680500 + }, + { + "epoch": 17.74, + "learning_rate": 4.1129942906592654e-05, + "loss": 1.4106, + "step": 1681000 + }, + { + "epoch": 17.75, + "learning_rate": 4.112730457908123e-05, + "loss": 1.3313, + "step": 1681500 + }, + { + "epoch": 17.75, + "learning_rate": 4.1124666251569805e-05, + "loss": 1.4166, + "step": 1682000 + }, + { + "epoch": 17.76, + "learning_rate": 4.112202792405838e-05, + "loss": 1.4184, + "step": 1682500 + }, + { + "epoch": 17.76, + "learning_rate": 4.1119389596546956e-05, + "loss": 1.4705, + "step": 1683000 + }, + { + "epoch": 17.77, + "learning_rate": 4.111675126903554e-05, + "loss": 1.4387, + "step": 1683500 + }, + { + "epoch": 17.77, + "learning_rate": 4.1114112941524114e-05, + "loss": 1.4419, + "step": 1684000 + }, + { + "epoch": 17.78, + "learning_rate": 4.111147461401268e-05, + "loss": 1.3749, + "step": 1684500 + }, + { + "epoch": 17.78, + "learning_rate": 4.1108836286501265e-05, + "loss": 1.4237, + "step": 1685000 + }, + { + "epoch": 17.79, + "learning_rate": 4.110619795898984e-05, + "loss": 1.408, + "step": 1685500 + }, + { + "epoch": 17.79, + "learning_rate": 4.1103559631478416e-05, + "loss": 1.4028, + "step": 1686000 + }, + { + "epoch": 17.8, + "learning_rate": 4.110092130396699e-05, + "loss": 1.3431, + "step": 1686500 + }, + { + "epoch": 17.8, + "learning_rate": 4.1098282976455567e-05, + "loss": 1.4033, + "step": 1687000 + }, + { + "epoch": 17.81, + "learning_rate": 4.109564464894414e-05, + "loss": 1.3624, + "step": 1687500 + }, + { + "epoch": 17.81, + "learning_rate": 4.109300632143272e-05, + "loss": 1.3997, + "step": 1688000 + }, + { + "epoch": 17.82, + "learning_rate": 4.10903679939213e-05, + "loss": 1.4175, + "step": 1688500 + }, + { + "epoch": 17.82, + "learning_rate": 4.108772966640987e-05, + "loss": 1.4081, + "step": 1689000 + }, + { + "epoch": 17.83, + "learning_rate": 4.1085091338898444e-05, + "loss": 1.4452, + "step": 1689500 + }, + { + "epoch": 17.84, + "learning_rate": 4.1082453011387026e-05, + "loss": 1.4137, + "step": 1690000 + }, + { + "epoch": 17.84, + "learning_rate": 4.10798146838756e-05, + "loss": 1.4287, + "step": 1690500 + }, + { + "epoch": 17.85, + "learning_rate": 4.107717635636418e-05, + "loss": 1.4189, + "step": 1691000 + }, + { + "epoch": 17.85, + "learning_rate": 4.1074538028852746e-05, + "loss": 1.4131, + "step": 1691500 + }, + { + "epoch": 17.86, + "learning_rate": 4.107189970134133e-05, + "loss": 1.4264, + "step": 1692000 + }, + { + "epoch": 17.86, + "learning_rate": 4.1069261373829904e-05, + "loss": 1.4024, + "step": 1692500 + }, + { + "epoch": 17.87, + "learning_rate": 4.106662304631848e-05, + "loss": 1.436, + "step": 1693000 + }, + { + "epoch": 17.87, + "learning_rate": 4.106398471880706e-05, + "loss": 1.3802, + "step": 1693500 + }, + { + "epoch": 17.88, + "learning_rate": 4.106134639129563e-05, + "loss": 1.3979, + "step": 1694000 + }, + { + "epoch": 17.88, + "learning_rate": 4.1058708063784206e-05, + "loss": 1.4186, + "step": 1694500 + }, + { + "epoch": 17.89, + "learning_rate": 4.105606973627278e-05, + "loss": 1.4537, + "step": 1695000 + }, + { + "epoch": 17.89, + "learning_rate": 4.1053431408761363e-05, + "loss": 1.3824, + "step": 1695500 + }, + { + "epoch": 17.9, + "learning_rate": 4.105079308124994e-05, + "loss": 1.4055, + "step": 1696000 + }, + { + "epoch": 17.9, + "learning_rate": 4.104815475373851e-05, + "loss": 1.4082, + "step": 1696500 + }, + { + "epoch": 17.91, + "learning_rate": 4.104551642622709e-05, + "loss": 1.3764, + "step": 1697000 + }, + { + "epoch": 17.91, + "learning_rate": 4.1042878098715665e-05, + "loss": 1.3854, + "step": 1697500 + }, + { + "epoch": 17.92, + "learning_rate": 4.104023977120424e-05, + "loss": 1.3957, + "step": 1698000 + }, + { + "epoch": 17.92, + "learning_rate": 4.1037601443692816e-05, + "loss": 1.4096, + "step": 1698500 + }, + { + "epoch": 17.93, + "learning_rate": 4.103496311618139e-05, + "loss": 1.3663, + "step": 1699000 + }, + { + "epoch": 17.94, + "learning_rate": 4.103232478866997e-05, + "loss": 1.4467, + "step": 1699500 + }, + { + "epoch": 17.94, + "learning_rate": 4.102968646115854e-05, + "loss": 1.3877, + "step": 1700000 + }, + { + "epoch": 17.95, + "learning_rate": 4.1027048133647125e-05, + "loss": 1.3571, + "step": 1700500 + }, + { + "epoch": 17.95, + "learning_rate": 4.1024409806135694e-05, + "loss": 1.4084, + "step": 1701000 + }, + { + "epoch": 17.96, + "learning_rate": 4.102177147862427e-05, + "loss": 1.367, + "step": 1701500 + }, + { + "epoch": 17.96, + "learning_rate": 4.101913315111285e-05, + "loss": 1.4394, + "step": 1702000 + }, + { + "epoch": 17.97, + "learning_rate": 4.101649482360143e-05, + "loss": 1.3515, + "step": 1702500 + }, + { + "epoch": 17.97, + "learning_rate": 4.101385649609e-05, + "loss": 1.3886, + "step": 1703000 + }, + { + "epoch": 17.98, + "learning_rate": 4.101121816857857e-05, + "loss": 1.4163, + "step": 1703500 + }, + { + "epoch": 17.98, + "learning_rate": 4.1008579841067154e-05, + "loss": 1.4178, + "step": 1704000 + }, + { + "epoch": 17.99, + "learning_rate": 4.100594151355573e-05, + "loss": 1.4481, + "step": 1704500 + }, + { + "epoch": 17.99, + "learning_rate": 4.1003303186044304e-05, + "loss": 1.395, + "step": 1705000 + }, + { + "epoch": 18.0, + "learning_rate": 4.100066485853289e-05, + "loss": 1.3864, + "step": 1705500 + }, + { + "epoch": 18.0, + "learning_rate": 4.0998026531021455e-05, + "loss": 1.4637, + "step": 1706000 + }, + { + "epoch": 18.01, + "learning_rate": 4.099538820351003e-05, + "loss": 1.3979, + "step": 1706500 + }, + { + "epoch": 18.01, + "learning_rate": 4.0992749875998606e-05, + "loss": 1.4055, + "step": 1707000 + }, + { + "epoch": 18.02, + "learning_rate": 4.099011154848719e-05, + "loss": 1.3972, + "step": 1707500 + }, + { + "epoch": 18.03, + "learning_rate": 4.098747322097576e-05, + "loss": 1.4098, + "step": 1708000 + }, + { + "epoch": 18.03, + "learning_rate": 4.098483489346433e-05, + "loss": 1.414, + "step": 1708500 + }, + { + "epoch": 18.04, + "learning_rate": 4.0982196565952915e-05, + "loss": 1.3378, + "step": 1709000 + }, + { + "epoch": 18.04, + "learning_rate": 4.097955823844149e-05, + "loss": 1.4574, + "step": 1709500 + }, + { + "epoch": 18.05, + "learning_rate": 4.0976919910930066e-05, + "loss": 1.4028, + "step": 1710000 + }, + { + "epoch": 18.05, + "learning_rate": 4.097428158341864e-05, + "loss": 1.4227, + "step": 1710500 + }, + { + "epoch": 18.06, + "learning_rate": 4.097164325590722e-05, + "loss": 1.4289, + "step": 1711000 + }, + { + "epoch": 18.06, + "learning_rate": 4.096900492839579e-05, + "loss": 1.4783, + "step": 1711500 + }, + { + "epoch": 18.07, + "learning_rate": 4.096636660088437e-05, + "loss": 1.4166, + "step": 1712000 + }, + { + "epoch": 18.07, + "learning_rate": 4.096372827337295e-05, + "loss": 1.3661, + "step": 1712500 + }, + { + "epoch": 18.08, + "learning_rate": 4.096108994586152e-05, + "loss": 1.3777, + "step": 1713000 + }, + { + "epoch": 18.08, + "learning_rate": 4.0958451618350095e-05, + "loss": 1.4439, + "step": 1713500 + }, + { + "epoch": 18.09, + "learning_rate": 4.095581329083868e-05, + "loss": 1.406, + "step": 1714000 + }, + { + "epoch": 18.09, + "learning_rate": 4.095317496332725e-05, + "loss": 1.3514, + "step": 1714500 + }, + { + "epoch": 18.1, + "learning_rate": 4.095053663581583e-05, + "loss": 1.4041, + "step": 1715000 + }, + { + "epoch": 18.1, + "learning_rate": 4.09478983083044e-05, + "loss": 1.4694, + "step": 1715500 + }, + { + "epoch": 18.11, + "learning_rate": 4.094525998079298e-05, + "loss": 1.3818, + "step": 1716000 + }, + { + "epoch": 18.11, + "learning_rate": 4.0942621653281554e-05, + "loss": 1.4226, + "step": 1716500 + }, + { + "epoch": 18.12, + "learning_rate": 4.093998332577013e-05, + "loss": 1.3911, + "step": 1717000 + }, + { + "epoch": 18.13, + "learning_rate": 4.0937344998258705e-05, + "loss": 1.3881, + "step": 1717500 + }, + { + "epoch": 18.13, + "learning_rate": 4.093470667074728e-05, + "loss": 1.3749, + "step": 1718000 + }, + { + "epoch": 18.14, + "learning_rate": 4.0932068343235856e-05, + "loss": 1.4006, + "step": 1718500 + }, + { + "epoch": 18.14, + "learning_rate": 4.092943001572443e-05, + "loss": 1.3855, + "step": 1719000 + }, + { + "epoch": 18.15, + "learning_rate": 4.0926791688213014e-05, + "loss": 1.3801, + "step": 1719500 + }, + { + "epoch": 18.15, + "learning_rate": 4.092415336070158e-05, + "loss": 1.415, + "step": 1720000 + }, + { + "epoch": 18.16, + "learning_rate": 4.092151503319016e-05, + "loss": 1.4261, + "step": 1720500 + }, + { + "epoch": 18.16, + "learning_rate": 4.091887670567874e-05, + "loss": 1.3356, + "step": 1721000 + }, + { + "epoch": 18.17, + "learning_rate": 4.0916238378167316e-05, + "loss": 1.3731, + "step": 1721500 + }, + { + "epoch": 18.17, + "learning_rate": 4.091360005065589e-05, + "loss": 1.3935, + "step": 1722000 + }, + { + "epoch": 18.18, + "learning_rate": 4.091096172314447e-05, + "loss": 1.4272, + "step": 1722500 + }, + { + "epoch": 18.18, + "learning_rate": 4.090832339563304e-05, + "loss": 1.4481, + "step": 1723000 + }, + { + "epoch": 18.19, + "learning_rate": 4.090568506812162e-05, + "loss": 1.3989, + "step": 1723500 + }, + { + "epoch": 18.19, + "learning_rate": 4.090304674061019e-05, + "loss": 1.4065, + "step": 1724000 + }, + { + "epoch": 18.2, + "learning_rate": 4.0900408413098776e-05, + "loss": 1.3788, + "step": 1724500 + }, + { + "epoch": 18.2, + "learning_rate": 4.0897770085587344e-05, + "loss": 1.4143, + "step": 1725000 + }, + { + "epoch": 18.21, + "learning_rate": 4.089513175807592e-05, + "loss": 1.4343, + "step": 1725500 + }, + { + "epoch": 18.22, + "learning_rate": 4.08924934305645e-05, + "loss": 1.4069, + "step": 1726000 + }, + { + "epoch": 18.22, + "learning_rate": 4.088985510305308e-05, + "loss": 1.3818, + "step": 1726500 + }, + { + "epoch": 18.23, + "learning_rate": 4.0887216775541646e-05, + "loss": 1.3914, + "step": 1727000 + }, + { + "epoch": 18.23, + "learning_rate": 4.088457844803023e-05, + "loss": 1.3951, + "step": 1727500 + }, + { + "epoch": 18.24, + "learning_rate": 4.0881940120518804e-05, + "loss": 1.436, + "step": 1728000 + }, + { + "epoch": 18.24, + "learning_rate": 4.087930179300738e-05, + "loss": 1.3744, + "step": 1728500 + }, + { + "epoch": 18.25, + "learning_rate": 4.0876663465495955e-05, + "loss": 1.4525, + "step": 1729000 + }, + { + "epoch": 18.25, + "learning_rate": 4.087402513798453e-05, + "loss": 1.4713, + "step": 1729500 + }, + { + "epoch": 18.26, + "learning_rate": 4.0871386810473106e-05, + "loss": 1.4495, + "step": 1730000 + }, + { + "epoch": 18.26, + "learning_rate": 4.086874848296168e-05, + "loss": 1.396, + "step": 1730500 + }, + { + "epoch": 18.27, + "learning_rate": 4.086611015545026e-05, + "loss": 1.374, + "step": 1731000 + }, + { + "epoch": 18.27, + "learning_rate": 4.086347182793884e-05, + "loss": 1.3902, + "step": 1731500 + }, + { + "epoch": 18.28, + "learning_rate": 4.086083350042741e-05, + "loss": 1.3647, + "step": 1732000 + }, + { + "epoch": 18.28, + "learning_rate": 4.085819517291598e-05, + "loss": 1.3832, + "step": 1732500 + }, + { + "epoch": 18.29, + "learning_rate": 4.0855556845404566e-05, + "loss": 1.4342, + "step": 1733000 + }, + { + "epoch": 18.29, + "learning_rate": 4.085291851789314e-05, + "loss": 1.4427, + "step": 1733500 + }, + { + "epoch": 18.3, + "learning_rate": 4.085028019038172e-05, + "loss": 1.341, + "step": 1734000 + }, + { + "epoch": 18.3, + "learning_rate": 4.084764186287029e-05, + "loss": 1.4681, + "step": 1734500 + }, + { + "epoch": 18.31, + "learning_rate": 4.084500353535887e-05, + "loss": 1.394, + "step": 1735000 + }, + { + "epoch": 18.32, + "learning_rate": 4.084236520784744e-05, + "loss": 1.4121, + "step": 1735500 + }, + { + "epoch": 18.32, + "learning_rate": 4.083972688033602e-05, + "loss": 1.4604, + "step": 1736000 + }, + { + "epoch": 18.33, + "learning_rate": 4.0837088552824594e-05, + "loss": 1.3862, + "step": 1736500 + }, + { + "epoch": 18.33, + "learning_rate": 4.083445022531317e-05, + "loss": 1.3952, + "step": 1737000 + }, + { + "epoch": 18.34, + "learning_rate": 4.0831811897801745e-05, + "loss": 1.4741, + "step": 1737500 + }, + { + "epoch": 18.34, + "learning_rate": 4.082917357029033e-05, + "loss": 1.3878, + "step": 1738000 + }, + { + "epoch": 18.35, + "learning_rate": 4.08265352427789e-05, + "loss": 1.4202, + "step": 1738500 + }, + { + "epoch": 18.35, + "learning_rate": 4.082389691526747e-05, + "loss": 1.4692, + "step": 1739000 + }, + { + "epoch": 18.36, + "learning_rate": 4.0821258587756054e-05, + "loss": 1.423, + "step": 1739500 + }, + { + "epoch": 18.36, + "learning_rate": 4.081862026024463e-05, + "loss": 1.3663, + "step": 1740000 + }, + { + "epoch": 18.37, + "learning_rate": 4.0815981932733205e-05, + "loss": 1.4436, + "step": 1740500 + }, + { + "epoch": 18.37, + "learning_rate": 4.081334360522178e-05, + "loss": 1.4039, + "step": 1741000 + }, + { + "epoch": 18.38, + "learning_rate": 4.0810705277710356e-05, + "loss": 1.3768, + "step": 1741500 + }, + { + "epoch": 18.38, + "learning_rate": 4.080806695019893e-05, + "loss": 1.3571, + "step": 1742000 + }, + { + "epoch": 18.39, + "learning_rate": 4.080542862268751e-05, + "loss": 1.4203, + "step": 1742500 + }, + { + "epoch": 18.39, + "learning_rate": 4.080279029517608e-05, + "loss": 1.3783, + "step": 1743000 + }, + { + "epoch": 18.4, + "learning_rate": 4.0800151967664664e-05, + "loss": 1.3761, + "step": 1743500 + }, + { + "epoch": 18.4, + "learning_rate": 4.079751364015323e-05, + "loss": 1.445, + "step": 1744000 + }, + { + "epoch": 18.41, + "learning_rate": 4.079487531264181e-05, + "loss": 1.3615, + "step": 1744500 + }, + { + "epoch": 18.42, + "learning_rate": 4.079223698513039e-05, + "loss": 1.4041, + "step": 1745000 + }, + { + "epoch": 18.42, + "learning_rate": 4.0789598657618966e-05, + "loss": 1.4385, + "step": 1745500 + }, + { + "epoch": 18.43, + "learning_rate": 4.0786960330107535e-05, + "loss": 1.4075, + "step": 1746000 + }, + { + "epoch": 18.43, + "learning_rate": 4.078432200259612e-05, + "loss": 1.4118, + "step": 1746500 + }, + { + "epoch": 18.44, + "learning_rate": 4.078168367508469e-05, + "loss": 1.3896, + "step": 1747000 + }, + { + "epoch": 18.44, + "learning_rate": 4.077904534757327e-05, + "loss": 1.3702, + "step": 1747500 + }, + { + "epoch": 18.45, + "learning_rate": 4.0776407020061844e-05, + "loss": 1.3957, + "step": 1748000 + }, + { + "epoch": 18.45, + "learning_rate": 4.077376869255042e-05, + "loss": 1.3821, + "step": 1748500 + }, + { + "epoch": 18.46, + "learning_rate": 4.0771130365038995e-05, + "loss": 1.4022, + "step": 1749000 + }, + { + "epoch": 18.46, + "learning_rate": 4.076849203752757e-05, + "loss": 1.4358, + "step": 1749500 + }, + { + "epoch": 18.47, + "learning_rate": 4.076585371001615e-05, + "loss": 1.3689, + "step": 1750000 + }, + { + "epoch": 18.47, + "learning_rate": 4.076321538250473e-05, + "loss": 1.3837, + "step": 1750500 + }, + { + "epoch": 18.48, + "learning_rate": 4.07605770549933e-05, + "loss": 1.431, + "step": 1751000 + }, + { + "epoch": 18.48, + "learning_rate": 4.075793872748188e-05, + "loss": 1.4075, + "step": 1751500 + }, + { + "epoch": 18.49, + "learning_rate": 4.0755300399970454e-05, + "loss": 1.3728, + "step": 1752000 + }, + { + "epoch": 18.49, + "learning_rate": 4.075266207245903e-05, + "loss": 1.3977, + "step": 1752500 + }, + { + "epoch": 18.5, + "learning_rate": 4.0750023744947605e-05, + "loss": 1.3597, + "step": 1753000 + }, + { + "epoch": 18.51, + "learning_rate": 4.074738541743618e-05, + "loss": 1.4623, + "step": 1753500 + }, + { + "epoch": 18.51, + "learning_rate": 4.0744747089924756e-05, + "loss": 1.4116, + "step": 1754000 + }, + { + "epoch": 18.52, + "learning_rate": 4.074210876241333e-05, + "loss": 1.3637, + "step": 1754500 + }, + { + "epoch": 18.52, + "learning_rate": 4.073947043490191e-05, + "loss": 1.3885, + "step": 1755000 + }, + { + "epoch": 18.53, + "learning_rate": 4.073683210739048e-05, + "loss": 1.4205, + "step": 1755500 + }, + { + "epoch": 18.53, + "learning_rate": 4.073419377987906e-05, + "loss": 1.4105, + "step": 1756000 + }, + { + "epoch": 18.54, + "learning_rate": 4.0731555452367634e-05, + "loss": 1.4063, + "step": 1756500 + }, + { + "epoch": 18.54, + "learning_rate": 4.0728917124856216e-05, + "loss": 1.3974, + "step": 1757000 + }, + { + "epoch": 18.55, + "learning_rate": 4.072627879734479e-05, + "loss": 1.4187, + "step": 1757500 + }, + { + "epoch": 18.55, + "learning_rate": 4.072364046983336e-05, + "loss": 1.4469, + "step": 1758000 + }, + { + "epoch": 18.56, + "learning_rate": 4.072100214232194e-05, + "loss": 1.3838, + "step": 1758500 + }, + { + "epoch": 18.56, + "learning_rate": 4.071836381481052e-05, + "loss": 1.4305, + "step": 1759000 + }, + { + "epoch": 18.57, + "learning_rate": 4.0715725487299094e-05, + "loss": 1.3648, + "step": 1759500 + }, + { + "epoch": 18.57, + "learning_rate": 4.071308715978767e-05, + "loss": 1.4272, + "step": 1760000 + }, + { + "epoch": 18.58, + "learning_rate": 4.0710448832276245e-05, + "loss": 1.4026, + "step": 1760500 + }, + { + "epoch": 18.58, + "learning_rate": 4.070781050476482e-05, + "loss": 1.4187, + "step": 1761000 + }, + { + "epoch": 18.59, + "learning_rate": 4.0705172177253396e-05, + "loss": 1.4385, + "step": 1761500 + }, + { + "epoch": 18.59, + "learning_rate": 4.070253384974198e-05, + "loss": 1.425, + "step": 1762000 + }, + { + "epoch": 18.6, + "learning_rate": 4.069989552223055e-05, + "loss": 1.3813, + "step": 1762500 + }, + { + "epoch": 18.61, + "learning_rate": 4.069725719471912e-05, + "loss": 1.4353, + "step": 1763000 + }, + { + "epoch": 18.61, + "learning_rate": 4.0694618867207704e-05, + "loss": 1.4273, + "step": 1763500 + }, + { + "epoch": 18.62, + "learning_rate": 4.069198053969628e-05, + "loss": 1.3862, + "step": 1764000 + }, + { + "epoch": 18.62, + "learning_rate": 4.0689342212184855e-05, + "loss": 1.3725, + "step": 1764500 + }, + { + "epoch": 18.63, + "learning_rate": 4.0686703884673424e-05, + "loss": 1.4, + "step": 1765000 + }, + { + "epoch": 18.63, + "learning_rate": 4.0684065557162006e-05, + "loss": 1.4487, + "step": 1765500 + }, + { + "epoch": 18.64, + "learning_rate": 4.068142722965058e-05, + "loss": 1.3975, + "step": 1766000 + }, + { + "epoch": 18.64, + "learning_rate": 4.067878890213916e-05, + "loss": 1.4325, + "step": 1766500 + }, + { + "epoch": 18.65, + "learning_rate": 4.067615057462773e-05, + "loss": 1.4048, + "step": 1767000 + }, + { + "epoch": 18.65, + "learning_rate": 4.067351224711631e-05, + "loss": 1.3742, + "step": 1767500 + }, + { + "epoch": 18.66, + "learning_rate": 4.0670873919604884e-05, + "loss": 1.4373, + "step": 1768000 + }, + { + "epoch": 18.66, + "learning_rate": 4.066823559209346e-05, + "loss": 1.3741, + "step": 1768500 + }, + { + "epoch": 18.67, + "learning_rate": 4.066559726458204e-05, + "loss": 1.3871, + "step": 1769000 + }, + { + "epoch": 18.67, + "learning_rate": 4.066295893707062e-05, + "loss": 1.415, + "step": 1769500 + }, + { + "epoch": 18.68, + "learning_rate": 4.0660320609559186e-05, + "loss": 1.4055, + "step": 1770000 + }, + { + "epoch": 18.68, + "learning_rate": 4.065768228204777e-05, + "loss": 1.3852, + "step": 1770500 + }, + { + "epoch": 18.69, + "learning_rate": 4.065504395453634e-05, + "loss": 1.4023, + "step": 1771000 + }, + { + "epoch": 18.7, + "learning_rate": 4.065240562702492e-05, + "loss": 1.3843, + "step": 1771500 + }, + { + "epoch": 18.7, + "learning_rate": 4.0649767299513494e-05, + "loss": 1.4387, + "step": 1772000 + }, + { + "epoch": 18.71, + "learning_rate": 4.064712897200207e-05, + "loss": 1.3947, + "step": 1772500 + }, + { + "epoch": 18.71, + "learning_rate": 4.0644490644490645e-05, + "loss": 1.4227, + "step": 1773000 + }, + { + "epoch": 18.72, + "learning_rate": 4.064185231697922e-05, + "loss": 1.4439, + "step": 1773500 + }, + { + "epoch": 18.72, + "learning_rate": 4.06392139894678e-05, + "loss": 1.3833, + "step": 1774000 + }, + { + "epoch": 18.73, + "learning_rate": 4.063657566195637e-05, + "loss": 1.386, + "step": 1774500 + }, + { + "epoch": 18.73, + "learning_rate": 4.063393733444495e-05, + "loss": 1.4227, + "step": 1775000 + }, + { + "epoch": 18.74, + "learning_rate": 4.063129900693353e-05, + "loss": 1.4729, + "step": 1775500 + }, + { + "epoch": 18.74, + "learning_rate": 4.0628660679422105e-05, + "loss": 1.4374, + "step": 1776000 + }, + { + "epoch": 18.75, + "learning_rate": 4.062602235191068e-05, + "loss": 1.4193, + "step": 1776500 + }, + { + "epoch": 18.75, + "learning_rate": 4.062338402439925e-05, + "loss": 1.3928, + "step": 1777000 + }, + { + "epoch": 18.76, + "learning_rate": 4.062074569688783e-05, + "loss": 1.3704, + "step": 1777500 + }, + { + "epoch": 18.76, + "learning_rate": 4.061810736937641e-05, + "loss": 1.4303, + "step": 1778000 + }, + { + "epoch": 18.77, + "learning_rate": 4.061546904186498e-05, + "loss": 1.3718, + "step": 1778500 + }, + { + "epoch": 18.77, + "learning_rate": 4.0612830714353565e-05, + "loss": 1.3847, + "step": 1779000 + }, + { + "epoch": 18.78, + "learning_rate": 4.0610192386842133e-05, + "loss": 1.3831, + "step": 1779500 + }, + { + "epoch": 18.78, + "learning_rate": 4.060755405933071e-05, + "loss": 1.4217, + "step": 1780000 + }, + { + "epoch": 18.79, + "learning_rate": 4.0604915731819284e-05, + "loss": 1.4196, + "step": 1780500 + }, + { + "epoch": 18.8, + "learning_rate": 4.060227740430787e-05, + "loss": 1.3967, + "step": 1781000 + }, + { + "epoch": 18.8, + "learning_rate": 4.059963907679644e-05, + "loss": 1.4133, + "step": 1781500 + }, + { + "epoch": 18.81, + "learning_rate": 4.059700074928501e-05, + "loss": 1.4086, + "step": 1782000 + }, + { + "epoch": 18.81, + "learning_rate": 4.059436242177359e-05, + "loss": 1.4077, + "step": 1782500 + }, + { + "epoch": 18.82, + "learning_rate": 4.059172409426217e-05, + "loss": 1.42, + "step": 1783000 + }, + { + "epoch": 18.82, + "learning_rate": 4.0589085766750744e-05, + "loss": 1.3795, + "step": 1783500 + }, + { + "epoch": 18.83, + "learning_rate": 4.058644743923932e-05, + "loss": 1.3886, + "step": 1784000 + }, + { + "epoch": 18.83, + "learning_rate": 4.0583809111727895e-05, + "loss": 1.3601, + "step": 1784500 + }, + { + "epoch": 18.84, + "learning_rate": 4.058117078421647e-05, + "loss": 1.4413, + "step": 1785000 + }, + { + "epoch": 18.84, + "learning_rate": 4.0578532456705046e-05, + "loss": 1.4182, + "step": 1785500 + }, + { + "epoch": 18.85, + "learning_rate": 4.057589412919363e-05, + "loss": 1.4528, + "step": 1786000 + }, + { + "epoch": 18.85, + "learning_rate": 4.05732558016822e-05, + "loss": 1.4288, + "step": 1786500 + }, + { + "epoch": 18.86, + "learning_rate": 4.057061747417077e-05, + "loss": 1.3878, + "step": 1787000 + }, + { + "epoch": 18.86, + "learning_rate": 4.0567979146659355e-05, + "loss": 1.4401, + "step": 1787500 + }, + { + "epoch": 18.87, + "learning_rate": 4.056534081914793e-05, + "loss": 1.3424, + "step": 1788000 + }, + { + "epoch": 18.87, + "learning_rate": 4.0562702491636506e-05, + "loss": 1.4719, + "step": 1788500 + }, + { + "epoch": 18.88, + "learning_rate": 4.0560064164125074e-05, + "loss": 1.3932, + "step": 1789000 + }, + { + "epoch": 18.89, + "learning_rate": 4.055742583661366e-05, + "loss": 1.4019, + "step": 1789500 + }, + { + "epoch": 18.89, + "learning_rate": 4.055478750910223e-05, + "loss": 1.4661, + "step": 1790000 + }, + { + "epoch": 18.9, + "learning_rate": 4.055214918159081e-05, + "loss": 1.441, + "step": 1790500 + }, + { + "epoch": 18.9, + "learning_rate": 4.054951085407939e-05, + "loss": 1.3353, + "step": 1791000 + }, + { + "epoch": 18.91, + "learning_rate": 4.054687252656796e-05, + "loss": 1.3993, + "step": 1791500 + }, + { + "epoch": 18.91, + "learning_rate": 4.0544234199056534e-05, + "loss": 1.495, + "step": 1792000 + }, + { + "epoch": 18.92, + "learning_rate": 4.054159587154511e-05, + "loss": 1.4303, + "step": 1792500 + }, + { + "epoch": 18.92, + "learning_rate": 4.053895754403369e-05, + "loss": 1.4076, + "step": 1793000 + }, + { + "epoch": 18.93, + "learning_rate": 4.053631921652226e-05, + "loss": 1.3858, + "step": 1793500 + }, + { + "epoch": 18.93, + "learning_rate": 4.0533680889010836e-05, + "loss": 1.4697, + "step": 1794000 + }, + { + "epoch": 18.94, + "learning_rate": 4.053104256149942e-05, + "loss": 1.4348, + "step": 1794500 + }, + { + "epoch": 18.94, + "learning_rate": 4.0528404233987994e-05, + "loss": 1.378, + "step": 1795000 + }, + { + "epoch": 18.95, + "learning_rate": 4.052576590647657e-05, + "loss": 1.3566, + "step": 1795500 + }, + { + "epoch": 18.95, + "learning_rate": 4.0523127578965145e-05, + "loss": 1.3959, + "step": 1796000 + }, + { + "epoch": 18.96, + "learning_rate": 4.052048925145372e-05, + "loss": 1.3865, + "step": 1796500 + }, + { + "epoch": 18.96, + "learning_rate": 4.0517850923942296e-05, + "loss": 1.4257, + "step": 1797000 + }, + { + "epoch": 18.97, + "learning_rate": 4.051521259643087e-05, + "loss": 1.3747, + "step": 1797500 + }, + { + "epoch": 18.97, + "learning_rate": 4.0512574268919454e-05, + "loss": 1.367, + "step": 1798000 + }, + { + "epoch": 18.98, + "learning_rate": 4.050993594140802e-05, + "loss": 1.4048, + "step": 1798500 + }, + { + "epoch": 18.99, + "learning_rate": 4.05072976138966e-05, + "loss": 1.4598, + "step": 1799000 + }, + { + "epoch": 18.99, + "learning_rate": 4.050465928638518e-05, + "loss": 1.4001, + "step": 1799500 + }, + { + "epoch": 19.0, + "learning_rate": 4.0502020958873755e-05, + "loss": 1.3849, + "step": 1800000 + }, + { + "epoch": 19.0, + "learning_rate": 4.049938263136233e-05, + "loss": 1.3229, + "step": 1800500 + }, + { + "epoch": 19.01, + "learning_rate": 4.0496744303850906e-05, + "loss": 1.4023, + "step": 1801000 + }, + { + "epoch": 19.01, + "learning_rate": 4.049410597633948e-05, + "loss": 1.4199, + "step": 1801500 + }, + { + "epoch": 19.02, + "learning_rate": 4.049146764882806e-05, + "loss": 1.3348, + "step": 1802000 + }, + { + "epoch": 19.02, + "learning_rate": 4.048882932131663e-05, + "loss": 1.423, + "step": 1802500 + }, + { + "epoch": 19.03, + "learning_rate": 4.048619099380521e-05, + "loss": 1.4232, + "step": 1803000 + }, + { + "epoch": 19.03, + "learning_rate": 4.0483552666293784e-05, + "loss": 1.4228, + "step": 1803500 + }, + { + "epoch": 19.04, + "learning_rate": 4.048091433878236e-05, + "loss": 1.4016, + "step": 1804000 + }, + { + "epoch": 19.04, + "learning_rate": 4.0478276011270935e-05, + "loss": 1.4225, + "step": 1804500 + }, + { + "epoch": 19.05, + "learning_rate": 4.047563768375952e-05, + "loss": 1.3869, + "step": 1805000 + }, + { + "epoch": 19.05, + "learning_rate": 4.0472999356248086e-05, + "loss": 1.3759, + "step": 1805500 + }, + { + "epoch": 19.06, + "learning_rate": 4.047036102873666e-05, + "loss": 1.3904, + "step": 1806000 + }, + { + "epoch": 19.06, + "learning_rate": 4.0467722701225244e-05, + "loss": 1.435, + "step": 1806500 + }, + { + "epoch": 19.07, + "learning_rate": 4.046508437371382e-05, + "loss": 1.3683, + "step": 1807000 + }, + { + "epoch": 19.08, + "learning_rate": 4.0462446046202395e-05, + "loss": 1.4382, + "step": 1807500 + }, + { + "epoch": 19.08, + "learning_rate": 4.045980771869097e-05, + "loss": 1.4524, + "step": 1808000 + }, + { + "epoch": 19.09, + "learning_rate": 4.0457169391179546e-05, + "loss": 1.416, + "step": 1808500 + }, + { + "epoch": 19.09, + "learning_rate": 4.045453106366812e-05, + "loss": 1.413, + "step": 1809000 + }, + { + "epoch": 19.1, + "learning_rate": 4.0451892736156697e-05, + "loss": 1.3971, + "step": 1809500 + }, + { + "epoch": 19.1, + "learning_rate": 4.044925440864528e-05, + "loss": 1.4418, + "step": 1810000 + }, + { + "epoch": 19.11, + "learning_rate": 4.044661608113385e-05, + "loss": 1.3296, + "step": 1810500 + }, + { + "epoch": 19.11, + "learning_rate": 4.044397775362242e-05, + "loss": 1.4372, + "step": 1811000 + }, + { + "epoch": 19.12, + "learning_rate": 4.0441339426111005e-05, + "loss": 1.4447, + "step": 1811500 + }, + { + "epoch": 19.12, + "learning_rate": 4.043870109859958e-05, + "loss": 1.368, + "step": 1812000 + }, + { + "epoch": 19.13, + "learning_rate": 4.043606277108815e-05, + "loss": 1.4045, + "step": 1812500 + }, + { + "epoch": 19.13, + "learning_rate": 4.043342444357673e-05, + "loss": 1.375, + "step": 1813000 + }, + { + "epoch": 19.14, + "learning_rate": 4.043078611606531e-05, + "loss": 1.3882, + "step": 1813500 + }, + { + "epoch": 19.14, + "learning_rate": 4.042814778855388e-05, + "loss": 1.4298, + "step": 1814000 + }, + { + "epoch": 19.15, + "learning_rate": 4.042550946104246e-05, + "loss": 1.4198, + "step": 1814500 + }, + { + "epoch": 19.15, + "learning_rate": 4.0422871133531034e-05, + "loss": 1.3889, + "step": 1815000 + }, + { + "epoch": 19.16, + "learning_rate": 4.042023280601961e-05, + "loss": 1.4015, + "step": 1815500 + }, + { + "epoch": 19.16, + "learning_rate": 4.0417594478508185e-05, + "loss": 1.3762, + "step": 1816000 + }, + { + "epoch": 19.17, + "learning_rate": 4.041495615099676e-05, + "loss": 1.4887, + "step": 1816500 + }, + { + "epoch": 19.18, + "learning_rate": 4.041231782348534e-05, + "loss": 1.3586, + "step": 1817000 + }, + { + "epoch": 19.18, + "learning_rate": 4.040967949597391e-05, + "loss": 1.3634, + "step": 1817500 + }, + { + "epoch": 19.19, + "learning_rate": 4.0407041168462487e-05, + "loss": 1.3879, + "step": 1818000 + }, + { + "epoch": 19.19, + "learning_rate": 4.040440284095107e-05, + "loss": 1.4514, + "step": 1818500 + }, + { + "epoch": 19.2, + "learning_rate": 4.0401764513439644e-05, + "loss": 1.4079, + "step": 1819000 + }, + { + "epoch": 19.2, + "learning_rate": 4.039912618592822e-05, + "loss": 1.4294, + "step": 1819500 + }, + { + "epoch": 19.21, + "learning_rate": 4.0396487858416795e-05, + "loss": 1.3646, + "step": 1820000 + }, + { + "epoch": 19.21, + "learning_rate": 4.039384953090537e-05, + "loss": 1.3985, + "step": 1820500 + }, + { + "epoch": 19.22, + "learning_rate": 4.0391211203393946e-05, + "loss": 1.3822, + "step": 1821000 + }, + { + "epoch": 19.22, + "learning_rate": 4.038857287588252e-05, + "loss": 1.4498, + "step": 1821500 + }, + { + "epoch": 19.23, + "learning_rate": 4.03859345483711e-05, + "loss": 1.4101, + "step": 1822000 + }, + { + "epoch": 19.23, + "learning_rate": 4.038329622085967e-05, + "loss": 1.4184, + "step": 1822500 + }, + { + "epoch": 19.24, + "learning_rate": 4.038065789334825e-05, + "loss": 1.3924, + "step": 1823000 + }, + { + "epoch": 19.24, + "learning_rate": 4.037801956583683e-05, + "loss": 1.4215, + "step": 1823500 + }, + { + "epoch": 19.25, + "learning_rate": 4.0375381238325406e-05, + "loss": 1.3923, + "step": 1824000 + }, + { + "epoch": 19.25, + "learning_rate": 4.0372742910813975e-05, + "loss": 1.3601, + "step": 1824500 + }, + { + "epoch": 19.26, + "learning_rate": 4.037010458330256e-05, + "loss": 1.3725, + "step": 1825000 + }, + { + "epoch": 19.27, + "learning_rate": 4.036746625579113e-05, + "loss": 1.427, + "step": 1825500 + }, + { + "epoch": 19.27, + "learning_rate": 4.036482792827971e-05, + "loss": 1.454, + "step": 1826000 + }, + { + "epoch": 19.28, + "learning_rate": 4.0362189600768283e-05, + "loss": 1.4075, + "step": 1826500 + }, + { + "epoch": 19.28, + "learning_rate": 4.035955127325686e-05, + "loss": 1.4031, + "step": 1827000 + }, + { + "epoch": 19.29, + "learning_rate": 4.0356912945745434e-05, + "loss": 1.4247, + "step": 1827500 + }, + { + "epoch": 19.29, + "learning_rate": 4.035427461823401e-05, + "loss": 1.4519, + "step": 1828000 + }, + { + "epoch": 19.3, + "learning_rate": 4.0351636290722585e-05, + "loss": 1.4123, + "step": 1828500 + }, + { + "epoch": 19.3, + "learning_rate": 4.034899796321117e-05, + "loss": 1.4143, + "step": 1829000 + }, + { + "epoch": 19.31, + "learning_rate": 4.0346359635699736e-05, + "loss": 1.3302, + "step": 1829500 + }, + { + "epoch": 19.31, + "learning_rate": 4.034372130818831e-05, + "loss": 1.3971, + "step": 1830000 + }, + { + "epoch": 19.32, + "learning_rate": 4.0341082980676894e-05, + "loss": 1.4, + "step": 1830500 + }, + { + "epoch": 19.32, + "learning_rate": 4.033844465316547e-05, + "loss": 1.4003, + "step": 1831000 + }, + { + "epoch": 19.33, + "learning_rate": 4.033580632565404e-05, + "loss": 1.3988, + "step": 1831500 + }, + { + "epoch": 19.33, + "learning_rate": 4.033316799814262e-05, + "loss": 1.4139, + "step": 1832000 + }, + { + "epoch": 19.34, + "learning_rate": 4.0330529670631196e-05, + "loss": 1.3967, + "step": 1832500 + }, + { + "epoch": 19.34, + "learning_rate": 4.032789134311977e-05, + "loss": 1.4056, + "step": 1833000 + }, + { + "epoch": 19.35, + "learning_rate": 4.032525301560835e-05, + "loss": 1.3942, + "step": 1833500 + }, + { + "epoch": 19.35, + "learning_rate": 4.032261468809692e-05, + "loss": 1.4106, + "step": 1834000 + }, + { + "epoch": 19.36, + "learning_rate": 4.03199763605855e-05, + "loss": 1.3924, + "step": 1834500 + }, + { + "epoch": 19.37, + "learning_rate": 4.0317338033074073e-05, + "loss": 1.3908, + "step": 1835000 + }, + { + "epoch": 19.37, + "learning_rate": 4.0314699705562656e-05, + "loss": 1.3892, + "step": 1835500 + }, + { + "epoch": 19.38, + "learning_rate": 4.031206137805123e-05, + "loss": 1.4137, + "step": 1836000 + }, + { + "epoch": 19.38, + "learning_rate": 4.03094230505398e-05, + "loss": 1.3963, + "step": 1836500 + }, + { + "epoch": 19.39, + "learning_rate": 4.030678472302838e-05, + "loss": 1.3684, + "step": 1837000 + }, + { + "epoch": 19.39, + "learning_rate": 4.030414639551696e-05, + "loss": 1.393, + "step": 1837500 + }, + { + "epoch": 19.4, + "learning_rate": 4.030150806800553e-05, + "loss": 1.4189, + "step": 1838000 + }, + { + "epoch": 19.4, + "learning_rate": 4.029886974049411e-05, + "loss": 1.3537, + "step": 1838500 + }, + { + "epoch": 19.41, + "learning_rate": 4.0296231412982684e-05, + "loss": 1.4103, + "step": 1839000 + }, + { + "epoch": 19.41, + "learning_rate": 4.029359308547126e-05, + "loss": 1.3865, + "step": 1839500 + }, + { + "epoch": 19.42, + "learning_rate": 4.0290954757959835e-05, + "loss": 1.3325, + "step": 1840000 + }, + { + "epoch": 19.42, + "learning_rate": 4.028831643044841e-05, + "loss": 1.4076, + "step": 1840500 + }, + { + "epoch": 19.43, + "learning_rate": 4.0285678102936986e-05, + "loss": 1.3867, + "step": 1841000 + }, + { + "epoch": 19.43, + "learning_rate": 4.028303977542556e-05, + "loss": 1.3491, + "step": 1841500 + }, + { + "epoch": 19.44, + "learning_rate": 4.028040144791414e-05, + "loss": 1.4049, + "step": 1842000 + }, + { + "epoch": 19.44, + "learning_rate": 4.027776312040272e-05, + "loss": 1.424, + "step": 1842500 + }, + { + "epoch": 19.45, + "learning_rate": 4.0275124792891295e-05, + "loss": 1.411, + "step": 1843000 + }, + { + "epoch": 19.46, + "learning_rate": 4.0272486465379864e-05, + "loss": 1.3588, + "step": 1843500 + }, + { + "epoch": 19.46, + "learning_rate": 4.0269848137868446e-05, + "loss": 1.3839, + "step": 1844000 + }, + { + "epoch": 19.47, + "learning_rate": 4.026720981035702e-05, + "loss": 1.4166, + "step": 1844500 + }, + { + "epoch": 19.47, + "learning_rate": 4.02645714828456e-05, + "loss": 1.3761, + "step": 1845000 + }, + { + "epoch": 19.48, + "learning_rate": 4.026193315533417e-05, + "loss": 1.3472, + "step": 1845500 + }, + { + "epoch": 19.48, + "learning_rate": 4.025929482782275e-05, + "loss": 1.4332, + "step": 1846000 + }, + { + "epoch": 19.49, + "learning_rate": 4.025665650031132e-05, + "loss": 1.4386, + "step": 1846500 + }, + { + "epoch": 19.49, + "learning_rate": 4.02540181727999e-05, + "loss": 1.3839, + "step": 1847000 + }, + { + "epoch": 19.5, + "learning_rate": 4.025137984528848e-05, + "loss": 1.4294, + "step": 1847500 + }, + { + "epoch": 19.5, + "learning_rate": 4.024874151777705e-05, + "loss": 1.4233, + "step": 1848000 + }, + { + "epoch": 19.51, + "learning_rate": 4.0246103190265625e-05, + "loss": 1.3349, + "step": 1848500 + }, + { + "epoch": 19.51, + "learning_rate": 4.024346486275421e-05, + "loss": 1.5044, + "step": 1849000 + }, + { + "epoch": 19.52, + "learning_rate": 4.024082653524278e-05, + "loss": 1.5333, + "step": 1849500 + }, + { + "epoch": 19.52, + "learning_rate": 4.023818820773136e-05, + "loss": 1.3632, + "step": 1850000 + }, + { + "epoch": 19.53, + "learning_rate": 4.023554988021993e-05, + "loss": 1.4335, + "step": 1850500 + }, + { + "epoch": 19.53, + "learning_rate": 4.023291155270851e-05, + "loss": 1.3795, + "step": 1851000 + }, + { + "epoch": 19.54, + "learning_rate": 4.0230273225197085e-05, + "loss": 1.3861, + "step": 1851500 + }, + { + "epoch": 19.54, + "learning_rate": 4.022763489768566e-05, + "loss": 1.414, + "step": 1852000 + }, + { + "epoch": 19.55, + "learning_rate": 4.022499657017424e-05, + "loss": 1.3528, + "step": 1852500 + }, + { + "epoch": 19.56, + "learning_rate": 4.022235824266281e-05, + "loss": 1.4388, + "step": 1853000 + }, + { + "epoch": 19.56, + "learning_rate": 4.021971991515139e-05, + "loss": 1.3605, + "step": 1853500 + }, + { + "epoch": 19.57, + "learning_rate": 4.021708158763996e-05, + "loss": 1.3941, + "step": 1854000 + }, + { + "epoch": 19.57, + "learning_rate": 4.0214443260128545e-05, + "loss": 1.3824, + "step": 1854500 + }, + { + "epoch": 19.58, + "learning_rate": 4.021180493261712e-05, + "loss": 1.398, + "step": 1855000 + }, + { + "epoch": 19.58, + "learning_rate": 4.020916660510569e-05, + "loss": 1.3893, + "step": 1855500 + }, + { + "epoch": 19.59, + "learning_rate": 4.020652827759427e-05, + "loss": 1.4426, + "step": 1856000 + }, + { + "epoch": 19.59, + "learning_rate": 4.0203889950082847e-05, + "loss": 1.3779, + "step": 1856500 + }, + { + "epoch": 19.6, + "learning_rate": 4.020125162257142e-05, + "loss": 1.4221, + "step": 1857000 + }, + { + "epoch": 19.6, + "learning_rate": 4.019861329506e-05, + "loss": 1.3946, + "step": 1857500 + }, + { + "epoch": 19.61, + "learning_rate": 4.019597496754857e-05, + "loss": 1.393, + "step": 1858000 + }, + { + "epoch": 19.61, + "learning_rate": 4.019333664003715e-05, + "loss": 1.4055, + "step": 1858500 + }, + { + "epoch": 19.62, + "learning_rate": 4.0190698312525724e-05, + "loss": 1.4147, + "step": 1859000 + }, + { + "epoch": 19.62, + "learning_rate": 4.0188059985014306e-05, + "loss": 1.4385, + "step": 1859500 + }, + { + "epoch": 19.63, + "learning_rate": 4.0185421657502875e-05, + "loss": 1.3874, + "step": 1860000 + }, + { + "epoch": 19.63, + "learning_rate": 4.018278332999145e-05, + "loss": 1.417, + "step": 1860500 + }, + { + "epoch": 19.64, + "learning_rate": 4.018014500248003e-05, + "loss": 1.4307, + "step": 1861000 + }, + { + "epoch": 19.64, + "learning_rate": 4.017750667496861e-05, + "loss": 1.3595, + "step": 1861500 + }, + { + "epoch": 19.65, + "learning_rate": 4.0174868347457184e-05, + "loss": 1.4402, + "step": 1862000 + }, + { + "epoch": 19.66, + "learning_rate": 4.017223001994575e-05, + "loss": 1.432, + "step": 1862500 + }, + { + "epoch": 19.66, + "learning_rate": 4.0169591692434335e-05, + "loss": 1.373, + "step": 1863000 + }, + { + "epoch": 19.67, + "learning_rate": 4.016695336492291e-05, + "loss": 1.4094, + "step": 1863500 + }, + { + "epoch": 19.67, + "learning_rate": 4.0164315037411486e-05, + "loss": 1.3814, + "step": 1864000 + }, + { + "epoch": 19.68, + "learning_rate": 4.016167670990007e-05, + "loss": 1.4477, + "step": 1864500 + }, + { + "epoch": 19.68, + "learning_rate": 4.0159038382388637e-05, + "loss": 1.398, + "step": 1865000 + }, + { + "epoch": 19.69, + "learning_rate": 4.015640005487721e-05, + "loss": 1.4282, + "step": 1865500 + }, + { + "epoch": 19.69, + "learning_rate": 4.015376172736579e-05, + "loss": 1.3982, + "step": 1866000 + }, + { + "epoch": 19.7, + "learning_rate": 4.015112339985437e-05, + "loss": 1.4066, + "step": 1866500 + }, + { + "epoch": 19.7, + "learning_rate": 4.014848507234294e-05, + "loss": 1.3423, + "step": 1867000 + }, + { + "epoch": 19.71, + "learning_rate": 4.0145846744831514e-05, + "loss": 1.3794, + "step": 1867500 + }, + { + "epoch": 19.71, + "learning_rate": 4.0143208417320096e-05, + "loss": 1.4382, + "step": 1868000 + }, + { + "epoch": 19.72, + "learning_rate": 4.014057008980867e-05, + "loss": 1.4164, + "step": 1868500 + }, + { + "epoch": 19.72, + "learning_rate": 4.013793176229725e-05, + "loss": 1.4094, + "step": 1869000 + }, + { + "epoch": 19.73, + "learning_rate": 4.013529343478582e-05, + "loss": 1.4461, + "step": 1869500 + }, + { + "epoch": 19.73, + "learning_rate": 4.01326551072744e-05, + "loss": 1.364, + "step": 1870000 + }, + { + "epoch": 19.74, + "learning_rate": 4.0130016779762974e-05, + "loss": 1.4177, + "step": 1870500 + }, + { + "epoch": 19.75, + "learning_rate": 4.012737845225155e-05, + "loss": 1.3998, + "step": 1871000 + }, + { + "epoch": 19.75, + "learning_rate": 4.012474012474013e-05, + "loss": 1.3849, + "step": 1871500 + }, + { + "epoch": 19.76, + "learning_rate": 4.01221017972287e-05, + "loss": 1.3265, + "step": 1872000 + }, + { + "epoch": 19.76, + "learning_rate": 4.0119463469717276e-05, + "loss": 1.3537, + "step": 1872500 + }, + { + "epoch": 19.77, + "learning_rate": 4.011682514220586e-05, + "loss": 1.4226, + "step": 1873000 + }, + { + "epoch": 19.77, + "learning_rate": 4.0114186814694433e-05, + "loss": 1.3843, + "step": 1873500 + }, + { + "epoch": 19.78, + "learning_rate": 4.011154848718301e-05, + "loss": 1.4235, + "step": 1874000 + }, + { + "epoch": 19.78, + "learning_rate": 4.0108910159671584e-05, + "loss": 1.388, + "step": 1874500 + }, + { + "epoch": 19.79, + "learning_rate": 4.010627183216016e-05, + "loss": 1.4619, + "step": 1875000 + }, + { + "epoch": 19.79, + "learning_rate": 4.0103633504648735e-05, + "loss": 1.3546, + "step": 1875500 + }, + { + "epoch": 19.8, + "learning_rate": 4.010099517713731e-05, + "loss": 1.39, + "step": 1876000 + }, + { + "epoch": 19.8, + "learning_rate": 4.0098356849625886e-05, + "loss": 1.3761, + "step": 1876500 + }, + { + "epoch": 19.81, + "learning_rate": 4.009571852211446e-05, + "loss": 1.4279, + "step": 1877000 + }, + { + "epoch": 19.81, + "learning_rate": 4.009308019460304e-05, + "loss": 1.3943, + "step": 1877500 + }, + { + "epoch": 19.82, + "learning_rate": 4.009044186709161e-05, + "loss": 1.3773, + "step": 1878000 + }, + { + "epoch": 19.82, + "learning_rate": 4.0087803539580195e-05, + "loss": 1.4063, + "step": 1878500 + }, + { + "epoch": 19.83, + "learning_rate": 4.0085165212068764e-05, + "loss": 1.4517, + "step": 1879000 + }, + { + "epoch": 19.83, + "learning_rate": 4.008252688455734e-05, + "loss": 1.3969, + "step": 1879500 + }, + { + "epoch": 19.84, + "learning_rate": 4.007988855704592e-05, + "loss": 1.4296, + "step": 1880000 + }, + { + "epoch": 19.85, + "learning_rate": 4.00772502295345e-05, + "loss": 1.4058, + "step": 1880500 + }, + { + "epoch": 19.85, + "learning_rate": 4.007461190202307e-05, + "loss": 1.409, + "step": 1881000 + }, + { + "epoch": 19.86, + "learning_rate": 4.007197357451165e-05, + "loss": 1.4029, + "step": 1881500 + }, + { + "epoch": 19.86, + "learning_rate": 4.0069335247000223e-05, + "loss": 1.3428, + "step": 1882000 + }, + { + "epoch": 19.87, + "learning_rate": 4.00666969194888e-05, + "loss": 1.3929, + "step": 1882500 + }, + { + "epoch": 19.87, + "learning_rate": 4.0064058591977374e-05, + "loss": 1.3785, + "step": 1883000 + }, + { + "epoch": 19.88, + "learning_rate": 4.006142026446596e-05, + "loss": 1.4103, + "step": 1883500 + }, + { + "epoch": 19.88, + "learning_rate": 4.0058781936954525e-05, + "loss": 1.3804, + "step": 1884000 + }, + { + "epoch": 19.89, + "learning_rate": 4.00561436094431e-05, + "loss": 1.4134, + "step": 1884500 + }, + { + "epoch": 19.89, + "learning_rate": 4.005350528193168e-05, + "loss": 1.3931, + "step": 1885000 + }, + { + "epoch": 19.9, + "learning_rate": 4.005086695442026e-05, + "loss": 1.3474, + "step": 1885500 + }, + { + "epoch": 19.9, + "learning_rate": 4.004822862690883e-05, + "loss": 1.4283, + "step": 1886000 + }, + { + "epoch": 19.91, + "learning_rate": 4.004559029939741e-05, + "loss": 1.4236, + "step": 1886500 + }, + { + "epoch": 19.91, + "learning_rate": 4.0042951971885985e-05, + "loss": 1.438, + "step": 1887000 + }, + { + "epoch": 19.92, + "learning_rate": 4.004031364437456e-05, + "loss": 1.3472, + "step": 1887500 + }, + { + "epoch": 19.92, + "learning_rate": 4.0037675316863136e-05, + "loss": 1.4101, + "step": 1888000 + }, + { + "epoch": 19.93, + "learning_rate": 4.003503698935171e-05, + "loss": 1.4116, + "step": 1888500 + }, + { + "epoch": 19.94, + "learning_rate": 4.003239866184029e-05, + "loss": 1.4134, + "step": 1889000 + }, + { + "epoch": 19.94, + "learning_rate": 4.002976033432886e-05, + "loss": 1.3698, + "step": 1889500 + }, + { + "epoch": 19.95, + "learning_rate": 4.002712200681744e-05, + "loss": 1.4134, + "step": 1890000 + }, + { + "epoch": 19.95, + "learning_rate": 4.002448367930602e-05, + "loss": 1.3835, + "step": 1890500 + }, + { + "epoch": 19.96, + "learning_rate": 4.002184535179459e-05, + "loss": 1.4263, + "step": 1891000 + }, + { + "epoch": 19.96, + "learning_rate": 4.0019207024283165e-05, + "loss": 1.403, + "step": 1891500 + }, + { + "epoch": 19.97, + "learning_rate": 4.001656869677175e-05, + "loss": 1.4256, + "step": 1892000 + }, + { + "epoch": 19.97, + "learning_rate": 4.001393036926032e-05, + "loss": 1.3907, + "step": 1892500 + }, + { + "epoch": 19.98, + "learning_rate": 4.00112920417489e-05, + "loss": 1.3813, + "step": 1893000 + }, + { + "epoch": 19.98, + "learning_rate": 4.000865371423747e-05, + "loss": 1.4141, + "step": 1893500 + }, + { + "epoch": 19.99, + "learning_rate": 4.000601538672605e-05, + "loss": 1.3624, + "step": 1894000 + }, + { + "epoch": 19.99, + "learning_rate": 4.0003377059214624e-05, + "loss": 1.4064, + "step": 1894500 + }, + { + "epoch": 20.0, + "learning_rate": 4.00007387317032e-05, + "loss": 1.4128, + "step": 1895000 + }, + { + "epoch": 20.0, + "learning_rate": 3.9998100404191775e-05, + "loss": 1.3767, + "step": 1895500 + }, + { + "epoch": 20.01, + "learning_rate": 3.999546207668035e-05, + "loss": 1.4081, + "step": 1896000 + }, + { + "epoch": 20.01, + "learning_rate": 3.9992823749168926e-05, + "loss": 1.4168, + "step": 1896500 + }, + { + "epoch": 20.02, + "learning_rate": 3.999018542165751e-05, + "loss": 1.3853, + "step": 1897000 + }, + { + "epoch": 20.02, + "learning_rate": 3.9987547094146084e-05, + "loss": 1.5025, + "step": 1897500 + }, + { + "epoch": 20.03, + "learning_rate": 3.998490876663465e-05, + "loss": 1.4281, + "step": 1898000 + }, + { + "epoch": 20.04, + "learning_rate": 3.9982270439123235e-05, + "loss": 1.3968, + "step": 1898500 + }, + { + "epoch": 20.04, + "learning_rate": 3.997963211161181e-05, + "loss": 1.3779, + "step": 1899000 + }, + { + "epoch": 20.05, + "learning_rate": 3.9976993784100386e-05, + "loss": 1.3928, + "step": 1899500 + }, + { + "epoch": 20.05, + "learning_rate": 3.997435545658896e-05, + "loss": 1.3691, + "step": 1900000 + }, + { + "epoch": 20.06, + "learning_rate": 3.997171712907754e-05, + "loss": 1.3404, + "step": 1900500 + }, + { + "epoch": 20.06, + "learning_rate": 3.996907880156611e-05, + "loss": 1.4241, + "step": 1901000 + }, + { + "epoch": 20.07, + "learning_rate": 3.996644047405469e-05, + "loss": 1.3821, + "step": 1901500 + }, + { + "epoch": 20.07, + "learning_rate": 3.996380214654326e-05, + "loss": 1.422, + "step": 1902000 + }, + { + "epoch": 20.08, + "learning_rate": 3.9961163819031846e-05, + "loss": 1.4222, + "step": 1902500 + }, + { + "epoch": 20.08, + "learning_rate": 3.9958525491520414e-05, + "loss": 1.3918, + "step": 1903000 + }, + { + "epoch": 20.09, + "learning_rate": 3.995588716400899e-05, + "loss": 1.3881, + "step": 1903500 + }, + { + "epoch": 20.09, + "learning_rate": 3.995324883649757e-05, + "loss": 1.3407, + "step": 1904000 + }, + { + "epoch": 20.1, + "learning_rate": 3.995061050898615e-05, + "loss": 1.386, + "step": 1904500 + }, + { + "epoch": 20.1, + "learning_rate": 3.9947972181474716e-05, + "loss": 1.4213, + "step": 1905000 + }, + { + "epoch": 20.11, + "learning_rate": 3.99453338539633e-05, + "loss": 1.3336, + "step": 1905500 + }, + { + "epoch": 20.11, + "learning_rate": 3.9942695526451874e-05, + "loss": 1.4267, + "step": 1906000 + }, + { + "epoch": 20.12, + "learning_rate": 3.994005719894045e-05, + "loss": 1.3593, + "step": 1906500 + }, + { + "epoch": 20.13, + "learning_rate": 3.9937418871429025e-05, + "loss": 1.4413, + "step": 1907000 + }, + { + "epoch": 20.13, + "learning_rate": 3.99347805439176e-05, + "loss": 1.4042, + "step": 1907500 + }, + { + "epoch": 20.14, + "learning_rate": 3.9932142216406176e-05, + "loss": 1.4396, + "step": 1908000 + }, + { + "epoch": 20.14, + "learning_rate": 3.992950388889475e-05, + "loss": 1.3634, + "step": 1908500 + }, + { + "epoch": 20.15, + "learning_rate": 3.9926865561383334e-05, + "loss": 1.3908, + "step": 1909000 + }, + { + "epoch": 20.15, + "learning_rate": 3.992422723387191e-05, + "loss": 1.371, + "step": 1909500 + }, + { + "epoch": 20.16, + "learning_rate": 3.992158890636048e-05, + "loss": 1.4064, + "step": 1910000 + }, + { + "epoch": 20.16, + "learning_rate": 3.991895057884906e-05, + "loss": 1.3936, + "step": 1910500 + }, + { + "epoch": 20.17, + "learning_rate": 3.9916312251337636e-05, + "loss": 1.3462, + "step": 1911000 + }, + { + "epoch": 20.17, + "learning_rate": 3.991367392382621e-05, + "loss": 1.3574, + "step": 1911500 + }, + { + "epoch": 20.18, + "learning_rate": 3.9911035596314787e-05, + "loss": 1.3773, + "step": 1912000 + }, + { + "epoch": 20.18, + "learning_rate": 3.990839726880336e-05, + "loss": 1.4133, + "step": 1912500 + }, + { + "epoch": 20.19, + "learning_rate": 3.990575894129194e-05, + "loss": 1.3501, + "step": 1913000 + }, + { + "epoch": 20.19, + "learning_rate": 3.990312061378051e-05, + "loss": 1.4369, + "step": 1913500 + }, + { + "epoch": 20.2, + "learning_rate": 3.990048228626909e-05, + "loss": 1.3918, + "step": 1914000 + }, + { + "epoch": 20.2, + "learning_rate": 3.9897843958757664e-05, + "loss": 1.4301, + "step": 1914500 + }, + { + "epoch": 20.21, + "learning_rate": 3.989520563124624e-05, + "loss": 1.3879, + "step": 1915000 + }, + { + "epoch": 20.21, + "learning_rate": 3.9892567303734815e-05, + "loss": 1.3421, + "step": 1915500 + }, + { + "epoch": 20.22, + "learning_rate": 3.98899289762234e-05, + "loss": 1.404, + "step": 1916000 + }, + { + "epoch": 20.23, + "learning_rate": 3.988729064871197e-05, + "loss": 1.4111, + "step": 1916500 + }, + { + "epoch": 20.23, + "learning_rate": 3.988465232120054e-05, + "loss": 1.3988, + "step": 1917000 + }, + { + "epoch": 20.24, + "learning_rate": 3.9882013993689124e-05, + "loss": 1.4278, + "step": 1917500 + }, + { + "epoch": 20.24, + "learning_rate": 3.98793756661777e-05, + "loss": 1.3789, + "step": 1918000 + }, + { + "epoch": 20.25, + "learning_rate": 3.9876737338666275e-05, + "loss": 1.4065, + "step": 1918500 + }, + { + "epoch": 20.25, + "learning_rate": 3.987409901115485e-05, + "loss": 1.3129, + "step": 1919000 + }, + { + "epoch": 20.26, + "learning_rate": 3.9871460683643426e-05, + "loss": 1.3828, + "step": 1919500 + }, + { + "epoch": 20.26, + "learning_rate": 3.9868822356132e-05, + "loss": 1.4146, + "step": 1920000 + }, + { + "epoch": 20.27, + "learning_rate": 3.986618402862058e-05, + "loss": 1.401, + "step": 1920500 + }, + { + "epoch": 20.27, + "learning_rate": 3.986354570110916e-05, + "loss": 1.3948, + "step": 1921000 + }, + { + "epoch": 20.28, + "learning_rate": 3.9860907373597734e-05, + "loss": 1.364, + "step": 1921500 + }, + { + "epoch": 20.28, + "learning_rate": 3.98582690460863e-05, + "loss": 1.4297, + "step": 1922000 + }, + { + "epoch": 20.29, + "learning_rate": 3.9855630718574885e-05, + "loss": 1.4479, + "step": 1922500 + }, + { + "epoch": 20.29, + "learning_rate": 3.985299239106346e-05, + "loss": 1.4008, + "step": 1923000 + }, + { + "epoch": 20.3, + "learning_rate": 3.9850354063552036e-05, + "loss": 1.3443, + "step": 1923500 + }, + { + "epoch": 20.3, + "learning_rate": 3.9847715736040605e-05, + "loss": 1.441, + "step": 1924000 + }, + { + "epoch": 20.31, + "learning_rate": 3.984507740852919e-05, + "loss": 1.3606, + "step": 1924500 + }, + { + "epoch": 20.32, + "learning_rate": 3.984243908101776e-05, + "loss": 1.3847, + "step": 1925000 + }, + { + "epoch": 20.32, + "learning_rate": 3.983980075350634e-05, + "loss": 1.4029, + "step": 1925500 + }, + { + "epoch": 20.33, + "learning_rate": 3.983716242599492e-05, + "loss": 1.3621, + "step": 1926000 + }, + { + "epoch": 20.33, + "learning_rate": 3.983452409848349e-05, + "loss": 1.4123, + "step": 1926500 + }, + { + "epoch": 20.34, + "learning_rate": 3.9831885770972065e-05, + "loss": 1.3956, + "step": 1927000 + }, + { + "epoch": 20.34, + "learning_rate": 3.982924744346064e-05, + "loss": 1.3966, + "step": 1927500 + }, + { + "epoch": 20.35, + "learning_rate": 3.982660911594922e-05, + "loss": 1.3739, + "step": 1928000 + }, + { + "epoch": 20.35, + "learning_rate": 3.98239707884378e-05, + "loss": 1.4048, + "step": 1928500 + }, + { + "epoch": 20.36, + "learning_rate": 3.982133246092637e-05, + "loss": 1.3981, + "step": 1929000 + }, + { + "epoch": 20.36, + "learning_rate": 3.981869413341495e-05, + "loss": 1.3952, + "step": 1929500 + }, + { + "epoch": 20.37, + "learning_rate": 3.9816055805903524e-05, + "loss": 1.4018, + "step": 1930000 + }, + { + "epoch": 20.37, + "learning_rate": 3.98134174783921e-05, + "loss": 1.3862, + "step": 1930500 + }, + { + "epoch": 20.38, + "learning_rate": 3.9810779150880675e-05, + "loss": 1.361, + "step": 1931000 + }, + { + "epoch": 20.38, + "learning_rate": 3.980814082336925e-05, + "loss": 1.3787, + "step": 1931500 + }, + { + "epoch": 20.39, + "learning_rate": 3.9805502495857826e-05, + "loss": 1.4228, + "step": 1932000 + }, + { + "epoch": 20.39, + "learning_rate": 3.98028641683464e-05, + "loss": 1.4023, + "step": 1932500 + }, + { + "epoch": 20.4, + "learning_rate": 3.9800225840834984e-05, + "loss": 1.3997, + "step": 1933000 + }, + { + "epoch": 20.4, + "learning_rate": 3.979758751332355e-05, + "loss": 1.3649, + "step": 1933500 + }, + { + "epoch": 20.41, + "learning_rate": 3.979494918581213e-05, + "loss": 1.3518, + "step": 1934000 + }, + { + "epoch": 20.42, + "learning_rate": 3.979231085830071e-05, + "loss": 1.4469, + "step": 1934500 + }, + { + "epoch": 20.42, + "learning_rate": 3.9789672530789286e-05, + "loss": 1.4352, + "step": 1935000 + }, + { + "epoch": 20.43, + "learning_rate": 3.978703420327786e-05, + "loss": 1.4124, + "step": 1935500 + }, + { + "epoch": 20.43, + "learning_rate": 3.978439587576643e-05, + "loss": 1.3817, + "step": 1936000 + }, + { + "epoch": 20.44, + "learning_rate": 3.978175754825501e-05, + "loss": 1.3931, + "step": 1936500 + }, + { + "epoch": 20.44, + "learning_rate": 3.977911922074359e-05, + "loss": 1.3906, + "step": 1937000 + }, + { + "epoch": 20.45, + "learning_rate": 3.9776480893232164e-05, + "loss": 1.349, + "step": 1937500 + }, + { + "epoch": 20.45, + "learning_rate": 3.9773842565720746e-05, + "loss": 1.3696, + "step": 1938000 + }, + { + "epoch": 20.46, + "learning_rate": 3.9771204238209315e-05, + "loss": 1.4525, + "step": 1938500 + }, + { + "epoch": 20.46, + "learning_rate": 3.976856591069789e-05, + "loss": 1.3983, + "step": 1939000 + }, + { + "epoch": 20.47, + "learning_rate": 3.9765927583186465e-05, + "loss": 1.4368, + "step": 1939500 + }, + { + "epoch": 20.47, + "learning_rate": 3.976328925567505e-05, + "loss": 1.3546, + "step": 1940000 + }, + { + "epoch": 20.48, + "learning_rate": 3.976065092816362e-05, + "loss": 1.3862, + "step": 1940500 + }, + { + "epoch": 20.48, + "learning_rate": 3.975801260065219e-05, + "loss": 1.3247, + "step": 1941000 + }, + { + "epoch": 20.49, + "learning_rate": 3.9755374273140774e-05, + "loss": 1.3959, + "step": 1941500 + }, + { + "epoch": 20.49, + "learning_rate": 3.975273594562935e-05, + "loss": 1.3936, + "step": 1942000 + }, + { + "epoch": 20.5, + "learning_rate": 3.9750097618117925e-05, + "loss": 1.3887, + "step": 1942500 + }, + { + "epoch": 20.51, + "learning_rate": 3.97474592906065e-05, + "loss": 1.423, + "step": 1943000 + }, + { + "epoch": 20.51, + "learning_rate": 3.9744820963095076e-05, + "loss": 1.4125, + "step": 1943500 + }, + { + "epoch": 20.52, + "learning_rate": 3.974218263558365e-05, + "loss": 1.4138, + "step": 1944000 + }, + { + "epoch": 20.52, + "learning_rate": 3.973954430807223e-05, + "loss": 1.3938, + "step": 1944500 + }, + { + "epoch": 20.53, + "learning_rate": 3.973690598056081e-05, + "loss": 1.3831, + "step": 1945000 + }, + { + "epoch": 20.53, + "learning_rate": 3.973426765304938e-05, + "loss": 1.4009, + "step": 1945500 + }, + { + "epoch": 20.54, + "learning_rate": 3.9731629325537954e-05, + "loss": 1.3552, + "step": 1946000 + }, + { + "epoch": 20.54, + "learning_rate": 3.9728990998026536e-05, + "loss": 1.3687, + "step": 1946500 + }, + { + "epoch": 20.55, + "learning_rate": 3.972635267051511e-05, + "loss": 1.3648, + "step": 1947000 + }, + { + "epoch": 20.55, + "learning_rate": 3.972371434300369e-05, + "loss": 1.332, + "step": 1947500 + }, + { + "epoch": 20.56, + "learning_rate": 3.972107601549226e-05, + "loss": 1.3732, + "step": 1948000 + }, + { + "epoch": 20.56, + "learning_rate": 3.971843768798084e-05, + "loss": 1.4307, + "step": 1948500 + }, + { + "epoch": 20.57, + "learning_rate": 3.971579936046941e-05, + "loss": 1.3886, + "step": 1949000 + }, + { + "epoch": 20.57, + "learning_rate": 3.971316103295799e-05, + "loss": 1.4076, + "step": 1949500 + }, + { + "epoch": 20.58, + "learning_rate": 3.971052270544657e-05, + "loss": 1.4009, + "step": 1950000 + }, + { + "epoch": 20.58, + "learning_rate": 3.970788437793514e-05, + "loss": 1.3967, + "step": 1950500 + }, + { + "epoch": 20.59, + "learning_rate": 3.9705246050423715e-05, + "loss": 1.3545, + "step": 1951000 + }, + { + "epoch": 20.59, + "learning_rate": 3.970260772291229e-05, + "loss": 1.4205, + "step": 1951500 + }, + { + "epoch": 20.6, + "learning_rate": 3.969996939540087e-05, + "loss": 1.3663, + "step": 1952000 + }, + { + "epoch": 20.61, + "learning_rate": 3.969733106788944e-05, + "loss": 1.3826, + "step": 1952500 + }, + { + "epoch": 20.61, + "learning_rate": 3.969469274037802e-05, + "loss": 1.4412, + "step": 1953000 + }, + { + "epoch": 20.62, + "learning_rate": 3.96920544128666e-05, + "loss": 1.4127, + "step": 1953500 + }, + { + "epoch": 20.62, + "learning_rate": 3.9689416085355175e-05, + "loss": 1.3765, + "step": 1954000 + }, + { + "epoch": 20.63, + "learning_rate": 3.968677775784375e-05, + "loss": 1.4246, + "step": 1954500 + }, + { + "epoch": 20.63, + "learning_rate": 3.9684139430332326e-05, + "loss": 1.3691, + "step": 1955000 + }, + { + "epoch": 20.64, + "learning_rate": 3.96815011028209e-05, + "loss": 1.4067, + "step": 1955500 + }, + { + "epoch": 20.64, + "learning_rate": 3.967886277530948e-05, + "loss": 1.3581, + "step": 1956000 + }, + { + "epoch": 20.65, + "learning_rate": 3.967622444779805e-05, + "loss": 1.3505, + "step": 1956500 + }, + { + "epoch": 20.65, + "learning_rate": 3.9673586120286635e-05, + "loss": 1.3466, + "step": 1957000 + }, + { + "epoch": 20.66, + "learning_rate": 3.96709477927752e-05, + "loss": 1.3826, + "step": 1957500 + }, + { + "epoch": 20.66, + "learning_rate": 3.966830946526378e-05, + "loss": 1.409, + "step": 1958000 + }, + { + "epoch": 20.67, + "learning_rate": 3.966567113775236e-05, + "loss": 1.3859, + "step": 1958500 + }, + { + "epoch": 20.67, + "learning_rate": 3.9663032810240937e-05, + "loss": 1.3333, + "step": 1959000 + }, + { + "epoch": 20.68, + "learning_rate": 3.966039448272951e-05, + "loss": 1.4234, + "step": 1959500 + }, + { + "epoch": 20.68, + "learning_rate": 3.965775615521809e-05, + "loss": 1.3993, + "step": 1960000 + }, + { + "epoch": 20.69, + "learning_rate": 3.965511782770666e-05, + "loss": 1.3606, + "step": 1960500 + }, + { + "epoch": 20.7, + "learning_rate": 3.965247950019524e-05, + "loss": 1.3678, + "step": 1961000 + }, + { + "epoch": 20.7, + "learning_rate": 3.9649841172683814e-05, + "loss": 1.3818, + "step": 1961500 + }, + { + "epoch": 20.71, + "learning_rate": 3.964720284517239e-05, + "loss": 1.3728, + "step": 1962000 + }, + { + "epoch": 20.71, + "learning_rate": 3.9644564517660965e-05, + "loss": 1.3786, + "step": 1962500 + }, + { + "epoch": 20.72, + "learning_rate": 3.964192619014954e-05, + "loss": 1.366, + "step": 1963000 + }, + { + "epoch": 20.72, + "learning_rate": 3.9639287862638116e-05, + "loss": 1.3997, + "step": 1963500 + }, + { + "epoch": 20.73, + "learning_rate": 3.96366495351267e-05, + "loss": 1.4217, + "step": 1964000 + }, + { + "epoch": 20.73, + "learning_rate": 3.963401120761527e-05, + "loss": 1.3929, + "step": 1964500 + }, + { + "epoch": 20.74, + "learning_rate": 3.963137288010384e-05, + "loss": 1.3958, + "step": 1965000 + }, + { + "epoch": 20.74, + "learning_rate": 3.9628734552592425e-05, + "loss": 1.4074, + "step": 1965500 + }, + { + "epoch": 20.75, + "learning_rate": 3.9626096225081e-05, + "loss": 1.459, + "step": 1966000 + }, + { + "epoch": 20.75, + "learning_rate": 3.9623457897569576e-05, + "loss": 1.4208, + "step": 1966500 + }, + { + "epoch": 20.76, + "learning_rate": 3.962081957005815e-05, + "loss": 1.4192, + "step": 1967000 + }, + { + "epoch": 20.76, + "learning_rate": 3.961818124254673e-05, + "loss": 1.3874, + "step": 1967500 + }, + { + "epoch": 20.77, + "learning_rate": 3.96155429150353e-05, + "loss": 1.4058, + "step": 1968000 + }, + { + "epoch": 20.77, + "learning_rate": 3.961290458752388e-05, + "loss": 1.3758, + "step": 1968500 + }, + { + "epoch": 20.78, + "learning_rate": 3.961026626001246e-05, + "loss": 1.3668, + "step": 1969000 + }, + { + "epoch": 20.78, + "learning_rate": 3.960762793250103e-05, + "loss": 1.3741, + "step": 1969500 + }, + { + "epoch": 20.79, + "learning_rate": 3.9604989604989604e-05, + "loss": 1.3386, + "step": 1970000 + }, + { + "epoch": 20.8, + "learning_rate": 3.9602351277478186e-05, + "loss": 1.3763, + "step": 1970500 + }, + { + "epoch": 20.8, + "learning_rate": 3.959971294996676e-05, + "loss": 1.3775, + "step": 1971000 + }, + { + "epoch": 20.81, + "learning_rate": 3.959707462245533e-05, + "loss": 1.4096, + "step": 1971500 + }, + { + "epoch": 20.81, + "learning_rate": 3.959443629494391e-05, + "loss": 1.4028, + "step": 1972000 + }, + { + "epoch": 20.82, + "learning_rate": 3.959179796743249e-05, + "loss": 1.4264, + "step": 1972500 + }, + { + "epoch": 20.82, + "learning_rate": 3.9589159639921064e-05, + "loss": 1.4029, + "step": 1973000 + }, + { + "epoch": 20.83, + "learning_rate": 3.958652131240964e-05, + "loss": 1.4513, + "step": 1973500 + }, + { + "epoch": 20.83, + "learning_rate": 3.9583882984898215e-05, + "loss": 1.3796, + "step": 1974000 + }, + { + "epoch": 20.84, + "learning_rate": 3.958124465738679e-05, + "loss": 1.3782, + "step": 1974500 + }, + { + "epoch": 20.84, + "learning_rate": 3.9578606329875366e-05, + "loss": 1.3684, + "step": 1975000 + }, + { + "epoch": 20.85, + "learning_rate": 3.957596800236394e-05, + "loss": 1.344, + "step": 1975500 + }, + { + "epoch": 20.85, + "learning_rate": 3.9573329674852523e-05, + "loss": 1.43, + "step": 1976000 + }, + { + "epoch": 20.86, + "learning_rate": 3.957069134734109e-05, + "loss": 1.4129, + "step": 1976500 + }, + { + "epoch": 20.86, + "learning_rate": 3.956805301982967e-05, + "loss": 1.4021, + "step": 1977000 + }, + { + "epoch": 20.87, + "learning_rate": 3.956541469231825e-05, + "loss": 1.3958, + "step": 1977500 + }, + { + "epoch": 20.87, + "learning_rate": 3.9562776364806825e-05, + "loss": 1.4524, + "step": 1978000 + }, + { + "epoch": 20.88, + "learning_rate": 3.95601380372954e-05, + "loss": 1.3598, + "step": 1978500 + }, + { + "epoch": 20.89, + "learning_rate": 3.9557499709783976e-05, + "loss": 1.3873, + "step": 1979000 + }, + { + "epoch": 20.89, + "learning_rate": 3.955486138227255e-05, + "loss": 1.4303, + "step": 1979500 + }, + { + "epoch": 20.9, + "learning_rate": 3.955222305476113e-05, + "loss": 1.4005, + "step": 1980000 + }, + { + "epoch": 20.9, + "learning_rate": 3.95495847272497e-05, + "loss": 1.3917, + "step": 1980500 + }, + { + "epoch": 20.91, + "learning_rate": 3.954694639973828e-05, + "loss": 1.347, + "step": 1981000 + }, + { + "epoch": 20.91, + "learning_rate": 3.9544308072226854e-05, + "loss": 1.4134, + "step": 1981500 + }, + { + "epoch": 20.92, + "learning_rate": 3.954166974471543e-05, + "loss": 1.4305, + "step": 1982000 + }, + { + "epoch": 20.92, + "learning_rate": 3.953903141720401e-05, + "loss": 1.336, + "step": 1982500 + }, + { + "epoch": 20.93, + "learning_rate": 3.953639308969259e-05, + "loss": 1.3903, + "step": 1983000 + }, + { + "epoch": 20.93, + "learning_rate": 3.9533754762181156e-05, + "loss": 1.4109, + "step": 1983500 + }, + { + "epoch": 20.94, + "learning_rate": 3.953111643466974e-05, + "loss": 1.4346, + "step": 1984000 + }, + { + "epoch": 20.94, + "learning_rate": 3.9528478107158314e-05, + "loss": 1.3927, + "step": 1984500 + }, + { + "epoch": 20.95, + "learning_rate": 3.952583977964689e-05, + "loss": 1.432, + "step": 1985000 + }, + { + "epoch": 20.95, + "learning_rate": 3.9523201452135465e-05, + "loss": 1.3841, + "step": 1985500 + }, + { + "epoch": 20.96, + "learning_rate": 3.952056312462404e-05, + "loss": 1.3985, + "step": 1986000 + }, + { + "epoch": 20.96, + "learning_rate": 3.9517924797112616e-05, + "loss": 1.3909, + "step": 1986500 + }, + { + "epoch": 20.97, + "learning_rate": 3.951528646960119e-05, + "loss": 1.3998, + "step": 1987000 + }, + { + "epoch": 20.97, + "learning_rate": 3.9512648142089766e-05, + "loss": 1.4033, + "step": 1987500 + }, + { + "epoch": 20.98, + "learning_rate": 3.951000981457835e-05, + "loss": 1.404, + "step": 1988000 + }, + { + "epoch": 20.99, + "learning_rate": 3.950737148706692e-05, + "loss": 1.3968, + "step": 1988500 + }, + { + "epoch": 20.99, + "learning_rate": 3.950473315955549e-05, + "loss": 1.3982, + "step": 1989000 + }, + { + "epoch": 21.0, + "learning_rate": 3.9502094832044075e-05, + "loss": 1.3755, + "step": 1989500 + }, + { + "epoch": 21.0, + "learning_rate": 3.949945650453265e-05, + "loss": 1.3694, + "step": 1990000 + }, + { + "epoch": 21.01, + "learning_rate": 3.949681817702122e-05, + "loss": 1.4208, + "step": 1990500 + }, + { + "epoch": 21.01, + "learning_rate": 3.94941798495098e-05, + "loss": 1.4059, + "step": 1991000 + }, + { + "epoch": 21.02, + "learning_rate": 3.949154152199838e-05, + "loss": 1.3682, + "step": 1991500 + }, + { + "epoch": 21.02, + "learning_rate": 3.948890319448695e-05, + "loss": 1.3812, + "step": 1992000 + }, + { + "epoch": 21.03, + "learning_rate": 3.948626486697553e-05, + "loss": 1.4008, + "step": 1992500 + }, + { + "epoch": 21.03, + "learning_rate": 3.9483626539464104e-05, + "loss": 1.36, + "step": 1993000 + }, + { + "epoch": 21.04, + "learning_rate": 3.948098821195268e-05, + "loss": 1.3967, + "step": 1993500 + }, + { + "epoch": 21.04, + "learning_rate": 3.9478349884441255e-05, + "loss": 1.3952, + "step": 1994000 + }, + { + "epoch": 21.05, + "learning_rate": 3.947571155692984e-05, + "loss": 1.3631, + "step": 1994500 + }, + { + "epoch": 21.05, + "learning_rate": 3.947307322941841e-05, + "loss": 1.3882, + "step": 1995000 + }, + { + "epoch": 21.06, + "learning_rate": 3.947043490190698e-05, + "loss": 1.4387, + "step": 1995500 + }, + { + "epoch": 21.06, + "learning_rate": 3.946779657439556e-05, + "loss": 1.3544, + "step": 1996000 + }, + { + "epoch": 21.07, + "learning_rate": 3.946515824688414e-05, + "loss": 1.3837, + "step": 1996500 + }, + { + "epoch": 21.07, + "learning_rate": 3.9462519919372714e-05, + "loss": 1.3868, + "step": 1997000 + }, + { + "epoch": 21.08, + "learning_rate": 3.945988159186129e-05, + "loss": 1.3782, + "step": 1997500 + }, + { + "epoch": 21.09, + "learning_rate": 3.9457243264349865e-05, + "loss": 1.387, + "step": 1998000 + }, + { + "epoch": 21.09, + "learning_rate": 3.945460493683844e-05, + "loss": 1.3356, + "step": 1998500 + }, + { + "epoch": 21.1, + "learning_rate": 3.9451966609327016e-05, + "loss": 1.4335, + "step": 1999000 + }, + { + "epoch": 21.1, + "learning_rate": 3.94493282818156e-05, + "loss": 1.392, + "step": 1999500 + }, + { + "epoch": 21.11, + "learning_rate": 3.944668995430417e-05, + "loss": 1.4194, + "step": 2000000 + }, + { + "epoch": 21.11, + "learning_rate": 3.944405162679274e-05, + "loss": 1.3712, + "step": 2000500 + }, + { + "epoch": 21.12, + "learning_rate": 3.944141329928132e-05, + "loss": 1.2999, + "step": 2001000 + }, + { + "epoch": 21.12, + "learning_rate": 3.94387749717699e-05, + "loss": 1.3456, + "step": 2001500 + }, + { + "epoch": 21.13, + "learning_rate": 3.9436136644258476e-05, + "loss": 1.4002, + "step": 2002000 + }, + { + "epoch": 21.13, + "learning_rate": 3.9433498316747045e-05, + "loss": 1.411, + "step": 2002500 + }, + { + "epoch": 21.14, + "learning_rate": 3.943085998923563e-05, + "loss": 1.346, + "step": 2003000 + }, + { + "epoch": 21.14, + "learning_rate": 3.94282216617242e-05, + "loss": 1.4249, + "step": 2003500 + }, + { + "epoch": 21.15, + "learning_rate": 3.942558333421278e-05, + "loss": 1.3578, + "step": 2004000 + }, + { + "epoch": 21.15, + "learning_rate": 3.942294500670135e-05, + "loss": 1.4234, + "step": 2004500 + }, + { + "epoch": 21.16, + "learning_rate": 3.942030667918993e-05, + "loss": 1.4418, + "step": 2005000 + }, + { + "epoch": 21.16, + "learning_rate": 3.9417668351678504e-05, + "loss": 1.4219, + "step": 2005500 + }, + { + "epoch": 21.17, + "learning_rate": 3.941503002416708e-05, + "loss": 1.4311, + "step": 2006000 + }, + { + "epoch": 21.18, + "learning_rate": 3.941239169665566e-05, + "loss": 1.3649, + "step": 2006500 + }, + { + "epoch": 21.18, + "learning_rate": 3.940975336914424e-05, + "loss": 1.4193, + "step": 2007000 + }, + { + "epoch": 21.19, + "learning_rate": 3.9407115041632806e-05, + "loss": 1.3597, + "step": 2007500 + }, + { + "epoch": 21.19, + "learning_rate": 3.940447671412139e-05, + "loss": 1.3891, + "step": 2008000 + }, + { + "epoch": 21.2, + "learning_rate": 3.9401838386609964e-05, + "loss": 1.4104, + "step": 2008500 + }, + { + "epoch": 21.2, + "learning_rate": 3.939920005909854e-05, + "loss": 1.3909, + "step": 2009000 + }, + { + "epoch": 21.21, + "learning_rate": 3.939656173158711e-05, + "loss": 1.411, + "step": 2009500 + }, + { + "epoch": 21.21, + "learning_rate": 3.939392340407569e-05, + "loss": 1.3715, + "step": 2010000 + }, + { + "epoch": 21.22, + "learning_rate": 3.9391285076564266e-05, + "loss": 1.3664, + "step": 2010500 + }, + { + "epoch": 21.22, + "learning_rate": 3.938864674905284e-05, + "loss": 1.3745, + "step": 2011000 + }, + { + "epoch": 21.23, + "learning_rate": 3.9386008421541424e-05, + "loss": 1.4044, + "step": 2011500 + }, + { + "epoch": 21.23, + "learning_rate": 3.938337009402999e-05, + "loss": 1.3718, + "step": 2012000 + }, + { + "epoch": 21.24, + "learning_rate": 3.938073176651857e-05, + "loss": 1.3933, + "step": 2012500 + }, + { + "epoch": 21.24, + "learning_rate": 3.9378093439007143e-05, + "loss": 1.3883, + "step": 2013000 + }, + { + "epoch": 21.25, + "learning_rate": 3.9375455111495726e-05, + "loss": 1.327, + "step": 2013500 + }, + { + "epoch": 21.25, + "learning_rate": 3.93728167839843e-05, + "loss": 1.3579, + "step": 2014000 + }, + { + "epoch": 21.26, + "learning_rate": 3.937017845647287e-05, + "loss": 1.3574, + "step": 2014500 + }, + { + "epoch": 21.26, + "learning_rate": 3.936754012896145e-05, + "loss": 1.3915, + "step": 2015000 + }, + { + "epoch": 21.27, + "learning_rate": 3.936490180145003e-05, + "loss": 1.3836, + "step": 2015500 + }, + { + "epoch": 21.28, + "learning_rate": 3.93622634739386e-05, + "loss": 1.3582, + "step": 2016000 + }, + { + "epoch": 21.28, + "learning_rate": 3.935962514642718e-05, + "loss": 1.4099, + "step": 2016500 + }, + { + "epoch": 21.29, + "learning_rate": 3.9356986818915754e-05, + "loss": 1.3748, + "step": 2017000 + }, + { + "epoch": 21.29, + "learning_rate": 3.935434849140433e-05, + "loss": 1.3847, + "step": 2017500 + }, + { + "epoch": 21.3, + "learning_rate": 3.9351710163892905e-05, + "loss": 1.3439, + "step": 2018000 + }, + { + "epoch": 21.3, + "learning_rate": 3.934907183638149e-05, + "loss": 1.3927, + "step": 2018500 + }, + { + "epoch": 21.31, + "learning_rate": 3.9346433508870056e-05, + "loss": 1.3641, + "step": 2019000 + }, + { + "epoch": 21.31, + "learning_rate": 3.934379518135863e-05, + "loss": 1.4157, + "step": 2019500 + }, + { + "epoch": 21.32, + "learning_rate": 3.9341156853847214e-05, + "loss": 1.4578, + "step": 2020000 + }, + { + "epoch": 21.32, + "learning_rate": 3.933851852633579e-05, + "loss": 1.3924, + "step": 2020500 + }, + { + "epoch": 21.33, + "learning_rate": 3.9335880198824365e-05, + "loss": 1.3831, + "step": 2021000 + }, + { + "epoch": 21.33, + "learning_rate": 3.933324187131294e-05, + "loss": 1.3496, + "step": 2021500 + }, + { + "epoch": 21.34, + "learning_rate": 3.9330603543801516e-05, + "loss": 1.3849, + "step": 2022000 + }, + { + "epoch": 21.34, + "learning_rate": 3.932796521629009e-05, + "loss": 1.3781, + "step": 2022500 + }, + { + "epoch": 21.35, + "learning_rate": 3.932532688877867e-05, + "loss": 1.3825, + "step": 2023000 + }, + { + "epoch": 21.35, + "learning_rate": 3.932268856126725e-05, + "loss": 1.3575, + "step": 2023500 + }, + { + "epoch": 21.36, + "learning_rate": 3.932005023375582e-05, + "loss": 1.3808, + "step": 2024000 + }, + { + "epoch": 21.37, + "learning_rate": 3.931741190624439e-05, + "loss": 1.3736, + "step": 2024500 + }, + { + "epoch": 21.37, + "learning_rate": 3.931477357873297e-05, + "loss": 1.3814, + "step": 2025000 + }, + { + "epoch": 21.38, + "learning_rate": 3.931213525122155e-05, + "loss": 1.3912, + "step": 2025500 + }, + { + "epoch": 21.38, + "learning_rate": 3.9309496923710126e-05, + "loss": 1.4359, + "step": 2026000 + }, + { + "epoch": 21.39, + "learning_rate": 3.9306858596198695e-05, + "loss": 1.4003, + "step": 2026500 + }, + { + "epoch": 21.39, + "learning_rate": 3.930422026868728e-05, + "loss": 1.3788, + "step": 2027000 + }, + { + "epoch": 21.4, + "learning_rate": 3.930158194117585e-05, + "loss": 1.3274, + "step": 2027500 + }, + { + "epoch": 21.4, + "learning_rate": 3.929894361366443e-05, + "loss": 1.3303, + "step": 2028000 + }, + { + "epoch": 21.41, + "learning_rate": 3.9296305286153004e-05, + "loss": 1.4014, + "step": 2028500 + }, + { + "epoch": 21.41, + "learning_rate": 3.929366695864158e-05, + "loss": 1.4113, + "step": 2029000 + }, + { + "epoch": 21.42, + "learning_rate": 3.9291028631130155e-05, + "loss": 1.3823, + "step": 2029500 + }, + { + "epoch": 21.42, + "learning_rate": 3.928839030361873e-05, + "loss": 1.357, + "step": 2030000 + }, + { + "epoch": 21.43, + "learning_rate": 3.928575197610731e-05, + "loss": 1.3855, + "step": 2030500 + }, + { + "epoch": 21.43, + "learning_rate": 3.928311364859588e-05, + "loss": 1.4027, + "step": 2031000 + }, + { + "epoch": 21.44, + "learning_rate": 3.928047532108446e-05, + "loss": 1.4262, + "step": 2031500 + }, + { + "epoch": 21.44, + "learning_rate": 3.927783699357304e-05, + "loss": 1.3829, + "step": 2032000 + }, + { + "epoch": 21.45, + "learning_rate": 3.9275198666061615e-05, + "loss": 1.3352, + "step": 2032500 + }, + { + "epoch": 21.45, + "learning_rate": 3.927256033855019e-05, + "loss": 1.3796, + "step": 2033000 + }, + { + "epoch": 21.46, + "learning_rate": 3.9269922011038766e-05, + "loss": 1.3966, + "step": 2033500 + }, + { + "epoch": 21.47, + "learning_rate": 3.926728368352734e-05, + "loss": 1.3562, + "step": 2034000 + }, + { + "epoch": 21.47, + "learning_rate": 3.9264645356015916e-05, + "loss": 1.4295, + "step": 2034500 + }, + { + "epoch": 21.48, + "learning_rate": 3.926200702850449e-05, + "loss": 1.3522, + "step": 2035000 + }, + { + "epoch": 21.48, + "learning_rate": 3.9259368700993074e-05, + "loss": 1.4211, + "step": 2035500 + }, + { + "epoch": 21.49, + "learning_rate": 3.925673037348164e-05, + "loss": 1.3348, + "step": 2036000 + }, + { + "epoch": 21.49, + "learning_rate": 3.925409204597022e-05, + "loss": 1.3964, + "step": 2036500 + }, + { + "epoch": 21.5, + "learning_rate": 3.9251453718458794e-05, + "loss": 1.4079, + "step": 2037000 + }, + { + "epoch": 21.5, + "learning_rate": 3.9248815390947376e-05, + "loss": 1.4036, + "step": 2037500 + }, + { + "epoch": 21.51, + "learning_rate": 3.9246177063435945e-05, + "loss": 1.3511, + "step": 2038000 + }, + { + "epoch": 21.51, + "learning_rate": 3.924353873592452e-05, + "loss": 1.43, + "step": 2038500 + }, + { + "epoch": 21.52, + "learning_rate": 3.92409004084131e-05, + "loss": 1.4274, + "step": 2039000 + }, + { + "epoch": 21.52, + "learning_rate": 3.923826208090168e-05, + "loss": 1.3768, + "step": 2039500 + }, + { + "epoch": 21.53, + "learning_rate": 3.9235623753390254e-05, + "loss": 1.3771, + "step": 2040000 + }, + { + "epoch": 21.53, + "learning_rate": 3.923298542587883e-05, + "loss": 1.3668, + "step": 2040500 + }, + { + "epoch": 21.54, + "learning_rate": 3.9230347098367405e-05, + "loss": 1.4027, + "step": 2041000 + }, + { + "epoch": 21.54, + "learning_rate": 3.922770877085598e-05, + "loss": 1.3921, + "step": 2041500 + }, + { + "epoch": 21.55, + "learning_rate": 3.9225070443344556e-05, + "loss": 1.372, + "step": 2042000 + }, + { + "epoch": 21.56, + "learning_rate": 3.922243211583314e-05, + "loss": 1.3869, + "step": 2042500 + }, + { + "epoch": 21.56, + "learning_rate": 3.9219793788321707e-05, + "loss": 1.4188, + "step": 2043000 + }, + { + "epoch": 21.57, + "learning_rate": 3.921715546081028e-05, + "loss": 1.3903, + "step": 2043500 + }, + { + "epoch": 21.57, + "learning_rate": 3.9214517133298864e-05, + "loss": 1.3879, + "step": 2044000 + }, + { + "epoch": 21.58, + "learning_rate": 3.921187880578744e-05, + "loss": 1.333, + "step": 2044500 + }, + { + "epoch": 21.58, + "learning_rate": 3.9209240478276015e-05, + "loss": 1.4115, + "step": 2045000 + }, + { + "epoch": 21.59, + "learning_rate": 3.920660215076459e-05, + "loss": 1.3816, + "step": 2045500 + }, + { + "epoch": 21.59, + "learning_rate": 3.9203963823253166e-05, + "loss": 1.3807, + "step": 2046000 + }, + { + "epoch": 21.6, + "learning_rate": 3.920132549574174e-05, + "loss": 1.4282, + "step": 2046500 + }, + { + "epoch": 21.6, + "learning_rate": 3.919868716823032e-05, + "loss": 1.4287, + "step": 2047000 + }, + { + "epoch": 21.61, + "learning_rate": 3.919604884071889e-05, + "loss": 1.3562, + "step": 2047500 + }, + { + "epoch": 21.61, + "learning_rate": 3.919341051320747e-05, + "loss": 1.4201, + "step": 2048000 + }, + { + "epoch": 21.62, + "learning_rate": 3.9190772185696044e-05, + "loss": 1.3506, + "step": 2048500 + }, + { + "epoch": 21.62, + "learning_rate": 3.918813385818462e-05, + "loss": 1.3862, + "step": 2049000 + }, + { + "epoch": 21.63, + "learning_rate": 3.91854955306732e-05, + "loss": 1.3956, + "step": 2049500 + }, + { + "epoch": 21.63, + "learning_rate": 3.918285720316177e-05, + "loss": 1.366, + "step": 2050000 + }, + { + "epoch": 21.64, + "learning_rate": 3.9180218875650346e-05, + "loss": 1.4455, + "step": 2050500 + }, + { + "epoch": 21.64, + "learning_rate": 3.917758054813893e-05, + "loss": 1.3854, + "step": 2051000 + }, + { + "epoch": 21.65, + "learning_rate": 3.91749422206275e-05, + "loss": 1.388, + "step": 2051500 + }, + { + "epoch": 21.66, + "learning_rate": 3.917230389311608e-05, + "loss": 1.351, + "step": 2052000 + }, + { + "epoch": 21.66, + "learning_rate": 3.9169665565604654e-05, + "loss": 1.3894, + "step": 2052500 + }, + { + "epoch": 21.67, + "learning_rate": 3.916702723809323e-05, + "loss": 1.3669, + "step": 2053000 + }, + { + "epoch": 21.67, + "learning_rate": 3.9164388910581805e-05, + "loss": 1.4303, + "step": 2053500 + }, + { + "epoch": 21.68, + "learning_rate": 3.916175058307038e-05, + "loss": 1.4227, + "step": 2054000 + }, + { + "epoch": 21.68, + "learning_rate": 3.915911225555896e-05, + "loss": 1.406, + "step": 2054500 + }, + { + "epoch": 21.69, + "learning_rate": 3.915647392804753e-05, + "loss": 1.4154, + "step": 2055000 + }, + { + "epoch": 21.69, + "learning_rate": 3.915383560053611e-05, + "loss": 1.4222, + "step": 2055500 + }, + { + "epoch": 21.7, + "learning_rate": 3.915119727302469e-05, + "loss": 1.3749, + "step": 2056000 + }, + { + "epoch": 21.7, + "learning_rate": 3.9148558945513265e-05, + "loss": 1.3447, + "step": 2056500 + }, + { + "epoch": 21.71, + "learning_rate": 3.9145920618001834e-05, + "loss": 1.3684, + "step": 2057000 + }, + { + "epoch": 21.71, + "learning_rate": 3.9143282290490416e-05, + "loss": 1.3941, + "step": 2057500 + }, + { + "epoch": 21.72, + "learning_rate": 3.914064396297899e-05, + "loss": 1.3746, + "step": 2058000 + }, + { + "epoch": 21.72, + "learning_rate": 3.913800563546757e-05, + "loss": 1.3937, + "step": 2058500 + }, + { + "epoch": 21.73, + "learning_rate": 3.913536730795614e-05, + "loss": 1.3809, + "step": 2059000 + }, + { + "epoch": 21.73, + "learning_rate": 3.913272898044472e-05, + "loss": 1.3824, + "step": 2059500 + }, + { + "epoch": 21.74, + "learning_rate": 3.9130090652933293e-05, + "loss": 1.4225, + "step": 2060000 + }, + { + "epoch": 21.75, + "learning_rate": 3.912745232542187e-05, + "loss": 1.3488, + "step": 2060500 + }, + { + "epoch": 21.75, + "learning_rate": 3.9124813997910444e-05, + "loss": 1.3575, + "step": 2061000 + }, + { + "epoch": 21.76, + "learning_rate": 3.912217567039903e-05, + "loss": 1.3827, + "step": 2061500 + }, + { + "epoch": 21.76, + "learning_rate": 3.9119537342887595e-05, + "loss": 1.3716, + "step": 2062000 + }, + { + "epoch": 21.77, + "learning_rate": 3.911689901537617e-05, + "loss": 1.4321, + "step": 2062500 + }, + { + "epoch": 21.77, + "learning_rate": 3.911426068786475e-05, + "loss": 1.4089, + "step": 2063000 + }, + { + "epoch": 21.78, + "learning_rate": 3.911162236035333e-05, + "loss": 1.4191, + "step": 2063500 + }, + { + "epoch": 21.78, + "learning_rate": 3.9108984032841904e-05, + "loss": 1.3894, + "step": 2064000 + }, + { + "epoch": 21.79, + "learning_rate": 3.910634570533048e-05, + "loss": 1.3959, + "step": 2064500 + }, + { + "epoch": 21.79, + "learning_rate": 3.9103707377819055e-05, + "loss": 1.3587, + "step": 2065000 + }, + { + "epoch": 21.8, + "learning_rate": 3.910106905030763e-05, + "loss": 1.3471, + "step": 2065500 + }, + { + "epoch": 21.8, + "learning_rate": 3.9098430722796206e-05, + "loss": 1.3197, + "step": 2066000 + }, + { + "epoch": 21.81, + "learning_rate": 3.909579239528478e-05, + "loss": 1.399, + "step": 2066500 + }, + { + "epoch": 21.81, + "learning_rate": 3.909315406777336e-05, + "loss": 1.4248, + "step": 2067000 + }, + { + "epoch": 21.82, + "learning_rate": 3.909051574026193e-05, + "loss": 1.4075, + "step": 2067500 + }, + { + "epoch": 21.82, + "learning_rate": 3.9087877412750515e-05, + "loss": 1.3428, + "step": 2068000 + }, + { + "epoch": 21.83, + "learning_rate": 3.908523908523909e-05, + "loss": 1.3479, + "step": 2068500 + }, + { + "epoch": 21.83, + "learning_rate": 3.908260075772766e-05, + "loss": 1.4012, + "step": 2069000 + }, + { + "epoch": 21.84, + "learning_rate": 3.907996243021624e-05, + "loss": 1.4043, + "step": 2069500 + }, + { + "epoch": 21.85, + "learning_rate": 3.907732410270482e-05, + "loss": 1.3624, + "step": 2070000 + }, + { + "epoch": 21.85, + "learning_rate": 3.907468577519339e-05, + "loss": 1.3837, + "step": 2070500 + }, + { + "epoch": 21.86, + "learning_rate": 3.907204744768197e-05, + "loss": 1.3994, + "step": 2071000 + }, + { + "epoch": 21.86, + "learning_rate": 3.906940912017054e-05, + "loss": 1.4307, + "step": 2071500 + }, + { + "epoch": 21.87, + "learning_rate": 3.906677079265912e-05, + "loss": 1.3997, + "step": 2072000 + }, + { + "epoch": 21.87, + "learning_rate": 3.9064132465147694e-05, + "loss": 1.397, + "step": 2072500 + }, + { + "epoch": 21.88, + "learning_rate": 3.9061494137636276e-05, + "loss": 1.3745, + "step": 2073000 + }, + { + "epoch": 21.88, + "learning_rate": 3.905885581012485e-05, + "loss": 1.397, + "step": 2073500 + }, + { + "epoch": 21.89, + "learning_rate": 3.905621748261342e-05, + "loss": 1.3941, + "step": 2074000 + }, + { + "epoch": 21.89, + "learning_rate": 3.9053579155101996e-05, + "loss": 1.425, + "step": 2074500 + }, + { + "epoch": 21.9, + "learning_rate": 3.905094082759058e-05, + "loss": 1.3514, + "step": 2075000 + }, + { + "epoch": 21.9, + "learning_rate": 3.9048302500079154e-05, + "loss": 1.4036, + "step": 2075500 + }, + { + "epoch": 21.91, + "learning_rate": 3.904566417256772e-05, + "loss": 1.3406, + "step": 2076000 + }, + { + "epoch": 21.91, + "learning_rate": 3.9043025845056305e-05, + "loss": 1.4217, + "step": 2076500 + }, + { + "epoch": 21.92, + "learning_rate": 3.904038751754488e-05, + "loss": 1.4323, + "step": 2077000 + }, + { + "epoch": 21.92, + "learning_rate": 3.9037749190033456e-05, + "loss": 1.4246, + "step": 2077500 + }, + { + "epoch": 21.93, + "learning_rate": 3.903511086252203e-05, + "loss": 1.4214, + "step": 2078000 + }, + { + "epoch": 21.94, + "learning_rate": 3.903247253501061e-05, + "loss": 1.3256, + "step": 2078500 + }, + { + "epoch": 21.94, + "learning_rate": 3.902983420749918e-05, + "loss": 1.4434, + "step": 2079000 + }, + { + "epoch": 21.95, + "learning_rate": 3.902719587998776e-05, + "loss": 1.4262, + "step": 2079500 + }, + { + "epoch": 21.95, + "learning_rate": 3.902455755247634e-05, + "loss": 1.395, + "step": 2080000 + }, + { + "epoch": 21.96, + "learning_rate": 3.9021919224964916e-05, + "loss": 1.4113, + "step": 2080500 + }, + { + "epoch": 21.96, + "learning_rate": 3.9019280897453484e-05, + "loss": 1.3949, + "step": 2081000 + }, + { + "epoch": 21.97, + "learning_rate": 3.9016642569942067e-05, + "loss": 1.369, + "step": 2081500 + }, + { + "epoch": 21.97, + "learning_rate": 3.901400424243064e-05, + "loss": 1.3758, + "step": 2082000 + }, + { + "epoch": 21.98, + "learning_rate": 3.901136591491922e-05, + "loss": 1.391, + "step": 2082500 + }, + { + "epoch": 21.98, + "learning_rate": 3.900872758740779e-05, + "loss": 1.3923, + "step": 2083000 + }, + { + "epoch": 21.99, + "learning_rate": 3.900608925989637e-05, + "loss": 1.4035, + "step": 2083500 + }, + { + "epoch": 21.99, + "learning_rate": 3.9003450932384944e-05, + "loss": 1.385, + "step": 2084000 + }, + { + "epoch": 22.0, + "learning_rate": 3.900081260487352e-05, + "loss": 1.396, + "step": 2084500 + }, + { + "epoch": 22.0, + "learning_rate": 3.89981742773621e-05, + "loss": 1.3781, + "step": 2085000 + }, + { + "epoch": 22.01, + "learning_rate": 3.899553594985067e-05, + "loss": 1.3616, + "step": 2085500 + }, + { + "epoch": 22.01, + "learning_rate": 3.8992897622339246e-05, + "loss": 1.359, + "step": 2086000 + }, + { + "epoch": 22.02, + "learning_rate": 3.899025929482782e-05, + "loss": 1.3537, + "step": 2086500 + }, + { + "epoch": 22.02, + "learning_rate": 3.8987620967316404e-05, + "loss": 1.365, + "step": 2087000 + }, + { + "epoch": 22.03, + "learning_rate": 3.898498263980498e-05, + "loss": 1.3927, + "step": 2087500 + }, + { + "epoch": 22.04, + "learning_rate": 3.898234431229355e-05, + "loss": 1.3831, + "step": 2088000 + }, + { + "epoch": 22.04, + "learning_rate": 3.897970598478213e-05, + "loss": 1.3649, + "step": 2088500 + }, + { + "epoch": 22.05, + "learning_rate": 3.8977067657270706e-05, + "loss": 1.38, + "step": 2089000 + }, + { + "epoch": 22.05, + "learning_rate": 3.897442932975928e-05, + "loss": 1.3948, + "step": 2089500 + }, + { + "epoch": 22.06, + "learning_rate": 3.8971791002247857e-05, + "loss": 1.3284, + "step": 2090000 + }, + { + "epoch": 22.06, + "learning_rate": 3.896915267473643e-05, + "loss": 1.3197, + "step": 2090500 + }, + { + "epoch": 22.07, + "learning_rate": 3.896651434722501e-05, + "loss": 1.3589, + "step": 2091000 + }, + { + "epoch": 22.07, + "learning_rate": 3.896387601971358e-05, + "loss": 1.3749, + "step": 2091500 + }, + { + "epoch": 22.08, + "learning_rate": 3.8961237692202165e-05, + "loss": 1.4381, + "step": 2092000 + }, + { + "epoch": 22.08, + "learning_rate": 3.8958599364690734e-05, + "loss": 1.4096, + "step": 2092500 + }, + { + "epoch": 22.09, + "learning_rate": 3.895596103717931e-05, + "loss": 1.3671, + "step": 2093000 + }, + { + "epoch": 22.09, + "learning_rate": 3.895332270966789e-05, + "loss": 1.3691, + "step": 2093500 + }, + { + "epoch": 22.1, + "learning_rate": 3.895068438215647e-05, + "loss": 1.3214, + "step": 2094000 + }, + { + "epoch": 22.1, + "learning_rate": 3.894804605464504e-05, + "loss": 1.3683, + "step": 2094500 + }, + { + "epoch": 22.11, + "learning_rate": 3.894540772713362e-05, + "loss": 1.3764, + "step": 2095000 + }, + { + "epoch": 22.11, + "learning_rate": 3.8942769399622194e-05, + "loss": 1.3364, + "step": 2095500 + }, + { + "epoch": 22.12, + "learning_rate": 3.894013107211077e-05, + "loss": 1.4164, + "step": 2096000 + }, + { + "epoch": 22.13, + "learning_rate": 3.8937492744599345e-05, + "loss": 1.3631, + "step": 2096500 + }, + { + "epoch": 22.13, + "learning_rate": 3.893485441708793e-05, + "loss": 1.3855, + "step": 2097000 + }, + { + "epoch": 22.14, + "learning_rate": 3.8932216089576496e-05, + "loss": 1.3635, + "step": 2097500 + }, + { + "epoch": 22.14, + "learning_rate": 3.892957776206507e-05, + "loss": 1.4046, + "step": 2098000 + }, + { + "epoch": 22.15, + "learning_rate": 3.892693943455365e-05, + "loss": 1.4206, + "step": 2098500 + }, + { + "epoch": 22.15, + "learning_rate": 3.892430110704223e-05, + "loss": 1.3927, + "step": 2099000 + }, + { + "epoch": 22.16, + "learning_rate": 3.8921662779530804e-05, + "loss": 1.4013, + "step": 2099500 + }, + { + "epoch": 22.16, + "learning_rate": 3.891902445201937e-05, + "loss": 1.3648, + "step": 2100000 + }, + { + "epoch": 22.17, + "learning_rate": 3.8916386124507955e-05, + "loss": 1.3769, + "step": 2100500 + }, + { + "epoch": 22.17, + "learning_rate": 3.891374779699653e-05, + "loss": 1.4131, + "step": 2101000 + }, + { + "epoch": 22.18, + "learning_rate": 3.8911109469485106e-05, + "loss": 1.3333, + "step": 2101500 + }, + { + "epoch": 22.18, + "learning_rate": 3.890847114197368e-05, + "loss": 1.3513, + "step": 2102000 + }, + { + "epoch": 22.19, + "learning_rate": 3.890583281446226e-05, + "loss": 1.3955, + "step": 2102500 + }, + { + "epoch": 22.19, + "learning_rate": 3.890319448695083e-05, + "loss": 1.4137, + "step": 2103000 + }, + { + "epoch": 22.2, + "learning_rate": 3.890055615943941e-05, + "loss": 1.3644, + "step": 2103500 + }, + { + "epoch": 22.2, + "learning_rate": 3.889791783192799e-05, + "loss": 1.3727, + "step": 2104000 + }, + { + "epoch": 22.21, + "learning_rate": 3.889527950441656e-05, + "loss": 1.3657, + "step": 2104500 + }, + { + "epoch": 22.21, + "learning_rate": 3.8892641176905135e-05, + "loss": 1.3942, + "step": 2105000 + }, + { + "epoch": 22.22, + "learning_rate": 3.889000284939372e-05, + "loss": 1.3257, + "step": 2105500 + }, + { + "epoch": 22.23, + "learning_rate": 3.888736452188229e-05, + "loss": 1.3718, + "step": 2106000 + }, + { + "epoch": 22.23, + "learning_rate": 3.888472619437087e-05, + "loss": 1.3781, + "step": 2106500 + }, + { + "epoch": 22.24, + "learning_rate": 3.8882087866859443e-05, + "loss": 1.4102, + "step": 2107000 + }, + { + "epoch": 22.24, + "learning_rate": 3.887944953934802e-05, + "loss": 1.3396, + "step": 2107500 + }, + { + "epoch": 22.25, + "learning_rate": 3.8876811211836594e-05, + "loss": 1.4218, + "step": 2108000 + }, + { + "epoch": 22.25, + "learning_rate": 3.887417288432517e-05, + "loss": 1.4143, + "step": 2108500 + }, + { + "epoch": 22.26, + "learning_rate": 3.887153455681375e-05, + "loss": 1.4089, + "step": 2109000 + }, + { + "epoch": 22.26, + "learning_rate": 3.886889622930232e-05, + "loss": 1.345, + "step": 2109500 + }, + { + "epoch": 22.27, + "learning_rate": 3.8866257901790896e-05, + "loss": 1.363, + "step": 2110000 + }, + { + "epoch": 22.27, + "learning_rate": 3.886361957427947e-05, + "loss": 1.4317, + "step": 2110500 + }, + { + "epoch": 22.28, + "learning_rate": 3.8860981246768054e-05, + "loss": 1.4463, + "step": 2111000 + }, + { + "epoch": 22.28, + "learning_rate": 3.885834291925662e-05, + "loss": 1.3745, + "step": 2111500 + }, + { + "epoch": 22.29, + "learning_rate": 3.88557045917452e-05, + "loss": 1.4107, + "step": 2112000 + }, + { + "epoch": 22.29, + "learning_rate": 3.885306626423378e-05, + "loss": 1.4021, + "step": 2112500 + }, + { + "epoch": 22.3, + "learning_rate": 3.8850427936722356e-05, + "loss": 1.4359, + "step": 2113000 + }, + { + "epoch": 22.3, + "learning_rate": 3.884778960921093e-05, + "loss": 1.4052, + "step": 2113500 + }, + { + "epoch": 22.31, + "learning_rate": 3.884515128169951e-05, + "loss": 1.3745, + "step": 2114000 + }, + { + "epoch": 22.31, + "learning_rate": 3.884251295418808e-05, + "loss": 1.4098, + "step": 2114500 + }, + { + "epoch": 22.32, + "learning_rate": 3.883987462667666e-05, + "loss": 1.346, + "step": 2115000 + }, + { + "epoch": 22.33, + "learning_rate": 3.8837236299165234e-05, + "loss": 1.3932, + "step": 2115500 + }, + { + "epoch": 22.33, + "learning_rate": 3.8834597971653816e-05, + "loss": 1.422, + "step": 2116000 + }, + { + "epoch": 22.34, + "learning_rate": 3.8831959644142384e-05, + "loss": 1.4215, + "step": 2116500 + }, + { + "epoch": 22.34, + "learning_rate": 3.882932131663096e-05, + "loss": 1.4134, + "step": 2117000 + }, + { + "epoch": 22.35, + "learning_rate": 3.882668298911954e-05, + "loss": 1.4019, + "step": 2117500 + }, + { + "epoch": 22.35, + "learning_rate": 3.882404466160812e-05, + "loss": 1.3887, + "step": 2118000 + }, + { + "epoch": 22.36, + "learning_rate": 3.882140633409669e-05, + "loss": 1.3917, + "step": 2118500 + }, + { + "epoch": 22.36, + "learning_rate": 3.881876800658527e-05, + "loss": 1.3572, + "step": 2119000 + }, + { + "epoch": 22.37, + "learning_rate": 3.8816129679073844e-05, + "loss": 1.337, + "step": 2119500 + }, + { + "epoch": 22.37, + "learning_rate": 3.881349135156242e-05, + "loss": 1.3807, + "step": 2120000 + }, + { + "epoch": 22.38, + "learning_rate": 3.8810853024050995e-05, + "loss": 1.4321, + "step": 2120500 + }, + { + "epoch": 22.38, + "learning_rate": 3.880821469653957e-05, + "loss": 1.3691, + "step": 2121000 + }, + { + "epoch": 22.39, + "learning_rate": 3.8805576369028146e-05, + "loss": 1.3481, + "step": 2121500 + }, + { + "epoch": 22.39, + "learning_rate": 3.880293804151672e-05, + "loss": 1.3918, + "step": 2122000 + }, + { + "epoch": 22.4, + "learning_rate": 3.88002997140053e-05, + "loss": 1.4121, + "step": 2122500 + }, + { + "epoch": 22.4, + "learning_rate": 3.879766138649388e-05, + "loss": 1.4158, + "step": 2123000 + }, + { + "epoch": 22.41, + "learning_rate": 3.879502305898245e-05, + "loss": 1.3719, + "step": 2123500 + }, + { + "epoch": 22.42, + "learning_rate": 3.8792384731471024e-05, + "loss": 1.4054, + "step": 2124000 + }, + { + "epoch": 22.42, + "learning_rate": 3.8789746403959606e-05, + "loss": 1.3895, + "step": 2124500 + }, + { + "epoch": 22.43, + "learning_rate": 3.878710807644818e-05, + "loss": 1.3731, + "step": 2125000 + }, + { + "epoch": 22.43, + "learning_rate": 3.878446974893676e-05, + "loss": 1.4031, + "step": 2125500 + }, + { + "epoch": 22.44, + "learning_rate": 3.878183142142533e-05, + "loss": 1.3789, + "step": 2126000 + }, + { + "epoch": 22.44, + "learning_rate": 3.877919309391391e-05, + "loss": 1.3357, + "step": 2126500 + }, + { + "epoch": 22.45, + "learning_rate": 3.877655476640248e-05, + "loss": 1.3258, + "step": 2127000 + }, + { + "epoch": 22.45, + "learning_rate": 3.877391643889106e-05, + "loss": 1.4094, + "step": 2127500 + }, + { + "epoch": 22.46, + "learning_rate": 3.877127811137964e-05, + "loss": 1.3487, + "step": 2128000 + }, + { + "epoch": 22.46, + "learning_rate": 3.876863978386821e-05, + "loss": 1.4007, + "step": 2128500 + }, + { + "epoch": 22.47, + "learning_rate": 3.8766001456356785e-05, + "loss": 1.4097, + "step": 2129000 + }, + { + "epoch": 22.47, + "learning_rate": 3.876336312884537e-05, + "loss": 1.3327, + "step": 2129500 + }, + { + "epoch": 22.48, + "learning_rate": 3.876072480133394e-05, + "loss": 1.3239, + "step": 2130000 + }, + { + "epoch": 22.48, + "learning_rate": 3.875808647382251e-05, + "loss": 1.3627, + "step": 2130500 + }, + { + "epoch": 22.49, + "learning_rate": 3.8755448146311094e-05, + "loss": 1.3628, + "step": 2131000 + }, + { + "epoch": 22.49, + "learning_rate": 3.875280981879967e-05, + "loss": 1.3855, + "step": 2131500 + }, + { + "epoch": 22.5, + "learning_rate": 3.8750171491288245e-05, + "loss": 1.4129, + "step": 2132000 + }, + { + "epoch": 22.5, + "learning_rate": 3.874753316377682e-05, + "loss": 1.4109, + "step": 2132500 + }, + { + "epoch": 22.51, + "learning_rate": 3.8744894836265396e-05, + "loss": 1.3777, + "step": 2133000 + }, + { + "epoch": 22.52, + "learning_rate": 3.874225650875397e-05, + "loss": 1.3298, + "step": 2133500 + }, + { + "epoch": 22.52, + "learning_rate": 3.873961818124255e-05, + "loss": 1.4152, + "step": 2134000 + }, + { + "epoch": 22.53, + "learning_rate": 3.873697985373112e-05, + "loss": 1.4411, + "step": 2134500 + }, + { + "epoch": 22.53, + "learning_rate": 3.8734341526219705e-05, + "loss": 1.3461, + "step": 2135000 + }, + { + "epoch": 22.54, + "learning_rate": 3.873170319870827e-05, + "loss": 1.39, + "step": 2135500 + }, + { + "epoch": 22.54, + "learning_rate": 3.872906487119685e-05, + "loss": 1.392, + "step": 2136000 + }, + { + "epoch": 22.55, + "learning_rate": 3.872642654368543e-05, + "loss": 1.3663, + "step": 2136500 + }, + { + "epoch": 22.55, + "learning_rate": 3.8723788216174007e-05, + "loss": 1.3575, + "step": 2137000 + }, + { + "epoch": 22.56, + "learning_rate": 3.872114988866258e-05, + "loss": 1.3611, + "step": 2137500 + }, + { + "epoch": 22.56, + "learning_rate": 3.871851156115116e-05, + "loss": 1.4542, + "step": 2138000 + }, + { + "epoch": 22.57, + "learning_rate": 3.871587323363973e-05, + "loss": 1.4129, + "step": 2138500 + }, + { + "epoch": 22.57, + "learning_rate": 3.871323490612831e-05, + "loss": 1.3629, + "step": 2139000 + }, + { + "epoch": 22.58, + "learning_rate": 3.8710596578616884e-05, + "loss": 1.4065, + "step": 2139500 + }, + { + "epoch": 22.58, + "learning_rate": 3.870795825110546e-05, + "loss": 1.4103, + "step": 2140000 + }, + { + "epoch": 22.59, + "learning_rate": 3.8705319923594035e-05, + "loss": 1.386, + "step": 2140500 + }, + { + "epoch": 22.59, + "learning_rate": 3.870268159608261e-05, + "loss": 1.34, + "step": 2141000 + }, + { + "epoch": 22.6, + "learning_rate": 3.870004326857119e-05, + "loss": 1.4004, + "step": 2141500 + }, + { + "epoch": 22.61, + "learning_rate": 3.869740494105977e-05, + "loss": 1.3937, + "step": 2142000 + }, + { + "epoch": 22.61, + "learning_rate": 3.869476661354834e-05, + "loss": 1.3376, + "step": 2142500 + }, + { + "epoch": 22.62, + "learning_rate": 3.869212828603692e-05, + "loss": 1.3208, + "step": 2143000 + }, + { + "epoch": 22.62, + "learning_rate": 3.8689489958525495e-05, + "loss": 1.3979, + "step": 2143500 + }, + { + "epoch": 22.63, + "learning_rate": 3.868685163101407e-05, + "loss": 1.4188, + "step": 2144000 + }, + { + "epoch": 22.63, + "learning_rate": 3.8684213303502646e-05, + "loss": 1.3614, + "step": 2144500 + }, + { + "epoch": 22.64, + "learning_rate": 3.868157497599122e-05, + "loss": 1.4315, + "step": 2145000 + }, + { + "epoch": 22.64, + "learning_rate": 3.86789366484798e-05, + "loss": 1.3498, + "step": 2145500 + }, + { + "epoch": 22.65, + "learning_rate": 3.867629832096837e-05, + "loss": 1.3904, + "step": 2146000 + }, + { + "epoch": 22.65, + "learning_rate": 3.8673659993456954e-05, + "loss": 1.389, + "step": 2146500 + }, + { + "epoch": 22.66, + "learning_rate": 3.867102166594553e-05, + "loss": 1.3774, + "step": 2147000 + }, + { + "epoch": 22.66, + "learning_rate": 3.86683833384341e-05, + "loss": 1.3809, + "step": 2147500 + }, + { + "epoch": 22.67, + "learning_rate": 3.8665745010922674e-05, + "loss": 1.3985, + "step": 2148000 + }, + { + "epoch": 22.67, + "learning_rate": 3.8663106683411256e-05, + "loss": 1.4532, + "step": 2148500 + }, + { + "epoch": 22.68, + "learning_rate": 3.866046835589983e-05, + "loss": 1.4191, + "step": 2149000 + }, + { + "epoch": 22.68, + "learning_rate": 3.86578300283884e-05, + "loss": 1.3776, + "step": 2149500 + }, + { + "epoch": 22.69, + "learning_rate": 3.865519170087698e-05, + "loss": 1.3557, + "step": 2150000 + }, + { + "epoch": 22.69, + "learning_rate": 3.865255337336556e-05, + "loss": 1.4316, + "step": 2150500 + }, + { + "epoch": 22.7, + "learning_rate": 3.8649915045854134e-05, + "loss": 1.3722, + "step": 2151000 + }, + { + "epoch": 22.71, + "learning_rate": 3.864727671834271e-05, + "loss": 1.3578, + "step": 2151500 + }, + { + "epoch": 22.71, + "learning_rate": 3.8644638390831285e-05, + "loss": 1.3957, + "step": 2152000 + }, + { + "epoch": 22.72, + "learning_rate": 3.864200006331986e-05, + "loss": 1.3812, + "step": 2152500 + }, + { + "epoch": 22.72, + "learning_rate": 3.8639361735808436e-05, + "loss": 1.3718, + "step": 2153000 + }, + { + "epoch": 22.73, + "learning_rate": 3.863672340829702e-05, + "loss": 1.378, + "step": 2153500 + }, + { + "epoch": 22.73, + "learning_rate": 3.8634085080785593e-05, + "loss": 1.4019, + "step": 2154000 + }, + { + "epoch": 22.74, + "learning_rate": 3.863144675327416e-05, + "loss": 1.4038, + "step": 2154500 + }, + { + "epoch": 22.74, + "learning_rate": 3.8628808425762744e-05, + "loss": 1.3565, + "step": 2155000 + }, + { + "epoch": 22.75, + "learning_rate": 3.862617009825132e-05, + "loss": 1.4153, + "step": 2155500 + }, + { + "epoch": 22.75, + "learning_rate": 3.8623531770739895e-05, + "loss": 1.3661, + "step": 2156000 + }, + { + "epoch": 22.76, + "learning_rate": 3.862089344322847e-05, + "loss": 1.376, + "step": 2156500 + }, + { + "epoch": 22.76, + "learning_rate": 3.8618255115717046e-05, + "loss": 1.3722, + "step": 2157000 + }, + { + "epoch": 22.77, + "learning_rate": 3.861561678820562e-05, + "loss": 1.3942, + "step": 2157500 + }, + { + "epoch": 22.77, + "learning_rate": 3.86129784606942e-05, + "loss": 1.4198, + "step": 2158000 + }, + { + "epoch": 22.78, + "learning_rate": 3.861034013318278e-05, + "loss": 1.3766, + "step": 2158500 + }, + { + "epoch": 22.78, + "learning_rate": 3.860770180567135e-05, + "loss": 1.399, + "step": 2159000 + }, + { + "epoch": 22.79, + "learning_rate": 3.8605063478159924e-05, + "loss": 1.4037, + "step": 2159500 + }, + { + "epoch": 22.8, + "learning_rate": 3.86024251506485e-05, + "loss": 1.4365, + "step": 2160000 + }, + { + "epoch": 22.8, + "learning_rate": 3.859978682313708e-05, + "loss": 1.4006, + "step": 2160500 + }, + { + "epoch": 22.81, + "learning_rate": 3.859714849562566e-05, + "loss": 1.3187, + "step": 2161000 + }, + { + "epoch": 22.81, + "learning_rate": 3.8594510168114226e-05, + "loss": 1.3614, + "step": 2161500 + }, + { + "epoch": 22.82, + "learning_rate": 3.859187184060281e-05, + "loss": 1.3766, + "step": 2162000 + }, + { + "epoch": 22.82, + "learning_rate": 3.8589233513091384e-05, + "loss": 1.3964, + "step": 2162500 + }, + { + "epoch": 22.83, + "learning_rate": 3.858659518557996e-05, + "loss": 1.3745, + "step": 2163000 + }, + { + "epoch": 22.83, + "learning_rate": 3.8583956858068535e-05, + "loss": 1.3584, + "step": 2163500 + }, + { + "epoch": 22.84, + "learning_rate": 3.858131853055711e-05, + "loss": 1.392, + "step": 2164000 + }, + { + "epoch": 22.84, + "learning_rate": 3.8578680203045685e-05, + "loss": 1.3489, + "step": 2164500 + }, + { + "epoch": 22.85, + "learning_rate": 3.857604187553426e-05, + "loss": 1.4005, + "step": 2165000 + }, + { + "epoch": 22.85, + "learning_rate": 3.857340354802284e-05, + "loss": 1.3644, + "step": 2165500 + }, + { + "epoch": 22.86, + "learning_rate": 3.857076522051142e-05, + "loss": 1.4116, + "step": 2166000 + }, + { + "epoch": 22.86, + "learning_rate": 3.856812689299999e-05, + "loss": 1.3582, + "step": 2166500 + }, + { + "epoch": 22.87, + "learning_rate": 3.856548856548857e-05, + "loss": 1.3906, + "step": 2167000 + }, + { + "epoch": 22.87, + "learning_rate": 3.8562850237977145e-05, + "loss": 1.3645, + "step": 2167500 + }, + { + "epoch": 22.88, + "learning_rate": 3.856021191046572e-05, + "loss": 1.3256, + "step": 2168000 + }, + { + "epoch": 22.88, + "learning_rate": 3.8557573582954296e-05, + "loss": 1.4385, + "step": 2168500 + }, + { + "epoch": 22.89, + "learning_rate": 3.855493525544287e-05, + "loss": 1.3783, + "step": 2169000 + }, + { + "epoch": 22.9, + "learning_rate": 3.855229692793145e-05, + "loss": 1.3454, + "step": 2169500 + }, + { + "epoch": 22.9, + "learning_rate": 3.854965860042002e-05, + "loss": 1.4553, + "step": 2170000 + }, + { + "epoch": 22.91, + "learning_rate": 3.8547020272908605e-05, + "loss": 1.3766, + "step": 2170500 + }, + { + "epoch": 22.91, + "learning_rate": 3.8544381945397174e-05, + "loss": 1.3774, + "step": 2171000 + }, + { + "epoch": 22.92, + "learning_rate": 3.854174361788575e-05, + "loss": 1.3652, + "step": 2171500 + }, + { + "epoch": 22.92, + "learning_rate": 3.8539105290374325e-05, + "loss": 1.4615, + "step": 2172000 + }, + { + "epoch": 22.93, + "learning_rate": 3.853646696286291e-05, + "loss": 1.3481, + "step": 2172500 + }, + { + "epoch": 22.93, + "learning_rate": 3.853382863535148e-05, + "loss": 1.3796, + "step": 2173000 + }, + { + "epoch": 22.94, + "learning_rate": 3.853119030784005e-05, + "loss": 1.4418, + "step": 2173500 + }, + { + "epoch": 22.94, + "learning_rate": 3.852855198032863e-05, + "loss": 1.3512, + "step": 2174000 + }, + { + "epoch": 22.95, + "learning_rate": 3.852591365281721e-05, + "loss": 1.3985, + "step": 2174500 + }, + { + "epoch": 22.95, + "learning_rate": 3.8523275325305784e-05, + "loss": 1.3921, + "step": 2175000 + }, + { + "epoch": 22.96, + "learning_rate": 3.852063699779436e-05, + "loss": 1.405, + "step": 2175500 + }, + { + "epoch": 22.96, + "learning_rate": 3.8517998670282935e-05, + "loss": 1.4015, + "step": 2176000 + }, + { + "epoch": 22.97, + "learning_rate": 3.851536034277151e-05, + "loss": 1.3738, + "step": 2176500 + }, + { + "epoch": 22.97, + "learning_rate": 3.8512722015260086e-05, + "loss": 1.3145, + "step": 2177000 + }, + { + "epoch": 22.98, + "learning_rate": 3.851008368774867e-05, + "loss": 1.3293, + "step": 2177500 + }, + { + "epoch": 22.99, + "learning_rate": 3.850744536023724e-05, + "loss": 1.4203, + "step": 2178000 + }, + { + "epoch": 22.99, + "learning_rate": 3.850480703272581e-05, + "loss": 1.3633, + "step": 2178500 + }, + { + "epoch": 23.0, + "learning_rate": 3.8502168705214395e-05, + "loss": 1.4084, + "step": 2179000 + }, + { + "epoch": 23.0, + "learning_rate": 3.849953037770297e-05, + "loss": 1.3879, + "step": 2179500 + }, + { + "epoch": 23.01, + "learning_rate": 3.8496892050191546e-05, + "loss": 1.3911, + "step": 2180000 + }, + { + "epoch": 23.01, + "learning_rate": 3.849425372268012e-05, + "loss": 1.3861, + "step": 2180500 + }, + { + "epoch": 23.02, + "learning_rate": 3.84916153951687e-05, + "loss": 1.3439, + "step": 2181000 + }, + { + "epoch": 23.02, + "learning_rate": 3.848897706765727e-05, + "loss": 1.3995, + "step": 2181500 + }, + { + "epoch": 23.03, + "learning_rate": 3.848633874014585e-05, + "loss": 1.3432, + "step": 2182000 + }, + { + "epoch": 23.03, + "learning_rate": 3.848370041263443e-05, + "loss": 1.3438, + "step": 2182500 + }, + { + "epoch": 23.04, + "learning_rate": 3.8481062085123e-05, + "loss": 1.3171, + "step": 2183000 + }, + { + "epoch": 23.04, + "learning_rate": 3.8478423757611574e-05, + "loss": 1.3895, + "step": 2183500 + }, + { + "epoch": 23.05, + "learning_rate": 3.847578543010015e-05, + "loss": 1.3732, + "step": 2184000 + }, + { + "epoch": 23.05, + "learning_rate": 3.847314710258873e-05, + "loss": 1.3664, + "step": 2184500 + }, + { + "epoch": 23.06, + "learning_rate": 3.847050877507731e-05, + "loss": 1.4002, + "step": 2185000 + }, + { + "epoch": 23.06, + "learning_rate": 3.8467870447565876e-05, + "loss": 1.3586, + "step": 2185500 + }, + { + "epoch": 23.07, + "learning_rate": 3.846523212005446e-05, + "loss": 1.3735, + "step": 2186000 + }, + { + "epoch": 23.07, + "learning_rate": 3.8462593792543034e-05, + "loss": 1.3873, + "step": 2186500 + }, + { + "epoch": 23.08, + "learning_rate": 3.845995546503161e-05, + "loss": 1.3196, + "step": 2187000 + }, + { + "epoch": 23.09, + "learning_rate": 3.8457317137520185e-05, + "loss": 1.3597, + "step": 2187500 + }, + { + "epoch": 23.09, + "learning_rate": 3.845467881000876e-05, + "loss": 1.4094, + "step": 2188000 + }, + { + "epoch": 23.1, + "learning_rate": 3.8452040482497336e-05, + "loss": 1.4206, + "step": 2188500 + }, + { + "epoch": 23.1, + "learning_rate": 3.844940215498591e-05, + "loss": 1.3574, + "step": 2189000 + }, + { + "epoch": 23.11, + "learning_rate": 3.8446763827474494e-05, + "loss": 1.3849, + "step": 2189500 + }, + { + "epoch": 23.11, + "learning_rate": 3.844412549996306e-05, + "loss": 1.3598, + "step": 2190000 + }, + { + "epoch": 23.12, + "learning_rate": 3.844148717245164e-05, + "loss": 1.3666, + "step": 2190500 + }, + { + "epoch": 23.12, + "learning_rate": 3.843884884494022e-05, + "loss": 1.4157, + "step": 2191000 + }, + { + "epoch": 23.13, + "learning_rate": 3.8436210517428796e-05, + "loss": 1.3489, + "step": 2191500 + }, + { + "epoch": 23.13, + "learning_rate": 3.843357218991737e-05, + "loss": 1.3402, + "step": 2192000 + }, + { + "epoch": 23.14, + "learning_rate": 3.843093386240595e-05, + "loss": 1.3346, + "step": 2192500 + }, + { + "epoch": 23.14, + "learning_rate": 3.842829553489452e-05, + "loss": 1.3385, + "step": 2193000 + }, + { + "epoch": 23.15, + "learning_rate": 3.84256572073831e-05, + "loss": 1.3511, + "step": 2193500 + }, + { + "epoch": 23.15, + "learning_rate": 3.842301887987167e-05, + "loss": 1.3287, + "step": 2194000 + }, + { + "epoch": 23.16, + "learning_rate": 3.8420380552360255e-05, + "loss": 1.3241, + "step": 2194500 + }, + { + "epoch": 23.16, + "learning_rate": 3.8417742224848824e-05, + "loss": 1.4058, + "step": 2195000 + }, + { + "epoch": 23.17, + "learning_rate": 3.84151038973374e-05, + "loss": 1.4133, + "step": 2195500 + }, + { + "epoch": 23.18, + "learning_rate": 3.8412465569825975e-05, + "loss": 1.3571, + "step": 2196000 + }, + { + "epoch": 23.18, + "learning_rate": 3.840982724231456e-05, + "loss": 1.3674, + "step": 2196500 + }, + { + "epoch": 23.19, + "learning_rate": 3.8407188914803126e-05, + "loss": 1.3575, + "step": 2197000 + }, + { + "epoch": 23.19, + "learning_rate": 3.84045505872917e-05, + "loss": 1.4051, + "step": 2197500 + }, + { + "epoch": 23.2, + "learning_rate": 3.8401912259780284e-05, + "loss": 1.3374, + "step": 2198000 + }, + { + "epoch": 23.2, + "learning_rate": 3.839927393226886e-05, + "loss": 1.3463, + "step": 2198500 + }, + { + "epoch": 23.21, + "learning_rate": 3.8396635604757435e-05, + "loss": 1.3822, + "step": 2199000 + }, + { + "epoch": 23.21, + "learning_rate": 3.839399727724601e-05, + "loss": 1.3736, + "step": 2199500 + }, + { + "epoch": 23.22, + "learning_rate": 3.8391358949734586e-05, + "loss": 1.4278, + "step": 2200000 + }, + { + "epoch": 23.22, + "learning_rate": 3.838872062222316e-05, + "loss": 1.3306, + "step": 2200500 + }, + { + "epoch": 23.23, + "learning_rate": 3.838608229471174e-05, + "loss": 1.3677, + "step": 2201000 + }, + { + "epoch": 23.23, + "learning_rate": 3.838344396720032e-05, + "loss": 1.3634, + "step": 2201500 + }, + { + "epoch": 23.24, + "learning_rate": 3.838080563968889e-05, + "loss": 1.3823, + "step": 2202000 + }, + { + "epoch": 23.24, + "learning_rate": 3.837816731217746e-05, + "loss": 1.3411, + "step": 2202500 + }, + { + "epoch": 23.25, + "learning_rate": 3.8375528984666045e-05, + "loss": 1.4209, + "step": 2203000 + }, + { + "epoch": 23.25, + "learning_rate": 3.837289065715462e-05, + "loss": 1.3392, + "step": 2203500 + }, + { + "epoch": 23.26, + "learning_rate": 3.8370252329643196e-05, + "loss": 1.4029, + "step": 2204000 + }, + { + "epoch": 23.26, + "learning_rate": 3.836761400213177e-05, + "loss": 1.3981, + "step": 2204500 + }, + { + "epoch": 23.27, + "learning_rate": 3.836497567462035e-05, + "loss": 1.4187, + "step": 2205000 + }, + { + "epoch": 23.28, + "learning_rate": 3.836233734710892e-05, + "loss": 1.3895, + "step": 2205500 + }, + { + "epoch": 23.28, + "learning_rate": 3.83596990195975e-05, + "loss": 1.3708, + "step": 2206000 + }, + { + "epoch": 23.29, + "learning_rate": 3.8357060692086074e-05, + "loss": 1.3944, + "step": 2206500 + }, + { + "epoch": 23.29, + "learning_rate": 3.835442236457465e-05, + "loss": 1.409, + "step": 2207000 + }, + { + "epoch": 23.3, + "learning_rate": 3.8351784037063225e-05, + "loss": 1.3874, + "step": 2207500 + }, + { + "epoch": 23.3, + "learning_rate": 3.83491457095518e-05, + "loss": 1.4019, + "step": 2208000 + }, + { + "epoch": 23.31, + "learning_rate": 3.834650738204038e-05, + "loss": 1.3874, + "step": 2208500 + }, + { + "epoch": 23.31, + "learning_rate": 3.834386905452895e-05, + "loss": 1.4264, + "step": 2209000 + }, + { + "epoch": 23.32, + "learning_rate": 3.834123072701753e-05, + "loss": 1.4103, + "step": 2209500 + }, + { + "epoch": 23.32, + "learning_rate": 3.833859239950611e-05, + "loss": 1.3853, + "step": 2210000 + }, + { + "epoch": 23.33, + "learning_rate": 3.8335954071994685e-05, + "loss": 1.3222, + "step": 2210500 + }, + { + "epoch": 23.33, + "learning_rate": 3.833331574448326e-05, + "loss": 1.3446, + "step": 2211000 + }, + { + "epoch": 23.34, + "learning_rate": 3.8330677416971835e-05, + "loss": 1.3975, + "step": 2211500 + }, + { + "epoch": 23.34, + "learning_rate": 3.832803908946041e-05, + "loss": 1.3844, + "step": 2212000 + }, + { + "epoch": 23.35, + "learning_rate": 3.8325400761948986e-05, + "loss": 1.3861, + "step": 2212500 + }, + { + "epoch": 23.35, + "learning_rate": 3.832276243443756e-05, + "loss": 1.3848, + "step": 2213000 + }, + { + "epoch": 23.36, + "learning_rate": 3.8320124106926144e-05, + "loss": 1.3496, + "step": 2213500 + }, + { + "epoch": 23.37, + "learning_rate": 3.831748577941471e-05, + "loss": 1.4074, + "step": 2214000 + }, + { + "epoch": 23.37, + "learning_rate": 3.831484745190329e-05, + "loss": 1.407, + "step": 2214500 + }, + { + "epoch": 23.38, + "learning_rate": 3.831220912439187e-05, + "loss": 1.3482, + "step": 2215000 + }, + { + "epoch": 23.38, + "learning_rate": 3.8309570796880446e-05, + "loss": 1.3845, + "step": 2215500 + }, + { + "epoch": 23.39, + "learning_rate": 3.8306932469369015e-05, + "loss": 1.385, + "step": 2216000 + }, + { + "epoch": 23.39, + "learning_rate": 3.83042941418576e-05, + "loss": 1.3484, + "step": 2216500 + }, + { + "epoch": 23.4, + "learning_rate": 3.830165581434617e-05, + "loss": 1.3774, + "step": 2217000 + }, + { + "epoch": 23.4, + "learning_rate": 3.829901748683475e-05, + "loss": 1.3618, + "step": 2217500 + }, + { + "epoch": 23.41, + "learning_rate": 3.8296379159323324e-05, + "loss": 1.348, + "step": 2218000 + }, + { + "epoch": 23.41, + "learning_rate": 3.82937408318119e-05, + "loss": 1.336, + "step": 2218500 + }, + { + "epoch": 23.42, + "learning_rate": 3.8291102504300475e-05, + "loss": 1.3742, + "step": 2219000 + }, + { + "epoch": 23.42, + "learning_rate": 3.828846417678905e-05, + "loss": 1.3032, + "step": 2219500 + }, + { + "epoch": 23.43, + "learning_rate": 3.828582584927763e-05, + "loss": 1.3695, + "step": 2220000 + }, + { + "epoch": 23.43, + "learning_rate": 3.828318752176621e-05, + "loss": 1.3447, + "step": 2220500 + }, + { + "epoch": 23.44, + "learning_rate": 3.8280549194254777e-05, + "loss": 1.3956, + "step": 2221000 + }, + { + "epoch": 23.44, + "learning_rate": 3.827791086674335e-05, + "loss": 1.3412, + "step": 2221500 + }, + { + "epoch": 23.45, + "learning_rate": 3.8275272539231934e-05, + "loss": 1.3499, + "step": 2222000 + }, + { + "epoch": 23.45, + "learning_rate": 3.827263421172051e-05, + "loss": 1.3614, + "step": 2222500 + }, + { + "epoch": 23.46, + "learning_rate": 3.8269995884209085e-05, + "loss": 1.3847, + "step": 2223000 + }, + { + "epoch": 23.47, + "learning_rate": 3.826735755669766e-05, + "loss": 1.3615, + "step": 2223500 + }, + { + "epoch": 23.47, + "learning_rate": 3.8264719229186236e-05, + "loss": 1.3482, + "step": 2224000 + }, + { + "epoch": 23.48, + "learning_rate": 3.826208090167481e-05, + "loss": 1.3258, + "step": 2224500 + }, + { + "epoch": 23.48, + "learning_rate": 3.825944257416339e-05, + "loss": 1.3799, + "step": 2225000 + }, + { + "epoch": 23.49, + "learning_rate": 3.825680424665196e-05, + "loss": 1.376, + "step": 2225500 + }, + { + "epoch": 23.49, + "learning_rate": 3.825416591914054e-05, + "loss": 1.3863, + "step": 2226000 + }, + { + "epoch": 23.5, + "learning_rate": 3.8251527591629114e-05, + "loss": 1.4004, + "step": 2226500 + }, + { + "epoch": 23.5, + "learning_rate": 3.8248889264117696e-05, + "loss": 1.387, + "step": 2227000 + }, + { + "epoch": 23.51, + "learning_rate": 3.824625093660627e-05, + "loss": 1.3325, + "step": 2227500 + }, + { + "epoch": 23.51, + "learning_rate": 3.824361260909484e-05, + "loss": 1.4159, + "step": 2228000 + }, + { + "epoch": 23.52, + "learning_rate": 3.824097428158342e-05, + "loss": 1.3794, + "step": 2228500 + }, + { + "epoch": 23.52, + "learning_rate": 3.8238335954072e-05, + "loss": 1.3505, + "step": 2229000 + }, + { + "epoch": 23.53, + "learning_rate": 3.823569762656057e-05, + "loss": 1.4141, + "step": 2229500 + }, + { + "epoch": 23.53, + "learning_rate": 3.823305929904915e-05, + "loss": 1.3689, + "step": 2230000 + }, + { + "epoch": 23.54, + "learning_rate": 3.8230420971537724e-05, + "loss": 1.4094, + "step": 2230500 + }, + { + "epoch": 23.54, + "learning_rate": 3.82277826440263e-05, + "loss": 1.3689, + "step": 2231000 + }, + { + "epoch": 23.55, + "learning_rate": 3.8225144316514875e-05, + "loss": 1.3375, + "step": 2231500 + }, + { + "epoch": 23.55, + "learning_rate": 3.822250598900346e-05, + "loss": 1.3547, + "step": 2232000 + }, + { + "epoch": 23.56, + "learning_rate": 3.821986766149203e-05, + "loss": 1.3996, + "step": 2232500 + }, + { + "epoch": 23.57, + "learning_rate": 3.82172293339806e-05, + "loss": 1.4071, + "step": 2233000 + }, + { + "epoch": 23.57, + "learning_rate": 3.821459100646918e-05, + "loss": 1.3272, + "step": 2233500 + }, + { + "epoch": 23.58, + "learning_rate": 3.821195267895776e-05, + "loss": 1.3136, + "step": 2234000 + }, + { + "epoch": 23.58, + "learning_rate": 3.8209314351446335e-05, + "loss": 1.3888, + "step": 2234500 + }, + { + "epoch": 23.59, + "learning_rate": 3.8206676023934904e-05, + "loss": 1.365, + "step": 2235000 + }, + { + "epoch": 23.59, + "learning_rate": 3.8204037696423486e-05, + "loss": 1.396, + "step": 2235500 + }, + { + "epoch": 23.6, + "learning_rate": 3.820139936891206e-05, + "loss": 1.32, + "step": 2236000 + }, + { + "epoch": 23.6, + "learning_rate": 3.819876104140064e-05, + "loss": 1.3607, + "step": 2236500 + }, + { + "epoch": 23.61, + "learning_rate": 3.819612271388921e-05, + "loss": 1.3591, + "step": 2237000 + }, + { + "epoch": 23.61, + "learning_rate": 3.819348438637779e-05, + "loss": 1.3687, + "step": 2237500 + }, + { + "epoch": 23.62, + "learning_rate": 3.8190846058866363e-05, + "loss": 1.3437, + "step": 2238000 + }, + { + "epoch": 23.62, + "learning_rate": 3.818820773135494e-05, + "loss": 1.3831, + "step": 2238500 + }, + { + "epoch": 23.63, + "learning_rate": 3.818556940384352e-05, + "loss": 1.3939, + "step": 2239000 + }, + { + "epoch": 23.63, + "learning_rate": 3.81829310763321e-05, + "loss": 1.3556, + "step": 2239500 + }, + { + "epoch": 23.64, + "learning_rate": 3.8180292748820665e-05, + "loss": 1.3852, + "step": 2240000 + }, + { + "epoch": 23.64, + "learning_rate": 3.817765442130925e-05, + "loss": 1.3836, + "step": 2240500 + }, + { + "epoch": 23.65, + "learning_rate": 3.817501609379782e-05, + "loss": 1.3796, + "step": 2241000 + }, + { + "epoch": 23.66, + "learning_rate": 3.81723777662864e-05, + "loss": 1.4062, + "step": 2241500 + }, + { + "epoch": 23.66, + "learning_rate": 3.8169739438774974e-05, + "loss": 1.3856, + "step": 2242000 + }, + { + "epoch": 23.67, + "learning_rate": 3.816710111126355e-05, + "loss": 1.3635, + "step": 2242500 + }, + { + "epoch": 23.67, + "learning_rate": 3.8164462783752125e-05, + "loss": 1.4058, + "step": 2243000 + }, + { + "epoch": 23.68, + "learning_rate": 3.81618244562407e-05, + "loss": 1.371, + "step": 2243500 + }, + { + "epoch": 23.68, + "learning_rate": 3.815918612872928e-05, + "loss": 1.3858, + "step": 2244000 + }, + { + "epoch": 23.69, + "learning_rate": 3.815654780121785e-05, + "loss": 1.386, + "step": 2244500 + }, + { + "epoch": 23.69, + "learning_rate": 3.815390947370643e-05, + "loss": 1.3541, + "step": 2245000 + }, + { + "epoch": 23.7, + "learning_rate": 3.8151271146195e-05, + "loss": 1.3421, + "step": 2245500 + }, + { + "epoch": 23.7, + "learning_rate": 3.8148632818683585e-05, + "loss": 1.3792, + "step": 2246000 + }, + { + "epoch": 23.71, + "learning_rate": 3.814599449117216e-05, + "loss": 1.3385, + "step": 2246500 + }, + { + "epoch": 23.71, + "learning_rate": 3.814335616366073e-05, + "loss": 1.386, + "step": 2247000 + }, + { + "epoch": 23.72, + "learning_rate": 3.814071783614931e-05, + "loss": 1.4082, + "step": 2247500 + }, + { + "epoch": 23.72, + "learning_rate": 3.813807950863789e-05, + "loss": 1.3834, + "step": 2248000 + }, + { + "epoch": 23.73, + "learning_rate": 3.813544118112646e-05, + "loss": 1.3929, + "step": 2248500 + }, + { + "epoch": 23.73, + "learning_rate": 3.813280285361504e-05, + "loss": 1.3467, + "step": 2249000 + }, + { + "epoch": 23.74, + "learning_rate": 3.813016452610361e-05, + "loss": 1.3891, + "step": 2249500 + }, + { + "epoch": 23.74, + "learning_rate": 3.812752619859219e-05, + "loss": 1.3199, + "step": 2250000 + }, + { + "epoch": 23.75, + "learning_rate": 3.8124887871080764e-05, + "loss": 1.3333, + "step": 2250500 + }, + { + "epoch": 23.76, + "learning_rate": 3.8122249543569346e-05, + "loss": 1.3757, + "step": 2251000 + }, + { + "epoch": 23.76, + "learning_rate": 3.811961121605792e-05, + "loss": 1.3723, + "step": 2251500 + }, + { + "epoch": 23.77, + "learning_rate": 3.811697288854649e-05, + "loss": 1.3288, + "step": 2252000 + }, + { + "epoch": 23.77, + "learning_rate": 3.811433456103507e-05, + "loss": 1.3807, + "step": 2252500 + }, + { + "epoch": 23.78, + "learning_rate": 3.811169623352365e-05, + "loss": 1.4166, + "step": 2253000 + }, + { + "epoch": 23.78, + "learning_rate": 3.8109057906012224e-05, + "loss": 1.3975, + "step": 2253500 + }, + { + "epoch": 23.79, + "learning_rate": 3.81064195785008e-05, + "loss": 1.3406, + "step": 2254000 + }, + { + "epoch": 23.79, + "learning_rate": 3.8103781250989375e-05, + "loss": 1.3712, + "step": 2254500 + }, + { + "epoch": 23.8, + "learning_rate": 3.810114292347795e-05, + "loss": 1.3369, + "step": 2255000 + }, + { + "epoch": 23.8, + "learning_rate": 3.8098504595966526e-05, + "loss": 1.3574, + "step": 2255500 + }, + { + "epoch": 23.81, + "learning_rate": 3.809586626845511e-05, + "loss": 1.3552, + "step": 2256000 + }, + { + "epoch": 23.81, + "learning_rate": 3.809322794094368e-05, + "loss": 1.3622, + "step": 2256500 + }, + { + "epoch": 23.82, + "learning_rate": 3.809058961343225e-05, + "loss": 1.3961, + "step": 2257000 + }, + { + "epoch": 23.82, + "learning_rate": 3.808795128592083e-05, + "loss": 1.4173, + "step": 2257500 + }, + { + "epoch": 23.83, + "learning_rate": 3.808531295840941e-05, + "loss": 1.3407, + "step": 2258000 + }, + { + "epoch": 23.83, + "learning_rate": 3.8082674630897985e-05, + "loss": 1.3753, + "step": 2258500 + }, + { + "epoch": 23.84, + "learning_rate": 3.8080036303386554e-05, + "loss": 1.3137, + "step": 2259000 + }, + { + "epoch": 23.85, + "learning_rate": 3.8077397975875136e-05, + "loss": 1.4283, + "step": 2259500 + }, + { + "epoch": 23.85, + "learning_rate": 3.807475964836371e-05, + "loss": 1.3533, + "step": 2260000 + }, + { + "epoch": 23.86, + "learning_rate": 3.807212132085229e-05, + "loss": 1.3884, + "step": 2260500 + }, + { + "epoch": 23.86, + "learning_rate": 3.806948299334086e-05, + "loss": 1.401, + "step": 2261000 + }, + { + "epoch": 23.87, + "learning_rate": 3.806684466582944e-05, + "loss": 1.4219, + "step": 2261500 + }, + { + "epoch": 23.87, + "learning_rate": 3.8064206338318014e-05, + "loss": 1.377, + "step": 2262000 + }, + { + "epoch": 23.88, + "learning_rate": 3.806156801080659e-05, + "loss": 1.4328, + "step": 2262500 + }, + { + "epoch": 23.88, + "learning_rate": 3.805892968329517e-05, + "loss": 1.3641, + "step": 2263000 + }, + { + "epoch": 23.89, + "learning_rate": 3.805629135578374e-05, + "loss": 1.4016, + "step": 2263500 + }, + { + "epoch": 23.89, + "learning_rate": 3.8053653028272316e-05, + "loss": 1.3978, + "step": 2264000 + }, + { + "epoch": 23.9, + "learning_rate": 3.80510147007609e-05, + "loss": 1.3267, + "step": 2264500 + }, + { + "epoch": 23.9, + "learning_rate": 3.8048376373249474e-05, + "loss": 1.3655, + "step": 2265000 + }, + { + "epoch": 23.91, + "learning_rate": 3.804573804573805e-05, + "loss": 1.35, + "step": 2265500 + }, + { + "epoch": 23.91, + "learning_rate": 3.8043099718226625e-05, + "loss": 1.3881, + "step": 2266000 + }, + { + "epoch": 23.92, + "learning_rate": 3.80404613907152e-05, + "loss": 1.343, + "step": 2266500 + }, + { + "epoch": 23.92, + "learning_rate": 3.8037823063203776e-05, + "loss": 1.3613, + "step": 2267000 + }, + { + "epoch": 23.93, + "learning_rate": 3.803518473569235e-05, + "loss": 1.338, + "step": 2267500 + }, + { + "epoch": 23.93, + "learning_rate": 3.803254640818093e-05, + "loss": 1.3503, + "step": 2268000 + }, + { + "epoch": 23.94, + "learning_rate": 3.80299080806695e-05, + "loss": 1.3712, + "step": 2268500 + }, + { + "epoch": 23.95, + "learning_rate": 3.802726975315808e-05, + "loss": 1.3547, + "step": 2269000 + }, + { + "epoch": 23.95, + "learning_rate": 3.802463142564665e-05, + "loss": 1.3667, + "step": 2269500 + }, + { + "epoch": 23.96, + "learning_rate": 3.8021993098135235e-05, + "loss": 1.345, + "step": 2270000 + }, + { + "epoch": 23.96, + "learning_rate": 3.801935477062381e-05, + "loss": 1.3761, + "step": 2270500 + }, + { + "epoch": 23.97, + "learning_rate": 3.801671644311238e-05, + "loss": 1.3354, + "step": 2271000 + }, + { + "epoch": 23.97, + "learning_rate": 3.801407811560096e-05, + "loss": 1.3733, + "step": 2271500 + }, + { + "epoch": 23.98, + "learning_rate": 3.801143978808954e-05, + "loss": 1.3774, + "step": 2272000 + }, + { + "epoch": 23.98, + "learning_rate": 3.800880146057811e-05, + "loss": 1.3339, + "step": 2272500 + }, + { + "epoch": 23.99, + "learning_rate": 3.800616313306669e-05, + "loss": 1.4039, + "step": 2273000 + }, + { + "epoch": 23.99, + "learning_rate": 3.8003524805555264e-05, + "loss": 1.3974, + "step": 2273500 + }, + { + "epoch": 24.0, + "learning_rate": 3.800088647804384e-05, + "loss": 1.3454, + "step": 2274000 + }, + { + "epoch": 24.0, + "learning_rate": 3.7998248150532415e-05, + "loss": 1.3458, + "step": 2274500 + }, + { + "epoch": 24.01, + "learning_rate": 3.7995609823021e-05, + "loss": 1.4209, + "step": 2275000 + }, + { + "epoch": 24.01, + "learning_rate": 3.7992971495509566e-05, + "loss": 1.3783, + "step": 2275500 + }, + { + "epoch": 24.02, + "learning_rate": 3.799033316799814e-05, + "loss": 1.3879, + "step": 2276000 + }, + { + "epoch": 24.02, + "learning_rate": 3.798769484048672e-05, + "loss": 1.3593, + "step": 2276500 + }, + { + "epoch": 24.03, + "learning_rate": 3.79850565129753e-05, + "loss": 1.4137, + "step": 2277000 + }, + { + "epoch": 24.04, + "learning_rate": 3.7982418185463874e-05, + "loss": 1.4114, + "step": 2277500 + }, + { + "epoch": 24.04, + "learning_rate": 3.797977985795245e-05, + "loss": 1.3551, + "step": 2278000 + }, + { + "epoch": 24.05, + "learning_rate": 3.7977141530441025e-05, + "loss": 1.3305, + "step": 2278500 + }, + { + "epoch": 24.05, + "learning_rate": 3.79745032029296e-05, + "loss": 1.3346, + "step": 2279000 + }, + { + "epoch": 24.06, + "learning_rate": 3.7971864875418176e-05, + "loss": 1.4007, + "step": 2279500 + }, + { + "epoch": 24.06, + "learning_rate": 3.796922654790676e-05, + "loss": 1.297, + "step": 2280000 + }, + { + "epoch": 24.07, + "learning_rate": 3.796658822039533e-05, + "loss": 1.3829, + "step": 2280500 + }, + { + "epoch": 24.07, + "learning_rate": 3.79639498928839e-05, + "loss": 1.3887, + "step": 2281000 + }, + { + "epoch": 24.08, + "learning_rate": 3.796131156537248e-05, + "loss": 1.3508, + "step": 2281500 + }, + { + "epoch": 24.08, + "learning_rate": 3.795867323786106e-05, + "loss": 1.3669, + "step": 2282000 + }, + { + "epoch": 24.09, + "learning_rate": 3.795603491034963e-05, + "loss": 1.3171, + "step": 2282500 + }, + { + "epoch": 24.09, + "learning_rate": 3.7953396582838205e-05, + "loss": 1.3719, + "step": 2283000 + }, + { + "epoch": 24.1, + "learning_rate": 3.795075825532679e-05, + "loss": 1.4377, + "step": 2283500 + }, + { + "epoch": 24.1, + "learning_rate": 3.794811992781536e-05, + "loss": 1.4052, + "step": 2284000 + }, + { + "epoch": 24.11, + "learning_rate": 3.794548160030394e-05, + "loss": 1.335, + "step": 2284500 + }, + { + "epoch": 24.11, + "learning_rate": 3.7942843272792513e-05, + "loss": 1.3502, + "step": 2285000 + }, + { + "epoch": 24.12, + "learning_rate": 3.794020494528109e-05, + "loss": 1.3991, + "step": 2285500 + }, + { + "epoch": 24.12, + "learning_rate": 3.7937566617769664e-05, + "loss": 1.3343, + "step": 2286000 + }, + { + "epoch": 24.13, + "learning_rate": 3.793492829025824e-05, + "loss": 1.3683, + "step": 2286500 + }, + { + "epoch": 24.14, + "learning_rate": 3.793228996274682e-05, + "loss": 1.3602, + "step": 2287000 + }, + { + "epoch": 24.14, + "learning_rate": 3.792965163523539e-05, + "loss": 1.4024, + "step": 2287500 + }, + { + "epoch": 24.15, + "learning_rate": 3.7927013307723966e-05, + "loss": 1.3511, + "step": 2288000 + }, + { + "epoch": 24.15, + "learning_rate": 3.792437498021255e-05, + "loss": 1.3648, + "step": 2288500 + }, + { + "epoch": 24.16, + "learning_rate": 3.7921736652701124e-05, + "loss": 1.3665, + "step": 2289000 + }, + { + "epoch": 24.16, + "learning_rate": 3.79190983251897e-05, + "loss": 1.3529, + "step": 2289500 + }, + { + "epoch": 24.17, + "learning_rate": 3.7916459997678275e-05, + "loss": 1.4358, + "step": 2290000 + }, + { + "epoch": 24.17, + "learning_rate": 3.791382167016685e-05, + "loss": 1.3208, + "step": 2290500 + }, + { + "epoch": 24.18, + "learning_rate": 3.7911183342655426e-05, + "loss": 1.365, + "step": 2291000 + }, + { + "epoch": 24.18, + "learning_rate": 3.7908545015144e-05, + "loss": 1.4259, + "step": 2291500 + }, + { + "epoch": 24.19, + "learning_rate": 3.790590668763258e-05, + "loss": 1.3433, + "step": 2292000 + }, + { + "epoch": 24.19, + "learning_rate": 3.790326836012115e-05, + "loss": 1.3757, + "step": 2292500 + }, + { + "epoch": 24.2, + "learning_rate": 3.790063003260973e-05, + "loss": 1.4053, + "step": 2293000 + }, + { + "epoch": 24.2, + "learning_rate": 3.789799170509831e-05, + "loss": 1.3882, + "step": 2293500 + }, + { + "epoch": 24.21, + "learning_rate": 3.7895353377586886e-05, + "loss": 1.3505, + "step": 2294000 + }, + { + "epoch": 24.21, + "learning_rate": 3.7892715050075454e-05, + "loss": 1.3992, + "step": 2294500 + }, + { + "epoch": 24.22, + "learning_rate": 3.789007672256403e-05, + "loss": 1.3571, + "step": 2295000 + }, + { + "epoch": 24.23, + "learning_rate": 3.788743839505261e-05, + "loss": 1.4264, + "step": 2295500 + }, + { + "epoch": 24.23, + "learning_rate": 3.788480006754119e-05, + "loss": 1.3442, + "step": 2296000 + }, + { + "epoch": 24.24, + "learning_rate": 3.788216174002976e-05, + "loss": 1.3119, + "step": 2296500 + }, + { + "epoch": 24.24, + "learning_rate": 3.787952341251834e-05, + "loss": 1.3099, + "step": 2297000 + }, + { + "epoch": 24.25, + "learning_rate": 3.7876885085006914e-05, + "loss": 1.3482, + "step": 2297500 + }, + { + "epoch": 24.25, + "learning_rate": 3.787424675749549e-05, + "loss": 1.3571, + "step": 2298000 + }, + { + "epoch": 24.26, + "learning_rate": 3.7871608429984065e-05, + "loss": 1.3931, + "step": 2298500 + }, + { + "epoch": 24.26, + "learning_rate": 3.786897010247265e-05, + "loss": 1.3716, + "step": 2299000 + }, + { + "epoch": 24.27, + "learning_rate": 3.7866331774961216e-05, + "loss": 1.336, + "step": 2299500 + }, + { + "epoch": 24.27, + "learning_rate": 3.786369344744979e-05, + "loss": 1.3514, + "step": 2300000 + }, + { + "epoch": 24.28, + "learning_rate": 3.7861055119938374e-05, + "loss": 1.3719, + "step": 2300500 + }, + { + "epoch": 24.28, + "learning_rate": 3.785841679242695e-05, + "loss": 1.4373, + "step": 2301000 + }, + { + "epoch": 24.29, + "learning_rate": 3.785577846491552e-05, + "loss": 1.3997, + "step": 2301500 + }, + { + "epoch": 24.29, + "learning_rate": 3.78531401374041e-05, + "loss": 1.3859, + "step": 2302000 + }, + { + "epoch": 24.3, + "learning_rate": 3.7850501809892676e-05, + "loss": 1.3771, + "step": 2302500 + }, + { + "epoch": 24.3, + "learning_rate": 3.784786348238125e-05, + "loss": 1.4, + "step": 2303000 + }, + { + "epoch": 24.31, + "learning_rate": 3.784522515486983e-05, + "loss": 1.3964, + "step": 2303500 + }, + { + "epoch": 24.31, + "learning_rate": 3.78425868273584e-05, + "loss": 1.3204, + "step": 2304000 + }, + { + "epoch": 24.32, + "learning_rate": 3.783994849984698e-05, + "loss": 1.3601, + "step": 2304500 + }, + { + "epoch": 24.33, + "learning_rate": 3.783731017233555e-05, + "loss": 1.3959, + "step": 2305000 + }, + { + "epoch": 24.33, + "learning_rate": 3.7834671844824136e-05, + "loss": 1.3386, + "step": 2305500 + }, + { + "epoch": 24.34, + "learning_rate": 3.783203351731271e-05, + "loss": 1.3164, + "step": 2306000 + }, + { + "epoch": 24.34, + "learning_rate": 3.782939518980128e-05, + "loss": 1.3321, + "step": 2306500 + }, + { + "epoch": 24.35, + "learning_rate": 3.7826756862289855e-05, + "loss": 1.3189, + "step": 2307000 + }, + { + "epoch": 24.35, + "learning_rate": 3.782411853477844e-05, + "loss": 1.3292, + "step": 2307500 + }, + { + "epoch": 24.36, + "learning_rate": 3.782148020726701e-05, + "loss": 1.3606, + "step": 2308000 + }, + { + "epoch": 24.36, + "learning_rate": 3.781884187975559e-05, + "loss": 1.4157, + "step": 2308500 + }, + { + "epoch": 24.37, + "learning_rate": 3.7816203552244164e-05, + "loss": 1.371, + "step": 2309000 + }, + { + "epoch": 24.37, + "learning_rate": 3.781356522473274e-05, + "loss": 1.3421, + "step": 2309500 + }, + { + "epoch": 24.38, + "learning_rate": 3.7810926897221315e-05, + "loss": 1.345, + "step": 2310000 + }, + { + "epoch": 24.38, + "learning_rate": 3.780828856970989e-05, + "loss": 1.3504, + "step": 2310500 + }, + { + "epoch": 24.39, + "learning_rate": 3.7805650242198466e-05, + "loss": 1.4174, + "step": 2311000 + }, + { + "epoch": 24.39, + "learning_rate": 3.780301191468704e-05, + "loss": 1.3275, + "step": 2311500 + }, + { + "epoch": 24.4, + "learning_rate": 3.780037358717562e-05, + "loss": 1.3835, + "step": 2312000 + }, + { + "epoch": 24.4, + "learning_rate": 3.77977352596642e-05, + "loss": 1.3414, + "step": 2312500 + }, + { + "epoch": 24.41, + "learning_rate": 3.7795096932152775e-05, + "loss": 1.3488, + "step": 2313000 + }, + { + "epoch": 24.42, + "learning_rate": 3.779245860464134e-05, + "loss": 1.299, + "step": 2313500 + }, + { + "epoch": 24.42, + "learning_rate": 3.7789820277129926e-05, + "loss": 1.3496, + "step": 2314000 + }, + { + "epoch": 24.43, + "learning_rate": 3.77871819496185e-05, + "loss": 1.3858, + "step": 2314500 + }, + { + "epoch": 24.43, + "learning_rate": 3.7784543622107077e-05, + "loss": 1.436, + "step": 2315000 + }, + { + "epoch": 24.44, + "learning_rate": 3.778190529459565e-05, + "loss": 1.3539, + "step": 2315500 + }, + { + "epoch": 24.44, + "learning_rate": 3.777926696708423e-05, + "loss": 1.29, + "step": 2316000 + }, + { + "epoch": 24.45, + "learning_rate": 3.77766286395728e-05, + "loss": 1.3948, + "step": 2316500 + }, + { + "epoch": 24.45, + "learning_rate": 3.777399031206138e-05, + "loss": 1.3732, + "step": 2317000 + }, + { + "epoch": 24.46, + "learning_rate": 3.777135198454996e-05, + "loss": 1.3652, + "step": 2317500 + }, + { + "epoch": 24.46, + "learning_rate": 3.7768713657038536e-05, + "loss": 1.3683, + "step": 2318000 + }, + { + "epoch": 24.47, + "learning_rate": 3.7766075329527105e-05, + "loss": 1.3405, + "step": 2318500 + }, + { + "epoch": 24.47, + "learning_rate": 3.776343700201568e-05, + "loss": 1.366, + "step": 2319000 + }, + { + "epoch": 24.48, + "learning_rate": 3.776079867450426e-05, + "loss": 1.374, + "step": 2319500 + }, + { + "epoch": 24.48, + "learning_rate": 3.775816034699284e-05, + "loss": 1.392, + "step": 2320000 + }, + { + "epoch": 24.49, + "learning_rate": 3.775552201948141e-05, + "loss": 1.3753, + "step": 2320500 + }, + { + "epoch": 24.49, + "learning_rate": 3.775288369196999e-05, + "loss": 1.3557, + "step": 2321000 + }, + { + "epoch": 24.5, + "learning_rate": 3.7750245364458565e-05, + "loss": 1.4086, + "step": 2321500 + }, + { + "epoch": 24.5, + "learning_rate": 3.774760703694714e-05, + "loss": 1.3613, + "step": 2322000 + }, + { + "epoch": 24.51, + "learning_rate": 3.7744968709435716e-05, + "loss": 1.3841, + "step": 2322500 + }, + { + "epoch": 24.52, + "learning_rate": 3.774233038192429e-05, + "loss": 1.3519, + "step": 2323000 + }, + { + "epoch": 24.52, + "learning_rate": 3.7739692054412867e-05, + "loss": 1.3858, + "step": 2323500 + }, + { + "epoch": 24.53, + "learning_rate": 3.773705372690144e-05, + "loss": 1.3292, + "step": 2324000 + }, + { + "epoch": 24.53, + "learning_rate": 3.7734415399390024e-05, + "loss": 1.377, + "step": 2324500 + }, + { + "epoch": 24.54, + "learning_rate": 3.77317770718786e-05, + "loss": 1.3622, + "step": 2325000 + }, + { + "epoch": 24.54, + "learning_rate": 3.772913874436717e-05, + "loss": 1.3346, + "step": 2325500 + }, + { + "epoch": 24.55, + "learning_rate": 3.772650041685575e-05, + "loss": 1.3462, + "step": 2326000 + }, + { + "epoch": 24.55, + "learning_rate": 3.7723862089344326e-05, + "loss": 1.3469, + "step": 2326500 + }, + { + "epoch": 24.56, + "learning_rate": 3.77212237618329e-05, + "loss": 1.3469, + "step": 2327000 + }, + { + "epoch": 24.56, + "learning_rate": 3.771858543432148e-05, + "loss": 1.4152, + "step": 2327500 + }, + { + "epoch": 24.57, + "learning_rate": 3.771594710681005e-05, + "loss": 1.3669, + "step": 2328000 + }, + { + "epoch": 24.57, + "learning_rate": 3.771330877929863e-05, + "loss": 1.3266, + "step": 2328500 + }, + { + "epoch": 24.58, + "learning_rate": 3.7710670451787204e-05, + "loss": 1.3668, + "step": 2329000 + }, + { + "epoch": 24.58, + "learning_rate": 3.7708032124275786e-05, + "loss": 1.3871, + "step": 2329500 + }, + { + "epoch": 24.59, + "learning_rate": 3.7705393796764355e-05, + "loss": 1.3464, + "step": 2330000 + }, + { + "epoch": 24.59, + "learning_rate": 3.770275546925293e-05, + "loss": 1.3571, + "step": 2330500 + }, + { + "epoch": 24.6, + "learning_rate": 3.7700117141741506e-05, + "loss": 1.3817, + "step": 2331000 + }, + { + "epoch": 24.61, + "learning_rate": 3.769747881423009e-05, + "loss": 1.3565, + "step": 2331500 + }, + { + "epoch": 24.61, + "learning_rate": 3.7694840486718663e-05, + "loss": 1.3573, + "step": 2332000 + }, + { + "epoch": 24.62, + "learning_rate": 3.769220215920723e-05, + "loss": 1.3939, + "step": 2332500 + }, + { + "epoch": 24.62, + "learning_rate": 3.7689563831695814e-05, + "loss": 1.4119, + "step": 2333000 + }, + { + "epoch": 24.63, + "learning_rate": 3.768692550418439e-05, + "loss": 1.3524, + "step": 2333500 + }, + { + "epoch": 24.63, + "learning_rate": 3.7684287176672965e-05, + "loss": 1.3539, + "step": 2334000 + }, + { + "epoch": 24.64, + "learning_rate": 3.768164884916154e-05, + "loss": 1.3614, + "step": 2334500 + }, + { + "epoch": 24.64, + "learning_rate": 3.7679010521650116e-05, + "loss": 1.3179, + "step": 2335000 + }, + { + "epoch": 24.65, + "learning_rate": 3.767637219413869e-05, + "loss": 1.4013, + "step": 2335500 + }, + { + "epoch": 24.65, + "learning_rate": 3.767373386662727e-05, + "loss": 1.3649, + "step": 2336000 + }, + { + "epoch": 24.66, + "learning_rate": 3.767109553911585e-05, + "loss": 1.3323, + "step": 2336500 + }, + { + "epoch": 24.66, + "learning_rate": 3.766845721160442e-05, + "loss": 1.3686, + "step": 2337000 + }, + { + "epoch": 24.67, + "learning_rate": 3.7665818884092994e-05, + "loss": 1.3854, + "step": 2337500 + }, + { + "epoch": 24.67, + "learning_rate": 3.7663180556581576e-05, + "loss": 1.3663, + "step": 2338000 + }, + { + "epoch": 24.68, + "learning_rate": 3.766054222907015e-05, + "loss": 1.3773, + "step": 2338500 + }, + { + "epoch": 24.68, + "learning_rate": 3.765790390155873e-05, + "loss": 1.3352, + "step": 2339000 + }, + { + "epoch": 24.69, + "learning_rate": 3.76552655740473e-05, + "loss": 1.3573, + "step": 2339500 + }, + { + "epoch": 24.69, + "learning_rate": 3.765262724653588e-05, + "loss": 1.3849, + "step": 2340000 + }, + { + "epoch": 24.7, + "learning_rate": 3.7649988919024453e-05, + "loss": 1.3423, + "step": 2340500 + }, + { + "epoch": 24.71, + "learning_rate": 3.764735059151303e-05, + "loss": 1.3783, + "step": 2341000 + }, + { + "epoch": 24.71, + "learning_rate": 3.764471226400161e-05, + "loss": 1.3755, + "step": 2341500 + }, + { + "epoch": 24.72, + "learning_rate": 3.764207393649018e-05, + "loss": 1.3464, + "step": 2342000 + }, + { + "epoch": 24.72, + "learning_rate": 3.7639435608978755e-05, + "loss": 1.3971, + "step": 2342500 + }, + { + "epoch": 24.73, + "learning_rate": 3.763679728146733e-05, + "loss": 1.3545, + "step": 2343000 + }, + { + "epoch": 24.73, + "learning_rate": 3.763415895395591e-05, + "loss": 1.3789, + "step": 2343500 + }, + { + "epoch": 24.74, + "learning_rate": 3.763152062644449e-05, + "loss": 1.3714, + "step": 2344000 + }, + { + "epoch": 24.74, + "learning_rate": 3.762888229893306e-05, + "loss": 1.3865, + "step": 2344500 + }, + { + "epoch": 24.75, + "learning_rate": 3.762624397142164e-05, + "loss": 1.332, + "step": 2345000 + }, + { + "epoch": 24.75, + "learning_rate": 3.7623605643910215e-05, + "loss": 1.3781, + "step": 2345500 + }, + { + "epoch": 24.76, + "learning_rate": 3.762096731639879e-05, + "loss": 1.3107, + "step": 2346000 + }, + { + "epoch": 24.76, + "learning_rate": 3.7618328988887366e-05, + "loss": 1.3902, + "step": 2346500 + }, + { + "epoch": 24.77, + "learning_rate": 3.761569066137594e-05, + "loss": 1.3229, + "step": 2347000 + }, + { + "epoch": 24.77, + "learning_rate": 3.761305233386452e-05, + "loss": 1.409, + "step": 2347500 + }, + { + "epoch": 24.78, + "learning_rate": 3.761041400635309e-05, + "loss": 1.397, + "step": 2348000 + }, + { + "epoch": 24.78, + "learning_rate": 3.7607775678841675e-05, + "loss": 1.3586, + "step": 2348500 + }, + { + "epoch": 24.79, + "learning_rate": 3.7605137351330244e-05, + "loss": 1.3384, + "step": 2349000 + }, + { + "epoch": 24.8, + "learning_rate": 3.760249902381882e-05, + "loss": 1.3453, + "step": 2349500 + }, + { + "epoch": 24.8, + "learning_rate": 3.75998606963074e-05, + "loss": 1.3458, + "step": 2350000 + }, + { + "epoch": 24.81, + "learning_rate": 3.759722236879598e-05, + "loss": 1.4208, + "step": 2350500 + }, + { + "epoch": 24.81, + "learning_rate": 3.759458404128455e-05, + "loss": 1.3412, + "step": 2351000 + }, + { + "epoch": 24.82, + "learning_rate": 3.759194571377313e-05, + "loss": 1.3787, + "step": 2351500 + }, + { + "epoch": 24.82, + "learning_rate": 3.75893073862617e-05, + "loss": 1.3814, + "step": 2352000 + }, + { + "epoch": 24.83, + "learning_rate": 3.758666905875028e-05, + "loss": 1.3901, + "step": 2352500 + }, + { + "epoch": 24.83, + "learning_rate": 3.7584030731238854e-05, + "loss": 1.3826, + "step": 2353000 + }, + { + "epoch": 24.84, + "learning_rate": 3.7581392403727436e-05, + "loss": 1.3431, + "step": 2353500 + }, + { + "epoch": 24.84, + "learning_rate": 3.7578754076216005e-05, + "loss": 1.344, + "step": 2354000 + }, + { + "epoch": 24.85, + "learning_rate": 3.757611574870458e-05, + "loss": 1.3376, + "step": 2354500 + }, + { + "epoch": 24.85, + "learning_rate": 3.7573477421193156e-05, + "loss": 1.3964, + "step": 2355000 + }, + { + "epoch": 24.86, + "learning_rate": 3.757083909368174e-05, + "loss": 1.4085, + "step": 2355500 + }, + { + "epoch": 24.86, + "learning_rate": 3.756820076617031e-05, + "loss": 1.3868, + "step": 2356000 + }, + { + "epoch": 24.87, + "learning_rate": 3.756556243865888e-05, + "loss": 1.3089, + "step": 2356500 + }, + { + "epoch": 24.87, + "learning_rate": 3.7562924111147465e-05, + "loss": 1.3324, + "step": 2357000 + }, + { + "epoch": 24.88, + "learning_rate": 3.756028578363604e-05, + "loss": 1.4052, + "step": 2357500 + }, + { + "epoch": 24.88, + "learning_rate": 3.7557647456124616e-05, + "loss": 1.4208, + "step": 2358000 + }, + { + "epoch": 24.89, + "learning_rate": 3.755500912861319e-05, + "loss": 1.4041, + "step": 2358500 + }, + { + "epoch": 24.9, + "learning_rate": 3.755237080110177e-05, + "loss": 1.3417, + "step": 2359000 + }, + { + "epoch": 24.9, + "learning_rate": 3.754973247359034e-05, + "loss": 1.3781, + "step": 2359500 + }, + { + "epoch": 24.91, + "learning_rate": 3.754709414607892e-05, + "loss": 1.3519, + "step": 2360000 + }, + { + "epoch": 24.91, + "learning_rate": 3.75444558185675e-05, + "loss": 1.3492, + "step": 2360500 + }, + { + "epoch": 24.92, + "learning_rate": 3.754181749105607e-05, + "loss": 1.3572, + "step": 2361000 + }, + { + "epoch": 24.92, + "learning_rate": 3.7539179163544644e-05, + "loss": 1.3764, + "step": 2361500 + }, + { + "epoch": 24.93, + "learning_rate": 3.7536540836033227e-05, + "loss": 1.3535, + "step": 2362000 + }, + { + "epoch": 24.93, + "learning_rate": 3.75339025085218e-05, + "loss": 1.3433, + "step": 2362500 + }, + { + "epoch": 24.94, + "learning_rate": 3.753126418101038e-05, + "loss": 1.3574, + "step": 2363000 + }, + { + "epoch": 24.94, + "learning_rate": 3.752862585349895e-05, + "loss": 1.3752, + "step": 2363500 + }, + { + "epoch": 24.95, + "learning_rate": 3.752598752598753e-05, + "loss": 1.3062, + "step": 2364000 + }, + { + "epoch": 24.95, + "learning_rate": 3.7523349198476104e-05, + "loss": 1.3521, + "step": 2364500 + }, + { + "epoch": 24.96, + "learning_rate": 3.752071087096468e-05, + "loss": 1.3714, + "step": 2365000 + }, + { + "epoch": 24.96, + "learning_rate": 3.7518072543453255e-05, + "loss": 1.3816, + "step": 2365500 + }, + { + "epoch": 24.97, + "learning_rate": 3.751543421594183e-05, + "loss": 1.3608, + "step": 2366000 + }, + { + "epoch": 24.97, + "learning_rate": 3.7512795888430406e-05, + "loss": 1.4104, + "step": 2366500 + }, + { + "epoch": 24.98, + "learning_rate": 3.751015756091898e-05, + "loss": 1.3264, + "step": 2367000 + }, + { + "epoch": 24.98, + "learning_rate": 3.7507519233407564e-05, + "loss": 1.4156, + "step": 2367500 + }, + { + "epoch": 24.99, + "learning_rate": 3.750488090589613e-05, + "loss": 1.3883, + "step": 2368000 + }, + { + "epoch": 25.0, + "learning_rate": 3.750224257838471e-05, + "loss": 1.3332, + "step": 2368500 + }, + { + "epoch": 25.0, + "learning_rate": 3.749960425087329e-05, + "loss": 1.2987, + "step": 2369000 + }, + { + "epoch": 25.01, + "learning_rate": 3.7496965923361866e-05, + "loss": 1.3936, + "step": 2369500 + }, + { + "epoch": 25.01, + "learning_rate": 3.749432759585044e-05, + "loss": 1.3503, + "step": 2370000 + }, + { + "epoch": 25.02, + "learning_rate": 3.749168926833902e-05, + "loss": 1.3202, + "step": 2370500 + }, + { + "epoch": 25.02, + "learning_rate": 3.748905094082759e-05, + "loss": 1.3449, + "step": 2371000 + }, + { + "epoch": 25.03, + "learning_rate": 3.748641261331617e-05, + "loss": 1.4357, + "step": 2371500 + }, + { + "epoch": 25.03, + "learning_rate": 3.748377428580474e-05, + "loss": 1.3287, + "step": 2372000 + }, + { + "epoch": 25.04, + "learning_rate": 3.7481135958293325e-05, + "loss": 1.3781, + "step": 2372500 + }, + { + "epoch": 25.04, + "learning_rate": 3.7478497630781894e-05, + "loss": 1.3817, + "step": 2373000 + }, + { + "epoch": 25.05, + "learning_rate": 3.747585930327047e-05, + "loss": 1.3709, + "step": 2373500 + }, + { + "epoch": 25.05, + "learning_rate": 3.747322097575905e-05, + "loss": 1.3895, + "step": 2374000 + }, + { + "epoch": 25.06, + "learning_rate": 3.747058264824763e-05, + "loss": 1.2933, + "step": 2374500 + }, + { + "epoch": 25.06, + "learning_rate": 3.7467944320736196e-05, + "loss": 1.3555, + "step": 2375000 + }, + { + "epoch": 25.07, + "learning_rate": 3.746530599322478e-05, + "loss": 1.373, + "step": 2375500 + }, + { + "epoch": 25.07, + "learning_rate": 3.7462667665713354e-05, + "loss": 1.3368, + "step": 2376000 + }, + { + "epoch": 25.08, + "learning_rate": 3.746002933820193e-05, + "loss": 1.3493, + "step": 2376500 + }, + { + "epoch": 25.09, + "learning_rate": 3.7457391010690505e-05, + "loss": 1.3549, + "step": 2377000 + }, + { + "epoch": 25.09, + "learning_rate": 3.745475268317908e-05, + "loss": 1.4076, + "step": 2377500 + }, + { + "epoch": 25.1, + "learning_rate": 3.7452114355667656e-05, + "loss": 1.4236, + "step": 2378000 + }, + { + "epoch": 25.1, + "learning_rate": 3.744947602815623e-05, + "loss": 1.3479, + "step": 2378500 + }, + { + "epoch": 25.11, + "learning_rate": 3.7446837700644813e-05, + "loss": 1.3641, + "step": 2379000 + }, + { + "epoch": 25.11, + "learning_rate": 3.744419937313339e-05, + "loss": 1.3709, + "step": 2379500 + }, + { + "epoch": 25.12, + "learning_rate": 3.744156104562196e-05, + "loss": 1.3516, + "step": 2380000 + }, + { + "epoch": 25.12, + "learning_rate": 3.743892271811053e-05, + "loss": 1.365, + "step": 2380500 + }, + { + "epoch": 25.13, + "learning_rate": 3.7436284390599115e-05, + "loss": 1.3616, + "step": 2381000 + }, + { + "epoch": 25.13, + "learning_rate": 3.743364606308769e-05, + "loss": 1.363, + "step": 2381500 + }, + { + "epoch": 25.14, + "learning_rate": 3.7431007735576266e-05, + "loss": 1.3488, + "step": 2382000 + }, + { + "epoch": 25.14, + "learning_rate": 3.742836940806484e-05, + "loss": 1.3455, + "step": 2382500 + }, + { + "epoch": 25.15, + "learning_rate": 3.742573108055342e-05, + "loss": 1.3313, + "step": 2383000 + }, + { + "epoch": 25.15, + "learning_rate": 3.742309275304199e-05, + "loss": 1.3178, + "step": 2383500 + }, + { + "epoch": 25.16, + "learning_rate": 3.742045442553057e-05, + "loss": 1.3769, + "step": 2384000 + }, + { + "epoch": 25.16, + "learning_rate": 3.7417816098019144e-05, + "loss": 1.3725, + "step": 2384500 + }, + { + "epoch": 25.17, + "learning_rate": 3.741517777050772e-05, + "loss": 1.3334, + "step": 2385000 + }, + { + "epoch": 25.17, + "learning_rate": 3.7412539442996295e-05, + "loss": 1.3056, + "step": 2385500 + }, + { + "epoch": 25.18, + "learning_rate": 3.740990111548488e-05, + "loss": 1.3694, + "step": 2386000 + }, + { + "epoch": 25.19, + "learning_rate": 3.740726278797345e-05, + "loss": 1.3757, + "step": 2386500 + }, + { + "epoch": 25.19, + "learning_rate": 3.740462446046202e-05, + "loss": 1.3019, + "step": 2387000 + }, + { + "epoch": 25.2, + "learning_rate": 3.7401986132950604e-05, + "loss": 1.3407, + "step": 2387500 + }, + { + "epoch": 25.2, + "learning_rate": 3.739934780543918e-05, + "loss": 1.4105, + "step": 2388000 + }, + { + "epoch": 25.21, + "learning_rate": 3.7396709477927754e-05, + "loss": 1.3754, + "step": 2388500 + }, + { + "epoch": 25.21, + "learning_rate": 3.739407115041633e-05, + "loss": 1.4002, + "step": 2389000 + }, + { + "epoch": 25.22, + "learning_rate": 3.7391432822904905e-05, + "loss": 1.3491, + "step": 2389500 + }, + { + "epoch": 25.22, + "learning_rate": 3.738879449539348e-05, + "loss": 1.3888, + "step": 2390000 + }, + { + "epoch": 25.23, + "learning_rate": 3.7386156167882056e-05, + "loss": 1.3757, + "step": 2390500 + }, + { + "epoch": 25.23, + "learning_rate": 3.738351784037064e-05, + "loss": 1.3612, + "step": 2391000 + }, + { + "epoch": 25.24, + "learning_rate": 3.7380879512859214e-05, + "loss": 1.34, + "step": 2391500 + }, + { + "epoch": 25.24, + "learning_rate": 3.737824118534778e-05, + "loss": 1.3928, + "step": 2392000 + }, + { + "epoch": 25.25, + "learning_rate": 3.737560285783636e-05, + "loss": 1.3215, + "step": 2392500 + }, + { + "epoch": 25.25, + "learning_rate": 3.737296453032494e-05, + "loss": 1.391, + "step": 2393000 + }, + { + "epoch": 25.26, + "learning_rate": 3.7370326202813516e-05, + "loss": 1.3688, + "step": 2393500 + }, + { + "epoch": 25.26, + "learning_rate": 3.7367687875302085e-05, + "loss": 1.3635, + "step": 2394000 + }, + { + "epoch": 25.27, + "learning_rate": 3.736504954779067e-05, + "loss": 1.355, + "step": 2394500 + }, + { + "epoch": 25.28, + "learning_rate": 3.736241122027924e-05, + "loss": 1.3568, + "step": 2395000 + }, + { + "epoch": 25.28, + "learning_rate": 3.735977289276782e-05, + "loss": 1.3318, + "step": 2395500 + }, + { + "epoch": 25.29, + "learning_rate": 3.7357134565256394e-05, + "loss": 1.3466, + "step": 2396000 + }, + { + "epoch": 25.29, + "learning_rate": 3.735449623774497e-05, + "loss": 1.3885, + "step": 2396500 + }, + { + "epoch": 25.3, + "learning_rate": 3.7351857910233545e-05, + "loss": 1.3515, + "step": 2397000 + }, + { + "epoch": 25.3, + "learning_rate": 3.734921958272212e-05, + "loss": 1.3416, + "step": 2397500 + }, + { + "epoch": 25.31, + "learning_rate": 3.73465812552107e-05, + "loss": 1.3666, + "step": 2398000 + }, + { + "epoch": 25.31, + "learning_rate": 3.734394292769928e-05, + "loss": 1.3246, + "step": 2398500 + }, + { + "epoch": 25.32, + "learning_rate": 3.7341304600187846e-05, + "loss": 1.3543, + "step": 2399000 + }, + { + "epoch": 25.32, + "learning_rate": 3.733866627267643e-05, + "loss": 1.3916, + "step": 2399500 + }, + { + "epoch": 25.33, + "learning_rate": 3.7336027945165004e-05, + "loss": 1.395, + "step": 2400000 + }, + { + "epoch": 25.33, + "learning_rate": 3.733338961765358e-05, + "loss": 1.3966, + "step": 2400500 + }, + { + "epoch": 25.34, + "learning_rate": 3.7330751290142155e-05, + "loss": 1.3525, + "step": 2401000 + }, + { + "epoch": 25.34, + "learning_rate": 3.732811296263073e-05, + "loss": 1.3279, + "step": 2401500 + }, + { + "epoch": 25.35, + "learning_rate": 3.7325474635119306e-05, + "loss": 1.4072, + "step": 2402000 + }, + { + "epoch": 25.35, + "learning_rate": 3.732283630760788e-05, + "loss": 1.3323, + "step": 2402500 + }, + { + "epoch": 25.36, + "learning_rate": 3.7320197980096464e-05, + "loss": 1.3832, + "step": 2403000 + }, + { + "epoch": 25.36, + "learning_rate": 3.731755965258503e-05, + "loss": 1.3747, + "step": 2403500 + }, + { + "epoch": 25.37, + "learning_rate": 3.731492132507361e-05, + "loss": 1.3198, + "step": 2404000 + }, + { + "epoch": 25.38, + "learning_rate": 3.7312282997562184e-05, + "loss": 1.3994, + "step": 2404500 + }, + { + "epoch": 25.38, + "learning_rate": 3.7309644670050766e-05, + "loss": 1.4013, + "step": 2405000 + }, + { + "epoch": 25.39, + "learning_rate": 3.730700634253934e-05, + "loss": 1.3923, + "step": 2405500 + }, + { + "epoch": 25.39, + "learning_rate": 3.730436801502791e-05, + "loss": 1.3406, + "step": 2406000 + }, + { + "epoch": 25.4, + "learning_rate": 3.730172968751649e-05, + "loss": 1.3066, + "step": 2406500 + }, + { + "epoch": 25.4, + "learning_rate": 3.729909136000507e-05, + "loss": 1.397, + "step": 2407000 + }, + { + "epoch": 25.41, + "learning_rate": 3.729645303249364e-05, + "loss": 1.3581, + "step": 2407500 + }, + { + "epoch": 25.41, + "learning_rate": 3.729381470498222e-05, + "loss": 1.3965, + "step": 2408000 + }, + { + "epoch": 25.42, + "learning_rate": 3.7291176377470794e-05, + "loss": 1.3734, + "step": 2408500 + }, + { + "epoch": 25.42, + "learning_rate": 3.728853804995937e-05, + "loss": 1.348, + "step": 2409000 + }, + { + "epoch": 25.43, + "learning_rate": 3.7285899722447945e-05, + "loss": 1.339, + "step": 2409500 + }, + { + "epoch": 25.43, + "learning_rate": 3.728326139493653e-05, + "loss": 1.3839, + "step": 2410000 + }, + { + "epoch": 25.44, + "learning_rate": 3.72806230674251e-05, + "loss": 1.3744, + "step": 2410500 + }, + { + "epoch": 25.44, + "learning_rate": 3.727798473991367e-05, + "loss": 1.4064, + "step": 2411000 + }, + { + "epoch": 25.45, + "learning_rate": 3.7275346412402254e-05, + "loss": 1.2999, + "step": 2411500 + }, + { + "epoch": 25.45, + "learning_rate": 3.727270808489083e-05, + "loss": 1.3756, + "step": 2412000 + }, + { + "epoch": 25.46, + "learning_rate": 3.7270069757379405e-05, + "loss": 1.3702, + "step": 2412500 + }, + { + "epoch": 25.47, + "learning_rate": 3.726743142986798e-05, + "loss": 1.3935, + "step": 2413000 + }, + { + "epoch": 25.47, + "learning_rate": 3.7264793102356556e-05, + "loss": 1.3507, + "step": 2413500 + }, + { + "epoch": 25.48, + "learning_rate": 3.726215477484513e-05, + "loss": 1.3585, + "step": 2414000 + }, + { + "epoch": 25.48, + "learning_rate": 3.725951644733371e-05, + "loss": 1.3164, + "step": 2414500 + }, + { + "epoch": 25.49, + "learning_rate": 3.725687811982229e-05, + "loss": 1.344, + "step": 2415000 + }, + { + "epoch": 25.49, + "learning_rate": 3.725423979231086e-05, + "loss": 1.3383, + "step": 2415500 + }, + { + "epoch": 25.5, + "learning_rate": 3.725160146479943e-05, + "loss": 1.3613, + "step": 2416000 + }, + { + "epoch": 25.5, + "learning_rate": 3.724896313728801e-05, + "loss": 1.4194, + "step": 2416500 + }, + { + "epoch": 25.51, + "learning_rate": 3.724632480977659e-05, + "loss": 1.3886, + "step": 2417000 + }, + { + "epoch": 25.51, + "learning_rate": 3.724368648226517e-05, + "loss": 1.3192, + "step": 2417500 + }, + { + "epoch": 25.52, + "learning_rate": 3.7241048154753735e-05, + "loss": 1.3899, + "step": 2418000 + }, + { + "epoch": 25.52, + "learning_rate": 3.723840982724232e-05, + "loss": 1.3755, + "step": 2418500 + }, + { + "epoch": 25.53, + "learning_rate": 3.723577149973089e-05, + "loss": 1.3464, + "step": 2419000 + }, + { + "epoch": 25.53, + "learning_rate": 3.723313317221947e-05, + "loss": 1.341, + "step": 2419500 + }, + { + "epoch": 25.54, + "learning_rate": 3.7230494844708044e-05, + "loss": 1.375, + "step": 2420000 + }, + { + "epoch": 25.54, + "learning_rate": 3.722785651719662e-05, + "loss": 1.3891, + "step": 2420500 + }, + { + "epoch": 25.55, + "learning_rate": 3.7225218189685195e-05, + "loss": 1.3722, + "step": 2421000 + }, + { + "epoch": 25.55, + "learning_rate": 3.722257986217377e-05, + "loss": 1.3554, + "step": 2421500 + }, + { + "epoch": 25.56, + "learning_rate": 3.721994153466235e-05, + "loss": 1.3343, + "step": 2422000 + }, + { + "epoch": 25.57, + "learning_rate": 3.721730320715092e-05, + "loss": 1.3779, + "step": 2422500 + }, + { + "epoch": 25.57, + "learning_rate": 3.72146648796395e-05, + "loss": 1.3528, + "step": 2423000 + }, + { + "epoch": 25.58, + "learning_rate": 3.721202655212808e-05, + "loss": 1.348, + "step": 2423500 + }, + { + "epoch": 25.58, + "learning_rate": 3.7209388224616655e-05, + "loss": 1.3881, + "step": 2424000 + }, + { + "epoch": 25.59, + "learning_rate": 3.720674989710523e-05, + "loss": 1.3869, + "step": 2424500 + }, + { + "epoch": 25.59, + "learning_rate": 3.7204111569593806e-05, + "loss": 1.3665, + "step": 2425000 + }, + { + "epoch": 25.6, + "learning_rate": 3.720147324208238e-05, + "loss": 1.3509, + "step": 2425500 + }, + { + "epoch": 25.6, + "learning_rate": 3.719883491457096e-05, + "loss": 1.3711, + "step": 2426000 + }, + { + "epoch": 25.61, + "learning_rate": 3.719619658705953e-05, + "loss": 1.3374, + "step": 2426500 + }, + { + "epoch": 25.61, + "learning_rate": 3.7193558259548114e-05, + "loss": 1.3733, + "step": 2427000 + }, + { + "epoch": 25.62, + "learning_rate": 3.719091993203668e-05, + "loss": 1.3388, + "step": 2427500 + }, + { + "epoch": 25.62, + "learning_rate": 3.718828160452526e-05, + "loss": 1.4118, + "step": 2428000 + }, + { + "epoch": 25.63, + "learning_rate": 3.7185643277013834e-05, + "loss": 1.3866, + "step": 2428500 + }, + { + "epoch": 25.63, + "learning_rate": 3.7183004949502416e-05, + "loss": 1.3436, + "step": 2429000 + }, + { + "epoch": 25.64, + "learning_rate": 3.718036662199099e-05, + "loss": 1.3448, + "step": 2429500 + }, + { + "epoch": 25.64, + "learning_rate": 3.717772829447956e-05, + "loss": 1.3238, + "step": 2430000 + }, + { + "epoch": 25.65, + "learning_rate": 3.717508996696814e-05, + "loss": 1.3846, + "step": 2430500 + }, + { + "epoch": 25.66, + "learning_rate": 3.717245163945672e-05, + "loss": 1.4025, + "step": 2431000 + }, + { + "epoch": 25.66, + "learning_rate": 3.7169813311945294e-05, + "loss": 1.3441, + "step": 2431500 + }, + { + "epoch": 25.67, + "learning_rate": 3.716717498443387e-05, + "loss": 1.465, + "step": 2432000 + }, + { + "epoch": 25.67, + "learning_rate": 3.7164536656922445e-05, + "loss": 1.3238, + "step": 2432500 + }, + { + "epoch": 25.68, + "learning_rate": 3.716189832941102e-05, + "loss": 1.3157, + "step": 2433000 + }, + { + "epoch": 25.68, + "learning_rate": 3.7159260001899596e-05, + "loss": 1.3808, + "step": 2433500 + }, + { + "epoch": 25.69, + "learning_rate": 3.715662167438818e-05, + "loss": 1.3515, + "step": 2434000 + }, + { + "epoch": 25.69, + "learning_rate": 3.715398334687675e-05, + "loss": 1.3438, + "step": 2434500 + }, + { + "epoch": 25.7, + "learning_rate": 3.715134501936532e-05, + "loss": 1.3936, + "step": 2435000 + }, + { + "epoch": 25.7, + "learning_rate": 3.7148706691853904e-05, + "loss": 1.3786, + "step": 2435500 + }, + { + "epoch": 25.71, + "learning_rate": 3.714606836434248e-05, + "loss": 1.301, + "step": 2436000 + }, + { + "epoch": 25.71, + "learning_rate": 3.7143430036831055e-05, + "loss": 1.3434, + "step": 2436500 + }, + { + "epoch": 25.72, + "learning_rate": 3.714079170931963e-05, + "loss": 1.3468, + "step": 2437000 + }, + { + "epoch": 25.72, + "learning_rate": 3.7138153381808206e-05, + "loss": 1.355, + "step": 2437500 + }, + { + "epoch": 25.73, + "learning_rate": 3.713551505429678e-05, + "loss": 1.3532, + "step": 2438000 + }, + { + "epoch": 25.73, + "learning_rate": 3.713287672678536e-05, + "loss": 1.3144, + "step": 2438500 + }, + { + "epoch": 25.74, + "learning_rate": 3.713023839927394e-05, + "loss": 1.4006, + "step": 2439000 + }, + { + "epoch": 25.74, + "learning_rate": 3.712760007176251e-05, + "loss": 1.3611, + "step": 2439500 + }, + { + "epoch": 25.75, + "learning_rate": 3.7124961744251084e-05, + "loss": 1.3102, + "step": 2440000 + }, + { + "epoch": 25.76, + "learning_rate": 3.712232341673966e-05, + "loss": 1.3429, + "step": 2440500 + }, + { + "epoch": 25.76, + "learning_rate": 3.711968508922824e-05, + "loss": 1.3898, + "step": 2441000 + }, + { + "epoch": 25.77, + "learning_rate": 3.711704676171681e-05, + "loss": 1.3453, + "step": 2441500 + }, + { + "epoch": 25.77, + "learning_rate": 3.7114408434205386e-05, + "loss": 1.3482, + "step": 2442000 + }, + { + "epoch": 25.78, + "learning_rate": 3.711177010669397e-05, + "loss": 1.3192, + "step": 2442500 + }, + { + "epoch": 25.78, + "learning_rate": 3.7109131779182544e-05, + "loss": 1.334, + "step": 2443000 + }, + { + "epoch": 25.79, + "learning_rate": 3.710649345167112e-05, + "loss": 1.4056, + "step": 2443500 + }, + { + "epoch": 25.79, + "learning_rate": 3.7103855124159695e-05, + "loss": 1.3276, + "step": 2444000 + }, + { + "epoch": 25.8, + "learning_rate": 3.710121679664827e-05, + "loss": 1.3188, + "step": 2444500 + }, + { + "epoch": 25.8, + "learning_rate": 3.7098578469136846e-05, + "loss": 1.3418, + "step": 2445000 + }, + { + "epoch": 25.81, + "learning_rate": 3.709594014162542e-05, + "loss": 1.2901, + "step": 2445500 + }, + { + "epoch": 25.81, + "learning_rate": 3.7093301814114e-05, + "loss": 1.3399, + "step": 2446000 + }, + { + "epoch": 25.82, + "learning_rate": 3.709066348660257e-05, + "loss": 1.3667, + "step": 2446500 + }, + { + "epoch": 25.82, + "learning_rate": 3.708802515909115e-05, + "loss": 1.3769, + "step": 2447000 + }, + { + "epoch": 25.83, + "learning_rate": 3.708538683157973e-05, + "loss": 1.3241, + "step": 2447500 + }, + { + "epoch": 25.83, + "learning_rate": 3.7082748504068305e-05, + "loss": 1.3578, + "step": 2448000 + }, + { + "epoch": 25.84, + "learning_rate": 3.708011017655688e-05, + "loss": 1.4072, + "step": 2448500 + }, + { + "epoch": 25.85, + "learning_rate": 3.7077471849045456e-05, + "loss": 1.3517, + "step": 2449000 + }, + { + "epoch": 25.85, + "learning_rate": 3.707483352153403e-05, + "loss": 1.3934, + "step": 2449500 + }, + { + "epoch": 25.86, + "learning_rate": 3.707219519402261e-05, + "loss": 1.3699, + "step": 2450000 + }, + { + "epoch": 25.86, + "learning_rate": 3.706955686651118e-05, + "loss": 1.3378, + "step": 2450500 + }, + { + "epoch": 25.87, + "learning_rate": 3.706691853899976e-05, + "loss": 1.3342, + "step": 2451000 + }, + { + "epoch": 25.87, + "learning_rate": 3.7064280211488334e-05, + "loss": 1.3481, + "step": 2451500 + }, + { + "epoch": 25.88, + "learning_rate": 3.706164188397691e-05, + "loss": 1.3829, + "step": 2452000 + }, + { + "epoch": 25.88, + "learning_rate": 3.705900355646549e-05, + "loss": 1.413, + "step": 2452500 + }, + { + "epoch": 25.89, + "learning_rate": 3.705636522895407e-05, + "loss": 1.3596, + "step": 2453000 + }, + { + "epoch": 25.89, + "learning_rate": 3.7053726901442636e-05, + "loss": 1.3313, + "step": 2453500 + }, + { + "epoch": 25.9, + "learning_rate": 3.705108857393121e-05, + "loss": 1.3197, + "step": 2454000 + }, + { + "epoch": 25.9, + "learning_rate": 3.704845024641979e-05, + "loss": 1.4209, + "step": 2454500 + }, + { + "epoch": 25.91, + "learning_rate": 3.704581191890837e-05, + "loss": 1.3367, + "step": 2455000 + }, + { + "epoch": 25.91, + "learning_rate": 3.7043173591396944e-05, + "loss": 1.3579, + "step": 2455500 + }, + { + "epoch": 25.92, + "learning_rate": 3.704053526388552e-05, + "loss": 1.2887, + "step": 2456000 + }, + { + "epoch": 25.92, + "learning_rate": 3.7037896936374095e-05, + "loss": 1.3794, + "step": 2456500 + }, + { + "epoch": 25.93, + "learning_rate": 3.703525860886267e-05, + "loss": 1.3943, + "step": 2457000 + }, + { + "epoch": 25.93, + "learning_rate": 3.7032620281351246e-05, + "loss": 1.402, + "step": 2457500 + }, + { + "epoch": 25.94, + "learning_rate": 3.702998195383983e-05, + "loss": 1.3507, + "step": 2458000 + }, + { + "epoch": 25.95, + "learning_rate": 3.70273436263284e-05, + "loss": 1.3358, + "step": 2458500 + }, + { + "epoch": 25.95, + "learning_rate": 3.702470529881697e-05, + "loss": 1.3637, + "step": 2459000 + }, + { + "epoch": 25.96, + "learning_rate": 3.7022066971305555e-05, + "loss": 1.3708, + "step": 2459500 + }, + { + "epoch": 25.96, + "learning_rate": 3.701942864379413e-05, + "loss": 1.3969, + "step": 2460000 + }, + { + "epoch": 25.97, + "learning_rate": 3.70167903162827e-05, + "loss": 1.3513, + "step": 2460500 + }, + { + "epoch": 25.97, + "learning_rate": 3.701415198877128e-05, + "loss": 1.3996, + "step": 2461000 + }, + { + "epoch": 25.98, + "learning_rate": 3.701151366125986e-05, + "loss": 1.345, + "step": 2461500 + }, + { + "epoch": 25.98, + "learning_rate": 3.700887533374843e-05, + "loss": 1.3434, + "step": 2462000 + }, + { + "epoch": 25.99, + "learning_rate": 3.700623700623701e-05, + "loss": 1.3277, + "step": 2462500 + }, + { + "epoch": 25.99, + "learning_rate": 3.7003598678725583e-05, + "loss": 1.4212, + "step": 2463000 + }, + { + "epoch": 26.0, + "learning_rate": 3.700096035121416e-05, + "loss": 1.3699, + "step": 2463500 + }, + { + "epoch": 26.0, + "learning_rate": 3.6998322023702734e-05, + "loss": 1.3013, + "step": 2464000 + }, + { + "epoch": 26.01, + "learning_rate": 3.699568369619132e-05, + "loss": 1.3428, + "step": 2464500 + }, + { + "epoch": 26.01, + "learning_rate": 3.699304536867989e-05, + "loss": 1.2574, + "step": 2465000 + }, + { + "epoch": 26.02, + "learning_rate": 3.699040704116846e-05, + "loss": 1.3854, + "step": 2465500 + }, + { + "epoch": 26.02, + "learning_rate": 3.6987768713657036e-05, + "loss": 1.3696, + "step": 2466000 + }, + { + "epoch": 26.03, + "learning_rate": 3.698513038614562e-05, + "loss": 1.3759, + "step": 2466500 + }, + { + "epoch": 26.04, + "learning_rate": 3.6982492058634194e-05, + "loss": 1.3783, + "step": 2467000 + }, + { + "epoch": 26.04, + "learning_rate": 3.697985373112277e-05, + "loss": 1.3063, + "step": 2467500 + }, + { + "epoch": 26.05, + "learning_rate": 3.6977215403611345e-05, + "loss": 1.3347, + "step": 2468000 + }, + { + "epoch": 26.05, + "learning_rate": 3.697457707609992e-05, + "loss": 1.3041, + "step": 2468500 + }, + { + "epoch": 26.06, + "learning_rate": 3.6971938748588496e-05, + "loss": 1.3494, + "step": 2469000 + }, + { + "epoch": 26.06, + "learning_rate": 3.696930042107707e-05, + "loss": 1.3127, + "step": 2469500 + }, + { + "epoch": 26.07, + "learning_rate": 3.696666209356565e-05, + "loss": 1.3913, + "step": 2470000 + }, + { + "epoch": 26.07, + "learning_rate": 3.696402376605422e-05, + "loss": 1.3811, + "step": 2470500 + }, + { + "epoch": 26.08, + "learning_rate": 3.69613854385428e-05, + "loss": 1.343, + "step": 2471000 + }, + { + "epoch": 26.08, + "learning_rate": 3.695874711103138e-05, + "loss": 1.3715, + "step": 2471500 + }, + { + "epoch": 26.09, + "learning_rate": 3.6956108783519956e-05, + "loss": 1.3414, + "step": 2472000 + }, + { + "epoch": 26.09, + "learning_rate": 3.6953470456008524e-05, + "loss": 1.4038, + "step": 2472500 + }, + { + "epoch": 26.1, + "learning_rate": 3.695083212849711e-05, + "loss": 1.4104, + "step": 2473000 + }, + { + "epoch": 26.1, + "learning_rate": 3.694819380098568e-05, + "loss": 1.3027, + "step": 2473500 + }, + { + "epoch": 26.11, + "learning_rate": 3.694555547347426e-05, + "loss": 1.372, + "step": 2474000 + }, + { + "epoch": 26.11, + "learning_rate": 3.694291714596283e-05, + "loss": 1.363, + "step": 2474500 + }, + { + "epoch": 26.12, + "learning_rate": 3.694027881845141e-05, + "loss": 1.3896, + "step": 2475000 + }, + { + "epoch": 26.12, + "learning_rate": 3.6937640490939984e-05, + "loss": 1.3174, + "step": 2475500 + }, + { + "epoch": 26.13, + "learning_rate": 3.693500216342856e-05, + "loss": 1.352, + "step": 2476000 + }, + { + "epoch": 26.14, + "learning_rate": 3.693236383591714e-05, + "loss": 1.3138, + "step": 2476500 + }, + { + "epoch": 26.14, + "learning_rate": 3.692972550840572e-05, + "loss": 1.3225, + "step": 2477000 + }, + { + "epoch": 26.15, + "learning_rate": 3.6927087180894286e-05, + "loss": 1.3482, + "step": 2477500 + }, + { + "epoch": 26.15, + "learning_rate": 3.692444885338286e-05, + "loss": 1.3135, + "step": 2478000 + }, + { + "epoch": 26.16, + "learning_rate": 3.6921810525871444e-05, + "loss": 1.3681, + "step": 2478500 + }, + { + "epoch": 26.16, + "learning_rate": 3.691917219836002e-05, + "loss": 1.321, + "step": 2479000 + }, + { + "epoch": 26.17, + "learning_rate": 3.691653387084859e-05, + "loss": 1.2964, + "step": 2479500 + }, + { + "epoch": 26.17, + "learning_rate": 3.691389554333717e-05, + "loss": 1.3518, + "step": 2480000 + }, + { + "epoch": 26.18, + "learning_rate": 3.6911257215825746e-05, + "loss": 1.3324, + "step": 2480500 + }, + { + "epoch": 26.18, + "learning_rate": 3.690861888831432e-05, + "loss": 1.3562, + "step": 2481000 + }, + { + "epoch": 26.19, + "learning_rate": 3.69059805608029e-05, + "loss": 1.3932, + "step": 2481500 + }, + { + "epoch": 26.19, + "learning_rate": 3.690334223329147e-05, + "loss": 1.3351, + "step": 2482000 + }, + { + "epoch": 26.2, + "learning_rate": 3.690070390578005e-05, + "loss": 1.3524, + "step": 2482500 + }, + { + "epoch": 26.2, + "learning_rate": 3.689806557826862e-05, + "loss": 1.3658, + "step": 2483000 + }, + { + "epoch": 26.21, + "learning_rate": 3.6895427250757205e-05, + "loss": 1.3733, + "step": 2483500 + }, + { + "epoch": 26.21, + "learning_rate": 3.689278892324578e-05, + "loss": 1.3367, + "step": 2484000 + }, + { + "epoch": 26.22, + "learning_rate": 3.689015059573435e-05, + "loss": 1.3325, + "step": 2484500 + }, + { + "epoch": 26.22, + "learning_rate": 3.688751226822293e-05, + "loss": 1.4001, + "step": 2485000 + }, + { + "epoch": 26.23, + "learning_rate": 3.688487394071151e-05, + "loss": 1.3741, + "step": 2485500 + }, + { + "epoch": 26.24, + "learning_rate": 3.688223561320008e-05, + "loss": 1.3429, + "step": 2486000 + }, + { + "epoch": 26.24, + "learning_rate": 3.687959728568866e-05, + "loss": 1.3491, + "step": 2486500 + }, + { + "epoch": 26.25, + "learning_rate": 3.6876958958177234e-05, + "loss": 1.3686, + "step": 2487000 + }, + { + "epoch": 26.25, + "learning_rate": 3.687432063066581e-05, + "loss": 1.3468, + "step": 2487500 + }, + { + "epoch": 26.26, + "learning_rate": 3.6871682303154385e-05, + "loss": 1.3865, + "step": 2488000 + }, + { + "epoch": 26.26, + "learning_rate": 3.686904397564297e-05, + "loss": 1.3137, + "step": 2488500 + }, + { + "epoch": 26.27, + "learning_rate": 3.6866405648131536e-05, + "loss": 1.3697, + "step": 2489000 + }, + { + "epoch": 26.27, + "learning_rate": 3.686376732062011e-05, + "loss": 1.3388, + "step": 2489500 + }, + { + "epoch": 26.28, + "learning_rate": 3.686112899310869e-05, + "loss": 1.446, + "step": 2490000 + }, + { + "epoch": 26.28, + "learning_rate": 3.685849066559727e-05, + "loss": 1.3146, + "step": 2490500 + }, + { + "epoch": 26.29, + "learning_rate": 3.6855852338085845e-05, + "loss": 1.372, + "step": 2491000 + }, + { + "epoch": 26.29, + "learning_rate": 3.685321401057441e-05, + "loss": 1.3481, + "step": 2491500 + }, + { + "epoch": 26.3, + "learning_rate": 3.6850575683062996e-05, + "loss": 1.337, + "step": 2492000 + }, + { + "epoch": 26.3, + "learning_rate": 3.684793735555157e-05, + "loss": 1.4111, + "step": 2492500 + }, + { + "epoch": 26.31, + "learning_rate": 3.6845299028040147e-05, + "loss": 1.376, + "step": 2493000 + }, + { + "epoch": 26.31, + "learning_rate": 3.684266070052872e-05, + "loss": 1.3541, + "step": 2493500 + }, + { + "epoch": 26.32, + "learning_rate": 3.68400223730173e-05, + "loss": 1.3416, + "step": 2494000 + }, + { + "epoch": 26.33, + "learning_rate": 3.683738404550587e-05, + "loss": 1.3632, + "step": 2494500 + }, + { + "epoch": 26.33, + "learning_rate": 3.683474571799445e-05, + "loss": 1.4298, + "step": 2495000 + }, + { + "epoch": 26.34, + "learning_rate": 3.683210739048303e-05, + "loss": 1.3269, + "step": 2495500 + }, + { + "epoch": 26.34, + "learning_rate": 3.6829469062971606e-05, + "loss": 1.3046, + "step": 2496000 + }, + { + "epoch": 26.35, + "learning_rate": 3.6826830735460175e-05, + "loss": 1.3712, + "step": 2496500 + }, + { + "epoch": 26.35, + "learning_rate": 3.682419240794876e-05, + "loss": 1.3653, + "step": 2497000 + }, + { + "epoch": 26.36, + "learning_rate": 3.682155408043733e-05, + "loss": 1.3432, + "step": 2497500 + }, + { + "epoch": 26.36, + "learning_rate": 3.681891575292591e-05, + "loss": 1.3516, + "step": 2498000 + }, + { + "epoch": 26.37, + "learning_rate": 3.6816277425414484e-05, + "loss": 1.3801, + "step": 2498500 + }, + { + "epoch": 26.37, + "learning_rate": 3.681363909790306e-05, + "loss": 1.3609, + "step": 2499000 + }, + { + "epoch": 26.38, + "learning_rate": 3.6811000770391635e-05, + "loss": 1.4133, + "step": 2499500 + }, + { + "epoch": 26.38, + "learning_rate": 3.680836244288021e-05, + "loss": 1.3771, + "step": 2500000 + }, + { + "epoch": 26.39, + "learning_rate": 3.680572411536879e-05, + "loss": 1.3614, + "step": 2500500 + }, + { + "epoch": 26.39, + "learning_rate": 3.680308578785736e-05, + "loss": 1.3743, + "step": 2501000 + }, + { + "epoch": 26.4, + "learning_rate": 3.6800447460345937e-05, + "loss": 1.3287, + "step": 2501500 + }, + { + "epoch": 26.4, + "learning_rate": 3.679780913283451e-05, + "loss": 1.3114, + "step": 2502000 + }, + { + "epoch": 26.41, + "learning_rate": 3.6795170805323094e-05, + "loss": 1.3986, + "step": 2502500 + }, + { + "epoch": 26.41, + "learning_rate": 3.679253247781167e-05, + "loss": 1.3365, + "step": 2503000 + }, + { + "epoch": 26.42, + "learning_rate": 3.678989415030024e-05, + "loss": 1.3756, + "step": 2503500 + }, + { + "epoch": 26.43, + "learning_rate": 3.678725582278882e-05, + "loss": 1.3526, + "step": 2504000 + }, + { + "epoch": 26.43, + "learning_rate": 3.6784617495277396e-05, + "loss": 1.3316, + "step": 2504500 + }, + { + "epoch": 26.44, + "learning_rate": 3.678197916776597e-05, + "loss": 1.3574, + "step": 2505000 + }, + { + "epoch": 26.44, + "learning_rate": 3.677934084025455e-05, + "loss": 1.3347, + "step": 2505500 + }, + { + "epoch": 26.45, + "learning_rate": 3.677670251274312e-05, + "loss": 1.3611, + "step": 2506000 + }, + { + "epoch": 26.45, + "learning_rate": 3.67740641852317e-05, + "loss": 1.3449, + "step": 2506500 + }, + { + "epoch": 26.46, + "learning_rate": 3.6771425857720274e-05, + "loss": 1.336, + "step": 2507000 + }, + { + "epoch": 26.46, + "learning_rate": 3.6768787530208856e-05, + "loss": 1.3649, + "step": 2507500 + }, + { + "epoch": 26.47, + "learning_rate": 3.6766149202697425e-05, + "loss": 1.325, + "step": 2508000 + }, + { + "epoch": 26.47, + "learning_rate": 3.6763510875186e-05, + "loss": 1.3658, + "step": 2508500 + }, + { + "epoch": 26.48, + "learning_rate": 3.676087254767458e-05, + "loss": 1.3281, + "step": 2509000 + }, + { + "epoch": 26.48, + "learning_rate": 3.675823422016316e-05, + "loss": 1.3505, + "step": 2509500 + }, + { + "epoch": 26.49, + "learning_rate": 3.6755595892651733e-05, + "loss": 1.3867, + "step": 2510000 + }, + { + "epoch": 26.49, + "learning_rate": 3.675295756514031e-05, + "loss": 1.3758, + "step": 2510500 + }, + { + "epoch": 26.5, + "learning_rate": 3.6750319237628884e-05, + "loss": 1.4166, + "step": 2511000 + }, + { + "epoch": 26.5, + "learning_rate": 3.674768091011746e-05, + "loss": 1.3169, + "step": 2511500 + }, + { + "epoch": 26.51, + "learning_rate": 3.6745042582606035e-05, + "loss": 1.3472, + "step": 2512000 + }, + { + "epoch": 26.52, + "learning_rate": 3.674240425509462e-05, + "loss": 1.3334, + "step": 2512500 + }, + { + "epoch": 26.52, + "learning_rate": 3.6739765927583186e-05, + "loss": 1.337, + "step": 2513000 + }, + { + "epoch": 26.53, + "learning_rate": 3.673712760007176e-05, + "loss": 1.3874, + "step": 2513500 + }, + { + "epoch": 26.53, + "learning_rate": 3.673448927256034e-05, + "loss": 1.3847, + "step": 2514000 + }, + { + "epoch": 26.54, + "learning_rate": 3.673185094504892e-05, + "loss": 1.3103, + "step": 2514500 + }, + { + "epoch": 26.54, + "learning_rate": 3.6729212617537495e-05, + "loss": 1.3603, + "step": 2515000 + }, + { + "epoch": 26.55, + "learning_rate": 3.6726574290026064e-05, + "loss": 1.3266, + "step": 2515500 + }, + { + "epoch": 26.55, + "learning_rate": 3.6723935962514646e-05, + "loss": 1.3259, + "step": 2516000 + }, + { + "epoch": 26.56, + "learning_rate": 3.672129763500322e-05, + "loss": 1.3814, + "step": 2516500 + }, + { + "epoch": 26.56, + "learning_rate": 3.67186593074918e-05, + "loss": 1.3537, + "step": 2517000 + }, + { + "epoch": 26.57, + "learning_rate": 3.671602097998037e-05, + "loss": 1.319, + "step": 2517500 + }, + { + "epoch": 26.57, + "learning_rate": 3.671338265246895e-05, + "loss": 1.352, + "step": 2518000 + }, + { + "epoch": 26.58, + "learning_rate": 3.6710744324957523e-05, + "loss": 1.3588, + "step": 2518500 + }, + { + "epoch": 26.58, + "learning_rate": 3.67081059974461e-05, + "loss": 1.3351, + "step": 2519000 + }, + { + "epoch": 26.59, + "learning_rate": 3.670546766993468e-05, + "loss": 1.3651, + "step": 2519500 + }, + { + "epoch": 26.59, + "learning_rate": 3.670282934242325e-05, + "loss": 1.3589, + "step": 2520000 + }, + { + "epoch": 26.6, + "learning_rate": 3.6700191014911825e-05, + "loss": 1.3364, + "step": 2520500 + }, + { + "epoch": 26.6, + "learning_rate": 3.669755268740041e-05, + "loss": 1.3392, + "step": 2521000 + }, + { + "epoch": 26.61, + "learning_rate": 3.669491435988898e-05, + "loss": 1.3422, + "step": 2521500 + }, + { + "epoch": 26.62, + "learning_rate": 3.669227603237756e-05, + "loss": 1.3534, + "step": 2522000 + }, + { + "epoch": 26.62, + "learning_rate": 3.6689637704866134e-05, + "loss": 1.337, + "step": 2522500 + }, + { + "epoch": 26.63, + "learning_rate": 3.668699937735471e-05, + "loss": 1.3222, + "step": 2523000 + }, + { + "epoch": 26.63, + "learning_rate": 3.6684361049843285e-05, + "loss": 1.3742, + "step": 2523500 + }, + { + "epoch": 26.64, + "learning_rate": 3.668172272233186e-05, + "loss": 1.3587, + "step": 2524000 + }, + { + "epoch": 26.64, + "learning_rate": 3.667908439482044e-05, + "loss": 1.3457, + "step": 2524500 + }, + { + "epoch": 26.65, + "learning_rate": 3.667644606730901e-05, + "loss": 1.3334, + "step": 2525000 + }, + { + "epoch": 26.65, + "learning_rate": 3.667380773979759e-05, + "loss": 1.3614, + "step": 2525500 + }, + { + "epoch": 26.66, + "learning_rate": 3.667116941228617e-05, + "loss": 1.3573, + "step": 2526000 + }, + { + "epoch": 26.66, + "learning_rate": 3.6668531084774745e-05, + "loss": 1.3574, + "step": 2526500 + }, + { + "epoch": 26.67, + "learning_rate": 3.6665892757263314e-05, + "loss": 1.3789, + "step": 2527000 + }, + { + "epoch": 26.67, + "learning_rate": 3.666325442975189e-05, + "loss": 1.3829, + "step": 2527500 + }, + { + "epoch": 26.68, + "learning_rate": 3.666061610224047e-05, + "loss": 1.3696, + "step": 2528000 + }, + { + "epoch": 26.68, + "learning_rate": 3.665797777472905e-05, + "loss": 1.3563, + "step": 2528500 + }, + { + "epoch": 26.69, + "learning_rate": 3.665533944721762e-05, + "loss": 1.4009, + "step": 2529000 + }, + { + "epoch": 26.69, + "learning_rate": 3.66527011197062e-05, + "loss": 1.368, + "step": 2529500 + }, + { + "epoch": 26.7, + "learning_rate": 3.665006279219477e-05, + "loss": 1.3582, + "step": 2530000 + }, + { + "epoch": 26.71, + "learning_rate": 3.664742446468335e-05, + "loss": 1.3769, + "step": 2530500 + }, + { + "epoch": 26.71, + "learning_rate": 3.6644786137171924e-05, + "loss": 1.3822, + "step": 2531000 + }, + { + "epoch": 26.72, + "learning_rate": 3.6642147809660506e-05, + "loss": 1.3309, + "step": 2531500 + }, + { + "epoch": 26.72, + "learning_rate": 3.6639509482149075e-05, + "loss": 1.347, + "step": 2532000 + }, + { + "epoch": 26.73, + "learning_rate": 3.663687115463765e-05, + "loss": 1.3956, + "step": 2532500 + }, + { + "epoch": 26.73, + "learning_rate": 3.663423282712623e-05, + "loss": 1.3641, + "step": 2533000 + }, + { + "epoch": 26.74, + "learning_rate": 3.663159449961481e-05, + "loss": 1.3754, + "step": 2533500 + }, + { + "epoch": 26.74, + "learning_rate": 3.6628956172103384e-05, + "loss": 1.3452, + "step": 2534000 + }, + { + "epoch": 26.75, + "learning_rate": 3.662631784459196e-05, + "loss": 1.3523, + "step": 2534500 + }, + { + "epoch": 26.75, + "learning_rate": 3.6623679517080535e-05, + "loss": 1.3408, + "step": 2535000 + }, + { + "epoch": 26.76, + "learning_rate": 3.662104118956911e-05, + "loss": 1.3726, + "step": 2535500 + }, + { + "epoch": 26.76, + "learning_rate": 3.6618402862057686e-05, + "loss": 1.3318, + "step": 2536000 + }, + { + "epoch": 26.77, + "learning_rate": 3.661576453454626e-05, + "loss": 1.3601, + "step": 2536500 + }, + { + "epoch": 26.77, + "learning_rate": 3.661312620703484e-05, + "loss": 1.3809, + "step": 2537000 + }, + { + "epoch": 26.78, + "learning_rate": 3.661048787952341e-05, + "loss": 1.4179, + "step": 2537500 + }, + { + "epoch": 26.78, + "learning_rate": 3.6607849552011995e-05, + "loss": 1.3557, + "step": 2538000 + }, + { + "epoch": 26.79, + "learning_rate": 3.660521122450057e-05, + "loss": 1.299, + "step": 2538500 + }, + { + "epoch": 26.79, + "learning_rate": 3.660257289698914e-05, + "loss": 1.3212, + "step": 2539000 + }, + { + "epoch": 26.8, + "learning_rate": 3.6599934569477714e-05, + "loss": 1.3937, + "step": 2539500 + }, + { + "epoch": 26.81, + "learning_rate": 3.6597296241966297e-05, + "loss": 1.3356, + "step": 2540000 + }, + { + "epoch": 26.81, + "learning_rate": 3.659465791445487e-05, + "loss": 1.3562, + "step": 2540500 + }, + { + "epoch": 26.82, + "learning_rate": 3.659201958694345e-05, + "loss": 1.3936, + "step": 2541000 + }, + { + "epoch": 26.82, + "learning_rate": 3.658938125943202e-05, + "loss": 1.3158, + "step": 2541500 + }, + { + "epoch": 26.83, + "learning_rate": 3.65867429319206e-05, + "loss": 1.3589, + "step": 2542000 + }, + { + "epoch": 26.83, + "learning_rate": 3.6584104604409174e-05, + "loss": 1.3272, + "step": 2542500 + }, + { + "epoch": 26.84, + "learning_rate": 3.658146627689775e-05, + "loss": 1.3258, + "step": 2543000 + }, + { + "epoch": 26.84, + "learning_rate": 3.657882794938633e-05, + "loss": 1.4001, + "step": 2543500 + }, + { + "epoch": 26.85, + "learning_rate": 3.65761896218749e-05, + "loss": 1.3125, + "step": 2544000 + }, + { + "epoch": 26.85, + "learning_rate": 3.6573551294363476e-05, + "loss": 1.3205, + "step": 2544500 + }, + { + "epoch": 26.86, + "learning_rate": 3.657091296685206e-05, + "loss": 1.3596, + "step": 2545000 + }, + { + "epoch": 26.86, + "learning_rate": 3.6568274639340634e-05, + "loss": 1.2935, + "step": 2545500 + }, + { + "epoch": 26.87, + "learning_rate": 3.65656363118292e-05, + "loss": 1.358, + "step": 2546000 + }, + { + "epoch": 26.87, + "learning_rate": 3.6562997984317785e-05, + "loss": 1.3402, + "step": 2546500 + }, + { + "epoch": 26.88, + "learning_rate": 3.656035965680636e-05, + "loss": 1.3539, + "step": 2547000 + }, + { + "epoch": 26.88, + "learning_rate": 3.6557721329294936e-05, + "loss": 1.401, + "step": 2547500 + }, + { + "epoch": 26.89, + "learning_rate": 3.655508300178351e-05, + "loss": 1.3469, + "step": 2548000 + }, + { + "epoch": 26.9, + "learning_rate": 3.6552444674272087e-05, + "loss": 1.3675, + "step": 2548500 + }, + { + "epoch": 26.9, + "learning_rate": 3.654980634676066e-05, + "loss": 1.3576, + "step": 2549000 + }, + { + "epoch": 26.91, + "learning_rate": 3.654716801924924e-05, + "loss": 1.3927, + "step": 2549500 + }, + { + "epoch": 26.91, + "learning_rate": 3.654452969173782e-05, + "loss": 1.3256, + "step": 2550000 + }, + { + "epoch": 26.92, + "learning_rate": 3.6541891364226395e-05, + "loss": 1.3496, + "step": 2550500 + }, + { + "epoch": 26.92, + "learning_rate": 3.6539253036714964e-05, + "loss": 1.3766, + "step": 2551000 + }, + { + "epoch": 26.93, + "learning_rate": 3.653661470920354e-05, + "loss": 1.3899, + "step": 2551500 + }, + { + "epoch": 26.93, + "learning_rate": 3.653397638169212e-05, + "loss": 1.3737, + "step": 2552000 + }, + { + "epoch": 26.94, + "learning_rate": 3.65313380541807e-05, + "loss": 1.3328, + "step": 2552500 + }, + { + "epoch": 26.94, + "learning_rate": 3.652869972666927e-05, + "loss": 1.3138, + "step": 2553000 + }, + { + "epoch": 26.95, + "learning_rate": 3.652606139915785e-05, + "loss": 1.3515, + "step": 2553500 + }, + { + "epoch": 26.95, + "learning_rate": 3.6523423071646424e-05, + "loss": 1.3265, + "step": 2554000 + }, + { + "epoch": 26.96, + "learning_rate": 3.6520784744135e-05, + "loss": 1.333, + "step": 2554500 + }, + { + "epoch": 26.96, + "learning_rate": 3.6518146416623575e-05, + "loss": 1.3473, + "step": 2555000 + }, + { + "epoch": 26.97, + "learning_rate": 3.651550808911215e-05, + "loss": 1.3466, + "step": 2555500 + }, + { + "epoch": 26.97, + "learning_rate": 3.6512869761600726e-05, + "loss": 1.3503, + "step": 2556000 + }, + { + "epoch": 26.98, + "learning_rate": 3.65102314340893e-05, + "loss": 1.3572, + "step": 2556500 + }, + { + "epoch": 26.98, + "learning_rate": 3.6507593106577883e-05, + "loss": 1.4002, + "step": 2557000 + }, + { + "epoch": 26.99, + "learning_rate": 3.650495477906646e-05, + "loss": 1.3546, + "step": 2557500 + }, + { + "epoch": 27.0, + "learning_rate": 3.650231645155503e-05, + "loss": 1.3695, + "step": 2558000 + }, + { + "epoch": 27.0, + "learning_rate": 3.649967812404361e-05, + "loss": 1.3149, + "step": 2558500 + }, + { + "epoch": 27.01, + "learning_rate": 3.6497039796532185e-05, + "loss": 1.352, + "step": 2559000 + }, + { + "epoch": 27.01, + "learning_rate": 3.649440146902076e-05, + "loss": 1.363, + "step": 2559500 + }, + { + "epoch": 27.02, + "learning_rate": 3.6491763141509336e-05, + "loss": 1.318, + "step": 2560000 + }, + { + "epoch": 27.02, + "learning_rate": 3.648912481399791e-05, + "loss": 1.3496, + "step": 2560500 + }, + { + "epoch": 27.03, + "learning_rate": 3.648648648648649e-05, + "loss": 1.3475, + "step": 2561000 + }, + { + "epoch": 27.03, + "learning_rate": 3.648384815897506e-05, + "loss": 1.3555, + "step": 2561500 + }, + { + "epoch": 27.04, + "learning_rate": 3.6481209831463645e-05, + "loss": 1.3269, + "step": 2562000 + }, + { + "epoch": 27.04, + "learning_rate": 3.647857150395222e-05, + "loss": 1.2945, + "step": 2562500 + }, + { + "epoch": 27.05, + "learning_rate": 3.647593317644079e-05, + "loss": 1.3292, + "step": 2563000 + }, + { + "epoch": 27.05, + "learning_rate": 3.6473294848929365e-05, + "loss": 1.3606, + "step": 2563500 + }, + { + "epoch": 27.06, + "learning_rate": 3.647065652141795e-05, + "loss": 1.3701, + "step": 2564000 + }, + { + "epoch": 27.06, + "learning_rate": 3.646801819390652e-05, + "loss": 1.3597, + "step": 2564500 + }, + { + "epoch": 27.07, + "learning_rate": 3.646537986639509e-05, + "loss": 1.3778, + "step": 2565000 + }, + { + "epoch": 27.07, + "learning_rate": 3.6462741538883673e-05, + "loss": 1.3416, + "step": 2565500 + }, + { + "epoch": 27.08, + "learning_rate": 3.646010321137225e-05, + "loss": 1.3355, + "step": 2566000 + }, + { + "epoch": 27.09, + "learning_rate": 3.6457464883860824e-05, + "loss": 1.3466, + "step": 2566500 + }, + { + "epoch": 27.09, + "learning_rate": 3.64548265563494e-05, + "loss": 1.3485, + "step": 2567000 + }, + { + "epoch": 27.1, + "learning_rate": 3.6452188228837975e-05, + "loss": 1.3271, + "step": 2567500 + }, + { + "epoch": 27.1, + "learning_rate": 3.644954990132655e-05, + "loss": 1.3198, + "step": 2568000 + }, + { + "epoch": 27.11, + "learning_rate": 3.6446911573815126e-05, + "loss": 1.3552, + "step": 2568500 + }, + { + "epoch": 27.11, + "learning_rate": 3.644427324630371e-05, + "loss": 1.3554, + "step": 2569000 + }, + { + "epoch": 27.12, + "learning_rate": 3.6441634918792284e-05, + "loss": 1.3759, + "step": 2569500 + }, + { + "epoch": 27.12, + "learning_rate": 3.643899659128085e-05, + "loss": 1.3373, + "step": 2570000 + }, + { + "epoch": 27.13, + "learning_rate": 3.6436358263769435e-05, + "loss": 1.3461, + "step": 2570500 + }, + { + "epoch": 27.13, + "learning_rate": 3.643371993625801e-05, + "loss": 1.3432, + "step": 2571000 + }, + { + "epoch": 27.14, + "learning_rate": 3.6431081608746586e-05, + "loss": 1.3212, + "step": 2571500 + }, + { + "epoch": 27.14, + "learning_rate": 3.642844328123516e-05, + "loss": 1.3775, + "step": 2572000 + }, + { + "epoch": 27.15, + "learning_rate": 3.642580495372374e-05, + "loss": 1.3859, + "step": 2572500 + }, + { + "epoch": 27.15, + "learning_rate": 3.642316662621231e-05, + "loss": 1.3441, + "step": 2573000 + }, + { + "epoch": 27.16, + "learning_rate": 3.642052829870089e-05, + "loss": 1.3635, + "step": 2573500 + }, + { + "epoch": 27.16, + "learning_rate": 3.641788997118947e-05, + "loss": 1.3044, + "step": 2574000 + }, + { + "epoch": 27.17, + "learning_rate": 3.641525164367804e-05, + "loss": 1.3501, + "step": 2574500 + }, + { + "epoch": 27.17, + "learning_rate": 3.6412613316166615e-05, + "loss": 1.2977, + "step": 2575000 + }, + { + "epoch": 27.18, + "learning_rate": 3.640997498865519e-05, + "loss": 1.3371, + "step": 2575500 + }, + { + "epoch": 27.19, + "learning_rate": 3.640733666114377e-05, + "loss": 1.3444, + "step": 2576000 + }, + { + "epoch": 27.19, + "learning_rate": 3.640469833363235e-05, + "loss": 1.2901, + "step": 2576500 + }, + { + "epoch": 27.2, + "learning_rate": 3.6402060006120916e-05, + "loss": 1.3477, + "step": 2577000 + }, + { + "epoch": 27.2, + "learning_rate": 3.63994216786095e-05, + "loss": 1.3969, + "step": 2577500 + }, + { + "epoch": 27.21, + "learning_rate": 3.6396783351098074e-05, + "loss": 1.3177, + "step": 2578000 + }, + { + "epoch": 27.21, + "learning_rate": 3.639414502358665e-05, + "loss": 1.3732, + "step": 2578500 + }, + { + "epoch": 27.22, + "learning_rate": 3.6391506696075225e-05, + "loss": 1.3269, + "step": 2579000 + }, + { + "epoch": 27.22, + "learning_rate": 3.63888683685638e-05, + "loss": 1.3163, + "step": 2579500 + }, + { + "epoch": 27.23, + "learning_rate": 3.6386230041052376e-05, + "loss": 1.3143, + "step": 2580000 + }, + { + "epoch": 27.23, + "learning_rate": 3.638359171354095e-05, + "loss": 1.3698, + "step": 2580500 + }, + { + "epoch": 27.24, + "learning_rate": 3.6380953386029534e-05, + "loss": 1.3559, + "step": 2581000 + }, + { + "epoch": 27.24, + "learning_rate": 3.63783150585181e-05, + "loss": 1.3922, + "step": 2581500 + }, + { + "epoch": 27.25, + "learning_rate": 3.637567673100668e-05, + "loss": 1.3195, + "step": 2582000 + }, + { + "epoch": 27.25, + "learning_rate": 3.637303840349526e-05, + "loss": 1.3318, + "step": 2582500 + }, + { + "epoch": 27.26, + "learning_rate": 3.6370400075983836e-05, + "loss": 1.3864, + "step": 2583000 + }, + { + "epoch": 27.26, + "learning_rate": 3.636776174847241e-05, + "loss": 1.3151, + "step": 2583500 + }, + { + "epoch": 27.27, + "learning_rate": 3.636512342096099e-05, + "loss": 1.3536, + "step": 2584000 + }, + { + "epoch": 27.28, + "learning_rate": 3.636248509344956e-05, + "loss": 1.3935, + "step": 2584500 + }, + { + "epoch": 27.28, + "learning_rate": 3.635984676593814e-05, + "loss": 1.3547, + "step": 2585000 + }, + { + "epoch": 27.29, + "learning_rate": 3.635720843842671e-05, + "loss": 1.3806, + "step": 2585500 + }, + { + "epoch": 27.29, + "learning_rate": 3.6354570110915296e-05, + "loss": 1.3637, + "step": 2586000 + }, + { + "epoch": 27.3, + "learning_rate": 3.6351931783403864e-05, + "loss": 1.3019, + "step": 2586500 + }, + { + "epoch": 27.3, + "learning_rate": 3.634929345589244e-05, + "loss": 1.301, + "step": 2587000 + }, + { + "epoch": 27.31, + "learning_rate": 3.6346655128381015e-05, + "loss": 1.3738, + "step": 2587500 + }, + { + "epoch": 27.31, + "learning_rate": 3.63440168008696e-05, + "loss": 1.4064, + "step": 2588000 + }, + { + "epoch": 27.32, + "learning_rate": 3.634137847335817e-05, + "loss": 1.355, + "step": 2588500 + }, + { + "epoch": 27.32, + "learning_rate": 3.633874014584674e-05, + "loss": 1.3365, + "step": 2589000 + }, + { + "epoch": 27.33, + "learning_rate": 3.6336101818335324e-05, + "loss": 1.3379, + "step": 2589500 + }, + { + "epoch": 27.33, + "learning_rate": 3.63334634908239e-05, + "loss": 1.3369, + "step": 2590000 + }, + { + "epoch": 27.34, + "learning_rate": 3.6330825163312475e-05, + "loss": 1.3956, + "step": 2590500 + }, + { + "epoch": 27.34, + "learning_rate": 3.632818683580105e-05, + "loss": 1.3774, + "step": 2591000 + }, + { + "epoch": 27.35, + "learning_rate": 3.6325548508289626e-05, + "loss": 1.3899, + "step": 2591500 + }, + { + "epoch": 27.35, + "learning_rate": 3.63229101807782e-05, + "loss": 1.3468, + "step": 2592000 + }, + { + "epoch": 27.36, + "learning_rate": 3.632027185326678e-05, + "loss": 1.3451, + "step": 2592500 + }, + { + "epoch": 27.36, + "learning_rate": 3.631763352575536e-05, + "loss": 1.3487, + "step": 2593000 + }, + { + "epoch": 27.37, + "learning_rate": 3.631499519824393e-05, + "loss": 1.3564, + "step": 2593500 + }, + { + "epoch": 27.38, + "learning_rate": 3.63123568707325e-05, + "loss": 1.3334, + "step": 2594000 + }, + { + "epoch": 27.38, + "learning_rate": 3.6309718543221086e-05, + "loss": 1.3306, + "step": 2594500 + }, + { + "epoch": 27.39, + "learning_rate": 3.630708021570966e-05, + "loss": 1.3452, + "step": 2595000 + }, + { + "epoch": 27.39, + "learning_rate": 3.6304441888198237e-05, + "loss": 1.3637, + "step": 2595500 + }, + { + "epoch": 27.4, + "learning_rate": 3.630180356068681e-05, + "loss": 1.3055, + "step": 2596000 + }, + { + "epoch": 27.4, + "learning_rate": 3.629916523317539e-05, + "loss": 1.3603, + "step": 2596500 + }, + { + "epoch": 27.41, + "learning_rate": 3.629652690566396e-05, + "loss": 1.3266, + "step": 2597000 + }, + { + "epoch": 27.41, + "learning_rate": 3.629388857815254e-05, + "loss": 1.4133, + "step": 2597500 + }, + { + "epoch": 27.42, + "learning_rate": 3.629125025064112e-05, + "loss": 1.3899, + "step": 2598000 + }, + { + "epoch": 27.42, + "learning_rate": 3.628861192312969e-05, + "loss": 1.3664, + "step": 2598500 + }, + { + "epoch": 27.43, + "learning_rate": 3.6285973595618265e-05, + "loss": 1.3513, + "step": 2599000 + }, + { + "epoch": 27.43, + "learning_rate": 3.628333526810685e-05, + "loss": 1.3438, + "step": 2599500 + }, + { + "epoch": 27.44, + "learning_rate": 3.628069694059542e-05, + "loss": 1.332, + "step": 2600000 + }, + { + "epoch": 27.44, + "learning_rate": 3.627805861308399e-05, + "loss": 1.3389, + "step": 2600500 + }, + { + "epoch": 27.45, + "learning_rate": 3.627542028557257e-05, + "loss": 1.3386, + "step": 2601000 + }, + { + "epoch": 27.45, + "learning_rate": 3.627278195806115e-05, + "loss": 1.3711, + "step": 2601500 + }, + { + "epoch": 27.46, + "learning_rate": 3.6270143630549725e-05, + "loss": 1.3548, + "step": 2602000 + }, + { + "epoch": 27.46, + "learning_rate": 3.62675053030383e-05, + "loss": 1.2865, + "step": 2602500 + }, + { + "epoch": 27.47, + "learning_rate": 3.6264866975526876e-05, + "loss": 1.3838, + "step": 2603000 + }, + { + "epoch": 27.48, + "learning_rate": 3.626222864801545e-05, + "loss": 1.2911, + "step": 2603500 + }, + { + "epoch": 27.48, + "learning_rate": 3.625959032050403e-05, + "loss": 1.3341, + "step": 2604000 + }, + { + "epoch": 27.49, + "learning_rate": 3.62569519929926e-05, + "loss": 1.3807, + "step": 2604500 + }, + { + "epoch": 27.49, + "learning_rate": 3.6254313665481184e-05, + "loss": 1.342, + "step": 2605000 + }, + { + "epoch": 27.5, + "learning_rate": 3.625167533796975e-05, + "loss": 1.315, + "step": 2605500 + }, + { + "epoch": 27.5, + "learning_rate": 3.624903701045833e-05, + "loss": 1.3575, + "step": 2606000 + }, + { + "epoch": 27.51, + "learning_rate": 3.624639868294691e-05, + "loss": 1.3541, + "step": 2606500 + }, + { + "epoch": 27.51, + "learning_rate": 3.6243760355435486e-05, + "loss": 1.3309, + "step": 2607000 + }, + { + "epoch": 27.52, + "learning_rate": 3.624112202792406e-05, + "loss": 1.3619, + "step": 2607500 + }, + { + "epoch": 27.52, + "learning_rate": 3.623848370041264e-05, + "loss": 1.3566, + "step": 2608000 + }, + { + "epoch": 27.53, + "learning_rate": 3.623584537290121e-05, + "loss": 1.3679, + "step": 2608500 + }, + { + "epoch": 27.53, + "learning_rate": 3.623320704538979e-05, + "loss": 1.3365, + "step": 2609000 + }, + { + "epoch": 27.54, + "learning_rate": 3.6230568717878364e-05, + "loss": 1.2894, + "step": 2609500 + }, + { + "epoch": 27.54, + "learning_rate": 3.622793039036694e-05, + "loss": 1.3389, + "step": 2610000 + }, + { + "epoch": 27.55, + "learning_rate": 3.6225292062855515e-05, + "loss": 1.3351, + "step": 2610500 + }, + { + "epoch": 27.55, + "learning_rate": 3.622265373534409e-05, + "loss": 1.3958, + "step": 2611000 + }, + { + "epoch": 27.56, + "learning_rate": 3.622001540783267e-05, + "loss": 1.3391, + "step": 2611500 + }, + { + "epoch": 27.57, + "learning_rate": 3.621737708032125e-05, + "loss": 1.4, + "step": 2612000 + }, + { + "epoch": 27.57, + "learning_rate": 3.621473875280982e-05, + "loss": 1.3319, + "step": 2612500 + }, + { + "epoch": 27.58, + "learning_rate": 3.621210042529839e-05, + "loss": 1.3533, + "step": 2613000 + }, + { + "epoch": 27.58, + "learning_rate": 3.6209462097786974e-05, + "loss": 1.325, + "step": 2613500 + }, + { + "epoch": 27.59, + "learning_rate": 3.620682377027555e-05, + "loss": 1.4136, + "step": 2614000 + }, + { + "epoch": 27.59, + "learning_rate": 3.6204185442764125e-05, + "loss": 1.3561, + "step": 2614500 + }, + { + "epoch": 27.6, + "learning_rate": 3.62015471152527e-05, + "loss": 1.3691, + "step": 2615000 + }, + { + "epoch": 27.6, + "learning_rate": 3.6198908787741276e-05, + "loss": 1.3245, + "step": 2615500 + }, + { + "epoch": 27.61, + "learning_rate": 3.619627046022985e-05, + "loss": 1.3534, + "step": 2616000 + }, + { + "epoch": 27.61, + "learning_rate": 3.619363213271843e-05, + "loss": 1.3239, + "step": 2616500 + }, + { + "epoch": 27.62, + "learning_rate": 3.619099380520701e-05, + "loss": 1.363, + "step": 2617000 + }, + { + "epoch": 27.62, + "learning_rate": 3.618835547769558e-05, + "loss": 1.37, + "step": 2617500 + }, + { + "epoch": 27.63, + "learning_rate": 3.6185717150184154e-05, + "loss": 1.3355, + "step": 2618000 + }, + { + "epoch": 27.63, + "learning_rate": 3.6183078822672736e-05, + "loss": 1.3149, + "step": 2618500 + }, + { + "epoch": 27.64, + "learning_rate": 3.618044049516131e-05, + "loss": 1.3026, + "step": 2619000 + }, + { + "epoch": 27.64, + "learning_rate": 3.617780216764988e-05, + "loss": 1.3858, + "step": 2619500 + }, + { + "epoch": 27.65, + "learning_rate": 3.617516384013846e-05, + "loss": 1.3863, + "step": 2620000 + }, + { + "epoch": 27.65, + "learning_rate": 3.617252551262704e-05, + "loss": 1.3501, + "step": 2620500 + }, + { + "epoch": 27.66, + "learning_rate": 3.6169887185115614e-05, + "loss": 1.352, + "step": 2621000 + }, + { + "epoch": 27.67, + "learning_rate": 3.616724885760419e-05, + "loss": 1.3196, + "step": 2621500 + }, + { + "epoch": 27.67, + "learning_rate": 3.6164610530092765e-05, + "loss": 1.3435, + "step": 2622000 + }, + { + "epoch": 27.68, + "learning_rate": 3.616197220258134e-05, + "loss": 1.3861, + "step": 2622500 + }, + { + "epoch": 27.68, + "learning_rate": 3.6159333875069916e-05, + "loss": 1.3938, + "step": 2623000 + }, + { + "epoch": 27.69, + "learning_rate": 3.61566955475585e-05, + "loss": 1.3559, + "step": 2623500 + }, + { + "epoch": 27.69, + "learning_rate": 3.615405722004707e-05, + "loss": 1.328, + "step": 2624000 + }, + { + "epoch": 27.7, + "learning_rate": 3.615141889253564e-05, + "loss": 1.3376, + "step": 2624500 + }, + { + "epoch": 27.7, + "learning_rate": 3.614878056502422e-05, + "loss": 1.3192, + "step": 2625000 + }, + { + "epoch": 27.71, + "learning_rate": 3.61461422375128e-05, + "loss": 1.3558, + "step": 2625500 + }, + { + "epoch": 27.71, + "learning_rate": 3.6143503910001375e-05, + "loss": 1.3712, + "step": 2626000 + }, + { + "epoch": 27.72, + "learning_rate": 3.614086558248995e-05, + "loss": 1.343, + "step": 2626500 + }, + { + "epoch": 27.72, + "learning_rate": 3.6138227254978526e-05, + "loss": 1.3183, + "step": 2627000 + }, + { + "epoch": 27.73, + "learning_rate": 3.61355889274671e-05, + "loss": 1.3757, + "step": 2627500 + }, + { + "epoch": 27.73, + "learning_rate": 3.613295059995568e-05, + "loss": 1.3842, + "step": 2628000 + }, + { + "epoch": 27.74, + "learning_rate": 3.613031227244425e-05, + "loss": 1.3165, + "step": 2628500 + }, + { + "epoch": 27.74, + "learning_rate": 3.612767394493283e-05, + "loss": 1.3706, + "step": 2629000 + }, + { + "epoch": 27.75, + "learning_rate": 3.6125035617421404e-05, + "loss": 1.3591, + "step": 2629500 + }, + { + "epoch": 27.76, + "learning_rate": 3.612239728990998e-05, + "loss": 1.3385, + "step": 2630000 + }, + { + "epoch": 27.76, + "learning_rate": 3.611975896239856e-05, + "loss": 1.3138, + "step": 2630500 + }, + { + "epoch": 27.77, + "learning_rate": 3.611712063488714e-05, + "loss": 1.3832, + "step": 2631000 + }, + { + "epoch": 27.77, + "learning_rate": 3.6114482307375706e-05, + "loss": 1.3342, + "step": 2631500 + }, + { + "epoch": 27.78, + "learning_rate": 3.611184397986429e-05, + "loss": 1.3566, + "step": 2632000 + }, + { + "epoch": 27.78, + "learning_rate": 3.610920565235286e-05, + "loss": 1.353, + "step": 2632500 + }, + { + "epoch": 27.79, + "learning_rate": 3.610656732484144e-05, + "loss": 1.3718, + "step": 2633000 + }, + { + "epoch": 27.79, + "learning_rate": 3.6103928997330014e-05, + "loss": 1.3672, + "step": 2633500 + }, + { + "epoch": 27.8, + "learning_rate": 3.610129066981859e-05, + "loss": 1.3361, + "step": 2634000 + }, + { + "epoch": 27.8, + "learning_rate": 3.6098652342307165e-05, + "loss": 1.3431, + "step": 2634500 + }, + { + "epoch": 27.81, + "learning_rate": 3.609601401479574e-05, + "loss": 1.3133, + "step": 2635000 + }, + { + "epoch": 27.81, + "learning_rate": 3.609337568728432e-05, + "loss": 1.3534, + "step": 2635500 + }, + { + "epoch": 27.82, + "learning_rate": 3.60907373597729e-05, + "loss": 1.3316, + "step": 2636000 + }, + { + "epoch": 27.82, + "learning_rate": 3.608809903226147e-05, + "loss": 1.3491, + "step": 2636500 + }, + { + "epoch": 27.83, + "learning_rate": 3.608546070475004e-05, + "loss": 1.384, + "step": 2637000 + }, + { + "epoch": 27.83, + "learning_rate": 3.6082822377238625e-05, + "loss": 1.3609, + "step": 2637500 + }, + { + "epoch": 27.84, + "learning_rate": 3.60801840497272e-05, + "loss": 1.3686, + "step": 2638000 + }, + { + "epoch": 27.84, + "learning_rate": 3.607754572221577e-05, + "loss": 1.2952, + "step": 2638500 + }, + { + "epoch": 27.85, + "learning_rate": 3.607490739470435e-05, + "loss": 1.3604, + "step": 2639000 + }, + { + "epoch": 27.86, + "learning_rate": 3.607226906719293e-05, + "loss": 1.3819, + "step": 2639500 + }, + { + "epoch": 27.86, + "learning_rate": 3.60696307396815e-05, + "loss": 1.3664, + "step": 2640000 + }, + { + "epoch": 27.87, + "learning_rate": 3.606699241217008e-05, + "loss": 1.4005, + "step": 2640500 + }, + { + "epoch": 27.87, + "learning_rate": 3.606435408465865e-05, + "loss": 1.368, + "step": 2641000 + }, + { + "epoch": 27.88, + "learning_rate": 3.606171575714723e-05, + "loss": 1.3532, + "step": 2641500 + }, + { + "epoch": 27.88, + "learning_rate": 3.6059077429635804e-05, + "loss": 1.3709, + "step": 2642000 + }, + { + "epoch": 27.89, + "learning_rate": 3.6056439102124387e-05, + "loss": 1.3469, + "step": 2642500 + }, + { + "epoch": 27.89, + "learning_rate": 3.605380077461296e-05, + "loss": 1.3464, + "step": 2643000 + }, + { + "epoch": 27.9, + "learning_rate": 3.605116244710153e-05, + "loss": 1.4002, + "step": 2643500 + }, + { + "epoch": 27.9, + "learning_rate": 3.604852411959011e-05, + "loss": 1.3268, + "step": 2644000 + }, + { + "epoch": 27.91, + "learning_rate": 3.604588579207869e-05, + "loss": 1.3216, + "step": 2644500 + }, + { + "epoch": 27.91, + "learning_rate": 3.6043247464567264e-05, + "loss": 1.3628, + "step": 2645000 + }, + { + "epoch": 27.92, + "learning_rate": 3.604060913705584e-05, + "loss": 1.3726, + "step": 2645500 + }, + { + "epoch": 27.92, + "learning_rate": 3.6037970809544415e-05, + "loss": 1.3041, + "step": 2646000 + }, + { + "epoch": 27.93, + "learning_rate": 3.603533248203299e-05, + "loss": 1.37, + "step": 2646500 + }, + { + "epoch": 27.93, + "learning_rate": 3.6032694154521566e-05, + "loss": 1.3515, + "step": 2647000 + }, + { + "epoch": 27.94, + "learning_rate": 3.603005582701015e-05, + "loss": 1.3384, + "step": 2647500 + }, + { + "epoch": 27.95, + "learning_rate": 3.602741749949872e-05, + "loss": 1.2953, + "step": 2648000 + }, + { + "epoch": 27.95, + "learning_rate": 3.602477917198729e-05, + "loss": 1.3559, + "step": 2648500 + }, + { + "epoch": 27.96, + "learning_rate": 3.602214084447587e-05, + "loss": 1.3149, + "step": 2649000 + }, + { + "epoch": 27.96, + "learning_rate": 3.601950251696445e-05, + "loss": 1.3437, + "step": 2649500 + }, + { + "epoch": 27.97, + "learning_rate": 3.6016864189453026e-05, + "loss": 1.3566, + "step": 2650000 + }, + { + "epoch": 27.97, + "learning_rate": 3.6014225861941594e-05, + "loss": 1.3226, + "step": 2650500 + }, + { + "epoch": 27.98, + "learning_rate": 3.601158753443018e-05, + "loss": 1.3344, + "step": 2651000 + }, + { + "epoch": 27.98, + "learning_rate": 3.600894920691875e-05, + "loss": 1.3171, + "step": 2651500 + }, + { + "epoch": 27.99, + "learning_rate": 3.600631087940733e-05, + "loss": 1.3198, + "step": 2652000 + }, + { + "epoch": 27.99, + "learning_rate": 3.60036725518959e-05, + "loss": 1.3223, + "step": 2652500 + }, + { + "epoch": 28.0, + "learning_rate": 3.600103422438448e-05, + "loss": 1.3553, + "step": 2653000 + }, + { + "epoch": 28.0, + "learning_rate": 3.5998395896873054e-05, + "loss": 1.3302, + "step": 2653500 + }, + { + "epoch": 28.01, + "learning_rate": 3.599575756936163e-05, + "loss": 1.2676, + "step": 2654000 + }, + { + "epoch": 28.01, + "learning_rate": 3.599311924185021e-05, + "loss": 1.3442, + "step": 2654500 + }, + { + "epoch": 28.02, + "learning_rate": 3.599048091433879e-05, + "loss": 1.3381, + "step": 2655000 + }, + { + "epoch": 28.02, + "learning_rate": 3.5987842586827356e-05, + "loss": 1.3376, + "step": 2655500 + }, + { + "epoch": 28.03, + "learning_rate": 3.598520425931594e-05, + "loss": 1.2661, + "step": 2656000 + }, + { + "epoch": 28.03, + "learning_rate": 3.5982565931804514e-05, + "loss": 1.361, + "step": 2656500 + }, + { + "epoch": 28.04, + "learning_rate": 3.597992760429309e-05, + "loss": 1.315, + "step": 2657000 + }, + { + "epoch": 28.05, + "learning_rate": 3.5977289276781665e-05, + "loss": 1.3698, + "step": 2657500 + }, + { + "epoch": 28.05, + "learning_rate": 3.597465094927024e-05, + "loss": 1.3425, + "step": 2658000 + }, + { + "epoch": 28.06, + "learning_rate": 3.5972012621758816e-05, + "loss": 1.3727, + "step": 2658500 + }, + { + "epoch": 28.06, + "learning_rate": 3.596937429424739e-05, + "loss": 1.3278, + "step": 2659000 + }, + { + "epoch": 28.07, + "learning_rate": 3.5966735966735974e-05, + "loss": 1.3052, + "step": 2659500 + }, + { + "epoch": 28.07, + "learning_rate": 3.596409763922454e-05, + "loss": 1.3393, + "step": 2660000 + }, + { + "epoch": 28.08, + "learning_rate": 3.596145931171312e-05, + "loss": 1.3754, + "step": 2660500 + }, + { + "epoch": 28.08, + "learning_rate": 3.595882098420169e-05, + "loss": 1.3593, + "step": 2661000 + }, + { + "epoch": 28.09, + "learning_rate": 3.5956182656690275e-05, + "loss": 1.3709, + "step": 2661500 + }, + { + "epoch": 28.09, + "learning_rate": 3.595354432917885e-05, + "loss": 1.2909, + "step": 2662000 + }, + { + "epoch": 28.1, + "learning_rate": 3.595090600166742e-05, + "loss": 1.3407, + "step": 2662500 + }, + { + "epoch": 28.1, + "learning_rate": 3.5948267674156e-05, + "loss": 1.3932, + "step": 2663000 + }, + { + "epoch": 28.11, + "learning_rate": 3.594562934664458e-05, + "loss": 1.3449, + "step": 2663500 + }, + { + "epoch": 28.11, + "learning_rate": 3.594299101913315e-05, + "loss": 1.3392, + "step": 2664000 + }, + { + "epoch": 28.12, + "learning_rate": 3.594035269162173e-05, + "loss": 1.3602, + "step": 2664500 + }, + { + "epoch": 28.12, + "learning_rate": 3.5937714364110304e-05, + "loss": 1.3492, + "step": 2665000 + }, + { + "epoch": 28.13, + "learning_rate": 3.593507603659888e-05, + "loss": 1.3613, + "step": 2665500 + }, + { + "epoch": 28.14, + "learning_rate": 3.5932437709087455e-05, + "loss": 1.3144, + "step": 2666000 + }, + { + "epoch": 28.14, + "learning_rate": 3.592979938157604e-05, + "loss": 1.3137, + "step": 2666500 + }, + { + "epoch": 28.15, + "learning_rate": 3.5927161054064606e-05, + "loss": 1.2899, + "step": 2667000 + }, + { + "epoch": 28.15, + "learning_rate": 3.592452272655318e-05, + "loss": 1.3154, + "step": 2667500 + }, + { + "epoch": 28.16, + "learning_rate": 3.5921884399041764e-05, + "loss": 1.3147, + "step": 2668000 + }, + { + "epoch": 28.16, + "learning_rate": 3.591924607153034e-05, + "loss": 1.3832, + "step": 2668500 + }, + { + "epoch": 28.17, + "learning_rate": 3.5916607744018915e-05, + "loss": 1.3442, + "step": 2669000 + }, + { + "epoch": 28.17, + "learning_rate": 3.591396941650749e-05, + "loss": 1.3318, + "step": 2669500 + }, + { + "epoch": 28.18, + "learning_rate": 3.5911331088996066e-05, + "loss": 1.3788, + "step": 2670000 + }, + { + "epoch": 28.18, + "learning_rate": 3.590869276148464e-05, + "loss": 1.337, + "step": 2670500 + }, + { + "epoch": 28.19, + "learning_rate": 3.5906054433973216e-05, + "loss": 1.3619, + "step": 2671000 + }, + { + "epoch": 28.19, + "learning_rate": 3.59034161064618e-05, + "loss": 1.4116, + "step": 2671500 + }, + { + "epoch": 28.2, + "learning_rate": 3.590077777895037e-05, + "loss": 1.2991, + "step": 2672000 + }, + { + "epoch": 28.2, + "learning_rate": 3.589813945143894e-05, + "loss": 1.3112, + "step": 2672500 + }, + { + "epoch": 28.21, + "learning_rate": 3.5895501123927525e-05, + "loss": 1.3639, + "step": 2673000 + }, + { + "epoch": 28.21, + "learning_rate": 3.58928627964161e-05, + "loss": 1.3342, + "step": 2673500 + }, + { + "epoch": 28.22, + "learning_rate": 3.5890224468904676e-05, + "loss": 1.3647, + "step": 2674000 + }, + { + "epoch": 28.22, + "learning_rate": 3.5887586141393245e-05, + "loss": 1.3079, + "step": 2674500 + }, + { + "epoch": 28.23, + "learning_rate": 3.588494781388183e-05, + "loss": 1.3159, + "step": 2675000 + }, + { + "epoch": 28.24, + "learning_rate": 3.58823094863704e-05, + "loss": 1.3146, + "step": 2675500 + }, + { + "epoch": 28.24, + "learning_rate": 3.587967115885898e-05, + "loss": 1.3716, + "step": 2676000 + }, + { + "epoch": 28.25, + "learning_rate": 3.5877032831347554e-05, + "loss": 1.3581, + "step": 2676500 + }, + { + "epoch": 28.25, + "learning_rate": 3.587439450383613e-05, + "loss": 1.3256, + "step": 2677000 + }, + { + "epoch": 28.26, + "learning_rate": 3.5871756176324705e-05, + "loss": 1.3201, + "step": 2677500 + }, + { + "epoch": 28.26, + "learning_rate": 3.586911784881328e-05, + "loss": 1.3098, + "step": 2678000 + }, + { + "epoch": 28.27, + "learning_rate": 3.586647952130186e-05, + "loss": 1.3053, + "step": 2678500 + }, + { + "epoch": 28.27, + "learning_rate": 3.586384119379043e-05, + "loss": 1.3243, + "step": 2679000 + }, + { + "epoch": 28.28, + "learning_rate": 3.5861202866279007e-05, + "loss": 1.3197, + "step": 2679500 + }, + { + "epoch": 28.28, + "learning_rate": 3.585856453876759e-05, + "loss": 1.3234, + "step": 2680000 + }, + { + "epoch": 28.29, + "learning_rate": 3.5855926211256164e-05, + "loss": 1.3381, + "step": 2680500 + }, + { + "epoch": 28.29, + "learning_rate": 3.585328788374474e-05, + "loss": 1.3076, + "step": 2681000 + }, + { + "epoch": 28.3, + "learning_rate": 3.5850649556233315e-05, + "loss": 1.372, + "step": 2681500 + }, + { + "epoch": 28.3, + "learning_rate": 3.584801122872189e-05, + "loss": 1.2978, + "step": 2682000 + }, + { + "epoch": 28.31, + "learning_rate": 3.5845372901210466e-05, + "loss": 1.3247, + "step": 2682500 + }, + { + "epoch": 28.31, + "learning_rate": 3.584273457369904e-05, + "loss": 1.3751, + "step": 2683000 + }, + { + "epoch": 28.32, + "learning_rate": 3.5840096246187624e-05, + "loss": 1.3606, + "step": 2683500 + }, + { + "epoch": 28.33, + "learning_rate": 3.583745791867619e-05, + "loss": 1.3779, + "step": 2684000 + }, + { + "epoch": 28.33, + "learning_rate": 3.583481959116477e-05, + "loss": 1.3546, + "step": 2684500 + }, + { + "epoch": 28.34, + "learning_rate": 3.583218126365335e-05, + "loss": 1.2702, + "step": 2685000 + }, + { + "epoch": 28.34, + "learning_rate": 3.5829542936141926e-05, + "loss": 1.3305, + "step": 2685500 + }, + { + "epoch": 28.35, + "learning_rate": 3.5826904608630495e-05, + "loss": 1.3116, + "step": 2686000 + }, + { + "epoch": 28.35, + "learning_rate": 3.582426628111907e-05, + "loss": 1.3731, + "step": 2686500 + }, + { + "epoch": 28.36, + "learning_rate": 3.582162795360765e-05, + "loss": 1.3206, + "step": 2687000 + }, + { + "epoch": 28.36, + "learning_rate": 3.581898962609623e-05, + "loss": 1.3341, + "step": 2687500 + }, + { + "epoch": 28.37, + "learning_rate": 3.58163512985848e-05, + "loss": 1.3735, + "step": 2688000 + }, + { + "epoch": 28.37, + "learning_rate": 3.581371297107338e-05, + "loss": 1.313, + "step": 2688500 + }, + { + "epoch": 28.38, + "learning_rate": 3.5811074643561954e-05, + "loss": 1.3257, + "step": 2689000 + }, + { + "epoch": 28.38, + "learning_rate": 3.580843631605053e-05, + "loss": 1.3504, + "step": 2689500 + }, + { + "epoch": 28.39, + "learning_rate": 3.5805797988539105e-05, + "loss": 1.3201, + "step": 2690000 + }, + { + "epoch": 28.39, + "learning_rate": 3.580315966102769e-05, + "loss": 1.2742, + "step": 2690500 + }, + { + "epoch": 28.4, + "learning_rate": 3.5800521333516256e-05, + "loss": 1.3368, + "step": 2691000 + }, + { + "epoch": 28.4, + "learning_rate": 3.579788300600483e-05, + "loss": 1.3695, + "step": 2691500 + }, + { + "epoch": 28.41, + "learning_rate": 3.5795244678493414e-05, + "loss": 1.3401, + "step": 2692000 + }, + { + "epoch": 28.41, + "learning_rate": 3.579260635098199e-05, + "loss": 1.3148, + "step": 2692500 + }, + { + "epoch": 28.42, + "learning_rate": 3.5789968023470565e-05, + "loss": 1.3454, + "step": 2693000 + }, + { + "epoch": 28.43, + "learning_rate": 3.578732969595914e-05, + "loss": 1.3057, + "step": 2693500 + }, + { + "epoch": 28.43, + "learning_rate": 3.5784691368447716e-05, + "loss": 1.401, + "step": 2694000 + }, + { + "epoch": 28.44, + "learning_rate": 3.578205304093629e-05, + "loss": 1.3594, + "step": 2694500 + }, + { + "epoch": 28.44, + "learning_rate": 3.577941471342487e-05, + "loss": 1.3576, + "step": 2695000 + }, + { + "epoch": 28.45, + "learning_rate": 3.577677638591344e-05, + "loss": 1.359, + "step": 2695500 + }, + { + "epoch": 28.45, + "learning_rate": 3.577413805840202e-05, + "loss": 1.329, + "step": 2696000 + }, + { + "epoch": 28.46, + "learning_rate": 3.5771499730890593e-05, + "loss": 1.3598, + "step": 2696500 + }, + { + "epoch": 28.46, + "learning_rate": 3.5768861403379176e-05, + "loss": 1.3475, + "step": 2697000 + }, + { + "epoch": 28.47, + "learning_rate": 3.576622307586775e-05, + "loss": 1.407, + "step": 2697500 + }, + { + "epoch": 28.47, + "learning_rate": 3.576358474835632e-05, + "loss": 1.3719, + "step": 2698000 + }, + { + "epoch": 28.48, + "learning_rate": 3.5760946420844895e-05, + "loss": 1.3621, + "step": 2698500 + }, + { + "epoch": 28.48, + "learning_rate": 3.575830809333348e-05, + "loss": 1.3697, + "step": 2699000 + }, + { + "epoch": 28.49, + "learning_rate": 3.575566976582205e-05, + "loss": 1.3566, + "step": 2699500 + }, + { + "epoch": 28.49, + "learning_rate": 3.575303143831063e-05, + "loss": 1.3573, + "step": 2700000 + }, + { + "epoch": 28.5, + "learning_rate": 3.5750393110799204e-05, + "loss": 1.3753, + "step": 2700500 + }, + { + "epoch": 28.5, + "learning_rate": 3.574775478328778e-05, + "loss": 1.3246, + "step": 2701000 + }, + { + "epoch": 28.51, + "learning_rate": 3.5745116455776355e-05, + "loss": 1.3223, + "step": 2701500 + }, + { + "epoch": 28.52, + "learning_rate": 3.574247812826493e-05, + "loss": 1.3202, + "step": 2702000 + }, + { + "epoch": 28.52, + "learning_rate": 3.573983980075351e-05, + "loss": 1.3018, + "step": 2702500 + }, + { + "epoch": 28.53, + "learning_rate": 3.573720147324208e-05, + "loss": 1.3952, + "step": 2703000 + }, + { + "epoch": 28.53, + "learning_rate": 3.573456314573066e-05, + "loss": 1.3347, + "step": 2703500 + }, + { + "epoch": 28.54, + "learning_rate": 3.573192481821924e-05, + "loss": 1.2978, + "step": 2704000 + }, + { + "epoch": 28.54, + "learning_rate": 3.5729286490707815e-05, + "loss": 1.3396, + "step": 2704500 + }, + { + "epoch": 28.55, + "learning_rate": 3.5726648163196383e-05, + "loss": 1.2953, + "step": 2705000 + }, + { + "epoch": 28.55, + "learning_rate": 3.5724009835684966e-05, + "loss": 1.3264, + "step": 2705500 + }, + { + "epoch": 28.56, + "learning_rate": 3.572137150817354e-05, + "loss": 1.3315, + "step": 2706000 + }, + { + "epoch": 28.56, + "learning_rate": 3.571873318066212e-05, + "loss": 1.3353, + "step": 2706500 + }, + { + "epoch": 28.57, + "learning_rate": 3.571609485315069e-05, + "loss": 1.3104, + "step": 2707000 + }, + { + "epoch": 28.57, + "learning_rate": 3.571345652563927e-05, + "loss": 1.3164, + "step": 2707500 + }, + { + "epoch": 28.58, + "learning_rate": 3.571081819812784e-05, + "loss": 1.3342, + "step": 2708000 + }, + { + "epoch": 28.58, + "learning_rate": 3.570817987061642e-05, + "loss": 1.3265, + "step": 2708500 + }, + { + "epoch": 28.59, + "learning_rate": 3.5705541543105e-05, + "loss": 1.343, + "step": 2709000 + }, + { + "epoch": 28.59, + "learning_rate": 3.5702903215593576e-05, + "loss": 1.3957, + "step": 2709500 + }, + { + "epoch": 28.6, + "learning_rate": 3.5700264888082145e-05, + "loss": 1.3324, + "step": 2710000 + }, + { + "epoch": 28.6, + "learning_rate": 3.569762656057072e-05, + "loss": 1.3386, + "step": 2710500 + }, + { + "epoch": 28.61, + "learning_rate": 3.56949882330593e-05, + "loss": 1.3338, + "step": 2711000 + }, + { + "epoch": 28.62, + "learning_rate": 3.569234990554788e-05, + "loss": 1.3699, + "step": 2711500 + }, + { + "epoch": 28.62, + "learning_rate": 3.5689711578036454e-05, + "loss": 1.352, + "step": 2712000 + }, + { + "epoch": 28.63, + "learning_rate": 3.568707325052503e-05, + "loss": 1.2898, + "step": 2712500 + }, + { + "epoch": 28.63, + "learning_rate": 3.5684434923013605e-05, + "loss": 1.3697, + "step": 2713000 + }, + { + "epoch": 28.64, + "learning_rate": 3.568179659550218e-05, + "loss": 1.4058, + "step": 2713500 + }, + { + "epoch": 28.64, + "learning_rate": 3.5679158267990756e-05, + "loss": 1.3282, + "step": 2714000 + }, + { + "epoch": 28.65, + "learning_rate": 3.567651994047933e-05, + "loss": 1.3221, + "step": 2714500 + }, + { + "epoch": 28.65, + "learning_rate": 3.567388161296791e-05, + "loss": 1.334, + "step": 2715000 + }, + { + "epoch": 28.66, + "learning_rate": 3.567124328545648e-05, + "loss": 1.3021, + "step": 2715500 + }, + { + "epoch": 28.66, + "learning_rate": 3.5668604957945065e-05, + "loss": 1.3092, + "step": 2716000 + }, + { + "epoch": 28.67, + "learning_rate": 3.566596663043364e-05, + "loss": 1.3462, + "step": 2716500 + }, + { + "epoch": 28.67, + "learning_rate": 3.566332830292221e-05, + "loss": 1.3385, + "step": 2717000 + }, + { + "epoch": 28.68, + "learning_rate": 3.566068997541079e-05, + "loss": 1.3136, + "step": 2717500 + }, + { + "epoch": 28.68, + "learning_rate": 3.5658051647899366e-05, + "loss": 1.3897, + "step": 2718000 + }, + { + "epoch": 28.69, + "learning_rate": 3.565541332038794e-05, + "loss": 1.3515, + "step": 2718500 + }, + { + "epoch": 28.69, + "learning_rate": 3.565277499287652e-05, + "loss": 1.3181, + "step": 2719000 + }, + { + "epoch": 28.7, + "learning_rate": 3.565013666536509e-05, + "loss": 1.3, + "step": 2719500 + }, + { + "epoch": 28.71, + "learning_rate": 3.564749833785367e-05, + "loss": 1.3468, + "step": 2720000 + }, + { + "epoch": 28.71, + "learning_rate": 3.5644860010342244e-05, + "loss": 1.2738, + "step": 2720500 + }, + { + "epoch": 28.72, + "learning_rate": 3.5642221682830826e-05, + "loss": 1.3192, + "step": 2721000 + }, + { + "epoch": 28.72, + "learning_rate": 3.56395833553194e-05, + "loss": 1.3466, + "step": 2721500 + }, + { + "epoch": 28.73, + "learning_rate": 3.563694502780797e-05, + "loss": 1.3038, + "step": 2722000 + }, + { + "epoch": 28.73, + "learning_rate": 3.5634306700296546e-05, + "loss": 1.3299, + "step": 2722500 + }, + { + "epoch": 28.74, + "learning_rate": 3.563166837278513e-05, + "loss": 1.3771, + "step": 2723000 + }, + { + "epoch": 28.74, + "learning_rate": 3.5629030045273704e-05, + "loss": 1.2743, + "step": 2723500 + }, + { + "epoch": 28.75, + "learning_rate": 3.562639171776227e-05, + "loss": 1.3768, + "step": 2724000 + }, + { + "epoch": 28.75, + "learning_rate": 3.5623753390250855e-05, + "loss": 1.3493, + "step": 2724500 + }, + { + "epoch": 28.76, + "learning_rate": 3.562111506273943e-05, + "loss": 1.3595, + "step": 2725000 + }, + { + "epoch": 28.76, + "learning_rate": 3.5618476735228006e-05, + "loss": 1.3928, + "step": 2725500 + }, + { + "epoch": 28.77, + "learning_rate": 3.561583840771658e-05, + "loss": 1.3329, + "step": 2726000 + }, + { + "epoch": 28.77, + "learning_rate": 3.5613200080205157e-05, + "loss": 1.3028, + "step": 2726500 + }, + { + "epoch": 28.78, + "learning_rate": 3.561056175269373e-05, + "loss": 1.3594, + "step": 2727000 + }, + { + "epoch": 28.78, + "learning_rate": 3.560792342518231e-05, + "loss": 1.3072, + "step": 2727500 + }, + { + "epoch": 28.79, + "learning_rate": 3.560528509767089e-05, + "loss": 1.3367, + "step": 2728000 + }, + { + "epoch": 28.79, + "learning_rate": 3.5602646770159465e-05, + "loss": 1.3618, + "step": 2728500 + }, + { + "epoch": 28.8, + "learning_rate": 3.5600008442648034e-05, + "loss": 1.3428, + "step": 2729000 + }, + { + "epoch": 28.81, + "learning_rate": 3.5597370115136616e-05, + "loss": 1.3258, + "step": 2729500 + }, + { + "epoch": 28.81, + "learning_rate": 3.559473178762519e-05, + "loss": 1.3549, + "step": 2730000 + }, + { + "epoch": 28.82, + "learning_rate": 3.559209346011377e-05, + "loss": 1.3431, + "step": 2730500 + }, + { + "epoch": 28.82, + "learning_rate": 3.558945513260234e-05, + "loss": 1.332, + "step": 2731000 + }, + { + "epoch": 28.83, + "learning_rate": 3.558681680509092e-05, + "loss": 1.3724, + "step": 2731500 + }, + { + "epoch": 28.83, + "learning_rate": 3.5584178477579494e-05, + "loss": 1.3247, + "step": 2732000 + }, + { + "epoch": 28.84, + "learning_rate": 3.558154015006807e-05, + "loss": 1.3405, + "step": 2732500 + }, + { + "epoch": 28.84, + "learning_rate": 3.557890182255665e-05, + "loss": 1.3612, + "step": 2733000 + }, + { + "epoch": 28.85, + "learning_rate": 3.557626349504522e-05, + "loss": 1.3591, + "step": 2733500 + }, + { + "epoch": 28.85, + "learning_rate": 3.5573625167533796e-05, + "loss": 1.3294, + "step": 2734000 + }, + { + "epoch": 28.86, + "learning_rate": 3.557098684002237e-05, + "loss": 1.3484, + "step": 2734500 + }, + { + "epoch": 28.86, + "learning_rate": 3.5568348512510953e-05, + "loss": 1.3367, + "step": 2735000 + }, + { + "epoch": 28.87, + "learning_rate": 3.556571018499953e-05, + "loss": 1.3464, + "step": 2735500 + }, + { + "epoch": 28.87, + "learning_rate": 3.55630718574881e-05, + "loss": 1.3512, + "step": 2736000 + }, + { + "epoch": 28.88, + "learning_rate": 3.556043352997668e-05, + "loss": 1.3032, + "step": 2736500 + }, + { + "epoch": 28.88, + "learning_rate": 3.5557795202465255e-05, + "loss": 1.3406, + "step": 2737000 + }, + { + "epoch": 28.89, + "learning_rate": 3.555515687495383e-05, + "loss": 1.3589, + "step": 2737500 + }, + { + "epoch": 28.89, + "learning_rate": 3.5552518547442406e-05, + "loss": 1.3567, + "step": 2738000 + }, + { + "epoch": 28.9, + "learning_rate": 3.554988021993098e-05, + "loss": 1.4049, + "step": 2738500 + }, + { + "epoch": 28.91, + "learning_rate": 3.554724189241956e-05, + "loss": 1.3549, + "step": 2739000 + }, + { + "epoch": 28.91, + "learning_rate": 3.554460356490813e-05, + "loss": 1.3172, + "step": 2739500 + }, + { + "epoch": 28.92, + "learning_rate": 3.5541965237396715e-05, + "loss": 1.3427, + "step": 2740000 + }, + { + "epoch": 28.92, + "learning_rate": 3.553932690988529e-05, + "loss": 1.3326, + "step": 2740500 + }, + { + "epoch": 28.93, + "learning_rate": 3.553668858237386e-05, + "loss": 1.3507, + "step": 2741000 + }, + { + "epoch": 28.93, + "learning_rate": 3.553405025486244e-05, + "loss": 1.3804, + "step": 2741500 + }, + { + "epoch": 28.94, + "learning_rate": 3.553141192735102e-05, + "loss": 1.3095, + "step": 2742000 + }, + { + "epoch": 28.94, + "learning_rate": 3.552877359983959e-05, + "loss": 1.3898, + "step": 2742500 + }, + { + "epoch": 28.95, + "learning_rate": 3.552613527232817e-05, + "loss": 1.3305, + "step": 2743000 + }, + { + "epoch": 28.95, + "learning_rate": 3.5523496944816743e-05, + "loss": 1.3358, + "step": 2743500 + }, + { + "epoch": 28.96, + "learning_rate": 3.552085861730532e-05, + "loss": 1.36, + "step": 2744000 + }, + { + "epoch": 28.96, + "learning_rate": 3.5518220289793894e-05, + "loss": 1.3765, + "step": 2744500 + }, + { + "epoch": 28.97, + "learning_rate": 3.551558196228248e-05, + "loss": 1.3722, + "step": 2745000 + }, + { + "epoch": 28.97, + "learning_rate": 3.5512943634771045e-05, + "loss": 1.3139, + "step": 2745500 + }, + { + "epoch": 28.98, + "learning_rate": 3.551030530725962e-05, + "loss": 1.3428, + "step": 2746000 + }, + { + "epoch": 28.98, + "learning_rate": 3.55076669797482e-05, + "loss": 1.2769, + "step": 2746500 + }, + { + "epoch": 28.99, + "learning_rate": 3.550502865223678e-05, + "loss": 1.3042, + "step": 2747000 + }, + { + "epoch": 29.0, + "learning_rate": 3.5502390324725354e-05, + "loss": 1.318, + "step": 2747500 + }, + { + "epoch": 29.0, + "learning_rate": 3.549975199721392e-05, + "loss": 1.3484, + "step": 2748000 + }, + { + "epoch": 29.01, + "learning_rate": 3.5497113669702505e-05, + "loss": 1.3653, + "step": 2748500 + }, + { + "epoch": 29.01, + "learning_rate": 3.549447534219108e-05, + "loss": 1.3341, + "step": 2749000 + }, + { + "epoch": 29.02, + "learning_rate": 3.5491837014679656e-05, + "loss": 1.3617, + "step": 2749500 + }, + { + "epoch": 29.02, + "learning_rate": 3.548919868716823e-05, + "loss": 1.3244, + "step": 2750000 + }, + { + "epoch": 29.03, + "learning_rate": 3.548656035965681e-05, + "loss": 1.3499, + "step": 2750500 + }, + { + "epoch": 29.03, + "learning_rate": 3.548392203214538e-05, + "loss": 1.3284, + "step": 2751000 + }, + { + "epoch": 29.04, + "learning_rate": 3.548128370463396e-05, + "loss": 1.2954, + "step": 2751500 + }, + { + "epoch": 29.04, + "learning_rate": 3.547864537712254e-05, + "loss": 1.2955, + "step": 2752000 + }, + { + "epoch": 29.05, + "learning_rate": 3.547600704961111e-05, + "loss": 1.3836, + "step": 2752500 + }, + { + "epoch": 29.05, + "learning_rate": 3.5473368722099684e-05, + "loss": 1.3106, + "step": 2753000 + }, + { + "epoch": 29.06, + "learning_rate": 3.547073039458827e-05, + "loss": 1.3257, + "step": 2753500 + }, + { + "epoch": 29.06, + "learning_rate": 3.546809206707684e-05, + "loss": 1.3602, + "step": 2754000 + }, + { + "epoch": 29.07, + "learning_rate": 3.546545373956542e-05, + "loss": 1.3566, + "step": 2754500 + }, + { + "epoch": 29.07, + "learning_rate": 3.546281541205399e-05, + "loss": 1.3158, + "step": 2755000 + }, + { + "epoch": 29.08, + "learning_rate": 3.546017708454257e-05, + "loss": 1.3008, + "step": 2755500 + }, + { + "epoch": 29.08, + "learning_rate": 3.5457538757031144e-05, + "loss": 1.3793, + "step": 2756000 + }, + { + "epoch": 29.09, + "learning_rate": 3.545490042951972e-05, + "loss": 1.2961, + "step": 2756500 + }, + { + "epoch": 29.1, + "learning_rate": 3.54522621020083e-05, + "loss": 1.2915, + "step": 2757000 + }, + { + "epoch": 29.1, + "learning_rate": 3.544962377449687e-05, + "loss": 1.3538, + "step": 2757500 + }, + { + "epoch": 29.11, + "learning_rate": 3.5446985446985446e-05, + "loss": 1.3322, + "step": 2758000 + }, + { + "epoch": 29.11, + "learning_rate": 3.544434711947403e-05, + "loss": 1.3335, + "step": 2758500 + }, + { + "epoch": 29.12, + "learning_rate": 3.5441708791962604e-05, + "loss": 1.2885, + "step": 2759000 + }, + { + "epoch": 29.12, + "learning_rate": 3.543907046445118e-05, + "loss": 1.3741, + "step": 2759500 + }, + { + "epoch": 29.13, + "learning_rate": 3.543643213693975e-05, + "loss": 1.3415, + "step": 2760000 + }, + { + "epoch": 29.13, + "learning_rate": 3.543379380942833e-05, + "loss": 1.3394, + "step": 2760500 + }, + { + "epoch": 29.14, + "learning_rate": 3.5431155481916906e-05, + "loss": 1.3126, + "step": 2761000 + }, + { + "epoch": 29.14, + "learning_rate": 3.542851715440548e-05, + "loss": 1.3357, + "step": 2761500 + }, + { + "epoch": 29.15, + "learning_rate": 3.542587882689406e-05, + "loss": 1.3066, + "step": 2762000 + }, + { + "epoch": 29.15, + "learning_rate": 3.542324049938263e-05, + "loss": 1.3346, + "step": 2762500 + }, + { + "epoch": 29.16, + "learning_rate": 3.542060217187121e-05, + "loss": 1.3112, + "step": 2763000 + }, + { + "epoch": 29.16, + "learning_rate": 3.541796384435978e-05, + "loss": 1.3598, + "step": 2763500 + }, + { + "epoch": 29.17, + "learning_rate": 3.5415325516848366e-05, + "loss": 1.2821, + "step": 2764000 + }, + { + "epoch": 29.17, + "learning_rate": 3.5412687189336934e-05, + "loss": 1.3221, + "step": 2764500 + }, + { + "epoch": 29.18, + "learning_rate": 3.541004886182551e-05, + "loss": 1.3522, + "step": 2765000 + }, + { + "epoch": 29.19, + "learning_rate": 3.540741053431409e-05, + "loss": 1.31, + "step": 2765500 + }, + { + "epoch": 29.19, + "learning_rate": 3.540477220680267e-05, + "loss": 1.3418, + "step": 2766000 + }, + { + "epoch": 29.2, + "learning_rate": 3.540213387929124e-05, + "loss": 1.2949, + "step": 2766500 + }, + { + "epoch": 29.2, + "learning_rate": 3.539949555177982e-05, + "loss": 1.2607, + "step": 2767000 + }, + { + "epoch": 29.21, + "learning_rate": 3.5396857224268394e-05, + "loss": 1.3011, + "step": 2767500 + }, + { + "epoch": 29.21, + "learning_rate": 3.539421889675697e-05, + "loss": 1.3584, + "step": 2768000 + }, + { + "epoch": 29.22, + "learning_rate": 3.5391580569245545e-05, + "loss": 1.3382, + "step": 2768500 + }, + { + "epoch": 29.22, + "learning_rate": 3.538894224173413e-05, + "loss": 1.336, + "step": 2769000 + }, + { + "epoch": 29.23, + "learning_rate": 3.5386303914222696e-05, + "loss": 1.3159, + "step": 2769500 + }, + { + "epoch": 29.23, + "learning_rate": 3.538366558671127e-05, + "loss": 1.3248, + "step": 2770000 + }, + { + "epoch": 29.24, + "learning_rate": 3.5381027259199854e-05, + "loss": 1.3512, + "step": 2770500 + }, + { + "epoch": 29.24, + "learning_rate": 3.537838893168843e-05, + "loss": 1.3702, + "step": 2771000 + }, + { + "epoch": 29.25, + "learning_rate": 3.5375750604177e-05, + "loss": 1.3828, + "step": 2771500 + }, + { + "epoch": 29.25, + "learning_rate": 3.537311227666557e-05, + "loss": 1.3153, + "step": 2772000 + }, + { + "epoch": 29.26, + "learning_rate": 3.5370473949154156e-05, + "loss": 1.3333, + "step": 2772500 + }, + { + "epoch": 29.26, + "learning_rate": 3.536783562164273e-05, + "loss": 1.361, + "step": 2773000 + }, + { + "epoch": 29.27, + "learning_rate": 3.5365197294131307e-05, + "loss": 1.3787, + "step": 2773500 + }, + { + "epoch": 29.27, + "learning_rate": 3.536255896661988e-05, + "loss": 1.3367, + "step": 2774000 + }, + { + "epoch": 29.28, + "learning_rate": 3.535992063910846e-05, + "loss": 1.292, + "step": 2774500 + }, + { + "epoch": 29.29, + "learning_rate": 3.535728231159703e-05, + "loss": 1.3018, + "step": 2775000 + }, + { + "epoch": 29.29, + "learning_rate": 3.535464398408561e-05, + "loss": 1.3663, + "step": 2775500 + }, + { + "epoch": 29.3, + "learning_rate": 3.535200565657419e-05, + "loss": 1.3825, + "step": 2776000 + }, + { + "epoch": 29.3, + "learning_rate": 3.534936732906276e-05, + "loss": 1.3438, + "step": 2776500 + }, + { + "epoch": 29.31, + "learning_rate": 3.5346729001551335e-05, + "loss": 1.3556, + "step": 2777000 + }, + { + "epoch": 29.31, + "learning_rate": 3.534409067403992e-05, + "loss": 1.2937, + "step": 2777500 + }, + { + "epoch": 29.32, + "learning_rate": 3.534145234652849e-05, + "loss": 1.3201, + "step": 2778000 + }, + { + "epoch": 29.32, + "learning_rate": 3.533881401901707e-05, + "loss": 1.2989, + "step": 2778500 + }, + { + "epoch": 29.33, + "learning_rate": 3.5336175691505644e-05, + "loss": 1.3632, + "step": 2779000 + }, + { + "epoch": 29.33, + "learning_rate": 3.533353736399422e-05, + "loss": 1.3411, + "step": 2779500 + }, + { + "epoch": 29.34, + "learning_rate": 3.5330899036482795e-05, + "loss": 1.2687, + "step": 2780000 + }, + { + "epoch": 29.34, + "learning_rate": 3.532826070897137e-05, + "loss": 1.2844, + "step": 2780500 + }, + { + "epoch": 29.35, + "learning_rate": 3.5325622381459946e-05, + "loss": 1.3526, + "step": 2781000 + }, + { + "epoch": 29.35, + "learning_rate": 3.532298405394852e-05, + "loss": 1.3381, + "step": 2781500 + }, + { + "epoch": 29.36, + "learning_rate": 3.53203457264371e-05, + "loss": 1.325, + "step": 2782000 + }, + { + "epoch": 29.36, + "learning_rate": 3.531770739892568e-05, + "loss": 1.3048, + "step": 2782500 + }, + { + "epoch": 29.37, + "learning_rate": 3.5315069071414254e-05, + "loss": 1.3299, + "step": 2783000 + }, + { + "epoch": 29.38, + "learning_rate": 3.531243074390282e-05, + "loss": 1.35, + "step": 2783500 + }, + { + "epoch": 29.38, + "learning_rate": 3.53097924163914e-05, + "loss": 1.3268, + "step": 2784000 + }, + { + "epoch": 29.39, + "learning_rate": 3.530715408887998e-05, + "loss": 1.3719, + "step": 2784500 + }, + { + "epoch": 29.39, + "learning_rate": 3.5304515761368556e-05, + "loss": 1.4089, + "step": 2785000 + }, + { + "epoch": 29.4, + "learning_rate": 3.530187743385713e-05, + "loss": 1.3425, + "step": 2785500 + }, + { + "epoch": 29.4, + "learning_rate": 3.529923910634571e-05, + "loss": 1.31, + "step": 2786000 + }, + { + "epoch": 29.41, + "learning_rate": 3.529660077883428e-05, + "loss": 1.353, + "step": 2786500 + }, + { + "epoch": 29.41, + "learning_rate": 3.529396245132286e-05, + "loss": 1.3531, + "step": 2787000 + }, + { + "epoch": 29.42, + "learning_rate": 3.5291324123811434e-05, + "loss": 1.3165, + "step": 2787500 + }, + { + "epoch": 29.42, + "learning_rate": 3.5288685796300016e-05, + "loss": 1.346, + "step": 2788000 + }, + { + "epoch": 29.43, + "learning_rate": 3.5286047468788585e-05, + "loss": 1.3067, + "step": 2788500 + }, + { + "epoch": 29.43, + "learning_rate": 3.528340914127716e-05, + "loss": 1.3225, + "step": 2789000 + }, + { + "epoch": 29.44, + "learning_rate": 3.528077081376574e-05, + "loss": 1.289, + "step": 2789500 + }, + { + "epoch": 29.44, + "learning_rate": 3.527813248625432e-05, + "loss": 1.3482, + "step": 2790000 + }, + { + "epoch": 29.45, + "learning_rate": 3.527549415874289e-05, + "loss": 1.3327, + "step": 2790500 + }, + { + "epoch": 29.45, + "learning_rate": 3.527285583123147e-05, + "loss": 1.3255, + "step": 2791000 + }, + { + "epoch": 29.46, + "learning_rate": 3.5270217503720044e-05, + "loss": 1.3186, + "step": 2791500 + }, + { + "epoch": 29.46, + "learning_rate": 3.526757917620862e-05, + "loss": 1.3252, + "step": 2792000 + }, + { + "epoch": 29.47, + "learning_rate": 3.5264940848697195e-05, + "loss": 1.3553, + "step": 2792500 + }, + { + "epoch": 29.48, + "learning_rate": 3.526230252118577e-05, + "loss": 1.3318, + "step": 2793000 + }, + { + "epoch": 29.48, + "learning_rate": 3.5259664193674346e-05, + "loss": 1.3018, + "step": 2793500 + }, + { + "epoch": 29.49, + "learning_rate": 3.525702586616292e-05, + "loss": 1.3704, + "step": 2794000 + }, + { + "epoch": 29.49, + "learning_rate": 3.5254387538651504e-05, + "loss": 1.3132, + "step": 2794500 + }, + { + "epoch": 29.5, + "learning_rate": 3.525174921114008e-05, + "loss": 1.3249, + "step": 2795000 + }, + { + "epoch": 29.5, + "learning_rate": 3.524911088362865e-05, + "loss": 1.3382, + "step": 2795500 + }, + { + "epoch": 29.51, + "learning_rate": 3.5246472556117224e-05, + "loss": 1.3629, + "step": 2796000 + }, + { + "epoch": 29.51, + "learning_rate": 3.5243834228605806e-05, + "loss": 1.3628, + "step": 2796500 + }, + { + "epoch": 29.52, + "learning_rate": 3.524119590109438e-05, + "loss": 1.3559, + "step": 2797000 + }, + { + "epoch": 29.52, + "learning_rate": 3.523855757358296e-05, + "loss": 1.2758, + "step": 2797500 + }, + { + "epoch": 29.53, + "learning_rate": 3.523591924607153e-05, + "loss": 1.3567, + "step": 2798000 + }, + { + "epoch": 29.53, + "learning_rate": 3.523328091856011e-05, + "loss": 1.3172, + "step": 2798500 + }, + { + "epoch": 29.54, + "learning_rate": 3.5230642591048684e-05, + "loss": 1.3647, + "step": 2799000 + }, + { + "epoch": 29.54, + "learning_rate": 3.522800426353726e-05, + "loss": 1.3535, + "step": 2799500 + }, + { + "epoch": 29.55, + "learning_rate": 3.5225365936025834e-05, + "loss": 1.3995, + "step": 2800000 + }, + { + "epoch": 29.55, + "learning_rate": 3.522272760851441e-05, + "loss": 1.3477, + "step": 2800500 + }, + { + "epoch": 29.56, + "learning_rate": 3.5220089281002985e-05, + "loss": 1.3391, + "step": 2801000 + }, + { + "epoch": 29.57, + "learning_rate": 3.521745095349157e-05, + "loss": 1.329, + "step": 2801500 + }, + { + "epoch": 29.57, + "learning_rate": 3.521481262598014e-05, + "loss": 1.3838, + "step": 2802000 + }, + { + "epoch": 29.58, + "learning_rate": 3.521217429846871e-05, + "loss": 1.3195, + "step": 2802500 + }, + { + "epoch": 29.58, + "learning_rate": 3.5209535970957294e-05, + "loss": 1.3093, + "step": 2803000 + }, + { + "epoch": 29.59, + "learning_rate": 3.520689764344587e-05, + "loss": 1.35, + "step": 2803500 + }, + { + "epoch": 29.59, + "learning_rate": 3.5204259315934445e-05, + "loss": 1.3382, + "step": 2804000 + }, + { + "epoch": 29.6, + "learning_rate": 3.520162098842302e-05, + "loss": 1.362, + "step": 2804500 + }, + { + "epoch": 29.6, + "learning_rate": 3.5198982660911596e-05, + "loss": 1.3655, + "step": 2805000 + }, + { + "epoch": 29.61, + "learning_rate": 3.519634433340017e-05, + "loss": 1.3239, + "step": 2805500 + }, + { + "epoch": 29.61, + "learning_rate": 3.519370600588875e-05, + "loss": 1.4165, + "step": 2806000 + }, + { + "epoch": 29.62, + "learning_rate": 3.519106767837733e-05, + "loss": 1.2905, + "step": 2806500 + }, + { + "epoch": 29.62, + "learning_rate": 3.5188429350865905e-05, + "loss": 1.3606, + "step": 2807000 + }, + { + "epoch": 29.63, + "learning_rate": 3.5185791023354474e-05, + "loss": 1.3332, + "step": 2807500 + }, + { + "epoch": 29.63, + "learning_rate": 3.518315269584305e-05, + "loss": 1.3255, + "step": 2808000 + }, + { + "epoch": 29.64, + "learning_rate": 3.518051436833163e-05, + "loss": 1.3502, + "step": 2808500 + }, + { + "epoch": 29.64, + "learning_rate": 3.517787604082021e-05, + "loss": 1.3264, + "step": 2809000 + }, + { + "epoch": 29.65, + "learning_rate": 3.5175237713308776e-05, + "loss": 1.3353, + "step": 2809500 + }, + { + "epoch": 29.65, + "learning_rate": 3.517259938579736e-05, + "loss": 1.3422, + "step": 2810000 + }, + { + "epoch": 29.66, + "learning_rate": 3.516996105828593e-05, + "loss": 1.3315, + "step": 2810500 + }, + { + "epoch": 29.67, + "learning_rate": 3.516732273077451e-05, + "loss": 1.3925, + "step": 2811000 + }, + { + "epoch": 29.67, + "learning_rate": 3.5164684403263084e-05, + "loss": 1.3545, + "step": 2811500 + }, + { + "epoch": 29.68, + "learning_rate": 3.516204607575166e-05, + "loss": 1.3639, + "step": 2812000 + }, + { + "epoch": 29.68, + "learning_rate": 3.5159407748240235e-05, + "loss": 1.3077, + "step": 2812500 + }, + { + "epoch": 29.69, + "learning_rate": 3.515676942072881e-05, + "loss": 1.3173, + "step": 2813000 + }, + { + "epoch": 29.69, + "learning_rate": 3.515413109321739e-05, + "loss": 1.3352, + "step": 2813500 + }, + { + "epoch": 29.7, + "learning_rate": 3.515149276570597e-05, + "loss": 1.3024, + "step": 2814000 + }, + { + "epoch": 29.7, + "learning_rate": 3.514885443819454e-05, + "loss": 1.326, + "step": 2814500 + }, + { + "epoch": 29.71, + "learning_rate": 3.514621611068312e-05, + "loss": 1.3533, + "step": 2815000 + }, + { + "epoch": 29.71, + "learning_rate": 3.5143577783171695e-05, + "loss": 1.3348, + "step": 2815500 + }, + { + "epoch": 29.72, + "learning_rate": 3.514093945566027e-05, + "loss": 1.3541, + "step": 2816000 + }, + { + "epoch": 29.72, + "learning_rate": 3.5138301128148846e-05, + "loss": 1.4262, + "step": 2816500 + }, + { + "epoch": 29.73, + "learning_rate": 3.513566280063742e-05, + "loss": 1.3119, + "step": 2817000 + }, + { + "epoch": 29.73, + "learning_rate": 3.5133024473126e-05, + "loss": 1.326, + "step": 2817500 + }, + { + "epoch": 29.74, + "learning_rate": 3.513038614561457e-05, + "loss": 1.3345, + "step": 2818000 + }, + { + "epoch": 29.74, + "learning_rate": 3.5127747818103155e-05, + "loss": 1.2885, + "step": 2818500 + }, + { + "epoch": 29.75, + "learning_rate": 3.512510949059172e-05, + "loss": 1.4033, + "step": 2819000 + }, + { + "epoch": 29.76, + "learning_rate": 3.51224711630803e-05, + "loss": 1.3776, + "step": 2819500 + }, + { + "epoch": 29.76, + "learning_rate": 3.511983283556888e-05, + "loss": 1.3405, + "step": 2820000 + }, + { + "epoch": 29.77, + "learning_rate": 3.5117194508057457e-05, + "loss": 1.3978, + "step": 2820500 + }, + { + "epoch": 29.77, + "learning_rate": 3.511455618054603e-05, + "loss": 1.3328, + "step": 2821000 + }, + { + "epoch": 29.78, + "learning_rate": 3.51119178530346e-05, + "loss": 1.3489, + "step": 2821500 + }, + { + "epoch": 29.78, + "learning_rate": 3.510927952552318e-05, + "loss": 1.2912, + "step": 2822000 + }, + { + "epoch": 29.79, + "learning_rate": 3.510664119801176e-05, + "loss": 1.3637, + "step": 2822500 + }, + { + "epoch": 29.79, + "learning_rate": 3.5104002870500334e-05, + "loss": 1.3329, + "step": 2823000 + }, + { + "epoch": 29.8, + "learning_rate": 3.510136454298891e-05, + "loss": 1.3858, + "step": 2823500 + }, + { + "epoch": 29.8, + "learning_rate": 3.5098726215477485e-05, + "loss": 1.3196, + "step": 2824000 + }, + { + "epoch": 29.81, + "learning_rate": 3.509608788796606e-05, + "loss": 1.3037, + "step": 2824500 + }, + { + "epoch": 29.81, + "learning_rate": 3.5093449560454636e-05, + "loss": 1.3302, + "step": 2825000 + }, + { + "epoch": 29.82, + "learning_rate": 3.509081123294322e-05, + "loss": 1.3284, + "step": 2825500 + }, + { + "epoch": 29.82, + "learning_rate": 3.508817290543179e-05, + "loss": 1.3746, + "step": 2826000 + }, + { + "epoch": 29.83, + "learning_rate": 3.508553457792036e-05, + "loss": 1.2937, + "step": 2826500 + }, + { + "epoch": 29.83, + "learning_rate": 3.5082896250408945e-05, + "loss": 1.2837, + "step": 2827000 + }, + { + "epoch": 29.84, + "learning_rate": 3.508025792289752e-05, + "loss": 1.3365, + "step": 2827500 + }, + { + "epoch": 29.84, + "learning_rate": 3.5077619595386096e-05, + "loss": 1.291, + "step": 2828000 + }, + { + "epoch": 29.85, + "learning_rate": 3.507498126787467e-05, + "loss": 1.2045, + "step": 2828500 + }, + { + "epoch": 29.86, + "learning_rate": 3.507234294036325e-05, + "loss": 1.2879, + "step": 2829000 + }, + { + "epoch": 29.86, + "learning_rate": 3.506970461285182e-05, + "loss": 1.351, + "step": 2829500 + }, + { + "epoch": 29.87, + "learning_rate": 3.50670662853404e-05, + "loss": 1.3518, + "step": 2830000 + }, + { + "epoch": 29.87, + "learning_rate": 3.506442795782898e-05, + "loss": 1.4041, + "step": 2830500 + }, + { + "epoch": 29.88, + "learning_rate": 3.506178963031755e-05, + "loss": 1.3528, + "step": 2831000 + }, + { + "epoch": 29.88, + "learning_rate": 3.5059151302806124e-05, + "loss": 1.3347, + "step": 2831500 + }, + { + "epoch": 29.89, + "learning_rate": 3.5056512975294706e-05, + "loss": 1.3821, + "step": 2832000 + }, + { + "epoch": 29.89, + "learning_rate": 3.505387464778328e-05, + "loss": 1.3287, + "step": 2832500 + }, + { + "epoch": 29.9, + "learning_rate": 3.505123632027186e-05, + "loss": 1.3165, + "step": 2833000 + }, + { + "epoch": 29.9, + "learning_rate": 3.5048597992760426e-05, + "loss": 1.365, + "step": 2833500 + }, + { + "epoch": 29.91, + "learning_rate": 3.504595966524901e-05, + "loss": 1.3363, + "step": 2834000 + }, + { + "epoch": 29.91, + "learning_rate": 3.5043321337737584e-05, + "loss": 1.3637, + "step": 2834500 + }, + { + "epoch": 29.92, + "learning_rate": 3.504068301022616e-05, + "loss": 1.327, + "step": 2835000 + }, + { + "epoch": 29.92, + "learning_rate": 3.5038044682714735e-05, + "loss": 1.343, + "step": 2835500 + }, + { + "epoch": 29.93, + "learning_rate": 3.503540635520331e-05, + "loss": 1.307, + "step": 2836000 + }, + { + "epoch": 29.93, + "learning_rate": 3.5032768027691886e-05, + "loss": 1.2977, + "step": 2836500 + }, + { + "epoch": 29.94, + "learning_rate": 3.503012970018046e-05, + "loss": 1.3526, + "step": 2837000 + }, + { + "epoch": 29.95, + "learning_rate": 3.5027491372669043e-05, + "loss": 1.3344, + "step": 2837500 + }, + { + "epoch": 29.95, + "learning_rate": 3.502485304515761e-05, + "loss": 1.3302, + "step": 2838000 + }, + { + "epoch": 29.96, + "learning_rate": 3.502221471764619e-05, + "loss": 1.3847, + "step": 2838500 + }, + { + "epoch": 29.96, + "learning_rate": 3.501957639013477e-05, + "loss": 1.3983, + "step": 2839000 + }, + { + "epoch": 29.97, + "learning_rate": 3.5016938062623345e-05, + "loss": 1.4041, + "step": 2839500 + }, + { + "epoch": 29.97, + "learning_rate": 3.501429973511192e-05, + "loss": 1.3696, + "step": 2840000 + }, + { + "epoch": 29.98, + "learning_rate": 3.5011661407600496e-05, + "loss": 1.2761, + "step": 2840500 + }, + { + "epoch": 29.98, + "learning_rate": 3.500902308008907e-05, + "loss": 1.3378, + "step": 2841000 + }, + { + "epoch": 29.99, + "learning_rate": 3.500638475257765e-05, + "loss": 1.3479, + "step": 2841500 + }, + { + "epoch": 29.99, + "learning_rate": 3.500374642506622e-05, + "loss": 1.3985, + "step": 2842000 + }, + { + "epoch": 30.0, + "learning_rate": 3.5001108097554805e-05, + "loss": 1.3263, + "step": 2842500 + }, + { + "epoch": 30.0, + "learning_rate": 3.4998469770043374e-05, + "loss": 1.3354, + "step": 2843000 + }, + { + "epoch": 30.01, + "learning_rate": 3.499583144253195e-05, + "loss": 1.297, + "step": 2843500 + }, + { + "epoch": 30.01, + "learning_rate": 3.499319311502053e-05, + "loss": 1.3673, + "step": 2844000 + }, + { + "epoch": 30.02, + "learning_rate": 3.499055478750911e-05, + "loss": 1.2911, + "step": 2844500 + }, + { + "epoch": 30.02, + "learning_rate": 3.4987916459997676e-05, + "loss": 1.3313, + "step": 2845000 + }, + { + "epoch": 30.03, + "learning_rate": 3.498527813248625e-05, + "loss": 1.3618, + "step": 2845500 + }, + { + "epoch": 30.03, + "learning_rate": 3.4982639804974834e-05, + "loss": 1.2773, + "step": 2846000 + }, + { + "epoch": 30.04, + "learning_rate": 3.498000147746341e-05, + "loss": 1.3124, + "step": 2846500 + }, + { + "epoch": 30.05, + "learning_rate": 3.4977363149951985e-05, + "loss": 1.3246, + "step": 2847000 + }, + { + "epoch": 30.05, + "learning_rate": 3.497472482244056e-05, + "loss": 1.365, + "step": 2847500 + }, + { + "epoch": 30.06, + "learning_rate": 3.4972086494929135e-05, + "loss": 1.3538, + "step": 2848000 + }, + { + "epoch": 30.06, + "learning_rate": 3.496944816741771e-05, + "loss": 1.3252, + "step": 2848500 + }, + { + "epoch": 30.07, + "learning_rate": 3.4966809839906286e-05, + "loss": 1.3673, + "step": 2849000 + }, + { + "epoch": 30.07, + "learning_rate": 3.496417151239487e-05, + "loss": 1.323, + "step": 2849500 + }, + { + "epoch": 30.08, + "learning_rate": 3.496153318488344e-05, + "loss": 1.3204, + "step": 2850000 + }, + { + "epoch": 30.08, + "learning_rate": 3.495889485737201e-05, + "loss": 1.3345, + "step": 2850500 + }, + { + "epoch": 30.09, + "learning_rate": 3.4956256529860595e-05, + "loss": 1.3032, + "step": 2851000 + }, + { + "epoch": 30.09, + "learning_rate": 3.495361820234917e-05, + "loss": 1.3146, + "step": 2851500 + }, + { + "epoch": 30.1, + "learning_rate": 3.4950979874837746e-05, + "loss": 1.2691, + "step": 2852000 + }, + { + "epoch": 30.1, + "learning_rate": 3.494834154732632e-05, + "loss": 1.2565, + "step": 2852500 + }, + { + "epoch": 30.11, + "learning_rate": 3.49457032198149e-05, + "loss": 1.338, + "step": 2853000 + }, + { + "epoch": 30.11, + "learning_rate": 3.494306489230347e-05, + "loss": 1.3606, + "step": 2853500 + }, + { + "epoch": 30.12, + "learning_rate": 3.494042656479205e-05, + "loss": 1.3348, + "step": 2854000 + }, + { + "epoch": 30.12, + "learning_rate": 3.4937788237280624e-05, + "loss": 1.3549, + "step": 2854500 + }, + { + "epoch": 30.13, + "learning_rate": 3.49351499097692e-05, + "loss": 1.3724, + "step": 2855000 + }, + { + "epoch": 30.13, + "learning_rate": 3.4932511582257775e-05, + "loss": 1.3598, + "step": 2855500 + }, + { + "epoch": 30.14, + "learning_rate": 3.492987325474636e-05, + "loss": 1.3069, + "step": 2856000 + }, + { + "epoch": 30.15, + "learning_rate": 3.492723492723493e-05, + "loss": 1.2927, + "step": 2856500 + }, + { + "epoch": 30.15, + "learning_rate": 3.49245965997235e-05, + "loss": 1.3274, + "step": 2857000 + }, + { + "epoch": 30.16, + "learning_rate": 3.4921958272212077e-05, + "loss": 1.3407, + "step": 2857500 + }, + { + "epoch": 30.16, + "learning_rate": 3.491931994470066e-05, + "loss": 1.3177, + "step": 2858000 + }, + { + "epoch": 30.17, + "learning_rate": 3.4916681617189234e-05, + "loss": 1.296, + "step": 2858500 + }, + { + "epoch": 30.17, + "learning_rate": 3.491404328967781e-05, + "loss": 1.3405, + "step": 2859000 + }, + { + "epoch": 30.18, + "learning_rate": 3.4911404962166385e-05, + "loss": 1.3365, + "step": 2859500 + }, + { + "epoch": 30.18, + "learning_rate": 3.490876663465496e-05, + "loss": 1.3517, + "step": 2860000 + }, + { + "epoch": 30.19, + "learning_rate": 3.4906128307143536e-05, + "loss": 1.3104, + "step": 2860500 + }, + { + "epoch": 30.19, + "learning_rate": 3.490348997963211e-05, + "loss": 1.3474, + "step": 2861000 + }, + { + "epoch": 30.2, + "learning_rate": 3.4900851652120694e-05, + "loss": 1.3436, + "step": 2861500 + }, + { + "epoch": 30.2, + "learning_rate": 3.489821332460926e-05, + "loss": 1.3208, + "step": 2862000 + }, + { + "epoch": 30.21, + "learning_rate": 3.489557499709784e-05, + "loss": 1.2606, + "step": 2862500 + }, + { + "epoch": 30.21, + "learning_rate": 3.489293666958642e-05, + "loss": 1.3293, + "step": 2863000 + }, + { + "epoch": 30.22, + "learning_rate": 3.4890298342074996e-05, + "loss": 1.3745, + "step": 2863500 + }, + { + "epoch": 30.22, + "learning_rate": 3.4887660014563565e-05, + "loss": 1.3078, + "step": 2864000 + }, + { + "epoch": 30.23, + "learning_rate": 3.488502168705215e-05, + "loss": 1.3446, + "step": 2864500 + }, + { + "epoch": 30.24, + "learning_rate": 3.488238335954072e-05, + "loss": 1.3682, + "step": 2865000 + }, + { + "epoch": 30.24, + "learning_rate": 3.48797450320293e-05, + "loss": 1.3014, + "step": 2865500 + }, + { + "epoch": 30.25, + "learning_rate": 3.487710670451787e-05, + "loss": 1.3258, + "step": 2866000 + }, + { + "epoch": 30.25, + "learning_rate": 3.487446837700645e-05, + "loss": 1.309, + "step": 2866500 + }, + { + "epoch": 30.26, + "learning_rate": 3.4871830049495024e-05, + "loss": 1.271, + "step": 2867000 + }, + { + "epoch": 30.26, + "learning_rate": 3.48691917219836e-05, + "loss": 1.4101, + "step": 2867500 + }, + { + "epoch": 30.27, + "learning_rate": 3.486655339447218e-05, + "loss": 1.3216, + "step": 2868000 + }, + { + "epoch": 30.27, + "learning_rate": 3.486391506696076e-05, + "loss": 1.3255, + "step": 2868500 + }, + { + "epoch": 30.28, + "learning_rate": 3.4861276739449326e-05, + "loss": 1.3041, + "step": 2869000 + }, + { + "epoch": 30.28, + "learning_rate": 3.48586384119379e-05, + "loss": 1.3606, + "step": 2869500 + }, + { + "epoch": 30.29, + "learning_rate": 3.4856000084426484e-05, + "loss": 1.3926, + "step": 2870000 + }, + { + "epoch": 30.29, + "learning_rate": 3.485336175691506e-05, + "loss": 1.3274, + "step": 2870500 + }, + { + "epoch": 30.3, + "learning_rate": 3.4850723429403635e-05, + "loss": 1.3579, + "step": 2871000 + }, + { + "epoch": 30.3, + "learning_rate": 3.484808510189221e-05, + "loss": 1.3663, + "step": 2871500 + }, + { + "epoch": 30.31, + "learning_rate": 3.4845446774380786e-05, + "loss": 1.3301, + "step": 2872000 + }, + { + "epoch": 30.31, + "learning_rate": 3.484280844686936e-05, + "loss": 1.3285, + "step": 2872500 + }, + { + "epoch": 30.32, + "learning_rate": 3.484017011935794e-05, + "loss": 1.3695, + "step": 2873000 + }, + { + "epoch": 30.32, + "learning_rate": 3.483753179184651e-05, + "loss": 1.3494, + "step": 2873500 + }, + { + "epoch": 30.33, + "learning_rate": 3.483489346433509e-05, + "loss": 1.3069, + "step": 2874000 + }, + { + "epoch": 30.34, + "learning_rate": 3.4832255136823663e-05, + "loss": 1.3265, + "step": 2874500 + }, + { + "epoch": 30.34, + "learning_rate": 3.4829616809312246e-05, + "loss": 1.3155, + "step": 2875000 + }, + { + "epoch": 30.35, + "learning_rate": 3.482697848180082e-05, + "loss": 1.3108, + "step": 2875500 + }, + { + "epoch": 30.35, + "learning_rate": 3.482434015428939e-05, + "loss": 1.355, + "step": 2876000 + }, + { + "epoch": 30.36, + "learning_rate": 3.482170182677797e-05, + "loss": 1.3217, + "step": 2876500 + }, + { + "epoch": 30.36, + "learning_rate": 3.481906349926655e-05, + "loss": 1.3268, + "step": 2877000 + }, + { + "epoch": 30.37, + "learning_rate": 3.481642517175512e-05, + "loss": 1.3192, + "step": 2877500 + }, + { + "epoch": 30.37, + "learning_rate": 3.48137868442437e-05, + "loss": 1.3787, + "step": 2878000 + }, + { + "epoch": 30.38, + "learning_rate": 3.4811148516732274e-05, + "loss": 1.317, + "step": 2878500 + }, + { + "epoch": 30.38, + "learning_rate": 3.480851018922085e-05, + "loss": 1.3115, + "step": 2879000 + }, + { + "epoch": 30.39, + "learning_rate": 3.4805871861709425e-05, + "loss": 1.3282, + "step": 2879500 + }, + { + "epoch": 30.39, + "learning_rate": 3.480323353419801e-05, + "loss": 1.3542, + "step": 2880000 + }, + { + "epoch": 30.4, + "learning_rate": 3.480059520668658e-05, + "loss": 1.3346, + "step": 2880500 + }, + { + "epoch": 30.4, + "learning_rate": 3.479795687917515e-05, + "loss": 1.331, + "step": 2881000 + }, + { + "epoch": 30.41, + "learning_rate": 3.479531855166373e-05, + "loss": 1.3419, + "step": 2881500 + }, + { + "epoch": 30.41, + "learning_rate": 3.479268022415231e-05, + "loss": 1.3342, + "step": 2882000 + }, + { + "epoch": 30.42, + "learning_rate": 3.4790041896640885e-05, + "loss": 1.3467, + "step": 2882500 + }, + { + "epoch": 30.43, + "learning_rate": 3.4787403569129453e-05, + "loss": 1.3529, + "step": 2883000 + }, + { + "epoch": 30.43, + "learning_rate": 3.4784765241618036e-05, + "loss": 1.3166, + "step": 2883500 + }, + { + "epoch": 30.44, + "learning_rate": 3.478212691410661e-05, + "loss": 1.3354, + "step": 2884000 + }, + { + "epoch": 30.44, + "learning_rate": 3.477948858659519e-05, + "loss": 1.3521, + "step": 2884500 + }, + { + "epoch": 30.45, + "learning_rate": 3.477685025908376e-05, + "loss": 1.2843, + "step": 2885000 + }, + { + "epoch": 30.45, + "learning_rate": 3.477421193157234e-05, + "loss": 1.3195, + "step": 2885500 + }, + { + "epoch": 30.46, + "learning_rate": 3.477157360406091e-05, + "loss": 1.3785, + "step": 2886000 + }, + { + "epoch": 30.46, + "learning_rate": 3.476893527654949e-05, + "loss": 1.3603, + "step": 2886500 + }, + { + "epoch": 30.47, + "learning_rate": 3.476629694903807e-05, + "loss": 1.3453, + "step": 2887000 + }, + { + "epoch": 30.47, + "learning_rate": 3.4763658621526646e-05, + "loss": 1.3165, + "step": 2887500 + }, + { + "epoch": 30.48, + "learning_rate": 3.4761020294015215e-05, + "loss": 1.3622, + "step": 2888000 + }, + { + "epoch": 30.48, + "learning_rate": 3.47583819665038e-05, + "loss": 1.2965, + "step": 2888500 + }, + { + "epoch": 30.49, + "learning_rate": 3.475574363899237e-05, + "loss": 1.3279, + "step": 2889000 + }, + { + "epoch": 30.49, + "learning_rate": 3.475310531148095e-05, + "loss": 1.3035, + "step": 2889500 + }, + { + "epoch": 30.5, + "learning_rate": 3.4750466983969524e-05, + "loss": 1.3706, + "step": 2890000 + }, + { + "epoch": 30.5, + "learning_rate": 3.47478286564581e-05, + "loss": 1.3035, + "step": 2890500 + }, + { + "epoch": 30.51, + "learning_rate": 3.4745190328946675e-05, + "loss": 1.3108, + "step": 2891000 + }, + { + "epoch": 30.51, + "learning_rate": 3.474255200143525e-05, + "loss": 1.3542, + "step": 2891500 + }, + { + "epoch": 30.52, + "learning_rate": 3.473991367392383e-05, + "loss": 1.2968, + "step": 2892000 + }, + { + "epoch": 30.53, + "learning_rate": 3.47372753464124e-05, + "loss": 1.2883, + "step": 2892500 + }, + { + "epoch": 30.53, + "learning_rate": 3.473463701890098e-05, + "loss": 1.344, + "step": 2893000 + }, + { + "epoch": 30.54, + "learning_rate": 3.473199869138956e-05, + "loss": 1.3019, + "step": 2893500 + }, + { + "epoch": 30.54, + "learning_rate": 3.4729360363878135e-05, + "loss": 1.2628, + "step": 2894000 + }, + { + "epoch": 30.55, + "learning_rate": 3.472672203636671e-05, + "loss": 1.2929, + "step": 2894500 + }, + { + "epoch": 30.55, + "learning_rate": 3.472408370885528e-05, + "loss": 1.3142, + "step": 2895000 + }, + { + "epoch": 30.56, + "learning_rate": 3.472144538134386e-05, + "loss": 1.3495, + "step": 2895500 + }, + { + "epoch": 30.56, + "learning_rate": 3.4718807053832436e-05, + "loss": 1.3095, + "step": 2896000 + }, + { + "epoch": 30.57, + "learning_rate": 3.471616872632101e-05, + "loss": 1.3274, + "step": 2896500 + }, + { + "epoch": 30.57, + "learning_rate": 3.471353039880959e-05, + "loss": 1.3288, + "step": 2897000 + }, + { + "epoch": 30.58, + "learning_rate": 3.471089207129816e-05, + "loss": 1.2937, + "step": 2897500 + }, + { + "epoch": 30.58, + "learning_rate": 3.470825374378674e-05, + "loss": 1.2975, + "step": 2898000 + }, + { + "epoch": 30.59, + "learning_rate": 3.4705615416275314e-05, + "loss": 1.3696, + "step": 2898500 + }, + { + "epoch": 30.59, + "learning_rate": 3.4702977088763896e-05, + "loss": 1.3258, + "step": 2899000 + }, + { + "epoch": 30.6, + "learning_rate": 3.470033876125247e-05, + "loss": 1.2951, + "step": 2899500 + }, + { + "epoch": 30.6, + "learning_rate": 3.469770043374104e-05, + "loss": 1.3627, + "step": 2900000 + }, + { + "epoch": 30.61, + "learning_rate": 3.469506210622962e-05, + "loss": 1.338, + "step": 2900500 + }, + { + "epoch": 30.62, + "learning_rate": 3.46924237787182e-05, + "loss": 1.3357, + "step": 2901000 + }, + { + "epoch": 30.62, + "learning_rate": 3.4689785451206774e-05, + "loss": 1.3606, + "step": 2901500 + }, + { + "epoch": 30.63, + "learning_rate": 3.468714712369535e-05, + "loss": 1.2858, + "step": 2902000 + }, + { + "epoch": 30.63, + "learning_rate": 3.4684508796183925e-05, + "loss": 1.3089, + "step": 2902500 + }, + { + "epoch": 30.64, + "learning_rate": 3.46818704686725e-05, + "loss": 1.3152, + "step": 2903000 + }, + { + "epoch": 30.64, + "learning_rate": 3.4679232141161076e-05, + "loss": 1.3097, + "step": 2903500 + }, + { + "epoch": 30.65, + "learning_rate": 3.467659381364966e-05, + "loss": 1.3026, + "step": 2904000 + }, + { + "epoch": 30.65, + "learning_rate": 3.4673955486138227e-05, + "loss": 1.3191, + "step": 2904500 + }, + { + "epoch": 30.66, + "learning_rate": 3.46713171586268e-05, + "loss": 1.2856, + "step": 2905000 + }, + { + "epoch": 30.66, + "learning_rate": 3.4668678831115384e-05, + "loss": 1.3313, + "step": 2905500 + }, + { + "epoch": 30.67, + "learning_rate": 3.466604050360396e-05, + "loss": 1.3477, + "step": 2906000 + }, + { + "epoch": 30.67, + "learning_rate": 3.4663402176092535e-05, + "loss": 1.3585, + "step": 2906500 + }, + { + "epoch": 30.68, + "learning_rate": 3.4660763848581104e-05, + "loss": 1.2758, + "step": 2907000 + }, + { + "epoch": 30.68, + "learning_rate": 3.4658125521069686e-05, + "loss": 1.3542, + "step": 2907500 + }, + { + "epoch": 30.69, + "learning_rate": 3.465548719355826e-05, + "loss": 1.3456, + "step": 2908000 + }, + { + "epoch": 30.69, + "learning_rate": 3.465284886604684e-05, + "loss": 1.3215, + "step": 2908500 + }, + { + "epoch": 30.7, + "learning_rate": 3.465021053853541e-05, + "loss": 1.3375, + "step": 2909000 + }, + { + "epoch": 30.7, + "learning_rate": 3.464757221102399e-05, + "loss": 1.3852, + "step": 2909500 + }, + { + "epoch": 30.71, + "learning_rate": 3.4644933883512564e-05, + "loss": 1.3661, + "step": 2910000 + }, + { + "epoch": 30.72, + "learning_rate": 3.464229555600114e-05, + "loss": 1.2937, + "step": 2910500 + }, + { + "epoch": 30.72, + "learning_rate": 3.463965722848972e-05, + "loss": 1.3217, + "step": 2911000 + }, + { + "epoch": 30.73, + "learning_rate": 3.463701890097829e-05, + "loss": 1.3579, + "step": 2911500 + }, + { + "epoch": 30.73, + "learning_rate": 3.4634380573466866e-05, + "loss": 1.3153, + "step": 2912000 + }, + { + "epoch": 30.74, + "learning_rate": 3.463174224595545e-05, + "loss": 1.3653, + "step": 2912500 + }, + { + "epoch": 30.74, + "learning_rate": 3.462910391844402e-05, + "loss": 1.3174, + "step": 2913000 + }, + { + "epoch": 30.75, + "learning_rate": 3.46264655909326e-05, + "loss": 1.2952, + "step": 2913500 + }, + { + "epoch": 30.75, + "learning_rate": 3.4623827263421174e-05, + "loss": 1.2989, + "step": 2914000 + }, + { + "epoch": 30.76, + "learning_rate": 3.462118893590975e-05, + "loss": 1.2874, + "step": 2914500 + }, + { + "epoch": 30.76, + "learning_rate": 3.4618550608398325e-05, + "loss": 1.3298, + "step": 2915000 + }, + { + "epoch": 30.77, + "learning_rate": 3.46159122808869e-05, + "loss": 1.3423, + "step": 2915500 + }, + { + "epoch": 30.77, + "learning_rate": 3.461327395337548e-05, + "loss": 1.3617, + "step": 2916000 + }, + { + "epoch": 30.78, + "learning_rate": 3.461063562586405e-05, + "loss": 1.3311, + "step": 2916500 + }, + { + "epoch": 30.78, + "learning_rate": 3.460799729835263e-05, + "loss": 1.3634, + "step": 2917000 + }, + { + "epoch": 30.79, + "learning_rate": 3.460535897084121e-05, + "loss": 1.3637, + "step": 2917500 + }, + { + "epoch": 30.79, + "learning_rate": 3.4602720643329785e-05, + "loss": 1.3869, + "step": 2918000 + }, + { + "epoch": 30.8, + "learning_rate": 3.460008231581836e-05, + "loss": 1.2896, + "step": 2918500 + }, + { + "epoch": 30.81, + "learning_rate": 3.459744398830693e-05, + "loss": 1.2858, + "step": 2919000 + }, + { + "epoch": 30.81, + "learning_rate": 3.459480566079551e-05, + "loss": 1.2768, + "step": 2919500 + }, + { + "epoch": 30.82, + "learning_rate": 3.459216733328409e-05, + "loss": 1.3199, + "step": 2920000 + }, + { + "epoch": 30.82, + "learning_rate": 3.458952900577266e-05, + "loss": 1.3491, + "step": 2920500 + }, + { + "epoch": 30.83, + "learning_rate": 3.458689067826124e-05, + "loss": 1.333, + "step": 2921000 + }, + { + "epoch": 30.83, + "learning_rate": 3.4584252350749813e-05, + "loss": 1.3955, + "step": 2921500 + }, + { + "epoch": 30.84, + "learning_rate": 3.458161402323839e-05, + "loss": 1.3572, + "step": 2922000 + }, + { + "epoch": 30.84, + "learning_rate": 3.4578975695726964e-05, + "loss": 1.3583, + "step": 2922500 + }, + { + "epoch": 30.85, + "learning_rate": 3.457633736821555e-05, + "loss": 1.3276, + "step": 2923000 + }, + { + "epoch": 30.85, + "learning_rate": 3.4573699040704115e-05, + "loss": 1.3544, + "step": 2923500 + }, + { + "epoch": 30.86, + "learning_rate": 3.457106071319269e-05, + "loss": 1.3462, + "step": 2924000 + }, + { + "epoch": 30.86, + "learning_rate": 3.456842238568127e-05, + "loss": 1.3738, + "step": 2924500 + }, + { + "epoch": 30.87, + "learning_rate": 3.456578405816985e-05, + "loss": 1.2898, + "step": 2925000 + }, + { + "epoch": 30.87, + "learning_rate": 3.4563145730658424e-05, + "loss": 1.2812, + "step": 2925500 + }, + { + "epoch": 30.88, + "learning_rate": 3.4560507403147e-05, + "loss": 1.2996, + "step": 2926000 + }, + { + "epoch": 30.88, + "learning_rate": 3.4557869075635575e-05, + "loss": 1.3454, + "step": 2926500 + }, + { + "epoch": 30.89, + "learning_rate": 3.455523074812415e-05, + "loss": 1.3464, + "step": 2927000 + }, + { + "epoch": 30.89, + "learning_rate": 3.4552592420612726e-05, + "loss": 1.3269, + "step": 2927500 + }, + { + "epoch": 30.9, + "learning_rate": 3.454995409310131e-05, + "loss": 1.3106, + "step": 2928000 + }, + { + "epoch": 30.91, + "learning_rate": 3.454731576558988e-05, + "loss": 1.3222, + "step": 2928500 + }, + { + "epoch": 30.91, + "learning_rate": 3.454467743807845e-05, + "loss": 1.3829, + "step": 2929000 + }, + { + "epoch": 30.92, + "learning_rate": 3.4542039110567035e-05, + "loss": 1.2961, + "step": 2929500 + }, + { + "epoch": 30.92, + "learning_rate": 3.453940078305561e-05, + "loss": 1.2825, + "step": 2930000 + }, + { + "epoch": 30.93, + "learning_rate": 3.453676245554418e-05, + "loss": 1.3756, + "step": 2930500 + }, + { + "epoch": 30.93, + "learning_rate": 3.4534124128032754e-05, + "loss": 1.351, + "step": 2931000 + }, + { + "epoch": 30.94, + "learning_rate": 3.453148580052134e-05, + "loss": 1.3243, + "step": 2931500 + }, + { + "epoch": 30.94, + "learning_rate": 3.452884747300991e-05, + "loss": 1.3259, + "step": 2932000 + }, + { + "epoch": 30.95, + "learning_rate": 3.452620914549849e-05, + "loss": 1.279, + "step": 2932500 + }, + { + "epoch": 30.95, + "learning_rate": 3.452357081798706e-05, + "loss": 1.3576, + "step": 2933000 + }, + { + "epoch": 30.96, + "learning_rate": 3.452093249047564e-05, + "loss": 1.309, + "step": 2933500 + }, + { + "epoch": 30.96, + "learning_rate": 3.4518294162964214e-05, + "loss": 1.3545, + "step": 2934000 + }, + { + "epoch": 30.97, + "learning_rate": 3.451565583545279e-05, + "loss": 1.2933, + "step": 2934500 + }, + { + "epoch": 30.97, + "learning_rate": 3.451301750794137e-05, + "loss": 1.3522, + "step": 2935000 + }, + { + "epoch": 30.98, + "learning_rate": 3.451037918042994e-05, + "loss": 1.3658, + "step": 2935500 + }, + { + "epoch": 30.98, + "learning_rate": 3.4507740852918516e-05, + "loss": 1.3539, + "step": 2936000 + }, + { + "epoch": 30.99, + "learning_rate": 3.45051025254071e-05, + "loss": 1.3368, + "step": 2936500 + }, + { + "epoch": 31.0, + "learning_rate": 3.4502464197895674e-05, + "loss": 1.3163, + "step": 2937000 + }, + { + "epoch": 31.0, + "learning_rate": 3.449982587038425e-05, + "loss": 1.2974, + "step": 2937500 + }, + { + "epoch": 31.01, + "learning_rate": 3.4497187542872825e-05, + "loss": 1.308, + "step": 2938000 + }, + { + "epoch": 31.01, + "learning_rate": 3.44945492153614e-05, + "loss": 1.3159, + "step": 2938500 + }, + { + "epoch": 31.02, + "learning_rate": 3.4491910887849976e-05, + "loss": 1.339, + "step": 2939000 + }, + { + "epoch": 31.02, + "learning_rate": 3.448927256033855e-05, + "loss": 1.325, + "step": 2939500 + }, + { + "epoch": 31.03, + "learning_rate": 3.448663423282713e-05, + "loss": 1.2808, + "step": 2940000 + }, + { + "epoch": 31.03, + "learning_rate": 3.44839959053157e-05, + "loss": 1.3359, + "step": 2940500 + }, + { + "epoch": 31.04, + "learning_rate": 3.448135757780428e-05, + "loss": 1.3107, + "step": 2941000 + }, + { + "epoch": 31.04, + "learning_rate": 3.447871925029286e-05, + "loss": 1.297, + "step": 2941500 + }, + { + "epoch": 31.05, + "learning_rate": 3.4476080922781436e-05, + "loss": 1.2868, + "step": 2942000 + }, + { + "epoch": 31.05, + "learning_rate": 3.4473442595270004e-05, + "loss": 1.3611, + "step": 2942500 + }, + { + "epoch": 31.06, + "learning_rate": 3.447080426775858e-05, + "loss": 1.3521, + "step": 2943000 + }, + { + "epoch": 31.06, + "learning_rate": 3.446816594024716e-05, + "loss": 1.2947, + "step": 2943500 + }, + { + "epoch": 31.07, + "learning_rate": 3.446552761273574e-05, + "loss": 1.3434, + "step": 2944000 + }, + { + "epoch": 31.07, + "learning_rate": 3.446288928522431e-05, + "loss": 1.2927, + "step": 2944500 + }, + { + "epoch": 31.08, + "learning_rate": 3.446025095771289e-05, + "loss": 1.2569, + "step": 2945000 + }, + { + "epoch": 31.08, + "learning_rate": 3.4457612630201464e-05, + "loss": 1.3203, + "step": 2945500 + }, + { + "epoch": 31.09, + "learning_rate": 3.445497430269004e-05, + "loss": 1.3287, + "step": 2946000 + }, + { + "epoch": 31.1, + "learning_rate": 3.4452335975178615e-05, + "loss": 1.3026, + "step": 2946500 + }, + { + "epoch": 31.1, + "learning_rate": 3.44496976476672e-05, + "loss": 1.3099, + "step": 2947000 + }, + { + "epoch": 31.11, + "learning_rate": 3.4447059320155766e-05, + "loss": 1.3329, + "step": 2947500 + }, + { + "epoch": 31.11, + "learning_rate": 3.444442099264434e-05, + "loss": 1.3313, + "step": 2948000 + }, + { + "epoch": 31.12, + "learning_rate": 3.4441782665132924e-05, + "loss": 1.3335, + "step": 2948500 + }, + { + "epoch": 31.12, + "learning_rate": 3.44391443376215e-05, + "loss": 1.2974, + "step": 2949000 + }, + { + "epoch": 31.13, + "learning_rate": 3.443650601011007e-05, + "loss": 1.2974, + "step": 2949500 + }, + { + "epoch": 31.13, + "learning_rate": 3.443386768259865e-05, + "loss": 1.3266, + "step": 2950000 + }, + { + "epoch": 31.14, + "learning_rate": 3.4431229355087226e-05, + "loss": 1.3037, + "step": 2950500 + }, + { + "epoch": 31.14, + "learning_rate": 3.44285910275758e-05, + "loss": 1.3332, + "step": 2951000 + }, + { + "epoch": 31.15, + "learning_rate": 3.4425952700064377e-05, + "loss": 1.3212, + "step": 2951500 + }, + { + "epoch": 31.15, + "learning_rate": 3.442331437255295e-05, + "loss": 1.3523, + "step": 2952000 + }, + { + "epoch": 31.16, + "learning_rate": 3.442067604504153e-05, + "loss": 1.2899, + "step": 2952500 + }, + { + "epoch": 31.16, + "learning_rate": 3.44180377175301e-05, + "loss": 1.2977, + "step": 2953000 + }, + { + "epoch": 31.17, + "learning_rate": 3.4415399390018685e-05, + "loss": 1.3376, + "step": 2953500 + }, + { + "epoch": 31.17, + "learning_rate": 3.441276106250726e-05, + "loss": 1.3528, + "step": 2954000 + }, + { + "epoch": 31.18, + "learning_rate": 3.441012273499583e-05, + "loss": 1.2765, + "step": 2954500 + }, + { + "epoch": 31.19, + "learning_rate": 3.4407484407484405e-05, + "loss": 1.2993, + "step": 2955000 + }, + { + "epoch": 31.19, + "learning_rate": 3.440484607997299e-05, + "loss": 1.3049, + "step": 2955500 + }, + { + "epoch": 31.2, + "learning_rate": 3.440220775246156e-05, + "loss": 1.3373, + "step": 2956000 + }, + { + "epoch": 31.2, + "learning_rate": 3.439956942495014e-05, + "loss": 1.2966, + "step": 2956500 + }, + { + "epoch": 31.21, + "learning_rate": 3.4396931097438714e-05, + "loss": 1.3182, + "step": 2957000 + }, + { + "epoch": 31.21, + "learning_rate": 3.439429276992729e-05, + "loss": 1.3135, + "step": 2957500 + }, + { + "epoch": 31.22, + "learning_rate": 3.4391654442415865e-05, + "loss": 1.3739, + "step": 2958000 + }, + { + "epoch": 31.22, + "learning_rate": 3.438901611490444e-05, + "loss": 1.3245, + "step": 2958500 + }, + { + "epoch": 31.23, + "learning_rate": 3.4386377787393016e-05, + "loss": 1.3188, + "step": 2959000 + }, + { + "epoch": 31.23, + "learning_rate": 3.438373945988159e-05, + "loss": 1.3596, + "step": 2959500 + }, + { + "epoch": 31.24, + "learning_rate": 3.4381101132370167e-05, + "loss": 1.3569, + "step": 2960000 + }, + { + "epoch": 31.24, + "learning_rate": 3.437846280485875e-05, + "loss": 1.2994, + "step": 2960500 + }, + { + "epoch": 31.25, + "learning_rate": 3.4375824477347324e-05, + "loss": 1.3253, + "step": 2961000 + }, + { + "epoch": 31.25, + "learning_rate": 3.437318614983589e-05, + "loss": 1.2827, + "step": 2961500 + }, + { + "epoch": 31.26, + "learning_rate": 3.4370547822324475e-05, + "loss": 1.2817, + "step": 2962000 + }, + { + "epoch": 31.26, + "learning_rate": 3.436790949481305e-05, + "loss": 1.3932, + "step": 2962500 + }, + { + "epoch": 31.27, + "learning_rate": 3.4365271167301626e-05, + "loss": 1.2973, + "step": 2963000 + }, + { + "epoch": 31.27, + "learning_rate": 3.43626328397902e-05, + "loss": 1.3379, + "step": 2963500 + }, + { + "epoch": 31.28, + "learning_rate": 3.435999451227878e-05, + "loss": 1.3333, + "step": 2964000 + }, + { + "epoch": 31.29, + "learning_rate": 3.435735618476735e-05, + "loss": 1.3082, + "step": 2964500 + }, + { + "epoch": 31.29, + "learning_rate": 3.435471785725593e-05, + "loss": 1.334, + "step": 2965000 + }, + { + "epoch": 31.3, + "learning_rate": 3.435207952974451e-05, + "loss": 1.3176, + "step": 2965500 + }, + { + "epoch": 31.3, + "learning_rate": 3.4349441202233086e-05, + "loss": 1.3792, + "step": 2966000 + }, + { + "epoch": 31.31, + "learning_rate": 3.4346802874721655e-05, + "loss": 1.3113, + "step": 2966500 + }, + { + "epoch": 31.31, + "learning_rate": 3.434416454721023e-05, + "loss": 1.3345, + "step": 2967000 + }, + { + "epoch": 31.32, + "learning_rate": 3.434152621969881e-05, + "loss": 1.3111, + "step": 2967500 + }, + { + "epoch": 31.32, + "learning_rate": 3.433888789218739e-05, + "loss": 1.2871, + "step": 2968000 + }, + { + "epoch": 31.33, + "learning_rate": 3.433624956467596e-05, + "loss": 1.358, + "step": 2968500 + }, + { + "epoch": 31.33, + "learning_rate": 3.433361123716454e-05, + "loss": 1.3116, + "step": 2969000 + }, + { + "epoch": 31.34, + "learning_rate": 3.4330972909653114e-05, + "loss": 1.3022, + "step": 2969500 + }, + { + "epoch": 31.34, + "learning_rate": 3.432833458214169e-05, + "loss": 1.2764, + "step": 2970000 + }, + { + "epoch": 31.35, + "learning_rate": 3.4325696254630265e-05, + "loss": 1.3061, + "step": 2970500 + }, + { + "epoch": 31.35, + "learning_rate": 3.432305792711884e-05, + "loss": 1.3034, + "step": 2971000 + }, + { + "epoch": 31.36, + "learning_rate": 3.4320419599607416e-05, + "loss": 1.3481, + "step": 2971500 + }, + { + "epoch": 31.36, + "learning_rate": 3.431778127209599e-05, + "loss": 1.3153, + "step": 2972000 + }, + { + "epoch": 31.37, + "learning_rate": 3.4315142944584574e-05, + "loss": 1.3302, + "step": 2972500 + }, + { + "epoch": 31.37, + "learning_rate": 3.431250461707315e-05, + "loss": 1.3583, + "step": 2973000 + }, + { + "epoch": 31.38, + "learning_rate": 3.430986628956172e-05, + "loss": 1.3437, + "step": 2973500 + }, + { + "epoch": 31.39, + "learning_rate": 3.43072279620503e-05, + "loss": 1.3757, + "step": 2974000 + }, + { + "epoch": 31.39, + "learning_rate": 3.4304589634538876e-05, + "loss": 1.3672, + "step": 2974500 + }, + { + "epoch": 31.4, + "learning_rate": 3.430195130702745e-05, + "loss": 1.3432, + "step": 2975000 + }, + { + "epoch": 31.4, + "learning_rate": 3.429931297951603e-05, + "loss": 1.3246, + "step": 2975500 + }, + { + "epoch": 31.41, + "learning_rate": 3.42966746520046e-05, + "loss": 1.2494, + "step": 2976000 + }, + { + "epoch": 31.41, + "learning_rate": 3.429403632449318e-05, + "loss": 1.3153, + "step": 2976500 + }, + { + "epoch": 31.42, + "learning_rate": 3.4291397996981753e-05, + "loss": 1.3191, + "step": 2977000 + }, + { + "epoch": 31.42, + "learning_rate": 3.4288759669470336e-05, + "loss": 1.3546, + "step": 2977500 + }, + { + "epoch": 31.43, + "learning_rate": 3.4286121341958904e-05, + "loss": 1.2933, + "step": 2978000 + }, + { + "epoch": 31.43, + "learning_rate": 3.428348301444748e-05, + "loss": 1.3156, + "step": 2978500 + }, + { + "epoch": 31.44, + "learning_rate": 3.428084468693606e-05, + "loss": 1.3106, + "step": 2979000 + }, + { + "epoch": 31.44, + "learning_rate": 3.427820635942464e-05, + "loss": 1.3104, + "step": 2979500 + }, + { + "epoch": 31.45, + "learning_rate": 3.427556803191321e-05, + "loss": 1.4219, + "step": 2980000 + }, + { + "epoch": 31.45, + "learning_rate": 3.427292970440178e-05, + "loss": 1.4534, + "step": 2980500 + }, + { + "epoch": 31.46, + "learning_rate": 3.4270291376890364e-05, + "loss": 1.2592, + "step": 2981000 + }, + { + "epoch": 31.46, + "learning_rate": 3.426765304937894e-05, + "loss": 1.3483, + "step": 2981500 + }, + { + "epoch": 31.47, + "learning_rate": 3.4265014721867515e-05, + "loss": 1.3004, + "step": 2982000 + }, + { + "epoch": 31.48, + "learning_rate": 3.426237639435609e-05, + "loss": 1.3185, + "step": 2982500 + }, + { + "epoch": 31.48, + "learning_rate": 3.4259738066844666e-05, + "loss": 1.3617, + "step": 2983000 + }, + { + "epoch": 31.49, + "learning_rate": 3.425709973933324e-05, + "loss": 1.3282, + "step": 2983500 + }, + { + "epoch": 31.49, + "learning_rate": 3.425446141182182e-05, + "loss": 1.3373, + "step": 2984000 + }, + { + "epoch": 31.5, + "learning_rate": 3.42518230843104e-05, + "loss": 1.2853, + "step": 2984500 + }, + { + "epoch": 31.5, + "learning_rate": 3.4249184756798975e-05, + "loss": 1.3133, + "step": 2985000 + }, + { + "epoch": 31.51, + "learning_rate": 3.4246546429287544e-05, + "loss": 1.3601, + "step": 2985500 + }, + { + "epoch": 31.51, + "learning_rate": 3.4243908101776126e-05, + "loss": 1.3133, + "step": 2986000 + }, + { + "epoch": 31.52, + "learning_rate": 3.42412697742647e-05, + "loss": 1.3055, + "step": 2986500 + }, + { + "epoch": 31.52, + "learning_rate": 3.423863144675328e-05, + "loss": 1.2773, + "step": 2987000 + }, + { + "epoch": 31.53, + "learning_rate": 3.423599311924185e-05, + "loss": 1.3604, + "step": 2987500 + }, + { + "epoch": 31.53, + "learning_rate": 3.423335479173043e-05, + "loss": 1.3502, + "step": 2988000 + }, + { + "epoch": 31.54, + "learning_rate": 3.4230716464219e-05, + "loss": 1.3072, + "step": 2988500 + }, + { + "epoch": 31.54, + "learning_rate": 3.422807813670758e-05, + "loss": 1.3283, + "step": 2989000 + }, + { + "epoch": 31.55, + "learning_rate": 3.422543980919616e-05, + "loss": 1.356, + "step": 2989500 + }, + { + "epoch": 31.55, + "learning_rate": 3.422280148168473e-05, + "loss": 1.3593, + "step": 2990000 + }, + { + "epoch": 31.56, + "learning_rate": 3.4220163154173305e-05, + "loss": 1.2088, + "step": 2990500 + }, + { + "epoch": 31.56, + "learning_rate": 3.421752482666189e-05, + "loss": 1.3283, + "step": 2991000 + }, + { + "epoch": 31.57, + "learning_rate": 3.421488649915046e-05, + "loss": 1.3149, + "step": 2991500 + }, + { + "epoch": 31.58, + "learning_rate": 3.421224817163904e-05, + "loss": 1.3603, + "step": 2992000 + }, + { + "epoch": 31.58, + "learning_rate": 3.420960984412761e-05, + "loss": 1.3524, + "step": 2992500 + }, + { + "epoch": 31.59, + "learning_rate": 3.420697151661619e-05, + "loss": 1.2835, + "step": 2993000 + }, + { + "epoch": 31.59, + "learning_rate": 3.4204333189104765e-05, + "loss": 1.2831, + "step": 2993500 + }, + { + "epoch": 31.6, + "learning_rate": 3.420169486159334e-05, + "loss": 1.2941, + "step": 2994000 + }, + { + "epoch": 31.6, + "learning_rate": 3.4199056534081916e-05, + "loss": 1.3121, + "step": 2994500 + }, + { + "epoch": 31.61, + "learning_rate": 3.419641820657049e-05, + "loss": 1.3653, + "step": 2995000 + }, + { + "epoch": 31.61, + "learning_rate": 3.419377987905907e-05, + "loss": 1.329, + "step": 2995500 + }, + { + "epoch": 31.62, + "learning_rate": 3.419114155154764e-05, + "loss": 1.288, + "step": 2996000 + }, + { + "epoch": 31.62, + "learning_rate": 3.4188503224036225e-05, + "loss": 1.3552, + "step": 2996500 + }, + { + "epoch": 31.63, + "learning_rate": 3.418586489652479e-05, + "loss": 1.3678, + "step": 2997000 + }, + { + "epoch": 31.63, + "learning_rate": 3.418322656901337e-05, + "loss": 1.3085, + "step": 2997500 + }, + { + "epoch": 31.64, + "learning_rate": 3.418058824150195e-05, + "loss": 1.3188, + "step": 2998000 + }, + { + "epoch": 31.64, + "learning_rate": 3.4177949913990527e-05, + "loss": 1.3274, + "step": 2998500 + }, + { + "epoch": 31.65, + "learning_rate": 3.41753115864791e-05, + "loss": 1.3021, + "step": 2999000 + }, + { + "epoch": 31.65, + "learning_rate": 3.417267325896768e-05, + "loss": 1.3229, + "step": 2999500 + }, + { + "epoch": 31.66, + "learning_rate": 3.417003493145625e-05, + "loss": 1.3432, + "step": 3000000 + }, + { + "epoch": 31.67, + "learning_rate": 3.416739660394483e-05, + "loss": 1.3627, + "step": 3000500 + }, + { + "epoch": 31.67, + "learning_rate": 3.4164758276433404e-05, + "loss": 1.3333, + "step": 3001000 + }, + { + "epoch": 31.68, + "learning_rate": 3.4162119948921986e-05, + "loss": 1.3303, + "step": 3001500 + }, + { + "epoch": 31.68, + "learning_rate": 3.4159481621410555e-05, + "loss": 1.3361, + "step": 3002000 + }, + { + "epoch": 31.69, + "learning_rate": 3.415684329389913e-05, + "loss": 1.3061, + "step": 3002500 + }, + { + "epoch": 31.69, + "learning_rate": 3.415420496638771e-05, + "loss": 1.259, + "step": 3003000 + }, + { + "epoch": 31.7, + "learning_rate": 3.415156663887629e-05, + "loss": 1.3602, + "step": 3003500 + }, + { + "epoch": 31.7, + "learning_rate": 3.4148928311364864e-05, + "loss": 1.3303, + "step": 3004000 + }, + { + "epoch": 31.71, + "learning_rate": 3.414628998385343e-05, + "loss": 1.3541, + "step": 3004500 + }, + { + "epoch": 31.71, + "learning_rate": 3.4143651656342015e-05, + "loss": 1.34, + "step": 3005000 + }, + { + "epoch": 31.72, + "learning_rate": 3.414101332883059e-05, + "loss": 1.2543, + "step": 3005500 + }, + { + "epoch": 31.72, + "learning_rate": 3.4138375001319166e-05, + "loss": 1.3023, + "step": 3006000 + }, + { + "epoch": 31.73, + "learning_rate": 3.413573667380774e-05, + "loss": 1.3957, + "step": 3006500 + }, + { + "epoch": 31.73, + "learning_rate": 3.4133098346296317e-05, + "loss": 1.3615, + "step": 3007000 + }, + { + "epoch": 31.74, + "learning_rate": 3.413046001878489e-05, + "loss": 1.3752, + "step": 3007500 + }, + { + "epoch": 31.74, + "learning_rate": 3.412782169127347e-05, + "loss": 1.3343, + "step": 3008000 + }, + { + "epoch": 31.75, + "learning_rate": 3.412518336376205e-05, + "loss": 1.2833, + "step": 3008500 + }, + { + "epoch": 31.75, + "learning_rate": 3.412254503625062e-05, + "loss": 1.3056, + "step": 3009000 + }, + { + "epoch": 31.76, + "learning_rate": 3.4119906708739194e-05, + "loss": 1.2981, + "step": 3009500 + }, + { + "epoch": 31.77, + "learning_rate": 3.4117268381227776e-05, + "loss": 1.2663, + "step": 3010000 + }, + { + "epoch": 31.77, + "learning_rate": 3.411463005371635e-05, + "loss": 1.3244, + "step": 3010500 + }, + { + "epoch": 31.78, + "learning_rate": 3.411199172620493e-05, + "loss": 1.3485, + "step": 3011000 + }, + { + "epoch": 31.78, + "learning_rate": 3.41093533986935e-05, + "loss": 1.3011, + "step": 3011500 + }, + { + "epoch": 31.79, + "learning_rate": 3.410671507118208e-05, + "loss": 1.3208, + "step": 3012000 + }, + { + "epoch": 31.79, + "learning_rate": 3.4104076743670654e-05, + "loss": 1.3376, + "step": 3012500 + }, + { + "epoch": 31.8, + "learning_rate": 3.410143841615923e-05, + "loss": 1.3114, + "step": 3013000 + }, + { + "epoch": 31.8, + "learning_rate": 3.409880008864781e-05, + "loss": 1.2925, + "step": 3013500 + }, + { + "epoch": 31.81, + "learning_rate": 3.409616176113638e-05, + "loss": 1.3269, + "step": 3014000 + }, + { + "epoch": 31.81, + "learning_rate": 3.4093523433624956e-05, + "loss": 1.311, + "step": 3014500 + }, + { + "epoch": 31.82, + "learning_rate": 3.409088510611354e-05, + "loss": 1.378, + "step": 3015000 + }, + { + "epoch": 31.82, + "learning_rate": 3.4088246778602113e-05, + "loss": 1.3422, + "step": 3015500 + }, + { + "epoch": 31.83, + "learning_rate": 3.408560845109068e-05, + "loss": 1.383, + "step": 3016000 + }, + { + "epoch": 31.83, + "learning_rate": 3.408297012357926e-05, + "loss": 1.3558, + "step": 3016500 + }, + { + "epoch": 31.84, + "learning_rate": 3.408033179606784e-05, + "loss": 1.3211, + "step": 3017000 + }, + { + "epoch": 31.84, + "learning_rate": 3.4077693468556415e-05, + "loss": 1.3365, + "step": 3017500 + }, + { + "epoch": 31.85, + "learning_rate": 3.407505514104499e-05, + "loss": 1.344, + "step": 3018000 + }, + { + "epoch": 31.86, + "learning_rate": 3.4072416813533566e-05, + "loss": 1.3664, + "step": 3018500 + }, + { + "epoch": 31.86, + "learning_rate": 3.406977848602214e-05, + "loss": 1.3253, + "step": 3019000 + }, + { + "epoch": 31.87, + "learning_rate": 3.406714015851072e-05, + "loss": 1.3528, + "step": 3019500 + }, + { + "epoch": 31.87, + "learning_rate": 3.406450183099929e-05, + "loss": 1.3579, + "step": 3020000 + }, + { + "epoch": 31.88, + "learning_rate": 3.4061863503487875e-05, + "loss": 1.2794, + "step": 3020500 + }, + { + "epoch": 31.88, + "learning_rate": 3.4059225175976444e-05, + "loss": 1.3004, + "step": 3021000 + }, + { + "epoch": 31.89, + "learning_rate": 3.405658684846502e-05, + "loss": 1.3088, + "step": 3021500 + }, + { + "epoch": 31.89, + "learning_rate": 3.40539485209536e-05, + "loss": 1.3516, + "step": 3022000 + }, + { + "epoch": 31.9, + "learning_rate": 3.405131019344218e-05, + "loss": 1.3139, + "step": 3022500 + }, + { + "epoch": 31.9, + "learning_rate": 3.404867186593075e-05, + "loss": 1.2817, + "step": 3023000 + }, + { + "epoch": 31.91, + "learning_rate": 3.404603353841933e-05, + "loss": 1.262, + "step": 3023500 + }, + { + "epoch": 31.91, + "learning_rate": 3.4043395210907904e-05, + "loss": 1.3042, + "step": 3024000 + }, + { + "epoch": 31.92, + "learning_rate": 3.404075688339648e-05, + "loss": 1.3038, + "step": 3024500 + }, + { + "epoch": 31.92, + "learning_rate": 3.4038118555885054e-05, + "loss": 1.2466, + "step": 3025000 + }, + { + "epoch": 31.93, + "learning_rate": 3.403548022837363e-05, + "loss": 1.2899, + "step": 3025500 + }, + { + "epoch": 31.93, + "learning_rate": 3.4032841900862205e-05, + "loss": 1.3064, + "step": 3026000 + }, + { + "epoch": 31.94, + "learning_rate": 3.403020357335078e-05, + "loss": 1.3365, + "step": 3026500 + }, + { + "epoch": 31.94, + "learning_rate": 3.402756524583936e-05, + "loss": 1.3411, + "step": 3027000 + }, + { + "epoch": 31.95, + "learning_rate": 3.402492691832794e-05, + "loss": 1.3403, + "step": 3027500 + }, + { + "epoch": 31.96, + "learning_rate": 3.402228859081651e-05, + "loss": 1.3067, + "step": 3028000 + }, + { + "epoch": 31.96, + "learning_rate": 3.401965026330508e-05, + "loss": 1.3516, + "step": 3028500 + }, + { + "epoch": 31.97, + "learning_rate": 3.4017011935793665e-05, + "loss": 1.3256, + "step": 3029000 + }, + { + "epoch": 31.97, + "learning_rate": 3.401437360828224e-05, + "loss": 1.3752, + "step": 3029500 + }, + { + "epoch": 31.98, + "learning_rate": 3.4011735280770816e-05, + "loss": 1.3268, + "step": 3030000 + }, + { + "epoch": 31.98, + "learning_rate": 3.400909695325939e-05, + "loss": 1.3252, + "step": 3030500 + }, + { + "epoch": 31.99, + "learning_rate": 3.400645862574797e-05, + "loss": 1.3327, + "step": 3031000 + }, + { + "epoch": 31.99, + "learning_rate": 3.400382029823654e-05, + "loss": 1.3377, + "step": 3031500 + }, + { + "epoch": 32.0, + "learning_rate": 3.400118197072512e-05, + "loss": 1.274, + "step": 3032000 + }, + { + "epoch": 32.0, + "learning_rate": 3.39985436432137e-05, + "loss": 1.3466, + "step": 3032500 + }, + { + "epoch": 32.01, + "learning_rate": 3.399590531570227e-05, + "loss": 1.351, + "step": 3033000 + }, + { + "epoch": 32.01, + "learning_rate": 3.3993266988190845e-05, + "loss": 1.2676, + "step": 3033500 + }, + { + "epoch": 32.02, + "learning_rate": 3.399062866067943e-05, + "loss": 1.3439, + "step": 3034000 + }, + { + "epoch": 32.02, + "learning_rate": 3.3987990333168e-05, + "loss": 1.293, + "step": 3034500 + }, + { + "epoch": 32.03, + "learning_rate": 3.398535200565657e-05, + "loss": 1.3005, + "step": 3035000 + }, + { + "epoch": 32.03, + "learning_rate": 3.398271367814515e-05, + "loss": 1.2846, + "step": 3035500 + }, + { + "epoch": 32.04, + "learning_rate": 3.398007535063373e-05, + "loss": 1.3229, + "step": 3036000 + }, + { + "epoch": 32.05, + "learning_rate": 3.3977437023122304e-05, + "loss": 1.3119, + "step": 3036500 + }, + { + "epoch": 32.05, + "learning_rate": 3.397479869561088e-05, + "loss": 1.3376, + "step": 3037000 + }, + { + "epoch": 32.06, + "learning_rate": 3.3972160368099455e-05, + "loss": 1.2995, + "step": 3037500 + }, + { + "epoch": 32.06, + "learning_rate": 3.396952204058803e-05, + "loss": 1.308, + "step": 3038000 + }, + { + "epoch": 32.07, + "learning_rate": 3.3966883713076606e-05, + "loss": 1.3327, + "step": 3038500 + }, + { + "epoch": 32.07, + "learning_rate": 3.396424538556519e-05, + "loss": 1.2938, + "step": 3039000 + }, + { + "epoch": 32.08, + "learning_rate": 3.3961607058053764e-05, + "loss": 1.2791, + "step": 3039500 + }, + { + "epoch": 32.08, + "learning_rate": 3.395896873054233e-05, + "loss": 1.3358, + "step": 3040000 + }, + { + "epoch": 32.09, + "learning_rate": 3.395633040303091e-05, + "loss": 1.2996, + "step": 3040500 + }, + { + "epoch": 32.09, + "learning_rate": 3.395369207551949e-05, + "loss": 1.2983, + "step": 3041000 + }, + { + "epoch": 32.1, + "learning_rate": 3.3951053748008066e-05, + "loss": 1.2655, + "step": 3041500 + }, + { + "epoch": 32.1, + "learning_rate": 3.394841542049664e-05, + "loss": 1.3048, + "step": 3042000 + }, + { + "epoch": 32.11, + "learning_rate": 3.394577709298522e-05, + "loss": 1.3368, + "step": 3042500 + }, + { + "epoch": 32.11, + "learning_rate": 3.394313876547379e-05, + "loss": 1.3417, + "step": 3043000 + }, + { + "epoch": 32.12, + "learning_rate": 3.394050043796237e-05, + "loss": 1.3506, + "step": 3043500 + }, + { + "epoch": 32.12, + "learning_rate": 3.393786211045094e-05, + "loss": 1.3002, + "step": 3044000 + }, + { + "epoch": 32.13, + "learning_rate": 3.393522378293952e-05, + "loss": 1.3223, + "step": 3044500 + }, + { + "epoch": 32.13, + "learning_rate": 3.3932585455428094e-05, + "loss": 1.3562, + "step": 3045000 + }, + { + "epoch": 32.14, + "learning_rate": 3.392994712791667e-05, + "loss": 1.3079, + "step": 3045500 + }, + { + "epoch": 32.15, + "learning_rate": 3.392730880040525e-05, + "loss": 1.3599, + "step": 3046000 + }, + { + "epoch": 32.15, + "learning_rate": 3.392467047289383e-05, + "loss": 1.3633, + "step": 3046500 + }, + { + "epoch": 32.16, + "learning_rate": 3.3922032145382396e-05, + "loss": 1.3232, + "step": 3047000 + }, + { + "epoch": 32.16, + "learning_rate": 3.391939381787098e-05, + "loss": 1.2371, + "step": 3047500 + }, + { + "epoch": 32.17, + "learning_rate": 3.3916755490359554e-05, + "loss": 1.343, + "step": 3048000 + }, + { + "epoch": 32.17, + "learning_rate": 3.391411716284813e-05, + "loss": 1.3193, + "step": 3048500 + }, + { + "epoch": 32.18, + "learning_rate": 3.3911478835336705e-05, + "loss": 1.2731, + "step": 3049000 + }, + { + "epoch": 32.18, + "learning_rate": 3.390884050782528e-05, + "loss": 1.304, + "step": 3049500 + }, + { + "epoch": 32.19, + "learning_rate": 3.3906202180313856e-05, + "loss": 1.2939, + "step": 3050000 + }, + { + "epoch": 32.19, + "learning_rate": 3.390356385280243e-05, + "loss": 1.3219, + "step": 3050500 + }, + { + "epoch": 32.2, + "learning_rate": 3.3900925525291014e-05, + "loss": 1.3022, + "step": 3051000 + }, + { + "epoch": 32.2, + "learning_rate": 3.389828719777959e-05, + "loss": 1.3416, + "step": 3051500 + }, + { + "epoch": 32.21, + "learning_rate": 3.389564887026816e-05, + "loss": 1.2794, + "step": 3052000 + }, + { + "epoch": 32.21, + "learning_rate": 3.389301054275674e-05, + "loss": 1.2673, + "step": 3052500 + }, + { + "epoch": 32.22, + "learning_rate": 3.3890372215245316e-05, + "loss": 1.3195, + "step": 3053000 + }, + { + "epoch": 32.22, + "learning_rate": 3.388773388773389e-05, + "loss": 1.3055, + "step": 3053500 + }, + { + "epoch": 32.23, + "learning_rate": 3.388509556022246e-05, + "loss": 1.3559, + "step": 3054000 + }, + { + "epoch": 32.24, + "learning_rate": 3.388245723271104e-05, + "loss": 1.3352, + "step": 3054500 + }, + { + "epoch": 32.24, + "learning_rate": 3.387981890519962e-05, + "loss": 1.2975, + "step": 3055000 + }, + { + "epoch": 32.25, + "learning_rate": 3.387718057768819e-05, + "loss": 1.3181, + "step": 3055500 + }, + { + "epoch": 32.25, + "learning_rate": 3.387454225017677e-05, + "loss": 1.2834, + "step": 3056000 + }, + { + "epoch": 32.26, + "learning_rate": 3.3871903922665344e-05, + "loss": 1.382, + "step": 3056500 + }, + { + "epoch": 32.26, + "learning_rate": 3.386926559515392e-05, + "loss": 1.3915, + "step": 3057000 + }, + { + "epoch": 32.27, + "learning_rate": 3.3866627267642495e-05, + "loss": 1.329, + "step": 3057500 + }, + { + "epoch": 32.27, + "learning_rate": 3.386398894013108e-05, + "loss": 1.306, + "step": 3058000 + }, + { + "epoch": 32.28, + "learning_rate": 3.386135061261965e-05, + "loss": 1.3305, + "step": 3058500 + }, + { + "epoch": 32.28, + "learning_rate": 3.385871228510822e-05, + "loss": 1.3088, + "step": 3059000 + }, + { + "epoch": 32.29, + "learning_rate": 3.3856073957596804e-05, + "loss": 1.2924, + "step": 3059500 + }, + { + "epoch": 32.29, + "learning_rate": 3.385343563008538e-05, + "loss": 1.2951, + "step": 3060000 + }, + { + "epoch": 32.3, + "learning_rate": 3.3850797302573955e-05, + "loss": 1.3224, + "step": 3060500 + }, + { + "epoch": 32.3, + "learning_rate": 3.384815897506253e-05, + "loss": 1.3246, + "step": 3061000 + }, + { + "epoch": 32.31, + "learning_rate": 3.3845520647551106e-05, + "loss": 1.3105, + "step": 3061500 + }, + { + "epoch": 32.31, + "learning_rate": 3.384288232003968e-05, + "loss": 1.311, + "step": 3062000 + }, + { + "epoch": 32.32, + "learning_rate": 3.384024399252826e-05, + "loss": 1.3238, + "step": 3062500 + }, + { + "epoch": 32.32, + "learning_rate": 3.383760566501684e-05, + "loss": 1.3135, + "step": 3063000 + }, + { + "epoch": 32.33, + "learning_rate": 3.383496733750541e-05, + "loss": 1.325, + "step": 3063500 + }, + { + "epoch": 32.34, + "learning_rate": 3.383232900999398e-05, + "loss": 1.3277, + "step": 3064000 + }, + { + "epoch": 32.34, + "learning_rate": 3.3829690682482565e-05, + "loss": 1.3236, + "step": 3064500 + }, + { + "epoch": 32.35, + "learning_rate": 3.382705235497114e-05, + "loss": 1.3021, + "step": 3065000 + }, + { + "epoch": 32.35, + "learning_rate": 3.3824414027459716e-05, + "loss": 1.3202, + "step": 3065500 + }, + { + "epoch": 32.36, + "learning_rate": 3.3821775699948285e-05, + "loss": 1.2826, + "step": 3066000 + }, + { + "epoch": 32.36, + "learning_rate": 3.381913737243687e-05, + "loss": 1.341, + "step": 3066500 + }, + { + "epoch": 32.37, + "learning_rate": 3.381649904492544e-05, + "loss": 1.3404, + "step": 3067000 + }, + { + "epoch": 32.37, + "learning_rate": 3.381386071741402e-05, + "loss": 1.3299, + "step": 3067500 + }, + { + "epoch": 32.38, + "learning_rate": 3.3811222389902594e-05, + "loss": 1.3352, + "step": 3068000 + }, + { + "epoch": 32.38, + "learning_rate": 3.380858406239117e-05, + "loss": 1.3778, + "step": 3068500 + }, + { + "epoch": 32.39, + "learning_rate": 3.3805945734879745e-05, + "loss": 1.3598, + "step": 3069000 + }, + { + "epoch": 32.39, + "learning_rate": 3.380330740736832e-05, + "loss": 1.3169, + "step": 3069500 + }, + { + "epoch": 32.4, + "learning_rate": 3.38006690798569e-05, + "loss": 1.3017, + "step": 3070000 + }, + { + "epoch": 32.4, + "learning_rate": 3.379803075234547e-05, + "loss": 1.3544, + "step": 3070500 + }, + { + "epoch": 32.41, + "learning_rate": 3.379539242483405e-05, + "loss": 1.2965, + "step": 3071000 + }, + { + "epoch": 32.41, + "learning_rate": 3.379275409732263e-05, + "loss": 1.3169, + "step": 3071500 + }, + { + "epoch": 32.42, + "learning_rate": 3.3790115769811204e-05, + "loss": 1.3275, + "step": 3072000 + }, + { + "epoch": 32.43, + "learning_rate": 3.378747744229978e-05, + "loss": 1.2824, + "step": 3072500 + }, + { + "epoch": 32.43, + "learning_rate": 3.3784839114788355e-05, + "loss": 1.3781, + "step": 3073000 + }, + { + "epoch": 32.44, + "learning_rate": 3.378220078727693e-05, + "loss": 1.3157, + "step": 3073500 + }, + { + "epoch": 32.44, + "learning_rate": 3.3779562459765506e-05, + "loss": 1.3387, + "step": 3074000 + }, + { + "epoch": 32.45, + "learning_rate": 3.377692413225408e-05, + "loss": 1.3354, + "step": 3074500 + }, + { + "epoch": 32.45, + "learning_rate": 3.3774285804742664e-05, + "loss": 1.3077, + "step": 3075000 + }, + { + "epoch": 32.46, + "learning_rate": 3.377164747723123e-05, + "loss": 1.3129, + "step": 3075500 + }, + { + "epoch": 32.46, + "learning_rate": 3.376900914971981e-05, + "loss": 1.3193, + "step": 3076000 + }, + { + "epoch": 32.47, + "learning_rate": 3.376637082220839e-05, + "loss": 1.3091, + "step": 3076500 + }, + { + "epoch": 32.47, + "learning_rate": 3.3763732494696966e-05, + "loss": 1.3142, + "step": 3077000 + }, + { + "epoch": 32.48, + "learning_rate": 3.376109416718554e-05, + "loss": 1.2785, + "step": 3077500 + }, + { + "epoch": 32.48, + "learning_rate": 3.375845583967411e-05, + "loss": 1.3893, + "step": 3078000 + }, + { + "epoch": 32.49, + "learning_rate": 3.375581751216269e-05, + "loss": 1.3043, + "step": 3078500 + }, + { + "epoch": 32.49, + "learning_rate": 3.375317918465127e-05, + "loss": 1.2967, + "step": 3079000 + }, + { + "epoch": 32.5, + "learning_rate": 3.3750540857139844e-05, + "loss": 1.32, + "step": 3079500 + }, + { + "epoch": 32.5, + "learning_rate": 3.374790252962842e-05, + "loss": 1.2949, + "step": 3080000 + }, + { + "epoch": 32.51, + "learning_rate": 3.3745264202116995e-05, + "loss": 1.3191, + "step": 3080500 + }, + { + "epoch": 32.51, + "learning_rate": 3.374262587460557e-05, + "loss": 1.3227, + "step": 3081000 + }, + { + "epoch": 32.52, + "learning_rate": 3.3739987547094146e-05, + "loss": 1.3086, + "step": 3081500 + }, + { + "epoch": 32.53, + "learning_rate": 3.373734921958273e-05, + "loss": 1.2742, + "step": 3082000 + }, + { + "epoch": 32.53, + "learning_rate": 3.3734710892071296e-05, + "loss": 1.2619, + "step": 3082500 + }, + { + "epoch": 32.54, + "learning_rate": 3.373207256455987e-05, + "loss": 1.3296, + "step": 3083000 + }, + { + "epoch": 32.54, + "learning_rate": 3.3729434237048454e-05, + "loss": 1.3891, + "step": 3083500 + }, + { + "epoch": 32.55, + "learning_rate": 3.372679590953703e-05, + "loss": 1.3308, + "step": 3084000 + }, + { + "epoch": 32.55, + "learning_rate": 3.3724157582025605e-05, + "loss": 1.3052, + "step": 3084500 + }, + { + "epoch": 32.56, + "learning_rate": 3.372151925451418e-05, + "loss": 1.3007, + "step": 3085000 + }, + { + "epoch": 32.56, + "learning_rate": 3.3718880927002756e-05, + "loss": 1.3954, + "step": 3085500 + }, + { + "epoch": 32.57, + "learning_rate": 3.371624259949133e-05, + "loss": 1.3383, + "step": 3086000 + }, + { + "epoch": 32.57, + "learning_rate": 3.371360427197991e-05, + "loss": 1.3261, + "step": 3086500 + }, + { + "epoch": 32.58, + "learning_rate": 3.371096594446849e-05, + "loss": 1.302, + "step": 3087000 + }, + { + "epoch": 32.58, + "learning_rate": 3.370832761695706e-05, + "loss": 1.3609, + "step": 3087500 + }, + { + "epoch": 32.59, + "learning_rate": 3.3705689289445634e-05, + "loss": 1.3172, + "step": 3088000 + }, + { + "epoch": 32.59, + "learning_rate": 3.3703050961934216e-05, + "loss": 1.3594, + "step": 3088500 + }, + { + "epoch": 32.6, + "learning_rate": 3.370041263442279e-05, + "loss": 1.3406, + "step": 3089000 + }, + { + "epoch": 32.6, + "learning_rate": 3.369777430691136e-05, + "loss": 1.3072, + "step": 3089500 + }, + { + "epoch": 32.61, + "learning_rate": 3.3695135979399936e-05, + "loss": 1.3432, + "step": 3090000 + }, + { + "epoch": 32.62, + "learning_rate": 3.369249765188852e-05, + "loss": 1.337, + "step": 3090500 + }, + { + "epoch": 32.62, + "learning_rate": 3.368985932437709e-05, + "loss": 1.3223, + "step": 3091000 + }, + { + "epoch": 32.63, + "learning_rate": 3.368722099686567e-05, + "loss": 1.3863, + "step": 3091500 + }, + { + "epoch": 32.63, + "learning_rate": 3.3684582669354244e-05, + "loss": 1.2792, + "step": 3092000 + }, + { + "epoch": 32.64, + "learning_rate": 3.368194434184282e-05, + "loss": 1.3809, + "step": 3092500 + }, + { + "epoch": 32.64, + "learning_rate": 3.3679306014331395e-05, + "loss": 1.2583, + "step": 3093000 + }, + { + "epoch": 32.65, + "learning_rate": 3.367666768681997e-05, + "loss": 1.284, + "step": 3093500 + }, + { + "epoch": 32.65, + "learning_rate": 3.367402935930855e-05, + "loss": 1.3344, + "step": 3094000 + }, + { + "epoch": 32.66, + "learning_rate": 3.367139103179712e-05, + "loss": 1.2422, + "step": 3094500 + }, + { + "epoch": 32.66, + "learning_rate": 3.36687527042857e-05, + "loss": 1.2904, + "step": 3095000 + }, + { + "epoch": 32.67, + "learning_rate": 3.366611437677428e-05, + "loss": 1.2966, + "step": 3095500 + }, + { + "epoch": 32.67, + "learning_rate": 3.3663476049262855e-05, + "loss": 1.3461, + "step": 3096000 + }, + { + "epoch": 32.68, + "learning_rate": 3.366083772175143e-05, + "loss": 1.319, + "step": 3096500 + }, + { + "epoch": 32.68, + "learning_rate": 3.3658199394240006e-05, + "loss": 1.3128, + "step": 3097000 + }, + { + "epoch": 32.69, + "learning_rate": 3.365556106672858e-05, + "loss": 1.3462, + "step": 3097500 + }, + { + "epoch": 32.69, + "learning_rate": 3.365292273921716e-05, + "loss": 1.3279, + "step": 3098000 + }, + { + "epoch": 32.7, + "learning_rate": 3.365028441170573e-05, + "loss": 1.3106, + "step": 3098500 + }, + { + "epoch": 32.7, + "learning_rate": 3.364764608419431e-05, + "loss": 1.2906, + "step": 3099000 + }, + { + "epoch": 32.71, + "learning_rate": 3.3645007756682883e-05, + "loss": 1.297, + "step": 3099500 + }, + { + "epoch": 32.72, + "learning_rate": 3.364236942917146e-05, + "loss": 1.3876, + "step": 3100000 + }, + { + "epoch": 32.72, + "learning_rate": 3.363973110166004e-05, + "loss": 1.3538, + "step": 3100500 + }, + { + "epoch": 32.73, + "learning_rate": 3.363709277414862e-05, + "loss": 1.318, + "step": 3101000 + }, + { + "epoch": 32.73, + "learning_rate": 3.3634454446637185e-05, + "loss": 1.3282, + "step": 3101500 + }, + { + "epoch": 32.74, + "learning_rate": 3.363181611912576e-05, + "loss": 1.3573, + "step": 3102000 + }, + { + "epoch": 32.74, + "learning_rate": 3.362917779161434e-05, + "loss": 1.3514, + "step": 3102500 + }, + { + "epoch": 32.75, + "learning_rate": 3.362653946410292e-05, + "loss": 1.2824, + "step": 3103000 + }, + { + "epoch": 32.75, + "learning_rate": 3.3623901136591494e-05, + "loss": 1.2605, + "step": 3103500 + }, + { + "epoch": 32.76, + "learning_rate": 3.362126280908007e-05, + "loss": 1.279, + "step": 3104000 + }, + { + "epoch": 32.76, + "learning_rate": 3.3618624481568645e-05, + "loss": 1.3289, + "step": 3104500 + }, + { + "epoch": 32.77, + "learning_rate": 3.361598615405722e-05, + "loss": 1.3502, + "step": 3105000 + }, + { + "epoch": 32.77, + "learning_rate": 3.3613347826545796e-05, + "loss": 1.3815, + "step": 3105500 + }, + { + "epoch": 32.78, + "learning_rate": 3.361070949903438e-05, + "loss": 1.3624, + "step": 3106000 + }, + { + "epoch": 32.78, + "learning_rate": 3.360807117152295e-05, + "loss": 1.299, + "step": 3106500 + }, + { + "epoch": 32.79, + "learning_rate": 3.360543284401152e-05, + "loss": 1.3467, + "step": 3107000 + }, + { + "epoch": 32.79, + "learning_rate": 3.3602794516500105e-05, + "loss": 1.2956, + "step": 3107500 + }, + { + "epoch": 32.8, + "learning_rate": 3.360015618898868e-05, + "loss": 1.3669, + "step": 3108000 + }, + { + "epoch": 32.8, + "learning_rate": 3.359751786147725e-05, + "loss": 1.3356, + "step": 3108500 + }, + { + "epoch": 32.81, + "learning_rate": 3.359487953396583e-05, + "loss": 1.2488, + "step": 3109000 + }, + { + "epoch": 32.82, + "learning_rate": 3.359224120645441e-05, + "loss": 1.3108, + "step": 3109500 + }, + { + "epoch": 32.82, + "learning_rate": 3.358960287894298e-05, + "loss": 1.3443, + "step": 3110000 + }, + { + "epoch": 32.83, + "learning_rate": 3.358696455143156e-05, + "loss": 1.3172, + "step": 3110500 + }, + { + "epoch": 32.83, + "learning_rate": 3.358432622392013e-05, + "loss": 1.3387, + "step": 3111000 + }, + { + "epoch": 32.84, + "learning_rate": 3.358168789640871e-05, + "loss": 1.2983, + "step": 3111500 + }, + { + "epoch": 32.84, + "learning_rate": 3.3579049568897284e-05, + "loss": 1.34, + "step": 3112000 + }, + { + "epoch": 32.85, + "learning_rate": 3.3576411241385866e-05, + "loss": 1.324, + "step": 3112500 + }, + { + "epoch": 32.85, + "learning_rate": 3.357377291387444e-05, + "loss": 1.3058, + "step": 3113000 + }, + { + "epoch": 32.86, + "learning_rate": 3.357113458636301e-05, + "loss": 1.363, + "step": 3113500 + }, + { + "epoch": 32.86, + "learning_rate": 3.3568496258851586e-05, + "loss": 1.2958, + "step": 3114000 + }, + { + "epoch": 32.87, + "learning_rate": 3.356585793134017e-05, + "loss": 1.2676, + "step": 3114500 + }, + { + "epoch": 32.87, + "learning_rate": 3.3563219603828744e-05, + "loss": 1.2565, + "step": 3115000 + }, + { + "epoch": 32.88, + "learning_rate": 3.356058127631732e-05, + "loss": 1.297, + "step": 3115500 + }, + { + "epoch": 32.88, + "learning_rate": 3.3557942948805895e-05, + "loss": 1.3434, + "step": 3116000 + }, + { + "epoch": 32.89, + "learning_rate": 3.355530462129447e-05, + "loss": 1.3295, + "step": 3116500 + }, + { + "epoch": 32.89, + "learning_rate": 3.3552666293783046e-05, + "loss": 1.3651, + "step": 3117000 + }, + { + "epoch": 32.9, + "learning_rate": 3.355002796627162e-05, + "loss": 1.3809, + "step": 3117500 + }, + { + "epoch": 32.91, + "learning_rate": 3.35473896387602e-05, + "loss": 1.2903, + "step": 3118000 + }, + { + "epoch": 32.91, + "learning_rate": 3.354475131124877e-05, + "loss": 1.3358, + "step": 3118500 + }, + { + "epoch": 32.92, + "learning_rate": 3.354211298373735e-05, + "loss": 1.3558, + "step": 3119000 + }, + { + "epoch": 32.92, + "learning_rate": 3.353947465622593e-05, + "loss": 1.3355, + "step": 3119500 + }, + { + "epoch": 32.93, + "learning_rate": 3.3536836328714505e-05, + "loss": 1.2515, + "step": 3120000 + }, + { + "epoch": 32.93, + "learning_rate": 3.3534198001203074e-05, + "loss": 1.2607, + "step": 3120500 + }, + { + "epoch": 32.94, + "learning_rate": 3.3531559673691656e-05, + "loss": 1.3547, + "step": 3121000 + }, + { + "epoch": 32.94, + "learning_rate": 3.352892134618023e-05, + "loss": 1.323, + "step": 3121500 + }, + { + "epoch": 32.95, + "learning_rate": 3.352628301866881e-05, + "loss": 1.3893, + "step": 3122000 + }, + { + "epoch": 32.95, + "learning_rate": 3.352364469115738e-05, + "loss": 1.3341, + "step": 3122500 + }, + { + "epoch": 32.96, + "learning_rate": 3.352100636364596e-05, + "loss": 1.3107, + "step": 3123000 + }, + { + "epoch": 32.96, + "learning_rate": 3.3518368036134534e-05, + "loss": 1.3091, + "step": 3123500 + }, + { + "epoch": 32.97, + "learning_rate": 3.351572970862311e-05, + "loss": 1.2871, + "step": 3124000 + }, + { + "epoch": 32.97, + "learning_rate": 3.351309138111169e-05, + "loss": 1.3001, + "step": 3124500 + }, + { + "epoch": 32.98, + "learning_rate": 3.351045305360027e-05, + "loss": 1.2986, + "step": 3125000 + }, + { + "epoch": 32.98, + "learning_rate": 3.3507814726088836e-05, + "loss": 1.3866, + "step": 3125500 + }, + { + "epoch": 32.99, + "learning_rate": 3.350517639857742e-05, + "loss": 1.3474, + "step": 3126000 + }, + { + "epoch": 32.99, + "learning_rate": 3.3502538071065994e-05, + "loss": 1.3552, + "step": 3126500 + }, + { + "epoch": 33.0, + "learning_rate": 3.349989974355457e-05, + "loss": 1.3064, + "step": 3127000 + }, + { + "epoch": 33.01, + "learning_rate": 3.349726141604314e-05, + "loss": 1.2893, + "step": 3127500 + }, + { + "epoch": 33.01, + "learning_rate": 3.349462308853172e-05, + "loss": 1.3011, + "step": 3128000 + }, + { + "epoch": 33.02, + "learning_rate": 3.3491984761020296e-05, + "loss": 1.2912, + "step": 3128500 + }, + { + "epoch": 33.02, + "learning_rate": 3.348934643350887e-05, + "loss": 1.2716, + "step": 3129000 + }, + { + "epoch": 33.03, + "learning_rate": 3.3486708105997447e-05, + "loss": 1.2622, + "step": 3129500 + }, + { + "epoch": 33.03, + "learning_rate": 3.348406977848602e-05, + "loss": 1.352, + "step": 3130000 + }, + { + "epoch": 33.04, + "learning_rate": 3.34814314509746e-05, + "loss": 1.2974, + "step": 3130500 + }, + { + "epoch": 33.04, + "learning_rate": 3.347879312346317e-05, + "loss": 1.2722, + "step": 3131000 + }, + { + "epoch": 33.05, + "learning_rate": 3.3476154795951755e-05, + "loss": 1.33, + "step": 3131500 + }, + { + "epoch": 33.05, + "learning_rate": 3.347351646844033e-05, + "loss": 1.3298, + "step": 3132000 + }, + { + "epoch": 33.06, + "learning_rate": 3.34708781409289e-05, + "loss": 1.3995, + "step": 3132500 + }, + { + "epoch": 33.06, + "learning_rate": 3.346823981341748e-05, + "loss": 1.3107, + "step": 3133000 + }, + { + "epoch": 33.07, + "learning_rate": 3.346560148590606e-05, + "loss": 1.2894, + "step": 3133500 + }, + { + "epoch": 33.07, + "learning_rate": 3.346296315839463e-05, + "loss": 1.3711, + "step": 3134000 + }, + { + "epoch": 33.08, + "learning_rate": 3.346032483088321e-05, + "loss": 1.3176, + "step": 3134500 + }, + { + "epoch": 33.08, + "learning_rate": 3.3457686503371784e-05, + "loss": 1.3349, + "step": 3135000 + }, + { + "epoch": 33.09, + "learning_rate": 3.345504817586036e-05, + "loss": 1.2919, + "step": 3135500 + }, + { + "epoch": 33.1, + "learning_rate": 3.3452409848348935e-05, + "loss": 1.3352, + "step": 3136000 + }, + { + "epoch": 33.1, + "learning_rate": 3.344977152083752e-05, + "loss": 1.2971, + "step": 3136500 + }, + { + "epoch": 33.11, + "learning_rate": 3.3447133193326086e-05, + "loss": 1.2955, + "step": 3137000 + }, + { + "epoch": 33.11, + "learning_rate": 3.344449486581466e-05, + "loss": 1.2467, + "step": 3137500 + }, + { + "epoch": 33.12, + "learning_rate": 3.344185653830324e-05, + "loss": 1.2866, + "step": 3138000 + }, + { + "epoch": 33.12, + "learning_rate": 3.343921821079182e-05, + "loss": 1.327, + "step": 3138500 + }, + { + "epoch": 33.13, + "learning_rate": 3.3436579883280394e-05, + "loss": 1.2242, + "step": 3139000 + }, + { + "epoch": 33.13, + "learning_rate": 3.343394155576896e-05, + "loss": 1.3064, + "step": 3139500 + }, + { + "epoch": 33.14, + "learning_rate": 3.3431303228257545e-05, + "loss": 1.3097, + "step": 3140000 + }, + { + "epoch": 33.14, + "learning_rate": 3.342866490074612e-05, + "loss": 1.2946, + "step": 3140500 + }, + { + "epoch": 33.15, + "learning_rate": 3.3426026573234696e-05, + "loss": 1.2597, + "step": 3141000 + }, + { + "epoch": 33.15, + "learning_rate": 3.342338824572327e-05, + "loss": 1.3384, + "step": 3141500 + }, + { + "epoch": 33.16, + "learning_rate": 3.342074991821185e-05, + "loss": 1.3398, + "step": 3142000 + }, + { + "epoch": 33.16, + "learning_rate": 3.341811159070042e-05, + "loss": 1.2819, + "step": 3142500 + }, + { + "epoch": 33.17, + "learning_rate": 3.3415473263189e-05, + "loss": 1.2956, + "step": 3143000 + }, + { + "epoch": 33.17, + "learning_rate": 3.341283493567758e-05, + "loss": 1.3216, + "step": 3143500 + }, + { + "epoch": 33.18, + "learning_rate": 3.3410196608166156e-05, + "loss": 1.3346, + "step": 3144000 + }, + { + "epoch": 33.18, + "learning_rate": 3.3407558280654725e-05, + "loss": 1.3153, + "step": 3144500 + }, + { + "epoch": 33.19, + "learning_rate": 3.340491995314331e-05, + "loss": 1.338, + "step": 3145000 + }, + { + "epoch": 33.2, + "learning_rate": 3.340228162563188e-05, + "loss": 1.3357, + "step": 3145500 + }, + { + "epoch": 33.2, + "learning_rate": 3.339964329812046e-05, + "loss": 1.2876, + "step": 3146000 + }, + { + "epoch": 33.21, + "learning_rate": 3.3397004970609033e-05, + "loss": 1.2794, + "step": 3146500 + }, + { + "epoch": 33.21, + "learning_rate": 3.339436664309761e-05, + "loss": 1.3514, + "step": 3147000 + }, + { + "epoch": 33.22, + "learning_rate": 3.3391728315586184e-05, + "loss": 1.351, + "step": 3147500 + }, + { + "epoch": 33.22, + "learning_rate": 3.338908998807476e-05, + "loss": 1.2531, + "step": 3148000 + }, + { + "epoch": 33.23, + "learning_rate": 3.338645166056334e-05, + "loss": 1.3592, + "step": 3148500 + }, + { + "epoch": 33.23, + "learning_rate": 3.338381333305191e-05, + "loss": 1.3355, + "step": 3149000 + }, + { + "epoch": 33.24, + "learning_rate": 3.3381175005540486e-05, + "loss": 1.3561, + "step": 3149500 + }, + { + "epoch": 33.24, + "learning_rate": 3.337853667802907e-05, + "loss": 1.3022, + "step": 3150000 + }, + { + "epoch": 33.25, + "learning_rate": 3.3375898350517644e-05, + "loss": 1.287, + "step": 3150500 + }, + { + "epoch": 33.25, + "learning_rate": 3.337326002300622e-05, + "loss": 1.3155, + "step": 3151000 + }, + { + "epoch": 33.26, + "learning_rate": 3.337062169549479e-05, + "loss": 1.3188, + "step": 3151500 + }, + { + "epoch": 33.26, + "learning_rate": 3.336798336798337e-05, + "loss": 1.3827, + "step": 3152000 + }, + { + "epoch": 33.27, + "learning_rate": 3.3365345040471946e-05, + "loss": 1.2648, + "step": 3152500 + }, + { + "epoch": 33.27, + "learning_rate": 3.336270671296052e-05, + "loss": 1.3122, + "step": 3153000 + }, + { + "epoch": 33.28, + "learning_rate": 3.33600683854491e-05, + "loss": 1.3149, + "step": 3153500 + }, + { + "epoch": 33.29, + "learning_rate": 3.335743005793767e-05, + "loss": 1.3203, + "step": 3154000 + }, + { + "epoch": 33.29, + "learning_rate": 3.335479173042625e-05, + "loss": 1.3852, + "step": 3154500 + }, + { + "epoch": 33.3, + "learning_rate": 3.3352153402914823e-05, + "loss": 1.3158, + "step": 3155000 + }, + { + "epoch": 33.3, + "learning_rate": 3.3349515075403406e-05, + "loss": 1.3343, + "step": 3155500 + }, + { + "epoch": 33.31, + "learning_rate": 3.3346876747891974e-05, + "loss": 1.2996, + "step": 3156000 + }, + { + "epoch": 33.31, + "learning_rate": 3.334423842038055e-05, + "loss": 1.3486, + "step": 3156500 + }, + { + "epoch": 33.32, + "learning_rate": 3.334160009286913e-05, + "loss": 1.3468, + "step": 3157000 + }, + { + "epoch": 33.32, + "learning_rate": 3.333896176535771e-05, + "loss": 1.3262, + "step": 3157500 + }, + { + "epoch": 33.33, + "learning_rate": 3.333632343784628e-05, + "loss": 1.3404, + "step": 3158000 + }, + { + "epoch": 33.33, + "learning_rate": 3.333368511033486e-05, + "loss": 1.2765, + "step": 3158500 + }, + { + "epoch": 33.34, + "learning_rate": 3.3331046782823434e-05, + "loss": 1.3066, + "step": 3159000 + }, + { + "epoch": 33.34, + "learning_rate": 3.332840845531201e-05, + "loss": 1.3136, + "step": 3159500 + }, + { + "epoch": 33.35, + "learning_rate": 3.3325770127800585e-05, + "loss": 1.2696, + "step": 3160000 + }, + { + "epoch": 33.35, + "learning_rate": 3.332313180028917e-05, + "loss": 1.291, + "step": 3160500 + }, + { + "epoch": 33.36, + "learning_rate": 3.3320493472777736e-05, + "loss": 1.3075, + "step": 3161000 + }, + { + "epoch": 33.36, + "learning_rate": 3.331785514526631e-05, + "loss": 1.2606, + "step": 3161500 + }, + { + "epoch": 33.37, + "learning_rate": 3.3315216817754894e-05, + "loss": 1.3536, + "step": 3162000 + }, + { + "epoch": 33.37, + "learning_rate": 3.331257849024347e-05, + "loss": 1.3163, + "step": 3162500 + }, + { + "epoch": 33.38, + "learning_rate": 3.3309940162732045e-05, + "loss": 1.2909, + "step": 3163000 + }, + { + "epoch": 33.39, + "learning_rate": 3.3307301835220614e-05, + "loss": 1.3512, + "step": 3163500 + }, + { + "epoch": 33.39, + "learning_rate": 3.3304663507709196e-05, + "loss": 1.3493, + "step": 3164000 + }, + { + "epoch": 33.4, + "learning_rate": 3.330202518019777e-05, + "loss": 1.3511, + "step": 3164500 + }, + { + "epoch": 33.4, + "learning_rate": 3.329938685268635e-05, + "loss": 1.2561, + "step": 3165000 + }, + { + "epoch": 33.41, + "learning_rate": 3.329674852517492e-05, + "loss": 1.2963, + "step": 3165500 + }, + { + "epoch": 33.41, + "learning_rate": 3.32941101976635e-05, + "loss": 1.3398, + "step": 3166000 + }, + { + "epoch": 33.42, + "learning_rate": 3.329147187015207e-05, + "loss": 1.2586, + "step": 3166500 + }, + { + "epoch": 33.42, + "learning_rate": 3.328883354264065e-05, + "loss": 1.3096, + "step": 3167000 + }, + { + "epoch": 33.43, + "learning_rate": 3.328619521512923e-05, + "loss": 1.3269, + "step": 3167500 + }, + { + "epoch": 33.43, + "learning_rate": 3.32835568876178e-05, + "loss": 1.3565, + "step": 3168000 + }, + { + "epoch": 33.44, + "learning_rate": 3.3280918560106375e-05, + "loss": 1.3701, + "step": 3168500 + }, + { + "epoch": 33.44, + "learning_rate": 3.327828023259496e-05, + "loss": 1.3081, + "step": 3169000 + }, + { + "epoch": 33.45, + "learning_rate": 3.327564190508353e-05, + "loss": 1.3246, + "step": 3169500 + }, + { + "epoch": 33.45, + "learning_rate": 3.327300357757211e-05, + "loss": 1.3195, + "step": 3170000 + }, + { + "epoch": 33.46, + "learning_rate": 3.3270365250060684e-05, + "loss": 1.2866, + "step": 3170500 + }, + { + "epoch": 33.46, + "learning_rate": 3.326772692254926e-05, + "loss": 1.2834, + "step": 3171000 + }, + { + "epoch": 33.47, + "learning_rate": 3.3265088595037835e-05, + "loss": 1.2788, + "step": 3171500 + }, + { + "epoch": 33.48, + "learning_rate": 3.326245026752641e-05, + "loss": 1.3093, + "step": 3172000 + }, + { + "epoch": 33.48, + "learning_rate": 3.325981194001499e-05, + "loss": 1.3269, + "step": 3172500 + }, + { + "epoch": 33.49, + "learning_rate": 3.325717361250356e-05, + "loss": 1.2981, + "step": 3173000 + }, + { + "epoch": 33.49, + "learning_rate": 3.325453528499214e-05, + "loss": 1.3106, + "step": 3173500 + }, + { + "epoch": 33.5, + "learning_rate": 3.325189695748072e-05, + "loss": 1.3352, + "step": 3174000 + }, + { + "epoch": 33.5, + "learning_rate": 3.3249258629969295e-05, + "loss": 1.3502, + "step": 3174500 + }, + { + "epoch": 33.51, + "learning_rate": 3.324662030245786e-05, + "loss": 1.3368, + "step": 3175000 + }, + { + "epoch": 33.51, + "learning_rate": 3.324398197494644e-05, + "loss": 1.2912, + "step": 3175500 + }, + { + "epoch": 33.52, + "learning_rate": 3.324134364743502e-05, + "loss": 1.3728, + "step": 3176000 + }, + { + "epoch": 33.52, + "learning_rate": 3.3238705319923597e-05, + "loss": 1.3164, + "step": 3176500 + }, + { + "epoch": 33.53, + "learning_rate": 3.323606699241217e-05, + "loss": 1.3249, + "step": 3177000 + }, + { + "epoch": 33.53, + "learning_rate": 3.323342866490075e-05, + "loss": 1.3019, + "step": 3177500 + }, + { + "epoch": 33.54, + "learning_rate": 3.323079033738932e-05, + "loss": 1.3066, + "step": 3178000 + }, + { + "epoch": 33.54, + "learning_rate": 3.32281520098779e-05, + "loss": 1.3147, + "step": 3178500 + }, + { + "epoch": 33.55, + "learning_rate": 3.3225513682366474e-05, + "loss": 1.2926, + "step": 3179000 + }, + { + "epoch": 33.55, + "learning_rate": 3.3222875354855056e-05, + "loss": 1.307, + "step": 3179500 + }, + { + "epoch": 33.56, + "learning_rate": 3.3220237027343625e-05, + "loss": 1.3108, + "step": 3180000 + }, + { + "epoch": 33.56, + "learning_rate": 3.32175986998322e-05, + "loss": 1.3267, + "step": 3180500 + }, + { + "epoch": 33.57, + "learning_rate": 3.321496037232078e-05, + "loss": 1.3503, + "step": 3181000 + }, + { + "epoch": 33.58, + "learning_rate": 3.321232204480936e-05, + "loss": 1.2908, + "step": 3181500 + }, + { + "epoch": 33.58, + "learning_rate": 3.3209683717297934e-05, + "loss": 1.3671, + "step": 3182000 + }, + { + "epoch": 33.59, + "learning_rate": 3.320704538978651e-05, + "loss": 1.3109, + "step": 3182500 + }, + { + "epoch": 33.59, + "learning_rate": 3.3204407062275085e-05, + "loss": 1.3116, + "step": 3183000 + }, + { + "epoch": 33.6, + "learning_rate": 3.320176873476366e-05, + "loss": 1.316, + "step": 3183500 + }, + { + "epoch": 33.6, + "learning_rate": 3.3199130407252236e-05, + "loss": 1.3167, + "step": 3184000 + }, + { + "epoch": 33.61, + "learning_rate": 3.319649207974081e-05, + "loss": 1.301, + "step": 3184500 + }, + { + "epoch": 33.61, + "learning_rate": 3.3193853752229387e-05, + "loss": 1.3861, + "step": 3185000 + }, + { + "epoch": 33.62, + "learning_rate": 3.319121542471796e-05, + "loss": 1.3462, + "step": 3185500 + }, + { + "epoch": 33.62, + "learning_rate": 3.3188577097206544e-05, + "loss": 1.3412, + "step": 3186000 + }, + { + "epoch": 33.63, + "learning_rate": 3.318593876969512e-05, + "loss": 1.3537, + "step": 3186500 + }, + { + "epoch": 33.63, + "learning_rate": 3.318330044218369e-05, + "loss": 1.3113, + "step": 3187000 + }, + { + "epoch": 33.64, + "learning_rate": 3.3180662114672264e-05, + "loss": 1.291, + "step": 3187500 + }, + { + "epoch": 33.64, + "learning_rate": 3.3178023787160846e-05, + "loss": 1.3276, + "step": 3188000 + }, + { + "epoch": 33.65, + "learning_rate": 3.317538545964942e-05, + "loss": 1.3084, + "step": 3188500 + }, + { + "epoch": 33.65, + "learning_rate": 3.3172747132138e-05, + "loss": 1.3225, + "step": 3189000 + }, + { + "epoch": 33.66, + "learning_rate": 3.317010880462657e-05, + "loss": 1.3293, + "step": 3189500 + }, + { + "epoch": 33.67, + "learning_rate": 3.316747047711515e-05, + "loss": 1.3066, + "step": 3190000 + }, + { + "epoch": 33.67, + "learning_rate": 3.3164832149603724e-05, + "loss": 1.2862, + "step": 3190500 + }, + { + "epoch": 33.68, + "learning_rate": 3.31621938220923e-05, + "loss": 1.327, + "step": 3191000 + }, + { + "epoch": 33.68, + "learning_rate": 3.315955549458088e-05, + "loss": 1.3003, + "step": 3191500 + }, + { + "epoch": 33.69, + "learning_rate": 3.315691716706945e-05, + "loss": 1.2913, + "step": 3192000 + }, + { + "epoch": 33.69, + "learning_rate": 3.3154278839558026e-05, + "loss": 1.3118, + "step": 3192500 + }, + { + "epoch": 33.7, + "learning_rate": 3.315164051204661e-05, + "loss": 1.3427, + "step": 3193000 + }, + { + "epoch": 33.7, + "learning_rate": 3.3149002184535183e-05, + "loss": 1.3039, + "step": 3193500 + }, + { + "epoch": 33.71, + "learning_rate": 3.314636385702375e-05, + "loss": 1.3641, + "step": 3194000 + }, + { + "epoch": 33.71, + "learning_rate": 3.3143725529512334e-05, + "loss": 1.317, + "step": 3194500 + }, + { + "epoch": 33.72, + "learning_rate": 3.314108720200091e-05, + "loss": 1.3056, + "step": 3195000 + }, + { + "epoch": 33.72, + "learning_rate": 3.3138448874489485e-05, + "loss": 1.2794, + "step": 3195500 + }, + { + "epoch": 33.73, + "learning_rate": 3.313581054697806e-05, + "loss": 1.3115, + "step": 3196000 + }, + { + "epoch": 33.73, + "learning_rate": 3.3133172219466636e-05, + "loss": 1.3187, + "step": 3196500 + }, + { + "epoch": 33.74, + "learning_rate": 3.313053389195521e-05, + "loss": 1.307, + "step": 3197000 + }, + { + "epoch": 33.74, + "learning_rate": 3.312789556444379e-05, + "loss": 1.3294, + "step": 3197500 + }, + { + "epoch": 33.75, + "learning_rate": 3.312525723693237e-05, + "loss": 1.3329, + "step": 3198000 + }, + { + "epoch": 33.75, + "learning_rate": 3.3122618909420945e-05, + "loss": 1.2826, + "step": 3198500 + }, + { + "epoch": 33.76, + "learning_rate": 3.3119980581909514e-05, + "loss": 1.3323, + "step": 3199000 + }, + { + "epoch": 33.77, + "learning_rate": 3.3117342254398096e-05, + "loss": 1.3218, + "step": 3199500 + }, + { + "epoch": 33.77, + "learning_rate": 3.311470392688667e-05, + "loss": 1.3738, + "step": 3200000 + }, + { + "epoch": 33.78, + "learning_rate": 3.311206559937525e-05, + "loss": 1.2933, + "step": 3200500 + }, + { + "epoch": 33.78, + "learning_rate": 3.310942727186382e-05, + "loss": 1.2597, + "step": 3201000 + }, + { + "epoch": 33.79, + "learning_rate": 3.31067889443524e-05, + "loss": 1.3242, + "step": 3201500 + }, + { + "epoch": 33.79, + "learning_rate": 3.3104150616840973e-05, + "loss": 1.3009, + "step": 3202000 + }, + { + "epoch": 33.8, + "learning_rate": 3.310151228932955e-05, + "loss": 1.3434, + "step": 3202500 + }, + { + "epoch": 33.8, + "learning_rate": 3.3098873961818124e-05, + "loss": 1.3552, + "step": 3203000 + }, + { + "epoch": 33.81, + "learning_rate": 3.30962356343067e-05, + "loss": 1.3384, + "step": 3203500 + }, + { + "epoch": 33.81, + "learning_rate": 3.3093597306795275e-05, + "loss": 1.2885, + "step": 3204000 + }, + { + "epoch": 33.82, + "learning_rate": 3.309095897928385e-05, + "loss": 1.3533, + "step": 3204500 + }, + { + "epoch": 33.82, + "learning_rate": 3.308832065177243e-05, + "loss": 1.2907, + "step": 3205000 + }, + { + "epoch": 33.83, + "learning_rate": 3.308568232426101e-05, + "loss": 1.3217, + "step": 3205500 + }, + { + "epoch": 33.83, + "learning_rate": 3.308304399674958e-05, + "loss": 1.3183, + "step": 3206000 + }, + { + "epoch": 33.84, + "learning_rate": 3.308040566923816e-05, + "loss": 1.321, + "step": 3206500 + }, + { + "epoch": 33.84, + "learning_rate": 3.3077767341726735e-05, + "loss": 1.3482, + "step": 3207000 + }, + { + "epoch": 33.85, + "learning_rate": 3.307512901421531e-05, + "loss": 1.3263, + "step": 3207500 + }, + { + "epoch": 33.86, + "learning_rate": 3.3072490686703886e-05, + "loss": 1.3386, + "step": 3208000 + }, + { + "epoch": 33.86, + "learning_rate": 3.306985235919246e-05, + "loss": 1.2713, + "step": 3208500 + }, + { + "epoch": 33.87, + "learning_rate": 3.306721403168104e-05, + "loss": 1.278, + "step": 3209000 + }, + { + "epoch": 33.87, + "learning_rate": 3.306457570416961e-05, + "loss": 1.3538, + "step": 3209500 + }, + { + "epoch": 33.88, + "learning_rate": 3.3061937376658195e-05, + "loss": 1.2459, + "step": 3210000 + }, + { + "epoch": 33.88, + "learning_rate": 3.305929904914677e-05, + "loss": 1.3107, + "step": 3210500 + }, + { + "epoch": 33.89, + "learning_rate": 3.305666072163534e-05, + "loss": 1.2672, + "step": 3211000 + }, + { + "epoch": 33.89, + "learning_rate": 3.305402239412392e-05, + "loss": 1.3116, + "step": 3211500 + }, + { + "epoch": 33.9, + "learning_rate": 3.30513840666125e-05, + "loss": 1.3515, + "step": 3212000 + }, + { + "epoch": 33.9, + "learning_rate": 3.304874573910107e-05, + "loss": 1.3336, + "step": 3212500 + }, + { + "epoch": 33.91, + "learning_rate": 3.304610741158964e-05, + "loss": 1.367, + "step": 3213000 + }, + { + "epoch": 33.91, + "learning_rate": 3.304346908407822e-05, + "loss": 1.3202, + "step": 3213500 + }, + { + "epoch": 33.92, + "learning_rate": 3.30408307565668e-05, + "loss": 1.3372, + "step": 3214000 + }, + { + "epoch": 33.92, + "learning_rate": 3.3038192429055374e-05, + "loss": 1.2827, + "step": 3214500 + }, + { + "epoch": 33.93, + "learning_rate": 3.303555410154395e-05, + "loss": 1.3017, + "step": 3215000 + }, + { + "epoch": 33.93, + "learning_rate": 3.3032915774032525e-05, + "loss": 1.3545, + "step": 3215500 + }, + { + "epoch": 33.94, + "learning_rate": 3.30302774465211e-05, + "loss": 1.3382, + "step": 3216000 + }, + { + "epoch": 33.94, + "learning_rate": 3.3027639119009676e-05, + "loss": 1.324, + "step": 3216500 + }, + { + "epoch": 33.95, + "learning_rate": 3.302500079149826e-05, + "loss": 1.344, + "step": 3217000 + }, + { + "epoch": 33.96, + "learning_rate": 3.3022362463986834e-05, + "loss": 1.3175, + "step": 3217500 + }, + { + "epoch": 33.96, + "learning_rate": 3.30197241364754e-05, + "loss": 1.3327, + "step": 3218000 + }, + { + "epoch": 33.97, + "learning_rate": 3.3017085808963985e-05, + "loss": 1.3199, + "step": 3218500 + }, + { + "epoch": 33.97, + "learning_rate": 3.301444748145256e-05, + "loss": 1.3494, + "step": 3219000 + }, + { + "epoch": 33.98, + "learning_rate": 3.3011809153941136e-05, + "loss": 1.3002, + "step": 3219500 + }, + { + "epoch": 33.98, + "learning_rate": 3.300917082642971e-05, + "loss": 1.3497, + "step": 3220000 + }, + { + "epoch": 33.99, + "learning_rate": 3.300653249891829e-05, + "loss": 1.3129, + "step": 3220500 + }, + { + "epoch": 33.99, + "learning_rate": 3.300389417140686e-05, + "loss": 1.3057, + "step": 3221000 + }, + { + "epoch": 34.0, + "learning_rate": 3.300125584389544e-05, + "loss": 1.2889, + "step": 3221500 + }, + { + "epoch": 34.0, + "learning_rate": 3.299861751638402e-05, + "loss": 1.2558, + "step": 3222000 + }, + { + "epoch": 34.01, + "learning_rate": 3.299597918887259e-05, + "loss": 1.2933, + "step": 3222500 + }, + { + "epoch": 34.01, + "learning_rate": 3.2993340861361164e-05, + "loss": 1.3215, + "step": 3223000 + }, + { + "epoch": 34.02, + "learning_rate": 3.2990702533849747e-05, + "loss": 1.2686, + "step": 3223500 + }, + { + "epoch": 34.02, + "learning_rate": 3.298806420633832e-05, + "loss": 1.2178, + "step": 3224000 + }, + { + "epoch": 34.03, + "learning_rate": 3.29854258788269e-05, + "loss": 1.3814, + "step": 3224500 + }, + { + "epoch": 34.03, + "learning_rate": 3.2982787551315466e-05, + "loss": 1.2955, + "step": 3225000 + }, + { + "epoch": 34.04, + "learning_rate": 3.298014922380405e-05, + "loss": 1.2715, + "step": 3225500 + }, + { + "epoch": 34.04, + "learning_rate": 3.2977510896292624e-05, + "loss": 1.299, + "step": 3226000 + }, + { + "epoch": 34.05, + "learning_rate": 3.29748725687812e-05, + "loss": 1.3215, + "step": 3226500 + }, + { + "epoch": 34.06, + "learning_rate": 3.2972234241269775e-05, + "loss": 1.2777, + "step": 3227000 + }, + { + "epoch": 34.06, + "learning_rate": 3.296959591375835e-05, + "loss": 1.3414, + "step": 3227500 + }, + { + "epoch": 34.07, + "learning_rate": 3.2966957586246926e-05, + "loss": 1.3415, + "step": 3228000 + }, + { + "epoch": 34.07, + "learning_rate": 3.29643192587355e-05, + "loss": 1.3075, + "step": 3228500 + }, + { + "epoch": 34.08, + "learning_rate": 3.2961680931224084e-05, + "loss": 1.4117, + "step": 3229000 + }, + { + "epoch": 34.08, + "learning_rate": 3.295904260371266e-05, + "loss": 1.3101, + "step": 3229500 + }, + { + "epoch": 34.09, + "learning_rate": 3.295640427620123e-05, + "loss": 1.3818, + "step": 3230000 + }, + { + "epoch": 34.09, + "learning_rate": 3.295376594868981e-05, + "loss": 1.3704, + "step": 3230500 + }, + { + "epoch": 34.1, + "learning_rate": 3.2951127621178386e-05, + "loss": 1.2713, + "step": 3231000 + }, + { + "epoch": 34.1, + "learning_rate": 3.294848929366696e-05, + "loss": 1.2711, + "step": 3231500 + }, + { + "epoch": 34.11, + "learning_rate": 3.2945850966155537e-05, + "loss": 1.27, + "step": 3232000 + }, + { + "epoch": 34.11, + "learning_rate": 3.294321263864411e-05, + "loss": 1.3425, + "step": 3232500 + }, + { + "epoch": 34.12, + "learning_rate": 3.294057431113269e-05, + "loss": 1.3171, + "step": 3233000 + }, + { + "epoch": 34.12, + "learning_rate": 3.293793598362126e-05, + "loss": 1.3296, + "step": 3233500 + }, + { + "epoch": 34.13, + "learning_rate": 3.2935297656109845e-05, + "loss": 1.3085, + "step": 3234000 + }, + { + "epoch": 34.13, + "learning_rate": 3.2932659328598414e-05, + "loss": 1.295, + "step": 3234500 + }, + { + "epoch": 34.14, + "learning_rate": 3.293002100108699e-05, + "loss": 1.3112, + "step": 3235000 + }, + { + "epoch": 34.15, + "learning_rate": 3.292738267357557e-05, + "loss": 1.2964, + "step": 3235500 + }, + { + "epoch": 34.15, + "learning_rate": 3.292474434606415e-05, + "loss": 1.3135, + "step": 3236000 + }, + { + "epoch": 34.16, + "learning_rate": 3.292210601855272e-05, + "loss": 1.3481, + "step": 3236500 + }, + { + "epoch": 34.16, + "learning_rate": 3.291946769104129e-05, + "loss": 1.3046, + "step": 3237000 + }, + { + "epoch": 34.17, + "learning_rate": 3.2916829363529874e-05, + "loss": 1.2585, + "step": 3237500 + }, + { + "epoch": 34.17, + "learning_rate": 3.291419103601845e-05, + "loss": 1.2676, + "step": 3238000 + }, + { + "epoch": 34.18, + "learning_rate": 3.2911552708507025e-05, + "loss": 1.2602, + "step": 3238500 + }, + { + "epoch": 34.18, + "learning_rate": 3.29089143809956e-05, + "loss": 1.2973, + "step": 3239000 + }, + { + "epoch": 34.19, + "learning_rate": 3.2906276053484176e-05, + "loss": 1.2955, + "step": 3239500 + }, + { + "epoch": 34.19, + "learning_rate": 3.290363772597275e-05, + "loss": 1.2571, + "step": 3240000 + }, + { + "epoch": 34.2, + "learning_rate": 3.290099939846133e-05, + "loss": 1.3381, + "step": 3240500 + }, + { + "epoch": 34.2, + "learning_rate": 3.289836107094991e-05, + "loss": 1.2416, + "step": 3241000 + }, + { + "epoch": 34.21, + "learning_rate": 3.289572274343848e-05, + "loss": 1.3953, + "step": 3241500 + }, + { + "epoch": 34.21, + "learning_rate": 3.289308441592705e-05, + "loss": 1.3548, + "step": 3242000 + }, + { + "epoch": 34.22, + "learning_rate": 3.2890446088415635e-05, + "loss": 1.3128, + "step": 3242500 + }, + { + "epoch": 34.22, + "learning_rate": 3.288780776090421e-05, + "loss": 1.29, + "step": 3243000 + }, + { + "epoch": 34.23, + "learning_rate": 3.2885169433392786e-05, + "loss": 1.2992, + "step": 3243500 + }, + { + "epoch": 34.23, + "learning_rate": 3.288253110588136e-05, + "loss": 1.3104, + "step": 3244000 + }, + { + "epoch": 34.24, + "learning_rate": 3.287989277836994e-05, + "loss": 1.3415, + "step": 3244500 + }, + { + "epoch": 34.25, + "learning_rate": 3.287725445085851e-05, + "loss": 1.3043, + "step": 3245000 + }, + { + "epoch": 34.25, + "learning_rate": 3.287461612334709e-05, + "loss": 1.3252, + "step": 3245500 + }, + { + "epoch": 34.26, + "learning_rate": 3.287197779583567e-05, + "loss": 1.2893, + "step": 3246000 + }, + { + "epoch": 34.26, + "learning_rate": 3.286933946832424e-05, + "loss": 1.3089, + "step": 3246500 + }, + { + "epoch": 34.27, + "learning_rate": 3.2866701140812815e-05, + "loss": 1.3389, + "step": 3247000 + }, + { + "epoch": 34.27, + "learning_rate": 3.28640628133014e-05, + "loss": 1.3125, + "step": 3247500 + }, + { + "epoch": 34.28, + "learning_rate": 3.286142448578997e-05, + "loss": 1.336, + "step": 3248000 + }, + { + "epoch": 34.28, + "learning_rate": 3.285878615827855e-05, + "loss": 1.3375, + "step": 3248500 + }, + { + "epoch": 34.29, + "learning_rate": 3.285614783076712e-05, + "loss": 1.2896, + "step": 3249000 + }, + { + "epoch": 34.29, + "learning_rate": 3.28535095032557e-05, + "loss": 1.2884, + "step": 3249500 + }, + { + "epoch": 34.3, + "learning_rate": 3.2850871175744274e-05, + "loss": 1.3394, + "step": 3250000 + }, + { + "epoch": 34.3, + "learning_rate": 3.284823284823285e-05, + "loss": 1.2778, + "step": 3250500 + }, + { + "epoch": 34.31, + "learning_rate": 3.2845594520721425e-05, + "loss": 1.3489, + "step": 3251000 + }, + { + "epoch": 34.31, + "learning_rate": 3.284295619321e-05, + "loss": 1.2453, + "step": 3251500 + }, + { + "epoch": 34.32, + "learning_rate": 3.2840317865698576e-05, + "loss": 1.3505, + "step": 3252000 + }, + { + "epoch": 34.32, + "learning_rate": 3.283767953818715e-05, + "loss": 1.2954, + "step": 3252500 + }, + { + "epoch": 34.33, + "learning_rate": 3.2835041210675734e-05, + "loss": 1.3283, + "step": 3253000 + }, + { + "epoch": 34.34, + "learning_rate": 3.28324028831643e-05, + "loss": 1.3218, + "step": 3253500 + }, + { + "epoch": 34.34, + "learning_rate": 3.282976455565288e-05, + "loss": 1.2296, + "step": 3254000 + }, + { + "epoch": 34.35, + "learning_rate": 3.282712622814146e-05, + "loss": 1.3185, + "step": 3254500 + }, + { + "epoch": 34.35, + "learning_rate": 3.2824487900630036e-05, + "loss": 1.3322, + "step": 3255000 + }, + { + "epoch": 34.36, + "learning_rate": 3.282184957311861e-05, + "loss": 1.2891, + "step": 3255500 + }, + { + "epoch": 34.36, + "learning_rate": 3.281921124560719e-05, + "loss": 1.3007, + "step": 3256000 + }, + { + "epoch": 34.37, + "learning_rate": 3.281657291809576e-05, + "loss": 1.3372, + "step": 3256500 + }, + { + "epoch": 34.37, + "learning_rate": 3.281393459058434e-05, + "loss": 1.3206, + "step": 3257000 + }, + { + "epoch": 34.38, + "learning_rate": 3.2811296263072914e-05, + "loss": 1.3174, + "step": 3257500 + }, + { + "epoch": 34.38, + "learning_rate": 3.2808657935561496e-05, + "loss": 1.271, + "step": 3258000 + }, + { + "epoch": 34.39, + "learning_rate": 3.2806019608050065e-05, + "loss": 1.3047, + "step": 3258500 + }, + { + "epoch": 34.39, + "learning_rate": 3.280338128053864e-05, + "loss": 1.2941, + "step": 3259000 + }, + { + "epoch": 34.4, + "learning_rate": 3.280074295302722e-05, + "loss": 1.3239, + "step": 3259500 + }, + { + "epoch": 34.4, + "learning_rate": 3.27981046255158e-05, + "loss": 1.3285, + "step": 3260000 + }, + { + "epoch": 34.41, + "learning_rate": 3.2795466298004366e-05, + "loss": 1.3125, + "step": 3260500 + }, + { + "epoch": 34.41, + "learning_rate": 3.279282797049294e-05, + "loss": 1.312, + "step": 3261000 + }, + { + "epoch": 34.42, + "learning_rate": 3.2790189642981524e-05, + "loss": 1.2865, + "step": 3261500 + }, + { + "epoch": 34.42, + "learning_rate": 3.27875513154701e-05, + "loss": 1.2769, + "step": 3262000 + }, + { + "epoch": 34.43, + "learning_rate": 3.2784912987958675e-05, + "loss": 1.317, + "step": 3262500 + }, + { + "epoch": 34.44, + "learning_rate": 3.278227466044725e-05, + "loss": 1.3048, + "step": 3263000 + }, + { + "epoch": 34.44, + "learning_rate": 3.2779636332935826e-05, + "loss": 1.313, + "step": 3263500 + }, + { + "epoch": 34.45, + "learning_rate": 3.27769980054244e-05, + "loss": 1.3216, + "step": 3264000 + }, + { + "epoch": 34.45, + "learning_rate": 3.277435967791298e-05, + "loss": 1.3537, + "step": 3264500 + }, + { + "epoch": 34.46, + "learning_rate": 3.277172135040156e-05, + "loss": 1.288, + "step": 3265000 + }, + { + "epoch": 34.46, + "learning_rate": 3.276908302289013e-05, + "loss": 1.2625, + "step": 3265500 + }, + { + "epoch": 34.47, + "learning_rate": 3.2766444695378704e-05, + "loss": 1.3367, + "step": 3266000 + }, + { + "epoch": 34.47, + "learning_rate": 3.2763806367867286e-05, + "loss": 1.3401, + "step": 3266500 + }, + { + "epoch": 34.48, + "learning_rate": 3.276116804035586e-05, + "loss": 1.2935, + "step": 3267000 + }, + { + "epoch": 34.48, + "learning_rate": 3.275852971284444e-05, + "loss": 1.3239, + "step": 3267500 + }, + { + "epoch": 34.49, + "learning_rate": 3.275589138533301e-05, + "loss": 1.3044, + "step": 3268000 + }, + { + "epoch": 34.49, + "learning_rate": 3.275325305782159e-05, + "loss": 1.3248, + "step": 3268500 + }, + { + "epoch": 34.5, + "learning_rate": 3.275061473031016e-05, + "loss": 1.3818, + "step": 3269000 + }, + { + "epoch": 34.5, + "learning_rate": 3.274797640279874e-05, + "loss": 1.3073, + "step": 3269500 + }, + { + "epoch": 34.51, + "learning_rate": 3.2745338075287314e-05, + "loss": 1.3256, + "step": 3270000 + }, + { + "epoch": 34.51, + "learning_rate": 3.274269974777589e-05, + "loss": 1.3106, + "step": 3270500 + }, + { + "epoch": 34.52, + "learning_rate": 3.2740061420264465e-05, + "loss": 1.333, + "step": 3271000 + }, + { + "epoch": 34.53, + "learning_rate": 3.273742309275305e-05, + "loss": 1.3467, + "step": 3271500 + }, + { + "epoch": 34.53, + "learning_rate": 3.273478476524162e-05, + "loss": 1.3422, + "step": 3272000 + }, + { + "epoch": 34.54, + "learning_rate": 3.273214643773019e-05, + "loss": 1.3051, + "step": 3272500 + }, + { + "epoch": 34.54, + "learning_rate": 3.2729508110218774e-05, + "loss": 1.3029, + "step": 3273000 + }, + { + "epoch": 34.55, + "learning_rate": 3.272686978270735e-05, + "loss": 1.328, + "step": 3273500 + }, + { + "epoch": 34.55, + "learning_rate": 3.2724231455195925e-05, + "loss": 1.3118, + "step": 3274000 + }, + { + "epoch": 34.56, + "learning_rate": 3.27215931276845e-05, + "loss": 1.3052, + "step": 3274500 + }, + { + "epoch": 34.56, + "learning_rate": 3.2718954800173076e-05, + "loss": 1.2605, + "step": 3275000 + }, + { + "epoch": 34.57, + "learning_rate": 3.271631647266165e-05, + "loss": 1.3502, + "step": 3275500 + }, + { + "epoch": 34.57, + "learning_rate": 3.271367814515023e-05, + "loss": 1.3527, + "step": 3276000 + }, + { + "epoch": 34.58, + "learning_rate": 3.27110398176388e-05, + "loss": 1.2374, + "step": 3276500 + }, + { + "epoch": 34.58, + "learning_rate": 3.2708401490127385e-05, + "loss": 1.2863, + "step": 3277000 + }, + { + "epoch": 34.59, + "learning_rate": 3.270576316261595e-05, + "loss": 1.3758, + "step": 3277500 + }, + { + "epoch": 34.59, + "learning_rate": 3.270312483510453e-05, + "loss": 1.2928, + "step": 3278000 + }, + { + "epoch": 34.6, + "learning_rate": 3.270048650759311e-05, + "loss": 1.3418, + "step": 3278500 + }, + { + "epoch": 34.6, + "learning_rate": 3.2697848180081687e-05, + "loss": 1.3556, + "step": 3279000 + }, + { + "epoch": 34.61, + "learning_rate": 3.2695209852570255e-05, + "loss": 1.3507, + "step": 3279500 + }, + { + "epoch": 34.61, + "learning_rate": 3.269257152505884e-05, + "loss": 1.3493, + "step": 3280000 + }, + { + "epoch": 34.62, + "learning_rate": 3.268993319754741e-05, + "loss": 1.3556, + "step": 3280500 + }, + { + "epoch": 34.63, + "learning_rate": 3.268729487003599e-05, + "loss": 1.3153, + "step": 3281000 + }, + { + "epoch": 34.63, + "learning_rate": 3.2684656542524564e-05, + "loss": 1.2871, + "step": 3281500 + }, + { + "epoch": 34.64, + "learning_rate": 3.268201821501314e-05, + "loss": 1.2688, + "step": 3282000 + }, + { + "epoch": 34.64, + "learning_rate": 3.2679379887501715e-05, + "loss": 1.3194, + "step": 3282500 + }, + { + "epoch": 34.65, + "learning_rate": 3.267674155999029e-05, + "loss": 1.3175, + "step": 3283000 + }, + { + "epoch": 34.65, + "learning_rate": 3.267410323247887e-05, + "loss": 1.3148, + "step": 3283500 + }, + { + "epoch": 34.66, + "learning_rate": 3.267146490496745e-05, + "loss": 1.334, + "step": 3284000 + }, + { + "epoch": 34.66, + "learning_rate": 3.266882657745602e-05, + "loss": 1.3011, + "step": 3284500 + }, + { + "epoch": 34.67, + "learning_rate": 3.26661882499446e-05, + "loss": 1.3726, + "step": 3285000 + }, + { + "epoch": 34.67, + "learning_rate": 3.2663549922433175e-05, + "loss": 1.3204, + "step": 3285500 + }, + { + "epoch": 34.68, + "learning_rate": 3.266091159492175e-05, + "loss": 1.3395, + "step": 3286000 + }, + { + "epoch": 34.68, + "learning_rate": 3.2658273267410326e-05, + "loss": 1.3144, + "step": 3286500 + }, + { + "epoch": 34.69, + "learning_rate": 3.26556349398989e-05, + "loss": 1.3257, + "step": 3287000 + }, + { + "epoch": 34.69, + "learning_rate": 3.265299661238748e-05, + "loss": 1.2427, + "step": 3287500 + }, + { + "epoch": 34.7, + "learning_rate": 3.265035828487605e-05, + "loss": 1.2841, + "step": 3288000 + }, + { + "epoch": 34.7, + "learning_rate": 3.264771995736463e-05, + "loss": 1.3274, + "step": 3288500 + }, + { + "epoch": 34.71, + "learning_rate": 3.26450816298532e-05, + "loss": 1.3338, + "step": 3289000 + }, + { + "epoch": 34.72, + "learning_rate": 3.264244330234178e-05, + "loss": 1.3277, + "step": 3289500 + }, + { + "epoch": 34.72, + "learning_rate": 3.2639804974830354e-05, + "loss": 1.3306, + "step": 3290000 + }, + { + "epoch": 34.73, + "learning_rate": 3.2637166647318936e-05, + "loss": 1.341, + "step": 3290500 + }, + { + "epoch": 34.73, + "learning_rate": 3.263452831980751e-05, + "loss": 1.2837, + "step": 3291000 + }, + { + "epoch": 34.74, + "learning_rate": 3.263188999229608e-05, + "loss": 1.3344, + "step": 3291500 + }, + { + "epoch": 34.74, + "learning_rate": 3.262925166478466e-05, + "loss": 1.2827, + "step": 3292000 + }, + { + "epoch": 34.75, + "learning_rate": 3.262661333727324e-05, + "loss": 1.2952, + "step": 3292500 + }, + { + "epoch": 34.75, + "learning_rate": 3.2623975009761814e-05, + "loss": 1.278, + "step": 3293000 + }, + { + "epoch": 34.76, + "learning_rate": 3.262133668225039e-05, + "loss": 1.2901, + "step": 3293500 + }, + { + "epoch": 34.76, + "learning_rate": 3.2618698354738965e-05, + "loss": 1.313, + "step": 3294000 + }, + { + "epoch": 34.77, + "learning_rate": 3.261606002722754e-05, + "loss": 1.3016, + "step": 3294500 + }, + { + "epoch": 34.77, + "learning_rate": 3.2613421699716116e-05, + "loss": 1.302, + "step": 3295000 + }, + { + "epoch": 34.78, + "learning_rate": 3.26107833722047e-05, + "loss": 1.3227, + "step": 3295500 + }, + { + "epoch": 34.78, + "learning_rate": 3.2608145044693273e-05, + "loss": 1.2401, + "step": 3296000 + }, + { + "epoch": 34.79, + "learning_rate": 3.260550671718184e-05, + "loss": 1.339, + "step": 3296500 + }, + { + "epoch": 34.79, + "learning_rate": 3.2602868389670424e-05, + "loss": 1.3217, + "step": 3297000 + }, + { + "epoch": 34.8, + "learning_rate": 3.2600230062159e-05, + "loss": 1.3168, + "step": 3297500 + }, + { + "epoch": 34.8, + "learning_rate": 3.2597591734647575e-05, + "loss": 1.2718, + "step": 3298000 + }, + { + "epoch": 34.81, + "learning_rate": 3.2594953407136144e-05, + "loss": 1.2859, + "step": 3298500 + }, + { + "epoch": 34.82, + "learning_rate": 3.2592315079624726e-05, + "loss": 1.3084, + "step": 3299000 + }, + { + "epoch": 34.82, + "learning_rate": 3.25896767521133e-05, + "loss": 1.3176, + "step": 3299500 + }, + { + "epoch": 34.83, + "learning_rate": 3.258703842460188e-05, + "loss": 1.3069, + "step": 3300000 + }, + { + "epoch": 34.83, + "learning_rate": 3.258440009709045e-05, + "loss": 1.3093, + "step": 3300500 + }, + { + "epoch": 34.84, + "learning_rate": 3.258176176957903e-05, + "loss": 1.2626, + "step": 3301000 + }, + { + "epoch": 34.84, + "learning_rate": 3.2579123442067604e-05, + "loss": 1.3116, + "step": 3301500 + }, + { + "epoch": 34.85, + "learning_rate": 3.257648511455618e-05, + "loss": 1.3213, + "step": 3302000 + }, + { + "epoch": 34.85, + "learning_rate": 3.257384678704476e-05, + "loss": 1.3029, + "step": 3302500 + }, + { + "epoch": 34.86, + "learning_rate": 3.257120845953334e-05, + "loss": 1.3392, + "step": 3303000 + }, + { + "epoch": 34.86, + "learning_rate": 3.2568570132021906e-05, + "loss": 1.2915, + "step": 3303500 + }, + { + "epoch": 34.87, + "learning_rate": 3.256593180451049e-05, + "loss": 1.2715, + "step": 3304000 + }, + { + "epoch": 34.87, + "learning_rate": 3.2563293476999064e-05, + "loss": 1.3344, + "step": 3304500 + }, + { + "epoch": 34.88, + "learning_rate": 3.256065514948764e-05, + "loss": 1.3022, + "step": 3305000 + }, + { + "epoch": 34.88, + "learning_rate": 3.2558016821976215e-05, + "loss": 1.289, + "step": 3305500 + }, + { + "epoch": 34.89, + "learning_rate": 3.255537849446479e-05, + "loss": 1.3373, + "step": 3306000 + }, + { + "epoch": 34.89, + "learning_rate": 3.2552740166953366e-05, + "loss": 1.3209, + "step": 3306500 + }, + { + "epoch": 34.9, + "learning_rate": 3.255010183944194e-05, + "loss": 1.3425, + "step": 3307000 + }, + { + "epoch": 34.91, + "learning_rate": 3.254746351193052e-05, + "loss": 1.3134, + "step": 3307500 + }, + { + "epoch": 34.91, + "learning_rate": 3.254482518441909e-05, + "loss": 1.2812, + "step": 3308000 + }, + { + "epoch": 34.92, + "learning_rate": 3.254218685690767e-05, + "loss": 1.2592, + "step": 3308500 + }, + { + "epoch": 34.92, + "learning_rate": 3.253954852939625e-05, + "loss": 1.3604, + "step": 3309000 + }, + { + "epoch": 34.93, + "learning_rate": 3.2536910201884825e-05, + "loss": 1.3166, + "step": 3309500 + }, + { + "epoch": 34.93, + "learning_rate": 3.25342718743734e-05, + "loss": 1.3783, + "step": 3310000 + }, + { + "epoch": 34.94, + "learning_rate": 3.253163354686197e-05, + "loss": 1.3289, + "step": 3310500 + }, + { + "epoch": 34.94, + "learning_rate": 3.252899521935055e-05, + "loss": 1.263, + "step": 3311000 + }, + { + "epoch": 34.95, + "learning_rate": 3.252635689183913e-05, + "loss": 1.3541, + "step": 3311500 + }, + { + "epoch": 34.95, + "learning_rate": 3.25237185643277e-05, + "loss": 1.2955, + "step": 3312000 + }, + { + "epoch": 34.96, + "learning_rate": 3.252108023681628e-05, + "loss": 1.3328, + "step": 3312500 + }, + { + "epoch": 34.96, + "learning_rate": 3.2518441909304854e-05, + "loss": 1.3573, + "step": 3313000 + }, + { + "epoch": 34.97, + "learning_rate": 3.251580358179343e-05, + "loss": 1.2823, + "step": 3313500 + }, + { + "epoch": 34.97, + "learning_rate": 3.2513165254282005e-05, + "loss": 1.3211, + "step": 3314000 + }, + { + "epoch": 34.98, + "learning_rate": 3.251052692677059e-05, + "loss": 1.347, + "step": 3314500 + }, + { + "epoch": 34.98, + "learning_rate": 3.2507888599259156e-05, + "loss": 1.2679, + "step": 3315000 + }, + { + "epoch": 34.99, + "learning_rate": 3.250525027174773e-05, + "loss": 1.3187, + "step": 3315500 + }, + { + "epoch": 34.99, + "learning_rate": 3.250261194423631e-05, + "loss": 1.3376, + "step": 3316000 + }, + { + "epoch": 35.0, + "learning_rate": 3.249997361672489e-05, + "loss": 1.3143, + "step": 3316500 + }, + { + "epoch": 35.01, + "learning_rate": 3.2497335289213464e-05, + "loss": 1.3282, + "step": 3317000 + }, + { + "epoch": 35.01, + "learning_rate": 3.249469696170204e-05, + "loss": 1.267, + "step": 3317500 + }, + { + "epoch": 35.02, + "learning_rate": 3.2492058634190615e-05, + "loss": 1.2521, + "step": 3318000 + }, + { + "epoch": 35.02, + "learning_rate": 3.248942030667919e-05, + "loss": 1.3308, + "step": 3318500 + }, + { + "epoch": 35.03, + "learning_rate": 3.2486781979167766e-05, + "loss": 1.2637, + "step": 3319000 + }, + { + "epoch": 35.03, + "learning_rate": 3.248414365165635e-05, + "loss": 1.3144, + "step": 3319500 + }, + { + "epoch": 35.04, + "learning_rate": 3.248150532414492e-05, + "loss": 1.3129, + "step": 3320000 + }, + { + "epoch": 35.04, + "learning_rate": 3.247886699663349e-05, + "loss": 1.2922, + "step": 3320500 + }, + { + "epoch": 35.05, + "learning_rate": 3.2476228669122075e-05, + "loss": 1.3077, + "step": 3321000 + }, + { + "epoch": 35.05, + "learning_rate": 3.247359034161065e-05, + "loss": 1.2704, + "step": 3321500 + }, + { + "epoch": 35.06, + "learning_rate": 3.2470952014099226e-05, + "loss": 1.2828, + "step": 3322000 + }, + { + "epoch": 35.06, + "learning_rate": 3.2468313686587795e-05, + "loss": 1.3033, + "step": 3322500 + }, + { + "epoch": 35.07, + "learning_rate": 3.246567535907638e-05, + "loss": 1.2496, + "step": 3323000 + }, + { + "epoch": 35.07, + "learning_rate": 3.246303703156495e-05, + "loss": 1.3323, + "step": 3323500 + }, + { + "epoch": 35.08, + "learning_rate": 3.246039870405353e-05, + "loss": 1.3229, + "step": 3324000 + }, + { + "epoch": 35.08, + "learning_rate": 3.24577603765421e-05, + "loss": 1.3097, + "step": 3324500 + }, + { + "epoch": 35.09, + "learning_rate": 3.245512204903068e-05, + "loss": 1.3332, + "step": 3325000 + }, + { + "epoch": 35.1, + "learning_rate": 3.2452483721519254e-05, + "loss": 1.2869, + "step": 3325500 + }, + { + "epoch": 35.1, + "learning_rate": 3.244984539400783e-05, + "loss": 1.3401, + "step": 3326000 + }, + { + "epoch": 35.11, + "learning_rate": 3.244720706649641e-05, + "loss": 1.3031, + "step": 3326500 + }, + { + "epoch": 35.11, + "learning_rate": 3.244456873898498e-05, + "loss": 1.2857, + "step": 3327000 + }, + { + "epoch": 35.12, + "learning_rate": 3.2441930411473556e-05, + "loss": 1.3691, + "step": 3327500 + }, + { + "epoch": 35.12, + "learning_rate": 3.243929208396214e-05, + "loss": 1.3464, + "step": 3328000 + }, + { + "epoch": 35.13, + "learning_rate": 3.2436653756450714e-05, + "loss": 1.2873, + "step": 3328500 + }, + { + "epoch": 35.13, + "learning_rate": 3.243401542893929e-05, + "loss": 1.2936, + "step": 3329000 + }, + { + "epoch": 35.14, + "learning_rate": 3.2431377101427865e-05, + "loss": 1.283, + "step": 3329500 + }, + { + "epoch": 35.14, + "learning_rate": 3.242873877391644e-05, + "loss": 1.2765, + "step": 3330000 + }, + { + "epoch": 35.15, + "learning_rate": 3.2426100446405016e-05, + "loss": 1.2968, + "step": 3330500 + }, + { + "epoch": 35.15, + "learning_rate": 3.242346211889359e-05, + "loss": 1.2759, + "step": 3331000 + }, + { + "epoch": 35.16, + "learning_rate": 3.2420823791382174e-05, + "loss": 1.2986, + "step": 3331500 + }, + { + "epoch": 35.16, + "learning_rate": 3.241818546387074e-05, + "loss": 1.3082, + "step": 3332000 + }, + { + "epoch": 35.17, + "learning_rate": 3.241554713635932e-05, + "loss": 1.3032, + "step": 3332500 + }, + { + "epoch": 35.17, + "learning_rate": 3.24129088088479e-05, + "loss": 1.3105, + "step": 3333000 + }, + { + "epoch": 35.18, + "learning_rate": 3.2410270481336476e-05, + "loss": 1.3073, + "step": 3333500 + }, + { + "epoch": 35.18, + "learning_rate": 3.2407632153825044e-05, + "loss": 1.2848, + "step": 3334000 + }, + { + "epoch": 35.19, + "learning_rate": 3.240499382631362e-05, + "loss": 1.2727, + "step": 3334500 + }, + { + "epoch": 35.2, + "learning_rate": 3.24023554988022e-05, + "loss": 1.3173, + "step": 3335000 + }, + { + "epoch": 35.2, + "learning_rate": 3.239971717129078e-05, + "loss": 1.3117, + "step": 3335500 + }, + { + "epoch": 35.21, + "learning_rate": 3.239707884377935e-05, + "loss": 1.3018, + "step": 3336000 + }, + { + "epoch": 35.21, + "learning_rate": 3.239444051626793e-05, + "loss": 1.2742, + "step": 3336500 + }, + { + "epoch": 35.22, + "learning_rate": 3.2391802188756504e-05, + "loss": 1.3232, + "step": 3337000 + }, + { + "epoch": 35.22, + "learning_rate": 3.238916386124508e-05, + "loss": 1.3547, + "step": 3337500 + }, + { + "epoch": 35.23, + "learning_rate": 3.2386525533733655e-05, + "loss": 1.3141, + "step": 3338000 + }, + { + "epoch": 35.23, + "learning_rate": 3.238388720622224e-05, + "loss": 1.2623, + "step": 3338500 + }, + { + "epoch": 35.24, + "learning_rate": 3.2381248878710806e-05, + "loss": 1.3982, + "step": 3339000 + }, + { + "epoch": 35.24, + "learning_rate": 3.237861055119938e-05, + "loss": 1.3163, + "step": 3339500 + }, + { + "epoch": 35.25, + "learning_rate": 3.2375972223687964e-05, + "loss": 1.2853, + "step": 3340000 + }, + { + "epoch": 35.25, + "learning_rate": 3.237333389617654e-05, + "loss": 1.2563, + "step": 3340500 + }, + { + "epoch": 35.26, + "learning_rate": 3.2370695568665115e-05, + "loss": 1.3293, + "step": 3341000 + }, + { + "epoch": 35.26, + "learning_rate": 3.236805724115369e-05, + "loss": 1.2829, + "step": 3341500 + }, + { + "epoch": 35.27, + "learning_rate": 3.2365418913642266e-05, + "loss": 1.2506, + "step": 3342000 + }, + { + "epoch": 35.27, + "learning_rate": 3.236278058613084e-05, + "loss": 1.2898, + "step": 3342500 + }, + { + "epoch": 35.28, + "learning_rate": 3.236014225861942e-05, + "loss": 1.3228, + "step": 3343000 + }, + { + "epoch": 35.28, + "learning_rate": 3.235750393110799e-05, + "loss": 1.2863, + "step": 3343500 + }, + { + "epoch": 35.29, + "learning_rate": 3.235486560359657e-05, + "loss": 1.3688, + "step": 3344000 + }, + { + "epoch": 35.3, + "learning_rate": 3.235222727608514e-05, + "loss": 1.2793, + "step": 3344500 + }, + { + "epoch": 35.3, + "learning_rate": 3.2349588948573725e-05, + "loss": 1.2555, + "step": 3345000 + }, + { + "epoch": 35.31, + "learning_rate": 3.23469506210623e-05, + "loss": 1.2876, + "step": 3345500 + }, + { + "epoch": 35.31, + "learning_rate": 3.234431229355087e-05, + "loss": 1.254, + "step": 3346000 + }, + { + "epoch": 35.32, + "learning_rate": 3.234167396603945e-05, + "loss": 1.3048, + "step": 3346500 + }, + { + "epoch": 35.32, + "learning_rate": 3.233903563852803e-05, + "loss": 1.354, + "step": 3347000 + }, + { + "epoch": 35.33, + "learning_rate": 3.23363973110166e-05, + "loss": 1.3816, + "step": 3347500 + }, + { + "epoch": 35.33, + "learning_rate": 3.233375898350518e-05, + "loss": 1.3683, + "step": 3348000 + }, + { + "epoch": 35.34, + "learning_rate": 3.2331120655993754e-05, + "loss": 1.2878, + "step": 3348500 + }, + { + "epoch": 35.34, + "learning_rate": 3.232848232848233e-05, + "loss": 1.3261, + "step": 3349000 + }, + { + "epoch": 35.35, + "learning_rate": 3.2325844000970905e-05, + "loss": 1.3175, + "step": 3349500 + }, + { + "epoch": 35.35, + "learning_rate": 3.232320567345948e-05, + "loss": 1.2946, + "step": 3350000 + }, + { + "epoch": 35.36, + "learning_rate": 3.232056734594806e-05, + "loss": 1.3184, + "step": 3350500 + }, + { + "epoch": 35.36, + "learning_rate": 3.231792901843663e-05, + "loss": 1.3249, + "step": 3351000 + }, + { + "epoch": 35.37, + "learning_rate": 3.231529069092521e-05, + "loss": 1.285, + "step": 3351500 + }, + { + "epoch": 35.37, + "learning_rate": 3.231265236341379e-05, + "loss": 1.3183, + "step": 3352000 + }, + { + "epoch": 35.38, + "learning_rate": 3.2310014035902365e-05, + "loss": 1.3327, + "step": 3352500 + }, + { + "epoch": 35.39, + "learning_rate": 3.230737570839093e-05, + "loss": 1.3164, + "step": 3353000 + }, + { + "epoch": 35.39, + "learning_rate": 3.2304737380879516e-05, + "loss": 1.2909, + "step": 3353500 + }, + { + "epoch": 35.4, + "learning_rate": 3.230209905336809e-05, + "loss": 1.3256, + "step": 3354000 + }, + { + "epoch": 35.4, + "learning_rate": 3.2299460725856666e-05, + "loss": 1.3115, + "step": 3354500 + }, + { + "epoch": 35.41, + "learning_rate": 3.229682239834524e-05, + "loss": 1.2867, + "step": 3355000 + }, + { + "epoch": 35.41, + "learning_rate": 3.229418407083382e-05, + "loss": 1.2568, + "step": 3355500 + }, + { + "epoch": 35.42, + "learning_rate": 3.229154574332239e-05, + "loss": 1.2942, + "step": 3356000 + }, + { + "epoch": 35.42, + "learning_rate": 3.228890741581097e-05, + "loss": 1.3226, + "step": 3356500 + }, + { + "epoch": 35.43, + "learning_rate": 3.228626908829955e-05, + "loss": 1.2666, + "step": 3357000 + }, + { + "epoch": 35.43, + "learning_rate": 3.2283630760788126e-05, + "loss": 1.2832, + "step": 3357500 + }, + { + "epoch": 35.44, + "learning_rate": 3.2280992433276695e-05, + "loss": 1.3169, + "step": 3358000 + }, + { + "epoch": 35.44, + "learning_rate": 3.227835410576528e-05, + "loss": 1.2809, + "step": 3358500 + }, + { + "epoch": 35.45, + "learning_rate": 3.227571577825385e-05, + "loss": 1.2725, + "step": 3359000 + }, + { + "epoch": 35.45, + "learning_rate": 3.227307745074243e-05, + "loss": 1.3165, + "step": 3359500 + }, + { + "epoch": 35.46, + "learning_rate": 3.2270439123231004e-05, + "loss": 1.3392, + "step": 3360000 + }, + { + "epoch": 35.46, + "learning_rate": 3.226780079571958e-05, + "loss": 1.2745, + "step": 3360500 + }, + { + "epoch": 35.47, + "learning_rate": 3.2265162468208155e-05, + "loss": 1.3238, + "step": 3361000 + }, + { + "epoch": 35.47, + "learning_rate": 3.226252414069673e-05, + "loss": 1.2903, + "step": 3361500 + }, + { + "epoch": 35.48, + "learning_rate": 3.2259885813185306e-05, + "loss": 1.3401, + "step": 3362000 + }, + { + "epoch": 35.49, + "learning_rate": 3.225724748567388e-05, + "loss": 1.3136, + "step": 3362500 + }, + { + "epoch": 35.49, + "learning_rate": 3.2254609158162457e-05, + "loss": 1.2528, + "step": 3363000 + }, + { + "epoch": 35.5, + "learning_rate": 3.225197083065103e-05, + "loss": 1.3093, + "step": 3363500 + }, + { + "epoch": 35.5, + "learning_rate": 3.2249332503139614e-05, + "loss": 1.2914, + "step": 3364000 + }, + { + "epoch": 35.51, + "learning_rate": 3.224669417562819e-05, + "loss": 1.2961, + "step": 3364500 + }, + { + "epoch": 35.51, + "learning_rate": 3.224405584811676e-05, + "loss": 1.3304, + "step": 3365000 + }, + { + "epoch": 35.52, + "learning_rate": 3.224141752060534e-05, + "loss": 1.3033, + "step": 3365500 + }, + { + "epoch": 35.52, + "learning_rate": 3.2238779193093916e-05, + "loss": 1.2998, + "step": 3366000 + }, + { + "epoch": 35.53, + "learning_rate": 3.223614086558249e-05, + "loss": 1.3956, + "step": 3366500 + }, + { + "epoch": 35.53, + "learning_rate": 3.223350253807107e-05, + "loss": 1.2939, + "step": 3367000 + }, + { + "epoch": 35.54, + "learning_rate": 3.223086421055964e-05, + "loss": 1.3304, + "step": 3367500 + }, + { + "epoch": 35.54, + "learning_rate": 3.222822588304822e-05, + "loss": 1.3282, + "step": 3368000 + }, + { + "epoch": 35.55, + "learning_rate": 3.2225587555536794e-05, + "loss": 1.2735, + "step": 3368500 + }, + { + "epoch": 35.55, + "learning_rate": 3.2222949228025376e-05, + "loss": 1.2829, + "step": 3369000 + }, + { + "epoch": 35.56, + "learning_rate": 3.222031090051395e-05, + "loss": 1.3421, + "step": 3369500 + }, + { + "epoch": 35.56, + "learning_rate": 3.221767257300252e-05, + "loss": 1.3165, + "step": 3370000 + }, + { + "epoch": 35.57, + "learning_rate": 3.22150342454911e-05, + "loss": 1.3031, + "step": 3370500 + }, + { + "epoch": 35.58, + "learning_rate": 3.221239591797968e-05, + "loss": 1.3093, + "step": 3371000 + }, + { + "epoch": 35.58, + "learning_rate": 3.220975759046825e-05, + "loss": 1.324, + "step": 3371500 + }, + { + "epoch": 35.59, + "learning_rate": 3.220711926295682e-05, + "loss": 1.3185, + "step": 3372000 + }, + { + "epoch": 35.59, + "learning_rate": 3.2204480935445404e-05, + "loss": 1.365, + "step": 3372500 + }, + { + "epoch": 35.6, + "learning_rate": 3.220184260793398e-05, + "loss": 1.3447, + "step": 3373000 + }, + { + "epoch": 35.6, + "learning_rate": 3.2199204280422555e-05, + "loss": 1.2985, + "step": 3373500 + }, + { + "epoch": 35.61, + "learning_rate": 3.219656595291113e-05, + "loss": 1.3086, + "step": 3374000 + }, + { + "epoch": 35.61, + "learning_rate": 3.2193927625399706e-05, + "loss": 1.3139, + "step": 3374500 + }, + { + "epoch": 35.62, + "learning_rate": 3.219128929788828e-05, + "loss": 1.2734, + "step": 3375000 + }, + { + "epoch": 35.62, + "learning_rate": 3.218865097037686e-05, + "loss": 1.2954, + "step": 3375500 + }, + { + "epoch": 35.63, + "learning_rate": 3.218601264286544e-05, + "loss": 1.2664, + "step": 3376000 + }, + { + "epoch": 35.63, + "learning_rate": 3.2183374315354015e-05, + "loss": 1.3353, + "step": 3376500 + }, + { + "epoch": 35.64, + "learning_rate": 3.2180735987842584e-05, + "loss": 1.3147, + "step": 3377000 + }, + { + "epoch": 35.64, + "learning_rate": 3.2178097660331166e-05, + "loss": 1.3677, + "step": 3377500 + }, + { + "epoch": 35.65, + "learning_rate": 3.217545933281974e-05, + "loss": 1.3092, + "step": 3378000 + }, + { + "epoch": 35.65, + "learning_rate": 3.217282100530832e-05, + "loss": 1.2832, + "step": 3378500 + }, + { + "epoch": 35.66, + "learning_rate": 3.217018267779689e-05, + "loss": 1.3259, + "step": 3379000 + }, + { + "epoch": 35.66, + "learning_rate": 3.216754435028547e-05, + "loss": 1.2887, + "step": 3379500 + }, + { + "epoch": 35.67, + "learning_rate": 3.2164906022774043e-05, + "loss": 1.3324, + "step": 3380000 + }, + { + "epoch": 35.68, + "learning_rate": 3.216226769526262e-05, + "loss": 1.3417, + "step": 3380500 + }, + { + "epoch": 35.68, + "learning_rate": 3.21596293677512e-05, + "loss": 1.3076, + "step": 3381000 + }, + { + "epoch": 35.69, + "learning_rate": 3.215699104023977e-05, + "loss": 1.3274, + "step": 3381500 + }, + { + "epoch": 35.69, + "learning_rate": 3.2154352712728345e-05, + "loss": 1.313, + "step": 3382000 + }, + { + "epoch": 35.7, + "learning_rate": 3.215171438521693e-05, + "loss": 1.3182, + "step": 3382500 + }, + { + "epoch": 35.7, + "learning_rate": 3.21490760577055e-05, + "loss": 1.3163, + "step": 3383000 + }, + { + "epoch": 35.71, + "learning_rate": 3.214643773019408e-05, + "loss": 1.2353, + "step": 3383500 + }, + { + "epoch": 35.71, + "learning_rate": 3.214379940268265e-05, + "loss": 1.2792, + "step": 3384000 + }, + { + "epoch": 35.72, + "learning_rate": 3.214116107517123e-05, + "loss": 1.2776, + "step": 3384500 + }, + { + "epoch": 35.72, + "learning_rate": 3.2138522747659805e-05, + "loss": 1.3411, + "step": 3385000 + }, + { + "epoch": 35.73, + "learning_rate": 3.213588442014838e-05, + "loss": 1.2824, + "step": 3385500 + }, + { + "epoch": 35.73, + "learning_rate": 3.2133246092636956e-05, + "loss": 1.3001, + "step": 3386000 + }, + { + "epoch": 35.74, + "learning_rate": 3.213060776512553e-05, + "loss": 1.3303, + "step": 3386500 + }, + { + "epoch": 35.74, + "learning_rate": 3.212796943761411e-05, + "loss": 1.264, + "step": 3387000 + }, + { + "epoch": 35.75, + "learning_rate": 3.212533111010268e-05, + "loss": 1.239, + "step": 3387500 + }, + { + "epoch": 35.75, + "learning_rate": 3.2122692782591265e-05, + "loss": 1.2878, + "step": 3388000 + }, + { + "epoch": 35.76, + "learning_rate": 3.212005445507984e-05, + "loss": 1.3013, + "step": 3388500 + }, + { + "epoch": 35.77, + "learning_rate": 3.211741612756841e-05, + "loss": 1.2855, + "step": 3389000 + }, + { + "epoch": 35.77, + "learning_rate": 3.211477780005699e-05, + "loss": 1.2948, + "step": 3389500 + }, + { + "epoch": 35.78, + "learning_rate": 3.211213947254557e-05, + "loss": 1.3197, + "step": 3390000 + }, + { + "epoch": 35.78, + "learning_rate": 3.210950114503414e-05, + "loss": 1.306, + "step": 3390500 + }, + { + "epoch": 35.79, + "learning_rate": 3.210686281752272e-05, + "loss": 1.3721, + "step": 3391000 + }, + { + "epoch": 35.79, + "learning_rate": 3.210422449001129e-05, + "loss": 1.3112, + "step": 3391500 + }, + { + "epoch": 35.8, + "learning_rate": 3.210158616249987e-05, + "loss": 1.321, + "step": 3392000 + }, + { + "epoch": 35.8, + "learning_rate": 3.2098947834988444e-05, + "loss": 1.3018, + "step": 3392500 + }, + { + "epoch": 35.81, + "learning_rate": 3.2096309507477026e-05, + "loss": 1.3218, + "step": 3393000 + }, + { + "epoch": 35.81, + "learning_rate": 3.2093671179965595e-05, + "loss": 1.34, + "step": 3393500 + }, + { + "epoch": 35.82, + "learning_rate": 3.209103285245417e-05, + "loss": 1.3263, + "step": 3394000 + }, + { + "epoch": 35.82, + "learning_rate": 3.208839452494275e-05, + "loss": 1.3037, + "step": 3394500 + }, + { + "epoch": 35.83, + "learning_rate": 3.208575619743133e-05, + "loss": 1.2947, + "step": 3395000 + }, + { + "epoch": 35.83, + "learning_rate": 3.2083117869919904e-05, + "loss": 1.3031, + "step": 3395500 + }, + { + "epoch": 35.84, + "learning_rate": 3.208047954240847e-05, + "loss": 1.325, + "step": 3396000 + }, + { + "epoch": 35.84, + "learning_rate": 3.2077841214897055e-05, + "loss": 1.3524, + "step": 3396500 + }, + { + "epoch": 35.85, + "learning_rate": 3.207520288738563e-05, + "loss": 1.2493, + "step": 3397000 + }, + { + "epoch": 35.85, + "learning_rate": 3.2072564559874206e-05, + "loss": 1.3388, + "step": 3397500 + }, + { + "epoch": 35.86, + "learning_rate": 3.206992623236279e-05, + "loss": 1.2963, + "step": 3398000 + }, + { + "epoch": 35.87, + "learning_rate": 3.206728790485136e-05, + "loss": 1.3261, + "step": 3398500 + }, + { + "epoch": 35.87, + "learning_rate": 3.206464957733993e-05, + "loss": 1.2814, + "step": 3399000 + }, + { + "epoch": 35.88, + "learning_rate": 3.206201124982851e-05, + "loss": 1.2833, + "step": 3399500 + }, + { + "epoch": 35.88, + "learning_rate": 3.205937292231709e-05, + "loss": 1.264, + "step": 3400000 + }, + { + "epoch": 35.89, + "learning_rate": 3.205673459480566e-05, + "loss": 1.2544, + "step": 3400500 + }, + { + "epoch": 35.89, + "learning_rate": 3.2054096267294234e-05, + "loss": 1.3241, + "step": 3401000 + }, + { + "epoch": 35.9, + "learning_rate": 3.2051457939782817e-05, + "loss": 1.3062, + "step": 3401500 + }, + { + "epoch": 35.9, + "learning_rate": 3.204881961227139e-05, + "loss": 1.2721, + "step": 3402000 + }, + { + "epoch": 35.91, + "learning_rate": 3.204618128475997e-05, + "loss": 1.2978, + "step": 3402500 + }, + { + "epoch": 35.91, + "learning_rate": 3.204354295724854e-05, + "loss": 1.3007, + "step": 3403000 + }, + { + "epoch": 35.92, + "learning_rate": 3.204090462973712e-05, + "loss": 1.3314, + "step": 3403500 + }, + { + "epoch": 35.92, + "learning_rate": 3.2038266302225694e-05, + "loss": 1.321, + "step": 3404000 + }, + { + "epoch": 35.93, + "learning_rate": 3.203562797471427e-05, + "loss": 1.2821, + "step": 3404500 + }, + { + "epoch": 35.93, + "learning_rate": 3.203298964720285e-05, + "loss": 1.329, + "step": 3405000 + }, + { + "epoch": 35.94, + "learning_rate": 3.203035131969142e-05, + "loss": 1.2505, + "step": 3405500 + }, + { + "epoch": 35.94, + "learning_rate": 3.2027712992179996e-05, + "loss": 1.3061, + "step": 3406000 + }, + { + "epoch": 35.95, + "learning_rate": 3.202507466466858e-05, + "loss": 1.3506, + "step": 3406500 + }, + { + "epoch": 35.96, + "learning_rate": 3.2022436337157154e-05, + "loss": 1.2848, + "step": 3407000 + }, + { + "epoch": 35.96, + "learning_rate": 3.201979800964573e-05, + "loss": 1.2351, + "step": 3407500 + }, + { + "epoch": 35.97, + "learning_rate": 3.20171596821343e-05, + "loss": 1.3178, + "step": 3408000 + }, + { + "epoch": 35.97, + "learning_rate": 3.201452135462288e-05, + "loss": 1.2878, + "step": 3408500 + }, + { + "epoch": 35.98, + "learning_rate": 3.2011883027111456e-05, + "loss": 1.3587, + "step": 3409000 + }, + { + "epoch": 35.98, + "learning_rate": 3.200924469960003e-05, + "loss": 1.4041, + "step": 3409500 + }, + { + "epoch": 35.99, + "learning_rate": 3.2006606372088607e-05, + "loss": 1.3553, + "step": 3410000 + }, + { + "epoch": 35.99, + "learning_rate": 3.200396804457718e-05, + "loss": 1.3405, + "step": 3410500 + }, + { + "epoch": 36.0, + "learning_rate": 3.200132971706576e-05, + "loss": 1.3439, + "step": 3411000 + }, + { + "epoch": 36.0, + "learning_rate": 3.199869138955433e-05, + "loss": 1.2915, + "step": 3411500 + }, + { + "epoch": 36.01, + "learning_rate": 3.1996053062042915e-05, + "loss": 1.3599, + "step": 3412000 + }, + { + "epoch": 36.01, + "learning_rate": 3.1993414734531484e-05, + "loss": 1.3213, + "step": 3412500 + }, + { + "epoch": 36.02, + "learning_rate": 3.199077640702006e-05, + "loss": 1.2963, + "step": 3413000 + }, + { + "epoch": 36.02, + "learning_rate": 3.198813807950864e-05, + "loss": 1.2931, + "step": 3413500 + }, + { + "epoch": 36.03, + "learning_rate": 3.198549975199722e-05, + "loss": 1.2656, + "step": 3414000 + }, + { + "epoch": 36.03, + "learning_rate": 3.198286142448579e-05, + "loss": 1.3036, + "step": 3414500 + }, + { + "epoch": 36.04, + "learning_rate": 3.198022309697437e-05, + "loss": 1.3521, + "step": 3415000 + }, + { + "epoch": 36.04, + "learning_rate": 3.1977584769462944e-05, + "loss": 1.2994, + "step": 3415500 + }, + { + "epoch": 36.05, + "learning_rate": 3.197494644195152e-05, + "loss": 1.2671, + "step": 3416000 + }, + { + "epoch": 36.06, + "learning_rate": 3.1972308114440095e-05, + "loss": 1.2654, + "step": 3416500 + }, + { + "epoch": 36.06, + "learning_rate": 3.196966978692868e-05, + "loss": 1.303, + "step": 3417000 + }, + { + "epoch": 36.07, + "learning_rate": 3.1967031459417246e-05, + "loss": 1.3033, + "step": 3417500 + }, + { + "epoch": 36.07, + "learning_rate": 3.196439313190582e-05, + "loss": 1.3188, + "step": 3418000 + }, + { + "epoch": 36.08, + "learning_rate": 3.1961754804394403e-05, + "loss": 1.281, + "step": 3418500 + }, + { + "epoch": 36.08, + "learning_rate": 3.195911647688298e-05, + "loss": 1.3069, + "step": 3419000 + }, + { + "epoch": 36.09, + "learning_rate": 3.195647814937155e-05, + "loss": 1.2874, + "step": 3419500 + }, + { + "epoch": 36.09, + "learning_rate": 3.195383982186013e-05, + "loss": 1.2861, + "step": 3420000 + }, + { + "epoch": 36.1, + "learning_rate": 3.1951201494348705e-05, + "loss": 1.282, + "step": 3420500 + }, + { + "epoch": 36.1, + "learning_rate": 3.194856316683728e-05, + "loss": 1.3354, + "step": 3421000 + }, + { + "epoch": 36.11, + "learning_rate": 3.1945924839325856e-05, + "loss": 1.2998, + "step": 3421500 + }, + { + "epoch": 36.11, + "learning_rate": 3.194328651181443e-05, + "loss": 1.3079, + "step": 3422000 + }, + { + "epoch": 36.12, + "learning_rate": 3.194064818430301e-05, + "loss": 1.2694, + "step": 3422500 + }, + { + "epoch": 36.12, + "learning_rate": 3.193800985679158e-05, + "loss": 1.2697, + "step": 3423000 + }, + { + "epoch": 36.13, + "learning_rate": 3.193537152928016e-05, + "loss": 1.3719, + "step": 3423500 + }, + { + "epoch": 36.13, + "learning_rate": 3.193273320176874e-05, + "loss": 1.3045, + "step": 3424000 + }, + { + "epoch": 36.14, + "learning_rate": 3.193009487425731e-05, + "loss": 1.3182, + "step": 3424500 + }, + { + "epoch": 36.15, + "learning_rate": 3.1927456546745885e-05, + "loss": 1.3024, + "step": 3425000 + }, + { + "epoch": 36.15, + "learning_rate": 3.192481821923447e-05, + "loss": 1.2764, + "step": 3425500 + }, + { + "epoch": 36.16, + "learning_rate": 3.192217989172304e-05, + "loss": 1.3114, + "step": 3426000 + }, + { + "epoch": 36.16, + "learning_rate": 3.191954156421162e-05, + "loss": 1.2808, + "step": 3426500 + }, + { + "epoch": 36.17, + "learning_rate": 3.1916903236700193e-05, + "loss": 1.3368, + "step": 3427000 + }, + { + "epoch": 36.17, + "learning_rate": 3.191426490918877e-05, + "loss": 1.3199, + "step": 3427500 + }, + { + "epoch": 36.18, + "learning_rate": 3.1911626581677344e-05, + "loss": 1.3201, + "step": 3428000 + }, + { + "epoch": 36.18, + "learning_rate": 3.190898825416592e-05, + "loss": 1.273, + "step": 3428500 + }, + { + "epoch": 36.19, + "learning_rate": 3.1906349926654495e-05, + "loss": 1.3349, + "step": 3429000 + }, + { + "epoch": 36.19, + "learning_rate": 3.190371159914307e-05, + "loss": 1.3314, + "step": 3429500 + }, + { + "epoch": 36.2, + "learning_rate": 3.1901073271631646e-05, + "loss": 1.2621, + "step": 3430000 + }, + { + "epoch": 36.2, + "learning_rate": 3.189843494412023e-05, + "loss": 1.3375, + "step": 3430500 + }, + { + "epoch": 36.21, + "learning_rate": 3.1895796616608804e-05, + "loss": 1.2945, + "step": 3431000 + }, + { + "epoch": 36.21, + "learning_rate": 3.189315828909737e-05, + "loss": 1.2716, + "step": 3431500 + }, + { + "epoch": 36.22, + "learning_rate": 3.1890519961585955e-05, + "loss": 1.2819, + "step": 3432000 + }, + { + "epoch": 36.22, + "learning_rate": 3.188788163407453e-05, + "loss": 1.3364, + "step": 3432500 + }, + { + "epoch": 36.23, + "learning_rate": 3.1885243306563106e-05, + "loss": 1.3239, + "step": 3433000 + }, + { + "epoch": 36.23, + "learning_rate": 3.188260497905168e-05, + "loss": 1.2668, + "step": 3433500 + }, + { + "epoch": 36.24, + "learning_rate": 3.187996665154026e-05, + "loss": 1.3217, + "step": 3434000 + }, + { + "epoch": 36.25, + "learning_rate": 3.187732832402883e-05, + "loss": 1.3135, + "step": 3434500 + }, + { + "epoch": 36.25, + "learning_rate": 3.187468999651741e-05, + "loss": 1.3086, + "step": 3435000 + }, + { + "epoch": 36.26, + "learning_rate": 3.1872051669005984e-05, + "loss": 1.3335, + "step": 3435500 + }, + { + "epoch": 36.26, + "learning_rate": 3.1869413341494566e-05, + "loss": 1.2929, + "step": 3436000 + }, + { + "epoch": 36.27, + "learning_rate": 3.1866775013983134e-05, + "loss": 1.2855, + "step": 3436500 + }, + { + "epoch": 36.27, + "learning_rate": 3.186413668647171e-05, + "loss": 1.3044, + "step": 3437000 + }, + { + "epoch": 36.28, + "learning_rate": 3.186149835896029e-05, + "loss": 1.3197, + "step": 3437500 + }, + { + "epoch": 36.28, + "learning_rate": 3.185886003144887e-05, + "loss": 1.2898, + "step": 3438000 + }, + { + "epoch": 36.29, + "learning_rate": 3.1856221703937436e-05, + "loss": 1.3115, + "step": 3438500 + }, + { + "epoch": 36.29, + "learning_rate": 3.185358337642602e-05, + "loss": 1.3275, + "step": 3439000 + }, + { + "epoch": 36.3, + "learning_rate": 3.1850945048914594e-05, + "loss": 1.3462, + "step": 3439500 + }, + { + "epoch": 36.3, + "learning_rate": 3.184830672140317e-05, + "loss": 1.3389, + "step": 3440000 + }, + { + "epoch": 36.31, + "learning_rate": 3.1845668393891745e-05, + "loss": 1.3338, + "step": 3440500 + }, + { + "epoch": 36.31, + "learning_rate": 3.184303006638032e-05, + "loss": 1.3057, + "step": 3441000 + }, + { + "epoch": 36.32, + "learning_rate": 3.1840391738868896e-05, + "loss": 1.3027, + "step": 3441500 + }, + { + "epoch": 36.32, + "learning_rate": 3.183775341135747e-05, + "loss": 1.2566, + "step": 3442000 + }, + { + "epoch": 36.33, + "learning_rate": 3.1835115083846054e-05, + "loss": 1.3479, + "step": 3442500 + }, + { + "epoch": 36.34, + "learning_rate": 3.183247675633463e-05, + "loss": 1.2729, + "step": 3443000 + }, + { + "epoch": 36.34, + "learning_rate": 3.18298384288232e-05, + "loss": 1.3078, + "step": 3443500 + }, + { + "epoch": 36.35, + "learning_rate": 3.182720010131178e-05, + "loss": 1.2878, + "step": 3444000 + }, + { + "epoch": 36.35, + "learning_rate": 3.1824561773800356e-05, + "loss": 1.3511, + "step": 3444500 + }, + { + "epoch": 36.36, + "learning_rate": 3.182192344628893e-05, + "loss": 1.3416, + "step": 3445000 + }, + { + "epoch": 36.36, + "learning_rate": 3.181928511877751e-05, + "loss": 1.3127, + "step": 3445500 + }, + { + "epoch": 36.37, + "learning_rate": 3.181664679126608e-05, + "loss": 1.3096, + "step": 3446000 + }, + { + "epoch": 36.37, + "learning_rate": 3.181400846375466e-05, + "loss": 1.3012, + "step": 3446500 + }, + { + "epoch": 36.38, + "learning_rate": 3.181137013624323e-05, + "loss": 1.3508, + "step": 3447000 + }, + { + "epoch": 36.38, + "learning_rate": 3.180873180873181e-05, + "loss": 1.3412, + "step": 3447500 + }, + { + "epoch": 36.39, + "learning_rate": 3.1806093481220384e-05, + "loss": 1.3027, + "step": 3448000 + }, + { + "epoch": 36.39, + "learning_rate": 3.180345515370896e-05, + "loss": 1.2829, + "step": 3448500 + }, + { + "epoch": 36.4, + "learning_rate": 3.1800816826197535e-05, + "loss": 1.3455, + "step": 3449000 + }, + { + "epoch": 36.4, + "learning_rate": 3.179817849868612e-05, + "loss": 1.3141, + "step": 3449500 + }, + { + "epoch": 36.41, + "learning_rate": 3.179554017117469e-05, + "loss": 1.2862, + "step": 3450000 + }, + { + "epoch": 36.41, + "learning_rate": 3.179290184366326e-05, + "loss": 1.3245, + "step": 3450500 + }, + { + "epoch": 36.42, + "learning_rate": 3.1790263516151844e-05, + "loss": 1.2743, + "step": 3451000 + }, + { + "epoch": 36.42, + "learning_rate": 3.178762518864042e-05, + "loss": 1.3071, + "step": 3451500 + }, + { + "epoch": 36.43, + "learning_rate": 3.1784986861128995e-05, + "loss": 1.2824, + "step": 3452000 + }, + { + "epoch": 36.44, + "learning_rate": 3.178234853361757e-05, + "loss": 1.2841, + "step": 3452500 + }, + { + "epoch": 36.44, + "learning_rate": 3.1779710206106146e-05, + "loss": 1.2612, + "step": 3453000 + }, + { + "epoch": 36.45, + "learning_rate": 3.177707187859472e-05, + "loss": 1.2588, + "step": 3453500 + }, + { + "epoch": 36.45, + "learning_rate": 3.17744335510833e-05, + "loss": 1.2779, + "step": 3454000 + }, + { + "epoch": 36.46, + "learning_rate": 3.177179522357188e-05, + "loss": 1.2871, + "step": 3454500 + }, + { + "epoch": 36.46, + "learning_rate": 3.1769156896060455e-05, + "loss": 1.2744, + "step": 3455000 + }, + { + "epoch": 36.47, + "learning_rate": 3.176651856854902e-05, + "loss": 1.3723, + "step": 3455500 + }, + { + "epoch": 36.47, + "learning_rate": 3.1763880241037606e-05, + "loss": 1.3229, + "step": 3456000 + }, + { + "epoch": 36.48, + "learning_rate": 3.176124191352618e-05, + "loss": 1.292, + "step": 3456500 + }, + { + "epoch": 36.48, + "learning_rate": 3.1758603586014757e-05, + "loss": 1.3108, + "step": 3457000 + }, + { + "epoch": 36.49, + "learning_rate": 3.1755965258503325e-05, + "loss": 1.3146, + "step": 3457500 + }, + { + "epoch": 36.49, + "learning_rate": 3.175332693099191e-05, + "loss": 1.2721, + "step": 3458000 + }, + { + "epoch": 36.5, + "learning_rate": 3.175068860348048e-05, + "loss": 1.2681, + "step": 3458500 + }, + { + "epoch": 36.5, + "learning_rate": 3.174805027596906e-05, + "loss": 1.318, + "step": 3459000 + }, + { + "epoch": 36.51, + "learning_rate": 3.1745411948457634e-05, + "loss": 1.2839, + "step": 3459500 + }, + { + "epoch": 36.51, + "learning_rate": 3.174277362094621e-05, + "loss": 1.2645, + "step": 3460000 + }, + { + "epoch": 36.52, + "learning_rate": 3.1740135293434785e-05, + "loss": 1.3626, + "step": 3460500 + }, + { + "epoch": 36.53, + "learning_rate": 3.173749696592336e-05, + "loss": 1.2433, + "step": 3461000 + }, + { + "epoch": 36.53, + "learning_rate": 3.173485863841194e-05, + "loss": 1.3081, + "step": 3461500 + }, + { + "epoch": 36.54, + "learning_rate": 3.173222031090052e-05, + "loss": 1.3224, + "step": 3462000 + }, + { + "epoch": 36.54, + "learning_rate": 3.172958198338909e-05, + "loss": 1.295, + "step": 3462500 + }, + { + "epoch": 36.55, + "learning_rate": 3.172694365587767e-05, + "loss": 1.2893, + "step": 3463000 + }, + { + "epoch": 36.55, + "learning_rate": 3.1724305328366245e-05, + "loss": 1.3241, + "step": 3463500 + }, + { + "epoch": 36.56, + "learning_rate": 3.172166700085482e-05, + "loss": 1.3496, + "step": 3464000 + }, + { + "epoch": 36.56, + "learning_rate": 3.1719028673343396e-05, + "loss": 1.3775, + "step": 3464500 + }, + { + "epoch": 36.57, + "learning_rate": 3.171639034583197e-05, + "loss": 1.3175, + "step": 3465000 + }, + { + "epoch": 36.57, + "learning_rate": 3.171375201832055e-05, + "loss": 1.3133, + "step": 3465500 + }, + { + "epoch": 36.58, + "learning_rate": 3.171111369080912e-05, + "loss": 1.3219, + "step": 3466000 + }, + { + "epoch": 36.58, + "learning_rate": 3.1708475363297704e-05, + "loss": 1.3065, + "step": 3466500 + }, + { + "epoch": 36.59, + "learning_rate": 3.170583703578627e-05, + "loss": 1.3005, + "step": 3467000 + }, + { + "epoch": 36.59, + "learning_rate": 3.170319870827485e-05, + "loss": 1.2658, + "step": 3467500 + }, + { + "epoch": 36.6, + "learning_rate": 3.170056038076343e-05, + "loss": 1.2803, + "step": 3468000 + }, + { + "epoch": 36.6, + "learning_rate": 3.1697922053252006e-05, + "loss": 1.3296, + "step": 3468500 + }, + { + "epoch": 36.61, + "learning_rate": 3.169528372574058e-05, + "loss": 1.3021, + "step": 3469000 + }, + { + "epoch": 36.61, + "learning_rate": 3.169264539822915e-05, + "loss": 1.2946, + "step": 3469500 + }, + { + "epoch": 36.62, + "learning_rate": 3.169000707071773e-05, + "loss": 1.2531, + "step": 3470000 + }, + { + "epoch": 36.63, + "learning_rate": 3.168736874320631e-05, + "loss": 1.3658, + "step": 3470500 + }, + { + "epoch": 36.63, + "learning_rate": 3.1684730415694884e-05, + "loss": 1.2815, + "step": 3471000 + }, + { + "epoch": 36.64, + "learning_rate": 3.168209208818346e-05, + "loss": 1.2771, + "step": 3471500 + }, + { + "epoch": 36.64, + "learning_rate": 3.1679453760672035e-05, + "loss": 1.3529, + "step": 3472000 + }, + { + "epoch": 36.65, + "learning_rate": 3.167681543316061e-05, + "loss": 1.3334, + "step": 3472500 + }, + { + "epoch": 36.65, + "learning_rate": 3.1674177105649186e-05, + "loss": 1.283, + "step": 3473000 + }, + { + "epoch": 36.66, + "learning_rate": 3.167153877813777e-05, + "loss": 1.2908, + "step": 3473500 + }, + { + "epoch": 36.66, + "learning_rate": 3.1668900450626343e-05, + "loss": 1.277, + "step": 3474000 + }, + { + "epoch": 36.67, + "learning_rate": 3.166626212311491e-05, + "loss": 1.2739, + "step": 3474500 + }, + { + "epoch": 36.67, + "learning_rate": 3.1663623795603494e-05, + "loss": 1.2821, + "step": 3475000 + }, + { + "epoch": 36.68, + "learning_rate": 3.166098546809207e-05, + "loss": 1.272, + "step": 3475500 + }, + { + "epoch": 36.68, + "learning_rate": 3.1658347140580645e-05, + "loss": 1.275, + "step": 3476000 + }, + { + "epoch": 36.69, + "learning_rate": 3.165570881306922e-05, + "loss": 1.2274, + "step": 3476500 + }, + { + "epoch": 36.69, + "learning_rate": 3.1653070485557796e-05, + "loss": 1.3197, + "step": 3477000 + }, + { + "epoch": 36.7, + "learning_rate": 3.165043215804637e-05, + "loss": 1.196, + "step": 3477500 + }, + { + "epoch": 36.7, + "learning_rate": 3.164779383053495e-05, + "loss": 1.2854, + "step": 3478000 + }, + { + "epoch": 36.71, + "learning_rate": 3.164515550302353e-05, + "loss": 1.2703, + "step": 3478500 + }, + { + "epoch": 36.71, + "learning_rate": 3.16425171755121e-05, + "loss": 1.2607, + "step": 3479000 + }, + { + "epoch": 36.72, + "learning_rate": 3.1639878848000674e-05, + "loss": 1.3349, + "step": 3479500 + }, + { + "epoch": 36.73, + "learning_rate": 3.1637240520489256e-05, + "loss": 1.2538, + "step": 3480000 + }, + { + "epoch": 36.73, + "learning_rate": 3.163460219297783e-05, + "loss": 1.2865, + "step": 3480500 + }, + { + "epoch": 36.74, + "learning_rate": 3.163196386546641e-05, + "loss": 1.2793, + "step": 3481000 + }, + { + "epoch": 36.74, + "learning_rate": 3.1629325537954976e-05, + "loss": 1.2554, + "step": 3481500 + }, + { + "epoch": 36.75, + "learning_rate": 3.162668721044356e-05, + "loss": 1.2991, + "step": 3482000 + }, + { + "epoch": 36.75, + "learning_rate": 3.1624048882932134e-05, + "loss": 1.3173, + "step": 3482500 + }, + { + "epoch": 36.76, + "learning_rate": 3.162141055542071e-05, + "loss": 1.2879, + "step": 3483000 + }, + { + "epoch": 36.76, + "learning_rate": 3.161877222790929e-05, + "loss": 1.3041, + "step": 3483500 + }, + { + "epoch": 36.77, + "learning_rate": 3.161613390039786e-05, + "loss": 1.3767, + "step": 3484000 + }, + { + "epoch": 36.77, + "learning_rate": 3.1613495572886435e-05, + "loss": 1.2725, + "step": 3484500 + }, + { + "epoch": 36.78, + "learning_rate": 3.161085724537501e-05, + "loss": 1.3227, + "step": 3485000 + }, + { + "epoch": 36.78, + "learning_rate": 3.160821891786359e-05, + "loss": 1.2992, + "step": 3485500 + }, + { + "epoch": 36.79, + "learning_rate": 3.160558059035216e-05, + "loss": 1.2995, + "step": 3486000 + }, + { + "epoch": 36.79, + "learning_rate": 3.160294226284074e-05, + "loss": 1.281, + "step": 3486500 + }, + { + "epoch": 36.8, + "learning_rate": 3.160030393532932e-05, + "loss": 1.3515, + "step": 3487000 + }, + { + "epoch": 36.8, + "learning_rate": 3.1597665607817895e-05, + "loss": 1.2775, + "step": 3487500 + }, + { + "epoch": 36.81, + "learning_rate": 3.159502728030647e-05, + "loss": 1.2939, + "step": 3488000 + }, + { + "epoch": 36.82, + "learning_rate": 3.1592388952795046e-05, + "loss": 1.2884, + "step": 3488500 + }, + { + "epoch": 36.82, + "learning_rate": 3.158975062528362e-05, + "loss": 1.2933, + "step": 3489000 + }, + { + "epoch": 36.83, + "learning_rate": 3.15871122977722e-05, + "loss": 1.2886, + "step": 3489500 + }, + { + "epoch": 36.83, + "learning_rate": 3.158447397026077e-05, + "loss": 1.3439, + "step": 3490000 + }, + { + "epoch": 36.84, + "learning_rate": 3.1581835642749355e-05, + "loss": 1.2699, + "step": 3490500 + }, + { + "epoch": 36.84, + "learning_rate": 3.1579197315237924e-05, + "loss": 1.277, + "step": 3491000 + }, + { + "epoch": 36.85, + "learning_rate": 3.15765589877265e-05, + "loss": 1.4058, + "step": 3491500 + }, + { + "epoch": 36.85, + "learning_rate": 3.157392066021508e-05, + "loss": 1.3213, + "step": 3492000 + }, + { + "epoch": 36.86, + "learning_rate": 3.157128233270366e-05, + "loss": 1.314, + "step": 3492500 + }, + { + "epoch": 36.86, + "learning_rate": 3.156864400519223e-05, + "loss": 1.3191, + "step": 3493000 + }, + { + "epoch": 36.87, + "learning_rate": 3.156600567768081e-05, + "loss": 1.2955, + "step": 3493500 + }, + { + "epoch": 36.87, + "learning_rate": 3.156336735016938e-05, + "loss": 1.3016, + "step": 3494000 + }, + { + "epoch": 36.88, + "learning_rate": 3.156072902265796e-05, + "loss": 1.2911, + "step": 3494500 + }, + { + "epoch": 36.88, + "learning_rate": 3.1558090695146534e-05, + "loss": 1.287, + "step": 3495000 + }, + { + "epoch": 36.89, + "learning_rate": 3.155545236763511e-05, + "loss": 1.2406, + "step": 3495500 + }, + { + "epoch": 36.89, + "learning_rate": 3.1552814040123685e-05, + "loss": 1.3367, + "step": 3496000 + }, + { + "epoch": 36.9, + "learning_rate": 3.155017571261226e-05, + "loss": 1.3336, + "step": 3496500 + }, + { + "epoch": 36.9, + "learning_rate": 3.1547537385100836e-05, + "loss": 1.3336, + "step": 3497000 + }, + { + "epoch": 36.91, + "learning_rate": 3.154489905758942e-05, + "loss": 1.3178, + "step": 3497500 + }, + { + "epoch": 36.92, + "learning_rate": 3.154226073007799e-05, + "loss": 1.3729, + "step": 3498000 + }, + { + "epoch": 36.92, + "learning_rate": 3.153962240256656e-05, + "loss": 1.2757, + "step": 3498500 + }, + { + "epoch": 36.93, + "learning_rate": 3.1536984075055145e-05, + "loss": 1.3081, + "step": 3499000 + }, + { + "epoch": 36.93, + "learning_rate": 3.153434574754372e-05, + "loss": 1.3061, + "step": 3499500 + }, + { + "epoch": 36.94, + "learning_rate": 3.1531707420032296e-05, + "loss": 1.2998, + "step": 3500000 + }, + { + "epoch": 36.94, + "learning_rate": 3.152906909252087e-05, + "loss": 1.369, + "step": 3500500 + }, + { + "epoch": 36.95, + "learning_rate": 3.152643076500945e-05, + "loss": 1.3256, + "step": 3501000 + }, + { + "epoch": 36.95, + "learning_rate": 3.152379243749802e-05, + "loss": 1.3197, + "step": 3501500 + }, + { + "epoch": 36.96, + "learning_rate": 3.15211541099866e-05, + "loss": 1.2965, + "step": 3502000 + }, + { + "epoch": 36.96, + "learning_rate": 3.151851578247518e-05, + "loss": 1.2806, + "step": 3502500 + }, + { + "epoch": 36.97, + "learning_rate": 3.151587745496375e-05, + "loss": 1.3501, + "step": 3503000 + }, + { + "epoch": 36.97, + "learning_rate": 3.1513239127452324e-05, + "loss": 1.3758, + "step": 3503500 + }, + { + "epoch": 36.98, + "learning_rate": 3.1510600799940907e-05, + "loss": 1.2739, + "step": 3504000 + }, + { + "epoch": 36.98, + "learning_rate": 3.150796247242948e-05, + "loss": 1.3115, + "step": 3504500 + }, + { + "epoch": 36.99, + "learning_rate": 3.150532414491805e-05, + "loss": 1.2546, + "step": 3505000 + }, + { + "epoch": 36.99, + "learning_rate": 3.150268581740663e-05, + "loss": 1.2759, + "step": 3505500 + }, + { + "epoch": 37.0, + "learning_rate": 3.150004748989521e-05, + "loss": 1.3059, + "step": 3506000 + }, + { + "epoch": 37.01, + "learning_rate": 3.1497409162383784e-05, + "loss": 1.2743, + "step": 3506500 + }, + { + "epoch": 37.01, + "learning_rate": 3.149477083487236e-05, + "loss": 1.204, + "step": 3507000 + }, + { + "epoch": 37.02, + "learning_rate": 3.1492132507360935e-05, + "loss": 1.3306, + "step": 3507500 + }, + { + "epoch": 37.02, + "learning_rate": 3.148949417984951e-05, + "loss": 1.306, + "step": 3508000 + }, + { + "epoch": 37.03, + "learning_rate": 3.1486855852338086e-05, + "loss": 1.2886, + "step": 3508500 + }, + { + "epoch": 37.03, + "learning_rate": 3.148421752482666e-05, + "loss": 1.2411, + "step": 3509000 + }, + { + "epoch": 37.04, + "learning_rate": 3.1481579197315244e-05, + "loss": 1.2975, + "step": 3509500 + }, + { + "epoch": 37.04, + "learning_rate": 3.147894086980381e-05, + "loss": 1.2473, + "step": 3510000 + }, + { + "epoch": 37.05, + "learning_rate": 3.147630254229239e-05, + "loss": 1.3332, + "step": 3510500 + }, + { + "epoch": 37.05, + "learning_rate": 3.147366421478097e-05, + "loss": 1.3025, + "step": 3511000 + }, + { + "epoch": 37.06, + "learning_rate": 3.1471025887269546e-05, + "loss": 1.2582, + "step": 3511500 + }, + { + "epoch": 37.06, + "learning_rate": 3.146838755975812e-05, + "loss": 1.3181, + "step": 3512000 + }, + { + "epoch": 37.07, + "learning_rate": 3.14657492322467e-05, + "loss": 1.304, + "step": 3512500 + }, + { + "epoch": 37.07, + "learning_rate": 3.146311090473527e-05, + "loss": 1.2764, + "step": 3513000 + }, + { + "epoch": 37.08, + "learning_rate": 3.146047257722385e-05, + "loss": 1.2825, + "step": 3513500 + }, + { + "epoch": 37.08, + "learning_rate": 3.145783424971242e-05, + "loss": 1.2407, + "step": 3514000 + }, + { + "epoch": 37.09, + "learning_rate": 3.1455195922201e-05, + "loss": 1.2931, + "step": 3514500 + }, + { + "epoch": 37.09, + "learning_rate": 3.1452557594689574e-05, + "loss": 1.2562, + "step": 3515000 + }, + { + "epoch": 37.1, + "learning_rate": 3.144991926717815e-05, + "loss": 1.2767, + "step": 3515500 + }, + { + "epoch": 37.11, + "learning_rate": 3.144728093966673e-05, + "loss": 1.3452, + "step": 3516000 + }, + { + "epoch": 37.11, + "learning_rate": 3.144464261215531e-05, + "loss": 1.3305, + "step": 3516500 + }, + { + "epoch": 37.12, + "learning_rate": 3.1442004284643876e-05, + "loss": 1.2927, + "step": 3517000 + }, + { + "epoch": 37.12, + "learning_rate": 3.143936595713246e-05, + "loss": 1.2694, + "step": 3517500 + }, + { + "epoch": 37.13, + "learning_rate": 3.1436727629621034e-05, + "loss": 1.2725, + "step": 3518000 + }, + { + "epoch": 37.13, + "learning_rate": 3.143408930210961e-05, + "loss": 1.2999, + "step": 3518500 + }, + { + "epoch": 37.14, + "learning_rate": 3.1431450974598185e-05, + "loss": 1.3309, + "step": 3519000 + }, + { + "epoch": 37.14, + "learning_rate": 3.142881264708676e-05, + "loss": 1.3217, + "step": 3519500 + }, + { + "epoch": 37.15, + "learning_rate": 3.1426174319575336e-05, + "loss": 1.3062, + "step": 3520000 + }, + { + "epoch": 37.15, + "learning_rate": 3.142353599206391e-05, + "loss": 1.3038, + "step": 3520500 + }, + { + "epoch": 37.16, + "learning_rate": 3.142089766455249e-05, + "loss": 1.273, + "step": 3521000 + }, + { + "epoch": 37.16, + "learning_rate": 3.141825933704107e-05, + "loss": 1.3366, + "step": 3521500 + }, + { + "epoch": 37.17, + "learning_rate": 3.141562100952964e-05, + "loss": 1.3053, + "step": 3522000 + }, + { + "epoch": 37.17, + "learning_rate": 3.141298268201821e-05, + "loss": 1.3027, + "step": 3522500 + }, + { + "epoch": 37.18, + "learning_rate": 3.1410344354506795e-05, + "loss": 1.2441, + "step": 3523000 + }, + { + "epoch": 37.18, + "learning_rate": 3.140770602699537e-05, + "loss": 1.2713, + "step": 3523500 + }, + { + "epoch": 37.19, + "learning_rate": 3.140506769948394e-05, + "loss": 1.2683, + "step": 3524000 + }, + { + "epoch": 37.2, + "learning_rate": 3.140242937197252e-05, + "loss": 1.2926, + "step": 3524500 + }, + { + "epoch": 37.2, + "learning_rate": 3.13997910444611e-05, + "loss": 1.3188, + "step": 3525000 + }, + { + "epoch": 37.21, + "learning_rate": 3.139715271694967e-05, + "loss": 1.2798, + "step": 3525500 + }, + { + "epoch": 37.21, + "learning_rate": 3.139451438943825e-05, + "loss": 1.304, + "step": 3526000 + }, + { + "epoch": 37.22, + "learning_rate": 3.1391876061926824e-05, + "loss": 1.3754, + "step": 3526500 + }, + { + "epoch": 37.22, + "learning_rate": 3.13892377344154e-05, + "loss": 1.3202, + "step": 3527000 + }, + { + "epoch": 37.23, + "learning_rate": 3.1386599406903975e-05, + "loss": 1.2923, + "step": 3527500 + }, + { + "epoch": 37.23, + "learning_rate": 3.138396107939256e-05, + "loss": 1.2978, + "step": 3528000 + }, + { + "epoch": 37.24, + "learning_rate": 3.138132275188113e-05, + "loss": 1.3355, + "step": 3528500 + }, + { + "epoch": 37.24, + "learning_rate": 3.13786844243697e-05, + "loss": 1.2581, + "step": 3529000 + }, + { + "epoch": 37.25, + "learning_rate": 3.1376046096858284e-05, + "loss": 1.3587, + "step": 3529500 + }, + { + "epoch": 37.25, + "learning_rate": 3.137340776934686e-05, + "loss": 1.3081, + "step": 3530000 + }, + { + "epoch": 37.26, + "learning_rate": 3.1370769441835435e-05, + "loss": 1.3271, + "step": 3530500 + }, + { + "epoch": 37.26, + "learning_rate": 3.136813111432401e-05, + "loss": 1.2658, + "step": 3531000 + }, + { + "epoch": 37.27, + "learning_rate": 3.1365492786812585e-05, + "loss": 1.3463, + "step": 3531500 + }, + { + "epoch": 37.27, + "learning_rate": 3.136285445930116e-05, + "loss": 1.3232, + "step": 3532000 + }, + { + "epoch": 37.28, + "learning_rate": 3.1360216131789736e-05, + "loss": 1.2941, + "step": 3532500 + }, + { + "epoch": 37.28, + "learning_rate": 3.135757780427831e-05, + "loss": 1.2635, + "step": 3533000 + }, + { + "epoch": 37.29, + "learning_rate": 3.135493947676689e-05, + "loss": 1.3233, + "step": 3533500 + }, + { + "epoch": 37.3, + "learning_rate": 3.135230114925546e-05, + "loss": 1.3269, + "step": 3534000 + }, + { + "epoch": 37.3, + "learning_rate": 3.134966282174404e-05, + "loss": 1.2183, + "step": 3534500 + }, + { + "epoch": 37.31, + "learning_rate": 3.134702449423262e-05, + "loss": 1.3354, + "step": 3535000 + }, + { + "epoch": 37.31, + "learning_rate": 3.1344386166721196e-05, + "loss": 1.2628, + "step": 3535500 + }, + { + "epoch": 37.32, + "learning_rate": 3.1341747839209765e-05, + "loss": 1.2855, + "step": 3536000 + }, + { + "epoch": 37.32, + "learning_rate": 3.133910951169835e-05, + "loss": 1.2879, + "step": 3536500 + }, + { + "epoch": 37.33, + "learning_rate": 3.133647118418692e-05, + "loss": 1.2446, + "step": 3537000 + }, + { + "epoch": 37.33, + "learning_rate": 3.13338328566755e-05, + "loss": 1.2713, + "step": 3537500 + }, + { + "epoch": 37.34, + "learning_rate": 3.1331194529164074e-05, + "loss": 1.2536, + "step": 3538000 + }, + { + "epoch": 37.34, + "learning_rate": 3.132855620165265e-05, + "loss": 1.2811, + "step": 3538500 + }, + { + "epoch": 37.35, + "learning_rate": 3.1325917874141225e-05, + "loss": 1.2432, + "step": 3539000 + }, + { + "epoch": 37.35, + "learning_rate": 3.13232795466298e-05, + "loss": 1.2986, + "step": 3539500 + }, + { + "epoch": 37.36, + "learning_rate": 3.132064121911838e-05, + "loss": 1.2511, + "step": 3540000 + }, + { + "epoch": 37.36, + "learning_rate": 3.131800289160696e-05, + "loss": 1.3599, + "step": 3540500 + }, + { + "epoch": 37.37, + "learning_rate": 3.1315364564095527e-05, + "loss": 1.302, + "step": 3541000 + }, + { + "epoch": 37.37, + "learning_rate": 3.131272623658411e-05, + "loss": 1.2909, + "step": 3541500 + }, + { + "epoch": 37.38, + "learning_rate": 3.1310087909072684e-05, + "loss": 1.2782, + "step": 3542000 + }, + { + "epoch": 37.39, + "learning_rate": 3.130744958156126e-05, + "loss": 1.3049, + "step": 3542500 + }, + { + "epoch": 37.39, + "learning_rate": 3.130481125404983e-05, + "loss": 1.2527, + "step": 3543000 + }, + { + "epoch": 37.4, + "learning_rate": 3.130217292653841e-05, + "loss": 1.311, + "step": 3543500 + }, + { + "epoch": 37.4, + "learning_rate": 3.1299534599026986e-05, + "loss": 1.3096, + "step": 3544000 + }, + { + "epoch": 37.41, + "learning_rate": 3.129689627151556e-05, + "loss": 1.2773, + "step": 3544500 + }, + { + "epoch": 37.41, + "learning_rate": 3.129425794400414e-05, + "loss": 1.3178, + "step": 3545000 + }, + { + "epoch": 37.42, + "learning_rate": 3.129161961649271e-05, + "loss": 1.2972, + "step": 3545500 + }, + { + "epoch": 37.42, + "learning_rate": 3.128898128898129e-05, + "loss": 1.3083, + "step": 3546000 + }, + { + "epoch": 37.43, + "learning_rate": 3.1286342961469864e-05, + "loss": 1.2639, + "step": 3546500 + }, + { + "epoch": 37.43, + "learning_rate": 3.1283704633958446e-05, + "loss": 1.3255, + "step": 3547000 + }, + { + "epoch": 37.44, + "learning_rate": 3.128106630644702e-05, + "loss": 1.2943, + "step": 3547500 + }, + { + "epoch": 37.44, + "learning_rate": 3.127842797893559e-05, + "loss": 1.2451, + "step": 3548000 + }, + { + "epoch": 37.45, + "learning_rate": 3.127578965142417e-05, + "loss": 1.2843, + "step": 3548500 + }, + { + "epoch": 37.45, + "learning_rate": 3.127315132391275e-05, + "loss": 1.2924, + "step": 3549000 + }, + { + "epoch": 37.46, + "learning_rate": 3.127051299640132e-05, + "loss": 1.3469, + "step": 3549500 + }, + { + "epoch": 37.46, + "learning_rate": 3.12678746688899e-05, + "loss": 1.27, + "step": 3550000 + }, + { + "epoch": 37.47, + "learning_rate": 3.1265236341378474e-05, + "loss": 1.3546, + "step": 3550500 + }, + { + "epoch": 37.47, + "learning_rate": 3.126259801386705e-05, + "loss": 1.3035, + "step": 3551000 + }, + { + "epoch": 37.48, + "learning_rate": 3.1259959686355625e-05, + "loss": 1.2968, + "step": 3551500 + }, + { + "epoch": 37.49, + "learning_rate": 3.125732135884421e-05, + "loss": 1.2922, + "step": 3552000 + }, + { + "epoch": 37.49, + "learning_rate": 3.1254683031332776e-05, + "loss": 1.2902, + "step": 3552500 + }, + { + "epoch": 37.5, + "learning_rate": 3.125204470382135e-05, + "loss": 1.2772, + "step": 3553000 + }, + { + "epoch": 37.5, + "learning_rate": 3.1249406376309934e-05, + "loss": 1.2903, + "step": 3553500 + }, + { + "epoch": 37.51, + "learning_rate": 3.124676804879851e-05, + "loss": 1.3347, + "step": 3554000 + }, + { + "epoch": 37.51, + "learning_rate": 3.1244129721287085e-05, + "loss": 1.2396, + "step": 3554500 + }, + { + "epoch": 37.52, + "learning_rate": 3.1241491393775654e-05, + "loss": 1.2524, + "step": 3555000 + }, + { + "epoch": 37.52, + "learning_rate": 3.1238853066264236e-05, + "loss": 1.299, + "step": 3555500 + }, + { + "epoch": 37.53, + "learning_rate": 3.123621473875281e-05, + "loss": 1.2833, + "step": 3556000 + }, + { + "epoch": 37.53, + "learning_rate": 3.123357641124139e-05, + "loss": 1.2958, + "step": 3556500 + }, + { + "epoch": 37.54, + "learning_rate": 3.123093808372997e-05, + "loss": 1.3694, + "step": 3557000 + }, + { + "epoch": 37.54, + "learning_rate": 3.122829975621854e-05, + "loss": 1.3125, + "step": 3557500 + }, + { + "epoch": 37.55, + "learning_rate": 3.1225661428707113e-05, + "loss": 1.324, + "step": 3558000 + }, + { + "epoch": 37.55, + "learning_rate": 3.122302310119569e-05, + "loss": 1.2821, + "step": 3558500 + }, + { + "epoch": 37.56, + "learning_rate": 3.122038477368427e-05, + "loss": 1.2851, + "step": 3559000 + }, + { + "epoch": 37.56, + "learning_rate": 3.121774644617285e-05, + "loss": 1.2913, + "step": 3559500 + }, + { + "epoch": 37.57, + "learning_rate": 3.1215108118661415e-05, + "loss": 1.3314, + "step": 3560000 + }, + { + "epoch": 37.58, + "learning_rate": 3.121246979115e-05, + "loss": 1.2375, + "step": 3560500 + }, + { + "epoch": 37.58, + "learning_rate": 3.120983146363857e-05, + "loss": 1.3295, + "step": 3561000 + }, + { + "epoch": 37.59, + "learning_rate": 3.120719313612715e-05, + "loss": 1.2751, + "step": 3561500 + }, + { + "epoch": 37.59, + "learning_rate": 3.1204554808615724e-05, + "loss": 1.3341, + "step": 3562000 + }, + { + "epoch": 37.6, + "learning_rate": 3.12019164811043e-05, + "loss": 1.2574, + "step": 3562500 + }, + { + "epoch": 37.6, + "learning_rate": 3.1199278153592875e-05, + "loss": 1.2863, + "step": 3563000 + }, + { + "epoch": 37.61, + "learning_rate": 3.119663982608145e-05, + "loss": 1.3683, + "step": 3563500 + }, + { + "epoch": 37.61, + "learning_rate": 3.119400149857003e-05, + "loss": 1.2716, + "step": 3564000 + }, + { + "epoch": 37.62, + "learning_rate": 3.11913631710586e-05, + "loss": 1.2802, + "step": 3564500 + }, + { + "epoch": 37.62, + "learning_rate": 3.118872484354718e-05, + "loss": 1.2951, + "step": 3565000 + }, + { + "epoch": 37.63, + "learning_rate": 3.118608651603576e-05, + "loss": 1.276, + "step": 3565500 + }, + { + "epoch": 37.63, + "learning_rate": 3.1183448188524335e-05, + "loss": 1.296, + "step": 3566000 + }, + { + "epoch": 37.64, + "learning_rate": 3.118080986101291e-05, + "loss": 1.3109, + "step": 3566500 + }, + { + "epoch": 37.64, + "learning_rate": 3.117817153350148e-05, + "loss": 1.3153, + "step": 3567000 + }, + { + "epoch": 37.65, + "learning_rate": 3.117553320599006e-05, + "loss": 1.3087, + "step": 3567500 + }, + { + "epoch": 37.65, + "learning_rate": 3.117289487847864e-05, + "loss": 1.3113, + "step": 3568000 + }, + { + "epoch": 37.66, + "learning_rate": 3.117025655096721e-05, + "loss": 1.2798, + "step": 3568500 + }, + { + "epoch": 37.66, + "learning_rate": 3.116761822345579e-05, + "loss": 1.3298, + "step": 3569000 + }, + { + "epoch": 37.67, + "learning_rate": 3.116497989594436e-05, + "loss": 1.2994, + "step": 3569500 + }, + { + "epoch": 37.68, + "learning_rate": 3.116234156843294e-05, + "loss": 1.3055, + "step": 3570000 + }, + { + "epoch": 37.68, + "learning_rate": 3.1159703240921514e-05, + "loss": 1.345, + "step": 3570500 + }, + { + "epoch": 37.69, + "learning_rate": 3.1157064913410096e-05, + "loss": 1.2459, + "step": 3571000 + }, + { + "epoch": 37.69, + "learning_rate": 3.1154426585898665e-05, + "loss": 1.2933, + "step": 3571500 + }, + { + "epoch": 37.7, + "learning_rate": 3.115178825838724e-05, + "loss": 1.3094, + "step": 3572000 + }, + { + "epoch": 37.7, + "learning_rate": 3.114914993087582e-05, + "loss": 1.2663, + "step": 3572500 + }, + { + "epoch": 37.71, + "learning_rate": 3.11465116033644e-05, + "loss": 1.2994, + "step": 3573000 + }, + { + "epoch": 37.71, + "learning_rate": 3.1143873275852974e-05, + "loss": 1.2687, + "step": 3573500 + }, + { + "epoch": 37.72, + "learning_rate": 3.114123494834155e-05, + "loss": 1.3251, + "step": 3574000 + }, + { + "epoch": 37.72, + "learning_rate": 3.1138596620830125e-05, + "loss": 1.2766, + "step": 3574500 + }, + { + "epoch": 37.73, + "learning_rate": 3.11359582933187e-05, + "loss": 1.2729, + "step": 3575000 + }, + { + "epoch": 37.73, + "learning_rate": 3.1133319965807276e-05, + "loss": 1.3013, + "step": 3575500 + }, + { + "epoch": 37.74, + "learning_rate": 3.113068163829586e-05, + "loss": 1.3001, + "step": 3576000 + }, + { + "epoch": 37.74, + "learning_rate": 3.112804331078443e-05, + "loss": 1.3328, + "step": 3576500 + }, + { + "epoch": 37.75, + "learning_rate": 3.1125404983273e-05, + "loss": 1.2695, + "step": 3577000 + }, + { + "epoch": 37.75, + "learning_rate": 3.1122766655761585e-05, + "loss": 1.3324, + "step": 3577500 + }, + { + "epoch": 37.76, + "learning_rate": 3.112012832825016e-05, + "loss": 1.3068, + "step": 3578000 + }, + { + "epoch": 37.77, + "learning_rate": 3.111749000073873e-05, + "loss": 1.3331, + "step": 3578500 + }, + { + "epoch": 37.77, + "learning_rate": 3.111485167322731e-05, + "loss": 1.266, + "step": 3579000 + }, + { + "epoch": 37.78, + "learning_rate": 3.1112213345715886e-05, + "loss": 1.3525, + "step": 3579500 + }, + { + "epoch": 37.78, + "learning_rate": 3.110957501820446e-05, + "loss": 1.339, + "step": 3580000 + }, + { + "epoch": 37.79, + "learning_rate": 3.110693669069304e-05, + "loss": 1.2937, + "step": 3580500 + }, + { + "epoch": 37.79, + "learning_rate": 3.110429836318161e-05, + "loss": 1.3404, + "step": 3581000 + }, + { + "epoch": 37.8, + "learning_rate": 3.110166003567019e-05, + "loss": 1.3609, + "step": 3581500 + }, + { + "epoch": 37.8, + "learning_rate": 3.1099021708158764e-05, + "loss": 1.2835, + "step": 3582000 + }, + { + "epoch": 37.81, + "learning_rate": 3.109638338064734e-05, + "loss": 1.3117, + "step": 3582500 + }, + { + "epoch": 37.81, + "learning_rate": 3.109374505313592e-05, + "loss": 1.3059, + "step": 3583000 + }, + { + "epoch": 37.82, + "learning_rate": 3.109110672562449e-05, + "loss": 1.2819, + "step": 3583500 + }, + { + "epoch": 37.82, + "learning_rate": 3.1088468398113066e-05, + "loss": 1.3394, + "step": 3584000 + }, + { + "epoch": 37.83, + "learning_rate": 3.108583007060165e-05, + "loss": 1.3265, + "step": 3584500 + }, + { + "epoch": 37.83, + "learning_rate": 3.1083191743090224e-05, + "loss": 1.2447, + "step": 3585000 + }, + { + "epoch": 37.84, + "learning_rate": 3.10805534155788e-05, + "loss": 1.3353, + "step": 3585500 + }, + { + "epoch": 37.84, + "learning_rate": 3.1077915088067375e-05, + "loss": 1.3104, + "step": 3586000 + }, + { + "epoch": 37.85, + "learning_rate": 3.107527676055595e-05, + "loss": 1.3407, + "step": 3586500 + }, + { + "epoch": 37.85, + "learning_rate": 3.1072638433044526e-05, + "loss": 1.2549, + "step": 3587000 + }, + { + "epoch": 37.86, + "learning_rate": 3.10700001055331e-05, + "loss": 1.2954, + "step": 3587500 + }, + { + "epoch": 37.87, + "learning_rate": 3.1067361778021677e-05, + "loss": 1.3035, + "step": 3588000 + }, + { + "epoch": 37.87, + "learning_rate": 3.106472345051025e-05, + "loss": 1.2972, + "step": 3588500 + }, + { + "epoch": 37.88, + "learning_rate": 3.106208512299883e-05, + "loss": 1.3019, + "step": 3589000 + }, + { + "epoch": 37.88, + "learning_rate": 3.105944679548741e-05, + "loss": 1.3349, + "step": 3589500 + }, + { + "epoch": 37.89, + "learning_rate": 3.1056808467975985e-05, + "loss": 1.3267, + "step": 3590000 + }, + { + "epoch": 37.89, + "learning_rate": 3.1054170140464554e-05, + "loss": 1.3267, + "step": 3590500 + }, + { + "epoch": 37.9, + "learning_rate": 3.1051531812953136e-05, + "loss": 1.284, + "step": 3591000 + }, + { + "epoch": 37.9, + "learning_rate": 3.104889348544171e-05, + "loss": 1.2897, + "step": 3591500 + }, + { + "epoch": 37.91, + "learning_rate": 3.104625515793029e-05, + "loss": 1.306, + "step": 3592000 + }, + { + "epoch": 37.91, + "learning_rate": 3.104361683041886e-05, + "loss": 1.2621, + "step": 3592500 + }, + { + "epoch": 37.92, + "learning_rate": 3.104097850290744e-05, + "loss": 1.3002, + "step": 3593000 + }, + { + "epoch": 37.92, + "learning_rate": 3.1038340175396014e-05, + "loss": 1.3352, + "step": 3593500 + }, + { + "epoch": 37.93, + "learning_rate": 3.103570184788459e-05, + "loss": 1.3168, + "step": 3594000 + }, + { + "epoch": 37.93, + "learning_rate": 3.1033063520373165e-05, + "loss": 1.2641, + "step": 3594500 + }, + { + "epoch": 37.94, + "learning_rate": 3.103042519286175e-05, + "loss": 1.2941, + "step": 3595000 + }, + { + "epoch": 37.94, + "learning_rate": 3.1027786865350316e-05, + "loss": 1.3031, + "step": 3595500 + }, + { + "epoch": 37.95, + "learning_rate": 3.102514853783889e-05, + "loss": 1.2373, + "step": 3596000 + }, + { + "epoch": 37.95, + "learning_rate": 3.102251021032747e-05, + "loss": 1.324, + "step": 3596500 + }, + { + "epoch": 37.96, + "learning_rate": 3.101987188281605e-05, + "loss": 1.3248, + "step": 3597000 + }, + { + "epoch": 37.97, + "learning_rate": 3.101723355530462e-05, + "loss": 1.3311, + "step": 3597500 + }, + { + "epoch": 37.97, + "learning_rate": 3.10145952277932e-05, + "loss": 1.2579, + "step": 3598000 + }, + { + "epoch": 37.98, + "learning_rate": 3.1011956900281775e-05, + "loss": 1.2958, + "step": 3598500 + }, + { + "epoch": 37.98, + "learning_rate": 3.100931857277035e-05, + "loss": 1.2695, + "step": 3599000 + }, + { + "epoch": 37.99, + "learning_rate": 3.1006680245258926e-05, + "loss": 1.3572, + "step": 3599500 + }, + { + "epoch": 37.99, + "learning_rate": 3.10040419177475e-05, + "loss": 1.3317, + "step": 3600000 + }, + { + "epoch": 38.0, + "learning_rate": 3.100140359023608e-05, + "loss": 1.3077, + "step": 3600500 + }, + { + "epoch": 38.0, + "learning_rate": 3.099876526272465e-05, + "loss": 1.3127, + "step": 3601000 + }, + { + "epoch": 38.01, + "learning_rate": 3.0996126935213235e-05, + "loss": 1.2715, + "step": 3601500 + }, + { + "epoch": 38.01, + "learning_rate": 3.099348860770181e-05, + "loss": 1.2618, + "step": 3602000 + }, + { + "epoch": 38.02, + "learning_rate": 3.099085028019038e-05, + "loss": 1.261, + "step": 3602500 + }, + { + "epoch": 38.02, + "learning_rate": 3.098821195267896e-05, + "loss": 1.2368, + "step": 3603000 + }, + { + "epoch": 38.03, + "learning_rate": 3.098557362516754e-05, + "loss": 1.3434, + "step": 3603500 + }, + { + "epoch": 38.03, + "learning_rate": 3.098293529765611e-05, + "loss": 1.2844, + "step": 3604000 + }, + { + "epoch": 38.04, + "learning_rate": 3.098029697014469e-05, + "loss": 1.2871, + "step": 3604500 + }, + { + "epoch": 38.04, + "learning_rate": 3.0977658642633263e-05, + "loss": 1.285, + "step": 3605000 + }, + { + "epoch": 38.05, + "learning_rate": 3.097502031512184e-05, + "loss": 1.2593, + "step": 3605500 + }, + { + "epoch": 38.06, + "learning_rate": 3.0972381987610414e-05, + "loss": 1.2517, + "step": 3606000 + }, + { + "epoch": 38.06, + "learning_rate": 3.096974366009899e-05, + "loss": 1.3258, + "step": 3606500 + }, + { + "epoch": 38.07, + "learning_rate": 3.0967105332587565e-05, + "loss": 1.2934, + "step": 3607000 + }, + { + "epoch": 38.07, + "learning_rate": 3.096446700507614e-05, + "loss": 1.3306, + "step": 3607500 + }, + { + "epoch": 38.08, + "learning_rate": 3.0961828677564716e-05, + "loss": 1.2237, + "step": 3608000 + }, + { + "epoch": 38.08, + "learning_rate": 3.09591903500533e-05, + "loss": 1.2778, + "step": 3608500 + }, + { + "epoch": 38.09, + "learning_rate": 3.0956552022541874e-05, + "loss": 1.3481, + "step": 3609000 + }, + { + "epoch": 38.09, + "learning_rate": 3.095391369503044e-05, + "loss": 1.3004, + "step": 3609500 + }, + { + "epoch": 38.1, + "learning_rate": 3.0951275367519025e-05, + "loss": 1.2927, + "step": 3610000 + }, + { + "epoch": 38.1, + "learning_rate": 3.09486370400076e-05, + "loss": 1.314, + "step": 3610500 + }, + { + "epoch": 38.11, + "learning_rate": 3.0945998712496176e-05, + "loss": 1.2793, + "step": 3611000 + }, + { + "epoch": 38.11, + "learning_rate": 3.094336038498475e-05, + "loss": 1.2633, + "step": 3611500 + }, + { + "epoch": 38.12, + "learning_rate": 3.094072205747333e-05, + "loss": 1.273, + "step": 3612000 + }, + { + "epoch": 38.12, + "learning_rate": 3.09380837299619e-05, + "loss": 1.2879, + "step": 3612500 + }, + { + "epoch": 38.13, + "learning_rate": 3.093544540245048e-05, + "loss": 1.2624, + "step": 3613000 + }, + { + "epoch": 38.13, + "learning_rate": 3.093280707493906e-05, + "loss": 1.2828, + "step": 3613500 + }, + { + "epoch": 38.14, + "learning_rate": 3.0930168747427636e-05, + "loss": 1.3175, + "step": 3614000 + }, + { + "epoch": 38.14, + "learning_rate": 3.0927530419916204e-05, + "loss": 1.3011, + "step": 3614500 + }, + { + "epoch": 38.15, + "learning_rate": 3.092489209240479e-05, + "loss": 1.2962, + "step": 3615000 + }, + { + "epoch": 38.16, + "learning_rate": 3.092225376489336e-05, + "loss": 1.3362, + "step": 3615500 + }, + { + "epoch": 38.16, + "learning_rate": 3.091961543738194e-05, + "loss": 1.3329, + "step": 3616000 + }, + { + "epoch": 38.17, + "learning_rate": 3.0916977109870506e-05, + "loss": 1.2369, + "step": 3616500 + }, + { + "epoch": 38.17, + "learning_rate": 3.091433878235909e-05, + "loss": 1.3477, + "step": 3617000 + }, + { + "epoch": 38.18, + "learning_rate": 3.0911700454847664e-05, + "loss": 1.252, + "step": 3617500 + }, + { + "epoch": 38.18, + "learning_rate": 3.090906212733624e-05, + "loss": 1.2455, + "step": 3618000 + }, + { + "epoch": 38.19, + "learning_rate": 3.0906423799824815e-05, + "loss": 1.3031, + "step": 3618500 + }, + { + "epoch": 38.19, + "learning_rate": 3.090378547231339e-05, + "loss": 1.3106, + "step": 3619000 + }, + { + "epoch": 38.2, + "learning_rate": 3.0901147144801966e-05, + "loss": 1.3114, + "step": 3619500 + }, + { + "epoch": 38.2, + "learning_rate": 3.089850881729054e-05, + "loss": 1.3085, + "step": 3620000 + }, + { + "epoch": 38.21, + "learning_rate": 3.0895870489779124e-05, + "loss": 1.3261, + "step": 3620500 + }, + { + "epoch": 38.21, + "learning_rate": 3.08932321622677e-05, + "loss": 1.3116, + "step": 3621000 + }, + { + "epoch": 38.22, + "learning_rate": 3.089059383475627e-05, + "loss": 1.2837, + "step": 3621500 + }, + { + "epoch": 38.22, + "learning_rate": 3.088795550724485e-05, + "loss": 1.2882, + "step": 3622000 + }, + { + "epoch": 38.23, + "learning_rate": 3.0885317179733426e-05, + "loss": 1.326, + "step": 3622500 + }, + { + "epoch": 38.23, + "learning_rate": 3.0882678852222e-05, + "loss": 1.2903, + "step": 3623000 + }, + { + "epoch": 38.24, + "learning_rate": 3.088004052471058e-05, + "loss": 1.304, + "step": 3623500 + }, + { + "epoch": 38.25, + "learning_rate": 3.087740219719915e-05, + "loss": 1.2466, + "step": 3624000 + }, + { + "epoch": 38.25, + "learning_rate": 3.087476386968773e-05, + "loss": 1.2843, + "step": 3624500 + }, + { + "epoch": 38.26, + "learning_rate": 3.08721255421763e-05, + "loss": 1.2736, + "step": 3625000 + }, + { + "epoch": 38.26, + "learning_rate": 3.0869487214664886e-05, + "loss": 1.2839, + "step": 3625500 + }, + { + "epoch": 38.27, + "learning_rate": 3.0866848887153454e-05, + "loss": 1.2872, + "step": 3626000 + }, + { + "epoch": 38.27, + "learning_rate": 3.086421055964203e-05, + "loss": 1.2936, + "step": 3626500 + }, + { + "epoch": 38.28, + "learning_rate": 3.086157223213061e-05, + "loss": 1.3215, + "step": 3627000 + }, + { + "epoch": 38.28, + "learning_rate": 3.085893390461919e-05, + "loss": 1.295, + "step": 3627500 + }, + { + "epoch": 38.29, + "learning_rate": 3.085629557710776e-05, + "loss": 1.3557, + "step": 3628000 + }, + { + "epoch": 38.29, + "learning_rate": 3.085365724959633e-05, + "loss": 1.2848, + "step": 3628500 + }, + { + "epoch": 38.3, + "learning_rate": 3.0851018922084914e-05, + "loss": 1.3248, + "step": 3629000 + }, + { + "epoch": 38.3, + "learning_rate": 3.084838059457349e-05, + "loss": 1.2777, + "step": 3629500 + }, + { + "epoch": 38.31, + "learning_rate": 3.0845742267062065e-05, + "loss": 1.2879, + "step": 3630000 + }, + { + "epoch": 38.31, + "learning_rate": 3.084310393955065e-05, + "loss": 1.297, + "step": 3630500 + }, + { + "epoch": 38.32, + "learning_rate": 3.0840465612039216e-05, + "loss": 1.2889, + "step": 3631000 + }, + { + "epoch": 38.32, + "learning_rate": 3.083782728452779e-05, + "loss": 1.3347, + "step": 3631500 + }, + { + "epoch": 38.33, + "learning_rate": 3.083518895701637e-05, + "loss": 1.2896, + "step": 3632000 + }, + { + "epoch": 38.33, + "learning_rate": 3.083255062950495e-05, + "loss": 1.255, + "step": 3632500 + }, + { + "epoch": 38.34, + "learning_rate": 3.0829912301993525e-05, + "loss": 1.294, + "step": 3633000 + }, + { + "epoch": 38.35, + "learning_rate": 3.082727397448209e-05, + "loss": 1.4025, + "step": 3633500 + }, + { + "epoch": 38.35, + "learning_rate": 3.0824635646970676e-05, + "loss": 1.2801, + "step": 3634000 + }, + { + "epoch": 38.36, + "learning_rate": 3.082199731945925e-05, + "loss": 1.2967, + "step": 3634500 + }, + { + "epoch": 38.36, + "learning_rate": 3.0819358991947827e-05, + "loss": 1.2847, + "step": 3635000 + }, + { + "epoch": 38.37, + "learning_rate": 3.08167206644364e-05, + "loss": 1.2721, + "step": 3635500 + }, + { + "epoch": 38.37, + "learning_rate": 3.081408233692498e-05, + "loss": 1.3239, + "step": 3636000 + }, + { + "epoch": 38.38, + "learning_rate": 3.081144400941355e-05, + "loss": 1.2762, + "step": 3636500 + }, + { + "epoch": 38.38, + "learning_rate": 3.080880568190213e-05, + "loss": 1.2819, + "step": 3637000 + }, + { + "epoch": 38.39, + "learning_rate": 3.080616735439071e-05, + "loss": 1.272, + "step": 3637500 + }, + { + "epoch": 38.39, + "learning_rate": 3.080352902687928e-05, + "loss": 1.2374, + "step": 3638000 + }, + { + "epoch": 38.4, + "learning_rate": 3.0800890699367855e-05, + "loss": 1.3617, + "step": 3638500 + }, + { + "epoch": 38.4, + "learning_rate": 3.079825237185644e-05, + "loss": 1.2834, + "step": 3639000 + }, + { + "epoch": 38.41, + "learning_rate": 3.079561404434501e-05, + "loss": 1.2947, + "step": 3639500 + }, + { + "epoch": 38.41, + "learning_rate": 3.079297571683359e-05, + "loss": 1.3261, + "step": 3640000 + }, + { + "epoch": 38.42, + "learning_rate": 3.079033738932216e-05, + "loss": 1.2946, + "step": 3640500 + }, + { + "epoch": 38.42, + "learning_rate": 3.078769906181074e-05, + "loss": 1.2273, + "step": 3641000 + }, + { + "epoch": 38.43, + "learning_rate": 3.0785060734299315e-05, + "loss": 1.2319, + "step": 3641500 + }, + { + "epoch": 38.44, + "learning_rate": 3.078242240678789e-05, + "loss": 1.3579, + "step": 3642000 + }, + { + "epoch": 38.44, + "learning_rate": 3.077978407927647e-05, + "loss": 1.2767, + "step": 3642500 + }, + { + "epoch": 38.45, + "learning_rate": 3.077714575176504e-05, + "loss": 1.3563, + "step": 3643000 + }, + { + "epoch": 38.45, + "learning_rate": 3.0774507424253617e-05, + "loss": 1.248, + "step": 3643500 + }, + { + "epoch": 38.46, + "learning_rate": 3.077186909674219e-05, + "loss": 1.261, + "step": 3644000 + }, + { + "epoch": 38.46, + "learning_rate": 3.0769230769230774e-05, + "loss": 1.2807, + "step": 3644500 + }, + { + "epoch": 38.47, + "learning_rate": 3.076659244171934e-05, + "loss": 1.2621, + "step": 3645000 + }, + { + "epoch": 38.47, + "learning_rate": 3.076395411420792e-05, + "loss": 1.3265, + "step": 3645500 + }, + { + "epoch": 38.48, + "learning_rate": 3.07613157866965e-05, + "loss": 1.2932, + "step": 3646000 + }, + { + "epoch": 38.48, + "learning_rate": 3.0758677459185076e-05, + "loss": 1.2887, + "step": 3646500 + }, + { + "epoch": 38.49, + "learning_rate": 3.075603913167365e-05, + "loss": 1.2912, + "step": 3647000 + }, + { + "epoch": 38.49, + "learning_rate": 3.075340080416223e-05, + "loss": 1.3059, + "step": 3647500 + }, + { + "epoch": 38.5, + "learning_rate": 3.07507624766508e-05, + "loss": 1.3369, + "step": 3648000 + }, + { + "epoch": 38.5, + "learning_rate": 3.074812414913938e-05, + "loss": 1.2788, + "step": 3648500 + }, + { + "epoch": 38.51, + "learning_rate": 3.0745485821627954e-05, + "loss": 1.3355, + "step": 3649000 + }, + { + "epoch": 38.51, + "learning_rate": 3.0742847494116536e-05, + "loss": 1.3327, + "step": 3649500 + }, + { + "epoch": 38.52, + "learning_rate": 3.0740209166605105e-05, + "loss": 1.3675, + "step": 3650000 + }, + { + "epoch": 38.52, + "learning_rate": 3.073757083909368e-05, + "loss": 1.3394, + "step": 3650500 + }, + { + "epoch": 38.53, + "learning_rate": 3.073493251158226e-05, + "loss": 1.2871, + "step": 3651000 + }, + { + "epoch": 38.54, + "learning_rate": 3.073229418407084e-05, + "loss": 1.3155, + "step": 3651500 + }, + { + "epoch": 38.54, + "learning_rate": 3.0729655856559413e-05, + "loss": 1.3138, + "step": 3652000 + }, + { + "epoch": 38.55, + "learning_rate": 3.072701752904799e-05, + "loss": 1.2531, + "step": 3652500 + }, + { + "epoch": 38.55, + "learning_rate": 3.0724379201536564e-05, + "loss": 1.3086, + "step": 3653000 + }, + { + "epoch": 38.56, + "learning_rate": 3.072174087402514e-05, + "loss": 1.2629, + "step": 3653500 + }, + { + "epoch": 38.56, + "learning_rate": 3.0719102546513715e-05, + "loss": 1.2449, + "step": 3654000 + }, + { + "epoch": 38.57, + "learning_rate": 3.071646421900229e-05, + "loss": 1.2981, + "step": 3654500 + }, + { + "epoch": 38.57, + "learning_rate": 3.0713825891490866e-05, + "loss": 1.3214, + "step": 3655000 + }, + { + "epoch": 38.58, + "learning_rate": 3.071118756397944e-05, + "loss": 1.3296, + "step": 3655500 + }, + { + "epoch": 38.58, + "learning_rate": 3.070854923646802e-05, + "loss": 1.3121, + "step": 3656000 + }, + { + "epoch": 38.59, + "learning_rate": 3.07059109089566e-05, + "loss": 1.3163, + "step": 3656500 + }, + { + "epoch": 38.59, + "learning_rate": 3.070327258144517e-05, + "loss": 1.3148, + "step": 3657000 + }, + { + "epoch": 38.6, + "learning_rate": 3.0700634253933744e-05, + "loss": 1.3179, + "step": 3657500 + }, + { + "epoch": 38.6, + "learning_rate": 3.0697995926422326e-05, + "loss": 1.3077, + "step": 3658000 + }, + { + "epoch": 38.61, + "learning_rate": 3.06953575989109e-05, + "loss": 1.2817, + "step": 3658500 + }, + { + "epoch": 38.61, + "learning_rate": 3.069271927139948e-05, + "loss": 1.3094, + "step": 3659000 + }, + { + "epoch": 38.62, + "learning_rate": 3.069008094388805e-05, + "loss": 1.2963, + "step": 3659500 + }, + { + "epoch": 38.63, + "learning_rate": 3.068744261637663e-05, + "loss": 1.2877, + "step": 3660000 + }, + { + "epoch": 38.63, + "learning_rate": 3.0684804288865203e-05, + "loss": 1.3078, + "step": 3660500 + }, + { + "epoch": 38.64, + "learning_rate": 3.068216596135378e-05, + "loss": 1.2477, + "step": 3661000 + }, + { + "epoch": 38.64, + "learning_rate": 3.067952763384236e-05, + "loss": 1.3078, + "step": 3661500 + }, + { + "epoch": 38.65, + "learning_rate": 3.067688930633093e-05, + "loss": 1.3221, + "step": 3662000 + }, + { + "epoch": 38.65, + "learning_rate": 3.0674250978819505e-05, + "loss": 1.2753, + "step": 3662500 + }, + { + "epoch": 38.66, + "learning_rate": 3.067161265130809e-05, + "loss": 1.2861, + "step": 3663000 + }, + { + "epoch": 38.66, + "learning_rate": 3.066897432379666e-05, + "loss": 1.2798, + "step": 3663500 + }, + { + "epoch": 38.67, + "learning_rate": 3.066633599628523e-05, + "loss": 1.3205, + "step": 3664000 + }, + { + "epoch": 38.67, + "learning_rate": 3.0663697668773814e-05, + "loss": 1.2458, + "step": 3664500 + }, + { + "epoch": 38.68, + "learning_rate": 3.066105934126239e-05, + "loss": 1.2799, + "step": 3665000 + }, + { + "epoch": 38.68, + "learning_rate": 3.0658421013750965e-05, + "loss": 1.2375, + "step": 3665500 + }, + { + "epoch": 38.69, + "learning_rate": 3.065578268623954e-05, + "loss": 1.2995, + "step": 3666000 + }, + { + "epoch": 38.69, + "learning_rate": 3.0653144358728116e-05, + "loss": 1.3302, + "step": 3666500 + }, + { + "epoch": 38.7, + "learning_rate": 3.065050603121669e-05, + "loss": 1.2859, + "step": 3667000 + }, + { + "epoch": 38.7, + "learning_rate": 3.064786770370527e-05, + "loss": 1.3259, + "step": 3667500 + }, + { + "epoch": 38.71, + "learning_rate": 3.064522937619384e-05, + "loss": 1.2783, + "step": 3668000 + }, + { + "epoch": 38.71, + "learning_rate": 3.0642591048682425e-05, + "loss": 1.3033, + "step": 3668500 + }, + { + "epoch": 38.72, + "learning_rate": 3.0639952721170994e-05, + "loss": 1.2916, + "step": 3669000 + }, + { + "epoch": 38.73, + "learning_rate": 3.063731439365957e-05, + "loss": 1.3068, + "step": 3669500 + }, + { + "epoch": 38.73, + "learning_rate": 3.063467606614815e-05, + "loss": 1.303, + "step": 3670000 + }, + { + "epoch": 38.74, + "learning_rate": 3.063203773863673e-05, + "loss": 1.2467, + "step": 3670500 + }, + { + "epoch": 38.74, + "learning_rate": 3.06293994111253e-05, + "loss": 1.2778, + "step": 3671000 + }, + { + "epoch": 38.75, + "learning_rate": 3.062676108361388e-05, + "loss": 1.268, + "step": 3671500 + }, + { + "epoch": 38.75, + "learning_rate": 3.062412275610245e-05, + "loss": 1.2974, + "step": 3672000 + }, + { + "epoch": 38.76, + "learning_rate": 3.062148442859103e-05, + "loss": 1.3043, + "step": 3672500 + }, + { + "epoch": 38.76, + "learning_rate": 3.0618846101079604e-05, + "loss": 1.2938, + "step": 3673000 + }, + { + "epoch": 38.77, + "learning_rate": 3.061620777356818e-05, + "loss": 1.3003, + "step": 3673500 + }, + { + "epoch": 38.77, + "learning_rate": 3.0613569446056755e-05, + "loss": 1.3006, + "step": 3674000 + }, + { + "epoch": 38.78, + "learning_rate": 3.061093111854533e-05, + "loss": 1.2978, + "step": 3674500 + }, + { + "epoch": 38.78, + "learning_rate": 3.060829279103391e-05, + "loss": 1.3091, + "step": 3675000 + }, + { + "epoch": 38.79, + "learning_rate": 3.060565446352249e-05, + "loss": 1.2824, + "step": 3675500 + }, + { + "epoch": 38.79, + "learning_rate": 3.060301613601106e-05, + "loss": 1.3213, + "step": 3676000 + }, + { + "epoch": 38.8, + "learning_rate": 3.060037780849964e-05, + "loss": 1.3462, + "step": 3676500 + }, + { + "epoch": 38.8, + "learning_rate": 3.0597739480988215e-05, + "loss": 1.3336, + "step": 3677000 + }, + { + "epoch": 38.81, + "learning_rate": 3.059510115347679e-05, + "loss": 1.2941, + "step": 3677500 + }, + { + "epoch": 38.82, + "learning_rate": 3.0592462825965366e-05, + "loss": 1.3338, + "step": 3678000 + }, + { + "epoch": 38.82, + "learning_rate": 3.058982449845394e-05, + "loss": 1.2603, + "step": 3678500 + }, + { + "epoch": 38.83, + "learning_rate": 3.058718617094252e-05, + "loss": 1.2919, + "step": 3679000 + }, + { + "epoch": 38.83, + "learning_rate": 3.058454784343109e-05, + "loss": 1.3111, + "step": 3679500 + }, + { + "epoch": 38.84, + "learning_rate": 3.058190951591967e-05, + "loss": 1.2985, + "step": 3680000 + }, + { + "epoch": 38.84, + "learning_rate": 3.057927118840825e-05, + "loss": 1.2699, + "step": 3680500 + }, + { + "epoch": 38.85, + "learning_rate": 3.057663286089682e-05, + "loss": 1.305, + "step": 3681000 + }, + { + "epoch": 38.85, + "learning_rate": 3.0573994533385394e-05, + "loss": 1.3043, + "step": 3681500 + }, + { + "epoch": 38.86, + "learning_rate": 3.0571356205873977e-05, + "loss": 1.3073, + "step": 3682000 + }, + { + "epoch": 38.86, + "learning_rate": 3.056871787836255e-05, + "loss": 1.3479, + "step": 3682500 + }, + { + "epoch": 38.87, + "learning_rate": 3.056607955085112e-05, + "loss": 1.2775, + "step": 3683000 + }, + { + "epoch": 38.87, + "learning_rate": 3.05634412233397e-05, + "loss": 1.2491, + "step": 3683500 + }, + { + "epoch": 38.88, + "learning_rate": 3.056080289582828e-05, + "loss": 1.2949, + "step": 3684000 + }, + { + "epoch": 38.88, + "learning_rate": 3.0558164568316854e-05, + "loss": 1.306, + "step": 3684500 + }, + { + "epoch": 38.89, + "learning_rate": 3.055552624080543e-05, + "loss": 1.3837, + "step": 3685000 + }, + { + "epoch": 38.89, + "learning_rate": 3.0552887913294005e-05, + "loss": 1.2902, + "step": 3685500 + }, + { + "epoch": 38.9, + "learning_rate": 3.055024958578258e-05, + "loss": 1.2794, + "step": 3686000 + }, + { + "epoch": 38.9, + "learning_rate": 3.0547611258271156e-05, + "loss": 1.304, + "step": 3686500 + }, + { + "epoch": 38.91, + "learning_rate": 3.054497293075974e-05, + "loss": 1.3062, + "step": 3687000 + }, + { + "epoch": 38.92, + "learning_rate": 3.0542334603248314e-05, + "loss": 1.2228, + "step": 3687500 + }, + { + "epoch": 38.92, + "learning_rate": 3.053969627573688e-05, + "loss": 1.2226, + "step": 3688000 + }, + { + "epoch": 38.93, + "learning_rate": 3.0537057948225465e-05, + "loss": 1.3092, + "step": 3688500 + }, + { + "epoch": 38.93, + "learning_rate": 3.053441962071404e-05, + "loss": 1.2809, + "step": 3689000 + }, + { + "epoch": 38.94, + "learning_rate": 3.0531781293202616e-05, + "loss": 1.3087, + "step": 3689500 + }, + { + "epoch": 38.94, + "learning_rate": 3.052914296569119e-05, + "loss": 1.3397, + "step": 3690000 + }, + { + "epoch": 38.95, + "learning_rate": 3.052650463817977e-05, + "loss": 1.3004, + "step": 3690500 + }, + { + "epoch": 38.95, + "learning_rate": 3.052386631066834e-05, + "loss": 1.3025, + "step": 3691000 + }, + { + "epoch": 38.96, + "learning_rate": 3.052122798315692e-05, + "loss": 1.2884, + "step": 3691500 + }, + { + "epoch": 38.96, + "learning_rate": 3.051858965564549e-05, + "loss": 1.338, + "step": 3692000 + }, + { + "epoch": 38.97, + "learning_rate": 3.051595132813407e-05, + "loss": 1.2948, + "step": 3692500 + }, + { + "epoch": 38.97, + "learning_rate": 3.0513313000622644e-05, + "loss": 1.2783, + "step": 3693000 + }, + { + "epoch": 38.98, + "learning_rate": 3.0510674673111223e-05, + "loss": 1.302, + "step": 3693500 + }, + { + "epoch": 38.98, + "learning_rate": 3.05080363455998e-05, + "loss": 1.3005, + "step": 3694000 + }, + { + "epoch": 38.99, + "learning_rate": 3.0505398018088377e-05, + "loss": 1.2677, + "step": 3694500 + }, + { + "epoch": 38.99, + "learning_rate": 3.050275969057695e-05, + "loss": 1.3221, + "step": 3695000 + }, + { + "epoch": 39.0, + "learning_rate": 3.0500121363065525e-05, + "loss": 1.2966, + "step": 3695500 + }, + { + "epoch": 39.01, + "learning_rate": 3.0497483035554104e-05, + "loss": 1.2627, + "step": 3696000 + }, + { + "epoch": 39.01, + "learning_rate": 3.049484470804268e-05, + "loss": 1.3116, + "step": 3696500 + }, + { + "epoch": 39.02, + "learning_rate": 3.0492206380531258e-05, + "loss": 1.2912, + "step": 3697000 + }, + { + "epoch": 39.02, + "learning_rate": 3.048956805301983e-05, + "loss": 1.2074, + "step": 3697500 + }, + { + "epoch": 39.03, + "learning_rate": 3.0486929725508406e-05, + "loss": 1.2317, + "step": 3698000 + }, + { + "epoch": 39.03, + "learning_rate": 3.0484291397996985e-05, + "loss": 1.2856, + "step": 3698500 + }, + { + "epoch": 39.04, + "learning_rate": 3.048165307048556e-05, + "loss": 1.2923, + "step": 3699000 + }, + { + "epoch": 39.04, + "learning_rate": 3.047901474297414e-05, + "loss": 1.2612, + "step": 3699500 + }, + { + "epoch": 39.05, + "learning_rate": 3.047637641546271e-05, + "loss": 1.2874, + "step": 3700000 + }, + { + "epoch": 39.05, + "learning_rate": 3.0473738087951287e-05, + "loss": 1.2982, + "step": 3700500 + }, + { + "epoch": 39.06, + "learning_rate": 3.0471099760439865e-05, + "loss": 1.3113, + "step": 3701000 + }, + { + "epoch": 39.06, + "learning_rate": 3.046846143292844e-05, + "loss": 1.2585, + "step": 3701500 + }, + { + "epoch": 39.07, + "learning_rate": 3.0465823105417013e-05, + "loss": 1.3048, + "step": 3702000 + }, + { + "epoch": 39.07, + "learning_rate": 3.046318477790559e-05, + "loss": 1.3022, + "step": 3702500 + }, + { + "epoch": 39.08, + "learning_rate": 3.0460546450394167e-05, + "loss": 1.3191, + "step": 3703000 + }, + { + "epoch": 39.08, + "learning_rate": 3.0457908122882746e-05, + "loss": 1.256, + "step": 3703500 + }, + { + "epoch": 39.09, + "learning_rate": 3.0455269795371322e-05, + "loss": 1.3103, + "step": 3704000 + }, + { + "epoch": 39.09, + "learning_rate": 3.0452631467859894e-05, + "loss": 1.2801, + "step": 3704500 + }, + { + "epoch": 39.1, + "learning_rate": 3.044999314034847e-05, + "loss": 1.2666, + "step": 3705000 + }, + { + "epoch": 39.11, + "learning_rate": 3.0447354812837048e-05, + "loss": 1.3158, + "step": 3705500 + }, + { + "epoch": 39.11, + "learning_rate": 3.0444716485325624e-05, + "loss": 1.3281, + "step": 3706000 + }, + { + "epoch": 39.12, + "learning_rate": 3.0442078157814203e-05, + "loss": 1.2227, + "step": 3706500 + }, + { + "epoch": 39.12, + "learning_rate": 3.0439439830302775e-05, + "loss": 1.3007, + "step": 3707000 + }, + { + "epoch": 39.13, + "learning_rate": 3.043680150279135e-05, + "loss": 1.2608, + "step": 3707500 + }, + { + "epoch": 39.13, + "learning_rate": 3.043416317527993e-05, + "loss": 1.277, + "step": 3708000 + }, + { + "epoch": 39.14, + "learning_rate": 3.0431524847768504e-05, + "loss": 1.257, + "step": 3708500 + }, + { + "epoch": 39.14, + "learning_rate": 3.0428886520257083e-05, + "loss": 1.3345, + "step": 3709000 + }, + { + "epoch": 39.15, + "learning_rate": 3.0426248192745655e-05, + "loss": 1.3154, + "step": 3709500 + }, + { + "epoch": 39.15, + "learning_rate": 3.042360986523423e-05, + "loss": 1.285, + "step": 3710000 + }, + { + "epoch": 39.16, + "learning_rate": 3.042097153772281e-05, + "loss": 1.2749, + "step": 3710500 + }, + { + "epoch": 39.16, + "learning_rate": 3.0418333210211385e-05, + "loss": 1.3185, + "step": 3711000 + }, + { + "epoch": 39.17, + "learning_rate": 3.0415694882699957e-05, + "loss": 1.2652, + "step": 3711500 + }, + { + "epoch": 39.17, + "learning_rate": 3.0413056555188536e-05, + "loss": 1.2825, + "step": 3712000 + }, + { + "epoch": 39.18, + "learning_rate": 3.0410418227677112e-05, + "loss": 1.2574, + "step": 3712500 + }, + { + "epoch": 39.18, + "learning_rate": 3.040777990016569e-05, + "loss": 1.2609, + "step": 3713000 + }, + { + "epoch": 39.19, + "learning_rate": 3.0405141572654266e-05, + "loss": 1.2943, + "step": 3713500 + }, + { + "epoch": 39.19, + "learning_rate": 3.0402503245142838e-05, + "loss": 1.314, + "step": 3714000 + }, + { + "epoch": 39.2, + "learning_rate": 3.0399864917631417e-05, + "loss": 1.3016, + "step": 3714500 + }, + { + "epoch": 39.21, + "learning_rate": 3.0397226590119993e-05, + "loss": 1.3011, + "step": 3715000 + }, + { + "epoch": 39.21, + "learning_rate": 3.039458826260857e-05, + "loss": 1.2779, + "step": 3715500 + }, + { + "epoch": 39.22, + "learning_rate": 3.0391949935097147e-05, + "loss": 1.3138, + "step": 3716000 + }, + { + "epoch": 39.22, + "learning_rate": 3.038931160758572e-05, + "loss": 1.2833, + "step": 3716500 + }, + { + "epoch": 39.23, + "learning_rate": 3.0386673280074295e-05, + "loss": 1.2896, + "step": 3717000 + }, + { + "epoch": 39.23, + "learning_rate": 3.0384034952562873e-05, + "loss": 1.2702, + "step": 3717500 + }, + { + "epoch": 39.24, + "learning_rate": 3.038139662505145e-05, + "loss": 1.2876, + "step": 3718000 + }, + { + "epoch": 39.24, + "learning_rate": 3.0378758297540028e-05, + "loss": 1.3494, + "step": 3718500 + }, + { + "epoch": 39.25, + "learning_rate": 3.03761199700286e-05, + "loss": 1.2678, + "step": 3719000 + }, + { + "epoch": 39.25, + "learning_rate": 3.0373481642517175e-05, + "loss": 1.3026, + "step": 3719500 + }, + { + "epoch": 39.26, + "learning_rate": 3.0370843315005754e-05, + "loss": 1.3281, + "step": 3720000 + }, + { + "epoch": 39.26, + "learning_rate": 3.036820498749433e-05, + "loss": 1.2905, + "step": 3720500 + }, + { + "epoch": 39.27, + "learning_rate": 3.0365566659982902e-05, + "loss": 1.2438, + "step": 3721000 + }, + { + "epoch": 39.27, + "learning_rate": 3.036292833247148e-05, + "loss": 1.3483, + "step": 3721500 + }, + { + "epoch": 39.28, + "learning_rate": 3.0360290004960056e-05, + "loss": 1.3028, + "step": 3722000 + }, + { + "epoch": 39.28, + "learning_rate": 3.0357651677448635e-05, + "loss": 1.2744, + "step": 3722500 + }, + { + "epoch": 39.29, + "learning_rate": 3.035501334993721e-05, + "loss": 1.3814, + "step": 3723000 + }, + { + "epoch": 39.3, + "learning_rate": 3.0352375022425783e-05, + "loss": 1.3052, + "step": 3723500 + }, + { + "epoch": 39.3, + "learning_rate": 3.034973669491436e-05, + "loss": 1.2657, + "step": 3724000 + }, + { + "epoch": 39.31, + "learning_rate": 3.0347098367402937e-05, + "loss": 1.3492, + "step": 3724500 + }, + { + "epoch": 39.31, + "learning_rate": 3.0344460039891516e-05, + "loss": 1.285, + "step": 3725000 + }, + { + "epoch": 39.32, + "learning_rate": 3.034182171238009e-05, + "loss": 1.2748, + "step": 3725500 + }, + { + "epoch": 39.32, + "learning_rate": 3.0339183384868663e-05, + "loss": 1.29, + "step": 3726000 + }, + { + "epoch": 39.33, + "learning_rate": 3.0336545057357242e-05, + "loss": 1.3286, + "step": 3726500 + }, + { + "epoch": 39.33, + "learning_rate": 3.0333906729845818e-05, + "loss": 1.3099, + "step": 3727000 + }, + { + "epoch": 39.34, + "learning_rate": 3.0331268402334397e-05, + "loss": 1.3255, + "step": 3727500 + }, + { + "epoch": 39.34, + "learning_rate": 3.0328630074822972e-05, + "loss": 1.3504, + "step": 3728000 + }, + { + "epoch": 39.35, + "learning_rate": 3.0325991747311544e-05, + "loss": 1.2631, + "step": 3728500 + }, + { + "epoch": 39.35, + "learning_rate": 3.032335341980012e-05, + "loss": 1.282, + "step": 3729000 + }, + { + "epoch": 39.36, + "learning_rate": 3.03207150922887e-05, + "loss": 1.3016, + "step": 3729500 + }, + { + "epoch": 39.36, + "learning_rate": 3.0318076764777274e-05, + "loss": 1.2831, + "step": 3730000 + }, + { + "epoch": 39.37, + "learning_rate": 3.0315438437265846e-05, + "loss": 1.3083, + "step": 3730500 + }, + { + "epoch": 39.37, + "learning_rate": 3.0312800109754425e-05, + "loss": 1.2929, + "step": 3731000 + }, + { + "epoch": 39.38, + "learning_rate": 3.0310161782243e-05, + "loss": 1.274, + "step": 3731500 + }, + { + "epoch": 39.38, + "learning_rate": 3.030752345473158e-05, + "loss": 1.2958, + "step": 3732000 + }, + { + "epoch": 39.39, + "learning_rate": 3.0304885127220155e-05, + "loss": 1.2795, + "step": 3732500 + }, + { + "epoch": 39.4, + "learning_rate": 3.0302246799708727e-05, + "loss": 1.2969, + "step": 3733000 + }, + { + "epoch": 39.4, + "learning_rate": 3.0299608472197306e-05, + "loss": 1.2972, + "step": 3733500 + }, + { + "epoch": 39.41, + "learning_rate": 3.029697014468588e-05, + "loss": 1.244, + "step": 3734000 + }, + { + "epoch": 39.41, + "learning_rate": 3.029433181717446e-05, + "loss": 1.2301, + "step": 3734500 + }, + { + "epoch": 39.42, + "learning_rate": 3.0291693489663036e-05, + "loss": 1.2682, + "step": 3735000 + }, + { + "epoch": 39.42, + "learning_rate": 3.0289055162151608e-05, + "loss": 1.2676, + "step": 3735500 + }, + { + "epoch": 39.43, + "learning_rate": 3.0286416834640187e-05, + "loss": 1.2455, + "step": 3736000 + }, + { + "epoch": 39.43, + "learning_rate": 3.0283778507128762e-05, + "loss": 1.2341, + "step": 3736500 + }, + { + "epoch": 39.44, + "learning_rate": 3.028114017961734e-05, + "loss": 1.3537, + "step": 3737000 + }, + { + "epoch": 39.44, + "learning_rate": 3.0278501852105917e-05, + "loss": 1.2914, + "step": 3737500 + }, + { + "epoch": 39.45, + "learning_rate": 3.027586352459449e-05, + "loss": 1.2666, + "step": 3738000 + }, + { + "epoch": 39.45, + "learning_rate": 3.0273225197083068e-05, + "loss": 1.2779, + "step": 3738500 + }, + { + "epoch": 39.46, + "learning_rate": 3.0270586869571643e-05, + "loss": 1.2993, + "step": 3739000 + }, + { + "epoch": 39.46, + "learning_rate": 3.0267948542060222e-05, + "loss": 1.2787, + "step": 3739500 + }, + { + "epoch": 39.47, + "learning_rate": 3.026531021454879e-05, + "loss": 1.2817, + "step": 3740000 + }, + { + "epoch": 39.47, + "learning_rate": 3.026267188703737e-05, + "loss": 1.2879, + "step": 3740500 + }, + { + "epoch": 39.48, + "learning_rate": 3.0260033559525945e-05, + "loss": 1.2679, + "step": 3741000 + }, + { + "epoch": 39.49, + "learning_rate": 3.0257395232014524e-05, + "loss": 1.2716, + "step": 3741500 + }, + { + "epoch": 39.49, + "learning_rate": 3.02547569045031e-05, + "loss": 1.3122, + "step": 3742000 + }, + { + "epoch": 39.5, + "learning_rate": 3.025211857699167e-05, + "loss": 1.2329, + "step": 3742500 + }, + { + "epoch": 39.5, + "learning_rate": 3.024948024948025e-05, + "loss": 1.2737, + "step": 3743000 + }, + { + "epoch": 39.51, + "learning_rate": 3.0246841921968826e-05, + "loss": 1.3606, + "step": 3743500 + }, + { + "epoch": 39.51, + "learning_rate": 3.0244203594457405e-05, + "loss": 1.2852, + "step": 3744000 + }, + { + "epoch": 39.52, + "learning_rate": 3.024156526694598e-05, + "loss": 1.2874, + "step": 3744500 + }, + { + "epoch": 39.52, + "learning_rate": 3.0238926939434552e-05, + "loss": 1.2643, + "step": 3745000 + }, + { + "epoch": 39.53, + "learning_rate": 3.023628861192313e-05, + "loss": 1.2696, + "step": 3745500 + }, + { + "epoch": 39.53, + "learning_rate": 3.0233650284411707e-05, + "loss": 1.2566, + "step": 3746000 + }, + { + "epoch": 39.54, + "learning_rate": 3.0231011956900286e-05, + "loss": 1.3058, + "step": 3746500 + }, + { + "epoch": 39.54, + "learning_rate": 3.022837362938886e-05, + "loss": 1.3486, + "step": 3747000 + }, + { + "epoch": 39.55, + "learning_rate": 3.0225735301877433e-05, + "loss": 1.3049, + "step": 3747500 + }, + { + "epoch": 39.55, + "learning_rate": 3.0223096974366012e-05, + "loss": 1.3453, + "step": 3748000 + }, + { + "epoch": 39.56, + "learning_rate": 3.0220458646854588e-05, + "loss": 1.2766, + "step": 3748500 + }, + { + "epoch": 39.56, + "learning_rate": 3.0217820319343166e-05, + "loss": 1.2796, + "step": 3749000 + }, + { + "epoch": 39.57, + "learning_rate": 3.021518199183174e-05, + "loss": 1.276, + "step": 3749500 + }, + { + "epoch": 39.57, + "learning_rate": 3.0212543664320314e-05, + "loss": 1.2452, + "step": 3750000 + }, + { + "epoch": 39.58, + "learning_rate": 3.0209905336808893e-05, + "loss": 1.2875, + "step": 3750500 + }, + { + "epoch": 39.59, + "learning_rate": 3.020726700929747e-05, + "loss": 1.3366, + "step": 3751000 + }, + { + "epoch": 39.59, + "learning_rate": 3.0204628681786047e-05, + "loss": 1.2837, + "step": 3751500 + }, + { + "epoch": 39.6, + "learning_rate": 3.0201990354274616e-05, + "loss": 1.2585, + "step": 3752000 + }, + { + "epoch": 39.6, + "learning_rate": 3.0199352026763195e-05, + "loss": 1.3008, + "step": 3752500 + }, + { + "epoch": 39.61, + "learning_rate": 3.019671369925177e-05, + "loss": 1.3413, + "step": 3753000 + }, + { + "epoch": 39.61, + "learning_rate": 3.019407537174035e-05, + "loss": 1.2834, + "step": 3753500 + }, + { + "epoch": 39.62, + "learning_rate": 3.0191437044228925e-05, + "loss": 1.3547, + "step": 3754000 + }, + { + "epoch": 39.62, + "learning_rate": 3.0188798716717497e-05, + "loss": 1.283, + "step": 3754500 + }, + { + "epoch": 39.63, + "learning_rate": 3.0186160389206076e-05, + "loss": 1.2926, + "step": 3755000 + }, + { + "epoch": 39.63, + "learning_rate": 3.018352206169465e-05, + "loss": 1.2841, + "step": 3755500 + }, + { + "epoch": 39.64, + "learning_rate": 3.018088373418323e-05, + "loss": 1.2965, + "step": 3756000 + }, + { + "epoch": 39.64, + "learning_rate": 3.0178245406671805e-05, + "loss": 1.2879, + "step": 3756500 + }, + { + "epoch": 39.65, + "learning_rate": 3.0175607079160378e-05, + "loss": 1.314, + "step": 3757000 + }, + { + "epoch": 39.65, + "learning_rate": 3.0172968751648956e-05, + "loss": 1.2496, + "step": 3757500 + }, + { + "epoch": 39.66, + "learning_rate": 3.0170330424137532e-05, + "loss": 1.2746, + "step": 3758000 + }, + { + "epoch": 39.66, + "learning_rate": 3.016769209662611e-05, + "loss": 1.3129, + "step": 3758500 + }, + { + "epoch": 39.67, + "learning_rate": 3.0165053769114683e-05, + "loss": 1.2871, + "step": 3759000 + }, + { + "epoch": 39.68, + "learning_rate": 3.016241544160326e-05, + "loss": 1.2567, + "step": 3759500 + }, + { + "epoch": 39.68, + "learning_rate": 3.0159777114091837e-05, + "loss": 1.2215, + "step": 3760000 + }, + { + "epoch": 39.69, + "learning_rate": 3.0157138786580413e-05, + "loss": 1.3382, + "step": 3760500 + }, + { + "epoch": 39.69, + "learning_rate": 3.015450045906899e-05, + "loss": 1.2814, + "step": 3761000 + }, + { + "epoch": 39.7, + "learning_rate": 3.0151862131557564e-05, + "loss": 1.3407, + "step": 3761500 + }, + { + "epoch": 39.7, + "learning_rate": 3.014922380404614e-05, + "loss": 1.2551, + "step": 3762000 + }, + { + "epoch": 39.71, + "learning_rate": 3.0146585476534718e-05, + "loss": 1.2868, + "step": 3762500 + }, + { + "epoch": 39.71, + "learning_rate": 3.0143947149023294e-05, + "loss": 1.3158, + "step": 3763000 + }, + { + "epoch": 39.72, + "learning_rate": 3.0141308821511872e-05, + "loss": 1.3082, + "step": 3763500 + }, + { + "epoch": 39.72, + "learning_rate": 3.013867049400044e-05, + "loss": 1.2573, + "step": 3764000 + }, + { + "epoch": 39.73, + "learning_rate": 3.013603216648902e-05, + "loss": 1.3264, + "step": 3764500 + }, + { + "epoch": 39.73, + "learning_rate": 3.0133393838977596e-05, + "loss": 1.323, + "step": 3765000 + }, + { + "epoch": 39.74, + "learning_rate": 3.0130755511466174e-05, + "loss": 1.3086, + "step": 3765500 + }, + { + "epoch": 39.74, + "learning_rate": 3.012811718395475e-05, + "loss": 1.2547, + "step": 3766000 + }, + { + "epoch": 39.75, + "learning_rate": 3.0125478856443322e-05, + "loss": 1.2832, + "step": 3766500 + }, + { + "epoch": 39.75, + "learning_rate": 3.01228405289319e-05, + "loss": 1.247, + "step": 3767000 + }, + { + "epoch": 39.76, + "learning_rate": 3.0120202201420476e-05, + "loss": 1.271, + "step": 3767500 + }, + { + "epoch": 39.76, + "learning_rate": 3.0117563873909055e-05, + "loss": 1.287, + "step": 3768000 + }, + { + "epoch": 39.77, + "learning_rate": 3.0114925546397627e-05, + "loss": 1.2517, + "step": 3768500 + }, + { + "epoch": 39.78, + "learning_rate": 3.0112287218886203e-05, + "loss": 1.2938, + "step": 3769000 + }, + { + "epoch": 39.78, + "learning_rate": 3.010964889137478e-05, + "loss": 1.3196, + "step": 3769500 + }, + { + "epoch": 39.79, + "learning_rate": 3.0107010563863357e-05, + "loss": 1.2566, + "step": 3770000 + }, + { + "epoch": 39.79, + "learning_rate": 3.0104372236351936e-05, + "loss": 1.2695, + "step": 3770500 + }, + { + "epoch": 39.8, + "learning_rate": 3.0101733908840508e-05, + "loss": 1.3343, + "step": 3771000 + }, + { + "epoch": 39.8, + "learning_rate": 3.0099095581329084e-05, + "loss": 1.3017, + "step": 3771500 + }, + { + "epoch": 39.81, + "learning_rate": 3.0096457253817663e-05, + "loss": 1.2487, + "step": 3772000 + }, + { + "epoch": 39.81, + "learning_rate": 3.0093818926306238e-05, + "loss": 1.2956, + "step": 3772500 + }, + { + "epoch": 39.82, + "learning_rate": 3.0091180598794817e-05, + "loss": 1.3198, + "step": 3773000 + }, + { + "epoch": 39.82, + "learning_rate": 3.008854227128339e-05, + "loss": 1.2935, + "step": 3773500 + }, + { + "epoch": 39.83, + "learning_rate": 3.0085903943771964e-05, + "loss": 1.3179, + "step": 3774000 + }, + { + "epoch": 39.83, + "learning_rate": 3.0083265616260543e-05, + "loss": 1.3211, + "step": 3774500 + }, + { + "epoch": 39.84, + "learning_rate": 3.008062728874912e-05, + "loss": 1.287, + "step": 3775000 + }, + { + "epoch": 39.84, + "learning_rate": 3.0077988961237698e-05, + "loss": 1.3208, + "step": 3775500 + }, + { + "epoch": 39.85, + "learning_rate": 3.0075350633726266e-05, + "loss": 1.3272, + "step": 3776000 + }, + { + "epoch": 39.85, + "learning_rate": 3.0072712306214845e-05, + "loss": 1.2802, + "step": 3776500 + }, + { + "epoch": 39.86, + "learning_rate": 3.007007397870342e-05, + "loss": 1.2805, + "step": 3777000 + }, + { + "epoch": 39.87, + "learning_rate": 3.0067435651192e-05, + "loss": 1.2899, + "step": 3777500 + }, + { + "epoch": 39.87, + "learning_rate": 3.0064797323680572e-05, + "loss": 1.3023, + "step": 3778000 + }, + { + "epoch": 39.88, + "learning_rate": 3.0062158996169147e-05, + "loss": 1.2987, + "step": 3778500 + }, + { + "epoch": 39.88, + "learning_rate": 3.0059520668657726e-05, + "loss": 1.2757, + "step": 3779000 + }, + { + "epoch": 39.89, + "learning_rate": 3.00568823411463e-05, + "loss": 1.2729, + "step": 3779500 + }, + { + "epoch": 39.89, + "learning_rate": 3.005424401363488e-05, + "loss": 1.2831, + "step": 3780000 + }, + { + "epoch": 39.9, + "learning_rate": 3.0051605686123453e-05, + "loss": 1.2759, + "step": 3780500 + }, + { + "epoch": 39.9, + "learning_rate": 3.0048967358612028e-05, + "loss": 1.32, + "step": 3781000 + }, + { + "epoch": 39.91, + "learning_rate": 3.0046329031100607e-05, + "loss": 1.2889, + "step": 3781500 + }, + { + "epoch": 39.91, + "learning_rate": 3.0043690703589182e-05, + "loss": 1.3122, + "step": 3782000 + }, + { + "epoch": 39.92, + "learning_rate": 3.004105237607776e-05, + "loss": 1.3377, + "step": 3782500 + }, + { + "epoch": 39.92, + "learning_rate": 3.0038414048566333e-05, + "loss": 1.2814, + "step": 3783000 + }, + { + "epoch": 39.93, + "learning_rate": 3.003577572105491e-05, + "loss": 1.2565, + "step": 3783500 + }, + { + "epoch": 39.93, + "learning_rate": 3.0033137393543488e-05, + "loss": 1.1767, + "step": 3784000 + }, + { + "epoch": 39.94, + "learning_rate": 3.0030499066032063e-05, + "loss": 1.2786, + "step": 3784500 + }, + { + "epoch": 39.94, + "learning_rate": 3.0027860738520642e-05, + "loss": 1.2608, + "step": 3785000 + }, + { + "epoch": 39.95, + "learning_rate": 3.0025222411009214e-05, + "loss": 1.2622, + "step": 3785500 + }, + { + "epoch": 39.95, + "learning_rate": 3.002258408349779e-05, + "loss": 1.308, + "step": 3786000 + }, + { + "epoch": 39.96, + "learning_rate": 3.001994575598637e-05, + "loss": 1.2896, + "step": 3786500 + }, + { + "epoch": 39.97, + "learning_rate": 3.0017307428474944e-05, + "loss": 1.3324, + "step": 3787000 + }, + { + "epoch": 39.97, + "learning_rate": 3.0014669100963516e-05, + "loss": 1.2818, + "step": 3787500 + }, + { + "epoch": 39.98, + "learning_rate": 3.001203077345209e-05, + "loss": 1.3104, + "step": 3788000 + }, + { + "epoch": 39.98, + "learning_rate": 3.000939244594067e-05, + "loss": 1.3077, + "step": 3788500 + }, + { + "epoch": 39.99, + "learning_rate": 3.000675411842925e-05, + "loss": 1.2942, + "step": 3789000 + }, + { + "epoch": 39.99, + "learning_rate": 3.0004115790917825e-05, + "loss": 1.2591, + "step": 3789500 + }, + { + "epoch": 40.0, + "learning_rate": 3.0001477463406397e-05, + "loss": 1.2968, + "step": 3790000 + }, + { + "epoch": 40.0, + "learning_rate": 2.9998839135894972e-05, + "loss": 1.2073, + "step": 3790500 + }, + { + "epoch": 40.01, + "learning_rate": 2.999620080838355e-05, + "loss": 1.3145, + "step": 3791000 + }, + { + "epoch": 40.01, + "learning_rate": 2.9993562480872127e-05, + "loss": 1.3051, + "step": 3791500 + }, + { + "epoch": 40.02, + "learning_rate": 2.9990924153360706e-05, + "loss": 1.327, + "step": 3792000 + }, + { + "epoch": 40.02, + "learning_rate": 2.9988285825849278e-05, + "loss": 1.2778, + "step": 3792500 + }, + { + "epoch": 40.03, + "learning_rate": 2.9985647498337853e-05, + "loss": 1.2692, + "step": 3793000 + }, + { + "epoch": 40.03, + "learning_rate": 2.9983009170826432e-05, + "loss": 1.263, + "step": 3793500 + }, + { + "epoch": 40.04, + "learning_rate": 2.9980370843315008e-05, + "loss": 1.2983, + "step": 3794000 + }, + { + "epoch": 40.04, + "learning_rate": 2.9977732515803587e-05, + "loss": 1.2429, + "step": 3794500 + }, + { + "epoch": 40.05, + "learning_rate": 2.997509418829216e-05, + "loss": 1.2505, + "step": 3795000 + }, + { + "epoch": 40.06, + "learning_rate": 2.9972455860780734e-05, + "loss": 1.262, + "step": 3795500 + }, + { + "epoch": 40.06, + "learning_rate": 2.9969817533269313e-05, + "loss": 1.2848, + "step": 3796000 + }, + { + "epoch": 40.07, + "learning_rate": 2.996717920575789e-05, + "loss": 1.24, + "step": 3796500 + }, + { + "epoch": 40.07, + "learning_rate": 2.996454087824646e-05, + "loss": 1.2671, + "step": 3797000 + }, + { + "epoch": 40.08, + "learning_rate": 2.996190255073504e-05, + "loss": 1.2988, + "step": 3797500 + }, + { + "epoch": 40.08, + "learning_rate": 2.9959264223223615e-05, + "loss": 1.2656, + "step": 3798000 + }, + { + "epoch": 40.09, + "learning_rate": 2.9956625895712194e-05, + "loss": 1.2575, + "step": 3798500 + }, + { + "epoch": 40.09, + "learning_rate": 2.995398756820077e-05, + "loss": 1.2889, + "step": 3799000 + }, + { + "epoch": 40.1, + "learning_rate": 2.995134924068934e-05, + "loss": 1.2592, + "step": 3799500 + }, + { + "epoch": 40.1, + "learning_rate": 2.994871091317792e-05, + "loss": 1.2684, + "step": 3800000 + }, + { + "epoch": 40.11, + "learning_rate": 2.9946072585666496e-05, + "loss": 1.2743, + "step": 3800500 + }, + { + "epoch": 40.11, + "learning_rate": 2.9943434258155075e-05, + "loss": 1.2797, + "step": 3801000 + }, + { + "epoch": 40.12, + "learning_rate": 2.994079593064365e-05, + "loss": 1.2696, + "step": 3801500 + }, + { + "epoch": 40.12, + "learning_rate": 2.9938157603132222e-05, + "loss": 1.2236, + "step": 3802000 + }, + { + "epoch": 40.13, + "learning_rate": 2.9935519275620798e-05, + "loss": 1.2962, + "step": 3802500 + }, + { + "epoch": 40.13, + "learning_rate": 2.9932880948109377e-05, + "loss": 1.2417, + "step": 3803000 + }, + { + "epoch": 40.14, + "learning_rate": 2.9930242620597952e-05, + "loss": 1.2544, + "step": 3803500 + }, + { + "epoch": 40.14, + "learning_rate": 2.992760429308653e-05, + "loss": 1.3332, + "step": 3804000 + }, + { + "epoch": 40.15, + "learning_rate": 2.9924965965575103e-05, + "loss": 1.2941, + "step": 3804500 + }, + { + "epoch": 40.16, + "learning_rate": 2.992232763806368e-05, + "loss": 1.2687, + "step": 3805000 + }, + { + "epoch": 40.16, + "learning_rate": 2.9919689310552257e-05, + "loss": 1.2653, + "step": 3805500 + }, + { + "epoch": 40.17, + "learning_rate": 2.9917050983040833e-05, + "loss": 1.3053, + "step": 3806000 + }, + { + "epoch": 40.17, + "learning_rate": 2.9914412655529405e-05, + "loss": 1.3103, + "step": 3806500 + }, + { + "epoch": 40.18, + "learning_rate": 2.9911774328017984e-05, + "loss": 1.3036, + "step": 3807000 + }, + { + "epoch": 40.18, + "learning_rate": 2.990913600050656e-05, + "loss": 1.2935, + "step": 3807500 + }, + { + "epoch": 40.19, + "learning_rate": 2.9906497672995138e-05, + "loss": 1.253, + "step": 3808000 + }, + { + "epoch": 40.19, + "learning_rate": 2.9903859345483714e-05, + "loss": 1.3203, + "step": 3808500 + }, + { + "epoch": 40.2, + "learning_rate": 2.9901221017972286e-05, + "loss": 1.2584, + "step": 3809000 + }, + { + "epoch": 40.2, + "learning_rate": 2.9898582690460865e-05, + "loss": 1.2824, + "step": 3809500 + }, + { + "epoch": 40.21, + "learning_rate": 2.989594436294944e-05, + "loss": 1.2789, + "step": 3810000 + }, + { + "epoch": 40.21, + "learning_rate": 2.989330603543802e-05, + "loss": 1.3012, + "step": 3810500 + }, + { + "epoch": 40.22, + "learning_rate": 2.9890667707926595e-05, + "loss": 1.3143, + "step": 3811000 + }, + { + "epoch": 40.22, + "learning_rate": 2.9888029380415167e-05, + "loss": 1.2876, + "step": 3811500 + }, + { + "epoch": 40.23, + "learning_rate": 2.9885391052903746e-05, + "loss": 1.3121, + "step": 3812000 + }, + { + "epoch": 40.23, + "learning_rate": 2.988275272539232e-05, + "loss": 1.291, + "step": 3812500 + }, + { + "epoch": 40.24, + "learning_rate": 2.98801143978809e-05, + "loss": 1.3156, + "step": 3813000 + }, + { + "epoch": 40.25, + "learning_rate": 2.987747607036947e-05, + "loss": 1.2794, + "step": 3813500 + }, + { + "epoch": 40.25, + "learning_rate": 2.9874837742858047e-05, + "loss": 1.3067, + "step": 3814000 + }, + { + "epoch": 40.26, + "learning_rate": 2.9872199415346623e-05, + "loss": 1.2771, + "step": 3814500 + }, + { + "epoch": 40.26, + "learning_rate": 2.9869561087835202e-05, + "loss": 1.2814, + "step": 3815000 + }, + { + "epoch": 40.27, + "learning_rate": 2.9866922760323777e-05, + "loss": 1.3138, + "step": 3815500 + }, + { + "epoch": 40.27, + "learning_rate": 2.986428443281235e-05, + "loss": 1.2927, + "step": 3816000 + }, + { + "epoch": 40.28, + "learning_rate": 2.986164610530093e-05, + "loss": 1.2839, + "step": 3816500 + }, + { + "epoch": 40.28, + "learning_rate": 2.9859007777789504e-05, + "loss": 1.2602, + "step": 3817000 + }, + { + "epoch": 40.29, + "learning_rate": 2.9856369450278083e-05, + "loss": 1.2687, + "step": 3817500 + }, + { + "epoch": 40.29, + "learning_rate": 2.9853731122766658e-05, + "loss": 1.3279, + "step": 3818000 + }, + { + "epoch": 40.3, + "learning_rate": 2.985109279525523e-05, + "loss": 1.2802, + "step": 3818500 + }, + { + "epoch": 40.3, + "learning_rate": 2.984845446774381e-05, + "loss": 1.2766, + "step": 3819000 + }, + { + "epoch": 40.31, + "learning_rate": 2.9845816140232385e-05, + "loss": 1.2289, + "step": 3819500 + }, + { + "epoch": 40.31, + "learning_rate": 2.9843177812720964e-05, + "loss": 1.264, + "step": 3820000 + }, + { + "epoch": 40.32, + "learning_rate": 2.984053948520954e-05, + "loss": 1.2971, + "step": 3820500 + }, + { + "epoch": 40.32, + "learning_rate": 2.983790115769811e-05, + "loss": 1.2918, + "step": 3821000 + }, + { + "epoch": 40.33, + "learning_rate": 2.983526283018669e-05, + "loss": 1.2784, + "step": 3821500 + }, + { + "epoch": 40.33, + "learning_rate": 2.9832624502675265e-05, + "loss": 1.2708, + "step": 3822000 + }, + { + "epoch": 40.34, + "learning_rate": 2.9829986175163844e-05, + "loss": 1.2726, + "step": 3822500 + }, + { + "epoch": 40.35, + "learning_rate": 2.9827347847652416e-05, + "loss": 1.3161, + "step": 3823000 + }, + { + "epoch": 40.35, + "learning_rate": 2.9824709520140992e-05, + "loss": 1.2442, + "step": 3823500 + }, + { + "epoch": 40.36, + "learning_rate": 2.982207119262957e-05, + "loss": 1.2927, + "step": 3824000 + }, + { + "epoch": 40.36, + "learning_rate": 2.9819432865118146e-05, + "loss": 1.2855, + "step": 3824500 + }, + { + "epoch": 40.37, + "learning_rate": 2.9816794537606725e-05, + "loss": 1.233, + "step": 3825000 + }, + { + "epoch": 40.37, + "learning_rate": 2.9814156210095294e-05, + "loss": 1.2776, + "step": 3825500 + }, + { + "epoch": 40.38, + "learning_rate": 2.9811517882583873e-05, + "loss": 1.3113, + "step": 3826000 + }, + { + "epoch": 40.38, + "learning_rate": 2.9808879555072448e-05, + "loss": 1.2881, + "step": 3826500 + }, + { + "epoch": 40.39, + "learning_rate": 2.9806241227561027e-05, + "loss": 1.2635, + "step": 3827000 + }, + { + "epoch": 40.39, + "learning_rate": 2.9803602900049603e-05, + "loss": 1.2669, + "step": 3827500 + }, + { + "epoch": 40.4, + "learning_rate": 2.9800964572538175e-05, + "loss": 1.3229, + "step": 3828000 + }, + { + "epoch": 40.4, + "learning_rate": 2.9798326245026754e-05, + "loss": 1.2892, + "step": 3828500 + }, + { + "epoch": 40.41, + "learning_rate": 2.979568791751533e-05, + "loss": 1.287, + "step": 3829000 + }, + { + "epoch": 40.41, + "learning_rate": 2.9793049590003908e-05, + "loss": 1.3417, + "step": 3829500 + }, + { + "epoch": 40.42, + "learning_rate": 2.9790411262492483e-05, + "loss": 1.2762, + "step": 3830000 + }, + { + "epoch": 40.42, + "learning_rate": 2.9787772934981056e-05, + "loss": 1.2892, + "step": 3830500 + }, + { + "epoch": 40.43, + "learning_rate": 2.9785134607469634e-05, + "loss": 1.2975, + "step": 3831000 + }, + { + "epoch": 40.44, + "learning_rate": 2.978249627995821e-05, + "loss": 1.2667, + "step": 3831500 + }, + { + "epoch": 40.44, + "learning_rate": 2.977985795244679e-05, + "loss": 1.3025, + "step": 3832000 + }, + { + "epoch": 40.45, + "learning_rate": 2.977721962493536e-05, + "loss": 1.3342, + "step": 3832500 + }, + { + "epoch": 40.45, + "learning_rate": 2.9774581297423936e-05, + "loss": 1.344, + "step": 3833000 + }, + { + "epoch": 40.46, + "learning_rate": 2.9771942969912515e-05, + "loss": 1.2725, + "step": 3833500 + }, + { + "epoch": 40.46, + "learning_rate": 2.976930464240109e-05, + "loss": 1.297, + "step": 3834000 + }, + { + "epoch": 40.47, + "learning_rate": 2.976666631488967e-05, + "loss": 1.2629, + "step": 3834500 + }, + { + "epoch": 40.47, + "learning_rate": 2.976402798737824e-05, + "loss": 1.2456, + "step": 3835000 + }, + { + "epoch": 40.48, + "learning_rate": 2.9761389659866817e-05, + "loss": 1.2878, + "step": 3835500 + }, + { + "epoch": 40.48, + "learning_rate": 2.9758751332355396e-05, + "loss": 1.301, + "step": 3836000 + }, + { + "epoch": 40.49, + "learning_rate": 2.975611300484397e-05, + "loss": 1.3487, + "step": 3836500 + }, + { + "epoch": 40.49, + "learning_rate": 2.975347467733255e-05, + "loss": 1.3096, + "step": 3837000 + }, + { + "epoch": 40.5, + "learning_rate": 2.975083634982112e-05, + "loss": 1.2346, + "step": 3837500 + }, + { + "epoch": 40.5, + "learning_rate": 2.9748198022309698e-05, + "loss": 1.3194, + "step": 3838000 + }, + { + "epoch": 40.51, + "learning_rate": 2.9745559694798273e-05, + "loss": 1.2866, + "step": 3838500 + }, + { + "epoch": 40.51, + "learning_rate": 2.9742921367286852e-05, + "loss": 1.2559, + "step": 3839000 + }, + { + "epoch": 40.52, + "learning_rate": 2.9740283039775428e-05, + "loss": 1.2477, + "step": 3839500 + }, + { + "epoch": 40.52, + "learning_rate": 2.9737644712264e-05, + "loss": 1.3168, + "step": 3840000 + }, + { + "epoch": 40.53, + "learning_rate": 2.973500638475258e-05, + "loss": 1.2672, + "step": 3840500 + }, + { + "epoch": 40.54, + "learning_rate": 2.9732368057241154e-05, + "loss": 1.2665, + "step": 3841000 + }, + { + "epoch": 40.54, + "learning_rate": 2.9729729729729733e-05, + "loss": 1.275, + "step": 3841500 + }, + { + "epoch": 40.55, + "learning_rate": 2.9727091402218305e-05, + "loss": 1.2878, + "step": 3842000 + }, + { + "epoch": 40.55, + "learning_rate": 2.972445307470688e-05, + "loss": 1.3002, + "step": 3842500 + }, + { + "epoch": 40.56, + "learning_rate": 2.972181474719546e-05, + "loss": 1.3255, + "step": 3843000 + }, + { + "epoch": 40.56, + "learning_rate": 2.9719176419684035e-05, + "loss": 1.2739, + "step": 3843500 + }, + { + "epoch": 40.57, + "learning_rate": 2.9716538092172614e-05, + "loss": 1.2965, + "step": 3844000 + }, + { + "epoch": 40.57, + "learning_rate": 2.9713899764661186e-05, + "loss": 1.2723, + "step": 3844500 + }, + { + "epoch": 40.58, + "learning_rate": 2.971126143714976e-05, + "loss": 1.2924, + "step": 3845000 + }, + { + "epoch": 40.58, + "learning_rate": 2.970862310963834e-05, + "loss": 1.2846, + "step": 3845500 + }, + { + "epoch": 40.59, + "learning_rate": 2.9705984782126916e-05, + "loss": 1.2413, + "step": 3846000 + }, + { + "epoch": 40.59, + "learning_rate": 2.9703346454615495e-05, + "loss": 1.2336, + "step": 3846500 + }, + { + "epoch": 40.6, + "learning_rate": 2.9700708127104067e-05, + "loss": 1.296, + "step": 3847000 + }, + { + "epoch": 40.6, + "learning_rate": 2.9698069799592642e-05, + "loss": 1.3092, + "step": 3847500 + }, + { + "epoch": 40.61, + "learning_rate": 2.969543147208122e-05, + "loss": 1.2596, + "step": 3848000 + }, + { + "epoch": 40.61, + "learning_rate": 2.9692793144569797e-05, + "loss": 1.3005, + "step": 3848500 + }, + { + "epoch": 40.62, + "learning_rate": 2.9690154817058376e-05, + "loss": 1.2493, + "step": 3849000 + }, + { + "epoch": 40.62, + "learning_rate": 2.9687516489546944e-05, + "loss": 1.2205, + "step": 3849500 + }, + { + "epoch": 40.63, + "learning_rate": 2.9684878162035523e-05, + "loss": 1.2676, + "step": 3850000 + }, + { + "epoch": 40.64, + "learning_rate": 2.96822398345241e-05, + "loss": 1.2452, + "step": 3850500 + }, + { + "epoch": 40.64, + "learning_rate": 2.9679601507012678e-05, + "loss": 1.2786, + "step": 3851000 + }, + { + "epoch": 40.65, + "learning_rate": 2.967696317950125e-05, + "loss": 1.2345, + "step": 3851500 + }, + { + "epoch": 40.65, + "learning_rate": 2.9674324851989825e-05, + "loss": 1.2567, + "step": 3852000 + }, + { + "epoch": 40.66, + "learning_rate": 2.9671686524478404e-05, + "loss": 1.3184, + "step": 3852500 + }, + { + "epoch": 40.66, + "learning_rate": 2.966904819696698e-05, + "loss": 1.2621, + "step": 3853000 + }, + { + "epoch": 40.67, + "learning_rate": 2.966640986945556e-05, + "loss": 1.2586, + "step": 3853500 + }, + { + "epoch": 40.67, + "learning_rate": 2.966377154194413e-05, + "loss": 1.2999, + "step": 3854000 + }, + { + "epoch": 40.68, + "learning_rate": 2.9661133214432706e-05, + "loss": 1.3012, + "step": 3854500 + }, + { + "epoch": 40.68, + "learning_rate": 2.9658494886921285e-05, + "loss": 1.3094, + "step": 3855000 + }, + { + "epoch": 40.69, + "learning_rate": 2.965585655940986e-05, + "loss": 1.2551, + "step": 3855500 + }, + { + "epoch": 40.69, + "learning_rate": 2.965321823189844e-05, + "loss": 1.2835, + "step": 3856000 + }, + { + "epoch": 40.7, + "learning_rate": 2.965057990438701e-05, + "loss": 1.2885, + "step": 3856500 + }, + { + "epoch": 40.7, + "learning_rate": 2.9647941576875587e-05, + "loss": 1.2825, + "step": 3857000 + }, + { + "epoch": 40.71, + "learning_rate": 2.9645303249364166e-05, + "loss": 1.3136, + "step": 3857500 + }, + { + "epoch": 40.71, + "learning_rate": 2.964266492185274e-05, + "loss": 1.2414, + "step": 3858000 + }, + { + "epoch": 40.72, + "learning_rate": 2.964002659434132e-05, + "loss": 1.2583, + "step": 3858500 + }, + { + "epoch": 40.73, + "learning_rate": 2.9637388266829892e-05, + "loss": 1.2388, + "step": 3859000 + }, + { + "epoch": 40.73, + "learning_rate": 2.9634749939318468e-05, + "loss": 1.322, + "step": 3859500 + }, + { + "epoch": 40.74, + "learning_rate": 2.9632111611807047e-05, + "loss": 1.299, + "step": 3860000 + }, + { + "epoch": 40.74, + "learning_rate": 2.9629473284295622e-05, + "loss": 1.2891, + "step": 3860500 + }, + { + "epoch": 40.75, + "learning_rate": 2.9626834956784194e-05, + "loss": 1.3002, + "step": 3861000 + }, + { + "epoch": 40.75, + "learning_rate": 2.962419662927277e-05, + "loss": 1.2993, + "step": 3861500 + }, + { + "epoch": 40.76, + "learning_rate": 2.962155830176135e-05, + "loss": 1.3152, + "step": 3862000 + }, + { + "epoch": 40.76, + "learning_rate": 2.9618919974249927e-05, + "loss": 1.2641, + "step": 3862500 + }, + { + "epoch": 40.77, + "learning_rate": 2.9616281646738503e-05, + "loss": 1.2902, + "step": 3863000 + }, + { + "epoch": 40.77, + "learning_rate": 2.9613643319227075e-05, + "loss": 1.3493, + "step": 3863500 + }, + { + "epoch": 40.78, + "learning_rate": 2.961100499171565e-05, + "loss": 1.2589, + "step": 3864000 + }, + { + "epoch": 40.78, + "learning_rate": 2.960836666420423e-05, + "loss": 1.2822, + "step": 3864500 + }, + { + "epoch": 40.79, + "learning_rate": 2.9605728336692805e-05, + "loss": 1.3098, + "step": 3865000 + }, + { + "epoch": 40.79, + "learning_rate": 2.9603090009181384e-05, + "loss": 1.3106, + "step": 3865500 + }, + { + "epoch": 40.8, + "learning_rate": 2.9600451681669956e-05, + "loss": 1.2315, + "step": 3866000 + }, + { + "epoch": 40.8, + "learning_rate": 2.959781335415853e-05, + "loss": 1.2861, + "step": 3866500 + }, + { + "epoch": 40.81, + "learning_rate": 2.959517502664711e-05, + "loss": 1.2881, + "step": 3867000 + }, + { + "epoch": 40.81, + "learning_rate": 2.9592536699135686e-05, + "loss": 1.3084, + "step": 3867500 + }, + { + "epoch": 40.82, + "learning_rate": 2.9589898371624264e-05, + "loss": 1.2859, + "step": 3868000 + }, + { + "epoch": 40.83, + "learning_rate": 2.9587260044112837e-05, + "loss": 1.3075, + "step": 3868500 + }, + { + "epoch": 40.83, + "learning_rate": 2.9584621716601412e-05, + "loss": 1.3354, + "step": 3869000 + }, + { + "epoch": 40.84, + "learning_rate": 2.958198338908999e-05, + "loss": 1.307, + "step": 3869500 + }, + { + "epoch": 40.84, + "learning_rate": 2.9579345061578566e-05, + "loss": 1.3242, + "step": 3870000 + }, + { + "epoch": 40.85, + "learning_rate": 2.957670673406714e-05, + "loss": 1.2135, + "step": 3870500 + }, + { + "epoch": 40.85, + "learning_rate": 2.9574068406555717e-05, + "loss": 1.2997, + "step": 3871000 + }, + { + "epoch": 40.86, + "learning_rate": 2.9571430079044293e-05, + "loss": 1.2701, + "step": 3871500 + }, + { + "epoch": 40.86, + "learning_rate": 2.9568791751532872e-05, + "loss": 1.2237, + "step": 3872000 + }, + { + "epoch": 40.87, + "learning_rate": 2.9566153424021447e-05, + "loss": 1.2509, + "step": 3872500 + }, + { + "epoch": 40.87, + "learning_rate": 2.956351509651002e-05, + "loss": 1.3115, + "step": 3873000 + }, + { + "epoch": 40.88, + "learning_rate": 2.9560876768998598e-05, + "loss": 1.281, + "step": 3873500 + }, + { + "epoch": 40.88, + "learning_rate": 2.9558238441487174e-05, + "loss": 1.2742, + "step": 3874000 + }, + { + "epoch": 40.89, + "learning_rate": 2.9555600113975753e-05, + "loss": 1.216, + "step": 3874500 + }, + { + "epoch": 40.89, + "learning_rate": 2.9552961786464328e-05, + "loss": 1.2375, + "step": 3875000 + }, + { + "epoch": 40.9, + "learning_rate": 2.95503234589529e-05, + "loss": 1.3085, + "step": 3875500 + }, + { + "epoch": 40.9, + "learning_rate": 2.9547685131441476e-05, + "loss": 1.2771, + "step": 3876000 + }, + { + "epoch": 40.91, + "learning_rate": 2.9545046803930055e-05, + "loss": 1.3081, + "step": 3876500 + }, + { + "epoch": 40.92, + "learning_rate": 2.954240847641863e-05, + "loss": 1.2853, + "step": 3877000 + }, + { + "epoch": 40.92, + "learning_rate": 2.953977014890721e-05, + "loss": 1.318, + "step": 3877500 + }, + { + "epoch": 40.93, + "learning_rate": 2.953713182139578e-05, + "loss": 1.2957, + "step": 3878000 + }, + { + "epoch": 40.93, + "learning_rate": 2.9534493493884356e-05, + "loss": 1.2807, + "step": 3878500 + }, + { + "epoch": 40.94, + "learning_rate": 2.9531855166372935e-05, + "loss": 1.3099, + "step": 3879000 + }, + { + "epoch": 40.94, + "learning_rate": 2.952921683886151e-05, + "loss": 1.3132, + "step": 3879500 + }, + { + "epoch": 40.95, + "learning_rate": 2.9526578511350083e-05, + "loss": 1.2709, + "step": 3880000 + }, + { + "epoch": 40.95, + "learning_rate": 2.9523940183838662e-05, + "loss": 1.2494, + "step": 3880500 + }, + { + "epoch": 40.96, + "learning_rate": 2.9521301856327237e-05, + "loss": 1.222, + "step": 3881000 + }, + { + "epoch": 40.96, + "learning_rate": 2.9518663528815816e-05, + "loss": 1.2866, + "step": 3881500 + }, + { + "epoch": 40.97, + "learning_rate": 2.951602520130439e-05, + "loss": 1.2742, + "step": 3882000 + }, + { + "epoch": 40.97, + "learning_rate": 2.9513386873792964e-05, + "loss": 1.2825, + "step": 3882500 + }, + { + "epoch": 40.98, + "learning_rate": 2.9510748546281543e-05, + "loss": 1.2741, + "step": 3883000 + }, + { + "epoch": 40.98, + "learning_rate": 2.9508110218770118e-05, + "loss": 1.2877, + "step": 3883500 + }, + { + "epoch": 40.99, + "learning_rate": 2.9505471891258697e-05, + "loss": 1.3264, + "step": 3884000 + }, + { + "epoch": 40.99, + "learning_rate": 2.9502833563747273e-05, + "loss": 1.2687, + "step": 3884500 + }, + { + "epoch": 41.0, + "learning_rate": 2.9500195236235845e-05, + "loss": 1.2987, + "step": 3885000 + }, + { + "epoch": 41.0, + "learning_rate": 2.9497556908724423e-05, + "loss": 1.2782, + "step": 3885500 + }, + { + "epoch": 41.01, + "learning_rate": 2.9494918581213e-05, + "loss": 1.2783, + "step": 3886000 + }, + { + "epoch": 41.02, + "learning_rate": 2.9492280253701578e-05, + "loss": 1.2871, + "step": 3886500 + }, + { + "epoch": 41.02, + "learning_rate": 2.9489641926190153e-05, + "loss": 1.2709, + "step": 3887000 + }, + { + "epoch": 41.03, + "learning_rate": 2.9487003598678725e-05, + "loss": 1.2787, + "step": 3887500 + }, + { + "epoch": 41.03, + "learning_rate": 2.94843652711673e-05, + "loss": 1.2966, + "step": 3888000 + }, + { + "epoch": 41.04, + "learning_rate": 2.948172694365588e-05, + "loss": 1.2757, + "step": 3888500 + }, + { + "epoch": 41.04, + "learning_rate": 2.9479088616144455e-05, + "loss": 1.3147, + "step": 3889000 + }, + { + "epoch": 41.05, + "learning_rate": 2.9476450288633027e-05, + "loss": 1.2713, + "step": 3889500 + }, + { + "epoch": 41.05, + "learning_rate": 2.9473811961121606e-05, + "loss": 1.2595, + "step": 3890000 + }, + { + "epoch": 41.06, + "learning_rate": 2.9471173633610182e-05, + "loss": 1.2561, + "step": 3890500 + }, + { + "epoch": 41.06, + "learning_rate": 2.946853530609876e-05, + "loss": 1.22, + "step": 3891000 + }, + { + "epoch": 41.07, + "learning_rate": 2.9465896978587336e-05, + "loss": 1.2484, + "step": 3891500 + }, + { + "epoch": 41.07, + "learning_rate": 2.9463258651075908e-05, + "loss": 1.2289, + "step": 3892000 + }, + { + "epoch": 41.08, + "learning_rate": 2.9460620323564487e-05, + "loss": 1.2898, + "step": 3892500 + }, + { + "epoch": 41.08, + "learning_rate": 2.9457981996053063e-05, + "loss": 1.3129, + "step": 3893000 + }, + { + "epoch": 41.09, + "learning_rate": 2.945534366854164e-05, + "loss": 1.2559, + "step": 3893500 + }, + { + "epoch": 41.09, + "learning_rate": 2.9452705341030217e-05, + "loss": 1.2632, + "step": 3894000 + }, + { + "epoch": 41.1, + "learning_rate": 2.945006701351879e-05, + "loss": 1.2475, + "step": 3894500 + }, + { + "epoch": 41.11, + "learning_rate": 2.9447428686007368e-05, + "loss": 1.2604, + "step": 3895000 + }, + { + "epoch": 41.11, + "learning_rate": 2.9444790358495943e-05, + "loss": 1.3092, + "step": 3895500 + }, + { + "epoch": 41.12, + "learning_rate": 2.9442152030984522e-05, + "loss": 1.2973, + "step": 3896000 + }, + { + "epoch": 41.12, + "learning_rate": 2.9439513703473098e-05, + "loss": 1.2917, + "step": 3896500 + }, + { + "epoch": 41.13, + "learning_rate": 2.943687537596167e-05, + "loss": 1.3229, + "step": 3897000 + }, + { + "epoch": 41.13, + "learning_rate": 2.943423704845025e-05, + "loss": 1.3144, + "step": 3897500 + }, + { + "epoch": 41.14, + "learning_rate": 2.9431598720938824e-05, + "loss": 1.3009, + "step": 3898000 + }, + { + "epoch": 41.14, + "learning_rate": 2.9428960393427403e-05, + "loss": 1.2182, + "step": 3898500 + }, + { + "epoch": 41.15, + "learning_rate": 2.9426322065915972e-05, + "loss": 1.2465, + "step": 3899000 + }, + { + "epoch": 41.15, + "learning_rate": 2.942368373840455e-05, + "loss": 1.2324, + "step": 3899500 + }, + { + "epoch": 41.16, + "learning_rate": 2.9421045410893126e-05, + "loss": 1.2768, + "step": 3900000 + }, + { + "epoch": 41.16, + "learning_rate": 2.9418407083381705e-05, + "loss": 1.2958, + "step": 3900500 + }, + { + "epoch": 41.17, + "learning_rate": 2.941576875587028e-05, + "loss": 1.2084, + "step": 3901000 + }, + { + "epoch": 41.17, + "learning_rate": 2.9413130428358853e-05, + "loss": 1.2451, + "step": 3901500 + }, + { + "epoch": 41.18, + "learning_rate": 2.941049210084743e-05, + "loss": 1.2615, + "step": 3902000 + }, + { + "epoch": 41.18, + "learning_rate": 2.9407853773336007e-05, + "loss": 1.2457, + "step": 3902500 + }, + { + "epoch": 41.19, + "learning_rate": 2.9405215445824586e-05, + "loss": 1.2258, + "step": 3903000 + }, + { + "epoch": 41.19, + "learning_rate": 2.940257711831316e-05, + "loss": 1.3397, + "step": 3903500 + }, + { + "epoch": 41.2, + "learning_rate": 2.9399938790801733e-05, + "loss": 1.2863, + "step": 3904000 + }, + { + "epoch": 41.21, + "learning_rate": 2.9397300463290312e-05, + "loss": 1.2657, + "step": 3904500 + }, + { + "epoch": 41.21, + "learning_rate": 2.9394662135778888e-05, + "loss": 1.2744, + "step": 3905000 + }, + { + "epoch": 41.22, + "learning_rate": 2.9392023808267467e-05, + "loss": 1.2772, + "step": 3905500 + }, + { + "epoch": 41.22, + "learning_rate": 2.9389385480756042e-05, + "loss": 1.2587, + "step": 3906000 + }, + { + "epoch": 41.23, + "learning_rate": 2.9386747153244614e-05, + "loss": 1.2857, + "step": 3906500 + }, + { + "epoch": 41.23, + "learning_rate": 2.9384108825733193e-05, + "loss": 1.2379, + "step": 3907000 + }, + { + "epoch": 41.24, + "learning_rate": 2.938147049822177e-05, + "loss": 1.2925, + "step": 3907500 + }, + { + "epoch": 41.24, + "learning_rate": 2.9378832170710348e-05, + "loss": 1.3325, + "step": 3908000 + }, + { + "epoch": 41.25, + "learning_rate": 2.937619384319892e-05, + "loss": 1.3342, + "step": 3908500 + }, + { + "epoch": 41.25, + "learning_rate": 2.9373555515687495e-05, + "loss": 1.2762, + "step": 3909000 + }, + { + "epoch": 41.26, + "learning_rate": 2.9370917188176074e-05, + "loss": 1.2631, + "step": 3909500 + }, + { + "epoch": 41.26, + "learning_rate": 2.936827886066465e-05, + "loss": 1.3033, + "step": 3910000 + }, + { + "epoch": 41.27, + "learning_rate": 2.936564053315323e-05, + "loss": 1.2892, + "step": 3910500 + }, + { + "epoch": 41.27, + "learning_rate": 2.9363002205641797e-05, + "loss": 1.2456, + "step": 3911000 + }, + { + "epoch": 41.28, + "learning_rate": 2.9360363878130376e-05, + "loss": 1.1919, + "step": 3911500 + }, + { + "epoch": 41.28, + "learning_rate": 2.935772555061895e-05, + "loss": 1.2703, + "step": 3912000 + }, + { + "epoch": 41.29, + "learning_rate": 2.935508722310753e-05, + "loss": 1.3329, + "step": 3912500 + }, + { + "epoch": 41.3, + "learning_rate": 2.9352448895596106e-05, + "loss": 1.2833, + "step": 3913000 + }, + { + "epoch": 41.3, + "learning_rate": 2.9349810568084678e-05, + "loss": 1.2743, + "step": 3913500 + }, + { + "epoch": 41.31, + "learning_rate": 2.9347172240573257e-05, + "loss": 1.3183, + "step": 3914000 + }, + { + "epoch": 41.31, + "learning_rate": 2.9344533913061832e-05, + "loss": 1.2807, + "step": 3914500 + }, + { + "epoch": 41.32, + "learning_rate": 2.934189558555041e-05, + "loss": 1.2692, + "step": 3915000 + }, + { + "epoch": 41.32, + "learning_rate": 2.9339257258038987e-05, + "loss": 1.3309, + "step": 3915500 + }, + { + "epoch": 41.33, + "learning_rate": 2.933661893052756e-05, + "loss": 1.2836, + "step": 3916000 + }, + { + "epoch": 41.33, + "learning_rate": 2.9333980603016138e-05, + "loss": 1.3107, + "step": 3916500 + }, + { + "epoch": 41.34, + "learning_rate": 2.9331342275504713e-05, + "loss": 1.235, + "step": 3917000 + }, + { + "epoch": 41.34, + "learning_rate": 2.9328703947993292e-05, + "loss": 1.2778, + "step": 3917500 + }, + { + "epoch": 41.35, + "learning_rate": 2.9326065620481864e-05, + "loss": 1.2726, + "step": 3918000 + }, + { + "epoch": 41.35, + "learning_rate": 2.932342729297044e-05, + "loss": 1.304, + "step": 3918500 + }, + { + "epoch": 41.36, + "learning_rate": 2.932078896545902e-05, + "loss": 1.2657, + "step": 3919000 + }, + { + "epoch": 41.36, + "learning_rate": 2.9318150637947594e-05, + "loss": 1.2901, + "step": 3919500 + }, + { + "epoch": 41.37, + "learning_rate": 2.9315512310436173e-05, + "loss": 1.2499, + "step": 3920000 + }, + { + "epoch": 41.37, + "learning_rate": 2.9312873982924745e-05, + "loss": 1.2927, + "step": 3920500 + }, + { + "epoch": 41.38, + "learning_rate": 2.931023565541332e-05, + "loss": 1.311, + "step": 3921000 + }, + { + "epoch": 41.38, + "learning_rate": 2.93075973279019e-05, + "loss": 1.2014, + "step": 3921500 + }, + { + "epoch": 41.39, + "learning_rate": 2.9304959000390475e-05, + "loss": 1.2747, + "step": 3922000 + }, + { + "epoch": 41.4, + "learning_rate": 2.9302320672879054e-05, + "loss": 1.24, + "step": 3922500 + }, + { + "epoch": 41.4, + "learning_rate": 2.9299682345367622e-05, + "loss": 1.2629, + "step": 3923000 + }, + { + "epoch": 41.41, + "learning_rate": 2.92970440178562e-05, + "loss": 1.2601, + "step": 3923500 + }, + { + "epoch": 41.41, + "learning_rate": 2.9294405690344777e-05, + "loss": 1.3351, + "step": 3924000 + }, + { + "epoch": 41.42, + "learning_rate": 2.9291767362833356e-05, + "loss": 1.308, + "step": 3924500 + }, + { + "epoch": 41.42, + "learning_rate": 2.9289129035321934e-05, + "loss": 1.3393, + "step": 3925000 + }, + { + "epoch": 41.43, + "learning_rate": 2.9286490707810503e-05, + "loss": 1.3037, + "step": 3925500 + }, + { + "epoch": 41.43, + "learning_rate": 2.9283852380299082e-05, + "loss": 1.2267, + "step": 3926000 + }, + { + "epoch": 41.44, + "learning_rate": 2.9281214052787657e-05, + "loss": 1.2495, + "step": 3926500 + }, + { + "epoch": 41.44, + "learning_rate": 2.9278575725276236e-05, + "loss": 1.2476, + "step": 3927000 + }, + { + "epoch": 41.45, + "learning_rate": 2.927593739776481e-05, + "loss": 1.3104, + "step": 3927500 + }, + { + "epoch": 41.45, + "learning_rate": 2.9273299070253384e-05, + "loss": 1.2962, + "step": 3928000 + }, + { + "epoch": 41.46, + "learning_rate": 2.9270660742741963e-05, + "loss": 1.3519, + "step": 3928500 + }, + { + "epoch": 41.46, + "learning_rate": 2.9268022415230538e-05, + "loss": 1.2402, + "step": 3929000 + }, + { + "epoch": 41.47, + "learning_rate": 2.9265384087719117e-05, + "loss": 1.2622, + "step": 3929500 + }, + { + "epoch": 41.47, + "learning_rate": 2.926274576020769e-05, + "loss": 1.2771, + "step": 3930000 + }, + { + "epoch": 41.48, + "learning_rate": 2.9260107432696265e-05, + "loss": 1.2216, + "step": 3930500 + }, + { + "epoch": 41.49, + "learning_rate": 2.9257469105184844e-05, + "loss": 1.3431, + "step": 3931000 + }, + { + "epoch": 41.49, + "learning_rate": 2.925483077767342e-05, + "loss": 1.2765, + "step": 3931500 + }, + { + "epoch": 41.5, + "learning_rate": 2.9252192450161998e-05, + "loss": 1.314, + "step": 3932000 + }, + { + "epoch": 41.5, + "learning_rate": 2.924955412265057e-05, + "loss": 1.3019, + "step": 3932500 + }, + { + "epoch": 41.51, + "learning_rate": 2.9246915795139146e-05, + "loss": 1.2738, + "step": 3933000 + }, + { + "epoch": 41.51, + "learning_rate": 2.9244277467627724e-05, + "loss": 1.3521, + "step": 3933500 + }, + { + "epoch": 41.52, + "learning_rate": 2.92416391401163e-05, + "loss": 1.3333, + "step": 3934000 + }, + { + "epoch": 41.52, + "learning_rate": 2.923900081260488e-05, + "loss": 1.2845, + "step": 3934500 + }, + { + "epoch": 41.53, + "learning_rate": 2.9236362485093448e-05, + "loss": 1.2674, + "step": 3935000 + }, + { + "epoch": 41.53, + "learning_rate": 2.9233724157582026e-05, + "loss": 1.2631, + "step": 3935500 + }, + { + "epoch": 41.54, + "learning_rate": 2.9231085830070605e-05, + "loss": 1.2796, + "step": 3936000 + }, + { + "epoch": 41.54, + "learning_rate": 2.922844750255918e-05, + "loss": 1.3116, + "step": 3936500 + }, + { + "epoch": 41.55, + "learning_rate": 2.9225809175047753e-05, + "loss": 1.315, + "step": 3937000 + }, + { + "epoch": 41.55, + "learning_rate": 2.922317084753633e-05, + "loss": 1.2797, + "step": 3937500 + }, + { + "epoch": 41.56, + "learning_rate": 2.9220532520024907e-05, + "loss": 1.2644, + "step": 3938000 + }, + { + "epoch": 41.56, + "learning_rate": 2.9217894192513483e-05, + "loss": 1.3176, + "step": 3938500 + }, + { + "epoch": 41.57, + "learning_rate": 2.921525586500206e-05, + "loss": 1.2216, + "step": 3939000 + }, + { + "epoch": 41.57, + "learning_rate": 2.9212617537490634e-05, + "loss": 1.2379, + "step": 3939500 + }, + { + "epoch": 41.58, + "learning_rate": 2.920997920997921e-05, + "loss": 1.3044, + "step": 3940000 + }, + { + "epoch": 41.59, + "learning_rate": 2.9207340882467788e-05, + "loss": 1.2812, + "step": 3940500 + }, + { + "epoch": 41.59, + "learning_rate": 2.9204702554956364e-05, + "loss": 1.2792, + "step": 3941000 + }, + { + "epoch": 41.6, + "learning_rate": 2.9202064227444942e-05, + "loss": 1.2953, + "step": 3941500 + }, + { + "epoch": 41.6, + "learning_rate": 2.9199425899933515e-05, + "loss": 1.249, + "step": 3942000 + }, + { + "epoch": 41.61, + "learning_rate": 2.919678757242209e-05, + "loss": 1.3042, + "step": 3942500 + }, + { + "epoch": 41.61, + "learning_rate": 2.919414924491067e-05, + "loss": 1.2485, + "step": 3943000 + }, + { + "epoch": 41.62, + "learning_rate": 2.9191510917399244e-05, + "loss": 1.3151, + "step": 3943500 + }, + { + "epoch": 41.62, + "learning_rate": 2.9188872589887823e-05, + "loss": 1.2435, + "step": 3944000 + }, + { + "epoch": 41.63, + "learning_rate": 2.9186234262376395e-05, + "loss": 1.1918, + "step": 3944500 + }, + { + "epoch": 41.63, + "learning_rate": 2.918359593486497e-05, + "loss": 1.2874, + "step": 3945000 + }, + { + "epoch": 41.64, + "learning_rate": 2.918095760735355e-05, + "loss": 1.2233, + "step": 3945500 + }, + { + "epoch": 41.64, + "learning_rate": 2.9178319279842125e-05, + "loss": 1.2975, + "step": 3946000 + }, + { + "epoch": 41.65, + "learning_rate": 2.9175680952330697e-05, + "loss": 1.2814, + "step": 3946500 + }, + { + "epoch": 41.65, + "learning_rate": 2.9173042624819276e-05, + "loss": 1.2976, + "step": 3947000 + }, + { + "epoch": 41.66, + "learning_rate": 2.917040429730785e-05, + "loss": 1.263, + "step": 3947500 + }, + { + "epoch": 41.66, + "learning_rate": 2.916776596979643e-05, + "loss": 1.2902, + "step": 3948000 + }, + { + "epoch": 41.67, + "learning_rate": 2.9165127642285006e-05, + "loss": 1.2235, + "step": 3948500 + }, + { + "epoch": 41.68, + "learning_rate": 2.9162489314773578e-05, + "loss": 1.2764, + "step": 3949000 + }, + { + "epoch": 41.68, + "learning_rate": 2.9159850987262154e-05, + "loss": 1.305, + "step": 3949500 + }, + { + "epoch": 41.69, + "learning_rate": 2.9157212659750732e-05, + "loss": 1.3046, + "step": 3950000 + }, + { + "epoch": 41.69, + "learning_rate": 2.9154574332239308e-05, + "loss": 1.2743, + "step": 3950500 + }, + { + "epoch": 41.7, + "learning_rate": 2.9151936004727887e-05, + "loss": 1.3085, + "step": 3951000 + }, + { + "epoch": 41.7, + "learning_rate": 2.914929767721646e-05, + "loss": 1.3261, + "step": 3951500 + }, + { + "epoch": 41.71, + "learning_rate": 2.9146659349705034e-05, + "loss": 1.3159, + "step": 3952000 + }, + { + "epoch": 41.71, + "learning_rate": 2.9144021022193613e-05, + "loss": 1.2358, + "step": 3952500 + }, + { + "epoch": 41.72, + "learning_rate": 2.914138269468219e-05, + "loss": 1.2988, + "step": 3953000 + }, + { + "epoch": 41.72, + "learning_rate": 2.9138744367170768e-05, + "loss": 1.2806, + "step": 3953500 + }, + { + "epoch": 41.73, + "learning_rate": 2.913610603965934e-05, + "loss": 1.2541, + "step": 3954000 + }, + { + "epoch": 41.73, + "learning_rate": 2.9133467712147915e-05, + "loss": 1.267, + "step": 3954500 + }, + { + "epoch": 41.74, + "learning_rate": 2.9130829384636494e-05, + "loss": 1.2859, + "step": 3955000 + }, + { + "epoch": 41.74, + "learning_rate": 2.912819105712507e-05, + "loss": 1.2896, + "step": 3955500 + }, + { + "epoch": 41.75, + "learning_rate": 2.9125552729613642e-05, + "loss": 1.2515, + "step": 3956000 + }, + { + "epoch": 41.75, + "learning_rate": 2.912291440210222e-05, + "loss": 1.3311, + "step": 3956500 + }, + { + "epoch": 41.76, + "learning_rate": 2.9120276074590796e-05, + "loss": 1.2552, + "step": 3957000 + }, + { + "epoch": 41.76, + "learning_rate": 2.9117637747079375e-05, + "loss": 1.2881, + "step": 3957500 + }, + { + "epoch": 41.77, + "learning_rate": 2.911499941956795e-05, + "loss": 1.2998, + "step": 3958000 + }, + { + "epoch": 41.78, + "learning_rate": 2.9112361092056523e-05, + "loss": 1.2806, + "step": 3958500 + }, + { + "epoch": 41.78, + "learning_rate": 2.91097227645451e-05, + "loss": 1.3078, + "step": 3959000 + }, + { + "epoch": 41.79, + "learning_rate": 2.9107084437033677e-05, + "loss": 1.2879, + "step": 3959500 + }, + { + "epoch": 41.79, + "learning_rate": 2.9104446109522256e-05, + "loss": 1.3182, + "step": 3960000 + }, + { + "epoch": 41.8, + "learning_rate": 2.910180778201083e-05, + "loss": 1.2441, + "step": 3960500 + }, + { + "epoch": 41.8, + "learning_rate": 2.9099169454499403e-05, + "loss": 1.2734, + "step": 3961000 + }, + { + "epoch": 41.81, + "learning_rate": 2.909653112698798e-05, + "loss": 1.2831, + "step": 3961500 + }, + { + "epoch": 41.81, + "learning_rate": 2.9093892799476558e-05, + "loss": 1.2526, + "step": 3962000 + }, + { + "epoch": 41.82, + "learning_rate": 2.9091254471965133e-05, + "loss": 1.3214, + "step": 3962500 + }, + { + "epoch": 41.82, + "learning_rate": 2.9088616144453712e-05, + "loss": 1.3016, + "step": 3963000 + }, + { + "epoch": 41.83, + "learning_rate": 2.9085977816942284e-05, + "loss": 1.2588, + "step": 3963500 + }, + { + "epoch": 41.83, + "learning_rate": 2.908333948943086e-05, + "loss": 1.214, + "step": 3964000 + }, + { + "epoch": 41.84, + "learning_rate": 2.908070116191944e-05, + "loss": 1.3237, + "step": 3964500 + }, + { + "epoch": 41.84, + "learning_rate": 2.9078062834408014e-05, + "loss": 1.2564, + "step": 3965000 + }, + { + "epoch": 41.85, + "learning_rate": 2.9075424506896586e-05, + "loss": 1.2834, + "step": 3965500 + }, + { + "epoch": 41.85, + "learning_rate": 2.9072786179385165e-05, + "loss": 1.2958, + "step": 3966000 + }, + { + "epoch": 41.86, + "learning_rate": 2.907014785187374e-05, + "loss": 1.268, + "step": 3966500 + }, + { + "epoch": 41.86, + "learning_rate": 2.906750952436232e-05, + "loss": 1.2976, + "step": 3967000 + }, + { + "epoch": 41.87, + "learning_rate": 2.9064871196850895e-05, + "loss": 1.2896, + "step": 3967500 + }, + { + "epoch": 41.88, + "learning_rate": 2.9062232869339467e-05, + "loss": 1.2822, + "step": 3968000 + }, + { + "epoch": 41.88, + "learning_rate": 2.9059594541828046e-05, + "loss": 1.3549, + "step": 3968500 + }, + { + "epoch": 41.89, + "learning_rate": 2.905695621431662e-05, + "loss": 1.2775, + "step": 3969000 + }, + { + "epoch": 41.89, + "learning_rate": 2.90543178868052e-05, + "loss": 1.3109, + "step": 3969500 + }, + { + "epoch": 41.9, + "learning_rate": 2.9051679559293776e-05, + "loss": 1.2612, + "step": 3970000 + }, + { + "epoch": 41.9, + "learning_rate": 2.9049041231782348e-05, + "loss": 1.3096, + "step": 3970500 + }, + { + "epoch": 41.91, + "learning_rate": 2.9046402904270927e-05, + "loss": 1.2832, + "step": 3971000 + }, + { + "epoch": 41.91, + "learning_rate": 2.9043764576759502e-05, + "loss": 1.2875, + "step": 3971500 + }, + { + "epoch": 41.92, + "learning_rate": 2.904112624924808e-05, + "loss": 1.2729, + "step": 3972000 + }, + { + "epoch": 41.92, + "learning_rate": 2.9038487921736657e-05, + "loss": 1.2272, + "step": 3972500 + }, + { + "epoch": 41.93, + "learning_rate": 2.903584959422523e-05, + "loss": 1.3132, + "step": 3973000 + }, + { + "epoch": 41.93, + "learning_rate": 2.9033211266713804e-05, + "loss": 1.247, + "step": 3973500 + }, + { + "epoch": 41.94, + "learning_rate": 2.9030572939202383e-05, + "loss": 1.2938, + "step": 3974000 + }, + { + "epoch": 41.94, + "learning_rate": 2.902793461169096e-05, + "loss": 1.2442, + "step": 3974500 + }, + { + "epoch": 41.95, + "learning_rate": 2.902529628417953e-05, + "loss": 1.2343, + "step": 3975000 + }, + { + "epoch": 41.95, + "learning_rate": 2.902265795666811e-05, + "loss": 1.2513, + "step": 3975500 + }, + { + "epoch": 41.96, + "learning_rate": 2.9020019629156685e-05, + "loss": 1.3541, + "step": 3976000 + }, + { + "epoch": 41.97, + "learning_rate": 2.9017381301645264e-05, + "loss": 1.3127, + "step": 3976500 + }, + { + "epoch": 41.97, + "learning_rate": 2.901474297413384e-05, + "loss": 1.3185, + "step": 3977000 + }, + { + "epoch": 41.98, + "learning_rate": 2.901210464662241e-05, + "loss": 1.3199, + "step": 3977500 + }, + { + "epoch": 41.98, + "learning_rate": 2.900946631911099e-05, + "loss": 1.2896, + "step": 3978000 + }, + { + "epoch": 41.99, + "learning_rate": 2.9006827991599566e-05, + "loss": 1.2315, + "step": 3978500 + }, + { + "epoch": 41.99, + "learning_rate": 2.9004189664088145e-05, + "loss": 1.286, + "step": 3979000 + }, + { + "epoch": 42.0, + "learning_rate": 2.900155133657672e-05, + "loss": 1.2375, + "step": 3979500 + }, + { + "epoch": 42.0, + "learning_rate": 2.8998913009065292e-05, + "loss": 1.256, + "step": 3980000 + }, + { + "epoch": 42.01, + "learning_rate": 2.899627468155387e-05, + "loss": 1.2418, + "step": 3980500 + }, + { + "epoch": 42.01, + "learning_rate": 2.8993636354042447e-05, + "loss": 1.2534, + "step": 3981000 + }, + { + "epoch": 42.02, + "learning_rate": 2.8990998026531025e-05, + "loss": 1.2367, + "step": 3981500 + }, + { + "epoch": 42.02, + "learning_rate": 2.89883596990196e-05, + "loss": 1.2877, + "step": 3982000 + }, + { + "epoch": 42.03, + "learning_rate": 2.8985721371508173e-05, + "loss": 1.2967, + "step": 3982500 + }, + { + "epoch": 42.03, + "learning_rate": 2.8983083043996752e-05, + "loss": 1.2861, + "step": 3983000 + }, + { + "epoch": 42.04, + "learning_rate": 2.8980444716485327e-05, + "loss": 1.2334, + "step": 3983500 + }, + { + "epoch": 42.04, + "learning_rate": 2.8977806388973906e-05, + "loss": 1.2337, + "step": 3984000 + }, + { + "epoch": 42.05, + "learning_rate": 2.8975168061462475e-05, + "loss": 1.2845, + "step": 3984500 + }, + { + "epoch": 42.05, + "learning_rate": 2.8972529733951054e-05, + "loss": 1.239, + "step": 3985000 + }, + { + "epoch": 42.06, + "learning_rate": 2.896989140643963e-05, + "loss": 1.2662, + "step": 3985500 + }, + { + "epoch": 42.07, + "learning_rate": 2.8967253078928208e-05, + "loss": 1.2181, + "step": 3986000 + }, + { + "epoch": 42.07, + "learning_rate": 2.8964614751416784e-05, + "loss": 1.3112, + "step": 3986500 + }, + { + "epoch": 42.08, + "learning_rate": 2.8961976423905356e-05, + "loss": 1.2161, + "step": 3987000 + }, + { + "epoch": 42.08, + "learning_rate": 2.8959338096393935e-05, + "loss": 1.2596, + "step": 3987500 + }, + { + "epoch": 42.09, + "learning_rate": 2.895669976888251e-05, + "loss": 1.2455, + "step": 3988000 + }, + { + "epoch": 42.09, + "learning_rate": 2.895406144137109e-05, + "loss": 1.2925, + "step": 3988500 + }, + { + "epoch": 42.1, + "learning_rate": 2.8951423113859665e-05, + "loss": 1.3084, + "step": 3989000 + }, + { + "epoch": 42.1, + "learning_rate": 2.8948784786348237e-05, + "loss": 1.2711, + "step": 3989500 + }, + { + "epoch": 42.11, + "learning_rate": 2.8946146458836816e-05, + "loss": 1.2956, + "step": 3990000 + }, + { + "epoch": 42.11, + "learning_rate": 2.894350813132539e-05, + "loss": 1.2159, + "step": 3990500 + }, + { + "epoch": 42.12, + "learning_rate": 2.894086980381397e-05, + "loss": 1.3134, + "step": 3991000 + }, + { + "epoch": 42.12, + "learning_rate": 2.8938231476302545e-05, + "loss": 1.2774, + "step": 3991500 + }, + { + "epoch": 42.13, + "learning_rate": 2.8935593148791117e-05, + "loss": 1.2675, + "step": 3992000 + }, + { + "epoch": 42.13, + "learning_rate": 2.8932954821279696e-05, + "loss": 1.2479, + "step": 3992500 + }, + { + "epoch": 42.14, + "learning_rate": 2.8930316493768272e-05, + "loss": 1.2596, + "step": 3993000 + }, + { + "epoch": 42.14, + "learning_rate": 2.892767816625685e-05, + "loss": 1.3228, + "step": 3993500 + }, + { + "epoch": 42.15, + "learning_rate": 2.8925039838745423e-05, + "loss": 1.2573, + "step": 3994000 + }, + { + "epoch": 42.16, + "learning_rate": 2.8922401511233998e-05, + "loss": 1.2541, + "step": 3994500 + }, + { + "epoch": 42.16, + "learning_rate": 2.8919763183722577e-05, + "loss": 1.2875, + "step": 3995000 + }, + { + "epoch": 42.17, + "learning_rate": 2.8917124856211153e-05, + "loss": 1.264, + "step": 3995500 + }, + { + "epoch": 42.17, + "learning_rate": 2.891448652869973e-05, + "loss": 1.2671, + "step": 3996000 + }, + { + "epoch": 42.18, + "learning_rate": 2.89118482011883e-05, + "loss": 1.2826, + "step": 3996500 + }, + { + "epoch": 42.18, + "learning_rate": 2.890920987367688e-05, + "loss": 1.2997, + "step": 3997000 + }, + { + "epoch": 42.19, + "learning_rate": 2.8906571546165455e-05, + "loss": 1.2166, + "step": 3997500 + }, + { + "epoch": 42.19, + "learning_rate": 2.8903933218654033e-05, + "loss": 1.2844, + "step": 3998000 + }, + { + "epoch": 42.2, + "learning_rate": 2.8901294891142612e-05, + "loss": 1.2475, + "step": 3998500 + }, + { + "epoch": 42.2, + "learning_rate": 2.889865656363118e-05, + "loss": 1.2709, + "step": 3999000 + }, + { + "epoch": 42.21, + "learning_rate": 2.889601823611976e-05, + "loss": 1.2664, + "step": 3999500 + }, + { + "epoch": 42.21, + "learning_rate": 2.8893379908608335e-05, + "loss": 1.2986, + "step": 4000000 + }, + { + "epoch": 42.22, + "learning_rate": 2.8890741581096914e-05, + "loss": 1.2661, + "step": 4000500 + }, + { + "epoch": 42.22, + "learning_rate": 2.888810325358549e-05, + "loss": 1.2648, + "step": 4001000 + }, + { + "epoch": 42.23, + "learning_rate": 2.8885464926074062e-05, + "loss": 1.2735, + "step": 4001500 + }, + { + "epoch": 42.23, + "learning_rate": 2.888282659856264e-05, + "loss": 1.2427, + "step": 4002000 + }, + { + "epoch": 42.24, + "learning_rate": 2.8880188271051216e-05, + "loss": 1.2756, + "step": 4002500 + }, + { + "epoch": 42.24, + "learning_rate": 2.8877549943539795e-05, + "loss": 1.2559, + "step": 4003000 + }, + { + "epoch": 42.25, + "learning_rate": 2.8874911616028367e-05, + "loss": 1.313, + "step": 4003500 + }, + { + "epoch": 42.26, + "learning_rate": 2.8872273288516943e-05, + "loss": 1.2188, + "step": 4004000 + }, + { + "epoch": 42.26, + "learning_rate": 2.886963496100552e-05, + "loss": 1.2476, + "step": 4004500 + }, + { + "epoch": 42.27, + "learning_rate": 2.8866996633494097e-05, + "loss": 1.2987, + "step": 4005000 + }, + { + "epoch": 42.27, + "learning_rate": 2.8864358305982676e-05, + "loss": 1.3074, + "step": 4005500 + }, + { + "epoch": 42.28, + "learning_rate": 2.8861719978471248e-05, + "loss": 1.2805, + "step": 4006000 + }, + { + "epoch": 42.28, + "learning_rate": 2.8859081650959824e-05, + "loss": 1.2125, + "step": 4006500 + }, + { + "epoch": 42.29, + "learning_rate": 2.8856443323448402e-05, + "loss": 1.2467, + "step": 4007000 + }, + { + "epoch": 42.29, + "learning_rate": 2.8853804995936978e-05, + "loss": 1.2509, + "step": 4007500 + }, + { + "epoch": 42.3, + "learning_rate": 2.8851166668425557e-05, + "loss": 1.3075, + "step": 4008000 + }, + { + "epoch": 42.3, + "learning_rate": 2.8848528340914125e-05, + "loss": 1.2375, + "step": 4008500 + }, + { + "epoch": 42.31, + "learning_rate": 2.8845890013402704e-05, + "loss": 1.2313, + "step": 4009000 + }, + { + "epoch": 42.31, + "learning_rate": 2.8843251685891283e-05, + "loss": 1.2556, + "step": 4009500 + }, + { + "epoch": 42.32, + "learning_rate": 2.884061335837986e-05, + "loss": 1.2491, + "step": 4010000 + }, + { + "epoch": 42.32, + "learning_rate": 2.8837975030868438e-05, + "loss": 1.2501, + "step": 4010500 + }, + { + "epoch": 42.33, + "learning_rate": 2.8835336703357006e-05, + "loss": 1.2733, + "step": 4011000 + }, + { + "epoch": 42.33, + "learning_rate": 2.8832698375845585e-05, + "loss": 1.2476, + "step": 4011500 + }, + { + "epoch": 42.34, + "learning_rate": 2.883006004833416e-05, + "loss": 1.2293, + "step": 4012000 + }, + { + "epoch": 42.35, + "learning_rate": 2.882742172082274e-05, + "loss": 1.2775, + "step": 4012500 + }, + { + "epoch": 42.35, + "learning_rate": 2.882478339331131e-05, + "loss": 1.2917, + "step": 4013000 + }, + { + "epoch": 42.36, + "learning_rate": 2.8822145065799887e-05, + "loss": 1.2224, + "step": 4013500 + }, + { + "epoch": 42.36, + "learning_rate": 2.8819506738288466e-05, + "loss": 1.2441, + "step": 4014000 + }, + { + "epoch": 42.37, + "learning_rate": 2.881686841077704e-05, + "loss": 1.2804, + "step": 4014500 + }, + { + "epoch": 42.37, + "learning_rate": 2.881423008326562e-05, + "loss": 1.238, + "step": 4015000 + }, + { + "epoch": 42.38, + "learning_rate": 2.8811591755754192e-05, + "loss": 1.2379, + "step": 4015500 + }, + { + "epoch": 42.38, + "learning_rate": 2.8808953428242768e-05, + "loss": 1.2544, + "step": 4016000 + }, + { + "epoch": 42.39, + "learning_rate": 2.8806315100731347e-05, + "loss": 1.249, + "step": 4016500 + }, + { + "epoch": 42.39, + "learning_rate": 2.8803676773219922e-05, + "loss": 1.3206, + "step": 4017000 + }, + { + "epoch": 42.4, + "learning_rate": 2.88010384457085e-05, + "loss": 1.3087, + "step": 4017500 + }, + { + "epoch": 42.4, + "learning_rate": 2.8798400118197073e-05, + "loss": 1.2692, + "step": 4018000 + }, + { + "epoch": 42.41, + "learning_rate": 2.879576179068565e-05, + "loss": 1.2561, + "step": 4018500 + }, + { + "epoch": 42.41, + "learning_rate": 2.8793123463174228e-05, + "loss": 1.3202, + "step": 4019000 + }, + { + "epoch": 42.42, + "learning_rate": 2.8790485135662803e-05, + "loss": 1.2494, + "step": 4019500 + }, + { + "epoch": 42.42, + "learning_rate": 2.8787846808151382e-05, + "loss": 1.2625, + "step": 4020000 + }, + { + "epoch": 42.43, + "learning_rate": 2.8785208480639954e-05, + "loss": 1.2354, + "step": 4020500 + }, + { + "epoch": 42.43, + "learning_rate": 2.878257015312853e-05, + "loss": 1.2605, + "step": 4021000 + }, + { + "epoch": 42.44, + "learning_rate": 2.877993182561711e-05, + "loss": 1.3107, + "step": 4021500 + }, + { + "epoch": 42.45, + "learning_rate": 2.8777293498105684e-05, + "loss": 1.3275, + "step": 4022000 + }, + { + "epoch": 42.45, + "learning_rate": 2.8774655170594256e-05, + "loss": 1.247, + "step": 4022500 + }, + { + "epoch": 42.46, + "learning_rate": 2.877201684308283e-05, + "loss": 1.3305, + "step": 4023000 + }, + { + "epoch": 42.46, + "learning_rate": 2.876937851557141e-05, + "loss": 1.2991, + "step": 4023500 + }, + { + "epoch": 42.47, + "learning_rate": 2.8766740188059986e-05, + "loss": 1.2979, + "step": 4024000 + }, + { + "epoch": 42.47, + "learning_rate": 2.8764101860548565e-05, + "loss": 1.3003, + "step": 4024500 + }, + { + "epoch": 42.48, + "learning_rate": 2.8761463533037137e-05, + "loss": 1.2881, + "step": 4025000 + }, + { + "epoch": 42.48, + "learning_rate": 2.8758825205525712e-05, + "loss": 1.2938, + "step": 4025500 + }, + { + "epoch": 42.49, + "learning_rate": 2.875618687801429e-05, + "loss": 1.2961, + "step": 4026000 + }, + { + "epoch": 42.49, + "learning_rate": 2.8753548550502867e-05, + "loss": 1.247, + "step": 4026500 + }, + { + "epoch": 42.5, + "learning_rate": 2.8750910222991446e-05, + "loss": 1.2898, + "step": 4027000 + }, + { + "epoch": 42.5, + "learning_rate": 2.8748271895480018e-05, + "loss": 1.3503, + "step": 4027500 + }, + { + "epoch": 42.51, + "learning_rate": 2.8745633567968593e-05, + "loss": 1.3053, + "step": 4028000 + }, + { + "epoch": 42.51, + "learning_rate": 2.8742995240457172e-05, + "loss": 1.2472, + "step": 4028500 + }, + { + "epoch": 42.52, + "learning_rate": 2.8740356912945748e-05, + "loss": 1.2439, + "step": 4029000 + }, + { + "epoch": 42.52, + "learning_rate": 2.8737718585434326e-05, + "loss": 1.2707, + "step": 4029500 + }, + { + "epoch": 42.53, + "learning_rate": 2.87350802579229e-05, + "loss": 1.307, + "step": 4030000 + }, + { + "epoch": 42.54, + "learning_rate": 2.8732441930411474e-05, + "loss": 1.2667, + "step": 4030500 + }, + { + "epoch": 42.54, + "learning_rate": 2.8729803602900053e-05, + "loss": 1.3454, + "step": 4031000 + }, + { + "epoch": 42.55, + "learning_rate": 2.872716527538863e-05, + "loss": 1.2483, + "step": 4031500 + }, + { + "epoch": 42.55, + "learning_rate": 2.87245269478772e-05, + "loss": 1.2839, + "step": 4032000 + }, + { + "epoch": 42.56, + "learning_rate": 2.872188862036578e-05, + "loss": 1.3193, + "step": 4032500 + }, + { + "epoch": 42.56, + "learning_rate": 2.8719250292854355e-05, + "loss": 1.2832, + "step": 4033000 + }, + { + "epoch": 42.57, + "learning_rate": 2.8716611965342934e-05, + "loss": 1.3364, + "step": 4033500 + }, + { + "epoch": 42.57, + "learning_rate": 2.871397363783151e-05, + "loss": 1.3013, + "step": 4034000 + }, + { + "epoch": 42.58, + "learning_rate": 2.871133531032008e-05, + "loss": 1.3178, + "step": 4034500 + }, + { + "epoch": 42.58, + "learning_rate": 2.8708696982808657e-05, + "loss": 1.3141, + "step": 4035000 + }, + { + "epoch": 42.59, + "learning_rate": 2.8706058655297236e-05, + "loss": 1.24, + "step": 4035500 + }, + { + "epoch": 42.59, + "learning_rate": 2.870342032778581e-05, + "loss": 1.2989, + "step": 4036000 + }, + { + "epoch": 42.6, + "learning_rate": 2.870078200027439e-05, + "loss": 1.2601, + "step": 4036500 + }, + { + "epoch": 42.6, + "learning_rate": 2.8698143672762962e-05, + "loss": 1.2557, + "step": 4037000 + }, + { + "epoch": 42.61, + "learning_rate": 2.8695505345251538e-05, + "loss": 1.2139, + "step": 4037500 + }, + { + "epoch": 42.61, + "learning_rate": 2.8692867017740116e-05, + "loss": 1.2897, + "step": 4038000 + }, + { + "epoch": 42.62, + "learning_rate": 2.8690228690228692e-05, + "loss": 1.2548, + "step": 4038500 + }, + { + "epoch": 42.62, + "learning_rate": 2.868759036271727e-05, + "loss": 1.273, + "step": 4039000 + }, + { + "epoch": 42.63, + "learning_rate": 2.8684952035205843e-05, + "loss": 1.2842, + "step": 4039500 + }, + { + "epoch": 42.64, + "learning_rate": 2.868231370769442e-05, + "loss": 1.2641, + "step": 4040000 + }, + { + "epoch": 42.64, + "learning_rate": 2.8679675380182997e-05, + "loss": 1.2132, + "step": 4040500 + }, + { + "epoch": 42.65, + "learning_rate": 2.8677037052671573e-05, + "loss": 1.2665, + "step": 4041000 + }, + { + "epoch": 42.65, + "learning_rate": 2.8674398725160145e-05, + "loss": 1.2703, + "step": 4041500 + }, + { + "epoch": 42.66, + "learning_rate": 2.8671760397648724e-05, + "loss": 1.2896, + "step": 4042000 + }, + { + "epoch": 42.66, + "learning_rate": 2.86691220701373e-05, + "loss": 1.311, + "step": 4042500 + }, + { + "epoch": 42.67, + "learning_rate": 2.8666483742625878e-05, + "loss": 1.2905, + "step": 4043000 + }, + { + "epoch": 42.67, + "learning_rate": 2.8663845415114454e-05, + "loss": 1.3263, + "step": 4043500 + }, + { + "epoch": 42.68, + "learning_rate": 2.8661207087603026e-05, + "loss": 1.2728, + "step": 4044000 + }, + { + "epoch": 42.68, + "learning_rate": 2.8658568760091605e-05, + "loss": 1.2705, + "step": 4044500 + }, + { + "epoch": 42.69, + "learning_rate": 2.865593043258018e-05, + "loss": 1.3108, + "step": 4045000 + }, + { + "epoch": 42.69, + "learning_rate": 2.865329210506876e-05, + "loss": 1.1922, + "step": 4045500 + }, + { + "epoch": 42.7, + "learning_rate": 2.8650653777557334e-05, + "loss": 1.2619, + "step": 4046000 + }, + { + "epoch": 42.7, + "learning_rate": 2.8648015450045907e-05, + "loss": 1.2481, + "step": 4046500 + }, + { + "epoch": 42.71, + "learning_rate": 2.8645377122534482e-05, + "loss": 1.2659, + "step": 4047000 + }, + { + "epoch": 42.71, + "learning_rate": 2.864273879502306e-05, + "loss": 1.2755, + "step": 4047500 + }, + { + "epoch": 42.72, + "learning_rate": 2.8640100467511636e-05, + "loss": 1.2941, + "step": 4048000 + }, + { + "epoch": 42.73, + "learning_rate": 2.8637462140000215e-05, + "loss": 1.3295, + "step": 4048500 + }, + { + "epoch": 42.73, + "learning_rate": 2.8634823812488787e-05, + "loss": 1.2977, + "step": 4049000 + }, + { + "epoch": 42.74, + "learning_rate": 2.8632185484977363e-05, + "loss": 1.2877, + "step": 4049500 + }, + { + "epoch": 42.74, + "learning_rate": 2.8629547157465942e-05, + "loss": 1.2302, + "step": 4050000 + }, + { + "epoch": 42.75, + "learning_rate": 2.8626908829954517e-05, + "loss": 1.2874, + "step": 4050500 + }, + { + "epoch": 42.75, + "learning_rate": 2.862427050244309e-05, + "loss": 1.307, + "step": 4051000 + }, + { + "epoch": 42.76, + "learning_rate": 2.8621632174931668e-05, + "loss": 1.3237, + "step": 4051500 + }, + { + "epoch": 42.76, + "learning_rate": 2.8618993847420244e-05, + "loss": 1.2934, + "step": 4052000 + }, + { + "epoch": 42.77, + "learning_rate": 2.8616355519908823e-05, + "loss": 1.2573, + "step": 4052500 + }, + { + "epoch": 42.77, + "learning_rate": 2.8613717192397398e-05, + "loss": 1.295, + "step": 4053000 + }, + { + "epoch": 42.78, + "learning_rate": 2.861107886488597e-05, + "loss": 1.299, + "step": 4053500 + }, + { + "epoch": 42.78, + "learning_rate": 2.860844053737455e-05, + "loss": 1.3359, + "step": 4054000 + }, + { + "epoch": 42.79, + "learning_rate": 2.8605802209863125e-05, + "loss": 1.247, + "step": 4054500 + }, + { + "epoch": 42.79, + "learning_rate": 2.8603163882351703e-05, + "loss": 1.2991, + "step": 4055000 + }, + { + "epoch": 42.8, + "learning_rate": 2.860052555484028e-05, + "loss": 1.2891, + "step": 4055500 + }, + { + "epoch": 42.8, + "learning_rate": 2.859788722732885e-05, + "loss": 1.2613, + "step": 4056000 + }, + { + "epoch": 42.81, + "learning_rate": 2.859524889981743e-05, + "loss": 1.2838, + "step": 4056500 + }, + { + "epoch": 42.81, + "learning_rate": 2.8592610572306005e-05, + "loss": 1.2755, + "step": 4057000 + }, + { + "epoch": 42.82, + "learning_rate": 2.8589972244794584e-05, + "loss": 1.2663, + "step": 4057500 + }, + { + "epoch": 42.83, + "learning_rate": 2.8587333917283153e-05, + "loss": 1.3341, + "step": 4058000 + }, + { + "epoch": 42.83, + "learning_rate": 2.8584695589771732e-05, + "loss": 1.2752, + "step": 4058500 + }, + { + "epoch": 42.84, + "learning_rate": 2.8582057262260307e-05, + "loss": 1.2919, + "step": 4059000 + }, + { + "epoch": 42.84, + "learning_rate": 2.8579418934748886e-05, + "loss": 1.2442, + "step": 4059500 + }, + { + "epoch": 42.85, + "learning_rate": 2.857678060723746e-05, + "loss": 1.2864, + "step": 4060000 + }, + { + "epoch": 42.85, + "learning_rate": 2.8574142279726034e-05, + "loss": 1.1978, + "step": 4060500 + }, + { + "epoch": 42.86, + "learning_rate": 2.8571503952214613e-05, + "loss": 1.2368, + "step": 4061000 + }, + { + "epoch": 42.86, + "learning_rate": 2.8568865624703188e-05, + "loss": 1.2691, + "step": 4061500 + }, + { + "epoch": 42.87, + "learning_rate": 2.8566227297191767e-05, + "loss": 1.3094, + "step": 4062000 + }, + { + "epoch": 42.87, + "learning_rate": 2.8563588969680342e-05, + "loss": 1.2768, + "step": 4062500 + }, + { + "epoch": 42.88, + "learning_rate": 2.8560950642168915e-05, + "loss": 1.2823, + "step": 4063000 + }, + { + "epoch": 42.88, + "learning_rate": 2.8558312314657493e-05, + "loss": 1.2533, + "step": 4063500 + }, + { + "epoch": 42.89, + "learning_rate": 2.855567398714607e-05, + "loss": 1.2663, + "step": 4064000 + }, + { + "epoch": 42.89, + "learning_rate": 2.8553035659634648e-05, + "loss": 1.3035, + "step": 4064500 + }, + { + "epoch": 42.9, + "learning_rate": 2.8550397332123223e-05, + "loss": 1.3426, + "step": 4065000 + }, + { + "epoch": 42.9, + "learning_rate": 2.8547759004611795e-05, + "loss": 1.2816, + "step": 4065500 + }, + { + "epoch": 42.91, + "learning_rate": 2.8545120677100374e-05, + "loss": 1.256, + "step": 4066000 + }, + { + "epoch": 42.92, + "learning_rate": 2.854248234958895e-05, + "loss": 1.2517, + "step": 4066500 + }, + { + "epoch": 42.92, + "learning_rate": 2.853984402207753e-05, + "loss": 1.2477, + "step": 4067000 + }, + { + "epoch": 42.93, + "learning_rate": 2.85372056945661e-05, + "loss": 1.2788, + "step": 4067500 + }, + { + "epoch": 42.93, + "learning_rate": 2.8534567367054676e-05, + "loss": 1.3344, + "step": 4068000 + }, + { + "epoch": 42.94, + "learning_rate": 2.8531929039543255e-05, + "loss": 1.1978, + "step": 4068500 + }, + { + "epoch": 42.94, + "learning_rate": 2.852929071203183e-05, + "loss": 1.2451, + "step": 4069000 + }, + { + "epoch": 42.95, + "learning_rate": 2.852665238452041e-05, + "loss": 1.3336, + "step": 4069500 + }, + { + "epoch": 42.95, + "learning_rate": 2.8524014057008978e-05, + "loss": 1.2927, + "step": 4070000 + }, + { + "epoch": 42.96, + "learning_rate": 2.8521375729497557e-05, + "loss": 1.2424, + "step": 4070500 + }, + { + "epoch": 42.96, + "learning_rate": 2.8518737401986133e-05, + "loss": 1.2722, + "step": 4071000 + }, + { + "epoch": 42.97, + "learning_rate": 2.851609907447471e-05, + "loss": 1.3419, + "step": 4071500 + }, + { + "epoch": 42.97, + "learning_rate": 2.851346074696329e-05, + "loss": 1.2677, + "step": 4072000 + }, + { + "epoch": 42.98, + "learning_rate": 2.851082241945186e-05, + "loss": 1.2728, + "step": 4072500 + }, + { + "epoch": 42.98, + "learning_rate": 2.8508184091940438e-05, + "loss": 1.336, + "step": 4073000 + }, + { + "epoch": 42.99, + "learning_rate": 2.8505545764429013e-05, + "loss": 1.2793, + "step": 4073500 + }, + { + "epoch": 42.99, + "learning_rate": 2.8502907436917592e-05, + "loss": 1.2664, + "step": 4074000 + }, + { + "epoch": 43.0, + "learning_rate": 2.8500269109406168e-05, + "loss": 1.2903, + "step": 4074500 + }, + { + "epoch": 43.0, + "learning_rate": 2.849763078189474e-05, + "loss": 1.2872, + "step": 4075000 + }, + { + "epoch": 43.01, + "learning_rate": 2.849499245438332e-05, + "loss": 1.2584, + "step": 4075500 + }, + { + "epoch": 43.02, + "learning_rate": 2.8492354126871894e-05, + "loss": 1.2225, + "step": 4076000 + }, + { + "epoch": 43.02, + "learning_rate": 2.8489715799360473e-05, + "loss": 1.2586, + "step": 4076500 + }, + { + "epoch": 43.03, + "learning_rate": 2.8487077471849045e-05, + "loss": 1.2468, + "step": 4077000 + }, + { + "epoch": 43.03, + "learning_rate": 2.848443914433762e-05, + "loss": 1.3046, + "step": 4077500 + }, + { + "epoch": 43.04, + "learning_rate": 2.84818008168262e-05, + "loss": 1.3063, + "step": 4078000 + }, + { + "epoch": 43.04, + "learning_rate": 2.8479162489314775e-05, + "loss": 1.2688, + "step": 4078500 + }, + { + "epoch": 43.05, + "learning_rate": 2.8476524161803354e-05, + "loss": 1.2908, + "step": 4079000 + }, + { + "epoch": 43.05, + "learning_rate": 2.8473885834291926e-05, + "loss": 1.2418, + "step": 4079500 + }, + { + "epoch": 43.06, + "learning_rate": 2.84712475067805e-05, + "loss": 1.3131, + "step": 4080000 + }, + { + "epoch": 43.06, + "learning_rate": 2.846860917926908e-05, + "loss": 1.2918, + "step": 4080500 + }, + { + "epoch": 43.07, + "learning_rate": 2.8465970851757656e-05, + "loss": 1.2299, + "step": 4081000 + }, + { + "epoch": 43.07, + "learning_rate": 2.8463332524246235e-05, + "loss": 1.2403, + "step": 4081500 + }, + { + "epoch": 43.08, + "learning_rate": 2.8460694196734803e-05, + "loss": 1.2692, + "step": 4082000 + }, + { + "epoch": 43.08, + "learning_rate": 2.8458055869223382e-05, + "loss": 1.2775, + "step": 4082500 + }, + { + "epoch": 43.09, + "learning_rate": 2.845541754171196e-05, + "loss": 1.2699, + "step": 4083000 + }, + { + "epoch": 43.09, + "learning_rate": 2.8452779214200537e-05, + "loss": 1.2366, + "step": 4083500 + }, + { + "epoch": 43.1, + "learning_rate": 2.8450140886689116e-05, + "loss": 1.2786, + "step": 4084000 + }, + { + "epoch": 43.1, + "learning_rate": 2.8447502559177684e-05, + "loss": 1.2418, + "step": 4084500 + }, + { + "epoch": 43.11, + "learning_rate": 2.8444864231666263e-05, + "loss": 1.277, + "step": 4085000 + }, + { + "epoch": 43.12, + "learning_rate": 2.844222590415484e-05, + "loss": 1.2597, + "step": 4085500 + }, + { + "epoch": 43.12, + "learning_rate": 2.8439587576643417e-05, + "loss": 1.2567, + "step": 4086000 + }, + { + "epoch": 43.13, + "learning_rate": 2.843694924913199e-05, + "loss": 1.2621, + "step": 4086500 + }, + { + "epoch": 43.13, + "learning_rate": 2.8434310921620565e-05, + "loss": 1.2322, + "step": 4087000 + }, + { + "epoch": 43.14, + "learning_rate": 2.8431672594109144e-05, + "loss": 1.2903, + "step": 4087500 + }, + { + "epoch": 43.14, + "learning_rate": 2.842903426659772e-05, + "loss": 1.2317, + "step": 4088000 + }, + { + "epoch": 43.15, + "learning_rate": 2.84263959390863e-05, + "loss": 1.2454, + "step": 4088500 + }, + { + "epoch": 43.15, + "learning_rate": 2.842375761157487e-05, + "loss": 1.2996, + "step": 4089000 + }, + { + "epoch": 43.16, + "learning_rate": 2.8421119284063446e-05, + "loss": 1.2473, + "step": 4089500 + }, + { + "epoch": 43.16, + "learning_rate": 2.8418480956552025e-05, + "loss": 1.2198, + "step": 4090000 + }, + { + "epoch": 43.17, + "learning_rate": 2.84158426290406e-05, + "loss": 1.2687, + "step": 4090500 + }, + { + "epoch": 43.17, + "learning_rate": 2.841320430152918e-05, + "loss": 1.3179, + "step": 4091000 + }, + { + "epoch": 43.18, + "learning_rate": 2.841056597401775e-05, + "loss": 1.2674, + "step": 4091500 + }, + { + "epoch": 43.18, + "learning_rate": 2.8407927646506327e-05, + "loss": 1.2113, + "step": 4092000 + }, + { + "epoch": 43.19, + "learning_rate": 2.8405289318994906e-05, + "loss": 1.2466, + "step": 4092500 + }, + { + "epoch": 43.19, + "learning_rate": 2.840265099148348e-05, + "loss": 1.3149, + "step": 4093000 + }, + { + "epoch": 43.2, + "learning_rate": 2.840001266397206e-05, + "loss": 1.2403, + "step": 4093500 + }, + { + "epoch": 43.21, + "learning_rate": 2.8397374336460632e-05, + "loss": 1.2608, + "step": 4094000 + }, + { + "epoch": 43.21, + "learning_rate": 2.8394736008949208e-05, + "loss": 1.2035, + "step": 4094500 + }, + { + "epoch": 43.22, + "learning_rate": 2.8392097681437786e-05, + "loss": 1.2606, + "step": 4095000 + }, + { + "epoch": 43.22, + "learning_rate": 2.8389459353926362e-05, + "loss": 1.3013, + "step": 4095500 + }, + { + "epoch": 43.23, + "learning_rate": 2.8386821026414934e-05, + "loss": 1.2838, + "step": 4096000 + }, + { + "epoch": 43.23, + "learning_rate": 2.838418269890351e-05, + "loss": 1.2518, + "step": 4096500 + }, + { + "epoch": 43.24, + "learning_rate": 2.838154437139209e-05, + "loss": 1.3333, + "step": 4097000 + }, + { + "epoch": 43.24, + "learning_rate": 2.8378906043880664e-05, + "loss": 1.3063, + "step": 4097500 + }, + { + "epoch": 43.25, + "learning_rate": 2.8376267716369243e-05, + "loss": 1.3303, + "step": 4098000 + }, + { + "epoch": 43.25, + "learning_rate": 2.8373629388857815e-05, + "loss": 1.2816, + "step": 4098500 + }, + { + "epoch": 43.26, + "learning_rate": 2.837099106134639e-05, + "loss": 1.2343, + "step": 4099000 + }, + { + "epoch": 43.26, + "learning_rate": 2.836835273383497e-05, + "loss": 1.2343, + "step": 4099500 + }, + { + "epoch": 43.27, + "learning_rate": 2.8365714406323545e-05, + "loss": 1.2869, + "step": 4100000 + }, + { + "epoch": 43.27, + "learning_rate": 2.8363076078812124e-05, + "loss": 1.2625, + "step": 4100500 + }, + { + "epoch": 43.28, + "learning_rate": 2.8360437751300696e-05, + "loss": 1.2675, + "step": 4101000 + }, + { + "epoch": 43.28, + "learning_rate": 2.835779942378927e-05, + "loss": 1.2758, + "step": 4101500 + }, + { + "epoch": 43.29, + "learning_rate": 2.835516109627785e-05, + "loss": 1.2216, + "step": 4102000 + }, + { + "epoch": 43.29, + "learning_rate": 2.8352522768766426e-05, + "loss": 1.3036, + "step": 4102500 + }, + { + "epoch": 43.3, + "learning_rate": 2.8349884441255004e-05, + "loss": 1.2765, + "step": 4103000 + }, + { + "epoch": 43.31, + "learning_rate": 2.8347246113743576e-05, + "loss": 1.2661, + "step": 4103500 + }, + { + "epoch": 43.31, + "learning_rate": 2.8344607786232152e-05, + "loss": 1.237, + "step": 4104000 + }, + { + "epoch": 43.32, + "learning_rate": 2.834196945872073e-05, + "loss": 1.2546, + "step": 4104500 + }, + { + "epoch": 43.32, + "learning_rate": 2.8339331131209306e-05, + "loss": 1.2498, + "step": 4105000 + }, + { + "epoch": 43.33, + "learning_rate": 2.833669280369788e-05, + "loss": 1.2624, + "step": 4105500 + }, + { + "epoch": 43.33, + "learning_rate": 2.8334054476186457e-05, + "loss": 1.2765, + "step": 4106000 + }, + { + "epoch": 43.34, + "learning_rate": 2.8331416148675033e-05, + "loss": 1.2298, + "step": 4106500 + }, + { + "epoch": 43.34, + "learning_rate": 2.832877782116361e-05, + "loss": 1.2721, + "step": 4107000 + }, + { + "epoch": 43.35, + "learning_rate": 2.8326139493652187e-05, + "loss": 1.2732, + "step": 4107500 + }, + { + "epoch": 43.35, + "learning_rate": 2.832350116614076e-05, + "loss": 1.3115, + "step": 4108000 + }, + { + "epoch": 43.36, + "learning_rate": 2.8320862838629335e-05, + "loss": 1.255, + "step": 4108500 + }, + { + "epoch": 43.36, + "learning_rate": 2.8318224511117914e-05, + "loss": 1.2978, + "step": 4109000 + }, + { + "epoch": 43.37, + "learning_rate": 2.831558618360649e-05, + "loss": 1.2897, + "step": 4109500 + }, + { + "epoch": 43.37, + "learning_rate": 2.8312947856095068e-05, + "loss": 1.2279, + "step": 4110000 + }, + { + "epoch": 43.38, + "learning_rate": 2.831030952858364e-05, + "loss": 1.242, + "step": 4110500 + }, + { + "epoch": 43.38, + "learning_rate": 2.8307671201072216e-05, + "loss": 1.2971, + "step": 4111000 + }, + { + "epoch": 43.39, + "learning_rate": 2.8305032873560794e-05, + "loss": 1.329, + "step": 4111500 + }, + { + "epoch": 43.4, + "learning_rate": 2.830239454604937e-05, + "loss": 1.2786, + "step": 4112000 + }, + { + "epoch": 43.4, + "learning_rate": 2.829975621853795e-05, + "loss": 1.2268, + "step": 4112500 + }, + { + "epoch": 43.41, + "learning_rate": 2.829711789102652e-05, + "loss": 1.3142, + "step": 4113000 + }, + { + "epoch": 43.41, + "learning_rate": 2.8294479563515096e-05, + "loss": 1.2762, + "step": 4113500 + }, + { + "epoch": 43.42, + "learning_rate": 2.8291841236003675e-05, + "loss": 1.2728, + "step": 4114000 + }, + { + "epoch": 43.42, + "learning_rate": 2.828920290849225e-05, + "loss": 1.2839, + "step": 4114500 + }, + { + "epoch": 43.43, + "learning_rate": 2.8286564580980823e-05, + "loss": 1.329, + "step": 4115000 + }, + { + "epoch": 43.43, + "learning_rate": 2.8283926253469402e-05, + "loss": 1.2847, + "step": 4115500 + }, + { + "epoch": 43.44, + "learning_rate": 2.8281287925957977e-05, + "loss": 1.2584, + "step": 4116000 + }, + { + "epoch": 43.44, + "learning_rate": 2.8278649598446556e-05, + "loss": 1.2389, + "step": 4116500 + }, + { + "epoch": 43.45, + "learning_rate": 2.827601127093513e-05, + "loss": 1.2597, + "step": 4117000 + }, + { + "epoch": 43.45, + "learning_rate": 2.8273372943423704e-05, + "loss": 1.2592, + "step": 4117500 + }, + { + "epoch": 43.46, + "learning_rate": 2.8270734615912283e-05, + "loss": 1.2803, + "step": 4118000 + }, + { + "epoch": 43.46, + "learning_rate": 2.8268096288400858e-05, + "loss": 1.275, + "step": 4118500 + }, + { + "epoch": 43.47, + "learning_rate": 2.8265457960889437e-05, + "loss": 1.2472, + "step": 4119000 + }, + { + "epoch": 43.47, + "learning_rate": 2.8262819633378012e-05, + "loss": 1.2748, + "step": 4119500 + }, + { + "epoch": 43.48, + "learning_rate": 2.8260181305866584e-05, + "loss": 1.2377, + "step": 4120000 + }, + { + "epoch": 43.48, + "learning_rate": 2.825754297835516e-05, + "loss": 1.256, + "step": 4120500 + }, + { + "epoch": 43.49, + "learning_rate": 2.825490465084374e-05, + "loss": 1.2798, + "step": 4121000 + }, + { + "epoch": 43.5, + "learning_rate": 2.8252266323332314e-05, + "loss": 1.2864, + "step": 4121500 + }, + { + "epoch": 43.5, + "learning_rate": 2.8249627995820893e-05, + "loss": 1.2851, + "step": 4122000 + }, + { + "epoch": 43.51, + "learning_rate": 2.8246989668309465e-05, + "loss": 1.2802, + "step": 4122500 + }, + { + "epoch": 43.51, + "learning_rate": 2.824435134079804e-05, + "loss": 1.2707, + "step": 4123000 + }, + { + "epoch": 43.52, + "learning_rate": 2.824171301328662e-05, + "loss": 1.325, + "step": 4123500 + }, + { + "epoch": 43.52, + "learning_rate": 2.8239074685775195e-05, + "loss": 1.254, + "step": 4124000 + }, + { + "epoch": 43.53, + "learning_rate": 2.8236436358263767e-05, + "loss": 1.2688, + "step": 4124500 + }, + { + "epoch": 43.53, + "learning_rate": 2.8233798030752346e-05, + "loss": 1.2587, + "step": 4125000 + }, + { + "epoch": 43.54, + "learning_rate": 2.823115970324092e-05, + "loss": 1.2819, + "step": 4125500 + }, + { + "epoch": 43.54, + "learning_rate": 2.82285213757295e-05, + "loss": 1.2314, + "step": 4126000 + }, + { + "epoch": 43.55, + "learning_rate": 2.8225883048218076e-05, + "loss": 1.2654, + "step": 4126500 + }, + { + "epoch": 43.55, + "learning_rate": 2.8223244720706648e-05, + "loss": 1.2805, + "step": 4127000 + }, + { + "epoch": 43.56, + "learning_rate": 2.8220606393195227e-05, + "loss": 1.2532, + "step": 4127500 + }, + { + "epoch": 43.56, + "learning_rate": 2.8217968065683802e-05, + "loss": 1.2731, + "step": 4128000 + }, + { + "epoch": 43.57, + "learning_rate": 2.821532973817238e-05, + "loss": 1.2992, + "step": 4128500 + }, + { + "epoch": 43.57, + "learning_rate": 2.8212691410660957e-05, + "loss": 1.1971, + "step": 4129000 + }, + { + "epoch": 43.58, + "learning_rate": 2.821005308314953e-05, + "loss": 1.2881, + "step": 4129500 + }, + { + "epoch": 43.59, + "learning_rate": 2.8207414755638108e-05, + "loss": 1.3036, + "step": 4130000 + }, + { + "epoch": 43.59, + "learning_rate": 2.8204776428126683e-05, + "loss": 1.2519, + "step": 4130500 + }, + { + "epoch": 43.6, + "learning_rate": 2.8202138100615262e-05, + "loss": 1.2538, + "step": 4131000 + }, + { + "epoch": 43.6, + "learning_rate": 2.8199499773103838e-05, + "loss": 1.2736, + "step": 4131500 + }, + { + "epoch": 43.61, + "learning_rate": 2.819686144559241e-05, + "loss": 1.3308, + "step": 4132000 + }, + { + "epoch": 43.61, + "learning_rate": 2.8194223118080985e-05, + "loss": 1.2863, + "step": 4132500 + }, + { + "epoch": 43.62, + "learning_rate": 2.8191584790569564e-05, + "loss": 1.3037, + "step": 4133000 + }, + { + "epoch": 43.62, + "learning_rate": 2.818894646305814e-05, + "loss": 1.2843, + "step": 4133500 + }, + { + "epoch": 43.63, + "learning_rate": 2.818630813554671e-05, + "loss": 1.2685, + "step": 4134000 + }, + { + "epoch": 43.63, + "learning_rate": 2.818366980803529e-05, + "loss": 1.3115, + "step": 4134500 + }, + { + "epoch": 43.64, + "learning_rate": 2.8181031480523866e-05, + "loss": 1.2531, + "step": 4135000 + }, + { + "epoch": 43.64, + "learning_rate": 2.8178393153012445e-05, + "loss": 1.2945, + "step": 4135500 + }, + { + "epoch": 43.65, + "learning_rate": 2.817575482550102e-05, + "loss": 1.2239, + "step": 4136000 + }, + { + "epoch": 43.65, + "learning_rate": 2.8173116497989593e-05, + "loss": 1.2265, + "step": 4136500 + }, + { + "epoch": 43.66, + "learning_rate": 2.817047817047817e-05, + "loss": 1.3333, + "step": 4137000 + }, + { + "epoch": 43.66, + "learning_rate": 2.8167839842966747e-05, + "loss": 1.2899, + "step": 4137500 + }, + { + "epoch": 43.67, + "learning_rate": 2.8165201515455326e-05, + "loss": 1.25, + "step": 4138000 + }, + { + "epoch": 43.67, + "learning_rate": 2.81625631879439e-05, + "loss": 1.274, + "step": 4138500 + }, + { + "epoch": 43.68, + "learning_rate": 2.8159924860432473e-05, + "loss": 1.2634, + "step": 4139000 + }, + { + "epoch": 43.69, + "learning_rate": 2.8157286532921052e-05, + "loss": 1.3068, + "step": 4139500 + }, + { + "epoch": 43.69, + "learning_rate": 2.8154648205409628e-05, + "loss": 1.2671, + "step": 4140000 + }, + { + "epoch": 43.7, + "learning_rate": 2.8152009877898207e-05, + "loss": 1.216, + "step": 4140500 + }, + { + "epoch": 43.7, + "learning_rate": 2.8149371550386782e-05, + "loss": 1.2618, + "step": 4141000 + }, + { + "epoch": 43.71, + "learning_rate": 2.8146733222875354e-05, + "loss": 1.2986, + "step": 4141500 + }, + { + "epoch": 43.71, + "learning_rate": 2.8144094895363933e-05, + "loss": 1.2513, + "step": 4142000 + }, + { + "epoch": 43.72, + "learning_rate": 2.814145656785251e-05, + "loss": 1.2792, + "step": 4142500 + }, + { + "epoch": 43.72, + "learning_rate": 2.8138818240341087e-05, + "loss": 1.2605, + "step": 4143000 + }, + { + "epoch": 43.73, + "learning_rate": 2.8136179912829656e-05, + "loss": 1.2428, + "step": 4143500 + }, + { + "epoch": 43.73, + "learning_rate": 2.8133541585318235e-05, + "loss": 1.3134, + "step": 4144000 + }, + { + "epoch": 43.74, + "learning_rate": 2.813090325780681e-05, + "loss": 1.3273, + "step": 4144500 + }, + { + "epoch": 43.74, + "learning_rate": 2.812826493029539e-05, + "loss": 1.3172, + "step": 4145000 + }, + { + "epoch": 43.75, + "learning_rate": 2.8125626602783968e-05, + "loss": 1.2511, + "step": 4145500 + }, + { + "epoch": 43.75, + "learning_rate": 2.8122988275272537e-05, + "loss": 1.3296, + "step": 4146000 + }, + { + "epoch": 43.76, + "learning_rate": 2.8120349947761116e-05, + "loss": 1.2817, + "step": 4146500 + }, + { + "epoch": 43.76, + "learning_rate": 2.811771162024969e-05, + "loss": 1.2177, + "step": 4147000 + }, + { + "epoch": 43.77, + "learning_rate": 2.811507329273827e-05, + "loss": 1.2253, + "step": 4147500 + }, + { + "epoch": 43.78, + "learning_rate": 2.8112434965226846e-05, + "loss": 1.2508, + "step": 4148000 + }, + { + "epoch": 43.78, + "learning_rate": 2.8109796637715418e-05, + "loss": 1.2732, + "step": 4148500 + }, + { + "epoch": 43.79, + "learning_rate": 2.8107158310203997e-05, + "loss": 1.3124, + "step": 4149000 + }, + { + "epoch": 43.79, + "learning_rate": 2.8104519982692572e-05, + "loss": 1.2687, + "step": 4149500 + }, + { + "epoch": 43.8, + "learning_rate": 2.810188165518115e-05, + "loss": 1.2666, + "step": 4150000 + }, + { + "epoch": 43.8, + "learning_rate": 2.8099243327669726e-05, + "loss": 1.3096, + "step": 4150500 + }, + { + "epoch": 43.81, + "learning_rate": 2.80966050001583e-05, + "loss": 1.2895, + "step": 4151000 + }, + { + "epoch": 43.81, + "learning_rate": 2.8093966672646877e-05, + "loss": 1.262, + "step": 4151500 + }, + { + "epoch": 43.82, + "learning_rate": 2.8091328345135453e-05, + "loss": 1.3304, + "step": 4152000 + }, + { + "epoch": 43.82, + "learning_rate": 2.8088690017624032e-05, + "loss": 1.3112, + "step": 4152500 + }, + { + "epoch": 43.83, + "learning_rate": 2.8086051690112604e-05, + "loss": 1.2445, + "step": 4153000 + }, + { + "epoch": 43.83, + "learning_rate": 2.808341336260118e-05, + "loss": 1.2587, + "step": 4153500 + }, + { + "epoch": 43.84, + "learning_rate": 2.8080775035089758e-05, + "loss": 1.2631, + "step": 4154000 + }, + { + "epoch": 43.84, + "learning_rate": 2.8078136707578334e-05, + "loss": 1.2896, + "step": 4154500 + }, + { + "epoch": 43.85, + "learning_rate": 2.8075498380066913e-05, + "loss": 1.2033, + "step": 4155000 + }, + { + "epoch": 43.85, + "learning_rate": 2.807286005255548e-05, + "loss": 1.205, + "step": 4155500 + }, + { + "epoch": 43.86, + "learning_rate": 2.807022172504406e-05, + "loss": 1.2958, + "step": 4156000 + }, + { + "epoch": 43.86, + "learning_rate": 2.806758339753264e-05, + "loss": 1.3089, + "step": 4156500 + }, + { + "epoch": 43.87, + "learning_rate": 2.8064945070021215e-05, + "loss": 1.224, + "step": 4157000 + }, + { + "epoch": 43.88, + "learning_rate": 2.8062306742509793e-05, + "loss": 1.2814, + "step": 4157500 + }, + { + "epoch": 43.88, + "learning_rate": 2.8059668414998362e-05, + "loss": 1.2153, + "step": 4158000 + }, + { + "epoch": 43.89, + "learning_rate": 2.805703008748694e-05, + "loss": 1.2693, + "step": 4158500 + }, + { + "epoch": 43.89, + "learning_rate": 2.8054391759975517e-05, + "loss": 1.2444, + "step": 4159000 + }, + { + "epoch": 43.9, + "learning_rate": 2.8051753432464095e-05, + "loss": 1.2112, + "step": 4159500 + }, + { + "epoch": 43.9, + "learning_rate": 2.804911510495267e-05, + "loss": 1.2973, + "step": 4160000 + }, + { + "epoch": 43.91, + "learning_rate": 2.8046476777441243e-05, + "loss": 1.2887, + "step": 4160500 + }, + { + "epoch": 43.91, + "learning_rate": 2.8043838449929822e-05, + "loss": 1.2492, + "step": 4161000 + }, + { + "epoch": 43.92, + "learning_rate": 2.8041200122418397e-05, + "loss": 1.325, + "step": 4161500 + }, + { + "epoch": 43.92, + "learning_rate": 2.8038561794906976e-05, + "loss": 1.2583, + "step": 4162000 + }, + { + "epoch": 43.93, + "learning_rate": 2.803592346739555e-05, + "loss": 1.2671, + "step": 4162500 + }, + { + "epoch": 43.93, + "learning_rate": 2.8033285139884124e-05, + "loss": 1.2993, + "step": 4163000 + }, + { + "epoch": 43.94, + "learning_rate": 2.8030646812372703e-05, + "loss": 1.2827, + "step": 4163500 + }, + { + "epoch": 43.94, + "learning_rate": 2.8028008484861278e-05, + "loss": 1.2375, + "step": 4164000 + }, + { + "epoch": 43.95, + "learning_rate": 2.8025370157349857e-05, + "loss": 1.2518, + "step": 4164500 + }, + { + "epoch": 43.95, + "learning_rate": 2.802273182983843e-05, + "loss": 1.2935, + "step": 4165000 + }, + { + "epoch": 43.96, + "learning_rate": 2.8020093502327005e-05, + "loss": 1.1985, + "step": 4165500 + }, + { + "epoch": 43.97, + "learning_rate": 2.8017455174815584e-05, + "loss": 1.2837, + "step": 4166000 + }, + { + "epoch": 43.97, + "learning_rate": 2.801481684730416e-05, + "loss": 1.2756, + "step": 4166500 + }, + { + "epoch": 43.98, + "learning_rate": 2.8012178519792738e-05, + "loss": 1.2889, + "step": 4167000 + }, + { + "epoch": 43.98, + "learning_rate": 2.800954019228131e-05, + "loss": 1.2715, + "step": 4167500 + }, + { + "epoch": 43.99, + "learning_rate": 2.8006901864769885e-05, + "loss": 1.2758, + "step": 4168000 + }, + { + "epoch": 43.99, + "learning_rate": 2.8004263537258464e-05, + "loss": 1.2348, + "step": 4168500 + }, + { + "epoch": 44.0, + "learning_rate": 2.800162520974704e-05, + "loss": 1.2239, + "step": 4169000 + }, + { + "epoch": 44.0, + "learning_rate": 2.799898688223562e-05, + "loss": 1.2747, + "step": 4169500 + }, + { + "epoch": 44.01, + "learning_rate": 2.7996348554724187e-05, + "loss": 1.2632, + "step": 4170000 + }, + { + "epoch": 44.01, + "learning_rate": 2.7993710227212766e-05, + "loss": 1.2196, + "step": 4170500 + }, + { + "epoch": 44.02, + "learning_rate": 2.7991071899701342e-05, + "loss": 1.2756, + "step": 4171000 + }, + { + "epoch": 44.02, + "learning_rate": 2.798843357218992e-05, + "loss": 1.2115, + "step": 4171500 + }, + { + "epoch": 44.03, + "learning_rate": 2.7985795244678493e-05, + "loss": 1.2712, + "step": 4172000 + }, + { + "epoch": 44.03, + "learning_rate": 2.7983156917167068e-05, + "loss": 1.2618, + "step": 4172500 + }, + { + "epoch": 44.04, + "learning_rate": 2.7980518589655647e-05, + "loss": 1.2611, + "step": 4173000 + }, + { + "epoch": 44.04, + "learning_rate": 2.7977880262144223e-05, + "loss": 1.2974, + "step": 4173500 + }, + { + "epoch": 44.05, + "learning_rate": 2.79752419346328e-05, + "loss": 1.2199, + "step": 4174000 + }, + { + "epoch": 44.05, + "learning_rate": 2.7972603607121374e-05, + "loss": 1.2339, + "step": 4174500 + }, + { + "epoch": 44.06, + "learning_rate": 2.796996527960995e-05, + "loss": 1.2957, + "step": 4175000 + }, + { + "epoch": 44.07, + "learning_rate": 2.7967326952098528e-05, + "loss": 1.2807, + "step": 4175500 + }, + { + "epoch": 44.07, + "learning_rate": 2.7964688624587103e-05, + "loss": 1.221, + "step": 4176000 + }, + { + "epoch": 44.08, + "learning_rate": 2.7962050297075682e-05, + "loss": 1.2851, + "step": 4176500 + }, + { + "epoch": 44.08, + "learning_rate": 2.7959411969564254e-05, + "loss": 1.2824, + "step": 4177000 + }, + { + "epoch": 44.09, + "learning_rate": 2.795677364205283e-05, + "loss": 1.2459, + "step": 4177500 + }, + { + "epoch": 44.09, + "learning_rate": 2.795413531454141e-05, + "loss": 1.2534, + "step": 4178000 + }, + { + "epoch": 44.1, + "learning_rate": 2.7951496987029984e-05, + "loss": 1.3443, + "step": 4178500 + }, + { + "epoch": 44.1, + "learning_rate": 2.7948858659518563e-05, + "loss": 1.2736, + "step": 4179000 + }, + { + "epoch": 44.11, + "learning_rate": 2.7946220332007135e-05, + "loss": 1.3069, + "step": 4179500 + }, + { + "epoch": 44.11, + "learning_rate": 2.794358200449571e-05, + "loss": 1.2612, + "step": 4180000 + }, + { + "epoch": 44.12, + "learning_rate": 2.794094367698429e-05, + "loss": 1.2397, + "step": 4180500 + }, + { + "epoch": 44.12, + "learning_rate": 2.7938305349472865e-05, + "loss": 1.279, + "step": 4181000 + } + ], + "logging_steps": 500, + "max_steps": 9475700, + "num_train_epochs": 100, + "save_steps": 1000, + "total_flos": 7.09037848506394e+17, + "trial_name": null, + "trial_params": null +}