diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,18826 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.870122878876536, + "global_step": 313500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 7.5249, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 8e-05, + "loss": 7.4245, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012, + "loss": 7.2963, + "step": 300 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016, + "loss": 7.1522, + "step": 400 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002, + "loss": 7.0025, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 0.00024, + "loss": 6.9945, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 0.00028, + "loss": 6.8504, + "step": 700 + }, + { + "epoch": 0.01, + "learning_rate": 0.00032, + "loss": 6.7442, + "step": 800 + }, + { + "epoch": 0.01, + "learning_rate": 0.00036, + "loss": 6.6172, + "step": 900 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004, + "loss": 6.4995, + "step": 1000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003999971000413244, + "loss": 6.4012, + "step": 1100 + }, + { + "epoch": 0.01, + "learning_rate": 0.00039999420008264883, + "loss": 6.3129, + "step": 1200 + }, + { + "epoch": 0.01, + "learning_rate": 0.00039999130012397324, + "loss": 6.2215, + "step": 1300 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003999884001652977, + "loss": 6.1408, + "step": 1400 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003999855002066221, + "loss": 6.0646, + "step": 1500 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003999826002479465, + "loss": 5.9936, + "step": 1600 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003999797002892709, + "loss": 5.9392, + "step": 1700 + }, + { + "epoch": 0.01, + "learning_rate": 0.00039997680033059533, + "loss": 5.8718, + "step": 1800 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003999739003719197, + "loss": 5.8278, + "step": 1900 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003999710004132441, + "loss": 5.7876, + "step": 2000 + }, + { + "epoch": 0.02, + "learning_rate": 0.00039996810045456855, + "loss": 5.7336, + "step": 2100 + }, + { + "epoch": 0.02, + "learning_rate": 0.00039996520049589296, + "loss": 5.6916, + "step": 2200 + }, + { + "epoch": 0.02, + "learning_rate": 0.00039996230053721736, + "loss": 5.6522, + "step": 2300 + }, + { + "epoch": 0.02, + "learning_rate": 0.00039995940057854177, + "loss": 5.6134, + "step": 2400 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003999565006198662, + "loss": 5.5815, + "step": 2500 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003999536006611906, + "loss": 5.5551, + "step": 2600 + }, + { + "epoch": 0.02, + "learning_rate": 0.000399950700702515, + "loss": 5.5124, + "step": 2700 + }, + { + "epoch": 0.02, + "learning_rate": 0.00039994780074383945, + "loss": 5.4807, + "step": 2800 + }, + { + "epoch": 0.02, + "learning_rate": 0.00039994490078516386, + "loss": 5.4541, + "step": 2900 + }, + { + "epoch": 0.02, + "learning_rate": 0.00039994200082648827, + "loss": 5.4356, + "step": 3000 + }, + { + "epoch": 0.02, + "learning_rate": 0.00039993910086781267, + "loss": 5.4024, + "step": 3100 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003999362009091371, + "loss": 5.3989, + "step": 3200 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003999333009504615, + "loss": 5.3606, + "step": 3300 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003999304009917859, + "loss": 5.347, + "step": 3400 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999275010331103, + "loss": 5.323, + "step": 3500 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999246010744347, + "loss": 5.3208, + "step": 3600 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999217011157591, + "loss": 5.2871, + "step": 3700 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999188011570835, + "loss": 5.281, + "step": 3800 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999159011984079, + "loss": 5.2553, + "step": 3900 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039991300123973233, + "loss": 5.2373, + "step": 4000 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039991010128105674, + "loss": 5.2314, + "step": 4100 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039990720132238115, + "loss": 5.22, + "step": 4200 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003999043013637056, + "loss": 5.2, + "step": 4300 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039990140140503, + "loss": 5.1779, + "step": 4400 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003998985014463544, + "loss": 5.1744, + "step": 4500 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039989560148767883, + "loss": 5.1456, + "step": 4600 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039989270152900324, + "loss": 5.1417, + "step": 4700 + }, + { + "epoch": 0.03, + "learning_rate": 0.00039988980157032764, + "loss": 5.1384, + "step": 4800 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039988690161165205, + "loss": 5.1298, + "step": 4900 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039988400165297646, + "loss": 5.1061, + "step": 5000 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039988110169430086, + "loss": 5.1056, + "step": 5100 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039987820173562527, + "loss": 5.0904, + "step": 5200 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003998753017769497, + "loss": 5.0778, + "step": 5300 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003998724018182741, + "loss": 5.0676, + "step": 5400 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003998695018595985, + "loss": 5.0624, + "step": 5500 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003998666019009229, + "loss": 5.0432, + "step": 5600 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039986370194224736, + "loss": 5.0553, + "step": 5700 + }, + { + "epoch": 0.04, + "learning_rate": 0.00039986080198357177, + "loss": 5.0277, + "step": 5800 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003998579020248962, + "loss": 5.0281, + "step": 5900 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003998550020662206, + "loss": 5.0132, + "step": 6000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000399852102107545, + "loss": 5.0255, + "step": 6100 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003998492021488694, + "loss": 5.0, + "step": 6200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003998463021901938, + "loss": 4.9881, + "step": 6300 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039984340223151826, + "loss": 4.9915, + "step": 6400 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039984050227284267, + "loss": 4.9707, + "step": 6500 + }, + { + "epoch": 0.05, + "learning_rate": 0.000399837602314167, + "loss": 4.9711, + "step": 6600 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039983470235549143, + "loss": 4.9566, + "step": 6700 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039983180239681584, + "loss": 4.9628, + "step": 6800 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039982890243814024, + "loss": 4.9445, + "step": 6900 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039982600247946465, + "loss": 4.9404, + "step": 7000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003998231025207891, + "loss": 4.947, + "step": 7100 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003998202025621135, + "loss": 4.9271, + "step": 7200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003998173026034379, + "loss": 4.927, + "step": 7300 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039981440264476233, + "loss": 4.9295, + "step": 7400 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039981150268608674, + "loss": 4.9216, + "step": 7500 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039980860272741114, + "loss": 4.8989, + "step": 7600 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039980570276873555, + "loss": 4.8931, + "step": 7700 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039980280281006, + "loss": 4.898, + "step": 7800 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003997999028513844, + "loss": 4.8746, + "step": 7900 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003997970028927088, + "loss": 4.8949, + "step": 8000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039979410293403323, + "loss": 4.8863, + "step": 8100 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039979120297535764, + "loss": 4.876, + "step": 8200 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039978830301668205, + "loss": 4.8615, + "step": 8300 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003997854030580064, + "loss": 4.8695, + "step": 8400 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039978250309933086, + "loss": 4.8451, + "step": 8500 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039977960314065527, + "loss": 4.8589, + "step": 8600 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003997767031819797, + "loss": 4.8441, + "step": 8700 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003997738032233041, + "loss": 4.8367, + "step": 8800 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003997709032646285, + "loss": 4.8329, + "step": 8900 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003997680033059529, + "loss": 4.8386, + "step": 9000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003997651033472773, + "loss": 4.8264, + "step": 9100 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039976220338860176, + "loss": 4.8295, + "step": 9200 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039975930342992617, + "loss": 4.8141, + "step": 9300 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003997564034712506, + "loss": 4.8151, + "step": 9400 + }, + { + "epoch": 0.07, + "learning_rate": 0.000399753503512575, + "loss": 4.8054, + "step": 9500 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003997506035538994, + "loss": 4.8111, + "step": 9600 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003997477035952238, + "loss": 4.8031, + "step": 9700 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003997448036365482, + "loss": 4.7996, + "step": 9800 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003997419036778726, + "loss": 4.7805, + "step": 9900 + }, + { + "epoch": 0.07, + "learning_rate": 0.000399739003719197, + "loss": 4.7895, + "step": 10000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003997361037605214, + "loss": 4.7956, + "step": 10100 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039973320380184583, + "loss": 4.786, + "step": 10200 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039973030384317024, + "loss": 4.779, + "step": 10300 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039972740388449465, + "loss": 4.7627, + "step": 10400 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039972450392581905, + "loss": 4.7751, + "step": 10500 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039972160396714346, + "loss": 4.7646, + "step": 10600 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003997187040084679, + "loss": 4.7597, + "step": 10700 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003997158040497923, + "loss": 4.7488, + "step": 10800 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039971290409111673, + "loss": 4.7669, + "step": 10900 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039971000413244114, + "loss": 4.7648, + "step": 11000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039970710417376555, + "loss": 4.7447, + "step": 11100 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039970420421508995, + "loss": 4.758, + "step": 11200 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039970130425641436, + "loss": 4.7358, + "step": 11300 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039969840429773877, + "loss": 4.7366, + "step": 11400 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003996955043390632, + "loss": 4.7382, + "step": 11500 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003996926043803876, + "loss": 4.7178, + "step": 11600 + }, + { + "epoch": 0.08, + "learning_rate": 0.000399689704421712, + "loss": 4.7148, + "step": 11700 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003996868044630364, + "loss": 4.7175, + "step": 11800 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003996839045043608, + "loss": 4.7243, + "step": 11900 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003996810045456852, + "loss": 4.7101, + "step": 12000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039967810458700967, + "loss": 4.7186, + "step": 12100 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003996752046283341, + "loss": 4.7145, + "step": 12200 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003996723046696585, + "loss": 4.7153, + "step": 12300 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003996694047109829, + "loss": 4.7011, + "step": 12400 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003996665047523073, + "loss": 4.7046, + "step": 12500 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003996636047936317, + "loss": 4.7093, + "step": 12600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003996607048349561, + "loss": 4.7058, + "step": 12700 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003996578048762806, + "loss": 4.6925, + "step": 12800 + }, + { + "epoch": 0.09, + "learning_rate": 0.000399654904917605, + "loss": 4.6863, + "step": 12900 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003996520049589294, + "loss": 4.6947, + "step": 13000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039964910500025374, + "loss": 4.6748, + "step": 13100 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039964620504157815, + "loss": 4.6839, + "step": 13200 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039964330508290255, + "loss": 4.6826, + "step": 13300 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039964040512422696, + "loss": 4.6741, + "step": 13400 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003996375051655514, + "loss": 4.6974, + "step": 13500 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039963460520687583, + "loss": 4.6824, + "step": 13600 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039963170524820023, + "loss": 4.6565, + "step": 13700 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039962880528952464, + "loss": 4.6693, + "step": 13800 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039962590533084905, + "loss": 4.6679, + "step": 13900 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039962300537217346, + "loss": 4.675, + "step": 14000 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039962010541349786, + "loss": 4.6514, + "step": 14100 + }, + { + "epoch": 0.1, + "learning_rate": 0.000399617234454409, + "loss": 4.6673, + "step": 14200 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003996143344957334, + "loss": 4.663, + "step": 14300 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039961143453705783, + "loss": 4.6478, + "step": 14400 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003996085345783823, + "loss": 4.6534, + "step": 14500 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003996056346197067, + "loss": 4.6648, + "step": 14600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003996027346610311, + "loss": 4.6412, + "step": 14700 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003995998347023555, + "loss": 4.6512, + "step": 14800 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003995969347436799, + "loss": 4.643, + "step": 14900 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039959403478500433, + "loss": 4.646, + "step": 15000 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039959113482632873, + "loss": 4.6289, + "step": 15100 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003995882348676532, + "loss": 4.6295, + "step": 15200 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003995853349089776, + "loss": 4.6296, + "step": 15300 + }, + { + "epoch": 0.11, + "learning_rate": 0.000399582434950302, + "loss": 4.6334, + "step": 15400 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003995795349916264, + "loss": 4.6282, + "step": 15500 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003995766350329508, + "loss": 4.6187, + "step": 15600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003995737350742752, + "loss": 4.6249, + "step": 15700 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003995708351155996, + "loss": 4.6235, + "step": 15800 + }, + { + "epoch": 0.12, + "learning_rate": 0.000399567935156924, + "loss": 4.6273, + "step": 15900 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039956503519824845, + "loss": 4.6227, + "step": 16000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003995621642391596, + "loss": 4.6248, + "step": 16100 + }, + { + "epoch": 0.12, + "learning_rate": 0.000399559264280484, + "loss": 4.6205, + "step": 16200 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003995563643218085, + "loss": 4.6148, + "step": 16300 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003995534643631329, + "loss": 4.6072, + "step": 16400 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003995505644044573, + "loss": 4.6021, + "step": 16500 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039954766444578164, + "loss": 4.6062, + "step": 16600 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039954476448710605, + "loss": 4.6003, + "step": 16700 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039954186452843045, + "loss": 4.5972, + "step": 16800 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039953896456975486, + "loss": 4.5961, + "step": 16900 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003995360646110793, + "loss": 4.595, + "step": 17000 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039953316465240373, + "loss": 4.6016, + "step": 17100 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039953026469372814, + "loss": 4.5977, + "step": 17200 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039952736473505254, + "loss": 4.5895, + "step": 17300 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039952446477637695, + "loss": 4.5916, + "step": 17400 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039952156481770136, + "loss": 4.5927, + "step": 17500 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039951866485902576, + "loss": 4.5807, + "step": 17600 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003995157649003502, + "loss": 4.583, + "step": 17700 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039951286494167463, + "loss": 4.5926, + "step": 17800 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039950996498299904, + "loss": 4.5893, + "step": 17900 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039950706502432345, + "loss": 4.5956, + "step": 18000 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039950416506564785, + "loss": 4.5921, + "step": 18100 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039950126510697226, + "loss": 4.5907, + "step": 18200 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003994983651482966, + "loss": 4.5894, + "step": 18300 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039949546518962107, + "loss": 4.5749, + "step": 18400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003994925652309455, + "loss": 4.5697, + "step": 18500 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003994896652722699, + "loss": 4.5698, + "step": 18600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003994867653135943, + "loss": 4.5787, + "step": 18700 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003994838653549187, + "loss": 4.5683, + "step": 18800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003994809653962431, + "loss": 4.578, + "step": 18900 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003994780654375675, + "loss": 4.5643, + "step": 19000 + }, + { + "epoch": 0.14, + "learning_rate": 0.000399475165478892, + "loss": 4.5671, + "step": 19100 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003994722655202164, + "loss": 4.5668, + "step": 19200 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003994693655615408, + "loss": 4.5718, + "step": 19300 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003994664656028652, + "loss": 4.5703, + "step": 19400 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003994635656441896, + "loss": 4.5647, + "step": 19500 + }, + { + "epoch": 0.14, + "learning_rate": 0.000399460665685514, + "loss": 4.5664, + "step": 19600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003994577657268384, + "loss": 4.5551, + "step": 19700 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003994548657681628, + "loss": 4.5567, + "step": 19800 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039945196580948723, + "loss": 4.5506, + "step": 19900 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039944906585081164, + "loss": 4.554, + "step": 20000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003994461948917228, + "loss": 4.56, + "step": 20100 + }, + { + "epoch": 0.15, + "learning_rate": 0.00039944329493304725, + "loss": 4.5506, + "step": 20200 + }, + { + "epoch": 0.15, + "learning_rate": 0.00039944039497437166, + "loss": 4.5531, + "step": 20300 + }, + { + "epoch": 0.15, + "learning_rate": 0.00039943752401528276, + "loss": 4.5437, + "step": 20400 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003994346240566072, + "loss": 4.5508, + "step": 20500 + }, + { + "epoch": 0.15, + "learning_rate": 0.00039943172409793163, + "loss": 4.5415, + "step": 20600 + }, + { + "epoch": 0.15, + "learning_rate": 0.00039942882413925604, + "loss": 4.5366, + "step": 20700 + }, + { + "epoch": 0.15, + "learning_rate": 0.00039942592418058044, + "loss": 4.548, + "step": 20800 + }, + { + "epoch": 0.15, + "learning_rate": 0.00039942302422190485, + "loss": 4.5344, + "step": 20900 + }, + { + "epoch": 0.15, + "learning_rate": 0.00039942012426322926, + "loss": 4.5442, + "step": 21000 + }, + { + "epoch": 0.15, + "learning_rate": 0.00039941722430455366, + "loss": 4.5482, + "step": 21100 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003994143243458781, + "loss": 4.5418, + "step": 21200 + }, + { + "epoch": 0.15, + "learning_rate": 0.00039941142438720253, + "loss": 4.5367, + "step": 21300 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039940852442852694, + "loss": 4.5358, + "step": 21400 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039940562446985135, + "loss": 4.5447, + "step": 21500 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039940272451117575, + "loss": 4.526, + "step": 21600 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039939982455250016, + "loss": 4.5345, + "step": 21700 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003993969245938245, + "loss": 4.5279, + "step": 21800 + }, + { + "epoch": 0.16, + "learning_rate": 0.000399394024635149, + "loss": 4.5342, + "step": 21900 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003993911246764734, + "loss": 4.5548, + "step": 22000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003993882247177978, + "loss": 4.5387, + "step": 22100 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003993853247591222, + "loss": 4.5268, + "step": 22200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003993824248004466, + "loss": 4.5262, + "step": 22300 + }, + { + "epoch": 0.16, + "learning_rate": 0.000399379524841771, + "loss": 4.5308, + "step": 22400 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003993766248830954, + "loss": 4.5193, + "step": 22500 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003993737249244199, + "loss": 4.5265, + "step": 22600 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003993708249657443, + "loss": 4.5237, + "step": 22700 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003993679250070687, + "loss": 4.5195, + "step": 22800 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003993650250483931, + "loss": 4.519, + "step": 22900 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003993621250897175, + "loss": 4.5273, + "step": 23000 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003993592251310419, + "loss": 4.5212, + "step": 23100 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003993563251723663, + "loss": 4.5279, + "step": 23200 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003993534252136907, + "loss": 4.5119, + "step": 23300 + }, + { + "epoch": 0.17, + "learning_rate": 0.00039935052525501513, + "loss": 4.521, + "step": 23400 + }, + { + "epoch": 0.17, + "learning_rate": 0.00039934762529633954, + "loss": 4.5117, + "step": 23500 + }, + { + "epoch": 0.17, + "learning_rate": 0.00039934472533766395, + "loss": 4.5088, + "step": 23600 + }, + { + "epoch": 0.17, + "learning_rate": 0.00039934182537898835, + "loss": 4.5179, + "step": 23700 + }, + { + "epoch": 0.17, + "learning_rate": 0.00039933892542031276, + "loss": 4.5149, + "step": 23800 + }, + { + "epoch": 0.17, + "learning_rate": 0.00039933602546163717, + "loss": 4.5133, + "step": 23900 + }, + { + "epoch": 0.17, + "learning_rate": 0.00039933312550296157, + "loss": 4.507, + "step": 24000 + }, + { + "epoch": 0.17, + "learning_rate": 0.00039933022554428603, + "loss": 4.5053, + "step": 24100 + }, + { + "epoch": 0.18, + "learning_rate": 0.00039932732558561044, + "loss": 4.5091, + "step": 24200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003993244546265216, + "loss": 4.5171, + "step": 24300 + }, + { + "epoch": 0.18, + "learning_rate": 0.000399321554667846, + "loss": 4.5014, + "step": 24400 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003993186547091704, + "loss": 4.4979, + "step": 24500 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003993157547504948, + "loss": 4.5012, + "step": 24600 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003993128547918192, + "loss": 4.4973, + "step": 24700 + }, + { + "epoch": 0.18, + "learning_rate": 0.00039930995483314363, + "loss": 4.5032, + "step": 24800 + }, + { + "epoch": 0.18, + "learning_rate": 0.00039930705487446804, + "loss": 4.499, + "step": 24900 + }, + { + "epoch": 0.18, + "learning_rate": 0.00039930415491579244, + "loss": 4.504, + "step": 25000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003993012549571169, + "loss": 4.4902, + "step": 25100 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003992983549984413, + "loss": 4.5014, + "step": 25200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003992954550397657, + "loss": 4.4936, + "step": 25300 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003992925550810901, + "loss": 4.4869, + "step": 25400 + }, + { + "epoch": 0.18, + "learning_rate": 0.00039928965512241453, + "loss": 4.4961, + "step": 25500 + }, + { + "epoch": 0.19, + "learning_rate": 0.00039928675516373894, + "loss": 4.4884, + "step": 25600 + }, + { + "epoch": 0.19, + "learning_rate": 0.00039928385520506335, + "loss": 4.4965, + "step": 25700 + }, + { + "epoch": 0.19, + "learning_rate": 0.00039928095524638775, + "loss": 4.4952, + "step": 25800 + }, + { + "epoch": 0.19, + "learning_rate": 0.00039927805528771216, + "loss": 4.5, + "step": 25900 + }, + { + "epoch": 0.19, + "learning_rate": 0.00039927515532903657, + "loss": 4.4953, + "step": 26000 + }, + { + "epoch": 0.19, + "learning_rate": 0.000399272255370361, + "loss": 4.4753, + "step": 26100 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003992693554116854, + "loss": 4.4931, + "step": 26200 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003992664554530098, + "loss": 4.4685, + "step": 26300 + }, + { + "epoch": 0.19, + "learning_rate": 0.000399263584493921, + "loss": 4.475, + "step": 26400 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003992606845352454, + "loss": 4.486, + "step": 26500 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003992577845765698, + "loss": 4.4834, + "step": 26600 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003992548846178942, + "loss": 4.4812, + "step": 26700 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003992519846592186, + "loss": 4.481, + "step": 26800 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039924908470054303, + "loss": 4.4975, + "step": 26900 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039924618474186744, + "loss": 4.4752, + "step": 27000 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039924328478319185, + "loss": 4.4828, + "step": 27100 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039924038482451625, + "loss": 4.4886, + "step": 27200 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039923748486584066, + "loss": 4.4786, + "step": 27300 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039923458490716507, + "loss": 4.4813, + "step": 27400 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003992316849484895, + "loss": 4.4642, + "step": 27500 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039922878498981393, + "loss": 4.4782, + "step": 27600 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039922588503113834, + "loss": 4.4758, + "step": 27700 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039922298507246275, + "loss": 4.4762, + "step": 27800 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039922008511378716, + "loss": 4.4768, + "step": 27900 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039921718515511156, + "loss": 4.4591, + "step": 28000 + }, + { + "epoch": 0.2, + "learning_rate": 0.00039921428519643597, + "loss": 4.479, + "step": 28100 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003992113852377604, + "loss": 4.4681, + "step": 28200 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003992084852790848, + "loss": 4.4625, + "step": 28300 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003992055853204092, + "loss": 4.4815, + "step": 28400 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003992026853617336, + "loss": 4.4657, + "step": 28500 + }, + { + "epoch": 0.21, + "learning_rate": 0.000399199785403058, + "loss": 4.4603, + "step": 28600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003991968854443824, + "loss": 4.4598, + "step": 28700 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003991939854857068, + "loss": 4.4784, + "step": 28800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003991910855270312, + "loss": 4.4623, + "step": 28900 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003991881855683557, + "loss": 4.4726, + "step": 29000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003991852856096801, + "loss": 4.4721, + "step": 29100 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003991823856510045, + "loss": 4.4544, + "step": 29200 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003991794856923289, + "loss": 4.4704, + "step": 29300 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003991765857336533, + "loss": 4.4737, + "step": 29400 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003991736857749777, + "loss": 4.4494, + "step": 29500 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003991707858163021, + "loss": 4.4581, + "step": 29600 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003991678858576266, + "loss": 4.4576, + "step": 29700 + }, + { + "epoch": 0.22, + "learning_rate": 0.000399164985898951, + "loss": 4.4729, + "step": 29800 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003991620859402754, + "loss": 4.4587, + "step": 29900 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003991591859815998, + "loss": 4.4684, + "step": 30000 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039915628602292416, + "loss": 4.4572, + "step": 30100 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039915338606424857, + "loss": 4.454, + "step": 30200 + }, + { + "epoch": 0.22, + "learning_rate": 0.000399150486105573, + "loss": 4.4692, + "step": 30300 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003991476151464842, + "loss": 4.462, + "step": 30400 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003991447151878086, + "loss": 4.4503, + "step": 30500 + }, + { + "epoch": 0.22, + "learning_rate": 0.000399141815229133, + "loss": 4.4513, + "step": 30600 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039913891527045746, + "loss": 4.4542, + "step": 30700 + }, + { + "epoch": 0.22, + "learning_rate": 0.00039913601531178187, + "loss": 4.4477, + "step": 30800 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003991331153531062, + "loss": 4.4425, + "step": 30900 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003991302153944306, + "loss": 4.4414, + "step": 31000 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039912734443534184, + "loss": 4.4521, + "step": 31100 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039912444447666624, + "loss": 4.4455, + "step": 31200 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039912154451799065, + "loss": 4.446, + "step": 31300 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039911864455931506, + "loss": 4.453, + "step": 31400 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039911574460063946, + "loss": 4.4424, + "step": 31500 + }, + { + "epoch": 0.23, + "learning_rate": 0.00039911284464196387, + "loss": 4.4522, + "step": 31600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003991099446832883, + "loss": 4.4638, + "step": 31700 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003991070447246127, + "loss": 4.453, + "step": 31800 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003991041447659371, + "loss": 4.4428, + "step": 31900 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003991012448072615, + "loss": 4.4468, + "step": 32000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003990983448485859, + "loss": 4.4442, + "step": 32100 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003990954448899103, + "loss": 4.4541, + "step": 32200 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003990925449312347, + "loss": 4.4468, + "step": 32300 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003990896449725591, + "loss": 4.4621, + "step": 32400 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003990867450138836, + "loss": 4.4444, + "step": 32500 + }, + { + "epoch": 0.24, + "learning_rate": 0.000399083845055208, + "loss": 4.4453, + "step": 32600 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003990809450965324, + "loss": 4.4377, + "step": 32700 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003990780451378568, + "loss": 4.4415, + "step": 32800 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003990751451791812, + "loss": 4.4396, + "step": 32900 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003990722452205056, + "loss": 4.4329, + "step": 33000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039906934526183003, + "loss": 4.4411, + "step": 33100 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003990664453031545, + "loss": 4.4406, + "step": 33200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003990635743440656, + "loss": 4.4348, + "step": 33300 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039906067438539, + "loss": 4.4598, + "step": 33400 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039905777442671446, + "loss": 4.4428, + "step": 33500 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039905487446803887, + "loss": 4.4196, + "step": 33600 + }, + { + "epoch": 0.24, + "learning_rate": 0.00039905197450936327, + "loss": 4.444, + "step": 33700 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003990490745506877, + "loss": 4.4447, + "step": 33800 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003990461745920121, + "loss": 4.4396, + "step": 33900 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003990432746333365, + "loss": 4.4386, + "step": 34000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003990403746746609, + "loss": 4.4236, + "step": 34100 + }, + { + "epoch": 0.25, + "learning_rate": 0.00039903747471598536, + "loss": 4.4409, + "step": 34200 + }, + { + "epoch": 0.25, + "learning_rate": 0.00039903457475730977, + "loss": 4.4217, + "step": 34300 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003990316747986341, + "loss": 4.4228, + "step": 34400 + }, + { + "epoch": 0.25, + "learning_rate": 0.00039902877483995853, + "loss": 4.4377, + "step": 34500 + }, + { + "epoch": 0.25, + "learning_rate": 0.00039902587488128293, + "loss": 4.4298, + "step": 34600 + }, + { + "epoch": 0.25, + "learning_rate": 0.00039902297492260734, + "loss": 4.4369, + "step": 34700 + }, + { + "epoch": 0.25, + "learning_rate": 0.00039902007496393175, + "loss": 4.4349, + "step": 34800 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003990171750052562, + "loss": 4.4325, + "step": 34900 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003990142750465806, + "loss": 4.4373, + "step": 35000 + }, + { + "epoch": 0.25, + "learning_rate": 0.000399011375087905, + "loss": 4.4358, + "step": 35100 + }, + { + "epoch": 0.26, + "learning_rate": 0.00039900847512922943, + "loss": 4.433, + "step": 35200 + }, + { + "epoch": 0.26, + "learning_rate": 0.00039900557517055384, + "loss": 4.4364, + "step": 35300 + }, + { + "epoch": 0.26, + "learning_rate": 0.00039900267521187824, + "loss": 4.4277, + "step": 35400 + }, + { + "epoch": 0.26, + "learning_rate": 0.00039899977525320265, + "loss": 4.4235, + "step": 35500 + }, + { + "epoch": 0.26, + "learning_rate": 0.00039899687529452706, + "loss": 4.4316, + "step": 35600 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003989939753358515, + "loss": 4.4371, + "step": 35700 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003989910753771759, + "loss": 4.4453, + "step": 35800 + }, + { + "epoch": 0.26, + "learning_rate": 0.00039898817541850033, + "loss": 4.4217, + "step": 35900 + }, + { + "epoch": 0.26, + "learning_rate": 0.00039898527545982474, + "loss": 4.4165, + "step": 36000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003989824045007359, + "loss": 4.4134, + "step": 36100 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003989795045420603, + "loss": 4.4362, + "step": 36200 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003989766045833847, + "loss": 4.4136, + "step": 36300 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003989737046247091, + "loss": 4.424, + "step": 36400 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003989708046660335, + "loss": 4.4294, + "step": 36500 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039896790470735793, + "loss": 4.4104, + "step": 36600 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003989650047486824, + "loss": 4.4121, + "step": 36700 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003989621047900068, + "loss": 4.4182, + "step": 36800 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003989592048313312, + "loss": 4.4178, + "step": 36900 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003989563048726556, + "loss": 4.4155, + "step": 37000 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039895340491397996, + "loss": 4.4153, + "step": 37100 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039895050495530437, + "loss": 4.4201, + "step": 37200 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003989476049966288, + "loss": 4.405, + "step": 37300 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039894470503795324, + "loss": 4.4177, + "step": 37400 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039894180507927765, + "loss": 4.4167, + "step": 37500 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039893890512060205, + "loss": 4.4321, + "step": 37600 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039893600516192646, + "loss": 4.4311, + "step": 37700 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039893310520325087, + "loss": 4.4272, + "step": 37800 + }, + { + "epoch": 0.27, + "learning_rate": 0.00039893020524457527, + "loss": 4.4111, + "step": 37900 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003989273052858997, + "loss": 4.4138, + "step": 38000 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039892440532722414, + "loss": 4.425, + "step": 38100 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039892150536854855, + "loss": 4.4153, + "step": 38200 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039891860540987295, + "loss": 4.4175, + "step": 38300 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039891570545119736, + "loss": 4.4099, + "step": 38400 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039891280549252177, + "loss": 4.4074, + "step": 38500 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003989099345334329, + "loss": 4.4198, + "step": 38600 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039890703457475733, + "loss": 4.402, + "step": 38700 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039890413461608174, + "loss": 4.422, + "step": 38800 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039890123465740614, + "loss": 4.4098, + "step": 38900 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039889833469873055, + "loss": 4.4247, + "step": 39000 + }, + { + "epoch": 0.28, + "learning_rate": 0.00039889543474005496, + "loss": 4.4148, + "step": 39100 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003988925347813794, + "loss": 4.4229, + "step": 39200 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003988896348227038, + "loss": 4.4047, + "step": 39300 + }, + { + "epoch": 0.29, + "learning_rate": 0.00039888673486402823, + "loss": 4.4182, + "step": 39400 + }, + { + "epoch": 0.29, + "learning_rate": 0.00039888383490535264, + "loss": 4.4045, + "step": 39500 + }, + { + "epoch": 0.29, + "learning_rate": 0.00039888093494667705, + "loss": 4.3967, + "step": 39600 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988780349880014, + "loss": 4.4076, + "step": 39700 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988751350293258, + "loss": 4.4214, + "step": 39800 + }, + { + "epoch": 0.29, + "learning_rate": 0.00039887223507065027, + "loss": 4.4008, + "step": 39900 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988693351119747, + "loss": 4.4068, + "step": 40000 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988664351532991, + "loss": 4.4005, + "step": 40100 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988635351946235, + "loss": 4.4085, + "step": 40200 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988606352359479, + "loss": 4.3981, + "step": 40300 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988577352772723, + "loss": 4.4089, + "step": 40400 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003988548353185967, + "loss": 4.4073, + "step": 40500 + }, + { + "epoch": 0.29, + "learning_rate": 0.00039885193535992117, + "loss": 4.3983, + "step": 40600 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003988490354012456, + "loss": 4.4018, + "step": 40700 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039884613544257, + "loss": 4.4078, + "step": 40800 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003988432354838944, + "loss": 4.4, + "step": 40900 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003988403355252188, + "loss": 4.4072, + "step": 41000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003988374355665432, + "loss": 4.4035, + "step": 41100 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003988345356078676, + "loss": 4.4031, + "step": 41200 + }, + { + "epoch": 0.3, + "learning_rate": 0.000398831635649192, + "loss": 4.4069, + "step": 41300 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003988287646901032, + "loss": 4.4018, + "step": 41400 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003988258647314276, + "loss": 4.3995, + "step": 41500 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039882296477275204, + "loss": 4.3974, + "step": 41600 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039882006481407645, + "loss": 4.3838, + "step": 41700 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039881716485540086, + "loss": 4.3974, + "step": 41800 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039881426489672526, + "loss": 4.3962, + "step": 41900 + }, + { + "epoch": 0.3, + "learning_rate": 0.00039881136493804967, + "loss": 4.3919, + "step": 42000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003988084649793741, + "loss": 4.4028, + "step": 42100 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003988055650206985, + "loss": 4.3955, + "step": 42200 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039880269406160964, + "loss": 4.3963, + "step": 42300 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039879979410293405, + "loss": 4.3949, + "step": 42400 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039879689414425845, + "loss": 4.4055, + "step": 42500 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039879399418558286, + "loss": 4.4035, + "step": 42600 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003987910942269073, + "loss": 4.3877, + "step": 42700 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039878819426823173, + "loss": 4.3861, + "step": 42800 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039878529430955613, + "loss": 4.3848, + "step": 42900 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039878239435088054, + "loss": 4.3957, + "step": 43000 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039877949439220495, + "loss": 4.3857, + "step": 43100 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003987765944335293, + "loss": 4.3898, + "step": 43200 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003987736944748537, + "loss": 4.3961, + "step": 43300 + }, + { + "epoch": 0.31, + "learning_rate": 0.00039877079451617817, + "loss": 4.3806, + "step": 43400 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003987678945575026, + "loss": 4.3897, + "step": 43500 + }, + { + "epoch": 0.32, + "learning_rate": 0.000398764994598827, + "loss": 4.4047, + "step": 43600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003987620946401514, + "loss": 4.3974, + "step": 43700 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003987591946814758, + "loss": 4.3858, + "step": 43800 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003987562947228002, + "loss": 4.4, + "step": 43900 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003987533947641246, + "loss": 4.3958, + "step": 44000 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039875049480544907, + "loss": 4.3926, + "step": 44100 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003987475948467735, + "loss": 4.388, + "step": 44200 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003987446948880979, + "loss": 4.3938, + "step": 44300 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003987417949294223, + "loss": 4.3846, + "step": 44400 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003987388949707467, + "loss": 4.389, + "step": 44500 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003987359950120711, + "loss": 4.3866, + "step": 44600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003987330950533955, + "loss": 4.3773, + "step": 44700 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003987301950947199, + "loss": 4.3949, + "step": 44800 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003987272951360443, + "loss": 4.3797, + "step": 44900 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039872439517736873, + "loss": 4.384, + "step": 45000 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039872149521869314, + "loss": 4.3934, + "step": 45100 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039871859526001755, + "loss": 4.3909, + "step": 45200 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039871569530134195, + "loss": 4.3728, + "step": 45300 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039871279534266636, + "loss": 4.395, + "step": 45400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003987098953839908, + "loss": 4.3871, + "step": 45500 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039870699542531523, + "loss": 4.3832, + "step": 45600 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039870409546663964, + "loss": 4.3742, + "step": 45700 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039870119550796404, + "loss": 4.381, + "step": 45800 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039869829554928845, + "loss": 4.3971, + "step": 45900 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039869539559061286, + "loss": 4.3802, + "step": 46000 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039869249563193726, + "loss": 4.3745, + "step": 46100 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039868959567326167, + "loss": 4.3812, + "step": 46200 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003986866957145861, + "loss": 4.3976, + "step": 46300 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003986837957559105, + "loss": 4.386, + "step": 46400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003986808957972349, + "loss": 4.3642, + "step": 46500 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003986779958385593, + "loss": 4.3862, + "step": 46600 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003986750958798837, + "loss": 4.3941, + "step": 46700 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003986721959212081, + "loss": 4.379, + "step": 46800 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039866929596253257, + "loss": 4.3804, + "step": 46900 + }, + { + "epoch": 0.34, + "learning_rate": 0.000398666396003857, + "loss": 4.3729, + "step": 47000 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039866352504476813, + "loss": 4.3822, + "step": 47100 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039866062508609254, + "loss": 4.3843, + "step": 47200 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039865772512741695, + "loss": 4.3838, + "step": 47300 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039865482516874136, + "loss": 4.3859, + "step": 47400 + }, + { + "epoch": 0.34, + "learning_rate": 0.00039865192521006576, + "loss": 4.3724, + "step": 47500 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039864902525139017, + "loss": 4.3892, + "step": 47600 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003986461252927146, + "loss": 4.3672, + "step": 47700 + }, + { + "epoch": 0.35, + "learning_rate": 0.000398643225334039, + "loss": 4.3651, + "step": 47800 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003986403253753634, + "loss": 4.3786, + "step": 47900 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039863742541668785, + "loss": 4.3967, + "step": 48000 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039863452545801226, + "loss": 4.385, + "step": 48100 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039863162549933666, + "loss": 4.3676, + "step": 48200 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039862872554066107, + "loss": 4.3751, + "step": 48300 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003986258255819855, + "loss": 4.3833, + "step": 48400 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003986229256233099, + "loss": 4.3806, + "step": 48500 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003986200256646343, + "loss": 4.3697, + "step": 48600 + }, + { + "epoch": 0.35, + "learning_rate": 0.00039861712570595875, + "loss": 4.3775, + "step": 48700 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003986142257472831, + "loss": 4.3717, + "step": 48800 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003986113257886075, + "loss": 4.3814, + "step": 48900 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003986084258299319, + "loss": 4.367, + "step": 49000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003986055258712563, + "loss": 4.3737, + "step": 49100 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039860265491216754, + "loss": 4.3704, + "step": 49200 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039859975495349194, + "loss": 4.3675, + "step": 49300 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039859685499481635, + "loss": 4.3789, + "step": 49400 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039859395503614076, + "loss": 4.3818, + "step": 49500 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039859105507746516, + "loss": 4.3611, + "step": 49600 + }, + { + "epoch": 0.36, + "learning_rate": 0.00039858815511878957, + "loss": 4.367, + "step": 49700 + }, + { + "epoch": 0.36, + "learning_rate": 0.000398585255160114, + "loss": 4.3723, + "step": 49800 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003985823552014384, + "loss": 4.3899, + "step": 49900 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003985794552427628, + "loss": 4.3712, + "step": 50000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003985765552840872, + "loss": 4.3684, + "step": 50100 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003985736553254116, + "loss": 4.3575, + "step": 50200 + }, + { + "epoch": 0.36, + "learning_rate": 0.000398570755366736, + "loss": 4.3683, + "step": 50300 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003985678554080605, + "loss": 4.3736, + "step": 50400 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003985649554493849, + "loss": 4.3733, + "step": 50500 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003985620554907093, + "loss": 4.371, + "step": 50600 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003985591555320337, + "loss": 4.3776, + "step": 50700 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003985562555733581, + "loss": 4.3645, + "step": 50800 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003985533556146825, + "loss": 4.3786, + "step": 50900 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003985504556560069, + "loss": 4.3601, + "step": 51000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003985475556973314, + "loss": 4.3573, + "step": 51100 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003985446557386558, + "loss": 4.3657, + "step": 51200 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003985417847795669, + "loss": 4.3718, + "step": 51300 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003985388848208913, + "loss": 4.3632, + "step": 51400 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039853598486221575, + "loss": 4.3697, + "step": 51500 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039853308490354016, + "loss": 4.372, + "step": 51600 + }, + { + "epoch": 0.37, + "learning_rate": 0.00039853018494486457, + "loss": 4.3683, + "step": 51700 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039852728498618897, + "loss": 4.3676, + "step": 51800 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003985243850275134, + "loss": 4.3665, + "step": 51900 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003985214850688378, + "loss": 4.3636, + "step": 52000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003985185851101622, + "loss": 4.3774, + "step": 52100 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039851568515148665, + "loss": 4.3563, + "step": 52200 + }, + { + "epoch": 0.38, + "learning_rate": 0.000398512785192811, + "loss": 4.3693, + "step": 52300 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003985098852341354, + "loss": 4.3636, + "step": 52400 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003985069852754598, + "loss": 4.3713, + "step": 52500 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039850408531678423, + "loss": 4.361, + "step": 52600 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039850118535810863, + "loss": 4.3664, + "step": 52700 + }, + { + "epoch": 0.38, + "learning_rate": 0.00039849828539943304, + "loss": 4.3517, + "step": 52800 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003984953854407575, + "loss": 4.3798, + "step": 52900 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003984924854820819, + "loss": 4.3538, + "step": 53000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003984895855234063, + "loss": 4.3578, + "step": 53100 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003984866855647307, + "loss": 4.3685, + "step": 53200 + }, + { + "epoch": 0.39, + "learning_rate": 0.00039848378560605513, + "loss": 4.3532, + "step": 53300 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003984809146469663, + "loss": 4.3596, + "step": 53400 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003984780146882907, + "loss": 4.3534, + "step": 53500 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003984751147296151, + "loss": 4.3583, + "step": 53600 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003984722147709395, + "loss": 4.3584, + "step": 53700 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003984693148122639, + "loss": 4.3701, + "step": 53800 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003984664148535884, + "loss": 4.3599, + "step": 53900 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003984635148949128, + "loss": 4.3645, + "step": 54000 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003984606149362372, + "loss": 4.364, + "step": 54100 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003984577149775616, + "loss": 4.3543, + "step": 54200 + }, + { + "epoch": 0.39, + "learning_rate": 0.000398454815018886, + "loss": 4.3657, + "step": 54300 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003984519150602104, + "loss": 4.366, + "step": 54400 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003984490151015348, + "loss": 4.3526, + "step": 54500 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003984461151428593, + "loss": 4.3572, + "step": 54600 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003984432151841837, + "loss": 4.3721, + "step": 54700 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003984403152255081, + "loss": 4.3658, + "step": 54800 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039843741526683244, + "loss": 4.3704, + "step": 54900 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039843451530815685, + "loss": 4.3646, + "step": 55000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039843161534948126, + "loss": 4.3688, + "step": 55100 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039842871539080566, + "loss": 4.3529, + "step": 55200 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039842581543213007, + "loss": 4.3489, + "step": 55300 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039842291547345453, + "loss": 4.3561, + "step": 55400 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039842001551477894, + "loss": 4.3474, + "step": 55500 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039841711555610335, + "loss": 4.3559, + "step": 55600 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039841424459701456, + "loss": 4.3482, + "step": 55700 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003984113446383389, + "loss": 4.3582, + "step": 55800 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003984084446796633, + "loss": 4.3656, + "step": 55900 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003984055447209877, + "loss": 4.3684, + "step": 56000 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039840264476231213, + "loss": 4.3416, + "step": 56100 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039839974480363654, + "loss": 4.3511, + "step": 56200 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039839684484496094, + "loss": 4.3609, + "step": 56300 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003983939448862854, + "loss": 4.3558, + "step": 56400 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003983910449276098, + "loss": 4.3542, + "step": 56500 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003983881449689342, + "loss": 4.3581, + "step": 56600 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003983852450102586, + "loss": 4.3616, + "step": 56700 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039838234505158303, + "loss": 4.3459, + "step": 56800 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039837944509290744, + "loss": 4.3589, + "step": 56900 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039837654513423185, + "loss": 4.3536, + "step": 57000 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003983736451755563, + "loss": 4.357, + "step": 57100 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003983707742164674, + "loss": 4.3541, + "step": 57200 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003983678742577918, + "loss": 4.3573, + "step": 57300 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003983649742991163, + "loss": 4.3491, + "step": 57400 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003983620743404407, + "loss": 4.3425, + "step": 57500 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003983591743817651, + "loss": 4.3395, + "step": 57600 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003983562744230895, + "loss": 4.354, + "step": 57700 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003983533744644139, + "loss": 4.348, + "step": 57800 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003983504745057383, + "loss": 4.3514, + "step": 57900 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003983475745470627, + "loss": 4.3522, + "step": 58000 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003983446745883872, + "loss": 4.3412, + "step": 58100 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003983417746297116, + "loss": 4.3544, + "step": 58200 + }, + { + "epoch": 0.42, + "learning_rate": 0.000398338874671036, + "loss": 4.3416, + "step": 58300 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039833597471236034, + "loss": 4.3478, + "step": 58400 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039833307475368475, + "loss": 4.3602, + "step": 58500 + }, + { + "epoch": 0.42, + "learning_rate": 0.00039833017479500916, + "loss": 4.3517, + "step": 58600 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039832727483633357, + "loss": 4.3461, + "step": 58700 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039832437487765797, + "loss": 4.3473, + "step": 58800 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039832147491898243, + "loss": 4.3622, + "step": 58900 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039831857496030684, + "loss": 4.3519, + "step": 59000 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039831567500163125, + "loss": 4.3516, + "step": 59100 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039831277504295565, + "loss": 4.3558, + "step": 59200 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039830987508428006, + "loss": 4.3369, + "step": 59300 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039830697512560447, + "loss": 4.3411, + "step": 59400 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003983040751669289, + "loss": 4.3516, + "step": 59500 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039830117520825334, + "loss": 4.3511, + "step": 59600 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039829827524957774, + "loss": 4.3326, + "step": 59700 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039829537529090215, + "loss": 4.3435, + "step": 59800 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039829247533222656, + "loss": 4.3449, + "step": 59900 + }, + { + "epoch": 0.43, + "learning_rate": 0.00039828957537355096, + "loss": 4.3469, + "step": 60000 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003982867044144621, + "loss": 4.3556, + "step": 60100 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003982838044557865, + "loss": 4.3438, + "step": 60200 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039828090449711093, + "loss": 4.3458, + "step": 60300 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039827800453843534, + "loss": 4.3492, + "step": 60400 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039827510457975975, + "loss": 4.3402, + "step": 60500 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003982722046210842, + "loss": 4.347, + "step": 60600 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003982693046624086, + "loss": 4.3419, + "step": 60700 + }, + { + "epoch": 0.44, + "learning_rate": 0.000398266404703733, + "loss": 4.344, + "step": 60800 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039826350474505743, + "loss": 4.3489, + "step": 60900 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039826060478638183, + "loss": 4.3309, + "step": 61000 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003982577048277062, + "loss": 4.3381, + "step": 61100 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003982548048690306, + "loss": 4.3312, + "step": 61200 + }, + { + "epoch": 0.44, + "learning_rate": 0.00039825190491035506, + "loss": 4.3407, + "step": 61300 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039824900495167946, + "loss": 4.3458, + "step": 61400 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039824610499300387, + "loss": 4.3456, + "step": 61500 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003982432050343283, + "loss": 4.3384, + "step": 61600 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003982403050756527, + "loss": 4.3463, + "step": 61700 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003982374051169771, + "loss": 4.3421, + "step": 61800 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003982345051583015, + "loss": 4.3329, + "step": 61900 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039823160519962596, + "loss": 4.3358, + "step": 62000 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039822870524095036, + "loss": 4.3414, + "step": 62100 + }, + { + "epoch": 0.45, + "learning_rate": 0.00039822580528227477, + "loss": 4.3404, + "step": 62200 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003982229053235992, + "loss": 4.3417, + "step": 62300 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003982200053649236, + "loss": 4.3528, + "step": 62400 + }, + { + "epoch": 0.45, + "learning_rate": 0.000398217105406248, + "loss": 4.3532, + "step": 62500 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003982142054475724, + "loss": 4.3408, + "step": 62600 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003982113054888968, + "loss": 4.3464, + "step": 62700 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003982084055302212, + "loss": 4.3349, + "step": 62800 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003982055055715456, + "loss": 4.3414, + "step": 62900 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039820260561287, + "loss": 4.3405, + "step": 63000 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039819970565419443, + "loss": 4.3327, + "step": 63100 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039819680569551884, + "loss": 4.3315, + "step": 63200 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039819390573684325, + "loss": 4.3471, + "step": 63300 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039819100577816765, + "loss": 4.3362, + "step": 63400 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003981881058194921, + "loss": 4.3402, + "step": 63500 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003981852058608165, + "loss": 4.3534, + "step": 63600 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039818230590214093, + "loss": 4.3537, + "step": 63700 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039817940594346534, + "loss": 4.341, + "step": 63800 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039817650598478974, + "loss": 4.345, + "step": 63900 + }, + { + "epoch": 0.46, + "learning_rate": 0.00039817360602611415, + "loss": 4.3254, + "step": 64000 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003981707350670253, + "loss": 4.3333, + "step": 64100 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003981678351083497, + "loss": 4.3414, + "step": 64200 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003981649351496741, + "loss": 4.3459, + "step": 64300 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003981620351909985, + "loss": 4.3469, + "step": 64400 + }, + { + "epoch": 0.47, + "learning_rate": 0.000398159135232323, + "loss": 4.334, + "step": 64500 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003981562642732341, + "loss": 4.3412, + "step": 64600 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003981533643145585, + "loss": 4.3464, + "step": 64700 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039815046435588296, + "loss": 4.331, + "step": 64800 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039814756439720736, + "loss": 4.3276, + "step": 64900 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039814466443853177, + "loss": 4.3396, + "step": 65000 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003981417644798562, + "loss": 4.3285, + "step": 65100 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003981388645211806, + "loss": 4.332, + "step": 65200 + }, + { + "epoch": 0.47, + "learning_rate": 0.000398135964562505, + "loss": 4.3383, + "step": 65300 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003981330646038294, + "loss": 4.3288, + "step": 65400 + }, + { + "epoch": 0.47, + "learning_rate": 0.00039813016464515386, + "loss": 4.3364, + "step": 65500 + }, + { + "epoch": 0.48, + "learning_rate": 0.00039812726468647827, + "loss": 4.3462, + "step": 65600 + }, + { + "epoch": 0.48, + "learning_rate": 0.00039812436472780267, + "loss": 4.3381, + "step": 65700 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003981214647691271, + "loss": 4.3373, + "step": 65800 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003981185648104515, + "loss": 4.3279, + "step": 65900 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003981156648517759, + "loss": 4.3283, + "step": 66000 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003981127648931003, + "loss": 4.3437, + "step": 66100 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003981098649344247, + "loss": 4.3372, + "step": 66200 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003981069649757491, + "loss": 4.3405, + "step": 66300 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003981040650170735, + "loss": 4.3272, + "step": 66400 + }, + { + "epoch": 0.48, + "learning_rate": 0.00039810116505839793, + "loss": 4.3323, + "step": 66500 + }, + { + "epoch": 0.48, + "learning_rate": 0.00039809826509972233, + "loss": 4.3339, + "step": 66600 + }, + { + "epoch": 0.48, + "learning_rate": 0.00039809536514104674, + "loss": 4.3378, + "step": 66700 + }, + { + "epoch": 0.48, + "learning_rate": 0.00039809246518237115, + "loss": 4.324, + "step": 66800 + }, + { + "epoch": 0.48, + "learning_rate": 0.00039808956522369556, + "loss": 4.3355, + "step": 66900 + }, + { + "epoch": 0.49, + "learning_rate": 0.00039808666526502, + "loss": 4.3302, + "step": 67000 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003980837653063444, + "loss": 4.3344, + "step": 67100 + }, + { + "epoch": 0.49, + "learning_rate": 0.00039808086534766883, + "loss": 4.3298, + "step": 67200 + }, + { + "epoch": 0.49, + "learning_rate": 0.00039807796538899324, + "loss": 4.3348, + "step": 67300 + }, + { + "epoch": 0.49, + "learning_rate": 0.00039807506543031764, + "loss": 4.3222, + "step": 67400 + }, + { + "epoch": 0.49, + "learning_rate": 0.00039807216547164205, + "loss": 4.3414, + "step": 67500 + }, + { + "epoch": 0.49, + "learning_rate": 0.00039806926551296646, + "loss": 4.3412, + "step": 67600 + }, + { + "epoch": 0.49, + "learning_rate": 0.00039806636555429086, + "loss": 4.3311, + "step": 67700 + }, + { + "epoch": 0.49, + "learning_rate": 0.00039806346559561527, + "loss": 4.3226, + "step": 67800 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003980605656369397, + "loss": 4.3339, + "step": 67900 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003980576656782641, + "loss": 4.3287, + "step": 68000 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003980547657195885, + "loss": 4.3376, + "step": 68100 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003980518657609129, + "loss": 4.3267, + "step": 68200 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003980489658022373, + "loss": 4.3272, + "step": 68300 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039804606584356177, + "loss": 4.3225, + "step": 68400 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003980431658848862, + "loss": 4.3295, + "step": 68500 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003980402659262106, + "loss": 4.3222, + "step": 68600 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039803739496712174, + "loss": 4.3408, + "step": 68700 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039803449500844614, + "loss": 4.3119, + "step": 68800 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039803159504977055, + "loss": 4.3384, + "step": 68900 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039802869509109496, + "loss": 4.3343, + "step": 69000 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039802579513241936, + "loss": 4.3281, + "step": 69100 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039802289517374377, + "loss": 4.3139, + "step": 69200 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003980199952150682, + "loss": 4.3301, + "step": 69300 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039801709525639264, + "loss": 4.3332, + "step": 69400 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039801419529771705, + "loss": 4.337, + "step": 69500 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039801129533904145, + "loss": 4.3271, + "step": 69600 + }, + { + "epoch": 0.51, + "learning_rate": 0.00039800839538036586, + "loss": 4.3258, + "step": 69700 + }, + { + "epoch": 0.51, + "learning_rate": 0.00039800549542169027, + "loss": 4.33, + "step": 69800 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003980025954630147, + "loss": 4.3234, + "step": 69900 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003979996955043391, + "loss": 4.3194, + "step": 70000 + }, + { + "epoch": 0.51, + "learning_rate": 0.00039799679554566354, + "loss": 4.3364, + "step": 70100 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003979938955869879, + "loss": 4.3157, + "step": 70200 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003979909956283123, + "loss": 4.3206, + "step": 70300 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003979880956696367, + "loss": 4.3303, + "step": 70400 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003979851957109611, + "loss": 4.3294, + "step": 70500 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003979822957522855, + "loss": 4.3323, + "step": 70600 + }, + { + "epoch": 0.51, + "learning_rate": 0.00039797942479319673, + "loss": 4.3283, + "step": 70700 + }, + { + "epoch": 0.51, + "learning_rate": 0.00039797652483452114, + "loss": 4.3262, + "step": 70800 + }, + { + "epoch": 0.51, + "learning_rate": 0.00039797362487584555, + "loss": 4.3394, + "step": 70900 + }, + { + "epoch": 0.51, + "learning_rate": 0.00039797072491716995, + "loss": 4.3221, + "step": 71000 + }, + { + "epoch": 0.52, + "learning_rate": 0.00039796782495849436, + "loss": 4.3174, + "step": 71100 + }, + { + "epoch": 0.52, + "learning_rate": 0.00039796492499981877, + "loss": 4.3255, + "step": 71200 + }, + { + "epoch": 0.52, + "learning_rate": 0.00039796202504114317, + "loss": 4.3246, + "step": 71300 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003979591250824676, + "loss": 4.3381, + "step": 71400 + }, + { + "epoch": 0.52, + "learning_rate": 0.000397956225123792, + "loss": 4.3167, + "step": 71500 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003979533251651164, + "loss": 4.3265, + "step": 71600 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003979504252064408, + "loss": 4.3185, + "step": 71700 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003979475252477652, + "loss": 4.3182, + "step": 71800 + }, + { + "epoch": 0.52, + "learning_rate": 0.00039794462528908967, + "loss": 4.3236, + "step": 71900 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003979417253304141, + "loss": 4.3343, + "step": 72000 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003979388253717385, + "loss": 4.3246, + "step": 72100 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003979359254130629, + "loss": 4.3288, + "step": 72200 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003979330254543873, + "loss": 4.3278, + "step": 72300 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003979301254957117, + "loss": 4.3328, + "step": 72400 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003979272255370361, + "loss": 4.3192, + "step": 72500 + }, + { + "epoch": 0.53, + "learning_rate": 0.00039792432557836057, + "loss": 4.3197, + "step": 72600 + }, + { + "epoch": 0.53, + "learning_rate": 0.00039792145461927167, + "loss": 4.3286, + "step": 72700 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003979185546605961, + "loss": 4.3167, + "step": 72800 + }, + { + "epoch": 0.53, + "learning_rate": 0.00039791565470192054, + "loss": 4.3365, + "step": 72900 + }, + { + "epoch": 0.53, + "learning_rate": 0.00039791275474324495, + "loss": 4.3209, + "step": 73000 + }, + { + "epoch": 0.53, + "learning_rate": 0.00039790985478456935, + "loss": 4.3276, + "step": 73100 + }, + { + "epoch": 0.53, + "learning_rate": 0.00039790695482589376, + "loss": 4.3296, + "step": 73200 + }, + { + "epoch": 0.53, + "learning_rate": 0.00039790405486721817, + "loss": 4.3378, + "step": 73300 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003979011549085426, + "loss": 4.3275, + "step": 73400 + }, + { + "epoch": 0.53, + "learning_rate": 0.000397898254949867, + "loss": 4.3136, + "step": 73500 + }, + { + "epoch": 0.53, + "learning_rate": 0.00039789535499119144, + "loss": 4.3269, + "step": 73600 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003978924550325158, + "loss": 4.3162, + "step": 73700 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003978895550738402, + "loss": 4.3217, + "step": 73800 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003978866551151646, + "loss": 4.3215, + "step": 73900 + }, + { + "epoch": 0.54, + "learning_rate": 0.000397883755156489, + "loss": 4.3126, + "step": 74000 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003978808551978134, + "loss": 4.323, + "step": 74100 + }, + { + "epoch": 0.54, + "learning_rate": 0.00039787795523913783, + "loss": 4.3177, + "step": 74200 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003978750552804623, + "loss": 4.3188, + "step": 74300 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003978721553217867, + "loss": 4.3199, + "step": 74400 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003978692553631111, + "loss": 4.3167, + "step": 74500 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003978663554044355, + "loss": 4.3257, + "step": 74600 + }, + { + "epoch": 0.54, + "learning_rate": 0.00039786348444534667, + "loss": 4.3207, + "step": 74700 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003978606134862578, + "loss": 4.3194, + "step": 74800 + }, + { + "epoch": 0.54, + "learning_rate": 0.00039785771352758223, + "loss": 4.3163, + "step": 74900 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003978548135689067, + "loss": 4.3241, + "step": 75000 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003978519136102311, + "loss": 4.3171, + "step": 75100 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003978490136515555, + "loss": 4.3135, + "step": 75200 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003978461136928799, + "loss": 4.2999, + "step": 75300 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003978432137342043, + "loss": 4.3271, + "step": 75400 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003978403137755287, + "loss": 4.3209, + "step": 75500 + }, + { + "epoch": 0.55, + "learning_rate": 0.00039783741381685313, + "loss": 4.3168, + "step": 75600 + }, + { + "epoch": 0.55, + "learning_rate": 0.00039783451385817754, + "loss": 4.3166, + "step": 75700 + }, + { + "epoch": 0.55, + "learning_rate": 0.00039783161389950195, + "loss": 4.3268, + "step": 75800 + }, + { + "epoch": 0.55, + "learning_rate": 0.00039782871394082635, + "loss": 4.3196, + "step": 75900 + }, + { + "epoch": 0.55, + "learning_rate": 0.00039782581398215076, + "loss": 4.324, + "step": 76000 + }, + { + "epoch": 0.55, + "learning_rate": 0.00039782291402347517, + "loss": 4.3112, + "step": 76100 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003978200140647996, + "loss": 4.3216, + "step": 76200 + }, + { + "epoch": 0.55, + "learning_rate": 0.000397817114106124, + "loss": 4.3224, + "step": 76300 + }, + { + "epoch": 0.55, + "learning_rate": 0.00039781421414744844, + "loss": 4.3126, + "step": 76400 + }, + { + "epoch": 0.55, + "learning_rate": 0.00039781131418877285, + "loss": 4.3174, + "step": 76500 + }, + { + "epoch": 0.56, + "learning_rate": 0.00039780841423009725, + "loss": 4.3177, + "step": 76600 + }, + { + "epoch": 0.56, + "learning_rate": 0.00039780551427142166, + "loss": 4.3218, + "step": 76700 + }, + { + "epoch": 0.56, + "learning_rate": 0.00039780261431274607, + "loss": 4.3055, + "step": 76800 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003977997433536572, + "loss": 4.3088, + "step": 76900 + }, + { + "epoch": 0.56, + "learning_rate": 0.00039779684339498163, + "loss": 4.3219, + "step": 77000 + }, + { + "epoch": 0.56, + "learning_rate": 0.00039779394343630604, + "loss": 4.3148, + "step": 77100 + }, + { + "epoch": 0.56, + "learning_rate": 0.00039779104347763045, + "loss": 4.3246, + "step": 77200 + }, + { + "epoch": 0.56, + "learning_rate": 0.00039778814351895485, + "loss": 4.321, + "step": 77300 + }, + { + "epoch": 0.56, + "learning_rate": 0.00039778524356027926, + "loss": 4.3198, + "step": 77400 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003977823436016037, + "loss": 4.3153, + "step": 77500 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003977794436429281, + "loss": 4.3154, + "step": 77600 + }, + { + "epoch": 0.56, + "learning_rate": 0.00039777654368425253, + "loss": 4.3111, + "step": 77700 + }, + { + "epoch": 0.56, + "learning_rate": 0.00039777364372557694, + "loss": 4.3119, + "step": 77800 + }, + { + "epoch": 0.56, + "learning_rate": 0.00039777074376690135, + "loss": 4.3128, + "step": 77900 + }, + { + "epoch": 0.57, + "learning_rate": 0.00039776784380822575, + "loss": 4.3329, + "step": 78000 + }, + { + "epoch": 0.57, + "learning_rate": 0.00039776494384955016, + "loss": 4.3164, + "step": 78100 + }, + { + "epoch": 0.57, + "learning_rate": 0.00039776204389087457, + "loss": 4.3201, + "step": 78200 + }, + { + "epoch": 0.57, + "learning_rate": 0.000397759143932199, + "loss": 4.3105, + "step": 78300 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003977562439735234, + "loss": 4.3049, + "step": 78400 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003977533440148478, + "loss": 4.3109, + "step": 78500 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003977504440561722, + "loss": 4.31, + "step": 78600 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003977475440974966, + "loss": 4.3107, + "step": 78700 + }, + { + "epoch": 0.57, + "learning_rate": 0.000397744644138821, + "loss": 4.3232, + "step": 78800 + }, + { + "epoch": 0.57, + "learning_rate": 0.00039774174418014547, + "loss": 4.3069, + "step": 78900 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003977388442214699, + "loss": 4.3179, + "step": 79000 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003977359442627943, + "loss": 4.3081, + "step": 79100 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003977330443041187, + "loss": 4.3147, + "step": 79200 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003977301443454431, + "loss": 4.3211, + "step": 79300 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003977272443867675, + "loss": 4.3235, + "step": 79400 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003977243444280919, + "loss": 4.3216, + "step": 79500 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039772144446941637, + "loss": 4.3162, + "step": 79600 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003977185445107408, + "loss": 4.3045, + "step": 79700 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039771564455206513, + "loss": 4.3194, + "step": 79800 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039771274459338954, + "loss": 4.3133, + "step": 79900 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039770984463471395, + "loss": 4.3221, + "step": 80000 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039770694467603835, + "loss": 4.3041, + "step": 80100 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039770404471736276, + "loss": 4.3223, + "step": 80200 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003977011447586872, + "loss": 4.2999, + "step": 80300 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039769824480001163, + "loss": 4.3263, + "step": 80400 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003976953738409228, + "loss": 4.305, + "step": 80500 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039769247388224724, + "loss": 4.3165, + "step": 80600 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003976895739235716, + "loss": 4.3012, + "step": 80700 + }, + { + "epoch": 0.59, + "learning_rate": 0.000397686673964896, + "loss": 4.3061, + "step": 80800 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003976837740062204, + "loss": 4.3181, + "step": 80900 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003976808740475448, + "loss": 4.3128, + "step": 81000 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003976779740888692, + "loss": 4.3148, + "step": 81100 + }, + { + "epoch": 0.59, + "learning_rate": 0.00039767507413019363, + "loss": 4.3188, + "step": 81200 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003976721741715181, + "loss": 4.3024, + "step": 81300 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003976692742128425, + "loss": 4.3242, + "step": 81400 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003976663742541669, + "loss": 4.3085, + "step": 81500 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003976634742954913, + "loss": 4.295, + "step": 81600 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003976605743368157, + "loss": 4.314, + "step": 81700 + }, + { + "epoch": 0.59, + "learning_rate": 0.00039765767437814013, + "loss": 4.3074, + "step": 81800 + }, + { + "epoch": 0.59, + "learning_rate": 0.00039765477441946453, + "loss": 4.3215, + "step": 81900 + }, + { + "epoch": 0.59, + "learning_rate": 0.00039765187446078894, + "loss": 4.3204, + "step": 82000 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003976489745021134, + "loss": 4.3203, + "step": 82100 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003976460745434378, + "loss": 4.2888, + "step": 82200 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003976431745847622, + "loss": 4.3087, + "step": 82300 + }, + { + "epoch": 0.6, + "learning_rate": 0.00039764027462608657, + "loss": 4.3037, + "step": 82400 + }, + { + "epoch": 0.6, + "learning_rate": 0.000397637374667411, + "loss": 4.3128, + "step": 82500 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003976344747087354, + "loss": 4.306, + "step": 82600 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003976315747500598, + "loss": 4.3015, + "step": 82700 + }, + { + "epoch": 0.6, + "learning_rate": 0.000397628703790971, + "loss": 4.3198, + "step": 82800 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003976258038322954, + "loss": 4.3212, + "step": 82900 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003976229038736198, + "loss": 4.311, + "step": 83000 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003976200039149443, + "loss": 4.3074, + "step": 83100 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003976171039562687, + "loss": 4.3135, + "step": 83200 + }, + { + "epoch": 0.6, + "learning_rate": 0.00039761420399759303, + "loss": 4.3029, + "step": 83300 + }, + { + "epoch": 0.6, + "learning_rate": 0.00039761130403891744, + "loss": 4.2986, + "step": 83400 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039760840408024185, + "loss": 4.3017, + "step": 83500 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039760550412156625, + "loss": 4.3036, + "step": 83600 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039760260416289066, + "loss": 4.3135, + "step": 83700 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003975997042042151, + "loss": 4.3113, + "step": 83800 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039759680424553953, + "loss": 4.308, + "step": 83900 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039759390428686394, + "loss": 4.3196, + "step": 84000 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039759100432818834, + "loss": 4.2985, + "step": 84100 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039758810436951275, + "loss": 4.3025, + "step": 84200 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039758520441083716, + "loss": 4.3179, + "step": 84300 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039758230445216156, + "loss": 4.3193, + "step": 84400 + }, + { + "epoch": 0.61, + "learning_rate": 0.000397579404493486, + "loss": 4.3009, + "step": 84500 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039757650453481043, + "loss": 4.3073, + "step": 84600 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039757360457613484, + "loss": 4.2989, + "step": 84700 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039757070461745925, + "loss": 4.3057, + "step": 84800 + }, + { + "epoch": 0.62, + "learning_rate": 0.00039756780465878365, + "loss": 4.3077, + "step": 84900 + }, + { + "epoch": 0.62, + "learning_rate": 0.000397564904700108, + "loss": 4.3297, + "step": 85000 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003975620047414324, + "loss": 4.3152, + "step": 85100 + }, + { + "epoch": 0.62, + "learning_rate": 0.00039755910478275687, + "loss": 4.3153, + "step": 85200 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003975562048240813, + "loss": 4.3156, + "step": 85300 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003975533048654057, + "loss": 4.3197, + "step": 85400 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003975504049067301, + "loss": 4.3036, + "step": 85500 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003975475049480545, + "loss": 4.3038, + "step": 85600 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003975446049893789, + "loss": 4.3113, + "step": 85700 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003975417050307033, + "loss": 4.3148, + "step": 85800 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003975388050720278, + "loss": 4.3065, + "step": 85900 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003975359051133522, + "loss": 4.3023, + "step": 86000 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003975330051546766, + "loss": 4.3168, + "step": 86100 + }, + { + "epoch": 0.62, + "learning_rate": 0.000397530105196001, + "loss": 4.2915, + "step": 86200 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003975272052373254, + "loss": 4.3124, + "step": 86300 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003975243052786498, + "loss": 4.3176, + "step": 86400 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003975214053199742, + "loss": 4.3152, + "step": 86500 + }, + { + "epoch": 0.63, + "learning_rate": 0.00039751853436088537, + "loss": 4.3189, + "step": 86600 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003975156344022098, + "loss": 4.3059, + "step": 86700 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003975127344435342, + "loss": 4.3068, + "step": 86800 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003975098344848586, + "loss": 4.302, + "step": 86900 + }, + { + "epoch": 0.63, + "learning_rate": 0.00039750693452618305, + "loss": 4.3152, + "step": 87000 + }, + { + "epoch": 0.63, + "learning_rate": 0.00039750403456750746, + "loss": 4.3042, + "step": 87100 + }, + { + "epoch": 0.63, + "learning_rate": 0.00039750113460883187, + "loss": 4.2914, + "step": 87200 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003974982346501563, + "loss": 4.3008, + "step": 87300 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003974953346914807, + "loss": 4.2996, + "step": 87400 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003974924347328051, + "loss": 4.3072, + "step": 87500 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003974895347741295, + "loss": 4.2957, + "step": 87600 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003974866348154539, + "loss": 4.2925, + "step": 87700 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003974837348567783, + "loss": 4.3096, + "step": 87800 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003974808348981027, + "loss": 4.2973, + "step": 87900 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003974779349394271, + "loss": 4.2975, + "step": 88000 + }, + { + "epoch": 0.64, + "learning_rate": 0.00039747503498075153, + "loss": 4.2968, + "step": 88100 + }, + { + "epoch": 0.64, + "learning_rate": 0.00039747213502207594, + "loss": 4.296, + "step": 88200 + }, + { + "epoch": 0.64, + "learning_rate": 0.00039746923506340034, + "loss": 4.3037, + "step": 88300 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003974663351047248, + "loss": 4.3106, + "step": 88400 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003974634351460492, + "loss": 4.3039, + "step": 88500 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003974605351873736, + "loss": 4.3038, + "step": 88600 + }, + { + "epoch": 0.64, + "learning_rate": 0.000397457635228698, + "loss": 4.3034, + "step": 88700 + }, + { + "epoch": 0.64, + "learning_rate": 0.00039745473527002243, + "loss": 4.3024, + "step": 88800 + }, + { + "epoch": 0.64, + "learning_rate": 0.00039745183531134684, + "loss": 4.303, + "step": 88900 + }, + { + "epoch": 0.65, + "learning_rate": 0.00039744893535267125, + "loss": 4.3078, + "step": 89000 + }, + { + "epoch": 0.65, + "learning_rate": 0.00039744603539399565, + "loss": 4.2963, + "step": 89100 + }, + { + "epoch": 0.65, + "learning_rate": 0.00039744313543532006, + "loss": 4.3088, + "step": 89200 + }, + { + "epoch": 0.65, + "learning_rate": 0.00039744023547664447, + "loss": 4.2959, + "step": 89300 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003974373355179689, + "loss": 4.3016, + "step": 89400 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003974344355592933, + "loss": 4.3035, + "step": 89500 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003974315356006177, + "loss": 4.3175, + "step": 89600 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003974286356419421, + "loss": 4.3129, + "step": 89700 + }, + { + "epoch": 0.65, + "learning_rate": 0.00039742573568326655, + "loss": 4.3068, + "step": 89800 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003974228647241777, + "loss": 4.3021, + "step": 89900 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003974199647655021, + "loss": 4.3034, + "step": 90000 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003974170648068265, + "loss": 4.286, + "step": 90100 + }, + { + "epoch": 0.65, + "learning_rate": 0.00039741416484815093, + "loss": 4.3072, + "step": 90200 + }, + { + "epoch": 0.65, + "learning_rate": 0.00039741126488947534, + "loss": 4.2942, + "step": 90300 + }, + { + "epoch": 0.66, + "learning_rate": 0.00039740836493079975, + "loss": 4.2958, + "step": 90400 + }, + { + "epoch": 0.66, + "learning_rate": 0.00039740546497212415, + "loss": 4.3019, + "step": 90500 + }, + { + "epoch": 0.66, + "learning_rate": 0.00039740256501344856, + "loss": 4.304, + "step": 90600 + }, + { + "epoch": 0.66, + "learning_rate": 0.00039739966505477297, + "loss": 4.3096, + "step": 90700 + }, + { + "epoch": 0.66, + "learning_rate": 0.00039739676509609737, + "loss": 4.2915, + "step": 90800 + }, + { + "epoch": 0.66, + "learning_rate": 0.00039739386513742183, + "loss": 4.2932, + "step": 90900 + }, + { + "epoch": 0.66, + "learning_rate": 0.00039739096517874624, + "loss": 4.2946, + "step": 91000 + }, + { + "epoch": 0.66, + "learning_rate": 0.00039738806522007065, + "loss": 4.3032, + "step": 91100 + }, + { + "epoch": 0.66, + "learning_rate": 0.00039738516526139505, + "loss": 4.2987, + "step": 91200 + }, + { + "epoch": 0.66, + "learning_rate": 0.00039738226530271946, + "loss": 4.3038, + "step": 91300 + }, + { + "epoch": 0.66, + "learning_rate": 0.00039737936534404387, + "loss": 4.3122, + "step": 91400 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003973764653853683, + "loss": 4.2956, + "step": 91500 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003973735654266927, + "loss": 4.2915, + "step": 91600 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003973706654680171, + "loss": 4.3036, + "step": 91700 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003973677655093415, + "loss": 4.2947, + "step": 91800 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003973648655506659, + "loss": 4.2907, + "step": 91900 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003973619655919903, + "loss": 4.3012, + "step": 92000 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003973590656333147, + "loss": 4.2921, + "step": 92100 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003973561656746391, + "loss": 4.2968, + "step": 92200 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003973532657159636, + "loss": 4.3061, + "step": 92300 + }, + { + "epoch": 0.67, + "learning_rate": 0.000397350365757288, + "loss": 4.3078, + "step": 92400 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003973474657986124, + "loss": 4.2969, + "step": 92500 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003973445658399368, + "loss": 4.3104, + "step": 92600 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003973416658812612, + "loss": 4.2987, + "step": 92700 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003973387659225856, + "loss": 4.3104, + "step": 92800 + }, + { + "epoch": 0.67, + "learning_rate": 0.00039733586596391, + "loss": 4.2906, + "step": 92900 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003973329660052345, + "loss": 4.2869, + "step": 93000 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003973300660465589, + "loss": 4.3019, + "step": 93100 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003973271660878833, + "loss": 4.3014, + "step": 93200 + }, + { + "epoch": 0.68, + "learning_rate": 0.00039732426612920765, + "loss": 4.2934, + "step": 93300 + }, + { + "epoch": 0.68, + "learning_rate": 0.00039732136617053206, + "loss": 4.3007, + "step": 93400 + }, + { + "epoch": 0.68, + "learning_rate": 0.00039731846621185647, + "loss": 4.2998, + "step": 93500 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003973155662531809, + "loss": 4.292, + "step": 93600 + }, + { + "epoch": 0.68, + "learning_rate": 0.00039731266629450533, + "loss": 4.2964, + "step": 93700 + }, + { + "epoch": 0.68, + "learning_rate": 0.00039730976633582974, + "loss": 4.2941, + "step": 93800 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003973068953767409, + "loss": 4.3129, + "step": 93900 + }, + { + "epoch": 0.68, + "learning_rate": 0.00039730399541806536, + "loss": 4.3095, + "step": 94000 + }, + { + "epoch": 0.68, + "learning_rate": 0.00039730109545938977, + "loss": 4.3021, + "step": 94100 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003972981955007141, + "loss": 4.2855, + "step": 94200 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003972952955420385, + "loss": 4.2978, + "step": 94300 + }, + { + "epoch": 0.68, + "learning_rate": 0.00039729239558336293, + "loss": 4.2914, + "step": 94400 + }, + { + "epoch": 0.69, + "learning_rate": 0.00039728949562468734, + "loss": 4.2905, + "step": 94500 + }, + { + "epoch": 0.69, + "learning_rate": 0.00039728659566601175, + "loss": 4.3034, + "step": 94600 + }, + { + "epoch": 0.69, + "learning_rate": 0.00039728369570733615, + "loss": 4.3056, + "step": 94700 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003972807957486606, + "loss": 4.2995, + "step": 94800 + }, + { + "epoch": 0.69, + "learning_rate": 0.000397277895789985, + "loss": 4.2862, + "step": 94900 + }, + { + "epoch": 0.69, + "learning_rate": 0.00039727499583130943, + "loss": 4.2878, + "step": 95000 + }, + { + "epoch": 0.69, + "learning_rate": 0.00039727209587263383, + "loss": 4.3099, + "step": 95100 + }, + { + "epoch": 0.69, + "learning_rate": 0.00039726919591395824, + "loss": 4.3024, + "step": 95200 + }, + { + "epoch": 0.69, + "learning_rate": 0.00039726629595528265, + "loss": 4.2892, + "step": 95300 + }, + { + "epoch": 0.69, + "learning_rate": 0.00039726339599660705, + "loss": 4.293, + "step": 95400 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003972604960379315, + "loss": 4.2984, + "step": 95500 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003972575960792559, + "loss": 4.2892, + "step": 95600 + }, + { + "epoch": 0.69, + "learning_rate": 0.00039725469612058033, + "loss": 4.3078, + "step": 95700 + }, + { + "epoch": 0.69, + "learning_rate": 0.00039725179616190474, + "loss": 4.3032, + "step": 95800 + }, + { + "epoch": 0.7, + "learning_rate": 0.00039724895420240264, + "loss": 4.2906, + "step": 95900 + }, + { + "epoch": 0.7, + "learning_rate": 0.00039724605424372705, + "loss": 4.3019, + "step": 96000 + }, + { + "epoch": 0.7, + "learning_rate": 0.00039724315428505145, + "loss": 4.3107, + "step": 96100 + }, + { + "epoch": 0.7, + "learning_rate": 0.00039724025432637586, + "loss": 4.2904, + "step": 96200 + }, + { + "epoch": 0.7, + "learning_rate": 0.00039723735436770027, + "loss": 4.2902, + "step": 96300 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003972344544090247, + "loss": 4.2956, + "step": 96400 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003972315544503491, + "loss": 4.3014, + "step": 96500 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003972286544916735, + "loss": 4.3016, + "step": 96600 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003972257545329979, + "loss": 4.2941, + "step": 96700 + }, + { + "epoch": 0.7, + "learning_rate": 0.00039722285457432236, + "loss": 4.2973, + "step": 96800 + }, + { + "epoch": 0.7, + "learning_rate": 0.00039721995461564676, + "loss": 4.2949, + "step": 96900 + }, + { + "epoch": 0.7, + "learning_rate": 0.00039721705465697117, + "loss": 4.3019, + "step": 97000 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003972141546982956, + "loss": 4.3049, + "step": 97100 + }, + { + "epoch": 0.7, + "learning_rate": 0.00039721125473962, + "loss": 4.294, + "step": 97200 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003972083547809444, + "loss": 4.2886, + "step": 97300 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003972054548222688, + "loss": 4.2925, + "step": 97400 + }, + { + "epoch": 0.71, + "learning_rate": 0.00039720255486359326, + "loss": 4.284, + "step": 97500 + }, + { + "epoch": 0.71, + "learning_rate": 0.00039719965490491767, + "loss": 4.2887, + "step": 97600 + }, + { + "epoch": 0.71, + "learning_rate": 0.000397196754946242, + "loss": 4.283, + "step": 97700 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003971938549875664, + "loss": 4.285, + "step": 97800 + }, + { + "epoch": 0.71, + "learning_rate": 0.00039719095502889083, + "loss": 4.2927, + "step": 97900 + }, + { + "epoch": 0.71, + "learning_rate": 0.00039718805507021524, + "loss": 4.2888, + "step": 98000 + }, + { + "epoch": 0.71, + "learning_rate": 0.00039718515511153965, + "loss": 4.305, + "step": 98100 + }, + { + "epoch": 0.71, + "learning_rate": 0.00039718225515286405, + "loss": 4.2909, + "step": 98200 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003971793551941885, + "loss": 4.291, + "step": 98300 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003971764552355129, + "loss": 4.2927, + "step": 98400 + }, + { + "epoch": 0.71, + "learning_rate": 0.00039717355527683733, + "loss": 4.2801, + "step": 98500 + }, + { + "epoch": 0.71, + "learning_rate": 0.00039717065531816174, + "loss": 4.2853, + "step": 98600 + }, + { + "epoch": 0.72, + "learning_rate": 0.00039716775535948614, + "loss": 4.2895, + "step": 98700 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003971648844003973, + "loss": 4.3037, + "step": 98800 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003971619844417217, + "loss": 4.2859, + "step": 98900 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003971590844830461, + "loss": 4.2954, + "step": 99000 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003971561845243705, + "loss": 4.2884, + "step": 99100 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003971532845656949, + "loss": 4.2725, + "step": 99200 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003971503846070194, + "loss": 4.2974, + "step": 99300 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003971474846483438, + "loss": 4.2816, + "step": 99400 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003971445846896682, + "loss": 4.2933, + "step": 99500 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003971416847309926, + "loss": 4.2935, + "step": 99600 + }, + { + "epoch": 0.72, + "learning_rate": 0.000397138784772317, + "loss": 4.2789, + "step": 99700 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003971358848136414, + "loss": 4.3014, + "step": 99800 + }, + { + "epoch": 0.72, + "learning_rate": 0.00039713298485496583, + "loss": 4.2906, + "step": 99900 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003971300848962903, + "loss": 4.2927, + "step": 100000 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003971271849376147, + "loss": 4.2868, + "step": 100100 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003971242849789391, + "loss": 4.3011, + "step": 100200 + }, + { + "epoch": 0.73, + "learning_rate": 0.00039712138502026346, + "loss": 4.2866, + "step": 100300 + }, + { + "epoch": 0.73, + "learning_rate": 0.00039711848506158786, + "loss": 4.2973, + "step": 100400 + }, + { + "epoch": 0.73, + "learning_rate": 0.00039711558510291227, + "loss": 4.2933, + "step": 100500 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003971126851442367, + "loss": 4.3056, + "step": 100600 + }, + { + "epoch": 0.73, + "learning_rate": 0.00039710978518556114, + "loss": 4.3027, + "step": 100700 + }, + { + "epoch": 0.73, + "learning_rate": 0.00039710688522688554, + "loss": 4.2829, + "step": 100800 + }, + { + "epoch": 0.73, + "learning_rate": 0.00039710398526820995, + "loss": 4.2927, + "step": 100900 + }, + { + "epoch": 0.73, + "learning_rate": 0.00039710108530953436, + "loss": 4.2806, + "step": 101000 + }, + { + "epoch": 0.73, + "learning_rate": 0.00039709818535085876, + "loss": 4.2885, + "step": 101100 + }, + { + "epoch": 0.73, + "learning_rate": 0.00039709534339135667, + "loss": 4.2879, + "step": 101200 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003970924434326811, + "loss": 4.2853, + "step": 101300 + }, + { + "epoch": 0.74, + "learning_rate": 0.00039708954347400554, + "loss": 4.2776, + "step": 101400 + }, + { + "epoch": 0.74, + "learning_rate": 0.00039708664351532994, + "loss": 4.2923, + "step": 101500 + }, + { + "epoch": 0.74, + "learning_rate": 0.00039708374355665435, + "loss": 4.3018, + "step": 101600 + }, + { + "epoch": 0.74, + "learning_rate": 0.00039708084359797876, + "loss": 4.2882, + "step": 101700 + }, + { + "epoch": 0.74, + "learning_rate": 0.00039707794363930316, + "loss": 4.3042, + "step": 101800 + }, + { + "epoch": 0.74, + "learning_rate": 0.00039707504368062757, + "loss": 4.2913, + "step": 101900 + }, + { + "epoch": 0.74, + "learning_rate": 0.000397072143721952, + "loss": 4.2866, + "step": 102000 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003970692437632764, + "loss": 4.2943, + "step": 102100 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003970663438046008, + "loss": 4.296, + "step": 102200 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003970634438459252, + "loss": 4.3029, + "step": 102300 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003970605438872496, + "loss": 4.2852, + "step": 102400 + }, + { + "epoch": 0.74, + "learning_rate": 0.000397057643928574, + "loss": 4.2851, + "step": 102500 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003970547439698984, + "loss": 4.2951, + "step": 102600 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003970518440112228, + "loss": 4.2886, + "step": 102700 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003970489440525473, + "loss": 4.2816, + "step": 102800 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003970460440938717, + "loss": 4.2791, + "step": 102900 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003970431441351961, + "loss": 4.304, + "step": 103000 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003970402441765205, + "loss": 4.3072, + "step": 103100 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003970373442178449, + "loss": 4.2974, + "step": 103200 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003970344442591693, + "loss": 4.2894, + "step": 103300 + }, + { + "epoch": 0.75, + "learning_rate": 0.00039703154430049373, + "loss": 4.2919, + "step": 103400 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003970286443418182, + "loss": 4.2871, + "step": 103500 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003970257443831426, + "loss": 4.2919, + "step": 103600 + }, + { + "epoch": 0.75, + "learning_rate": 0.000397022844424467, + "loss": 4.2905, + "step": 103700 + }, + { + "epoch": 0.75, + "learning_rate": 0.00039701994446579136, + "loss": 4.2854, + "step": 103800 + }, + { + "epoch": 0.75, + "learning_rate": 0.00039701704450711576, + "loss": 4.2898, + "step": 103900 + }, + { + "epoch": 0.75, + "learning_rate": 0.00039701414454844017, + "loss": 4.2863, + "step": 104000 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003970112445897646, + "loss": 4.28, + "step": 104100 + }, + { + "epoch": 0.76, + "learning_rate": 0.00039700834463108904, + "loss": 4.2931, + "step": 104200 + }, + { + "epoch": 0.76, + "learning_rate": 0.00039700544467241345, + "loss": 4.286, + "step": 104300 + }, + { + "epoch": 0.76, + "learning_rate": 0.00039700254471373785, + "loss": 4.2823, + "step": 104400 + }, + { + "epoch": 0.76, + "learning_rate": 0.00039699964475506226, + "loss": 4.2969, + "step": 104500 + }, + { + "epoch": 0.76, + "learning_rate": 0.00039699674479638667, + "loss": 4.2883, + "step": 104600 + }, + { + "epoch": 0.76, + "learning_rate": 0.00039699384483771107, + "loss": 4.2882, + "step": 104700 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003969909448790355, + "loss": 4.2915, + "step": 104800 + }, + { + "epoch": 0.76, + "learning_rate": 0.00039698804492035994, + "loss": 4.2948, + "step": 104900 + }, + { + "epoch": 0.76, + "learning_rate": 0.00039698514496168435, + "loss": 4.2823, + "step": 105000 + }, + { + "epoch": 0.76, + "learning_rate": 0.00039698224500300875, + "loss": 4.2887, + "step": 105100 + }, + { + "epoch": 0.76, + "learning_rate": 0.00039697934504433316, + "loss": 4.3017, + "step": 105200 + }, + { + "epoch": 0.76, + "learning_rate": 0.00039697644508565757, + "loss": 4.2847, + "step": 105300 + }, + { + "epoch": 0.76, + "learning_rate": 0.000396973545126982, + "loss": 4.2847, + "step": 105400 + }, + { + "epoch": 0.76, + "learning_rate": 0.00039697064516830633, + "loss": 4.2762, + "step": 105500 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003969677452096308, + "loss": 4.282, + "step": 105600 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003969648452509552, + "loss": 4.2733, + "step": 105700 + }, + { + "epoch": 0.77, + "learning_rate": 0.00039696197429186635, + "loss": 4.2831, + "step": 105800 + }, + { + "epoch": 0.77, + "learning_rate": 0.00039695907433319076, + "loss": 4.2963, + "step": 105900 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003969561743745152, + "loss": 4.2827, + "step": 106000 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003969532744158396, + "loss": 4.2829, + "step": 106100 + }, + { + "epoch": 0.77, + "learning_rate": 0.00039695037445716403, + "loss": 4.3016, + "step": 106200 + }, + { + "epoch": 0.77, + "learning_rate": 0.00039694747449848844, + "loss": 4.2758, + "step": 106300 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003969445745398128, + "loss": 4.285, + "step": 106400 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003969416745811372, + "loss": 4.2768, + "step": 106500 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003969387746224616, + "loss": 4.282, + "step": 106600 + }, + { + "epoch": 0.77, + "learning_rate": 0.00039693587466378607, + "loss": 4.2867, + "step": 106700 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003969329747051105, + "loss": 4.2834, + "step": 106800 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003969300747464349, + "loss": 4.2813, + "step": 106900 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003969271747877593, + "loss": 4.2724, + "step": 107000 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003969242748290837, + "loss": 4.2833, + "step": 107100 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003969213748704081, + "loss": 4.291, + "step": 107200 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003969184749117325, + "loss": 4.3, + "step": 107300 + }, + { + "epoch": 0.78, + "learning_rate": 0.00039691557495305697, + "loss": 4.2695, + "step": 107400 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003969126749943814, + "loss": 4.2923, + "step": 107500 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003969097750357058, + "loss": 4.2882, + "step": 107600 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003969068750770302, + "loss": 4.2899, + "step": 107700 + }, + { + "epoch": 0.78, + "learning_rate": 0.00039690400411794135, + "loss": 4.289, + "step": 107800 + }, + { + "epoch": 0.78, + "learning_rate": 0.00039690110415926575, + "loss": 4.2907, + "step": 107900 + }, + { + "epoch": 0.78, + "learning_rate": 0.00039689820420059016, + "loss": 4.2824, + "step": 108000 + }, + { + "epoch": 0.78, + "learning_rate": 0.00039689530424191457, + "loss": 4.2796, + "step": 108100 + }, + { + "epoch": 0.78, + "learning_rate": 0.000396892404283239, + "loss": 4.2789, + "step": 108200 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003968895043245634, + "loss": 4.2819, + "step": 108300 + }, + { + "epoch": 0.79, + "learning_rate": 0.00039688660436588784, + "loss": 4.2845, + "step": 108400 + }, + { + "epoch": 0.79, + "learning_rate": 0.00039688370440721225, + "loss": 4.2861, + "step": 108500 + }, + { + "epoch": 0.79, + "learning_rate": 0.00039688080444853666, + "loss": 4.2913, + "step": 108600 + }, + { + "epoch": 0.79, + "learning_rate": 0.00039687790448986106, + "loss": 4.2836, + "step": 108700 + }, + { + "epoch": 0.79, + "learning_rate": 0.00039687500453118547, + "loss": 4.2795, + "step": 108800 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003968721045725099, + "loss": 4.2933, + "step": 108900 + }, + { + "epoch": 0.79, + "learning_rate": 0.00039686920461383423, + "loss": 4.2742, + "step": 109000 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003968663046551587, + "loss": 4.2866, + "step": 109100 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003968634046964831, + "loss": 4.282, + "step": 109200 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003968605047378075, + "loss": 4.2888, + "step": 109300 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003968576047791319, + "loss": 4.2737, + "step": 109400 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003968547048204563, + "loss": 4.2662, + "step": 109500 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003968518048617807, + "loss": 4.277, + "step": 109600 + }, + { + "epoch": 0.8, + "learning_rate": 0.00039684890490310513, + "loss": 4.2831, + "step": 109700 + }, + { + "epoch": 0.8, + "learning_rate": 0.00039684600494442954, + "loss": 4.2857, + "step": 109800 + }, + { + "epoch": 0.8, + "learning_rate": 0.000396843104985754, + "loss": 4.2864, + "step": 109900 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003968402050270784, + "loss": 4.2856, + "step": 110000 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003968373340679895, + "loss": 4.2922, + "step": 110100 + }, + { + "epoch": 0.8, + "learning_rate": 0.00039683443410931397, + "loss": 4.2754, + "step": 110200 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003968315341506384, + "loss": 4.2915, + "step": 110300 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003968286341919628, + "loss": 4.2943, + "step": 110400 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003968257342332872, + "loss": 4.2867, + "step": 110500 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003968228342746116, + "loss": 4.2769, + "step": 110600 + }, + { + "epoch": 0.8, + "learning_rate": 0.000396819934315936, + "loss": 4.2751, + "step": 110700 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003968170343572604, + "loss": 4.2774, + "step": 110800 + }, + { + "epoch": 0.8, + "learning_rate": 0.00039681416339817157, + "loss": 4.2794, + "step": 110900 + }, + { + "epoch": 0.8, + "learning_rate": 0.00039681126343949597, + "loss": 4.2798, + "step": 111000 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003968083634808204, + "loss": 4.2742, + "step": 111100 + }, + { + "epoch": 0.81, + "learning_rate": 0.00039680546352214484, + "loss": 4.2794, + "step": 111200 + }, + { + "epoch": 0.81, + "learning_rate": 0.00039680256356346925, + "loss": 4.2833, + "step": 111300 + }, + { + "epoch": 0.81, + "learning_rate": 0.00039679966360479365, + "loss": 4.2835, + "step": 111400 + }, + { + "epoch": 0.81, + "learning_rate": 0.00039679676364611806, + "loss": 4.2894, + "step": 111500 + }, + { + "epoch": 0.81, + "learning_rate": 0.00039679386368744247, + "loss": 4.2781, + "step": 111600 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003967909637287669, + "loss": 4.2776, + "step": 111700 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003967880637700913, + "loss": 4.2741, + "step": 111800 + }, + { + "epoch": 0.81, + "learning_rate": 0.00039678516381141574, + "loss": 4.2827, + "step": 111900 + }, + { + "epoch": 0.81, + "learning_rate": 0.00039678226385274015, + "loss": 4.2771, + "step": 112000 + }, + { + "epoch": 0.81, + "learning_rate": 0.00039677936389406456, + "loss": 4.2883, + "step": 112100 + }, + { + "epoch": 0.81, + "learning_rate": 0.00039677646393538896, + "loss": 4.2786, + "step": 112200 + }, + { + "epoch": 0.81, + "learning_rate": 0.00039677356397671337, + "loss": 4.2872, + "step": 112300 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003967706640180378, + "loss": 4.2827, + "step": 112400 + }, + { + "epoch": 0.82, + "learning_rate": 0.00039676776405936213, + "loss": 4.2862, + "step": 112500 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003967648641006866, + "loss": 4.2799, + "step": 112600 + }, + { + "epoch": 0.82, + "learning_rate": 0.000396761964142011, + "loss": 4.2776, + "step": 112700 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003967590641833354, + "loss": 4.2855, + "step": 112800 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003967561642246598, + "loss": 4.2861, + "step": 112900 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003967532642659842, + "loss": 4.2839, + "step": 113000 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003967503643073086, + "loss": 4.2861, + "step": 113100 + }, + { + "epoch": 0.82, + "learning_rate": 0.00039674746434863303, + "loss": 4.2834, + "step": 113200 + }, + { + "epoch": 0.82, + "learning_rate": 0.00039674456438995744, + "loss": 4.278, + "step": 113300 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003967416644312819, + "loss": 4.2751, + "step": 113400 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003967387644726063, + "loss": 4.2729, + "step": 113500 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003967358645139307, + "loss": 4.2754, + "step": 113600 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003967329645552551, + "loss": 4.277, + "step": 113700 + }, + { + "epoch": 0.82, + "learning_rate": 0.00039673006459657953, + "loss": 4.2856, + "step": 113800 + }, + { + "epoch": 0.83, + "learning_rate": 0.00039672716463790393, + "loss": 4.2828, + "step": 113900 + }, + { + "epoch": 0.83, + "learning_rate": 0.00039672426467922834, + "loss": 4.2771, + "step": 114000 + }, + { + "epoch": 0.83, + "learning_rate": 0.00039672136472055275, + "loss": 4.2881, + "step": 114100 + }, + { + "epoch": 0.83, + "learning_rate": 0.00039671846476187716, + "loss": 4.2722, + "step": 114200 + }, + { + "epoch": 0.83, + "learning_rate": 0.00039671556480320156, + "loss": 4.2664, + "step": 114300 + }, + { + "epoch": 0.83, + "learning_rate": 0.00039671266484452597, + "loss": 4.2841, + "step": 114400 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003967097648858504, + "loss": 4.2696, + "step": 114500 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003967068649271748, + "loss": 4.2749, + "step": 114600 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003967039649684992, + "loss": 4.2675, + "step": 114700 + }, + { + "epoch": 0.83, + "learning_rate": 0.00039670106500982365, + "loss": 4.2747, + "step": 114800 + }, + { + "epoch": 0.83, + "learning_rate": 0.00039669816505114806, + "loss": 4.2868, + "step": 114900 + }, + { + "epoch": 0.83, + "learning_rate": 0.00039669526509247246, + "loss": 4.2695, + "step": 115000 + }, + { + "epoch": 0.83, + "learning_rate": 0.00039669236513379687, + "loss": 4.2923, + "step": 115100 + }, + { + "epoch": 0.84, + "learning_rate": 0.00039668949417470803, + "loss": 4.2827, + "step": 115200 + }, + { + "epoch": 0.84, + "learning_rate": 0.00039668659421603243, + "loss": 4.2652, + "step": 115300 + }, + { + "epoch": 0.84, + "learning_rate": 0.00039668369425735684, + "loss": 4.2896, + "step": 115400 + }, + { + "epoch": 0.84, + "learning_rate": 0.00039668079429868125, + "loss": 4.276, + "step": 115500 + }, + { + "epoch": 0.84, + "learning_rate": 0.00039667789434000565, + "loss": 4.2712, + "step": 115600 + }, + { + "epoch": 0.84, + "learning_rate": 0.00039667499438133006, + "loss": 4.2788, + "step": 115700 + }, + { + "epoch": 0.84, + "learning_rate": 0.0003966720944226545, + "loss": 4.2892, + "step": 115800 + }, + { + "epoch": 0.84, + "learning_rate": 0.00039666919446397893, + "loss": 4.2666, + "step": 115900 + }, + { + "epoch": 0.84, + "learning_rate": 0.00039666629450530334, + "loss": 4.2871, + "step": 116000 + }, + { + "epoch": 0.84, + "learning_rate": 0.00039666339454662774, + "loss": 4.2708, + "step": 116100 + }, + { + "epoch": 0.84, + "learning_rate": 0.00039666049458795215, + "loss": 4.2699, + "step": 116200 + }, + { + "epoch": 0.84, + "learning_rate": 0.00039665759462927656, + "loss": 4.2756, + "step": 116300 + }, + { + "epoch": 0.84, + "learning_rate": 0.00039665469467060096, + "loss": 4.2829, + "step": 116400 + }, + { + "epoch": 0.84, + "learning_rate": 0.00039665179471192537, + "loss": 4.2591, + "step": 116500 + }, + { + "epoch": 0.85, + "learning_rate": 0.0003966488947532498, + "loss": 4.2768, + "step": 116600 + }, + { + "epoch": 0.85, + "learning_rate": 0.0003966459947945742, + "loss": 4.2712, + "step": 116700 + }, + { + "epoch": 0.85, + "learning_rate": 0.0003966430948358986, + "loss": 4.2759, + "step": 116800 + }, + { + "epoch": 0.85, + "learning_rate": 0.000396640194877223, + "loss": 4.2799, + "step": 116900 + }, + { + "epoch": 0.85, + "learning_rate": 0.0003966372949185474, + "loss": 4.2729, + "step": 117000 + }, + { + "epoch": 0.85, + "learning_rate": 0.0003966343949598718, + "loss": 4.2845, + "step": 117100 + }, + { + "epoch": 0.85, + "learning_rate": 0.0003966314950011963, + "loss": 4.2895, + "step": 117200 + }, + { + "epoch": 0.85, + "learning_rate": 0.0003966285950425207, + "loss": 4.2766, + "step": 117300 + }, + { + "epoch": 0.85, + "learning_rate": 0.0003966256950838451, + "loss": 4.2725, + "step": 117400 + }, + { + "epoch": 0.85, + "learning_rate": 0.0003966227951251695, + "loss": 4.2705, + "step": 117500 + }, + { + "epoch": 0.85, + "learning_rate": 0.00039661992416608065, + "loss": 4.2751, + "step": 117600 + }, + { + "epoch": 0.85, + "learning_rate": 0.00039661702420740506, + "loss": 4.2885, + "step": 117700 + }, + { + "epoch": 0.85, + "learning_rate": 0.00039661412424872946, + "loss": 4.2791, + "step": 117800 + }, + { + "epoch": 0.85, + "learning_rate": 0.00039661122429005387, + "loss": 4.2657, + "step": 117900 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003966083243313783, + "loss": 4.2747, + "step": 118000 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003966054243727027, + "loss": 4.2659, + "step": 118100 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003966025244140271, + "loss": 4.272, + "step": 118200 + }, + { + "epoch": 0.86, + "learning_rate": 0.00039659962445535155, + "loss": 4.2731, + "step": 118300 + }, + { + "epoch": 0.86, + "learning_rate": 0.00039659672449667596, + "loss": 4.2728, + "step": 118400 + }, + { + "epoch": 0.86, + "learning_rate": 0.00039659382453800037, + "loss": 4.2754, + "step": 118500 + }, + { + "epoch": 0.86, + "learning_rate": 0.00039659092457932477, + "loss": 4.2579, + "step": 118600 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003965880246206492, + "loss": 4.2682, + "step": 118700 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003965851246619736, + "loss": 4.2716, + "step": 118800 + }, + { + "epoch": 0.86, + "learning_rate": 0.000396582224703298, + "loss": 4.2762, + "step": 118900 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003965793247446224, + "loss": 4.2748, + "step": 119000 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003965764247859468, + "loss": 4.2798, + "step": 119100 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003965735248272712, + "loss": 4.2757, + "step": 119200 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003965706248685956, + "loss": 4.2638, + "step": 119300 + }, + { + "epoch": 0.87, + "learning_rate": 0.00039656772490992003, + "loss": 4.2775, + "step": 119400 + }, + { + "epoch": 0.87, + "learning_rate": 0.00039656482495124443, + "loss": 4.2831, + "step": 119500 + }, + { + "epoch": 0.87, + "learning_rate": 0.00039656192499256884, + "loss": 4.2815, + "step": 119600 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003965590250338933, + "loss": 4.2884, + "step": 119700 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003965561250752177, + "loss": 4.2711, + "step": 119800 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003965532251165421, + "loss": 4.2808, + "step": 119900 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003965503251578665, + "loss": 4.2742, + "step": 120000 + }, + { + "epoch": 0.87, + "learning_rate": 0.00039654742519919093, + "loss": 4.2935, + "step": 120100 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003965445542401021, + "loss": 4.2844, + "step": 120200 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003965416542814265, + "loss": 4.2746, + "step": 120300 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003965387543227509, + "loss": 4.2778, + "step": 120400 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003965358543640753, + "loss": 4.2756, + "step": 120500 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003965329544053997, + "loss": 4.2768, + "step": 120600 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003965300544467242, + "loss": 4.2817, + "step": 120700 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003965271544880486, + "loss": 4.2652, + "step": 120800 + }, + { + "epoch": 0.88, + "learning_rate": 0.000396524254529373, + "loss": 4.2863, + "step": 120900 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003965213545706974, + "loss": 4.2822, + "step": 121000 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003965184546120218, + "loss": 4.2892, + "step": 121100 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003965155546533462, + "loss": 4.2535, + "step": 121200 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003965126546946706, + "loss": 4.2852, + "step": 121300 + }, + { + "epoch": 0.88, + "learning_rate": 0.000396509754735995, + "loss": 4.2759, + "step": 121400 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003965068547773195, + "loss": 4.2878, + "step": 121500 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003965039548186439, + "loss": 4.268, + "step": 121600 + }, + { + "epoch": 0.88, + "learning_rate": 0.00039650105485996824, + "loss": 4.2701, + "step": 121700 + }, + { + "epoch": 0.88, + "learning_rate": 0.00039649815490129265, + "loss": 4.2734, + "step": 121800 + }, + { + "epoch": 0.88, + "learning_rate": 0.00039649525494261706, + "loss": 4.2782, + "step": 121900 + }, + { + "epoch": 0.88, + "learning_rate": 0.00039649235498394146, + "loss": 4.2584, + "step": 122000 + }, + { + "epoch": 0.89, + "learning_rate": 0.00039648945502526587, + "loss": 4.2763, + "step": 122100 + }, + { + "epoch": 0.89, + "learning_rate": 0.00039648655506659033, + "loss": 4.2663, + "step": 122200 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003964836841075015, + "loss": 4.284, + "step": 122300 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003964807841488259, + "loss": 4.2748, + "step": 122400 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003964778841901503, + "loss": 4.2781, + "step": 122500 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003964749842314747, + "loss": 4.2765, + "step": 122600 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003964720842727991, + "loss": 4.2673, + "step": 122700 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003964691843141235, + "loss": 4.2709, + "step": 122800 + }, + { + "epoch": 0.89, + "learning_rate": 0.00039646628435544793, + "loss": 4.2804, + "step": 122900 + }, + { + "epoch": 0.89, + "learning_rate": 0.00039646338439677234, + "loss": 4.2731, + "step": 123000 + }, + { + "epoch": 0.89, + "learning_rate": 0.00039646048443809674, + "loss": 4.273, + "step": 123100 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003964575844794212, + "loss": 4.2746, + "step": 123200 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003964546845207456, + "loss": 4.2671, + "step": 123300 + }, + { + "epoch": 0.89, + "learning_rate": 0.00039645178456207, + "loss": 4.2716, + "step": 123400 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003964488846033944, + "loss": 4.2724, + "step": 123500 + }, + { + "epoch": 0.9, + "learning_rate": 0.00039644598464471883, + "loss": 4.275, + "step": 123600 + }, + { + "epoch": 0.9, + "learning_rate": 0.00039644308468604324, + "loss": 4.2616, + "step": 123700 + }, + { + "epoch": 0.9, + "learning_rate": 0.00039644018472736764, + "loss": 4.2698, + "step": 123800 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003964372847686921, + "loss": 4.2842, + "step": 123900 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003964343848100165, + "loss": 4.2763, + "step": 124000 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003964314848513409, + "loss": 4.266, + "step": 124100 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003964285848926653, + "loss": 4.2588, + "step": 124200 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003964256849339897, + "loss": 4.2668, + "step": 124300 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003964227849753141, + "loss": 4.2848, + "step": 124400 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003964198850166385, + "loss": 4.2707, + "step": 124500 + }, + { + "epoch": 0.9, + "learning_rate": 0.00039641698505796295, + "loss": 4.2824, + "step": 124600 + }, + { + "epoch": 0.9, + "learning_rate": 0.00039641408509928736, + "loss": 4.2885, + "step": 124700 + }, + { + "epoch": 0.9, + "learning_rate": 0.00039641118514061177, + "loss": 4.2784, + "step": 124800 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003964082851819362, + "loss": 4.2662, + "step": 124900 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003964053852232606, + "loss": 4.2795, + "step": 125000 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003964025142641718, + "loss": 4.2695, + "step": 125100 + }, + { + "epoch": 0.91, + "learning_rate": 0.00039639961430549614, + "loss": 4.263, + "step": 125200 + }, + { + "epoch": 0.91, + "learning_rate": 0.00039639671434682055, + "loss": 4.2742, + "step": 125300 + }, + { + "epoch": 0.91, + "learning_rate": 0.00039639381438814496, + "loss": 4.2688, + "step": 125400 + }, + { + "epoch": 0.91, + "learning_rate": 0.00039639091442946937, + "loss": 4.2836, + "step": 125500 + }, + { + "epoch": 0.91, + "learning_rate": 0.00039638801447079377, + "loss": 4.2806, + "step": 125600 + }, + { + "epoch": 0.91, + "learning_rate": 0.00039638511451211823, + "loss": 4.2731, + "step": 125700 + }, + { + "epoch": 0.91, + "learning_rate": 0.00039638221455344264, + "loss": 4.2705, + "step": 125800 + }, + { + "epoch": 0.91, + "learning_rate": 0.00039637931459476705, + "loss": 4.2607, + "step": 125900 + }, + { + "epoch": 0.91, + "learning_rate": 0.00039637641463609145, + "loss": 4.2876, + "step": 126000 + }, + { + "epoch": 0.91, + "learning_rate": 0.00039637351467741586, + "loss": 4.2794, + "step": 126100 + }, + { + "epoch": 0.91, + "learning_rate": 0.00039637061471874027, + "loss": 4.2814, + "step": 126200 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003963677147600647, + "loss": 4.2742, + "step": 126300 + }, + { + "epoch": 0.92, + "learning_rate": 0.00039636481480138914, + "loss": 4.2698, + "step": 126400 + }, + { + "epoch": 0.92, + "learning_rate": 0.00039636191484271354, + "loss": 4.2679, + "step": 126500 + }, + { + "epoch": 0.92, + "learning_rate": 0.00039635901488403795, + "loss": 4.2704, + "step": 126600 + }, + { + "epoch": 0.92, + "learning_rate": 0.00039635611492536236, + "loss": 4.2771, + "step": 126700 + }, + { + "epoch": 0.92, + "learning_rate": 0.00039635321496668676, + "loss": 4.2709, + "step": 126800 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003963503150080111, + "loss": 4.2732, + "step": 126900 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003963474150493355, + "loss": 4.2806, + "step": 127000 + }, + { + "epoch": 0.92, + "learning_rate": 0.00039634451509066, + "loss": 4.2706, + "step": 127100 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003963416151319844, + "loss": 4.2771, + "step": 127200 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003963387151733088, + "loss": 4.2728, + "step": 127300 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003963358152146332, + "loss": 4.2712, + "step": 127400 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003963329152559576, + "loss": 4.2618, + "step": 127500 + }, + { + "epoch": 0.93, + "learning_rate": 0.000396330015297282, + "loss": 4.2807, + "step": 127600 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003963271153386064, + "loss": 4.2607, + "step": 127700 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003963242153799309, + "loss": 4.2743, + "step": 127800 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003963213154212553, + "loss": 4.2683, + "step": 127900 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003963184154625797, + "loss": 4.2652, + "step": 128000 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003963155155039041, + "loss": 4.2703, + "step": 128100 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003963126155452285, + "loss": 4.2654, + "step": 128200 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003963097155865529, + "loss": 4.2552, + "step": 128300 + }, + { + "epoch": 0.93, + "learning_rate": 0.00039630681562787733, + "loss": 4.2764, + "step": 128400 + }, + { + "epoch": 0.93, + "learning_rate": 0.00039630391566920173, + "loss": 4.2754, + "step": 128500 + }, + { + "epoch": 0.93, + "learning_rate": 0.00039630101571052614, + "loss": 4.2797, + "step": 128600 + }, + { + "epoch": 0.93, + "learning_rate": 0.00039629811575185055, + "loss": 4.271, + "step": 128700 + }, + { + "epoch": 0.93, + "learning_rate": 0.00039629521579317495, + "loss": 4.2601, + "step": 128800 + }, + { + "epoch": 0.93, + "learning_rate": 0.00039629231583449936, + "loss": 4.271, + "step": 128900 + }, + { + "epoch": 0.94, + "learning_rate": 0.00039628941587582377, + "loss": 4.2733, + "step": 129000 + }, + { + "epoch": 0.94, + "learning_rate": 0.000396286544916735, + "loss": 4.2675, + "step": 129100 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003962836449580594, + "loss": 4.2694, + "step": 129200 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003962807449993838, + "loss": 4.2684, + "step": 129300 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003962778450407082, + "loss": 4.2599, + "step": 129400 + }, + { + "epoch": 0.94, + "learning_rate": 0.00039627494508203255, + "loss": 4.2662, + "step": 129500 + }, + { + "epoch": 0.94, + "learning_rate": 0.000396272045123357, + "loss": 4.2849, + "step": 129600 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003962691451646814, + "loss": 4.2765, + "step": 129700 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003962662452060058, + "loss": 4.2723, + "step": 129800 + }, + { + "epoch": 0.94, + "learning_rate": 0.00039626334524733023, + "loss": 4.2655, + "step": 129900 + }, + { + "epoch": 0.94, + "learning_rate": 0.00039626044528865464, + "loss": 4.2715, + "step": 130000 + }, + { + "epoch": 0.94, + "learning_rate": 0.00039625754532997905, + "loss": 4.2692, + "step": 130100 + }, + { + "epoch": 0.94, + "learning_rate": 0.00039625464537130345, + "loss": 4.2724, + "step": 130200 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003962517454126279, + "loss": 4.2797, + "step": 130300 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003962488454539523, + "loss": 4.2683, + "step": 130400 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003962459744948634, + "loss": 4.2658, + "step": 130500 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003962430745361879, + "loss": 4.2783, + "step": 130600 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003962401745775123, + "loss": 4.2649, + "step": 130700 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003962372746188367, + "loss": 4.2539, + "step": 130800 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003962343746601611, + "loss": 4.2705, + "step": 130900 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003962314747014855, + "loss": 4.2675, + "step": 131000 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003962285747428099, + "loss": 4.2783, + "step": 131100 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003962256747841343, + "loss": 4.2692, + "step": 131200 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003962227748254588, + "loss": 4.2667, + "step": 131300 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003962198748667832, + "loss": 4.26, + "step": 131400 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003962169749081076, + "loss": 4.2503, + "step": 131500 + }, + { + "epoch": 0.95, + "learning_rate": 0.00039621410394901876, + "loss": 4.283, + "step": 131600 + }, + { + "epoch": 0.95, + "learning_rate": 0.00039621120399034316, + "loss": 4.2748, + "step": 131700 + }, + { + "epoch": 0.96, + "learning_rate": 0.00039620830403166757, + "loss": 4.2489, + "step": 131800 + }, + { + "epoch": 0.96, + "learning_rate": 0.000396205404072992, + "loss": 4.2781, + "step": 131900 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003962025041143164, + "loss": 4.2765, + "step": 132000 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003961996041556408, + "loss": 4.2661, + "step": 132100 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003961967041969652, + "loss": 4.2691, + "step": 132200 + }, + { + "epoch": 0.96, + "learning_rate": 0.00039619380423828966, + "loss": 4.275, + "step": 132300 + }, + { + "epoch": 0.96, + "learning_rate": 0.00039619090427961407, + "loss": 4.2648, + "step": 132400 + }, + { + "epoch": 0.96, + "learning_rate": 0.00039618800432093847, + "loss": 4.2667, + "step": 132500 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003961851043622629, + "loss": 4.2687, + "step": 132600 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003961822044035873, + "loss": 4.2784, + "step": 132700 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003961793044449117, + "loss": 4.2728, + "step": 132800 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003961764044862361, + "loss": 4.2728, + "step": 132900 + }, + { + "epoch": 0.96, + "learning_rate": 0.00039617350452756045, + "loss": 4.2564, + "step": 133000 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003961706045688849, + "loss": 4.2728, + "step": 133100 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003961677046102093, + "loss": 4.2606, + "step": 133200 + }, + { + "epoch": 0.97, + "learning_rate": 0.00039616480465153373, + "loss": 4.2636, + "step": 133300 + }, + { + "epoch": 0.97, + "learning_rate": 0.00039616190469285813, + "loss": 4.2775, + "step": 133400 + }, + { + "epoch": 0.97, + "learning_rate": 0.00039615900473418254, + "loss": 4.2599, + "step": 133500 + }, + { + "epoch": 0.97, + "learning_rate": 0.00039615610477550695, + "loss": 4.2682, + "step": 133600 + }, + { + "epoch": 0.97, + "learning_rate": 0.00039615320481683136, + "loss": 4.2596, + "step": 133700 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003961503048581558, + "loss": 4.2736, + "step": 133800 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003961474048994802, + "loss": 4.2766, + "step": 133900 + }, + { + "epoch": 0.97, + "learning_rate": 0.00039614450494080463, + "loss": 4.255, + "step": 134000 + }, + { + "epoch": 0.97, + "learning_rate": 0.00039614160498212904, + "loss": 4.2703, + "step": 134100 + }, + { + "epoch": 0.97, + "learning_rate": 0.00039613870502345344, + "loss": 4.2619, + "step": 134200 + }, + { + "epoch": 0.97, + "learning_rate": 0.00039613580506477785, + "loss": 4.2752, + "step": 134300 + }, + { + "epoch": 0.97, + "learning_rate": 0.00039613290510610226, + "loss": 4.2613, + "step": 134400 + }, + { + "epoch": 0.98, + "learning_rate": 0.00039613000514742666, + "loss": 4.2564, + "step": 134500 + }, + { + "epoch": 0.98, + "learning_rate": 0.00039612710518875107, + "loss": 4.2566, + "step": 134600 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003961242052300755, + "loss": 4.2684, + "step": 134700 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003961213052713999, + "loss": 4.2739, + "step": 134800 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003961184053127243, + "loss": 4.2727, + "step": 134900 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003961155053540487, + "loss": 4.2665, + "step": 135000 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003961126053953731, + "loss": 4.2768, + "step": 135100 + }, + { + "epoch": 0.98, + "learning_rate": 0.00039610970543669757, + "loss": 4.2743, + "step": 135200 + }, + { + "epoch": 0.98, + "learning_rate": 0.000396106805478022, + "loss": 4.2744, + "step": 135300 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003961039055193464, + "loss": 4.2652, + "step": 135400 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003961010055606708, + "loss": 4.2782, + "step": 135500 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003960981056019952, + "loss": 4.2642, + "step": 135600 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003960952056433196, + "loss": 4.2705, + "step": 135700 + }, + { + "epoch": 0.98, + "learning_rate": 0.00039609233468423076, + "loss": 4.2776, + "step": 135800 + }, + { + "epoch": 0.99, + "learning_rate": 0.00039608943472555516, + "loss": 4.2741, + "step": 135900 + }, + { + "epoch": 0.99, + "learning_rate": 0.00039608653476687957, + "loss": 4.2702, + "step": 136000 + }, + { + "epoch": 0.99, + "learning_rate": 0.000396083634808204, + "loss": 4.2594, + "step": 136100 + }, + { + "epoch": 0.99, + "learning_rate": 0.00039608073484952844, + "loss": 4.2726, + "step": 136200 + }, + { + "epoch": 0.99, + "learning_rate": 0.00039607783489085285, + "loss": 4.2659, + "step": 136300 + }, + { + "epoch": 0.99, + "learning_rate": 0.00039607493493217725, + "loss": 4.2722, + "step": 136400 + }, + { + "epoch": 0.99, + "learning_rate": 0.00039607203497350166, + "loss": 4.2703, + "step": 136500 + }, + { + "epoch": 0.99, + "learning_rate": 0.00039606913501482607, + "loss": 4.2611, + "step": 136600 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003960662350561505, + "loss": 4.2531, + "step": 136700 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003960633350974749, + "loss": 4.2809, + "step": 136800 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003960604351387993, + "loss": 4.2615, + "step": 136900 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003960575351801237, + "loss": 4.2644, + "step": 137000 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003960546352214481, + "loss": 4.2657, + "step": 137100 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003960517352627725, + "loss": 4.2759, + "step": 137200 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003960488353040969, + "loss": 4.2551, + "step": 137300 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003960459353454213, + "loss": 4.2732, + "step": 137400 + }, + { + "epoch": 1.0, + "learning_rate": 0.00039604303538674573, + "loss": 4.273, + "step": 137500 + }, + { + "epoch": 1.0, + "learning_rate": 0.00039604013542807014, + "loss": 4.2673, + "step": 137600 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003960372354693946, + "loss": 4.2651, + "step": 137700 + }, + { + "epoch": 1.0, + "learning_rate": 0.000396034335510719, + "loss": 4.2722, + "step": 137800 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003960314355520434, + "loss": 4.2633, + "step": 137900 + }, + { + "epoch": 1.0, + "learning_rate": 0.00039602856459295457, + "loss": 4.279, + "step": 138000 + }, + { + "epoch": 1.0, + "learning_rate": 0.00039602566463427897, + "loss": 4.2348, + "step": 138100 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003960227646756034, + "loss": 4.2516, + "step": 138200 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003960198647169278, + "loss": 4.2515, + "step": 138300 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003960169647582522, + "loss": 4.2477, + "step": 138400 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003960140647995766, + "loss": 4.2495, + "step": 138500 + }, + { + "epoch": 1.0, + "learning_rate": 0.000396011164840901, + "loss": 4.2598, + "step": 138600 + }, + { + "epoch": 1.01, + "learning_rate": 0.00039600826488222547, + "loss": 4.2388, + "step": 138700 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003960053649235499, + "loss": 4.2453, + "step": 138800 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003960024649648743, + "loss": 4.2645, + "step": 138900 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003959995650061987, + "loss": 4.2496, + "step": 139000 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003959966650475231, + "loss": 4.2489, + "step": 139100 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003959937650888475, + "loss": 4.2465, + "step": 139200 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003959908651301719, + "loss": 4.2596, + "step": 139300 + }, + { + "epoch": 1.01, + "learning_rate": 0.00039598796517149637, + "loss": 4.2449, + "step": 139400 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003959850652128207, + "loss": 4.2567, + "step": 139500 + }, + { + "epoch": 1.01, + "learning_rate": 0.00039598216525414513, + "loss": 4.2672, + "step": 139600 + }, + { + "epoch": 1.01, + "learning_rate": 0.00039597926529546954, + "loss": 4.2543, + "step": 139700 + }, + { + "epoch": 1.01, + "learning_rate": 0.00039597636533679394, + "loss": 4.2485, + "step": 139800 + }, + { + "epoch": 1.01, + "learning_rate": 0.00039597346537811835, + "loss": 4.2451, + "step": 139900 + }, + { + "epoch": 1.01, + "learning_rate": 0.00039597056541944276, + "loss": 4.2691, + "step": 140000 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003959676654607672, + "loss": 4.2628, + "step": 140100 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003959647655020916, + "loss": 4.2664, + "step": 140200 + }, + { + "epoch": 1.02, + "learning_rate": 0.00039596186554341603, + "loss": 4.2495, + "step": 140300 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003959589945843272, + "loss": 4.2386, + "step": 140400 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003959560946256516, + "loss": 4.2492, + "step": 140500 + }, + { + "epoch": 1.02, + "learning_rate": 0.000395953194666976, + "loss": 4.2657, + "step": 140600 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003959502947083004, + "loss": 4.2436, + "step": 140700 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003959473947496248, + "loss": 4.2594, + "step": 140800 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003959444947909492, + "loss": 4.2587, + "step": 140900 + }, + { + "epoch": 1.02, + "learning_rate": 0.00039594159483227363, + "loss": 4.2593, + "step": 141000 + }, + { + "epoch": 1.02, + "learning_rate": 0.00039593869487359804, + "loss": 4.2574, + "step": 141100 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003959357949149225, + "loss": 4.2377, + "step": 141200 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003959328949562469, + "loss": 4.2616, + "step": 141300 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003959299949975713, + "loss": 4.248, + "step": 141400 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003959270950388957, + "loss": 4.2529, + "step": 141500 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003959241950802201, + "loss": 4.255, + "step": 141600 + }, + { + "epoch": 1.03, + "learning_rate": 0.00039592129512154453, + "loss": 4.2466, + "step": 141700 + }, + { + "epoch": 1.03, + "learning_rate": 0.00039591839516286894, + "loss": 4.2584, + "step": 141800 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003959154952041934, + "loss": 4.2561, + "step": 141900 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003959125952455178, + "loss": 4.2589, + "step": 142000 + }, + { + "epoch": 1.03, + "learning_rate": 0.00039590969528684216, + "loss": 4.2486, + "step": 142100 + }, + { + "epoch": 1.03, + "learning_rate": 0.00039590679532816657, + "loss": 4.2545, + "step": 142200 + }, + { + "epoch": 1.03, + "learning_rate": 0.000395903895369491, + "loss": 4.2491, + "step": 142300 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003959009954108154, + "loss": 4.2506, + "step": 142400 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003958981244517266, + "loss": 4.2516, + "step": 142500 + }, + { + "epoch": 1.03, + "learning_rate": 0.000395895224493051, + "loss": 4.2553, + "step": 142600 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003958923245343754, + "loss": 4.2579, + "step": 142700 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003958894245756998, + "loss": 4.2466, + "step": 142800 + }, + { + "epoch": 1.04, + "learning_rate": 0.00039588652461702427, + "loss": 4.251, + "step": 142900 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003958836246583486, + "loss": 4.265, + "step": 143000 + }, + { + "epoch": 1.04, + "learning_rate": 0.00039588072469967303, + "loss": 4.2554, + "step": 143100 + }, + { + "epoch": 1.04, + "learning_rate": 0.00039587782474099744, + "loss": 4.2616, + "step": 143200 + }, + { + "epoch": 1.04, + "learning_rate": 0.00039587492478232184, + "loss": 4.2547, + "step": 143300 + }, + { + "epoch": 1.04, + "learning_rate": 0.00039587202482364625, + "loss": 4.2471, + "step": 143400 + }, + { + "epoch": 1.04, + "learning_rate": 0.00039586912486497066, + "loss": 4.2546, + "step": 143500 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003958662249062951, + "loss": 4.2579, + "step": 143600 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003958633249476195, + "loss": 4.2548, + "step": 143700 + }, + { + "epoch": 1.04, + "learning_rate": 0.00039586042498894393, + "loss": 4.261, + "step": 143800 + }, + { + "epoch": 1.04, + "learning_rate": 0.00039585752503026834, + "loss": 4.2619, + "step": 143900 + }, + { + "epoch": 1.04, + "learning_rate": 0.00039585462507159275, + "loss": 4.2535, + "step": 144000 + }, + { + "epoch": 1.04, + "learning_rate": 0.00039585172511291715, + "loss": 4.2474, + "step": 144100 + }, + { + "epoch": 1.05, + "learning_rate": 0.00039584882515424156, + "loss": 4.2622, + "step": 144200 + }, + { + "epoch": 1.05, + "learning_rate": 0.000395845925195566, + "loss": 4.257, + "step": 144300 + }, + { + "epoch": 1.05, + "learning_rate": 0.00039584302523689043, + "loss": 4.2449, + "step": 144400 + }, + { + "epoch": 1.05, + "learning_rate": 0.00039584012527821484, + "loss": 4.2581, + "step": 144500 + }, + { + "epoch": 1.05, + "learning_rate": 0.00039583722531953924, + "loss": 4.2738, + "step": 144600 + }, + { + "epoch": 1.05, + "learning_rate": 0.00039583432536086365, + "loss": 4.2582, + "step": 144700 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003958314544017748, + "loss": 4.2646, + "step": 144800 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003958285544430992, + "loss": 4.2498, + "step": 144900 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003958256544844236, + "loss": 4.2492, + "step": 145000 + }, + { + "epoch": 1.05, + "learning_rate": 0.000395822754525748, + "loss": 4.2535, + "step": 145100 + }, + { + "epoch": 1.05, + "learning_rate": 0.00039581985456707243, + "loss": 4.2594, + "step": 145200 + }, + { + "epoch": 1.05, + "learning_rate": 0.00039581695460839684, + "loss": 4.2462, + "step": 145300 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003958140546497213, + "loss": 4.261, + "step": 145400 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003958111546910457, + "loss": 4.2398, + "step": 145500 + }, + { + "epoch": 1.06, + "learning_rate": 0.00039580825473237006, + "loss": 4.2438, + "step": 145600 + }, + { + "epoch": 1.06, + "learning_rate": 0.00039580535477369447, + "loss": 4.2646, + "step": 145700 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003958024548150189, + "loss": 4.2509, + "step": 145800 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003957995548563433, + "loss": 4.2446, + "step": 145900 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003957966548976677, + "loss": 4.2389, + "step": 146000 + }, + { + "epoch": 1.06, + "learning_rate": 0.00039579375493899215, + "loss": 4.247, + "step": 146100 + }, + { + "epoch": 1.06, + "learning_rate": 0.00039579085498031656, + "loss": 4.2615, + "step": 146200 + }, + { + "epoch": 1.06, + "learning_rate": 0.00039578795502164096, + "loss": 4.2644, + "step": 146300 + }, + { + "epoch": 1.06, + "learning_rate": 0.00039578505506296537, + "loss": 4.2549, + "step": 146400 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003957821551042898, + "loss": 4.2529, + "step": 146500 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003957792551456142, + "loss": 4.2562, + "step": 146600 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003957763551869386, + "loss": 4.2523, + "step": 146700 + }, + { + "epoch": 1.06, + "learning_rate": 0.00039577345522826305, + "loss": 4.2565, + "step": 146800 + }, + { + "epoch": 1.06, + "learning_rate": 0.00039577055526958746, + "loss": 4.256, + "step": 146900 + }, + { + "epoch": 1.07, + "learning_rate": 0.00039576765531091187, + "loss": 4.2638, + "step": 147000 + }, + { + "epoch": 1.07, + "learning_rate": 0.000395764784351823, + "loss": 4.2532, + "step": 147100 + }, + { + "epoch": 1.07, + "learning_rate": 0.00039576188439314743, + "loss": 4.2485, + "step": 147200 + }, + { + "epoch": 1.07, + "learning_rate": 0.00039575898443447183, + "loss": 4.2545, + "step": 147300 + }, + { + "epoch": 1.07, + "learning_rate": 0.00039575608447579624, + "loss": 4.2574, + "step": 147400 + }, + { + "epoch": 1.07, + "learning_rate": 0.00039575318451712065, + "loss": 4.2408, + "step": 147500 + }, + { + "epoch": 1.07, + "learning_rate": 0.00039575028455844506, + "loss": 4.2522, + "step": 147600 + }, + { + "epoch": 1.07, + "learning_rate": 0.00039574738459976946, + "loss": 4.2633, + "step": 147700 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003957444846410939, + "loss": 4.2535, + "step": 147800 + }, + { + "epoch": 1.07, + "learning_rate": 0.00039574158468241833, + "loss": 4.2572, + "step": 147900 + }, + { + "epoch": 1.07, + "learning_rate": 0.00039573868472374274, + "loss": 4.2556, + "step": 148000 + }, + { + "epoch": 1.07, + "learning_rate": 0.00039573578476506714, + "loss": 4.2544, + "step": 148100 + }, + { + "epoch": 1.07, + "learning_rate": 0.00039573288480639155, + "loss": 4.2551, + "step": 148200 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003957299848477159, + "loss": 4.2632, + "step": 148300 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003957270848890403, + "loss": 4.2524, + "step": 148400 + }, + { + "epoch": 1.08, + "learning_rate": 0.00039572418493036477, + "loss": 4.2406, + "step": 148500 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003957212849716892, + "loss": 4.2583, + "step": 148600 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003957183850130136, + "loss": 4.2527, + "step": 148700 + }, + { + "epoch": 1.08, + "learning_rate": 0.000395715485054338, + "loss": 4.2723, + "step": 148800 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003957125850956624, + "loss": 4.2573, + "step": 148900 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003957096851369868, + "loss": 4.264, + "step": 149000 + }, + { + "epoch": 1.08, + "learning_rate": 0.000395706814177898, + "loss": 4.2452, + "step": 149100 + }, + { + "epoch": 1.08, + "learning_rate": 0.00039570391421922237, + "loss": 4.2617, + "step": 149200 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003957010142605468, + "loss": 4.2467, + "step": 149300 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003956981143018712, + "loss": 4.2598, + "step": 149400 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003956952143431956, + "loss": 4.268, + "step": 149500 + }, + { + "epoch": 1.08, + "learning_rate": 0.00039569231438452005, + "loss": 4.2565, + "step": 149600 + }, + { + "epoch": 1.09, + "learning_rate": 0.00039568941442584446, + "loss": 4.2635, + "step": 149700 + }, + { + "epoch": 1.09, + "learning_rate": 0.00039568651446716886, + "loss": 4.2547, + "step": 149800 + }, + { + "epoch": 1.09, + "learning_rate": 0.00039568361450849327, + "loss": 4.2567, + "step": 149900 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003956807145498177, + "loss": 4.251, + "step": 150000 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003956778145911421, + "loss": 4.2569, + "step": 150100 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003956749146324665, + "loss": 4.2409, + "step": 150200 + }, + { + "epoch": 1.09, + "learning_rate": 0.00039567201467379095, + "loss": 4.2351, + "step": 150300 + }, + { + "epoch": 1.09, + "learning_rate": 0.00039566911471511536, + "loss": 4.2555, + "step": 150400 + }, + { + "epoch": 1.09, + "learning_rate": 0.00039566621475643977, + "loss": 4.2598, + "step": 150500 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003956633147977642, + "loss": 4.2591, + "step": 150600 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003956604148390886, + "loss": 4.2627, + "step": 150700 + }, + { + "epoch": 1.09, + "learning_rate": 0.000395657514880413, + "loss": 4.2596, + "step": 150800 + }, + { + "epoch": 1.09, + "learning_rate": 0.00039565461492173734, + "loss": 4.2493, + "step": 150900 + }, + { + "epoch": 1.09, + "learning_rate": 0.00039565174396264855, + "loss": 4.2587, + "step": 151000 + }, + { + "epoch": 1.1, + "learning_rate": 0.00039564884400397296, + "loss": 4.2508, + "step": 151100 + }, + { + "epoch": 1.1, + "learning_rate": 0.00039564594404529736, + "loss": 4.2541, + "step": 151200 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003956430440866218, + "loss": 4.2507, + "step": 151300 + }, + { + "epoch": 1.1, + "learning_rate": 0.00039564014412794623, + "loss": 4.2361, + "step": 151400 + }, + { + "epoch": 1.1, + "learning_rate": 0.00039563724416927064, + "loss": 4.2563, + "step": 151500 + }, + { + "epoch": 1.1, + "learning_rate": 0.00039563434421059505, + "loss": 4.2622, + "step": 151600 + }, + { + "epoch": 1.1, + "learning_rate": 0.00039563144425191945, + "loss": 4.2608, + "step": 151700 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003956285442932438, + "loss": 4.2494, + "step": 151800 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003956256443345682, + "loss": 4.2624, + "step": 151900 + }, + { + "epoch": 1.1, + "learning_rate": 0.00039562274437589267, + "loss": 4.2411, + "step": 152000 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003956198444172171, + "loss": 4.2463, + "step": 152100 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003956169444585415, + "loss": 4.2665, + "step": 152200 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003956140444998659, + "loss": 4.2685, + "step": 152300 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003956111445411903, + "loss": 4.2457, + "step": 152400 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003956082445825147, + "loss": 4.2617, + "step": 152500 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003956053446238391, + "loss": 4.2457, + "step": 152600 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003956024446651635, + "loss": 4.2505, + "step": 152700 + }, + { + "epoch": 1.11, + "learning_rate": 0.000395599544706488, + "loss": 4.2533, + "step": 152800 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003955966447478124, + "loss": 4.2467, + "step": 152900 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003955937447891368, + "loss": 4.2475, + "step": 153000 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003955908448304612, + "loss": 4.265, + "step": 153100 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003955879448717856, + "loss": 4.2647, + "step": 153200 + }, + { + "epoch": 1.11, + "learning_rate": 0.00039558504491311, + "loss": 4.2636, + "step": 153300 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003955821449544344, + "loss": 4.2619, + "step": 153400 + }, + { + "epoch": 1.11, + "learning_rate": 0.00039557924499575883, + "loss": 4.2391, + "step": 153500 + }, + { + "epoch": 1.11, + "learning_rate": 0.00039557634503708324, + "loss": 4.2457, + "step": 153600 + }, + { + "epoch": 1.11, + "learning_rate": 0.00039557344507840764, + "loss": 4.2502, + "step": 153700 + }, + { + "epoch": 1.11, + "learning_rate": 0.00039557054511973205, + "loss": 4.2532, + "step": 153800 + }, + { + "epoch": 1.12, + "learning_rate": 0.00039556764516105646, + "loss": 4.269, + "step": 153900 + }, + { + "epoch": 1.12, + "learning_rate": 0.00039556474520238086, + "loss": 4.2413, + "step": 154000 + }, + { + "epoch": 1.12, + "learning_rate": 0.00039556184524370527, + "loss": 4.2538, + "step": 154100 + }, + { + "epoch": 1.12, + "learning_rate": 0.00039555894528502973, + "loss": 4.2508, + "step": 154200 + }, + { + "epoch": 1.12, + "learning_rate": 0.00039555604532635414, + "loss": 4.2591, + "step": 154300 + }, + { + "epoch": 1.12, + "learning_rate": 0.00039555314536767855, + "loss": 4.2544, + "step": 154400 + }, + { + "epoch": 1.12, + "learning_rate": 0.00039555024540900295, + "loss": 4.2476, + "step": 154500 + }, + { + "epoch": 1.12, + "learning_rate": 0.00039554734545032736, + "loss": 4.2444, + "step": 154600 + }, + { + "epoch": 1.12, + "learning_rate": 0.00039554444549165177, + "loss": 4.2557, + "step": 154700 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003955415455329762, + "loss": 4.2562, + "step": 154800 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003955386455743006, + "loss": 4.2605, + "step": 154900 + }, + { + "epoch": 1.12, + "learning_rate": 0.000395535745615625, + "loss": 4.2444, + "step": 155000 + }, + { + "epoch": 1.12, + "learning_rate": 0.00039553287465653614, + "loss": 4.2384, + "step": 155100 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003955299746978606, + "loss": 4.2535, + "step": 155200 + }, + { + "epoch": 1.13, + "learning_rate": 0.000395527074739185, + "loss": 4.2563, + "step": 155300 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003955241747805094, + "loss": 4.2638, + "step": 155400 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003955212748218338, + "loss": 4.2588, + "step": 155500 + }, + { + "epoch": 1.13, + "learning_rate": 0.00039551837486315823, + "loss": 4.2572, + "step": 155600 + }, + { + "epoch": 1.13, + "learning_rate": 0.00039551547490448264, + "loss": 4.2492, + "step": 155700 + }, + { + "epoch": 1.13, + "learning_rate": 0.00039551257494580705, + "loss": 4.264, + "step": 155800 + }, + { + "epoch": 1.13, + "learning_rate": 0.00039550967498713145, + "loss": 4.2591, + "step": 155900 + }, + { + "epoch": 1.13, + "learning_rate": 0.00039550677502845586, + "loss": 4.2505, + "step": 156000 + }, + { + "epoch": 1.13, + "learning_rate": 0.00039550387506978027, + "loss": 4.2703, + "step": 156100 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003955009751111047, + "loss": 4.2503, + "step": 156200 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003954980751524291, + "loss": 4.252, + "step": 156300 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003954951751937535, + "loss": 4.2568, + "step": 156400 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003954922752350779, + "loss": 4.2623, + "step": 156500 + }, + { + "epoch": 1.14, + "learning_rate": 0.00039548937527640235, + "loss": 4.2493, + "step": 156600 + }, + { + "epoch": 1.14, + "learning_rate": 0.00039548647531772676, + "loss": 4.2528, + "step": 156700 + }, + { + "epoch": 1.14, + "learning_rate": 0.00039548357535905117, + "loss": 4.2652, + "step": 156800 + }, + { + "epoch": 1.14, + "learning_rate": 0.0003954806754003756, + "loss": 4.2479, + "step": 156900 + }, + { + "epoch": 1.14, + "learning_rate": 0.0003954777754417, + "loss": 4.2426, + "step": 157000 + }, + { + "epoch": 1.14, + "learning_rate": 0.00039547490448261114, + "loss": 4.2587, + "step": 157100 + }, + { + "epoch": 1.14, + "learning_rate": 0.00039547200452393554, + "loss": 4.2479, + "step": 157200 + }, + { + "epoch": 1.14, + "learning_rate": 0.00039546910456525995, + "loss": 4.2462, + "step": 157300 + }, + { + "epoch": 1.14, + "learning_rate": 0.00039546620460658436, + "loss": 4.2426, + "step": 157400 + }, + { + "epoch": 1.14, + "learning_rate": 0.00039546330464790877, + "loss": 4.2572, + "step": 157500 + }, + { + "epoch": 1.14, + "learning_rate": 0.00039546040468923317, + "loss": 4.2526, + "step": 157600 + }, + { + "epoch": 1.14, + "learning_rate": 0.00039545750473055763, + "loss": 4.2522, + "step": 157700 + }, + { + "epoch": 1.14, + "learning_rate": 0.00039545460477188204, + "loss": 4.2531, + "step": 157800 + }, + { + "epoch": 1.14, + "learning_rate": 0.00039545170481320645, + "loss": 4.2479, + "step": 157900 + }, + { + "epoch": 1.15, + "learning_rate": 0.00039544880485453085, + "loss": 4.2396, + "step": 158000 + }, + { + "epoch": 1.15, + "learning_rate": 0.00039544590489585526, + "loss": 4.2475, + "step": 158100 + }, + { + "epoch": 1.15, + "learning_rate": 0.00039544300493717967, + "loss": 4.2521, + "step": 158200 + }, + { + "epoch": 1.15, + "learning_rate": 0.0003954401049785041, + "loss": 4.2605, + "step": 158300 + }, + { + "epoch": 1.15, + "learning_rate": 0.0003954372050198285, + "loss": 4.2524, + "step": 158400 + }, + { + "epoch": 1.15, + "learning_rate": 0.0003954343050611529, + "loss": 4.2557, + "step": 158500 + }, + { + "epoch": 1.15, + "learning_rate": 0.0003954314051024773, + "loss": 4.2417, + "step": 158600 + }, + { + "epoch": 1.15, + "learning_rate": 0.0003954285051438017, + "loss": 4.2463, + "step": 158700 + }, + { + "epoch": 1.15, + "learning_rate": 0.0003954256051851261, + "loss": 4.2477, + "step": 158800 + }, + { + "epoch": 1.15, + "learning_rate": 0.0003954227052264505, + "loss": 4.2471, + "step": 158900 + }, + { + "epoch": 1.15, + "learning_rate": 0.0003954198052677749, + "loss": 4.2452, + "step": 159000 + }, + { + "epoch": 1.15, + "learning_rate": 0.0003954169053090994, + "loss": 4.2543, + "step": 159100 + }, + { + "epoch": 1.15, + "learning_rate": 0.0003954140053504238, + "loss": 4.2592, + "step": 159200 + }, + { + "epoch": 1.15, + "learning_rate": 0.0003954111053917482, + "loss": 4.2399, + "step": 159300 + }, + { + "epoch": 1.16, + "learning_rate": 0.00039540823443265935, + "loss": 4.2619, + "step": 159400 + }, + { + "epoch": 1.16, + "learning_rate": 0.00039540533447398376, + "loss": 4.2622, + "step": 159500 + }, + { + "epoch": 1.16, + "learning_rate": 0.00039540243451530817, + "loss": 4.2482, + "step": 159600 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003953995345566326, + "loss": 4.2518, + "step": 159700 + }, + { + "epoch": 1.16, + "learning_rate": 0.000395396634597957, + "loss": 4.2489, + "step": 159800 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003953937346392814, + "loss": 4.2451, + "step": 159900 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003953908346806058, + "loss": 4.2581, + "step": 160000 + }, + { + "epoch": 1.16, + "learning_rate": 0.00039538793472193026, + "loss": 4.2473, + "step": 160100 + }, + { + "epoch": 1.16, + "learning_rate": 0.00039538503476325466, + "loss": 4.2599, + "step": 160200 + }, + { + "epoch": 1.16, + "learning_rate": 0.00039538213480457907, + "loss": 4.2561, + "step": 160300 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003953792348459035, + "loss": 4.2408, + "step": 160400 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003953763348872279, + "loss": 4.2504, + "step": 160500 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003953734349285523, + "loss": 4.2531, + "step": 160600 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003953705349698767, + "loss": 4.2596, + "step": 160700 + }, + { + "epoch": 1.17, + "learning_rate": 0.0003953676350112011, + "loss": 4.2453, + "step": 160800 + }, + { + "epoch": 1.17, + "learning_rate": 0.0003953647350525255, + "loss": 4.2489, + "step": 160900 + }, + { + "epoch": 1.17, + "learning_rate": 0.0003953618350938499, + "loss": 4.2474, + "step": 161000 + }, + { + "epoch": 1.17, + "learning_rate": 0.0003953589351351743, + "loss": 4.2477, + "step": 161100 + }, + { + "epoch": 1.17, + "learning_rate": 0.00039535603517649873, + "loss": 4.2698, + "step": 161200 + }, + { + "epoch": 1.17, + "learning_rate": 0.00039535313521782314, + "loss": 4.2399, + "step": 161300 + }, + { + "epoch": 1.17, + "learning_rate": 0.00039535023525914755, + "loss": 4.2424, + "step": 161400 + }, + { + "epoch": 1.17, + "learning_rate": 0.00039534733530047195, + "loss": 4.2503, + "step": 161500 + }, + { + "epoch": 1.17, + "learning_rate": 0.0003953444353417964, + "loss": 4.2413, + "step": 161600 + }, + { + "epoch": 1.17, + "learning_rate": 0.00039534156438270757, + "loss": 4.2328, + "step": 161700 + }, + { + "epoch": 1.17, + "learning_rate": 0.000395338664424032, + "loss": 4.2463, + "step": 161800 + }, + { + "epoch": 1.17, + "learning_rate": 0.0003953357644653564, + "loss": 4.265, + "step": 161900 + }, + { + "epoch": 1.17, + "learning_rate": 0.00039533289350626754, + "loss": 4.2594, + "step": 162000 + }, + { + "epoch": 1.18, + "learning_rate": 0.00039532999354759195, + "loss": 4.2401, + "step": 162100 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003953270935889164, + "loss": 4.2512, + "step": 162200 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003953241936302408, + "loss": 4.2466, + "step": 162300 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003953212936715652, + "loss": 4.2471, + "step": 162400 + }, + { + "epoch": 1.18, + "learning_rate": 0.00039531839371288963, + "loss": 4.2502, + "step": 162500 + }, + { + "epoch": 1.18, + "learning_rate": 0.00039531549375421403, + "loss": 4.255, + "step": 162600 + }, + { + "epoch": 1.18, + "learning_rate": 0.00039531259379553844, + "loss": 4.2443, + "step": 162700 + }, + { + "epoch": 1.18, + "learning_rate": 0.00039530969383686285, + "loss": 4.2465, + "step": 162800 + }, + { + "epoch": 1.18, + "learning_rate": 0.00039530679387818725, + "loss": 4.2448, + "step": 162900 + }, + { + "epoch": 1.18, + "learning_rate": 0.00039530389391951166, + "loss": 4.2576, + "step": 163000 + }, + { + "epoch": 1.18, + "learning_rate": 0.00039530099396083607, + "loss": 4.2583, + "step": 163100 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003952980940021605, + "loss": 4.2422, + "step": 163200 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003952951940434849, + "loss": 4.2529, + "step": 163300 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003952922940848093, + "loss": 4.2453, + "step": 163400 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003952893941261337, + "loss": 4.2632, + "step": 163500 + }, + { + "epoch": 1.19, + "learning_rate": 0.00039528649416745816, + "loss": 4.2557, + "step": 163600 + }, + { + "epoch": 1.19, + "learning_rate": 0.00039528359420878256, + "loss": 4.2465, + "step": 163700 + }, + { + "epoch": 1.19, + "learning_rate": 0.00039528069425010697, + "loss": 4.2573, + "step": 163800 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003952777942914314, + "loss": 4.258, + "step": 163900 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003952748943327558, + "loss": 4.2399, + "step": 164000 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003952719943740802, + "loss": 4.2495, + "step": 164100 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003952690944154046, + "loss": 4.2519, + "step": 164200 + }, + { + "epoch": 1.19, + "learning_rate": 0.000395266194456729, + "loss": 4.2527, + "step": 164300 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003952632944980534, + "loss": 4.2697, + "step": 164400 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003952603945393778, + "loss": 4.2511, + "step": 164500 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003952574945807022, + "loss": 4.2514, + "step": 164600 + }, + { + "epoch": 1.19, + "learning_rate": 0.00039525459462202663, + "loss": 4.2456, + "step": 164700 + }, + { + "epoch": 1.19, + "learning_rate": 0.00039525169466335104, + "loss": 4.2442, + "step": 164800 + }, + { + "epoch": 1.2, + "learning_rate": 0.00039524879470467545, + "loss": 4.2546, + "step": 164900 + }, + { + "epoch": 1.2, + "learning_rate": 0.00039524589474599985, + "loss": 4.2513, + "step": 165000 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003952429947873243, + "loss": 4.2516, + "step": 165100 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003952400948286487, + "loss": 4.2343, + "step": 165200 + }, + { + "epoch": 1.2, + "learning_rate": 0.00039523719486997313, + "loss": 4.2417, + "step": 165300 + }, + { + "epoch": 1.2, + "learning_rate": 0.00039523429491129754, + "loss": 4.252, + "step": 165400 + }, + { + "epoch": 1.2, + "learning_rate": 0.00039523139495262194, + "loss": 4.2574, + "step": 165500 + }, + { + "epoch": 1.2, + "learning_rate": 0.00039522849499394635, + "loss": 4.2377, + "step": 165600 + }, + { + "epoch": 1.2, + "learning_rate": 0.00039522559503527076, + "loss": 4.2505, + "step": 165700 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003952226950765952, + "loss": 4.2502, + "step": 165800 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003952197951179196, + "loss": 4.2452, + "step": 165900 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003952169241588307, + "loss": 4.2442, + "step": 166000 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003952140242001552, + "loss": 4.2651, + "step": 166100 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003952111242414796, + "loss": 4.2394, + "step": 166200 + }, + { + "epoch": 1.21, + "learning_rate": 0.000395208224282804, + "loss": 4.2423, + "step": 166300 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003952053243241284, + "loss": 4.2465, + "step": 166400 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003952024243654528, + "loss": 4.2552, + "step": 166500 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003951995244067772, + "loss": 4.2369, + "step": 166600 + }, + { + "epoch": 1.21, + "learning_rate": 0.00039519662444810163, + "loss": 4.2428, + "step": 166700 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003951937244894261, + "loss": 4.2376, + "step": 166800 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003951908245307505, + "loss": 4.2468, + "step": 166900 + }, + { + "epoch": 1.21, + "learning_rate": 0.00039518792457207485, + "loss": 4.2556, + "step": 167000 + }, + { + "epoch": 1.21, + "learning_rate": 0.00039518502461339926, + "loss": 4.2601, + "step": 167100 + }, + { + "epoch": 1.21, + "learning_rate": 0.00039518212465472366, + "loss": 4.2398, + "step": 167200 + }, + { + "epoch": 1.21, + "learning_rate": 0.00039517922469604807, + "loss": 4.2503, + "step": 167300 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003951763247373725, + "loss": 4.252, + "step": 167400 + }, + { + "epoch": 1.21, + "learning_rate": 0.00039517342477869694, + "loss": 4.2462, + "step": 167500 + }, + { + "epoch": 1.21, + "learning_rate": 0.00039517052482002134, + "loss": 4.2549, + "step": 167600 + }, + { + "epoch": 1.22, + "learning_rate": 0.00039516762486134575, + "loss": 4.2526, + "step": 167700 + }, + { + "epoch": 1.22, + "learning_rate": 0.00039516472490267016, + "loss": 4.2472, + "step": 167800 + }, + { + "epoch": 1.22, + "learning_rate": 0.00039516182494399456, + "loss": 4.251, + "step": 167900 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003951589539849057, + "loss": 4.2491, + "step": 168000 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003951560540262301, + "loss": 4.2455, + "step": 168100 + }, + { + "epoch": 1.22, + "learning_rate": 0.00039515315406755453, + "loss": 4.2562, + "step": 168200 + }, + { + "epoch": 1.22, + "learning_rate": 0.00039515025410887894, + "loss": 4.2486, + "step": 168300 + }, + { + "epoch": 1.22, + "learning_rate": 0.00039514735415020335, + "loss": 4.2489, + "step": 168400 + }, + { + "epoch": 1.22, + "learning_rate": 0.00039514445419152775, + "loss": 4.2482, + "step": 168500 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003951415542328522, + "loss": 4.2502, + "step": 168600 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003951386542741766, + "loss": 4.2531, + "step": 168700 + }, + { + "epoch": 1.22, + "learning_rate": 0.00039513575431550103, + "loss": 4.2576, + "step": 168800 + }, + { + "epoch": 1.22, + "learning_rate": 0.00039513285435682544, + "loss": 4.2488, + "step": 168900 + }, + { + "epoch": 1.23, + "learning_rate": 0.00039512995439814984, + "loss": 4.2498, + "step": 169000 + }, + { + "epoch": 1.23, + "learning_rate": 0.00039512705443947425, + "loss": 4.2599, + "step": 169100 + }, + { + "epoch": 1.23, + "learning_rate": 0.00039512415448079866, + "loss": 4.2467, + "step": 169200 + }, + { + "epoch": 1.23, + "learning_rate": 0.0003951212545221231, + "loss": 4.2617, + "step": 169300 + }, + { + "epoch": 1.23, + "learning_rate": 0.0003951183545634475, + "loss": 4.252, + "step": 169400 + }, + { + "epoch": 1.23, + "learning_rate": 0.00039511545460477193, + "loss": 4.2441, + "step": 169500 + }, + { + "epoch": 1.23, + "learning_rate": 0.0003951125546460963, + "loss": 4.2484, + "step": 169600 + }, + { + "epoch": 1.23, + "learning_rate": 0.0003951096546874207, + "loss": 4.2389, + "step": 169700 + }, + { + "epoch": 1.23, + "learning_rate": 0.0003951067547287451, + "loss": 4.2483, + "step": 169800 + }, + { + "epoch": 1.23, + "learning_rate": 0.0003951038547700695, + "loss": 4.2404, + "step": 169900 + }, + { + "epoch": 1.23, + "learning_rate": 0.0003951009838109807, + "loss": 4.2543, + "step": 170000 + }, + { + "epoch": 1.23, + "learning_rate": 0.0003950980838523051, + "loss": 4.2443, + "step": 170100 + }, + { + "epoch": 1.23, + "learning_rate": 0.00039509518389362953, + "loss": 4.2447, + "step": 170200 + }, + { + "epoch": 1.23, + "learning_rate": 0.000395092283934954, + "loss": 4.2425, + "step": 170300 + }, + { + "epoch": 1.24, + "learning_rate": 0.0003950893839762784, + "loss": 4.2595, + "step": 170400 + }, + { + "epoch": 1.24, + "learning_rate": 0.00039508648401760275, + "loss": 4.2506, + "step": 170500 + }, + { + "epoch": 1.24, + "learning_rate": 0.00039508358405892716, + "loss": 4.2452, + "step": 170600 + }, + { + "epoch": 1.24, + "learning_rate": 0.00039508068410025156, + "loss": 4.2309, + "step": 170700 + }, + { + "epoch": 1.24, + "learning_rate": 0.00039507778414157597, + "loss": 4.2503, + "step": 170800 + }, + { + "epoch": 1.24, + "learning_rate": 0.0003950748841829004, + "loss": 4.2473, + "step": 170900 + }, + { + "epoch": 1.24, + "learning_rate": 0.00039507198422422484, + "loss": 4.249, + "step": 171000 + }, + { + "epoch": 1.24, + "learning_rate": 0.00039506908426554924, + "loss": 4.2266, + "step": 171100 + }, + { + "epoch": 1.24, + "learning_rate": 0.00039506618430687365, + "loss": 4.2446, + "step": 171200 + }, + { + "epoch": 1.24, + "learning_rate": 0.00039506328434819806, + "loss": 4.2349, + "step": 171300 + }, + { + "epoch": 1.24, + "learning_rate": 0.00039506038438952247, + "loss": 4.2511, + "step": 171400 + }, + { + "epoch": 1.24, + "learning_rate": 0.00039505748443084687, + "loss": 4.2544, + "step": 171500 + }, + { + "epoch": 1.24, + "learning_rate": 0.0003950545844721713, + "loss": 4.2381, + "step": 171600 + }, + { + "epoch": 1.24, + "learning_rate": 0.00039505168451349574, + "loss": 4.2493, + "step": 171700 + }, + { + "epoch": 1.25, + "learning_rate": 0.00039504878455482015, + "loss": 4.2547, + "step": 171800 + }, + { + "epoch": 1.25, + "learning_rate": 0.00039504588459614455, + "loss": 4.2496, + "step": 171900 + }, + { + "epoch": 1.25, + "learning_rate": 0.00039504298463746896, + "loss": 4.2503, + "step": 172000 + }, + { + "epoch": 1.25, + "learning_rate": 0.00039504008467879337, + "loss": 4.2579, + "step": 172100 + }, + { + "epoch": 1.25, + "learning_rate": 0.0003950372137197045, + "loss": 4.2527, + "step": 172200 + }, + { + "epoch": 1.25, + "learning_rate": 0.00039503431376102893, + "loss": 4.2438, + "step": 172300 + }, + { + "epoch": 1.25, + "learning_rate": 0.00039503141380235334, + "loss": 4.2481, + "step": 172400 + }, + { + "epoch": 1.25, + "learning_rate": 0.00039502851384367774, + "loss": 4.2443, + "step": 172500 + }, + { + "epoch": 1.25, + "learning_rate": 0.00039502561388500215, + "loss": 4.2456, + "step": 172600 + }, + { + "epoch": 1.25, + "learning_rate": 0.00039502271392632656, + "loss": 4.243, + "step": 172700 + }, + { + "epoch": 1.25, + "learning_rate": 0.000395019813967651, + "loss": 4.2417, + "step": 172800 + }, + { + "epoch": 1.25, + "learning_rate": 0.0003950169140089754, + "loss": 4.2558, + "step": 172900 + }, + { + "epoch": 1.25, + "learning_rate": 0.00039501401405029983, + "loss": 4.2401, + "step": 173000 + }, + { + "epoch": 1.25, + "learning_rate": 0.0003950111140916242, + "loss": 4.238, + "step": 173100 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003950082141329486, + "loss": 4.2407, + "step": 173200 + }, + { + "epoch": 1.26, + "learning_rate": 0.000395005314174273, + "loss": 4.2553, + "step": 173300 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003950024142155974, + "loss": 4.2581, + "step": 173400 + }, + { + "epoch": 1.26, + "learning_rate": 0.00039499951425692187, + "loss": 4.2497, + "step": 173500 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003949966142982463, + "loss": 4.2464, + "step": 173600 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003949937143395707, + "loss": 4.2521, + "step": 173700 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003949908143808951, + "loss": 4.2417, + "step": 173800 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003949879144222195, + "loss": 4.2551, + "step": 173900 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003949850144635439, + "loss": 4.2321, + "step": 174000 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003949821145048683, + "loss": 4.2372, + "step": 174100 + }, + { + "epoch": 1.26, + "learning_rate": 0.00039497924354577946, + "loss": 4.2426, + "step": 174200 + }, + { + "epoch": 1.26, + "learning_rate": 0.00039497634358710387, + "loss": 4.2481, + "step": 174300 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003949734436284283, + "loss": 4.2485, + "step": 174400 + }, + { + "epoch": 1.27, + "learning_rate": 0.00039497054366975274, + "loss": 4.2411, + "step": 174500 + }, + { + "epoch": 1.27, + "learning_rate": 0.00039496764371107715, + "loss": 4.2329, + "step": 174600 + }, + { + "epoch": 1.27, + "learning_rate": 0.00039496474375240155, + "loss": 4.2445, + "step": 174700 + }, + { + "epoch": 1.27, + "learning_rate": 0.00039496184379372596, + "loss": 4.2483, + "step": 174800 + }, + { + "epoch": 1.27, + "learning_rate": 0.00039495894383505037, + "loss": 4.2478, + "step": 174900 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003949560438763748, + "loss": 4.2374, + "step": 175000 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003949531439176992, + "loss": 4.2471, + "step": 175100 + }, + { + "epoch": 1.27, + "learning_rate": 0.00039495024395902364, + "loss": 4.243, + "step": 175200 + }, + { + "epoch": 1.27, + "learning_rate": 0.00039494734400034805, + "loss": 4.2447, + "step": 175300 + }, + { + "epoch": 1.27, + "learning_rate": 0.00039494444404167246, + "loss": 4.2432, + "step": 175400 + }, + { + "epoch": 1.27, + "learning_rate": 0.00039494154408299686, + "loss": 4.2437, + "step": 175500 + }, + { + "epoch": 1.27, + "learning_rate": 0.00039493864412432127, + "loss": 4.237, + "step": 175600 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003949357441656457, + "loss": 4.2475, + "step": 175700 + }, + { + "epoch": 1.27, + "learning_rate": 0.00039493284420697003, + "loss": 4.2503, + "step": 175800 + }, + { + "epoch": 1.28, + "learning_rate": 0.00039492994424829444, + "loss": 4.2545, + "step": 175900 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003949270442896189, + "loss": 4.2475, + "step": 176000 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003949241443309433, + "loss": 4.2422, + "step": 176100 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003949212443722677, + "loss": 4.2499, + "step": 176200 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003949183444135921, + "loss": 4.2583, + "step": 176300 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003949154444549165, + "loss": 4.2398, + "step": 176400 + }, + { + "epoch": 1.28, + "learning_rate": 0.00039491254449624093, + "loss": 4.2461, + "step": 176500 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003949096735371521, + "loss": 4.2565, + "step": 176600 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003949067735784765, + "loss": 4.2535, + "step": 176700 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003949038736198009, + "loss": 4.247, + "step": 176800 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003949009736611253, + "loss": 4.2469, + "step": 176900 + }, + { + "epoch": 1.28, + "learning_rate": 0.00039489807370244977, + "loss": 4.2396, + "step": 177000 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003948951737437742, + "loss": 4.2375, + "step": 177100 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003948922737850986, + "loss": 4.2518, + "step": 177200 + }, + { + "epoch": 1.29, + "learning_rate": 0.000394889373826423, + "loss": 4.2501, + "step": 177300 + }, + { + "epoch": 1.29, + "learning_rate": 0.0003948864738677474, + "loss": 4.2533, + "step": 177400 + }, + { + "epoch": 1.29, + "learning_rate": 0.0003948835739090718, + "loss": 4.2508, + "step": 177500 + }, + { + "epoch": 1.29, + "learning_rate": 0.0003948806739503962, + "loss": 4.2594, + "step": 177600 + }, + { + "epoch": 1.29, + "learning_rate": 0.00039487777399172067, + "loss": 4.2615, + "step": 177700 + }, + { + "epoch": 1.29, + "learning_rate": 0.0003948748740330451, + "loss": 4.2342, + "step": 177800 + }, + { + "epoch": 1.29, + "learning_rate": 0.0003948719740743695, + "loss": 4.2494, + "step": 177900 + }, + { + "epoch": 1.29, + "learning_rate": 0.0003948690741156939, + "loss": 4.2422, + "step": 178000 + }, + { + "epoch": 1.29, + "learning_rate": 0.0003948661741570183, + "loss": 4.2537, + "step": 178100 + }, + { + "epoch": 1.29, + "learning_rate": 0.0003948632741983427, + "loss": 4.252, + "step": 178200 + }, + { + "epoch": 1.29, + "learning_rate": 0.0003948603742396671, + "loss": 4.2387, + "step": 178300 + }, + { + "epoch": 1.29, + "learning_rate": 0.0003948574742809915, + "loss": 4.2387, + "step": 178400 + }, + { + "epoch": 1.29, + "learning_rate": 0.0003948545743223159, + "loss": 4.2423, + "step": 178500 + }, + { + "epoch": 1.29, + "learning_rate": 0.00039485167436364033, + "loss": 4.2516, + "step": 178600 + }, + { + "epoch": 1.3, + "learning_rate": 0.00039484877440496474, + "loss": 4.2478, + "step": 178700 + }, + { + "epoch": 1.3, + "learning_rate": 0.00039484590344587595, + "loss": 4.2387, + "step": 178800 + }, + { + "epoch": 1.3, + "learning_rate": 0.00039484300348720036, + "loss": 4.2497, + "step": 178900 + }, + { + "epoch": 1.3, + "learning_rate": 0.00039484010352852476, + "loss": 4.2537, + "step": 179000 + }, + { + "epoch": 1.3, + "learning_rate": 0.00039483720356984917, + "loss": 4.2445, + "step": 179100 + }, + { + "epoch": 1.3, + "learning_rate": 0.0003948343036111736, + "loss": 4.2379, + "step": 179200 + }, + { + "epoch": 1.3, + "learning_rate": 0.00039483140365249793, + "loss": 4.2404, + "step": 179300 + }, + { + "epoch": 1.3, + "learning_rate": 0.00039482850369382234, + "loss": 4.2511, + "step": 179400 + }, + { + "epoch": 1.3, + "learning_rate": 0.0003948256037351468, + "loss": 4.241, + "step": 179500 + }, + { + "epoch": 1.3, + "learning_rate": 0.0003948227037764712, + "loss": 4.2438, + "step": 179600 + }, + { + "epoch": 1.3, + "learning_rate": 0.0003948198038177956, + "loss": 4.2575, + "step": 179700 + }, + { + "epoch": 1.3, + "learning_rate": 0.00039481690385912, + "loss": 4.2564, + "step": 179800 + }, + { + "epoch": 1.3, + "learning_rate": 0.0003948140039004444, + "loss": 4.2578, + "step": 179900 + }, + { + "epoch": 1.3, + "learning_rate": 0.00039481110394176883, + "loss": 4.2502, + "step": 180000 + }, + { + "epoch": 1.31, + "learning_rate": 0.00039480820398309324, + "loss": 4.2363, + "step": 180100 + }, + { + "epoch": 1.31, + "learning_rate": 0.0003948053040244177, + "loss": 4.2503, + "step": 180200 + }, + { + "epoch": 1.31, + "learning_rate": 0.0003948024040657421, + "loss": 4.2465, + "step": 180300 + }, + { + "epoch": 1.31, + "learning_rate": 0.0003947995041070665, + "loss": 4.232, + "step": 180400 + }, + { + "epoch": 1.31, + "learning_rate": 0.0003947966041483909, + "loss": 4.2513, + "step": 180500 + }, + { + "epoch": 1.31, + "learning_rate": 0.00039479370418971533, + "loss": 4.2469, + "step": 180600 + }, + { + "epoch": 1.31, + "learning_rate": 0.00039479080423103973, + "loss": 4.2332, + "step": 180700 + }, + { + "epoch": 1.31, + "learning_rate": 0.0003947879332719509, + "loss": 4.2381, + "step": 180800 + }, + { + "epoch": 1.31, + "learning_rate": 0.0003947850333132753, + "loss": 4.2465, + "step": 180900 + }, + { + "epoch": 1.31, + "learning_rate": 0.0003947821333545997, + "loss": 4.2518, + "step": 181000 + }, + { + "epoch": 1.31, + "learning_rate": 0.0003947792333959241, + "loss": 4.2401, + "step": 181100 + }, + { + "epoch": 1.31, + "learning_rate": 0.00039477633343724857, + "loss": 4.2408, + "step": 181200 + }, + { + "epoch": 1.31, + "learning_rate": 0.000394773433478573, + "loss": 4.2419, + "step": 181300 + }, + { + "epoch": 1.32, + "learning_rate": 0.0003947705335198974, + "loss": 4.2499, + "step": 181400 + }, + { + "epoch": 1.32, + "learning_rate": 0.0003947676335612218, + "loss": 4.2542, + "step": 181500 + }, + { + "epoch": 1.32, + "learning_rate": 0.0003947647336025462, + "loss": 4.2349, + "step": 181600 + }, + { + "epoch": 1.32, + "learning_rate": 0.0003947618336438706, + "loss": 4.2343, + "step": 181700 + }, + { + "epoch": 1.32, + "learning_rate": 0.000394758933685195, + "loss": 4.2483, + "step": 181800 + }, + { + "epoch": 1.32, + "learning_rate": 0.0003947560337265194, + "loss": 4.245, + "step": 181900 + }, + { + "epoch": 1.32, + "learning_rate": 0.00039475313376784383, + "loss": 4.2411, + "step": 182000 + }, + { + "epoch": 1.32, + "learning_rate": 0.00039475023380916823, + "loss": 4.2443, + "step": 182100 + }, + { + "epoch": 1.32, + "learning_rate": 0.00039474733385049264, + "loss": 4.2458, + "step": 182200 + }, + { + "epoch": 1.32, + "learning_rate": 0.00039474443389181705, + "loss": 4.2314, + "step": 182300 + }, + { + "epoch": 1.32, + "learning_rate": 0.00039474153393314145, + "loss": 4.2527, + "step": 182400 + }, + { + "epoch": 1.32, + "learning_rate": 0.00039473863397446586, + "loss": 4.2434, + "step": 182500 + }, + { + "epoch": 1.32, + "learning_rate": 0.0003947357340157903, + "loss": 4.2455, + "step": 182600 + }, + { + "epoch": 1.32, + "learning_rate": 0.00039473283405711473, + "loss": 4.2465, + "step": 182700 + }, + { + "epoch": 1.33, + "learning_rate": 0.00039472993409843914, + "loss": 4.235, + "step": 182800 + }, + { + "epoch": 1.33, + "learning_rate": 0.00039472706313935024, + "loss": 4.2439, + "step": 182900 + }, + { + "epoch": 1.33, + "learning_rate": 0.0003947241631806747, + "loss": 4.2463, + "step": 183000 + }, + { + "epoch": 1.33, + "learning_rate": 0.0003947212632219991, + "loss": 4.2397, + "step": 183100 + }, + { + "epoch": 1.33, + "learning_rate": 0.0003947183632633235, + "loss": 4.2323, + "step": 183200 + }, + { + "epoch": 1.33, + "learning_rate": 0.0003947154633046479, + "loss": 4.2362, + "step": 183300 + }, + { + "epoch": 1.33, + "learning_rate": 0.0003947125633459723, + "loss": 4.2381, + "step": 183400 + }, + { + "epoch": 1.33, + "learning_rate": 0.00039470966338729673, + "loss": 4.2362, + "step": 183500 + }, + { + "epoch": 1.33, + "learning_rate": 0.00039470676342862114, + "loss": 4.2463, + "step": 183600 + }, + { + "epoch": 1.33, + "learning_rate": 0.0003947038634699456, + "loss": 4.2469, + "step": 183700 + }, + { + "epoch": 1.33, + "learning_rate": 0.00039470096351127, + "loss": 4.2345, + "step": 183800 + }, + { + "epoch": 1.33, + "learning_rate": 0.0003946980635525944, + "loss": 4.2353, + "step": 183900 + }, + { + "epoch": 1.33, + "learning_rate": 0.0003946951635939188, + "loss": 4.2511, + "step": 184000 + }, + { + "epoch": 1.33, + "learning_rate": 0.00039469226363524323, + "loss": 4.2479, + "step": 184100 + }, + { + "epoch": 1.34, + "learning_rate": 0.00039468936367656764, + "loss": 4.2371, + "step": 184200 + }, + { + "epoch": 1.34, + "learning_rate": 0.00039468646371789204, + "loss": 4.2472, + "step": 184300 + }, + { + "epoch": 1.34, + "learning_rate": 0.00039468356375921645, + "loss": 4.2206, + "step": 184400 + }, + { + "epoch": 1.34, + "learning_rate": 0.00039468066380054086, + "loss": 4.246, + "step": 184500 + }, + { + "epoch": 1.34, + "learning_rate": 0.00039467776384186526, + "loss": 4.2367, + "step": 184600 + }, + { + "epoch": 1.34, + "learning_rate": 0.00039467486388318967, + "loss": 4.2343, + "step": 184700 + }, + { + "epoch": 1.34, + "learning_rate": 0.0003946719639245141, + "loss": 4.2406, + "step": 184800 + }, + { + "epoch": 1.34, + "learning_rate": 0.0003946690639658385, + "loss": 4.2375, + "step": 184900 + }, + { + "epoch": 1.34, + "learning_rate": 0.0003946661640071629, + "loss": 4.238, + "step": 185000 + }, + { + "epoch": 1.34, + "learning_rate": 0.00039466326404848735, + "loss": 4.2403, + "step": 185100 + }, + { + "epoch": 1.34, + "learning_rate": 0.0003946603930893985, + "loss": 4.2545, + "step": 185200 + }, + { + "epoch": 1.34, + "learning_rate": 0.0003946574931307229, + "loss": 4.2324, + "step": 185300 + }, + { + "epoch": 1.34, + "learning_rate": 0.0003946545931720473, + "loss": 4.2307, + "step": 185400 + }, + { + "epoch": 1.34, + "learning_rate": 0.00039465169321337173, + "loss": 4.2425, + "step": 185500 + }, + { + "epoch": 1.35, + "learning_rate": 0.00039464879325469614, + "loss": 4.2389, + "step": 185600 + }, + { + "epoch": 1.35, + "learning_rate": 0.00039464589329602054, + "loss": 4.2422, + "step": 185700 + }, + { + "epoch": 1.35, + "learning_rate": 0.00039464299333734495, + "loss": 4.2504, + "step": 185800 + }, + { + "epoch": 1.35, + "learning_rate": 0.00039464009337866936, + "loss": 4.2449, + "step": 185900 + }, + { + "epoch": 1.35, + "learning_rate": 0.00039463719341999376, + "loss": 4.2461, + "step": 186000 + }, + { + "epoch": 1.35, + "learning_rate": 0.0003946342934613182, + "loss": 4.2384, + "step": 186100 + }, + { + "epoch": 1.35, + "learning_rate": 0.00039463139350264263, + "loss": 4.2495, + "step": 186200 + }, + { + "epoch": 1.35, + "learning_rate": 0.00039462849354396704, + "loss": 4.2372, + "step": 186300 + }, + { + "epoch": 1.35, + "learning_rate": 0.00039462559358529144, + "loss": 4.2443, + "step": 186400 + }, + { + "epoch": 1.35, + "learning_rate": 0.00039462269362661585, + "loss": 4.2348, + "step": 186500 + }, + { + "epoch": 1.35, + "learning_rate": 0.00039461979366794026, + "loss": 4.2503, + "step": 186600 + }, + { + "epoch": 1.35, + "learning_rate": 0.00039461689370926467, + "loss": 4.2285, + "step": 186700 + }, + { + "epoch": 1.35, + "learning_rate": 0.00039461399375058907, + "loss": 4.2288, + "step": 186800 + }, + { + "epoch": 1.35, + "learning_rate": 0.0003946110937919135, + "loss": 4.2437, + "step": 186900 + }, + { + "epoch": 1.36, + "learning_rate": 0.0003946081938332379, + "loss": 4.2417, + "step": 187000 + }, + { + "epoch": 1.36, + "learning_rate": 0.0003946052938745623, + "loss": 4.2582, + "step": 187100 + }, + { + "epoch": 1.36, + "learning_rate": 0.0003946023939158867, + "loss": 4.2373, + "step": 187200 + }, + { + "epoch": 1.36, + "learning_rate": 0.0003945994939572111, + "loss": 4.2327, + "step": 187300 + }, + { + "epoch": 1.36, + "learning_rate": 0.0003945966229981223, + "loss": 4.2433, + "step": 187400 + }, + { + "epoch": 1.36, + "learning_rate": 0.0003945937230394467, + "loss": 4.2409, + "step": 187500 + }, + { + "epoch": 1.36, + "learning_rate": 0.00039459082308077113, + "loss": 4.2376, + "step": 187600 + }, + { + "epoch": 1.36, + "learning_rate": 0.00039458792312209554, + "loss": 4.2481, + "step": 187700 + }, + { + "epoch": 1.36, + "learning_rate": 0.00039458502316341994, + "loss": 4.2359, + "step": 187800 + }, + { + "epoch": 1.36, + "learning_rate": 0.00039458212320474435, + "loss": 4.2465, + "step": 187900 + }, + { + "epoch": 1.36, + "learning_rate": 0.00039457922324606876, + "loss": 4.2432, + "step": 188000 + }, + { + "epoch": 1.36, + "learning_rate": 0.00039457632328739316, + "loss": 4.2548, + "step": 188100 + }, + { + "epoch": 1.36, + "learning_rate": 0.00039457342332871757, + "loss": 4.2376, + "step": 188200 + }, + { + "epoch": 1.37, + "learning_rate": 0.000394570523370042, + "loss": 4.2375, + "step": 188300 + }, + { + "epoch": 1.37, + "learning_rate": 0.0003945676234113664, + "loss": 4.2419, + "step": 188400 + }, + { + "epoch": 1.37, + "learning_rate": 0.0003945647234526908, + "loss": 4.2399, + "step": 188500 + }, + { + "epoch": 1.37, + "learning_rate": 0.00039456182349401525, + "loss": 4.2453, + "step": 188600 + }, + { + "epoch": 1.37, + "learning_rate": 0.00039455892353533966, + "loss": 4.245, + "step": 188700 + }, + { + "epoch": 1.37, + "learning_rate": 0.00039455602357666407, + "loss": 4.251, + "step": 188800 + }, + { + "epoch": 1.37, + "learning_rate": 0.0003945531236179885, + "loss": 4.2343, + "step": 188900 + }, + { + "epoch": 1.37, + "learning_rate": 0.0003945502236593129, + "loss": 4.251, + "step": 189000 + }, + { + "epoch": 1.37, + "learning_rate": 0.0003945473237006373, + "loss": 4.248, + "step": 189100 + }, + { + "epoch": 1.37, + "learning_rate": 0.0003945444237419617, + "loss": 4.2409, + "step": 189200 + }, + { + "epoch": 1.37, + "learning_rate": 0.0003945415237832861, + "loss": 4.2433, + "step": 189300 + }, + { + "epoch": 1.37, + "learning_rate": 0.0003945386238246105, + "loss": 4.2262, + "step": 189400 + }, + { + "epoch": 1.37, + "learning_rate": 0.0003945357238659349, + "loss": 4.2595, + "step": 189500 + }, + { + "epoch": 1.37, + "learning_rate": 0.0003945328239072593, + "loss": 4.2491, + "step": 189600 + }, + { + "epoch": 1.38, + "learning_rate": 0.00039452995294817053, + "loss": 4.2435, + "step": 189700 + }, + { + "epoch": 1.38, + "learning_rate": 0.00039452705298949494, + "loss": 4.251, + "step": 189800 + }, + { + "epoch": 1.38, + "learning_rate": 0.00039452415303081935, + "loss": 4.2327, + "step": 189900 + }, + { + "epoch": 1.38, + "learning_rate": 0.00039452125307214375, + "loss": 4.2446, + "step": 190000 + }, + { + "epoch": 1.38, + "learning_rate": 0.00039451835311346816, + "loss": 4.2381, + "step": 190100 + }, + { + "epoch": 1.38, + "learning_rate": 0.00039451545315479257, + "loss": 4.2438, + "step": 190200 + }, + { + "epoch": 1.38, + "learning_rate": 0.000394512553196117, + "loss": 4.2417, + "step": 190300 + }, + { + "epoch": 1.38, + "learning_rate": 0.0003945096532374414, + "loss": 4.2311, + "step": 190400 + }, + { + "epoch": 1.38, + "learning_rate": 0.0003945067532787658, + "loss": 4.2484, + "step": 190500 + }, + { + "epoch": 1.38, + "learning_rate": 0.0003945038533200902, + "loss": 4.248, + "step": 190600 + }, + { + "epoch": 1.38, + "learning_rate": 0.0003945009533614146, + "loss": 4.2416, + "step": 190700 + }, + { + "epoch": 1.38, + "learning_rate": 0.000394498053402739, + "loss": 4.2397, + "step": 190800 + }, + { + "epoch": 1.38, + "learning_rate": 0.0003944951534440634, + "loss": 4.2414, + "step": 190900 + }, + { + "epoch": 1.38, + "learning_rate": 0.0003944922534853878, + "loss": 4.2362, + "step": 191000 + }, + { + "epoch": 1.39, + "learning_rate": 0.0003944893535267123, + "loss": 4.2453, + "step": 191100 + }, + { + "epoch": 1.39, + "learning_rate": 0.0003944864535680367, + "loss": 4.2485, + "step": 191200 + }, + { + "epoch": 1.39, + "learning_rate": 0.0003944835536093611, + "loss": 4.2283, + "step": 191300 + }, + { + "epoch": 1.39, + "learning_rate": 0.0003944806536506855, + "loss": 4.234, + "step": 191400 + }, + { + "epoch": 1.39, + "learning_rate": 0.0003944777536920099, + "loss": 4.2374, + "step": 191500 + }, + { + "epoch": 1.39, + "learning_rate": 0.0003944748537333343, + "loss": 4.2372, + "step": 191600 + }, + { + "epoch": 1.39, + "learning_rate": 0.0003944719537746587, + "loss": 4.2455, + "step": 191700 + }, + { + "epoch": 1.39, + "learning_rate": 0.0003944690538159832, + "loss": 4.2393, + "step": 191800 + }, + { + "epoch": 1.39, + "learning_rate": 0.0003944661828568943, + "loss": 4.2455, + "step": 191900 + }, + { + "epoch": 1.39, + "learning_rate": 0.0003944632828982187, + "loss": 4.2444, + "step": 192000 + }, + { + "epoch": 1.39, + "learning_rate": 0.00039446038293954315, + "loss": 4.2436, + "step": 192100 + }, + { + "epoch": 1.39, + "learning_rate": 0.00039445748298086756, + "loss": 4.2468, + "step": 192200 + }, + { + "epoch": 1.39, + "learning_rate": 0.00039445458302219197, + "loss": 4.2339, + "step": 192300 + }, + { + "epoch": 1.39, + "learning_rate": 0.0003944516830635164, + "loss": 4.2444, + "step": 192400 + }, + { + "epoch": 1.4, + "learning_rate": 0.0003944487831048408, + "loss": 4.2364, + "step": 192500 + }, + { + "epoch": 1.4, + "learning_rate": 0.0003944458831461652, + "loss": 4.2414, + "step": 192600 + }, + { + "epoch": 1.4, + "learning_rate": 0.0003944429831874896, + "loss": 4.2432, + "step": 192700 + }, + { + "epoch": 1.4, + "learning_rate": 0.000394440083228814, + "loss": 4.2395, + "step": 192800 + }, + { + "epoch": 1.4, + "learning_rate": 0.0003944371832701384, + "loss": 4.2493, + "step": 192900 + }, + { + "epoch": 1.4, + "learning_rate": 0.0003944342833114628, + "loss": 4.2294, + "step": 193000 + }, + { + "epoch": 1.4, + "learning_rate": 0.0003944313833527872, + "loss": 4.2443, + "step": 193100 + }, + { + "epoch": 1.4, + "learning_rate": 0.00039442848339411163, + "loss": 4.2432, + "step": 193200 + }, + { + "epoch": 1.4, + "learning_rate": 0.00039442558343543604, + "loss": 4.2491, + "step": 193300 + }, + { + "epoch": 1.4, + "learning_rate": 0.00039442268347676044, + "loss": 4.2449, + "step": 193400 + }, + { + "epoch": 1.4, + "learning_rate": 0.0003944197835180849, + "loss": 4.2501, + "step": 193500 + }, + { + "epoch": 1.4, + "learning_rate": 0.0003944168835594093, + "loss": 4.246, + "step": 193600 + }, + { + "epoch": 1.4, + "learning_rate": 0.0003944139836007337, + "loss": 4.2438, + "step": 193700 + }, + { + "epoch": 1.4, + "learning_rate": 0.0003944110836420581, + "loss": 4.2383, + "step": 193800 + }, + { + "epoch": 1.41, + "learning_rate": 0.00039440818368338253, + "loss": 4.2495, + "step": 193900 + }, + { + "epoch": 1.41, + "learning_rate": 0.00039440528372470694, + "loss": 4.2485, + "step": 194000 + }, + { + "epoch": 1.41, + "learning_rate": 0.0003944024127656181, + "loss": 4.2516, + "step": 194100 + }, + { + "epoch": 1.41, + "learning_rate": 0.0003943995128069425, + "loss": 4.247, + "step": 194200 + }, + { + "epoch": 1.41, + "learning_rate": 0.0003943966128482669, + "loss": 4.2355, + "step": 194300 + }, + { + "epoch": 1.41, + "learning_rate": 0.0003943937128895913, + "loss": 4.2426, + "step": 194400 + }, + { + "epoch": 1.41, + "learning_rate": 0.0003943908129309157, + "loss": 4.2326, + "step": 194500 + }, + { + "epoch": 1.41, + "learning_rate": 0.0003943879129722402, + "loss": 4.2439, + "step": 194600 + }, + { + "epoch": 1.41, + "learning_rate": 0.0003943850130135646, + "loss": 4.2482, + "step": 194700 + }, + { + "epoch": 1.41, + "learning_rate": 0.000394382113054889, + "loss": 4.2383, + "step": 194800 + }, + { + "epoch": 1.41, + "learning_rate": 0.0003943792130962134, + "loss": 4.237, + "step": 194900 + }, + { + "epoch": 1.41, + "learning_rate": 0.0003943763131375378, + "loss": 4.2469, + "step": 195000 + }, + { + "epoch": 1.41, + "learning_rate": 0.00039437344217844897, + "loss": 4.2335, + "step": 195100 + }, + { + "epoch": 1.42, + "learning_rate": 0.0003943705422197734, + "loss": 4.2309, + "step": 195200 + }, + { + "epoch": 1.42, + "learning_rate": 0.0003943676422610978, + "loss": 4.2539, + "step": 195300 + }, + { + "epoch": 1.42, + "learning_rate": 0.0003943647423024222, + "loss": 4.2412, + "step": 195400 + }, + { + "epoch": 1.42, + "learning_rate": 0.0003943618423437466, + "loss": 4.2427, + "step": 195500 + }, + { + "epoch": 1.42, + "learning_rate": 0.00039435894238507106, + "loss": 4.2354, + "step": 195600 + }, + { + "epoch": 1.42, + "learning_rate": 0.00039435604242639546, + "loss": 4.2345, + "step": 195700 + }, + { + "epoch": 1.42, + "learning_rate": 0.00039435314246771987, + "loss": 4.2424, + "step": 195800 + }, + { + "epoch": 1.42, + "learning_rate": 0.0003943502425090443, + "loss": 4.222, + "step": 195900 + }, + { + "epoch": 1.42, + "learning_rate": 0.0003943473425503687, + "loss": 4.247, + "step": 196000 + }, + { + "epoch": 1.42, + "learning_rate": 0.0003943444425916931, + "loss": 4.2434, + "step": 196100 + }, + { + "epoch": 1.42, + "learning_rate": 0.0003943415426330175, + "loss": 4.2339, + "step": 196200 + }, + { + "epoch": 1.42, + "learning_rate": 0.0003943386426743419, + "loss": 4.2446, + "step": 196300 + }, + { + "epoch": 1.42, + "learning_rate": 0.0003943357427156663, + "loss": 4.2428, + "step": 196400 + }, + { + "epoch": 1.42, + "learning_rate": 0.0003943328427569907, + "loss": 4.2415, + "step": 196500 + }, + { + "epoch": 1.43, + "learning_rate": 0.0003943299427983151, + "loss": 4.2372, + "step": 196600 + }, + { + "epoch": 1.43, + "learning_rate": 0.00039432704283963953, + "loss": 4.2347, + "step": 196700 + }, + { + "epoch": 1.43, + "learning_rate": 0.00039432414288096394, + "loss": 4.2416, + "step": 196800 + }, + { + "epoch": 1.43, + "learning_rate": 0.00039432124292228834, + "loss": 4.234, + "step": 196900 + }, + { + "epoch": 1.43, + "learning_rate": 0.0003943183429636128, + "loss": 4.2457, + "step": 197000 + }, + { + "epoch": 1.43, + "learning_rate": 0.0003943154430049372, + "loss": 4.2304, + "step": 197100 + }, + { + "epoch": 1.43, + "learning_rate": 0.0003943125430462616, + "loss": 4.2424, + "step": 197200 + }, + { + "epoch": 1.43, + "learning_rate": 0.000394309643087586, + "loss": 4.2469, + "step": 197300 + }, + { + "epoch": 1.43, + "learning_rate": 0.00039430674312891043, + "loss": 4.246, + "step": 197400 + }, + { + "epoch": 1.43, + "learning_rate": 0.00039430384317023484, + "loss": 4.2446, + "step": 197500 + }, + { + "epoch": 1.43, + "learning_rate": 0.00039430094321155925, + "loss": 4.2214, + "step": 197600 + }, + { + "epoch": 1.43, + "learning_rate": 0.0003942980432528837, + "loss": 4.2304, + "step": 197700 + }, + { + "epoch": 1.43, + "learning_rate": 0.0003942951432942081, + "loss": 4.2399, + "step": 197800 + }, + { + "epoch": 1.43, + "learning_rate": 0.0003942922433355325, + "loss": 4.2276, + "step": 197900 + }, + { + "epoch": 1.44, + "learning_rate": 0.0003942893433768569, + "loss": 4.2269, + "step": 198000 + }, + { + "epoch": 1.44, + "learning_rate": 0.0003942864434181813, + "loss": 4.2476, + "step": 198100 + }, + { + "epoch": 1.44, + "learning_rate": 0.0003942835434595057, + "loss": 4.2372, + "step": 198200 + }, + { + "epoch": 1.44, + "learning_rate": 0.0003942806435008301, + "loss": 4.2312, + "step": 198300 + }, + { + "epoch": 1.44, + "learning_rate": 0.00039427774354215456, + "loss": 4.2332, + "step": 198400 + }, + { + "epoch": 1.44, + "learning_rate": 0.00039427484358347896, + "loss": 4.2284, + "step": 198500 + }, + { + "epoch": 1.44, + "learning_rate": 0.0003942719726243901, + "loss": 4.2366, + "step": 198600 + }, + { + "epoch": 1.44, + "learning_rate": 0.0003942690726657145, + "loss": 4.2303, + "step": 198700 + }, + { + "epoch": 1.44, + "learning_rate": 0.000394266172707039, + "loss": 4.2492, + "step": 198800 + }, + { + "epoch": 1.44, + "learning_rate": 0.00039426327274836334, + "loss": 4.2397, + "step": 198900 + }, + { + "epoch": 1.44, + "learning_rate": 0.00039426037278968775, + "loss": 4.2377, + "step": 199000 + }, + { + "epoch": 1.44, + "learning_rate": 0.00039425747283101215, + "loss": 4.2412, + "step": 199100 + }, + { + "epoch": 1.44, + "learning_rate": 0.00039425457287233656, + "loss": 4.2473, + "step": 199200 + }, + { + "epoch": 1.44, + "learning_rate": 0.00039425167291366097, + "loss": 4.2414, + "step": 199300 + }, + { + "epoch": 1.45, + "learning_rate": 0.0003942487729549854, + "loss": 4.2353, + "step": 199400 + }, + { + "epoch": 1.45, + "learning_rate": 0.00039424587299630984, + "loss": 4.2498, + "step": 199500 + }, + { + "epoch": 1.45, + "learning_rate": 0.00039424297303763424, + "loss": 4.2437, + "step": 199600 + }, + { + "epoch": 1.45, + "learning_rate": 0.00039424007307895865, + "loss": 4.2436, + "step": 199700 + }, + { + "epoch": 1.45, + "learning_rate": 0.00039423717312028306, + "loss": 4.2223, + "step": 199800 + }, + { + "epoch": 1.45, + "learning_rate": 0.00039423427316160746, + "loss": 4.2379, + "step": 199900 + }, + { + "epoch": 1.45, + "learning_rate": 0.00039423137320293187, + "loss": 4.241, + "step": 200000 + }, + { + "epoch": 1.45, + "learning_rate": 0.0003942284732442563, + "loss": 4.2418, + "step": 200100 + }, + { + "epoch": 1.45, + "learning_rate": 0.00039422557328558074, + "loss": 4.232, + "step": 200200 + }, + { + "epoch": 1.45, + "learning_rate": 0.00039422267332690514, + "loss": 4.2411, + "step": 200300 + }, + { + "epoch": 1.45, + "learning_rate": 0.00039421977336822955, + "loss": 4.224, + "step": 200400 + }, + { + "epoch": 1.45, + "learning_rate": 0.00039421687340955396, + "loss": 4.2369, + "step": 200500 + }, + { + "epoch": 1.45, + "learning_rate": 0.0003942139734508783, + "loss": 4.2296, + "step": 200600 + }, + { + "epoch": 1.45, + "learning_rate": 0.0003942110734922027, + "loss": 4.2469, + "step": 200700 + }, + { + "epoch": 1.46, + "learning_rate": 0.0003942081735335271, + "loss": 4.239, + "step": 200800 + }, + { + "epoch": 1.46, + "learning_rate": 0.0003942052735748516, + "loss": 4.2417, + "step": 200900 + }, + { + "epoch": 1.46, + "learning_rate": 0.000394202373616176, + "loss": 4.2231, + "step": 201000 + }, + { + "epoch": 1.46, + "learning_rate": 0.0003941994736575004, + "loss": 4.2336, + "step": 201100 + }, + { + "epoch": 1.46, + "learning_rate": 0.0003941965736988248, + "loss": 4.2381, + "step": 201200 + }, + { + "epoch": 1.46, + "learning_rate": 0.0003941936737401492, + "loss": 4.2288, + "step": 201300 + }, + { + "epoch": 1.46, + "learning_rate": 0.0003941907737814736, + "loss": 4.2203, + "step": 201400 + }, + { + "epoch": 1.46, + "learning_rate": 0.000394187873822798, + "loss": 4.2356, + "step": 201500 + }, + { + "epoch": 1.46, + "learning_rate": 0.0003941849738641225, + "loss": 4.2297, + "step": 201600 + }, + { + "epoch": 1.46, + "learning_rate": 0.0003941820739054469, + "loss": 4.2499, + "step": 201700 + }, + { + "epoch": 1.46, + "learning_rate": 0.0003941791739467713, + "loss": 4.2437, + "step": 201800 + }, + { + "epoch": 1.46, + "learning_rate": 0.0003941762739880957, + "loss": 4.244, + "step": 201900 + }, + { + "epoch": 1.46, + "learning_rate": 0.0003941733740294201, + "loss": 4.2298, + "step": 202000 + }, + { + "epoch": 1.47, + "learning_rate": 0.0003941704740707445, + "loss": 4.2339, + "step": 202100 + }, + { + "epoch": 1.47, + "learning_rate": 0.00039416757411206893, + "loss": 4.2423, + "step": 202200 + }, + { + "epoch": 1.47, + "learning_rate": 0.00039416467415339334, + "loss": 4.2519, + "step": 202300 + }, + { + "epoch": 1.47, + "learning_rate": 0.00039416177419471774, + "loss": 4.2359, + "step": 202400 + }, + { + "epoch": 1.47, + "learning_rate": 0.00039415887423604215, + "loss": 4.2326, + "step": 202500 + }, + { + "epoch": 1.47, + "learning_rate": 0.00039415597427736656, + "loss": 4.2396, + "step": 202600 + }, + { + "epoch": 1.47, + "learning_rate": 0.00039415310331827777, + "loss": 4.2349, + "step": 202700 + }, + { + "epoch": 1.47, + "learning_rate": 0.0003941502033596022, + "loss": 4.2423, + "step": 202800 + }, + { + "epoch": 1.47, + "learning_rate": 0.0003941473034009266, + "loss": 4.238, + "step": 202900 + }, + { + "epoch": 1.47, + "learning_rate": 0.000394144403442251, + "loss": 4.229, + "step": 203000 + }, + { + "epoch": 1.47, + "learning_rate": 0.0003941415034835754, + "loss": 4.2441, + "step": 203100 + }, + { + "epoch": 1.47, + "learning_rate": 0.0003941386035248998, + "loss": 4.2345, + "step": 203200 + }, + { + "epoch": 1.47, + "learning_rate": 0.00039413570356622415, + "loss": 4.2351, + "step": 203300 + }, + { + "epoch": 1.47, + "learning_rate": 0.0003941328036075486, + "loss": 4.2282, + "step": 203400 + }, + { + "epoch": 1.48, + "learning_rate": 0.000394129903648873, + "loss": 4.2345, + "step": 203500 + }, + { + "epoch": 1.48, + "learning_rate": 0.00039412700369019743, + "loss": 4.2342, + "step": 203600 + }, + { + "epoch": 1.48, + "learning_rate": 0.00039412410373152184, + "loss": 4.2292, + "step": 203700 + }, + { + "epoch": 1.48, + "learning_rate": 0.00039412120377284624, + "loss": 4.236, + "step": 203800 + }, + { + "epoch": 1.48, + "learning_rate": 0.00039411830381417065, + "loss": 4.2461, + "step": 203900 + }, + { + "epoch": 1.48, + "learning_rate": 0.00039411540385549506, + "loss": 4.2365, + "step": 204000 + }, + { + "epoch": 1.48, + "learning_rate": 0.0003941125038968195, + "loss": 4.2419, + "step": 204100 + }, + { + "epoch": 1.48, + "learning_rate": 0.0003941096039381439, + "loss": 4.2321, + "step": 204200 + }, + { + "epoch": 1.48, + "learning_rate": 0.00039410670397946833, + "loss": 4.2413, + "step": 204300 + }, + { + "epoch": 1.48, + "learning_rate": 0.00039410380402079274, + "loss": 4.2307, + "step": 204400 + }, + { + "epoch": 1.48, + "learning_rate": 0.00039410090406211714, + "loss": 4.239, + "step": 204500 + }, + { + "epoch": 1.48, + "learning_rate": 0.00039409800410344155, + "loss": 4.2339, + "step": 204600 + }, + { + "epoch": 1.48, + "learning_rate": 0.0003940951331443527, + "loss": 4.2236, + "step": 204700 + }, + { + "epoch": 1.48, + "learning_rate": 0.0003940922331856771, + "loss": 4.2333, + "step": 204800 + }, + { + "epoch": 1.49, + "learning_rate": 0.0003940893332270015, + "loss": 4.2352, + "step": 204900 + }, + { + "epoch": 1.49, + "learning_rate": 0.00039408643326832593, + "loss": 4.2356, + "step": 205000 + }, + { + "epoch": 1.49, + "learning_rate": 0.0003940835333096504, + "loss": 4.2215, + "step": 205100 + }, + { + "epoch": 1.49, + "learning_rate": 0.0003940806333509748, + "loss": 4.2324, + "step": 205200 + }, + { + "epoch": 1.49, + "learning_rate": 0.0003940777333922992, + "loss": 4.224, + "step": 205300 + }, + { + "epoch": 1.49, + "learning_rate": 0.0003940748334336236, + "loss": 4.2293, + "step": 205400 + }, + { + "epoch": 1.49, + "learning_rate": 0.000394071933474948, + "loss": 4.2332, + "step": 205500 + }, + { + "epoch": 1.49, + "learning_rate": 0.0003940690335162724, + "loss": 4.231, + "step": 205600 + }, + { + "epoch": 1.49, + "learning_rate": 0.00039406613355759683, + "loss": 4.243, + "step": 205700 + }, + { + "epoch": 1.49, + "learning_rate": 0.00039406323359892124, + "loss": 4.2456, + "step": 205800 + }, + { + "epoch": 1.49, + "learning_rate": 0.00039406033364024564, + "loss": 4.2336, + "step": 205900 + }, + { + "epoch": 1.49, + "learning_rate": 0.00039405743368157005, + "loss": 4.2268, + "step": 206000 + }, + { + "epoch": 1.49, + "learning_rate": 0.00039405453372289446, + "loss": 4.2345, + "step": 206100 + }, + { + "epoch": 1.49, + "learning_rate": 0.00039405163376421886, + "loss": 4.245, + "step": 206200 + }, + { + "epoch": 1.5, + "learning_rate": 0.00039404873380554327, + "loss": 4.231, + "step": 206300 + }, + { + "epoch": 1.5, + "learning_rate": 0.0003940458338468677, + "loss": 4.2425, + "step": 206400 + }, + { + "epoch": 1.5, + "learning_rate": 0.00039404293388819214, + "loss": 4.2317, + "step": 206500 + }, + { + "epoch": 1.5, + "learning_rate": 0.00039404003392951655, + "loss": 4.2469, + "step": 206600 + }, + { + "epoch": 1.5, + "learning_rate": 0.00039403713397084095, + "loss": 4.2289, + "step": 206700 + }, + { + "epoch": 1.5, + "learning_rate": 0.00039403426301175206, + "loss": 4.2345, + "step": 206800 + }, + { + "epoch": 1.5, + "learning_rate": 0.0003940313630530765, + "loss": 4.2437, + "step": 206900 + }, + { + "epoch": 1.5, + "learning_rate": 0.0003940284630944009, + "loss": 4.2487, + "step": 207000 + }, + { + "epoch": 1.5, + "learning_rate": 0.00039402556313572533, + "loss": 4.2278, + "step": 207100 + }, + { + "epoch": 1.5, + "learning_rate": 0.00039402266317704974, + "loss": 4.2416, + "step": 207200 + }, + { + "epoch": 1.5, + "learning_rate": 0.00039401976321837414, + "loss": 4.2374, + "step": 207300 + }, + { + "epoch": 1.5, + "learning_rate": 0.00039401686325969855, + "loss": 4.2125, + "step": 207400 + }, + { + "epoch": 1.5, + "learning_rate": 0.00039401396330102296, + "loss": 4.2443, + "step": 207500 + }, + { + "epoch": 1.5, + "learning_rate": 0.0003940110633423474, + "loss": 4.2315, + "step": 207600 + }, + { + "epoch": 1.51, + "learning_rate": 0.0003940081633836718, + "loss": 4.2317, + "step": 207700 + }, + { + "epoch": 1.51, + "learning_rate": 0.00039400526342499623, + "loss": 4.247, + "step": 207800 + }, + { + "epoch": 1.51, + "learning_rate": 0.0003940023924659074, + "loss": 4.2553, + "step": 207900 + }, + { + "epoch": 1.51, + "learning_rate": 0.0003939994925072318, + "loss": 4.2419, + "step": 208000 + }, + { + "epoch": 1.51, + "learning_rate": 0.0003939965925485562, + "loss": 4.2336, + "step": 208100 + }, + { + "epoch": 1.51, + "learning_rate": 0.0003939936925898806, + "loss": 4.2333, + "step": 208200 + }, + { + "epoch": 1.51, + "learning_rate": 0.000393990792631205, + "loss": 4.2327, + "step": 208300 + }, + { + "epoch": 1.51, + "learning_rate": 0.0003939878926725294, + "loss": 4.2234, + "step": 208400 + }, + { + "epoch": 1.51, + "learning_rate": 0.00039398499271385383, + "loss": 4.2528, + "step": 208500 + }, + { + "epoch": 1.51, + "learning_rate": 0.0003939820927551783, + "loss": 4.2527, + "step": 208600 + }, + { + "epoch": 1.51, + "learning_rate": 0.0003939791927965027, + "loss": 4.2363, + "step": 208700 + }, + { + "epoch": 1.51, + "learning_rate": 0.0003939762928378271, + "loss": 4.2296, + "step": 208800 + }, + { + "epoch": 1.51, + "learning_rate": 0.0003939733928791515, + "loss": 4.2398, + "step": 208900 + }, + { + "epoch": 1.52, + "learning_rate": 0.0003939704929204759, + "loss": 4.2339, + "step": 209000 + }, + { + "epoch": 1.52, + "learning_rate": 0.0003939675929618003, + "loss": 4.2326, + "step": 209100 + }, + { + "epoch": 1.52, + "learning_rate": 0.00039396469300312473, + "loss": 4.2301, + "step": 209200 + }, + { + "epoch": 1.52, + "learning_rate": 0.00039396179304444914, + "loss": 4.2418, + "step": 209300 + }, + { + "epoch": 1.52, + "learning_rate": 0.00039395889308577355, + "loss": 4.2343, + "step": 209400 + }, + { + "epoch": 1.52, + "learning_rate": 0.00039395599312709795, + "loss": 4.2402, + "step": 209500 + }, + { + "epoch": 1.52, + "learning_rate": 0.00039395309316842236, + "loss": 4.235, + "step": 209600 + }, + { + "epoch": 1.52, + "learning_rate": 0.00039395019320974677, + "loss": 4.2345, + "step": 209700 + }, + { + "epoch": 1.52, + "learning_rate": 0.0003939472932510712, + "loss": 4.2195, + "step": 209800 + }, + { + "epoch": 1.52, + "learning_rate": 0.0003939443932923956, + "loss": 4.2402, + "step": 209900 + }, + { + "epoch": 1.52, + "learning_rate": 0.00039394149333372004, + "loss": 4.2376, + "step": 210000 + }, + { + "epoch": 1.52, + "learning_rate": 0.00039393859337504445, + "loss": 4.2339, + "step": 210100 + }, + { + "epoch": 1.52, + "learning_rate": 0.00039393569341636885, + "loss": 4.2332, + "step": 210200 + }, + { + "epoch": 1.52, + "learning_rate": 0.00039393279345769326, + "loss": 4.2462, + "step": 210300 + }, + { + "epoch": 1.53, + "learning_rate": 0.00039392989349901767, + "loss": 4.2319, + "step": 210400 + }, + { + "epoch": 1.53, + "learning_rate": 0.0003939269935403421, + "loss": 4.2314, + "step": 210500 + }, + { + "epoch": 1.53, + "learning_rate": 0.0003939240935816665, + "loss": 4.2521, + "step": 210600 + }, + { + "epoch": 1.53, + "learning_rate": 0.0003939211936229909, + "loss": 4.2572, + "step": 210700 + }, + { + "epoch": 1.53, + "learning_rate": 0.0003939182936643153, + "loss": 4.2359, + "step": 210800 + }, + { + "epoch": 1.53, + "learning_rate": 0.0003939153937056397, + "loss": 4.2251, + "step": 210900 + }, + { + "epoch": 1.53, + "learning_rate": 0.0003939124937469641, + "loss": 4.224, + "step": 211000 + }, + { + "epoch": 1.53, + "learning_rate": 0.0003939095937882885, + "loss": 4.2269, + "step": 211100 + }, + { + "epoch": 1.53, + "learning_rate": 0.0003939066938296129, + "loss": 4.2426, + "step": 211200 + }, + { + "epoch": 1.53, + "learning_rate": 0.00039390379387093733, + "loss": 4.2305, + "step": 211300 + }, + { + "epoch": 1.53, + "learning_rate": 0.00039390089391226174, + "loss": 4.2288, + "step": 211400 + }, + { + "epoch": 1.53, + "learning_rate": 0.0003938979939535862, + "loss": 4.2415, + "step": 211500 + }, + { + "epoch": 1.53, + "learning_rate": 0.0003938950939949106, + "loss": 4.2227, + "step": 211600 + }, + { + "epoch": 1.53, + "learning_rate": 0.000393892194036235, + "loss": 4.2342, + "step": 211700 + }, + { + "epoch": 1.54, + "learning_rate": 0.0003938892940775594, + "loss": 4.2318, + "step": 211800 + }, + { + "epoch": 1.54, + "learning_rate": 0.0003938863941188838, + "loss": 4.2499, + "step": 211900 + }, + { + "epoch": 1.54, + "learning_rate": 0.00039388349416020823, + "loss": 4.2326, + "step": 212000 + }, + { + "epoch": 1.54, + "learning_rate": 0.00039388059420153264, + "loss": 4.2411, + "step": 212100 + }, + { + "epoch": 1.54, + "learning_rate": 0.0003938776942428571, + "loss": 4.2339, + "step": 212200 + }, + { + "epoch": 1.54, + "learning_rate": 0.0003938747942841815, + "loss": 4.2337, + "step": 212300 + }, + { + "epoch": 1.54, + "learning_rate": 0.0003938719233250926, + "loss": 4.2333, + "step": 212400 + }, + { + "epoch": 1.54, + "learning_rate": 0.00039386902336641707, + "loss": 4.2422, + "step": 212500 + }, + { + "epoch": 1.54, + "learning_rate": 0.0003938661234077415, + "loss": 4.233, + "step": 212600 + }, + { + "epoch": 1.54, + "learning_rate": 0.0003938632234490659, + "loss": 4.234, + "step": 212700 + }, + { + "epoch": 1.54, + "learning_rate": 0.0003938603234903903, + "loss": 4.2346, + "step": 212800 + }, + { + "epoch": 1.54, + "learning_rate": 0.0003938574235317147, + "loss": 4.2429, + "step": 212900 + }, + { + "epoch": 1.54, + "learning_rate": 0.0003938545235730391, + "loss": 4.237, + "step": 213000 + }, + { + "epoch": 1.54, + "learning_rate": 0.0003938516236143635, + "loss": 4.2218, + "step": 213100 + }, + { + "epoch": 1.55, + "learning_rate": 0.00039384872365568797, + "loss": 4.2279, + "step": 213200 + }, + { + "epoch": 1.55, + "learning_rate": 0.0003938458236970123, + "loss": 4.2316, + "step": 213300 + }, + { + "epoch": 1.55, + "learning_rate": 0.00039384292373833673, + "loss": 4.2474, + "step": 213400 + }, + { + "epoch": 1.55, + "learning_rate": 0.00039384002377966114, + "loss": 4.2375, + "step": 213500 + }, + { + "epoch": 1.55, + "learning_rate": 0.00039383712382098555, + "loss": 4.237, + "step": 213600 + }, + { + "epoch": 1.55, + "learning_rate": 0.00039383422386230995, + "loss": 4.2349, + "step": 213700 + }, + { + "epoch": 1.55, + "learning_rate": 0.00039383132390363436, + "loss": 4.2437, + "step": 213800 + }, + { + "epoch": 1.55, + "learning_rate": 0.0003938284239449588, + "loss": 4.2312, + "step": 213900 + }, + { + "epoch": 1.55, + "learning_rate": 0.00039382552398628323, + "loss": 4.2361, + "step": 214000 + }, + { + "epoch": 1.55, + "learning_rate": 0.00039382262402760763, + "loss": 4.2213, + "step": 214100 + }, + { + "epoch": 1.55, + "learning_rate": 0.00039381972406893204, + "loss": 4.2217, + "step": 214200 + }, + { + "epoch": 1.55, + "learning_rate": 0.00039381682411025645, + "loss": 4.2384, + "step": 214300 + }, + { + "epoch": 1.55, + "learning_rate": 0.0003938139531511676, + "loss": 4.2504, + "step": 214400 + }, + { + "epoch": 1.55, + "learning_rate": 0.000393811053192492, + "loss": 4.231, + "step": 214500 + }, + { + "epoch": 1.56, + "learning_rate": 0.0003938081532338164, + "loss": 4.233, + "step": 214600 + }, + { + "epoch": 1.56, + "learning_rate": 0.0003938052532751408, + "loss": 4.2442, + "step": 214700 + }, + { + "epoch": 1.56, + "learning_rate": 0.00039380235331646523, + "loss": 4.2481, + "step": 214800 + }, + { + "epoch": 1.56, + "learning_rate": 0.00039379945335778964, + "loss": 4.2263, + "step": 214900 + }, + { + "epoch": 1.56, + "learning_rate": 0.0003937965533991141, + "loss": 4.2362, + "step": 215000 + }, + { + "epoch": 1.56, + "learning_rate": 0.0003937936534404385, + "loss": 4.2322, + "step": 215100 + }, + { + "epoch": 1.56, + "learning_rate": 0.0003937907534817629, + "loss": 4.2345, + "step": 215200 + }, + { + "epoch": 1.56, + "learning_rate": 0.0003937878535230873, + "loss": 4.2298, + "step": 215300 + }, + { + "epoch": 1.56, + "learning_rate": 0.0003937849535644117, + "loss": 4.2262, + "step": 215400 + }, + { + "epoch": 1.56, + "learning_rate": 0.00039378205360573613, + "loss": 4.2196, + "step": 215500 + }, + { + "epoch": 1.56, + "learning_rate": 0.00039377915364706054, + "loss": 4.234, + "step": 215600 + }, + { + "epoch": 1.56, + "learning_rate": 0.000393776253688385, + "loss": 4.2379, + "step": 215700 + }, + { + "epoch": 1.56, + "learning_rate": 0.0003937733537297094, + "loss": 4.2434, + "step": 215800 + }, + { + "epoch": 1.57, + "learning_rate": 0.00039377045377103376, + "loss": 4.2361, + "step": 215900 + }, + { + "epoch": 1.57, + "learning_rate": 0.00039376755381235817, + "loss": 4.2286, + "step": 216000 + }, + { + "epoch": 1.57, + "learning_rate": 0.0003937646538536826, + "loss": 4.2322, + "step": 216100 + }, + { + "epoch": 1.57, + "learning_rate": 0.000393761753895007, + "loss": 4.2278, + "step": 216200 + }, + { + "epoch": 1.57, + "learning_rate": 0.0003937588539363314, + "loss": 4.2328, + "step": 216300 + }, + { + "epoch": 1.57, + "learning_rate": 0.00039375595397765585, + "loss": 4.2343, + "step": 216400 + }, + { + "epoch": 1.57, + "learning_rate": 0.00039375305401898026, + "loss": 4.2419, + "step": 216500 + }, + { + "epoch": 1.57, + "learning_rate": 0.0003937501830598914, + "loss": 4.2303, + "step": 216600 + }, + { + "epoch": 1.57, + "learning_rate": 0.0003937472831012159, + "loss": 4.2266, + "step": 216700 + }, + { + "epoch": 1.57, + "learning_rate": 0.0003937443831425402, + "loss": 4.2287, + "step": 216800 + }, + { + "epoch": 1.57, + "learning_rate": 0.00039374148318386463, + "loss": 4.2296, + "step": 216900 + }, + { + "epoch": 1.57, + "learning_rate": 0.00039373858322518904, + "loss": 4.2313, + "step": 217000 + }, + { + "epoch": 1.57, + "learning_rate": 0.00039373568326651345, + "loss": 4.2308, + "step": 217100 + }, + { + "epoch": 1.57, + "learning_rate": 0.00039373278330783785, + "loss": 4.2106, + "step": 217200 + }, + { + "epoch": 1.58, + "learning_rate": 0.00039372988334916226, + "loss": 4.2291, + "step": 217300 + }, + { + "epoch": 1.58, + "learning_rate": 0.0003937269833904867, + "loss": 4.2263, + "step": 217400 + }, + { + "epoch": 1.58, + "learning_rate": 0.00039372408343181113, + "loss": 4.2415, + "step": 217500 + }, + { + "epoch": 1.58, + "learning_rate": 0.00039372118347313554, + "loss": 4.2271, + "step": 217600 + }, + { + "epoch": 1.58, + "learning_rate": 0.00039371828351445994, + "loss": 4.2382, + "step": 217700 + }, + { + "epoch": 1.58, + "learning_rate": 0.00039371538355578435, + "loss": 4.2244, + "step": 217800 + }, + { + "epoch": 1.58, + "learning_rate": 0.00039371248359710876, + "loss": 4.245, + "step": 217900 + }, + { + "epoch": 1.58, + "learning_rate": 0.00039370958363843316, + "loss": 4.2287, + "step": 218000 + }, + { + "epoch": 1.58, + "learning_rate": 0.0003937066836797576, + "loss": 4.2371, + "step": 218100 + }, + { + "epoch": 1.58, + "learning_rate": 0.00039370378372108203, + "loss": 4.2309, + "step": 218200 + }, + { + "epoch": 1.58, + "learning_rate": 0.00039370088376240644, + "loss": 4.2295, + "step": 218300 + }, + { + "epoch": 1.58, + "learning_rate": 0.00039369798380373084, + "loss": 4.2329, + "step": 218400 + }, + { + "epoch": 1.58, + "learning_rate": 0.0003936950838450552, + "loss": 4.2414, + "step": 218500 + }, + { + "epoch": 1.58, + "learning_rate": 0.0003936921838863796, + "loss": 4.2279, + "step": 218600 + }, + { + "epoch": 1.59, + "learning_rate": 0.0003936893129272908, + "loss": 4.2257, + "step": 218700 + }, + { + "epoch": 1.59, + "learning_rate": 0.0003936864129686152, + "loss": 4.2244, + "step": 218800 + }, + { + "epoch": 1.59, + "learning_rate": 0.00039368351300993963, + "loss": 4.2432, + "step": 218900 + }, + { + "epoch": 1.59, + "learning_rate": 0.00039368061305126404, + "loss": 4.2399, + "step": 219000 + }, + { + "epoch": 1.59, + "learning_rate": 0.00039367771309258844, + "loss": 4.2405, + "step": 219100 + }, + { + "epoch": 1.59, + "learning_rate": 0.0003936748131339129, + "loss": 4.2273, + "step": 219200 + }, + { + "epoch": 1.59, + "learning_rate": 0.0003936719131752373, + "loss": 4.2454, + "step": 219300 + }, + { + "epoch": 1.59, + "learning_rate": 0.00039366901321656166, + "loss": 4.2263, + "step": 219400 + }, + { + "epoch": 1.59, + "learning_rate": 0.00039366611325788607, + "loss": 4.2314, + "step": 219500 + }, + { + "epoch": 1.59, + "learning_rate": 0.0003936632132992105, + "loss": 4.2361, + "step": 219600 + }, + { + "epoch": 1.59, + "learning_rate": 0.0003936603133405349, + "loss": 4.233, + "step": 219700 + }, + { + "epoch": 1.59, + "learning_rate": 0.0003936574133818593, + "loss": 4.2425, + "step": 219800 + }, + { + "epoch": 1.59, + "learning_rate": 0.00039365451342318375, + "loss": 4.2325, + "step": 219900 + }, + { + "epoch": 1.59, + "learning_rate": 0.00039365161346450816, + "loss": 4.237, + "step": 220000 + }, + { + "epoch": 1.6, + "learning_rate": 0.00039364871350583256, + "loss": 4.2324, + "step": 220100 + }, + { + "epoch": 1.6, + "learning_rate": 0.00039364581354715697, + "loss": 4.2249, + "step": 220200 + }, + { + "epoch": 1.6, + "learning_rate": 0.0003936429135884814, + "loss": 4.2393, + "step": 220300 + }, + { + "epoch": 1.6, + "learning_rate": 0.0003936400136298058, + "loss": 4.2343, + "step": 220400 + }, + { + "epoch": 1.6, + "learning_rate": 0.0003936371136711302, + "loss": 4.2241, + "step": 220500 + }, + { + "epoch": 1.6, + "learning_rate": 0.00039363421371245465, + "loss": 4.2289, + "step": 220600 + }, + { + "epoch": 1.6, + "learning_rate": 0.00039363131375377906, + "loss": 4.2338, + "step": 220700 + }, + { + "epoch": 1.6, + "learning_rate": 0.00039362844279469016, + "loss": 4.2318, + "step": 220800 + }, + { + "epoch": 1.6, + "learning_rate": 0.0003936255428360146, + "loss": 4.2356, + "step": 220900 + }, + { + "epoch": 1.6, + "learning_rate": 0.00039362264287733903, + "loss": 4.2347, + "step": 221000 + }, + { + "epoch": 1.6, + "learning_rate": 0.00039361974291866344, + "loss": 4.235, + "step": 221100 + }, + { + "epoch": 1.6, + "learning_rate": 0.00039361684295998784, + "loss": 4.2321, + "step": 221200 + }, + { + "epoch": 1.6, + "learning_rate": 0.00039361394300131225, + "loss": 4.2346, + "step": 221300 + }, + { + "epoch": 1.61, + "learning_rate": 0.00039361104304263666, + "loss": 4.2392, + "step": 221400 + }, + { + "epoch": 1.61, + "learning_rate": 0.00039360814308396106, + "loss": 4.2293, + "step": 221500 + }, + { + "epoch": 1.61, + "learning_rate": 0.0003936052431252855, + "loss": 4.2344, + "step": 221600 + }, + { + "epoch": 1.61, + "learning_rate": 0.00039360234316660993, + "loss": 4.2169, + "step": 221700 + }, + { + "epoch": 1.61, + "learning_rate": 0.00039359944320793434, + "loss": 4.2392, + "step": 221800 + }, + { + "epoch": 1.61, + "learning_rate": 0.00039359654324925875, + "loss": 4.2456, + "step": 221900 + }, + { + "epoch": 1.61, + "learning_rate": 0.0003935936432905831, + "loss": 4.2333, + "step": 222000 + }, + { + "epoch": 1.61, + "learning_rate": 0.0003935907433319075, + "loss": 4.2249, + "step": 222100 + }, + { + "epoch": 1.61, + "learning_rate": 0.0003935878433732319, + "loss": 4.2384, + "step": 222200 + }, + { + "epoch": 1.61, + "learning_rate": 0.0003935849434145563, + "loss": 4.2257, + "step": 222300 + }, + { + "epoch": 1.61, + "learning_rate": 0.0003935820434558808, + "loss": 4.242, + "step": 222400 + }, + { + "epoch": 1.61, + "learning_rate": 0.0003935791434972052, + "loss": 4.2391, + "step": 222500 + }, + { + "epoch": 1.61, + "learning_rate": 0.0003935762435385296, + "loss": 4.2357, + "step": 222600 + }, + { + "epoch": 1.61, + "learning_rate": 0.000393573343579854, + "loss": 4.2296, + "step": 222700 + }, + { + "epoch": 1.62, + "learning_rate": 0.0003935704436211784, + "loss": 4.2468, + "step": 222800 + }, + { + "epoch": 1.62, + "learning_rate": 0.0003935675436625028, + "loss": 4.2324, + "step": 222900 + }, + { + "epoch": 1.62, + "learning_rate": 0.0003935646437038272, + "loss": 4.2309, + "step": 223000 + }, + { + "epoch": 1.62, + "learning_rate": 0.0003935617727447384, + "loss": 4.2184, + "step": 223100 + }, + { + "epoch": 1.62, + "learning_rate": 0.0003935588727860628, + "loss": 4.2363, + "step": 223200 + }, + { + "epoch": 1.62, + "learning_rate": 0.0003935559728273872, + "loss": 4.2352, + "step": 223300 + }, + { + "epoch": 1.62, + "learning_rate": 0.00039355307286871165, + "loss": 4.2389, + "step": 223400 + }, + { + "epoch": 1.62, + "learning_rate": 0.00039355017291003606, + "loss": 4.2157, + "step": 223500 + }, + { + "epoch": 1.62, + "learning_rate": 0.00039354727295136047, + "loss": 4.234, + "step": 223600 + }, + { + "epoch": 1.62, + "learning_rate": 0.0003935443729926849, + "loss": 4.2363, + "step": 223700 + }, + { + "epoch": 1.62, + "learning_rate": 0.0003935414730340093, + "loss": 4.2245, + "step": 223800 + }, + { + "epoch": 1.62, + "learning_rate": 0.0003935385730753337, + "loss": 4.2468, + "step": 223900 + }, + { + "epoch": 1.62, + "learning_rate": 0.0003935356731166581, + "loss": 4.2331, + "step": 224000 + }, + { + "epoch": 1.62, + "learning_rate": 0.00039353277315798255, + "loss": 4.2273, + "step": 224100 + }, + { + "epoch": 1.63, + "learning_rate": 0.00039352987319930696, + "loss": 4.2436, + "step": 224200 + }, + { + "epoch": 1.63, + "learning_rate": 0.00039352700224021806, + "loss": 4.2224, + "step": 224300 + }, + { + "epoch": 1.63, + "learning_rate": 0.0003935241022815425, + "loss": 4.2405, + "step": 224400 + }, + { + "epoch": 1.63, + "learning_rate": 0.00039352120232286693, + "loss": 4.239, + "step": 224500 + }, + { + "epoch": 1.63, + "learning_rate": 0.00039351830236419134, + "loss": 4.2395, + "step": 224600 + }, + { + "epoch": 1.63, + "learning_rate": 0.00039351540240551574, + "loss": 4.2378, + "step": 224700 + }, + { + "epoch": 1.63, + "learning_rate": 0.00039351250244684015, + "loss": 4.2323, + "step": 224800 + }, + { + "epoch": 1.63, + "learning_rate": 0.00039350960248816456, + "loss": 4.2291, + "step": 224900 + }, + { + "epoch": 1.63, + "learning_rate": 0.0003935067315290757, + "loss": 4.2433, + "step": 225000 + }, + { + "epoch": 1.63, + "learning_rate": 0.0003935038315704001, + "loss": 4.2343, + "step": 225100 + }, + { + "epoch": 1.63, + "learning_rate": 0.00039350093161172453, + "loss": 4.2314, + "step": 225200 + }, + { + "epoch": 1.63, + "learning_rate": 0.00039349803165304894, + "loss": 4.2277, + "step": 225300 + }, + { + "epoch": 1.63, + "learning_rate": 0.00039349513169437334, + "loss": 4.2319, + "step": 225400 + }, + { + "epoch": 1.63, + "learning_rate": 0.0003934922317356978, + "loss": 4.2411, + "step": 225500 + }, + { + "epoch": 1.64, + "learning_rate": 0.0003934893317770222, + "loss": 4.2248, + "step": 225600 + }, + { + "epoch": 1.64, + "learning_rate": 0.0003934864318183466, + "loss": 4.2377, + "step": 225700 + }, + { + "epoch": 1.64, + "learning_rate": 0.000393483531859671, + "loss": 4.221, + "step": 225800 + }, + { + "epoch": 1.64, + "learning_rate": 0.00039348063190099543, + "loss": 4.2296, + "step": 225900 + }, + { + "epoch": 1.64, + "learning_rate": 0.00039347773194231984, + "loss": 4.2163, + "step": 226000 + }, + { + "epoch": 1.64, + "learning_rate": 0.00039347483198364424, + "loss": 4.2379, + "step": 226100 + }, + { + "epoch": 1.64, + "learning_rate": 0.0003934719320249687, + "loss": 4.2291, + "step": 226200 + }, + { + "epoch": 1.64, + "learning_rate": 0.0003934690320662931, + "loss": 4.2275, + "step": 226300 + }, + { + "epoch": 1.64, + "learning_rate": 0.00039346613210761746, + "loss": 4.2278, + "step": 226400 + }, + { + "epoch": 1.64, + "learning_rate": 0.00039346323214894187, + "loss": 4.239, + "step": 226500 + }, + { + "epoch": 1.64, + "learning_rate": 0.0003934603321902663, + "loss": 4.2242, + "step": 226600 + }, + { + "epoch": 1.64, + "learning_rate": 0.0003934574322315907, + "loss": 4.2281, + "step": 226700 + }, + { + "epoch": 1.64, + "learning_rate": 0.0003934545322729151, + "loss": 4.2382, + "step": 226800 + }, + { + "epoch": 1.64, + "learning_rate": 0.00039345163231423955, + "loss": 4.2375, + "step": 226900 + }, + { + "epoch": 1.65, + "learning_rate": 0.00039344873235556396, + "loss": 4.2289, + "step": 227000 + }, + { + "epoch": 1.65, + "learning_rate": 0.00039344583239688837, + "loss": 4.2295, + "step": 227100 + }, + { + "epoch": 1.65, + "learning_rate": 0.0003934429324382128, + "loss": 4.2306, + "step": 227200 + }, + { + "epoch": 1.65, + "learning_rate": 0.0003934400324795372, + "loss": 4.2231, + "step": 227300 + }, + { + "epoch": 1.65, + "learning_rate": 0.0003934371325208616, + "loss": 4.2139, + "step": 227400 + }, + { + "epoch": 1.65, + "learning_rate": 0.000393434232562186, + "loss": 4.2382, + "step": 227500 + }, + { + "epoch": 1.65, + "learning_rate": 0.00039343133260351046, + "loss": 4.2306, + "step": 227600 + }, + { + "epoch": 1.65, + "learning_rate": 0.00039342843264483486, + "loss": 4.2278, + "step": 227700 + }, + { + "epoch": 1.65, + "learning_rate": 0.00039342553268615927, + "loss": 4.2354, + "step": 227800 + }, + { + "epoch": 1.65, + "learning_rate": 0.0003934226327274837, + "loss": 4.2277, + "step": 227900 + }, + { + "epoch": 1.65, + "learning_rate": 0.0003934197327688081, + "loss": 4.2312, + "step": 228000 + }, + { + "epoch": 1.65, + "learning_rate": 0.00039341683281013244, + "loss": 4.2366, + "step": 228100 + }, + { + "epoch": 1.65, + "learning_rate": 0.00039341393285145684, + "loss": 4.2361, + "step": 228200 + }, + { + "epoch": 1.66, + "learning_rate": 0.0003934110328927813, + "loss": 4.2283, + "step": 228300 + }, + { + "epoch": 1.66, + "learning_rate": 0.0003934081329341057, + "loss": 4.2341, + "step": 228400 + }, + { + "epoch": 1.66, + "learning_rate": 0.0003934052329754301, + "loss": 4.2399, + "step": 228500 + }, + { + "epoch": 1.66, + "learning_rate": 0.0003934023330167545, + "loss": 4.2355, + "step": 228600 + }, + { + "epoch": 1.66, + "learning_rate": 0.00039339943305807893, + "loss": 4.225, + "step": 228700 + }, + { + "epoch": 1.66, + "learning_rate": 0.00039339653309940334, + "loss": 4.2189, + "step": 228800 + }, + { + "epoch": 1.66, + "learning_rate": 0.00039339363314072775, + "loss": 4.2301, + "step": 228900 + }, + { + "epoch": 1.66, + "learning_rate": 0.0003933907331820522, + "loss": 4.2339, + "step": 229000 + }, + { + "epoch": 1.66, + "learning_rate": 0.0003933878332233766, + "loss": 4.245, + "step": 229100 + }, + { + "epoch": 1.66, + "learning_rate": 0.000393384933264701, + "loss": 4.2378, + "step": 229200 + }, + { + "epoch": 1.66, + "learning_rate": 0.0003933820333060254, + "loss": 4.2221, + "step": 229300 + }, + { + "epoch": 1.66, + "learning_rate": 0.00039337913334734983, + "loss": 4.2356, + "step": 229400 + }, + { + "epoch": 1.66, + "learning_rate": 0.00039337623338867424, + "loss": 4.2163, + "step": 229500 + }, + { + "epoch": 1.66, + "learning_rate": 0.00039337333342999865, + "loss": 4.2289, + "step": 229600 + }, + { + "epoch": 1.67, + "learning_rate": 0.00039337043347132305, + "loss": 4.2284, + "step": 229700 + }, + { + "epoch": 1.67, + "learning_rate": 0.00039336753351264746, + "loss": 4.2371, + "step": 229800 + }, + { + "epoch": 1.67, + "learning_rate": 0.0003933646625535586, + "loss": 4.2255, + "step": 229900 + }, + { + "epoch": 1.67, + "learning_rate": 0.000393361762594883, + "loss": 4.2097, + "step": 230000 + }, + { + "epoch": 1.67, + "learning_rate": 0.0003933588626362075, + "loss": 4.2304, + "step": 230100 + }, + { + "epoch": 1.67, + "learning_rate": 0.0003933559626775319, + "loss": 4.2308, + "step": 230200 + }, + { + "epoch": 1.67, + "learning_rate": 0.0003933530627188563, + "loss": 4.2288, + "step": 230300 + }, + { + "epoch": 1.67, + "learning_rate": 0.0003933501627601807, + "loss": 4.2227, + "step": 230400 + }, + { + "epoch": 1.67, + "learning_rate": 0.0003933472628015051, + "loss": 4.2218, + "step": 230500 + }, + { + "epoch": 1.67, + "learning_rate": 0.0003933443628428295, + "loss": 4.2297, + "step": 230600 + }, + { + "epoch": 1.67, + "learning_rate": 0.0003933414628841539, + "loss": 4.2276, + "step": 230700 + }, + { + "epoch": 1.67, + "learning_rate": 0.00039333856292547833, + "loss": 4.2367, + "step": 230800 + }, + { + "epoch": 1.67, + "learning_rate": 0.00039333566296680274, + "loss": 4.23, + "step": 230900 + }, + { + "epoch": 1.67, + "learning_rate": 0.00039333276300812715, + "loss": 4.2248, + "step": 231000 + }, + { + "epoch": 1.68, + "learning_rate": 0.00039332986304945155, + "loss": 4.239, + "step": 231100 + }, + { + "epoch": 1.68, + "learning_rate": 0.00039332696309077596, + "loss": 4.2214, + "step": 231200 + }, + { + "epoch": 1.68, + "learning_rate": 0.00039332406313210037, + "loss": 4.2281, + "step": 231300 + }, + { + "epoch": 1.68, + "learning_rate": 0.0003933211631734248, + "loss": 4.2197, + "step": 231400 + }, + { + "epoch": 1.68, + "learning_rate": 0.00039331826321474924, + "loss": 4.2234, + "step": 231500 + }, + { + "epoch": 1.68, + "learning_rate": 0.00039331536325607364, + "loss": 4.2262, + "step": 231600 + }, + { + "epoch": 1.68, + "learning_rate": 0.00039331246329739805, + "loss": 4.2361, + "step": 231700 + }, + { + "epoch": 1.68, + "learning_rate": 0.00039330956333872246, + "loss": 4.2284, + "step": 231800 + }, + { + "epoch": 1.68, + "learning_rate": 0.00039330666338004686, + "loss": 4.2252, + "step": 231900 + }, + { + "epoch": 1.68, + "learning_rate": 0.00039330376342137127, + "loss": 4.2284, + "step": 232000 + }, + { + "epoch": 1.68, + "learning_rate": 0.0003933008634626957, + "loss": 4.2286, + "step": 232100 + }, + { + "epoch": 1.68, + "learning_rate": 0.0003932979635040201, + "loss": 4.2302, + "step": 232200 + }, + { + "epoch": 1.68, + "learning_rate": 0.0003932950635453445, + "loss": 4.2365, + "step": 232300 + }, + { + "epoch": 1.68, + "learning_rate": 0.0003932921635866689, + "loss": 4.231, + "step": 232400 + }, + { + "epoch": 1.69, + "learning_rate": 0.0003932892636279933, + "loss": 4.2278, + "step": 232500 + }, + { + "epoch": 1.69, + "learning_rate": 0.0003932863636693177, + "loss": 4.216, + "step": 232600 + }, + { + "epoch": 1.69, + "learning_rate": 0.0003932834637106421, + "loss": 4.239, + "step": 232700 + }, + { + "epoch": 1.69, + "learning_rate": 0.0003932805637519665, + "loss": 4.2304, + "step": 232800 + }, + { + "epoch": 1.69, + "learning_rate": 0.000393277663793291, + "loss": 4.2207, + "step": 232900 + }, + { + "epoch": 1.69, + "learning_rate": 0.0003932747638346154, + "loss": 4.2322, + "step": 233000 + }, + { + "epoch": 1.69, + "learning_rate": 0.0003932718638759398, + "loss": 4.2309, + "step": 233100 + }, + { + "epoch": 1.69, + "learning_rate": 0.0003932689639172642, + "loss": 4.226, + "step": 233200 + }, + { + "epoch": 1.69, + "learning_rate": 0.0003932660639585886, + "loss": 4.2244, + "step": 233300 + }, + { + "epoch": 1.69, + "learning_rate": 0.000393263163999913, + "loss": 4.231, + "step": 233400 + }, + { + "epoch": 1.69, + "learning_rate": 0.00039326026404123743, + "loss": 4.226, + "step": 233500 + }, + { + "epoch": 1.69, + "learning_rate": 0.0003932573640825619, + "loss": 4.2258, + "step": 233600 + }, + { + "epoch": 1.69, + "learning_rate": 0.00039325446412388624, + "loss": 4.2397, + "step": 233700 + }, + { + "epoch": 1.69, + "learning_rate": 0.00039325156416521065, + "loss": 4.2206, + "step": 233800 + }, + { + "epoch": 1.7, + "learning_rate": 0.00039324866420653506, + "loss": 4.2324, + "step": 233900 + }, + { + "epoch": 1.7, + "learning_rate": 0.00039324579324744627, + "loss": 4.2359, + "step": 234000 + }, + { + "epoch": 1.7, + "learning_rate": 0.00039324289328877067, + "loss": 4.2297, + "step": 234100 + }, + { + "epoch": 1.7, + "learning_rate": 0.0003932399933300951, + "loss": 4.2206, + "step": 234200 + }, + { + "epoch": 1.7, + "learning_rate": 0.0003932370933714195, + "loss": 4.224, + "step": 234300 + }, + { + "epoch": 1.7, + "learning_rate": 0.0003932341934127439, + "loss": 4.2439, + "step": 234400 + }, + { + "epoch": 1.7, + "learning_rate": 0.0003932312934540683, + "loss": 4.2251, + "step": 234500 + }, + { + "epoch": 1.7, + "learning_rate": 0.0003932283934953927, + "loss": 4.2433, + "step": 234600 + }, + { + "epoch": 1.7, + "learning_rate": 0.0003932254935367171, + "loss": 4.2327, + "step": 234700 + }, + { + "epoch": 1.7, + "learning_rate": 0.0003932225935780415, + "loss": 4.2289, + "step": 234800 + }, + { + "epoch": 1.7, + "learning_rate": 0.0003932196936193659, + "loss": 4.2257, + "step": 234900 + }, + { + "epoch": 1.7, + "learning_rate": 0.00039321679366069033, + "loss": 4.2312, + "step": 235000 + }, + { + "epoch": 1.7, + "learning_rate": 0.00039321389370201474, + "loss": 4.2348, + "step": 235100 + }, + { + "epoch": 1.71, + "learning_rate": 0.00039321099374333915, + "loss": 4.2285, + "step": 235200 + }, + { + "epoch": 1.71, + "learning_rate": 0.00039320809378466355, + "loss": 4.2272, + "step": 235300 + }, + { + "epoch": 1.71, + "learning_rate": 0.000393205193825988, + "loss": 4.2333, + "step": 235400 + }, + { + "epoch": 1.71, + "learning_rate": 0.0003932022938673124, + "loss": 4.2256, + "step": 235500 + }, + { + "epoch": 1.71, + "learning_rate": 0.00039319939390863683, + "loss": 4.2377, + "step": 235600 + }, + { + "epoch": 1.71, + "learning_rate": 0.00039319649394996124, + "loss": 4.2303, + "step": 235700 + }, + { + "epoch": 1.71, + "learning_rate": 0.0003931936229908724, + "loss": 4.2184, + "step": 235800 + }, + { + "epoch": 1.71, + "learning_rate": 0.0003931907230321968, + "loss": 4.2228, + "step": 235900 + }, + { + "epoch": 1.71, + "learning_rate": 0.0003931878230735212, + "loss": 4.2351, + "step": 236000 + }, + { + "epoch": 1.71, + "learning_rate": 0.0003931849231148456, + "loss": 4.2311, + "step": 236100 + }, + { + "epoch": 1.71, + "learning_rate": 0.00039318202315617, + "loss": 4.2358, + "step": 236200 + }, + { + "epoch": 1.71, + "learning_rate": 0.0003931791231974944, + "loss": 4.2354, + "step": 236300 + }, + { + "epoch": 1.71, + "learning_rate": 0.0003931762232388189, + "loss": 4.23, + "step": 236400 + }, + { + "epoch": 1.71, + "learning_rate": 0.0003931733232801433, + "loss": 4.2267, + "step": 236500 + }, + { + "epoch": 1.72, + "learning_rate": 0.0003931704233214677, + "loss": 4.2271, + "step": 236600 + }, + { + "epoch": 1.72, + "learning_rate": 0.0003931675233627921, + "loss": 4.2358, + "step": 236700 + }, + { + "epoch": 1.72, + "learning_rate": 0.0003931646234041165, + "loss": 4.2196, + "step": 236800 + }, + { + "epoch": 1.72, + "learning_rate": 0.0003931617234454409, + "loss": 4.2224, + "step": 236900 + }, + { + "epoch": 1.72, + "learning_rate": 0.00039315882348676533, + "loss": 4.2144, + "step": 237000 + }, + { + "epoch": 1.72, + "learning_rate": 0.0003931559235280898, + "loss": 4.2322, + "step": 237100 + }, + { + "epoch": 1.72, + "learning_rate": 0.00039315302356941414, + "loss": 4.2376, + "step": 237200 + }, + { + "epoch": 1.72, + "learning_rate": 0.00039315012361073855, + "loss": 4.225, + "step": 237300 + }, + { + "epoch": 1.72, + "learning_rate": 0.00039314722365206296, + "loss": 4.2386, + "step": 237400 + }, + { + "epoch": 1.72, + "learning_rate": 0.00039314432369338736, + "loss": 4.2344, + "step": 237500 + }, + { + "epoch": 1.72, + "learning_rate": 0.00039314142373471177, + "loss": 4.2215, + "step": 237600 + }, + { + "epoch": 1.72, + "learning_rate": 0.0003931385237760362, + "loss": 4.2233, + "step": 237700 + }, + { + "epoch": 1.72, + "learning_rate": 0.00039313562381736064, + "loss": 4.2414, + "step": 237800 + }, + { + "epoch": 1.72, + "learning_rate": 0.00039313272385868504, + "loss": 4.2174, + "step": 237900 + }, + { + "epoch": 1.73, + "learning_rate": 0.00039312982390000945, + "loss": 4.2208, + "step": 238000 + }, + { + "epoch": 1.73, + "learning_rate": 0.00039312692394133386, + "loss": 4.2431, + "step": 238100 + }, + { + "epoch": 1.73, + "learning_rate": 0.00039312402398265827, + "loss": 4.2315, + "step": 238200 + }, + { + "epoch": 1.73, + "learning_rate": 0.00039312112402398267, + "loss": 4.2365, + "step": 238300 + }, + { + "epoch": 1.73, + "learning_rate": 0.0003931182240653071, + "loss": 4.2297, + "step": 238400 + }, + { + "epoch": 1.73, + "learning_rate": 0.0003931153241066315, + "loss": 4.2325, + "step": 238500 + }, + { + "epoch": 1.73, + "learning_rate": 0.00039311242414795595, + "loss": 4.2347, + "step": 238600 + }, + { + "epoch": 1.73, + "learning_rate": 0.00039310952418928035, + "loss": 4.2263, + "step": 238700 + }, + { + "epoch": 1.73, + "learning_rate": 0.00039310662423060476, + "loss": 4.2114, + "step": 238800 + }, + { + "epoch": 1.73, + "learning_rate": 0.00039310372427192917, + "loss": 4.2334, + "step": 238900 + }, + { + "epoch": 1.73, + "learning_rate": 0.0003931008243132535, + "loss": 4.2277, + "step": 239000 + }, + { + "epoch": 1.73, + "learning_rate": 0.00039309792435457793, + "loss": 4.231, + "step": 239100 + }, + { + "epoch": 1.73, + "learning_rate": 0.00039309502439590233, + "loss": 4.2326, + "step": 239200 + }, + { + "epoch": 1.73, + "learning_rate": 0.0003930921244372268, + "loss": 4.2242, + "step": 239300 + }, + { + "epoch": 1.74, + "learning_rate": 0.0003930892244785512, + "loss": 4.2293, + "step": 239400 + }, + { + "epoch": 1.74, + "learning_rate": 0.00039308635351946236, + "loss": 4.2328, + "step": 239500 + }, + { + "epoch": 1.74, + "learning_rate": 0.0003930834535607868, + "loss": 4.2252, + "step": 239600 + }, + { + "epoch": 1.74, + "learning_rate": 0.0003930805536021112, + "loss": 4.2187, + "step": 239700 + }, + { + "epoch": 1.74, + "learning_rate": 0.00039307765364343563, + "loss": 4.2292, + "step": 239800 + }, + { + "epoch": 1.74, + "learning_rate": 0.00039307475368476, + "loss": 4.22, + "step": 239900 + }, + { + "epoch": 1.74, + "learning_rate": 0.0003930718537260844, + "loss": 4.2233, + "step": 240000 + }, + { + "epoch": 1.74, + "learning_rate": 0.0003930689537674088, + "loss": 4.22, + "step": 240100 + }, + { + "epoch": 1.74, + "learning_rate": 0.0003930660538087332, + "loss": 4.2232, + "step": 240200 + }, + { + "epoch": 1.74, + "learning_rate": 0.00039306315385005767, + "loss": 4.2287, + "step": 240300 + }, + { + "epoch": 1.74, + "learning_rate": 0.0003930602538913821, + "loss": 4.2309, + "step": 240400 + }, + { + "epoch": 1.74, + "learning_rate": 0.0003930573539327065, + "loss": 4.2298, + "step": 240500 + }, + { + "epoch": 1.74, + "learning_rate": 0.0003930544539740309, + "loss": 4.2257, + "step": 240600 + }, + { + "epoch": 1.74, + "learning_rate": 0.0003930515540153553, + "loss": 4.2141, + "step": 240700 + }, + { + "epoch": 1.75, + "learning_rate": 0.0003930486540566797, + "loss": 4.2245, + "step": 240800 + }, + { + "epoch": 1.75, + "learning_rate": 0.0003930457540980041, + "loss": 4.2244, + "step": 240900 + }, + { + "epoch": 1.75, + "learning_rate": 0.00039304285413932857, + "loss": 4.2254, + "step": 241000 + }, + { + "epoch": 1.75, + "learning_rate": 0.000393039954180653, + "loss": 4.2043, + "step": 241100 + }, + { + "epoch": 1.75, + "learning_rate": 0.0003930370542219774, + "loss": 4.2302, + "step": 241200 + }, + { + "epoch": 1.75, + "learning_rate": 0.0003930341542633018, + "loss": 4.225, + "step": 241300 + }, + { + "epoch": 1.75, + "learning_rate": 0.0003930312543046262, + "loss": 4.2296, + "step": 241400 + }, + { + "epoch": 1.75, + "learning_rate": 0.0003930283543459506, + "loss": 4.2264, + "step": 241500 + }, + { + "epoch": 1.75, + "learning_rate": 0.00039302545438727496, + "loss": 4.2262, + "step": 241600 + }, + { + "epoch": 1.75, + "learning_rate": 0.0003930225544285994, + "loss": 4.2385, + "step": 241700 + }, + { + "epoch": 1.75, + "learning_rate": 0.0003930196544699238, + "loss": 4.2271, + "step": 241800 + }, + { + "epoch": 1.75, + "learning_rate": 0.00039301675451124823, + "loss": 4.2234, + "step": 241900 + }, + { + "epoch": 1.75, + "learning_rate": 0.00039301385455257264, + "loss": 4.2112, + "step": 242000 + }, + { + "epoch": 1.76, + "learning_rate": 0.00039301095459389705, + "loss": 4.2178, + "step": 242100 + }, + { + "epoch": 1.76, + "learning_rate": 0.00039300805463522145, + "loss": 4.2258, + "step": 242200 + }, + { + "epoch": 1.76, + "learning_rate": 0.00039300515467654586, + "loss": 4.2245, + "step": 242300 + }, + { + "epoch": 1.76, + "learning_rate": 0.0003930022547178703, + "loss": 4.223, + "step": 242400 + }, + { + "epoch": 1.76, + "learning_rate": 0.0003929993547591947, + "loss": 4.2436, + "step": 242500 + }, + { + "epoch": 1.76, + "learning_rate": 0.00039299645480051913, + "loss": 4.2258, + "step": 242600 + }, + { + "epoch": 1.76, + "learning_rate": 0.00039299355484184354, + "loss": 4.2238, + "step": 242700 + }, + { + "epoch": 1.76, + "learning_rate": 0.00039299065488316795, + "loss": 4.2189, + "step": 242800 + }, + { + "epoch": 1.76, + "learning_rate": 0.00039298775492449235, + "loss": 4.2298, + "step": 242900 + }, + { + "epoch": 1.76, + "learning_rate": 0.00039298485496581676, + "loss": 4.2431, + "step": 243000 + }, + { + "epoch": 1.76, + "learning_rate": 0.00039298195500714117, + "loss": 4.2353, + "step": 243100 + }, + { + "epoch": 1.76, + "learning_rate": 0.0003929790550484656, + "loss": 4.2219, + "step": 243200 + }, + { + "epoch": 1.76, + "learning_rate": 0.00039297615508979, + "loss": 4.2176, + "step": 243300 + }, + { + "epoch": 1.76, + "learning_rate": 0.0003929732551311144, + "loss": 4.2246, + "step": 243400 + }, + { + "epoch": 1.77, + "learning_rate": 0.0003929703551724388, + "loss": 4.2195, + "step": 243500 + }, + { + "epoch": 1.77, + "learning_rate": 0.0003929674552137632, + "loss": 4.2338, + "step": 243600 + }, + { + "epoch": 1.77, + "learning_rate": 0.0003929645552550876, + "loss": 4.2263, + "step": 243700 + }, + { + "epoch": 1.77, + "learning_rate": 0.0003929616842959988, + "loss": 4.224, + "step": 243800 + }, + { + "epoch": 1.77, + "learning_rate": 0.0003929587843373232, + "loss": 4.2195, + "step": 243900 + }, + { + "epoch": 1.77, + "learning_rate": 0.00039295588437864763, + "loss": 4.2309, + "step": 244000 + }, + { + "epoch": 1.77, + "learning_rate": 0.00039295298441997204, + "loss": 4.2238, + "step": 244100 + }, + { + "epoch": 1.77, + "learning_rate": 0.00039295008446129645, + "loss": 4.2373, + "step": 244200 + }, + { + "epoch": 1.77, + "learning_rate": 0.00039294718450262085, + "loss": 4.2218, + "step": 244300 + }, + { + "epoch": 1.77, + "learning_rate": 0.00039294428454394526, + "loss": 4.2175, + "step": 244400 + }, + { + "epoch": 1.77, + "learning_rate": 0.00039294138458526967, + "loss": 4.224, + "step": 244500 + }, + { + "epoch": 1.77, + "learning_rate": 0.0003929384846265941, + "loss": 4.2303, + "step": 244600 + }, + { + "epoch": 1.77, + "learning_rate": 0.0003929355846679185, + "loss": 4.2257, + "step": 244700 + }, + { + "epoch": 1.77, + "learning_rate": 0.0003929326847092429, + "loss": 4.2332, + "step": 244800 + }, + { + "epoch": 1.78, + "learning_rate": 0.00039292978475056735, + "loss": 4.2152, + "step": 244900 + }, + { + "epoch": 1.78, + "learning_rate": 0.00039292688479189176, + "loss": 4.2379, + "step": 245000 + }, + { + "epoch": 1.78, + "learning_rate": 0.00039292398483321616, + "loss": 4.2345, + "step": 245100 + }, + { + "epoch": 1.78, + "learning_rate": 0.00039292108487454057, + "loss": 4.2207, + "step": 245200 + }, + { + "epoch": 1.78, + "learning_rate": 0.000392918184915865, + "loss": 4.2377, + "step": 245300 + }, + { + "epoch": 1.78, + "learning_rate": 0.0003929152849571894, + "loss": 4.2335, + "step": 245400 + }, + { + "epoch": 1.78, + "learning_rate": 0.0003929123849985138, + "loss": 4.2273, + "step": 245500 + }, + { + "epoch": 1.78, + "learning_rate": 0.0003929094850398382, + "loss": 4.2263, + "step": 245600 + }, + { + "epoch": 1.78, + "learning_rate": 0.0003929065850811626, + "loss": 4.2214, + "step": 245700 + }, + { + "epoch": 1.78, + "learning_rate": 0.00039290371412207376, + "loss": 4.2245, + "step": 245800 + }, + { + "epoch": 1.78, + "learning_rate": 0.0003929008141633982, + "loss": 4.2241, + "step": 245900 + }, + { + "epoch": 1.78, + "learning_rate": 0.00039289791420472263, + "loss": 4.2278, + "step": 246000 + }, + { + "epoch": 1.78, + "learning_rate": 0.00039289501424604704, + "loss": 4.2162, + "step": 246100 + }, + { + "epoch": 1.78, + "learning_rate": 0.00039289211428737144, + "loss": 4.2221, + "step": 246200 + }, + { + "epoch": 1.79, + "learning_rate": 0.00039288921432869585, + "loss": 4.2138, + "step": 246300 + }, + { + "epoch": 1.79, + "learning_rate": 0.00039288631437002026, + "loss": 4.224, + "step": 246400 + }, + { + "epoch": 1.79, + "learning_rate": 0.00039288341441134466, + "loss": 4.2243, + "step": 246500 + }, + { + "epoch": 1.79, + "learning_rate": 0.00039288051445266907, + "loss": 4.2204, + "step": 246600 + }, + { + "epoch": 1.79, + "learning_rate": 0.0003928776144939935, + "loss": 4.2208, + "step": 246700 + }, + { + "epoch": 1.79, + "learning_rate": 0.0003928747145353179, + "loss": 4.228, + "step": 246800 + }, + { + "epoch": 1.79, + "learning_rate": 0.0003928718145766423, + "loss": 4.2242, + "step": 246900 + }, + { + "epoch": 1.79, + "learning_rate": 0.0003928689146179667, + "loss": 4.2235, + "step": 247000 + }, + { + "epoch": 1.79, + "learning_rate": 0.0003928660146592911, + "loss": 4.2304, + "step": 247100 + }, + { + "epoch": 1.79, + "learning_rate": 0.0003928631147006155, + "loss": 4.235, + "step": 247200 + }, + { + "epoch": 1.79, + "learning_rate": 0.0003928602147419399, + "loss": 4.2272, + "step": 247300 + }, + { + "epoch": 1.79, + "learning_rate": 0.0003928573147832644, + "loss": 4.2281, + "step": 247400 + }, + { + "epoch": 1.79, + "learning_rate": 0.0003928544148245888, + "loss": 4.2351, + "step": 247500 + }, + { + "epoch": 1.79, + "learning_rate": 0.0003928515148659132, + "loss": 4.2236, + "step": 247600 + }, + { + "epoch": 1.8, + "learning_rate": 0.0003928486149072376, + "loss": 4.2227, + "step": 247700 + }, + { + "epoch": 1.8, + "learning_rate": 0.00039284574394814876, + "loss": 4.2412, + "step": 247800 + }, + { + "epoch": 1.8, + "learning_rate": 0.00039284284398947316, + "loss": 4.2321, + "step": 247900 + }, + { + "epoch": 1.8, + "learning_rate": 0.00039283994403079757, + "loss": 4.2221, + "step": 248000 + }, + { + "epoch": 1.8, + "learning_rate": 0.000392837044072122, + "loss": 4.2251, + "step": 248100 + }, + { + "epoch": 1.8, + "learning_rate": 0.0003928341441134464, + "loss": 4.2212, + "step": 248200 + }, + { + "epoch": 1.8, + "learning_rate": 0.0003928312441547708, + "loss": 4.2191, + "step": 248300 + }, + { + "epoch": 1.8, + "learning_rate": 0.00039282834419609525, + "loss": 4.2285, + "step": 248400 + }, + { + "epoch": 1.8, + "learning_rate": 0.00039282544423741966, + "loss": 4.221, + "step": 248500 + }, + { + "epoch": 1.8, + "learning_rate": 0.00039282254427874406, + "loss": 4.2246, + "step": 248600 + }, + { + "epoch": 1.8, + "learning_rate": 0.00039281964432006847, + "loss": 4.2064, + "step": 248700 + }, + { + "epoch": 1.8, + "learning_rate": 0.0003928167443613929, + "loss": 4.2218, + "step": 248800 + }, + { + "epoch": 1.8, + "learning_rate": 0.0003928138444027173, + "loss": 4.2196, + "step": 248900 + }, + { + "epoch": 1.81, + "learning_rate": 0.0003928109444440417, + "loss": 4.2296, + "step": 249000 + }, + { + "epoch": 1.81, + "learning_rate": 0.0003928080444853661, + "loss": 4.2191, + "step": 249100 + }, + { + "epoch": 1.81, + "learning_rate": 0.0003928051445266905, + "loss": 4.2281, + "step": 249200 + }, + { + "epoch": 1.81, + "learning_rate": 0.0003928022445680149, + "loss": 4.2367, + "step": 249300 + }, + { + "epoch": 1.81, + "learning_rate": 0.0003927993446093393, + "loss": 4.2158, + "step": 249400 + }, + { + "epoch": 1.81, + "learning_rate": 0.0003927964446506637, + "loss": 4.233, + "step": 249500 + }, + { + "epoch": 1.81, + "learning_rate": 0.00039279354469198813, + "loss": 4.2208, + "step": 249600 + }, + { + "epoch": 1.81, + "learning_rate": 0.00039279064473331254, + "loss": 4.238, + "step": 249700 + }, + { + "epoch": 1.81, + "learning_rate": 0.000392787744774637, + "loss": 4.2319, + "step": 249800 + }, + { + "epoch": 1.81, + "learning_rate": 0.0003927848448159614, + "loss": 4.2339, + "step": 249900 + }, + { + "epoch": 1.81, + "learning_rate": 0.0003927819448572858, + "loss": 4.2201, + "step": 250000 + }, + { + "epoch": 1.81, + "learning_rate": 0.00039277907389819697, + "loss": 4.2269, + "step": 250100 + }, + { + "epoch": 1.81, + "learning_rate": 0.0003927761739395214, + "loss": 4.2325, + "step": 250200 + }, + { + "epoch": 1.81, + "learning_rate": 0.0003927732739808458, + "loss": 4.2191, + "step": 250300 + }, + { + "epoch": 1.82, + "learning_rate": 0.0003927703740221702, + "loss": 4.2182, + "step": 250400 + }, + { + "epoch": 1.82, + "learning_rate": 0.0003927674740634946, + "loss": 4.2323, + "step": 250500 + }, + { + "epoch": 1.82, + "learning_rate": 0.000392764574104819, + "loss": 4.2055, + "step": 250600 + }, + { + "epoch": 1.82, + "learning_rate": 0.0003927616741461434, + "loss": 4.2199, + "step": 250700 + }, + { + "epoch": 1.82, + "learning_rate": 0.0003927587741874678, + "loss": 4.22, + "step": 250800 + }, + { + "epoch": 1.82, + "learning_rate": 0.0003927558742287923, + "loss": 4.2247, + "step": 250900 + }, + { + "epoch": 1.82, + "learning_rate": 0.0003927529742701167, + "loss": 4.2144, + "step": 251000 + }, + { + "epoch": 1.82, + "learning_rate": 0.0003927500743114411, + "loss": 4.2157, + "step": 251100 + }, + { + "epoch": 1.82, + "learning_rate": 0.0003927471743527655, + "loss": 4.2259, + "step": 251200 + }, + { + "epoch": 1.82, + "learning_rate": 0.0003927442743940899, + "loss": 4.2176, + "step": 251300 + }, + { + "epoch": 1.82, + "learning_rate": 0.0003927413744354143, + "loss": 4.2097, + "step": 251400 + }, + { + "epoch": 1.82, + "learning_rate": 0.0003927384744767387, + "loss": 4.228, + "step": 251500 + }, + { + "epoch": 1.82, + "learning_rate": 0.00039273557451806313, + "loss": 4.2199, + "step": 251600 + }, + { + "epoch": 1.82, + "learning_rate": 0.00039273267455938753, + "loss": 4.2343, + "step": 251700 + }, + { + "epoch": 1.83, + "learning_rate": 0.00039272977460071194, + "loss": 4.2322, + "step": 251800 + }, + { + "epoch": 1.83, + "learning_rate": 0.00039272687464203635, + "loss": 4.2139, + "step": 251900 + }, + { + "epoch": 1.83, + "learning_rate": 0.00039272397468336076, + "loss": 4.2159, + "step": 252000 + }, + { + "epoch": 1.83, + "learning_rate": 0.00039272110372427197, + "loss": 4.2161, + "step": 252100 + }, + { + "epoch": 1.83, + "learning_rate": 0.00039271820376559637, + "loss": 4.2156, + "step": 252200 + }, + { + "epoch": 1.83, + "learning_rate": 0.0003927153038069208, + "loss": 4.2229, + "step": 252300 + }, + { + "epoch": 1.83, + "learning_rate": 0.0003927124038482452, + "loss": 4.2403, + "step": 252400 + }, + { + "epoch": 1.83, + "learning_rate": 0.0003927095038895696, + "loss": 4.2282, + "step": 252500 + }, + { + "epoch": 1.83, + "learning_rate": 0.000392706603930894, + "loss": 4.2188, + "step": 252600 + }, + { + "epoch": 1.83, + "learning_rate": 0.0003927037039722184, + "loss": 4.2273, + "step": 252700 + }, + { + "epoch": 1.83, + "learning_rate": 0.0003927008040135428, + "loss": 4.2255, + "step": 252800 + }, + { + "epoch": 1.83, + "learning_rate": 0.0003926979040548672, + "loss": 4.2204, + "step": 252900 + }, + { + "epoch": 1.83, + "learning_rate": 0.00039269500409619163, + "loss": 4.2294, + "step": 253000 + }, + { + "epoch": 1.83, + "learning_rate": 0.00039269210413751603, + "loss": 4.2034, + "step": 253100 + }, + { + "epoch": 1.84, + "learning_rate": 0.00039268920417884044, + "loss": 4.231, + "step": 253200 + }, + { + "epoch": 1.84, + "learning_rate": 0.0003926863042201649, + "loss": 4.2291, + "step": 253300 + }, + { + "epoch": 1.84, + "learning_rate": 0.0003926834042614893, + "loss": 4.2436, + "step": 253400 + }, + { + "epoch": 1.84, + "learning_rate": 0.0003926805043028137, + "loss": 4.2281, + "step": 253500 + }, + { + "epoch": 1.84, + "learning_rate": 0.0003926776043441381, + "loss": 4.2249, + "step": 253600 + }, + { + "epoch": 1.84, + "learning_rate": 0.00039267470438546253, + "loss": 4.2182, + "step": 253700 + }, + { + "epoch": 1.84, + "learning_rate": 0.00039267180442678694, + "loss": 4.2127, + "step": 253800 + }, + { + "epoch": 1.84, + "learning_rate": 0.00039266890446811134, + "loss": 4.2165, + "step": 253900 + }, + { + "epoch": 1.84, + "learning_rate": 0.0003926660045094358, + "loss": 4.2194, + "step": 254000 + }, + { + "epoch": 1.84, + "learning_rate": 0.0003926631045507602, + "loss": 4.2148, + "step": 254100 + }, + { + "epoch": 1.84, + "learning_rate": 0.00039266020459208456, + "loss": 4.2077, + "step": 254200 + }, + { + "epoch": 1.84, + "learning_rate": 0.0003926573336329957, + "loss": 4.2151, + "step": 254300 + }, + { + "epoch": 1.84, + "learning_rate": 0.0003926544336743202, + "loss": 4.2235, + "step": 254400 + }, + { + "epoch": 1.84, + "learning_rate": 0.0003926515337156446, + "loss": 4.2213, + "step": 254500 + }, + { + "epoch": 1.85, + "learning_rate": 0.000392648633756969, + "loss": 4.2239, + "step": 254600 + }, + { + "epoch": 1.85, + "learning_rate": 0.0003926457337982934, + "loss": 4.226, + "step": 254700 + }, + { + "epoch": 1.85, + "learning_rate": 0.0003926428338396178, + "loss": 4.2269, + "step": 254800 + }, + { + "epoch": 1.85, + "learning_rate": 0.0003926399338809422, + "loss": 4.213, + "step": 254900 + }, + { + "epoch": 1.85, + "learning_rate": 0.0003926370339222666, + "loss": 4.2274, + "step": 255000 + }, + { + "epoch": 1.85, + "learning_rate": 0.00039263413396359103, + "loss": 4.2176, + "step": 255100 + }, + { + "epoch": 1.85, + "learning_rate": 0.00039263123400491544, + "loss": 4.2138, + "step": 255200 + }, + { + "epoch": 1.85, + "learning_rate": 0.00039262833404623984, + "loss": 4.2083, + "step": 255300 + }, + { + "epoch": 1.85, + "learning_rate": 0.00039262543408756425, + "loss": 4.2196, + "step": 255400 + }, + { + "epoch": 1.85, + "learning_rate": 0.00039262253412888866, + "loss": 4.2281, + "step": 255500 + }, + { + "epoch": 1.85, + "learning_rate": 0.00039261963417021306, + "loss": 4.2215, + "step": 255600 + }, + { + "epoch": 1.85, + "learning_rate": 0.00039261673421153747, + "loss": 4.2318, + "step": 255700 + }, + { + "epoch": 1.85, + "learning_rate": 0.00039261383425286193, + "loss": 4.2221, + "step": 255800 + }, + { + "epoch": 1.86, + "learning_rate": 0.00039261093429418634, + "loss": 4.2227, + "step": 255900 + }, + { + "epoch": 1.86, + "learning_rate": 0.00039260803433551075, + "loss": 4.2153, + "step": 256000 + }, + { + "epoch": 1.86, + "learning_rate": 0.00039260513437683515, + "loss": 4.2244, + "step": 256100 + }, + { + "epoch": 1.86, + "learning_rate": 0.00039260223441815956, + "loss": 4.2196, + "step": 256200 + }, + { + "epoch": 1.86, + "learning_rate": 0.0003925993634590707, + "loss": 4.2343, + "step": 256300 + }, + { + "epoch": 1.86, + "learning_rate": 0.0003925964635003951, + "loss": 4.2156, + "step": 256400 + }, + { + "epoch": 1.86, + "learning_rate": 0.00039259356354171953, + "loss": 4.2363, + "step": 256500 + }, + { + "epoch": 1.86, + "learning_rate": 0.00039259066358304394, + "loss": 4.2211, + "step": 256600 + }, + { + "epoch": 1.86, + "learning_rate": 0.00039258776362436834, + "loss": 4.2262, + "step": 256700 + }, + { + "epoch": 1.86, + "learning_rate": 0.0003925848636656928, + "loss": 4.2162, + "step": 256800 + }, + { + "epoch": 1.86, + "learning_rate": 0.0003925819637070172, + "loss": 4.2257, + "step": 256900 + }, + { + "epoch": 1.86, + "learning_rate": 0.0003925790637483416, + "loss": 4.2235, + "step": 257000 + }, + { + "epoch": 1.86, + "learning_rate": 0.000392576163789666, + "loss": 4.2267, + "step": 257100 + }, + { + "epoch": 1.86, + "learning_rate": 0.00039257326383099043, + "loss": 4.2233, + "step": 257200 + }, + { + "epoch": 1.87, + "learning_rate": 0.00039257036387231484, + "loss": 4.2195, + "step": 257300 + }, + { + "epoch": 1.87, + "learning_rate": 0.00039256746391363924, + "loss": 4.2235, + "step": 257400 + }, + { + "epoch": 1.87, + "learning_rate": 0.0003925645639549637, + "loss": 4.2104, + "step": 257500 + }, + { + "epoch": 1.87, + "learning_rate": 0.0003925616639962881, + "loss": 4.225, + "step": 257600 + }, + { + "epoch": 1.87, + "learning_rate": 0.00039255876403761247, + "loss": 4.2281, + "step": 257700 + }, + { + "epoch": 1.87, + "learning_rate": 0.00039255586407893687, + "loss": 4.2072, + "step": 257800 + }, + { + "epoch": 1.87, + "learning_rate": 0.0003925529641202613, + "loss": 4.2268, + "step": 257900 + }, + { + "epoch": 1.87, + "learning_rate": 0.0003925500641615857, + "loss": 4.217, + "step": 258000 + }, + { + "epoch": 1.87, + "learning_rate": 0.0003925471642029101, + "loss": 4.2218, + "step": 258100 + }, + { + "epoch": 1.87, + "learning_rate": 0.0003925442642442345, + "loss": 4.2229, + "step": 258200 + }, + { + "epoch": 1.87, + "learning_rate": 0.00039254136428555896, + "loss": 4.214, + "step": 258300 + }, + { + "epoch": 1.87, + "learning_rate": 0.0003925384933264701, + "loss": 4.2095, + "step": 258400 + }, + { + "epoch": 1.87, + "learning_rate": 0.0003925355933677945, + "loss": 4.2265, + "step": 258500 + }, + { + "epoch": 1.87, + "learning_rate": 0.00039253269340911893, + "loss": 4.2298, + "step": 258600 + }, + { + "epoch": 1.88, + "learning_rate": 0.00039252979345044334, + "loss": 4.222, + "step": 258700 + }, + { + "epoch": 1.88, + "learning_rate": 0.00039252689349176774, + "loss": 4.2284, + "step": 258800 + }, + { + "epoch": 1.88, + "learning_rate": 0.00039252399353309215, + "loss": 4.2266, + "step": 258900 + }, + { + "epoch": 1.88, + "learning_rate": 0.00039252109357441656, + "loss": 4.2377, + "step": 259000 + }, + { + "epoch": 1.88, + "learning_rate": 0.00039251819361574096, + "loss": 4.2169, + "step": 259100 + }, + { + "epoch": 1.88, + "learning_rate": 0.00039251529365706537, + "loss": 4.2367, + "step": 259200 + }, + { + "epoch": 1.88, + "learning_rate": 0.00039251239369838983, + "loss": 4.2234, + "step": 259300 + }, + { + "epoch": 1.88, + "learning_rate": 0.00039250949373971424, + "loss": 4.2108, + "step": 259400 + }, + { + "epoch": 1.88, + "learning_rate": 0.00039250659378103865, + "loss": 4.2261, + "step": 259500 + }, + { + "epoch": 1.88, + "learning_rate": 0.00039250369382236305, + "loss": 4.218, + "step": 259600 + }, + { + "epoch": 1.88, + "learning_rate": 0.00039250079386368746, + "loss": 4.2251, + "step": 259700 + }, + { + "epoch": 1.88, + "learning_rate": 0.00039249789390501187, + "loss": 4.2242, + "step": 259800 + }, + { + "epoch": 1.88, + "learning_rate": 0.0003924949939463363, + "loss": 4.2211, + "step": 259900 + }, + { + "epoch": 1.88, + "learning_rate": 0.00039249209398766074, + "loss": 4.2192, + "step": 260000 + }, + { + "epoch": 1.89, + "learning_rate": 0.00039248919402898514, + "loss": 4.2197, + "step": 260100 + }, + { + "epoch": 1.89, + "learning_rate": 0.00039248629407030955, + "loss": 4.226, + "step": 260200 + }, + { + "epoch": 1.89, + "learning_rate": 0.0003924833941116339, + "loss": 4.2269, + "step": 260300 + }, + { + "epoch": 1.89, + "learning_rate": 0.0003924805231525451, + "loss": 4.2264, + "step": 260400 + }, + { + "epoch": 1.89, + "learning_rate": 0.0003924776231938695, + "loss": 4.2213, + "step": 260500 + }, + { + "epoch": 1.89, + "learning_rate": 0.0003924747232351939, + "loss": 4.2132, + "step": 260600 + }, + { + "epoch": 1.89, + "learning_rate": 0.00039247182327651833, + "loss": 4.2262, + "step": 260700 + }, + { + "epoch": 1.89, + "learning_rate": 0.00039246892331784274, + "loss": 4.2176, + "step": 260800 + }, + { + "epoch": 1.89, + "learning_rate": 0.00039246602335916715, + "loss": 4.2177, + "step": 260900 + }, + { + "epoch": 1.89, + "learning_rate": 0.0003924631234004916, + "loss": 4.2446, + "step": 261000 + }, + { + "epoch": 1.89, + "learning_rate": 0.000392460223441816, + "loss": 4.2204, + "step": 261100 + }, + { + "epoch": 1.89, + "learning_rate": 0.00039245732348314037, + "loss": 4.2091, + "step": 261200 + }, + { + "epoch": 1.89, + "learning_rate": 0.0003924544235244648, + "loss": 4.2261, + "step": 261300 + }, + { + "epoch": 1.89, + "learning_rate": 0.0003924515235657892, + "loss": 4.2094, + "step": 261400 + }, + { + "epoch": 1.9, + "learning_rate": 0.0003924486236071136, + "loss": 4.2255, + "step": 261500 + }, + { + "epoch": 1.9, + "learning_rate": 0.000392445723648438, + "loss": 4.2216, + "step": 261600 + }, + { + "epoch": 1.9, + "learning_rate": 0.0003924428236897624, + "loss": 4.2154, + "step": 261700 + }, + { + "epoch": 1.9, + "learning_rate": 0.00039243992373108686, + "loss": 4.2286, + "step": 261800 + }, + { + "epoch": 1.9, + "learning_rate": 0.00039243702377241127, + "loss": 4.2273, + "step": 261900 + }, + { + "epoch": 1.9, + "learning_rate": 0.0003924341238137357, + "loss": 4.2227, + "step": 262000 + }, + { + "epoch": 1.9, + "learning_rate": 0.0003924312238550601, + "loss": 4.2204, + "step": 262100 + }, + { + "epoch": 1.9, + "learning_rate": 0.0003924283238963845, + "loss": 4.2383, + "step": 262200 + }, + { + "epoch": 1.9, + "learning_rate": 0.0003924254239377089, + "loss": 4.22, + "step": 262300 + }, + { + "epoch": 1.9, + "learning_rate": 0.0003924225239790333, + "loss": 4.2198, + "step": 262400 + }, + { + "epoch": 1.9, + "learning_rate": 0.00039241962402035776, + "loss": 4.225, + "step": 262500 + }, + { + "epoch": 1.9, + "learning_rate": 0.00039241675306126887, + "loss": 4.2176, + "step": 262600 + }, + { + "epoch": 1.9, + "learning_rate": 0.00039241385310259327, + "loss": 4.2182, + "step": 262700 + }, + { + "epoch": 1.91, + "learning_rate": 0.00039241095314391773, + "loss": 4.2311, + "step": 262800 + }, + { + "epoch": 1.91, + "learning_rate": 0.00039240805318524214, + "loss": 4.2204, + "step": 262900 + }, + { + "epoch": 1.91, + "learning_rate": 0.00039240515322656655, + "loss": 4.24, + "step": 263000 + }, + { + "epoch": 1.91, + "learning_rate": 0.00039240225326789095, + "loss": 4.2335, + "step": 263100 + }, + { + "epoch": 1.91, + "learning_rate": 0.00039239935330921536, + "loss": 4.2178, + "step": 263200 + }, + { + "epoch": 1.91, + "learning_rate": 0.00039239645335053977, + "loss": 4.2299, + "step": 263300 + }, + { + "epoch": 1.91, + "learning_rate": 0.0003923935533918642, + "loss": 4.2193, + "step": 263400 + }, + { + "epoch": 1.91, + "learning_rate": 0.00039239065343318864, + "loss": 4.2231, + "step": 263500 + }, + { + "epoch": 1.91, + "learning_rate": 0.00039238775347451304, + "loss": 4.2163, + "step": 263600 + }, + { + "epoch": 1.91, + "learning_rate": 0.00039238488251542414, + "loss": 4.2219, + "step": 263700 + }, + { + "epoch": 1.91, + "learning_rate": 0.0003923819825567486, + "loss": 4.2264, + "step": 263800 + }, + { + "epoch": 1.91, + "learning_rate": 0.000392379082598073, + "loss": 4.233, + "step": 263900 + }, + { + "epoch": 1.91, + "learning_rate": 0.0003923761826393974, + "loss": 4.2206, + "step": 264000 + }, + { + "epoch": 1.91, + "learning_rate": 0.0003923732826807218, + "loss": 4.2091, + "step": 264100 + }, + { + "epoch": 1.92, + "learning_rate": 0.00039237038272204623, + "loss": 4.2234, + "step": 264200 + }, + { + "epoch": 1.92, + "learning_rate": 0.00039236748276337064, + "loss": 4.2216, + "step": 264300 + }, + { + "epoch": 1.92, + "learning_rate": 0.00039236458280469505, + "loss": 4.2315, + "step": 264400 + }, + { + "epoch": 1.92, + "learning_rate": 0.0003923616828460195, + "loss": 4.217, + "step": 264500 + }, + { + "epoch": 1.92, + "learning_rate": 0.0003923587828873439, + "loss": 4.2229, + "step": 264600 + }, + { + "epoch": 1.92, + "learning_rate": 0.00039235588292866827, + "loss": 4.2251, + "step": 264700 + }, + { + "epoch": 1.92, + "learning_rate": 0.0003923529829699927, + "loss": 4.213, + "step": 264800 + }, + { + "epoch": 1.92, + "learning_rate": 0.0003923500830113171, + "loss": 4.2129, + "step": 264900 + }, + { + "epoch": 1.92, + "learning_rate": 0.0003923471830526415, + "loss": 4.2318, + "step": 265000 + }, + { + "epoch": 1.92, + "learning_rate": 0.0003923442830939659, + "loss": 4.2269, + "step": 265100 + }, + { + "epoch": 1.92, + "learning_rate": 0.0003923413831352903, + "loss": 4.2136, + "step": 265200 + }, + { + "epoch": 1.92, + "learning_rate": 0.00039233848317661476, + "loss": 4.224, + "step": 265300 + }, + { + "epoch": 1.92, + "learning_rate": 0.00039233558321793917, + "loss": 4.223, + "step": 265400 + }, + { + "epoch": 1.92, + "learning_rate": 0.0003923326832592636, + "loss": 4.2245, + "step": 265500 + }, + { + "epoch": 1.93, + "learning_rate": 0.000392329783300588, + "loss": 4.223, + "step": 265600 + }, + { + "epoch": 1.93, + "learning_rate": 0.0003923268833419124, + "loss": 4.2298, + "step": 265700 + }, + { + "epoch": 1.93, + "learning_rate": 0.0003923239833832368, + "loss": 4.2261, + "step": 265800 + }, + { + "epoch": 1.93, + "learning_rate": 0.0003923210834245612, + "loss": 4.2268, + "step": 265900 + }, + { + "epoch": 1.93, + "learning_rate": 0.00039231818346588567, + "loss": 4.2167, + "step": 266000 + }, + { + "epoch": 1.93, + "learning_rate": 0.00039231528350721007, + "loss": 4.2257, + "step": 266100 + }, + { + "epoch": 1.93, + "learning_rate": 0.0003923123835485345, + "loss": 4.2211, + "step": 266200 + }, + { + "epoch": 1.93, + "learning_rate": 0.0003923094835898589, + "loss": 4.2092, + "step": 266300 + }, + { + "epoch": 1.93, + "learning_rate": 0.0003923065836311833, + "loss": 4.2233, + "step": 266400 + }, + { + "epoch": 1.93, + "learning_rate": 0.00039230368367250765, + "loss": 4.2249, + "step": 266500 + }, + { + "epoch": 1.93, + "learning_rate": 0.00039230078371383205, + "loss": 4.2279, + "step": 266600 + }, + { + "epoch": 1.93, + "learning_rate": 0.0003922978837551565, + "loss": 4.2239, + "step": 266700 + }, + { + "epoch": 1.93, + "learning_rate": 0.0003922949837964809, + "loss": 4.2179, + "step": 266800 + }, + { + "epoch": 1.93, + "learning_rate": 0.00039229208383780533, + "loss": 4.208, + "step": 266900 + }, + { + "epoch": 1.94, + "learning_rate": 0.00039228918387912973, + "loss": 4.2217, + "step": 267000 + }, + { + "epoch": 1.94, + "learning_rate": 0.00039228628392045414, + "loss": 4.2343, + "step": 267100 + }, + { + "epoch": 1.94, + "learning_rate": 0.00039228338396177855, + "loss": 4.2151, + "step": 267200 + }, + { + "epoch": 1.94, + "learning_rate": 0.00039228048400310295, + "loss": 4.2092, + "step": 267300 + }, + { + "epoch": 1.94, + "learning_rate": 0.0003922775840444274, + "loss": 4.2285, + "step": 267400 + }, + { + "epoch": 1.94, + "learning_rate": 0.0003922746840857518, + "loss": 4.2173, + "step": 267500 + }, + { + "epoch": 1.94, + "learning_rate": 0.00039227178412707623, + "loss": 4.2263, + "step": 267600 + }, + { + "epoch": 1.94, + "learning_rate": 0.00039226888416840064, + "loss": 4.229, + "step": 267700 + }, + { + "epoch": 1.94, + "learning_rate": 0.00039226604220889854, + "loss": 4.2184, + "step": 267800 + }, + { + "epoch": 1.94, + "learning_rate": 0.00039226314225022295, + "loss": 4.2282, + "step": 267900 + }, + { + "epoch": 1.94, + "learning_rate": 0.0003922602422915474, + "loss": 4.2076, + "step": 268000 + }, + { + "epoch": 1.94, + "learning_rate": 0.0003922573423328718, + "loss": 4.2101, + "step": 268100 + }, + { + "epoch": 1.94, + "learning_rate": 0.0003922544423741962, + "loss": 4.2248, + "step": 268200 + }, + { + "epoch": 1.95, + "learning_rate": 0.0003922515424155206, + "loss": 4.212, + "step": 268300 + }, + { + "epoch": 1.95, + "learning_rate": 0.000392248642456845, + "loss": 4.2165, + "step": 268400 + }, + { + "epoch": 1.95, + "learning_rate": 0.0003922457424981694, + "loss": 4.2231, + "step": 268500 + }, + { + "epoch": 1.95, + "learning_rate": 0.0003922428425394938, + "loss": 4.214, + "step": 268600 + }, + { + "epoch": 1.95, + "learning_rate": 0.0003922399425808182, + "loss": 4.2167, + "step": 268700 + }, + { + "epoch": 1.95, + "learning_rate": 0.00039223704262214266, + "loss": 4.2236, + "step": 268800 + }, + { + "epoch": 1.95, + "learning_rate": 0.00039223414266346707, + "loss": 4.2216, + "step": 268900 + }, + { + "epoch": 1.95, + "learning_rate": 0.0003922312427047915, + "loss": 4.2232, + "step": 269000 + }, + { + "epoch": 1.95, + "learning_rate": 0.0003922283427461159, + "loss": 4.219, + "step": 269100 + }, + { + "epoch": 1.95, + "learning_rate": 0.0003922254427874403, + "loss": 4.2218, + "step": 269200 + }, + { + "epoch": 1.95, + "learning_rate": 0.0003922225428287647, + "loss": 4.2224, + "step": 269300 + }, + { + "epoch": 1.95, + "learning_rate": 0.0003922196428700891, + "loss": 4.2287, + "step": 269400 + }, + { + "epoch": 1.95, + "learning_rate": 0.00039221674291141357, + "loss": 4.2211, + "step": 269500 + }, + { + "epoch": 1.95, + "learning_rate": 0.000392213842952738, + "loss": 4.2253, + "step": 269600 + }, + { + "epoch": 1.96, + "learning_rate": 0.0003922109429940624, + "loss": 4.2249, + "step": 269700 + }, + { + "epoch": 1.96, + "learning_rate": 0.0003922080430353868, + "loss": 4.2232, + "step": 269800 + }, + { + "epoch": 1.96, + "learning_rate": 0.00039220517207629794, + "loss": 4.2212, + "step": 269900 + }, + { + "epoch": 1.96, + "learning_rate": 0.00039220227211762235, + "loss": 4.2321, + "step": 270000 + }, + { + "epoch": 1.96, + "learning_rate": 0.00039219937215894676, + "loss": 4.2208, + "step": 270100 + }, + { + "epoch": 1.96, + "learning_rate": 0.00039219647220027116, + "loss": 4.2151, + "step": 270200 + }, + { + "epoch": 1.96, + "learning_rate": 0.00039219357224159557, + "loss": 4.2184, + "step": 270300 + }, + { + "epoch": 1.96, + "learning_rate": 0.00039219067228292, + "loss": 4.2156, + "step": 270400 + }, + { + "epoch": 1.96, + "learning_rate": 0.00039218777232424444, + "loss": 4.2068, + "step": 270500 + }, + { + "epoch": 1.96, + "learning_rate": 0.00039218487236556885, + "loss": 4.2301, + "step": 270600 + }, + { + "epoch": 1.96, + "learning_rate": 0.00039218197240689325, + "loss": 4.2293, + "step": 270700 + }, + { + "epoch": 1.96, + "learning_rate": 0.00039217907244821766, + "loss": 4.2246, + "step": 270800 + }, + { + "epoch": 1.96, + "learning_rate": 0.000392176172489542, + "loss": 4.2144, + "step": 270900 + }, + { + "epoch": 1.96, + "learning_rate": 0.0003921732725308664, + "loss": 4.219, + "step": 271000 + }, + { + "epoch": 1.97, + "learning_rate": 0.0003921703725721908, + "loss": 4.2181, + "step": 271100 + }, + { + "epoch": 1.97, + "learning_rate": 0.0003921674726135153, + "loss": 4.2233, + "step": 271200 + }, + { + "epoch": 1.97, + "learning_rate": 0.0003921645726548397, + "loss": 4.2309, + "step": 271300 + }, + { + "epoch": 1.97, + "learning_rate": 0.0003921616726961641, + "loss": 4.2189, + "step": 271400 + }, + { + "epoch": 1.97, + "learning_rate": 0.0003921587727374885, + "loss": 4.2297, + "step": 271500 + }, + { + "epoch": 1.97, + "learning_rate": 0.0003921558727788129, + "loss": 4.217, + "step": 271600 + }, + { + "epoch": 1.97, + "learning_rate": 0.0003921529728201373, + "loss": 4.2307, + "step": 271700 + }, + { + "epoch": 1.97, + "learning_rate": 0.00039215007286146173, + "loss": 4.2162, + "step": 271800 + }, + { + "epoch": 1.97, + "learning_rate": 0.0003921471729027862, + "loss": 4.2079, + "step": 271900 + }, + { + "epoch": 1.97, + "learning_rate": 0.0003921442729441106, + "loss": 4.2162, + "step": 272000 + }, + { + "epoch": 1.97, + "learning_rate": 0.000392141372985435, + "loss": 4.2281, + "step": 272100 + }, + { + "epoch": 1.97, + "learning_rate": 0.0003921385020263461, + "loss": 4.2151, + "step": 272200 + }, + { + "epoch": 1.97, + "learning_rate": 0.00039213560206767057, + "loss": 4.2153, + "step": 272300 + }, + { + "epoch": 1.97, + "learning_rate": 0.00039213270210899497, + "loss": 4.2061, + "step": 272400 + }, + { + "epoch": 1.98, + "learning_rate": 0.0003921298021503194, + "loss": 4.2186, + "step": 272500 + }, + { + "epoch": 1.98, + "learning_rate": 0.0003921269021916438, + "loss": 4.2271, + "step": 272600 + }, + { + "epoch": 1.98, + "learning_rate": 0.0003921240022329682, + "loss": 4.2153, + "step": 272700 + }, + { + "epoch": 1.98, + "learning_rate": 0.0003921211022742926, + "loss": 4.2191, + "step": 272800 + }, + { + "epoch": 1.98, + "learning_rate": 0.000392118202315617, + "loss": 4.2198, + "step": 272900 + }, + { + "epoch": 1.98, + "learning_rate": 0.00039211530235694147, + "loss": 4.2024, + "step": 273000 + }, + { + "epoch": 1.98, + "learning_rate": 0.0003921124023982659, + "loss": 4.2215, + "step": 273100 + }, + { + "epoch": 1.98, + "learning_rate": 0.0003921095024395903, + "loss": 4.2231, + "step": 273200 + }, + { + "epoch": 1.98, + "learning_rate": 0.0003921066024809147, + "loss": 4.2272, + "step": 273300 + }, + { + "epoch": 1.98, + "learning_rate": 0.0003921037025222391, + "loss": 4.22, + "step": 273400 + }, + { + "epoch": 1.98, + "learning_rate": 0.00039210080256356345, + "loss": 4.2274, + "step": 273500 + }, + { + "epoch": 1.98, + "learning_rate": 0.00039209790260488785, + "loss": 4.2421, + "step": 273600 + }, + { + "epoch": 1.98, + "learning_rate": 0.0003920950026462123, + "loss": 4.2291, + "step": 273700 + }, + { + "epoch": 1.98, + "learning_rate": 0.0003920921026875367, + "loss": 4.1995, + "step": 273800 + }, + { + "epoch": 1.99, + "learning_rate": 0.00039208920272886113, + "loss": 4.2161, + "step": 273900 + }, + { + "epoch": 1.99, + "learning_rate": 0.00039208630277018554, + "loss": 4.2099, + "step": 274000 + }, + { + "epoch": 1.99, + "learning_rate": 0.00039208340281150994, + "loss": 4.228, + "step": 274100 + }, + { + "epoch": 1.99, + "learning_rate": 0.00039208050285283435, + "loss": 4.2214, + "step": 274200 + }, + { + "epoch": 1.99, + "learning_rate": 0.00039207760289415876, + "loss": 4.2242, + "step": 274300 + }, + { + "epoch": 1.99, + "learning_rate": 0.0003920747029354832, + "loss": 4.2018, + "step": 274400 + }, + { + "epoch": 1.99, + "learning_rate": 0.0003920718029768076, + "loss": 4.2253, + "step": 274500 + }, + { + "epoch": 1.99, + "learning_rate": 0.00039206890301813203, + "loss": 4.2283, + "step": 274600 + }, + { + "epoch": 1.99, + "learning_rate": 0.00039206600305945644, + "loss": 4.233, + "step": 274700 + }, + { + "epoch": 1.99, + "learning_rate": 0.00039206310310078085, + "loss": 4.2274, + "step": 274800 + }, + { + "epoch": 1.99, + "learning_rate": 0.00039206020314210525, + "loss": 4.2205, + "step": 274900 + }, + { + "epoch": 1.99, + "learning_rate": 0.00039205730318342966, + "loss": 4.2085, + "step": 275000 + }, + { + "epoch": 1.99, + "learning_rate": 0.00039205440322475407, + "loss": 4.2232, + "step": 275100 + }, + { + "epoch": 2.0, + "learning_rate": 0.0003920515032660785, + "loss": 4.2215, + "step": 275200 + }, + { + "epoch": 2.0, + "learning_rate": 0.0003920486033074029, + "loss": 4.2211, + "step": 275300 + }, + { + "epoch": 2.0, + "learning_rate": 0.0003920457033487273, + "loss": 4.2232, + "step": 275400 + }, + { + "epoch": 2.0, + "learning_rate": 0.0003920428033900517, + "loss": 4.2255, + "step": 275500 + }, + { + "epoch": 2.0, + "learning_rate": 0.0003920399034313761, + "loss": 4.2182, + "step": 275600 + }, + { + "epoch": 2.0, + "learning_rate": 0.0003920370034727005, + "loss": 4.2203, + "step": 275700 + }, + { + "epoch": 2.0, + "learning_rate": 0.00039203410351402497, + "loss": 4.2106, + "step": 275800 + }, + { + "epoch": 2.0, + "learning_rate": 0.0003920312035553494, + "loss": 4.2315, + "step": 275900 + }, + { + "epoch": 2.0, + "learning_rate": 0.0003920283035966738, + "loss": 4.2141, + "step": 276000 + }, + { + "epoch": 2.0, + "learning_rate": 0.0003920254036379982, + "loss": 4.2144, + "step": 276100 + }, + { + "epoch": 2.0, + "learning_rate": 0.0003920225036793226, + "loss": 4.2067, + "step": 276200 + }, + { + "epoch": 2.0, + "learning_rate": 0.000392019603720647, + "loss": 4.2042, + "step": 276300 + }, + { + "epoch": 2.0, + "learning_rate": 0.0003920167037619714, + "loss": 4.1975, + "step": 276400 + }, + { + "epoch": 2.0, + "learning_rate": 0.00039201383280288257, + "loss": 4.1942, + "step": 276500 + }, + { + "epoch": 2.01, + "learning_rate": 0.00039201093284420697, + "loss": 4.2142, + "step": 276600 + }, + { + "epoch": 2.01, + "learning_rate": 0.0003920080328855314, + "loss": 4.2113, + "step": 276700 + }, + { + "epoch": 2.01, + "learning_rate": 0.0003920051329268558, + "loss": 4.2034, + "step": 276800 + }, + { + "epoch": 2.01, + "learning_rate": 0.000392002261967767, + "loss": 4.2075, + "step": 276900 + }, + { + "epoch": 2.01, + "learning_rate": 0.00039199936200909135, + "loss": 4.2128, + "step": 277000 + }, + { + "epoch": 2.01, + "learning_rate": 0.00039199646205041576, + "loss": 4.2109, + "step": 277100 + }, + { + "epoch": 2.01, + "learning_rate": 0.0003919935620917402, + "loss": 4.201, + "step": 277200 + }, + { + "epoch": 2.01, + "learning_rate": 0.0003919906621330646, + "loss": 4.2107, + "step": 277300 + }, + { + "epoch": 2.01, + "learning_rate": 0.00039198776217438903, + "loss": 4.2123, + "step": 277400 + }, + { + "epoch": 2.01, + "learning_rate": 0.00039198486221571344, + "loss": 4.2211, + "step": 277500 + }, + { + "epoch": 2.01, + "learning_rate": 0.00039198196225703784, + "loss": 4.2054, + "step": 277600 + }, + { + "epoch": 2.01, + "learning_rate": 0.00039197906229836225, + "loss": 4.2116, + "step": 277700 + }, + { + "epoch": 2.01, + "learning_rate": 0.00039197616233968666, + "loss": 4.2041, + "step": 277800 + }, + { + "epoch": 2.01, + "learning_rate": 0.0003919732623810111, + "loss": 4.2127, + "step": 277900 + }, + { + "epoch": 2.02, + "learning_rate": 0.0003919703624223355, + "loss": 4.2014, + "step": 278000 + }, + { + "epoch": 2.02, + "learning_rate": 0.00039196746246365993, + "loss": 4.1906, + "step": 278100 + }, + { + "epoch": 2.02, + "learning_rate": 0.00039196456250498434, + "loss": 4.195, + "step": 278200 + }, + { + "epoch": 2.02, + "learning_rate": 0.00039196166254630875, + "loss": 4.2181, + "step": 278300 + }, + { + "epoch": 2.02, + "learning_rate": 0.00039195876258763315, + "loss": 4.2025, + "step": 278400 + }, + { + "epoch": 2.02, + "learning_rate": 0.00039195586262895756, + "loss": 4.2052, + "step": 278500 + }, + { + "epoch": 2.02, + "learning_rate": 0.00039195296267028197, + "loss": 4.1958, + "step": 278600 + }, + { + "epoch": 2.02, + "learning_rate": 0.0003919500627116064, + "loss": 4.204, + "step": 278700 + }, + { + "epoch": 2.02, + "learning_rate": 0.0003919471627529308, + "loss": 4.2088, + "step": 278800 + }, + { + "epoch": 2.02, + "learning_rate": 0.0003919442627942552, + "loss": 4.2088, + "step": 278900 + }, + { + "epoch": 2.02, + "learning_rate": 0.0003919413628355796, + "loss": 4.1875, + "step": 279000 + }, + { + "epoch": 2.02, + "learning_rate": 0.000391938462876904, + "loss": 4.2045, + "step": 279100 + }, + { + "epoch": 2.02, + "learning_rate": 0.0003919355629182284, + "loss": 4.2043, + "step": 279200 + }, + { + "epoch": 2.02, + "learning_rate": 0.00039193266295955287, + "loss": 4.2066, + "step": 279300 + }, + { + "epoch": 2.03, + "learning_rate": 0.0003919297630008773, + "loss": 4.2061, + "step": 279400 + }, + { + "epoch": 2.03, + "learning_rate": 0.0003919268630422017, + "loss": 4.2047, + "step": 279500 + }, + { + "epoch": 2.03, + "learning_rate": 0.0003919239630835261, + "loss": 4.2167, + "step": 279600 + }, + { + "epoch": 2.03, + "learning_rate": 0.0003919210631248505, + "loss": 4.2112, + "step": 279700 + }, + { + "epoch": 2.03, + "learning_rate": 0.0003919181631661749, + "loss": 4.2074, + "step": 279800 + }, + { + "epoch": 2.03, + "learning_rate": 0.0003919152632074993, + "loss": 4.1958, + "step": 279900 + }, + { + "epoch": 2.03, + "learning_rate": 0.0003919123632488237, + "loss": 4.2226, + "step": 280000 + }, + { + "epoch": 2.03, + "learning_rate": 0.0003919094632901481, + "loss": 4.2057, + "step": 280100 + }, + { + "epoch": 2.03, + "learning_rate": 0.0003919065923310593, + "loss": 4.2067, + "step": 280200 + }, + { + "epoch": 2.03, + "learning_rate": 0.0003919036923723837, + "loss": 4.2104, + "step": 280300 + }, + { + "epoch": 2.03, + "learning_rate": 0.00039190079241370815, + "loss": 4.2195, + "step": 280400 + }, + { + "epoch": 2.03, + "learning_rate": 0.00039189789245503256, + "loss": 4.2094, + "step": 280500 + }, + { + "epoch": 2.03, + "learning_rate": 0.00039189499249635696, + "loss": 4.2143, + "step": 280600 + }, + { + "epoch": 2.03, + "learning_rate": 0.00039189209253768137, + "loss": 4.2286, + "step": 280700 + }, + { + "epoch": 2.04, + "learning_rate": 0.0003918891925790058, + "loss": 4.2196, + "step": 280800 + }, + { + "epoch": 2.04, + "learning_rate": 0.0003918862926203302, + "loss": 4.2148, + "step": 280900 + }, + { + "epoch": 2.04, + "learning_rate": 0.0003918833926616546, + "loss": 4.2066, + "step": 281000 + }, + { + "epoch": 2.04, + "learning_rate": 0.000391880492702979, + "loss": 4.2122, + "step": 281100 + }, + { + "epoch": 2.04, + "learning_rate": 0.0003918775927443034, + "loss": 4.2182, + "step": 281200 + }, + { + "epoch": 2.04, + "learning_rate": 0.0003918746927856278, + "loss": 4.2209, + "step": 281300 + }, + { + "epoch": 2.04, + "learning_rate": 0.0003918717928269522, + "loss": 4.2138, + "step": 281400 + }, + { + "epoch": 2.04, + "learning_rate": 0.0003918688928682766, + "loss": 4.2253, + "step": 281500 + }, + { + "epoch": 2.04, + "learning_rate": 0.00039186599290960103, + "loss": 4.2116, + "step": 281600 + }, + { + "epoch": 2.04, + "learning_rate": 0.00039186309295092544, + "loss": 4.2121, + "step": 281700 + }, + { + "epoch": 2.04, + "learning_rate": 0.0003918601929922499, + "loss": 4.2125, + "step": 281800 + }, + { + "epoch": 2.04, + "learning_rate": 0.0003918572930335743, + "loss": 4.2132, + "step": 281900 + }, + { + "epoch": 2.04, + "learning_rate": 0.0003918543930748987, + "loss": 4.2163, + "step": 282000 + }, + { + "epoch": 2.05, + "learning_rate": 0.0003918514931162231, + "loss": 4.236, + "step": 282100 + }, + { + "epoch": 2.05, + "learning_rate": 0.0003918485931575475, + "loss": 4.2116, + "step": 282200 + }, + { + "epoch": 2.05, + "learning_rate": 0.00039184569319887193, + "loss": 4.2176, + "step": 282300 + }, + { + "epoch": 2.05, + "learning_rate": 0.00039184279324019634, + "loss": 4.2081, + "step": 282400 + }, + { + "epoch": 2.05, + "learning_rate": 0.0003918398932815208, + "loss": 4.2181, + "step": 282500 + }, + { + "epoch": 2.05, + "learning_rate": 0.00039183699332284515, + "loss": 4.2031, + "step": 282600 + }, + { + "epoch": 2.05, + "learning_rate": 0.00039183409336416956, + "loss": 4.2157, + "step": 282700 + }, + { + "epoch": 2.05, + "learning_rate": 0.00039183119340549397, + "loss": 4.2025, + "step": 282800 + }, + { + "epoch": 2.05, + "learning_rate": 0.0003918282934468184, + "loss": 4.2214, + "step": 282900 + }, + { + "epoch": 2.05, + "learning_rate": 0.0003918253934881428, + "loss": 4.2118, + "step": 283000 + }, + { + "epoch": 2.05, + "learning_rate": 0.0003918224935294672, + "loss": 4.2191, + "step": 283100 + }, + { + "epoch": 2.05, + "learning_rate": 0.00039181959357079165, + "loss": 4.2182, + "step": 283200 + }, + { + "epoch": 2.05, + "learning_rate": 0.00039181669361211606, + "loss": 4.208, + "step": 283300 + }, + { + "epoch": 2.05, + "learning_rate": 0.00039181379365344046, + "loss": 4.2125, + "step": 283400 + }, + { + "epoch": 2.06, + "learning_rate": 0.00039181089369476487, + "loss": 4.2205, + "step": 283500 + }, + { + "epoch": 2.06, + "learning_rate": 0.0003918079937360893, + "loss": 4.2023, + "step": 283600 + }, + { + "epoch": 2.06, + "learning_rate": 0.0003918050937774137, + "loss": 4.2153, + "step": 283700 + }, + { + "epoch": 2.06, + "learning_rate": 0.0003918021938187381, + "loss": 4.2064, + "step": 283800 + }, + { + "epoch": 2.06, + "learning_rate": 0.00039179929386006255, + "loss": 4.2145, + "step": 283900 + }, + { + "epoch": 2.06, + "learning_rate": 0.00039179639390138696, + "loss": 4.2062, + "step": 284000 + }, + { + "epoch": 2.06, + "learning_rate": 0.00039179349394271137, + "loss": 4.2143, + "step": 284100 + }, + { + "epoch": 2.06, + "learning_rate": 0.0003917906229836225, + "loss": 4.2092, + "step": 284200 + }, + { + "epoch": 2.06, + "learning_rate": 0.00039178772302494693, + "loss": 4.207, + "step": 284300 + }, + { + "epoch": 2.06, + "learning_rate": 0.00039178482306627134, + "loss": 4.2156, + "step": 284400 + }, + { + "epoch": 2.06, + "learning_rate": 0.00039178192310759574, + "loss": 4.1908, + "step": 284500 + }, + { + "epoch": 2.06, + "learning_rate": 0.00039177902314892015, + "loss": 4.2183, + "step": 284600 + }, + { + "epoch": 2.06, + "learning_rate": 0.00039177612319024456, + "loss": 4.2154, + "step": 284700 + }, + { + "epoch": 2.06, + "learning_rate": 0.00039177322323156896, + "loss": 4.2106, + "step": 284800 + }, + { + "epoch": 2.07, + "learning_rate": 0.00039177032327289337, + "loss": 4.2177, + "step": 284900 + }, + { + "epoch": 2.07, + "learning_rate": 0.00039176742331421783, + "loss": 4.2029, + "step": 285000 + }, + { + "epoch": 2.07, + "learning_rate": 0.00039176452335554224, + "loss": 4.2224, + "step": 285100 + }, + { + "epoch": 2.07, + "learning_rate": 0.0003917616233968666, + "loss": 4.2057, + "step": 285200 + }, + { + "epoch": 2.07, + "learning_rate": 0.000391758723438191, + "loss": 4.2025, + "step": 285300 + }, + { + "epoch": 2.07, + "learning_rate": 0.0003917558234795154, + "loss": 4.2207, + "step": 285400 + }, + { + "epoch": 2.07, + "learning_rate": 0.0003917529235208398, + "loss": 4.2098, + "step": 285500 + }, + { + "epoch": 2.07, + "learning_rate": 0.0003917500235621642, + "loss": 4.2034, + "step": 285600 + }, + { + "epoch": 2.07, + "learning_rate": 0.0003917471236034887, + "loss": 4.223, + "step": 285700 + }, + { + "epoch": 2.07, + "learning_rate": 0.0003917442236448131, + "loss": 4.2229, + "step": 285800 + }, + { + "epoch": 2.07, + "learning_rate": 0.0003917413236861375, + "loss": 4.2172, + "step": 285900 + }, + { + "epoch": 2.07, + "learning_rate": 0.0003917384237274619, + "loss": 4.2123, + "step": 286000 + }, + { + "epoch": 2.07, + "learning_rate": 0.0003917355237687863, + "loss": 4.2229, + "step": 286100 + }, + { + "epoch": 2.07, + "learning_rate": 0.0003917326238101107, + "loss": 4.2138, + "step": 286200 + }, + { + "epoch": 2.08, + "learning_rate": 0.00039172975285102187, + "loss": 4.2063, + "step": 286300 + }, + { + "epoch": 2.08, + "learning_rate": 0.0003917268528923463, + "loss": 4.2174, + "step": 286400 + }, + { + "epoch": 2.08, + "learning_rate": 0.0003917239529336707, + "loss": 4.2105, + "step": 286500 + }, + { + "epoch": 2.08, + "learning_rate": 0.0003917210529749951, + "loss": 4.213, + "step": 286600 + }, + { + "epoch": 2.08, + "learning_rate": 0.00039171815301631955, + "loss": 4.2194, + "step": 286700 + }, + { + "epoch": 2.08, + "learning_rate": 0.00039171525305764396, + "loss": 4.218, + "step": 286800 + }, + { + "epoch": 2.08, + "learning_rate": 0.00039171235309896836, + "loss": 4.2288, + "step": 286900 + }, + { + "epoch": 2.08, + "learning_rate": 0.00039170945314029277, + "loss": 4.2122, + "step": 287000 + }, + { + "epoch": 2.08, + "learning_rate": 0.0003917065531816172, + "loss": 4.2163, + "step": 287100 + }, + { + "epoch": 2.08, + "learning_rate": 0.0003917036532229416, + "loss": 4.2165, + "step": 287200 + }, + { + "epoch": 2.08, + "learning_rate": 0.000391700753264266, + "loss": 4.1967, + "step": 287300 + }, + { + "epoch": 2.08, + "learning_rate": 0.00039169785330559045, + "loss": 4.2219, + "step": 287400 + }, + { + "epoch": 2.08, + "learning_rate": 0.00039169495334691486, + "loss": 4.2042, + "step": 287500 + }, + { + "epoch": 2.08, + "learning_rate": 0.00039169205338823927, + "loss": 4.2058, + "step": 287600 + }, + { + "epoch": 2.09, + "learning_rate": 0.0003916891534295637, + "loss": 4.1974, + "step": 287700 + }, + { + "epoch": 2.09, + "learning_rate": 0.000391686253470888, + "loss": 4.2147, + "step": 287800 + }, + { + "epoch": 2.09, + "learning_rate": 0.00039168335351221243, + "loss": 4.2123, + "step": 287900 + }, + { + "epoch": 2.09, + "learning_rate": 0.00039168045355353684, + "loss": 4.2215, + "step": 288000 + }, + { + "epoch": 2.09, + "learning_rate": 0.0003916775535948613, + "loss": 4.2083, + "step": 288100 + }, + { + "epoch": 2.09, + "learning_rate": 0.0003916746536361857, + "loss": 4.2136, + "step": 288200 + }, + { + "epoch": 2.09, + "learning_rate": 0.0003916717536775101, + "loss": 4.2113, + "step": 288300 + }, + { + "epoch": 2.09, + "learning_rate": 0.0003916688537188345, + "loss": 4.2087, + "step": 288400 + }, + { + "epoch": 2.09, + "learning_rate": 0.00039166598275974573, + "loss": 4.2118, + "step": 288500 + }, + { + "epoch": 2.09, + "learning_rate": 0.00039166308280107014, + "loss": 4.2195, + "step": 288600 + }, + { + "epoch": 2.09, + "learning_rate": 0.0003916601828423945, + "loss": 4.2155, + "step": 288700 + }, + { + "epoch": 2.09, + "learning_rate": 0.0003916572828837189, + "loss": 4.2165, + "step": 288800 + }, + { + "epoch": 2.09, + "learning_rate": 0.0003916543829250433, + "loss": 4.2192, + "step": 288900 + }, + { + "epoch": 2.1, + "learning_rate": 0.0003916514829663677, + "loss": 4.2171, + "step": 289000 + }, + { + "epoch": 2.1, + "learning_rate": 0.0003916485830076921, + "loss": 4.2086, + "step": 289100 + }, + { + "epoch": 2.1, + "learning_rate": 0.0003916456830490166, + "loss": 4.1975, + "step": 289200 + }, + { + "epoch": 2.1, + "learning_rate": 0.000391642783090341, + "loss": 4.2182, + "step": 289300 + }, + { + "epoch": 2.1, + "learning_rate": 0.0003916398831316654, + "loss": 4.2132, + "step": 289400 + }, + { + "epoch": 2.1, + "learning_rate": 0.0003916369831729898, + "loss": 4.2096, + "step": 289500 + }, + { + "epoch": 2.1, + "learning_rate": 0.0003916340832143142, + "loss": 4.1979, + "step": 289600 + }, + { + "epoch": 2.1, + "learning_rate": 0.0003916311832556386, + "loss": 4.2094, + "step": 289700 + }, + { + "epoch": 2.1, + "learning_rate": 0.000391628283296963, + "loss": 4.2136, + "step": 289800 + }, + { + "epoch": 2.1, + "learning_rate": 0.0003916253833382875, + "loss": 4.2068, + "step": 289900 + }, + { + "epoch": 2.1, + "learning_rate": 0.0003916224833796119, + "loss": 4.2044, + "step": 290000 + }, + { + "epoch": 2.1, + "learning_rate": 0.0003916195834209363, + "loss": 4.2138, + "step": 290100 + }, + { + "epoch": 2.1, + "learning_rate": 0.0003916166834622607, + "loss": 4.2077, + "step": 290200 + }, + { + "epoch": 2.1, + "learning_rate": 0.0003916137835035851, + "loss": 4.2194, + "step": 290300 + }, + { + "epoch": 2.11, + "learning_rate": 0.0003916108835449095, + "loss": 4.2096, + "step": 290400 + }, + { + "epoch": 2.11, + "learning_rate": 0.00039160798358623387, + "loss": 4.2156, + "step": 290500 + }, + { + "epoch": 2.11, + "learning_rate": 0.0003916051126271451, + "loss": 4.2132, + "step": 290600 + }, + { + "epoch": 2.11, + "learning_rate": 0.0003916022126684695, + "loss": 4.2108, + "step": 290700 + }, + { + "epoch": 2.11, + "learning_rate": 0.0003915993127097939, + "loss": 4.211, + "step": 290800 + }, + { + "epoch": 2.11, + "learning_rate": 0.00039159641275111835, + "loss": 4.2155, + "step": 290900 + }, + { + "epoch": 2.11, + "learning_rate": 0.00039159351279244276, + "loss": 4.2061, + "step": 291000 + }, + { + "epoch": 2.11, + "learning_rate": 0.00039159061283376717, + "loss": 4.2115, + "step": 291100 + }, + { + "epoch": 2.11, + "learning_rate": 0.0003915877128750916, + "loss": 4.219, + "step": 291200 + }, + { + "epoch": 2.11, + "learning_rate": 0.000391584812916416, + "loss": 4.2155, + "step": 291300 + }, + { + "epoch": 2.11, + "learning_rate": 0.00039158191295774033, + "loss": 4.2115, + "step": 291400 + }, + { + "epoch": 2.11, + "learning_rate": 0.00039157901299906474, + "loss": 4.2122, + "step": 291500 + }, + { + "epoch": 2.11, + "learning_rate": 0.0003915761130403892, + "loss": 4.2201, + "step": 291600 + }, + { + "epoch": 2.11, + "learning_rate": 0.0003915732130817136, + "loss": 4.21, + "step": 291700 + }, + { + "epoch": 2.12, + "learning_rate": 0.000391570313123038, + "loss": 4.2204, + "step": 291800 + }, + { + "epoch": 2.12, + "learning_rate": 0.0003915674131643624, + "loss": 4.2111, + "step": 291900 + }, + { + "epoch": 2.12, + "learning_rate": 0.00039156451320568683, + "loss": 4.2064, + "step": 292000 + }, + { + "epoch": 2.12, + "learning_rate": 0.00039156161324701124, + "loss": 4.2227, + "step": 292100 + }, + { + "epoch": 2.12, + "learning_rate": 0.00039155871328833564, + "loss": 4.2178, + "step": 292200 + }, + { + "epoch": 2.12, + "learning_rate": 0.0003915558133296601, + "loss": 4.2136, + "step": 292300 + }, + { + "epoch": 2.12, + "learning_rate": 0.0003915529133709845, + "loss": 4.224, + "step": 292400 + }, + { + "epoch": 2.12, + "learning_rate": 0.0003915500134123089, + "loss": 4.2163, + "step": 292500 + }, + { + "epoch": 2.12, + "learning_rate": 0.0003915471134536333, + "loss": 4.2174, + "step": 292600 + }, + { + "epoch": 2.12, + "learning_rate": 0.00039154421349495773, + "loss": 4.2162, + "step": 292700 + }, + { + "epoch": 2.12, + "learning_rate": 0.0003915413425358689, + "loss": 4.2061, + "step": 292800 + }, + { + "epoch": 2.12, + "learning_rate": 0.0003915384425771933, + "loss": 4.2318, + "step": 292900 + }, + { + "epoch": 2.12, + "learning_rate": 0.0003915355426185177, + "loss": 4.2119, + "step": 293000 + }, + { + "epoch": 2.12, + "learning_rate": 0.0003915326426598421, + "loss": 4.2155, + "step": 293100 + }, + { + "epoch": 2.13, + "learning_rate": 0.0003915297427011665, + "loss": 4.2035, + "step": 293200 + }, + { + "epoch": 2.13, + "learning_rate": 0.0003915268427424909, + "loss": 4.2092, + "step": 293300 + }, + { + "epoch": 2.13, + "learning_rate": 0.0003915239427838154, + "loss": 4.2091, + "step": 293400 + }, + { + "epoch": 2.13, + "learning_rate": 0.0003915210428251398, + "loss": 4.2041, + "step": 293500 + }, + { + "epoch": 2.13, + "learning_rate": 0.0003915181428664642, + "loss": 4.2137, + "step": 293600 + }, + { + "epoch": 2.13, + "learning_rate": 0.00039151527190737535, + "loss": 4.2248, + "step": 293700 + }, + { + "epoch": 2.13, + "learning_rate": 0.00039151237194869976, + "loss": 4.2038, + "step": 293800 + }, + { + "epoch": 2.13, + "learning_rate": 0.00039150947199002417, + "loss": 4.2137, + "step": 293900 + }, + { + "epoch": 2.13, + "learning_rate": 0.0003915065720313486, + "loss": 4.2205, + "step": 294000 + }, + { + "epoch": 2.13, + "learning_rate": 0.000391503672072673, + "loss": 4.2066, + "step": 294100 + }, + { + "epoch": 2.13, + "learning_rate": 0.0003915007721139974, + "loss": 4.2235, + "step": 294200 + }, + { + "epoch": 2.13, + "learning_rate": 0.0003914978721553218, + "loss": 4.2146, + "step": 294300 + }, + { + "epoch": 2.13, + "learning_rate": 0.00039149497219664626, + "loss": 4.2142, + "step": 294400 + }, + { + "epoch": 2.13, + "learning_rate": 0.00039149207223797066, + "loss": 4.2103, + "step": 294500 + }, + { + "epoch": 8.54, + "learning_rate": 0.0003928901392440118, + "loss": 4.1411, + "step": 294600 + }, + { + "epoch": 8.55, + "learning_rate": 0.00039288532216332194, + "loss": 4.0957, + "step": 294700 + }, + { + "epoch": 8.55, + "learning_rate": 0.0003928805034808982, + "loss": 4.0677, + "step": 294800 + }, + { + "epoch": 8.55, + "learning_rate": 0.0003928756831967806, + "loss": 4.0663, + "step": 294900 + }, + { + "epoch": 8.55, + "learning_rate": 0.00039287086131100903, + "loss": 4.0514, + "step": 295000 + }, + { + "epoch": 21.39, + "learning_rate": 0.0003567606794640303, + "loss": 4.0252, + "step": 295100 + }, + { + "epoch": 21.4, + "learning_rate": 0.00035673236782667294, + "loss": 4.0012, + "step": 295200 + }, + { + "epoch": 21.41, + "learning_rate": 0.00035670404804775035, + "loss": 3.9916, + "step": 295300 + }, + { + "epoch": 21.42, + "learning_rate": 0.00035667572012873354, + "loss": 3.9865, + "step": 295400 + }, + { + "epoch": 21.42, + "learning_rate": 0.000356647384071094, + "loss": 3.9842, + "step": 295500 + }, + { + "epoch": 21.43, + "learning_rate": 0.00035661903987630383, + "loss": 3.9721, + "step": 295600 + }, + { + "epoch": 21.44, + "learning_rate": 0.0003565906875458352, + "loss": 3.9662, + "step": 295700 + }, + { + "epoch": 21.44, + "learning_rate": 0.00035656232708116103, + "loss": 3.965, + "step": 295800 + }, + { + "epoch": 21.45, + "learning_rate": 0.00035653395848375436, + "loss": 3.9588, + "step": 295900 + }, + { + "epoch": 21.46, + "learning_rate": 0.000356505581755089, + "loss": 3.9579, + "step": 296000 + }, + { + "epoch": 21.47, + "learning_rate": 0.00035647719689663887, + "loss": 3.9556, + "step": 296100 + }, + { + "epoch": 21.47, + "learning_rate": 0.0003564488039098785, + "loss": 3.9527, + "step": 296200 + }, + { + "epoch": 21.48, + "learning_rate": 0.0003564204027962828, + "loss": 3.9508, + "step": 296300 + }, + { + "epoch": 21.49, + "learning_rate": 0.00035639199355732697, + "loss": 3.9482, + "step": 296400 + }, + { + "epoch": 21.49, + "learning_rate": 0.0003563635761944869, + "loss": 3.9479, + "step": 296500 + }, + { + "epoch": 21.5, + "learning_rate": 0.0003563351507092386, + "loss": 3.9478, + "step": 296600 + }, + { + "epoch": 21.51, + "learning_rate": 0.00035630671710305876, + "loss": 3.9425, + "step": 296700 + }, + { + "epoch": 21.52, + "learning_rate": 0.00035627827537742433, + "loss": 3.9425, + "step": 296800 + }, + { + "epoch": 21.52, + "learning_rate": 0.00035624982553381286, + "loss": 3.9379, + "step": 296900 + }, + { + "epoch": 21.53, + "learning_rate": 0.00035622136757370193, + "loss": 3.9366, + "step": 297000 + }, + { + "epoch": 21.54, + "learning_rate": 0.00035619290149857004, + "loss": 3.9392, + "step": 297100 + }, + { + "epoch": 21.55, + "learning_rate": 0.0003561644273098958, + "loss": 3.9372, + "step": 297200 + }, + { + "epoch": 21.55, + "learning_rate": 0.0003561359450091583, + "loss": 3.9414, + "step": 297300 + }, + { + "epoch": 21.56, + "learning_rate": 0.00035610745459783717, + "loss": 3.9406, + "step": 297400 + }, + { + "epoch": 21.57, + "learning_rate": 0.00035607895607741224, + "loss": 3.939, + "step": 297500 + }, + { + "epoch": 5.57, + "learning_rate": 0.0003969648894231196, + "loss": 3.9775, + "step": 297600 + }, + { + "epoch": 5.57, + "learning_rate": 0.0003969628672822069, + "loss": 4.0215, + "step": 297700 + }, + { + "epoch": 5.58, + "learning_rate": 0.0003969608240372223, + "loss": 4.0282, + "step": 297800 + }, + { + "epoch": 5.58, + "learning_rate": 0.00039695878011042964, + "loss": 4.0355, + "step": 297900 + }, + { + "epoch": 5.58, + "learning_rate": 0.00039695673550183604, + "loss": 4.0281, + "step": 298000 + }, + { + "epoch": 5.58, + "learning_rate": 0.0003969546902114484, + "loss": 4.0262, + "step": 298100 + }, + { + "epoch": 5.58, + "learning_rate": 0.00039695264423927397, + "loss": 4.0216, + "step": 298200 + }, + { + "epoch": 5.59, + "learning_rate": 0.00039695059758531974, + "loss": 4.0174, + "step": 298300 + }, + { + "epoch": 5.59, + "learning_rate": 0.00039694855024959285, + "loss": 4.0109, + "step": 298400 + }, + { + "epoch": 5.59, + "learning_rate": 0.0003969465022321003, + "loss": 4.0171, + "step": 298500 + }, + { + "epoch": 5.59, + "learning_rate": 0.0003969444535328493, + "loss": 4.0163, + "step": 298600 + }, + { + "epoch": 5.59, + "learning_rate": 0.00039694240415184687, + "loss": 4.0065, + "step": 298700 + }, + { + "epoch": 5.59, + "learning_rate": 0.00039694035408910014, + "loss": 4.0091, + "step": 298800 + }, + { + "epoch": 5.6, + "learning_rate": 0.00039693830334461614, + "loss": 3.9978, + "step": 298900 + }, + { + "epoch": 5.6, + "learning_rate": 0.00039693625191840196, + "loss": 4.0069, + "step": 299000 + }, + { + "epoch": 5.6, + "learning_rate": 0.0003969341998104648, + "loss": 4.0029, + "step": 299100 + }, + { + "epoch": 5.6, + "learning_rate": 0.00039693214702081177, + "loss": 3.9981, + "step": 299200 + }, + { + "epoch": 5.6, + "learning_rate": 0.00039693009354944985, + "loss": 4.0088, + "step": 299300 + }, + { + "epoch": 5.61, + "learning_rate": 0.0003969280393963863, + "loss": 4.0046, + "step": 299400 + }, + { + "epoch": 5.61, + "learning_rate": 0.00039692598456162804, + "loss": 4.0022, + "step": 299500 + }, + { + "epoch": 5.61, + "learning_rate": 0.00039692392904518235, + "loss": 4.0023, + "step": 299600 + }, + { + "epoch": 5.61, + "learning_rate": 0.0003969218728470563, + "loss": 4.0058, + "step": 299700 + }, + { + "epoch": 5.61, + "learning_rate": 0.00039691981596725695, + "loss": 4.0067, + "step": 299800 + }, + { + "epoch": 5.62, + "learning_rate": 0.0003969177789847804, + "loss": 3.9932, + "step": 299900 + }, + { + "epoch": 5.62, + "learning_rate": 0.0003969157207484725, + "loss": 3.9903, + "step": 300000 + }, + { + "epoch": 5.62, + "learning_rate": 0.00039691366183051263, + "loss": 3.9929, + "step": 300100 + }, + { + "epoch": 5.62, + "learning_rate": 0.00039691160223090795, + "loss": 3.9917, + "step": 300200 + }, + { + "epoch": 5.62, + "learning_rate": 0.00039690954194966557, + "loss": 3.9997, + "step": 300300 + }, + { + "epoch": 5.62, + "learning_rate": 0.0003969074809867926, + "loss": 3.992, + "step": 300400 + }, + { + "epoch": 5.63, + "learning_rate": 0.00039690541934229615, + "loss": 3.9897, + "step": 300500 + }, + { + "epoch": 5.63, + "learning_rate": 0.0003969033570161835, + "loss": 3.9984, + "step": 300600 + }, + { + "epoch": 5.63, + "learning_rate": 0.00039690129400846163, + "loss": 3.9943, + "step": 300700 + }, + { + "epoch": 5.63, + "learning_rate": 0.00039689923031913777, + "loss": 3.9959, + "step": 300800 + }, + { + "epoch": 5.63, + "learning_rate": 0.00039689716594821904, + "loss": 4.0, + "step": 300900 + }, + { + "epoch": 5.64, + "learning_rate": 0.00039689510089571255, + "loss": 3.9895, + "step": 301000 + }, + { + "epoch": 5.64, + "learning_rate": 0.0003968930351616255, + "loss": 3.9888, + "step": 301100 + }, + { + "epoch": 5.64, + "learning_rate": 0.000396890968745965, + "loss": 3.9896, + "step": 301200 + }, + { + "epoch": 5.64, + "learning_rate": 0.0003968889016487383, + "loss": 3.9938, + "step": 301300 + }, + { + "epoch": 5.64, + "learning_rate": 0.0003968868338699524, + "loss": 3.9886, + "step": 301400 + }, + { + "epoch": 5.65, + "learning_rate": 0.00039688476540961456, + "loss": 3.9892, + "step": 301500 + }, + { + "epoch": 5.65, + "learning_rate": 0.00039688269626773193, + "loss": 3.9876, + "step": 301600 + }, + { + "epoch": 5.65, + "learning_rate": 0.00039688062644431174, + "loss": 3.9915, + "step": 301700 + }, + { + "epoch": 5.65, + "learning_rate": 0.00039687855593936093, + "loss": 3.9936, + "step": 301800 + }, + { + "epoch": 5.65, + "learning_rate": 0.00039687648475288687, + "loss": 3.9879, + "step": 301900 + }, + { + "epoch": 5.65, + "learning_rate": 0.0003968744128848967, + "loss": 3.9987, + "step": 302000 + }, + { + "epoch": 5.66, + "learning_rate": 0.0003968723403353976, + "loss": 3.982, + "step": 302100 + }, + { + "epoch": 5.66, + "learning_rate": 0.0003968702671043966, + "loss": 3.9934, + "step": 302200 + }, + { + "epoch": 5.66, + "learning_rate": 0.0003968681931919011, + "loss": 3.9886, + "step": 302300 + }, + { + "epoch": 5.66, + "learning_rate": 0.0003968661185979181, + "loss": 3.985, + "step": 302400 + }, + { + "epoch": 5.66, + "learning_rate": 0.00039686404332245486, + "loss": 3.9879, + "step": 302500 + }, + { + "epoch": 5.67, + "learning_rate": 0.00039686196736551856, + "loss": 3.9972, + "step": 302600 + }, + { + "epoch": 5.67, + "learning_rate": 0.0003968598907271164, + "loss": 3.9931, + "step": 302700 + }, + { + "epoch": 5.67, + "learning_rate": 0.0003968578134072555, + "loss": 3.9896, + "step": 302800 + }, + { + "epoch": 5.67, + "learning_rate": 0.00039685573540594317, + "loss": 3.9886, + "step": 302900 + }, + { + "epoch": 5.67, + "learning_rate": 0.0003968536567231865, + "loss": 3.9854, + "step": 303000 + }, + { + "epoch": 5.68, + "learning_rate": 0.00039685157735899263, + "loss": 3.9959, + "step": 303100 + }, + { + "epoch": 5.68, + "learning_rate": 0.00039684949731336893, + "loss": 3.9813, + "step": 303200 + }, + { + "epoch": 5.68, + "learning_rate": 0.00039684741658632254, + "loss": 3.9894, + "step": 303300 + }, + { + "epoch": 5.68, + "learning_rate": 0.00039684533517786056, + "loss": 3.9883, + "step": 303400 + }, + { + "epoch": 5.68, + "learning_rate": 0.0003968432530879903, + "loss": 3.9907, + "step": 303500 + }, + { + "epoch": 5.68, + "learning_rate": 0.00039684117031671894, + "loss": 3.9927, + "step": 303600 + }, + { + "epoch": 5.69, + "learning_rate": 0.00039683908686405367, + "loss": 3.985, + "step": 303700 + }, + { + "epoch": 5.69, + "learning_rate": 0.00039683700273000175, + "loss": 3.9813, + "step": 303800 + }, + { + "epoch": 5.69, + "learning_rate": 0.00039683493876609756, + "loss": 3.9809, + "step": 303900 + }, + { + "epoch": 5.69, + "learning_rate": 0.0003968328532761076, + "loss": 3.9944, + "step": 304000 + }, + { + "epoch": 5.69, + "learning_rate": 0.00039683076710475253, + "loss": 3.9808, + "step": 304100 + }, + { + "epoch": 5.7, + "learning_rate": 0.0003968286802520396, + "loss": 3.9819, + "step": 304200 + }, + { + "epoch": 5.7, + "learning_rate": 0.000396826592717976, + "loss": 3.9916, + "step": 304300 + }, + { + "epoch": 5.7, + "learning_rate": 0.00039682452538809576, + "loss": 3.9807, + "step": 304400 + }, + { + "epoch": 5.7, + "learning_rate": 0.00039682243649816586, + "loss": 3.9853, + "step": 304500 + }, + { + "epoch": 5.7, + "learning_rate": 0.000396820346926907, + "loss": 3.9882, + "step": 304600 + }, + { + "epoch": 5.71, + "learning_rate": 0.0003968182566743262, + "loss": 3.9923, + "step": 304700 + }, + { + "epoch": 5.71, + "learning_rate": 0.0003968161657404309, + "loss": 3.9872, + "step": 304800 + }, + { + "epoch": 5.71, + "learning_rate": 0.0003968140741252283, + "loss": 3.9955, + "step": 304900 + }, + { + "epoch": 5.71, + "learning_rate": 0.0003968119818287256, + "loss": 3.9846, + "step": 305000 + }, + { + "epoch": 5.71, + "learning_rate": 0.00039680988885093006, + "loss": 3.9832, + "step": 305100 + }, + { + "epoch": 5.71, + "learning_rate": 0.0003968077951918489, + "loss": 3.9816, + "step": 305200 + }, + { + "epoch": 5.72, + "learning_rate": 0.00039680570085148933, + "loss": 3.9895, + "step": 305300 + }, + { + "epoch": 5.72, + "learning_rate": 0.0003968036058298587, + "loss": 3.9753, + "step": 305400 + }, + { + "epoch": 5.72, + "learning_rate": 0.00039680151012696423, + "loss": 3.987, + "step": 305500 + }, + { + "epoch": 5.72, + "learning_rate": 0.00039679941374281316, + "loss": 3.9875, + "step": 305600 + }, + { + "epoch": 5.72, + "learning_rate": 0.0003967973166774127, + "loss": 3.9895, + "step": 305700 + }, + { + "epoch": 5.73, + "learning_rate": 0.0003967952189307702, + "loss": 3.9815, + "step": 305800 + }, + { + "epoch": 5.73, + "learning_rate": 0.0003967931205028928, + "loss": 3.9851, + "step": 305900 + }, + { + "epoch": 5.73, + "learning_rate": 0.0003967910213937879, + "loss": 3.9814, + "step": 306000 + }, + { + "epoch": 5.73, + "learning_rate": 0.00039678892160346273, + "loss": 3.9794, + "step": 306100 + }, + { + "epoch": 5.73, + "learning_rate": 0.0003967868211319245, + "loss": 3.9927, + "step": 306200 + }, + { + "epoch": 5.74, + "learning_rate": 0.00039678471997918056, + "loss": 3.9825, + "step": 306300 + }, + { + "epoch": 5.74, + "learning_rate": 0.0003967826181452381, + "loss": 3.9864, + "step": 306400 + }, + { + "epoch": 5.74, + "learning_rate": 0.00039678051563010443, + "loss": 3.9817, + "step": 306500 + }, + { + "epoch": 5.74, + "learning_rate": 0.00039677841243378686, + "loss": 3.9732, + "step": 306600 + }, + { + "epoch": 5.74, + "learning_rate": 0.00039677630855629266, + "loss": 3.9814, + "step": 306700 + }, + { + "epoch": 5.74, + "learning_rate": 0.00039677420399762903, + "loss": 3.9757, + "step": 306800 + }, + { + "epoch": 5.75, + "learning_rate": 0.0003967720987578034, + "loss": 3.9838, + "step": 306900 + }, + { + "epoch": 5.75, + "learning_rate": 0.00039676999283682295, + "loss": 3.9841, + "step": 307000 + }, + { + "epoch": 5.75, + "learning_rate": 0.00039676788623469497, + "loss": 3.9811, + "step": 307100 + }, + { + "epoch": 5.75, + "learning_rate": 0.00039676577895142684, + "loss": 3.986, + "step": 307200 + }, + { + "epoch": 5.75, + "learning_rate": 0.00039676367098702577, + "loss": 3.9778, + "step": 307300 + }, + { + "epoch": 5.76, + "learning_rate": 0.00039676156234149914, + "loss": 3.9862, + "step": 307400 + }, + { + "epoch": 5.76, + "learning_rate": 0.00039675945301485415, + "loss": 3.984, + "step": 307500 + }, + { + "epoch": 5.76, + "learning_rate": 0.0003967573430070981, + "loss": 3.985, + "step": 307600 + }, + { + "epoch": 5.76, + "learning_rate": 0.00039675523231823837, + "loss": 3.9858, + "step": 307700 + }, + { + "epoch": 5.76, + "learning_rate": 0.0003967531209482823, + "loss": 3.9826, + "step": 307800 + }, + { + "epoch": 5.77, + "learning_rate": 0.00039675100889723704, + "loss": 3.9826, + "step": 307900 + }, + { + "epoch": 5.77, + "learning_rate": 0.00039674889616511005, + "loss": 3.9791, + "step": 308000 + }, + { + "epoch": 5.77, + "learning_rate": 0.00039674678275190855, + "loss": 3.9895, + "step": 308100 + }, + { + "epoch": 5.77, + "learning_rate": 0.0003967446686576399, + "loss": 3.9848, + "step": 308200 + }, + { + "epoch": 5.77, + "learning_rate": 0.0003967425538823115, + "loss": 3.9773, + "step": 308300 + }, + { + "epoch": 5.77, + "learning_rate": 0.0003967404595838655, + "loss": 3.9734, + "step": 308400 + }, + { + "epoch": 5.78, + "learning_rate": 0.0003967383434532498, + "loss": 3.9831, + "step": 308500 + }, + { + "epoch": 5.78, + "learning_rate": 0.0003967362266415961, + "loss": 3.9703, + "step": 308600 + }, + { + "epoch": 5.78, + "learning_rate": 0.00039673410914891174, + "loss": 3.982, + "step": 308700 + }, + { + "epoch": 5.78, + "learning_rate": 0.0003967319909752042, + "loss": 3.9836, + "step": 308800 + }, + { + "epoch": 5.78, + "learning_rate": 0.0003967298721204806, + "loss": 3.9769, + "step": 308900 + }, + { + "epoch": 5.79, + "learning_rate": 0.0003967277525847485, + "loss": 3.9794, + "step": 309000 + }, + { + "epoch": 5.79, + "learning_rate": 0.00039672563236801506, + "loss": 3.9806, + "step": 309100 + }, + { + "epoch": 5.79, + "learning_rate": 0.0003967235114702877, + "loss": 3.9794, + "step": 309200 + }, + { + "epoch": 5.79, + "learning_rate": 0.0003967213898915737, + "loss": 3.9802, + "step": 309300 + }, + { + "epoch": 5.79, + "learning_rate": 0.0003967192676318805, + "loss": 3.9863, + "step": 309400 + }, + { + "epoch": 5.8, + "learning_rate": 0.0003967171446912154, + "loss": 3.9831, + "step": 309500 + }, + { + "epoch": 5.8, + "learning_rate": 0.00039671502106958565, + "loss": 3.9835, + "step": 309600 + }, + { + "epoch": 5.8, + "learning_rate": 0.0003967128967669988, + "loss": 3.9819, + "step": 309700 + }, + { + "epoch": 5.8, + "learning_rate": 0.00039671077178346205, + "loss": 3.9792, + "step": 309800 + }, + { + "epoch": 5.8, + "learning_rate": 0.0003967086461189828, + "loss": 3.9784, + "step": 309900 + }, + { + "epoch": 5.8, + "learning_rate": 0.00039670651977356846, + "loss": 3.9825, + "step": 310000 + }, + { + "epoch": 5.81, + "learning_rate": 0.0003967043927472263, + "loss": 3.9785, + "step": 310100 + }, + { + "epoch": 5.81, + "learning_rate": 0.00039670226503996374, + "loss": 3.9841, + "step": 310200 + }, + { + "epoch": 5.81, + "learning_rate": 0.0003967001366517881, + "loss": 3.9748, + "step": 310300 + }, + { + "epoch": 5.81, + "learning_rate": 0.00039669802887676807, + "loss": 3.9815, + "step": 310400 + }, + { + "epoch": 5.81, + "learning_rate": 0.00039669589913359746, + "loss": 3.9862, + "step": 310500 + }, + { + "epoch": 5.82, + "learning_rate": 0.00039669376870953574, + "loss": 3.9805, + "step": 310600 + }, + { + "epoch": 5.82, + "learning_rate": 0.00039669163760459045, + "loss": 3.9813, + "step": 310700 + }, + { + "epoch": 5.82, + "learning_rate": 0.00039668950581876885, + "loss": 3.9775, + "step": 310800 + }, + { + "epoch": 5.82, + "learning_rate": 0.0003966873946801156, + "loss": 3.9825, + "step": 310900 + }, + { + "epoch": 5.82, + "learning_rate": 0.00039668526153937216, + "loss": 3.9799, + "step": 311000 + }, + { + "epoch": 5.83, + "learning_rate": 0.00039668312771777456, + "loss": 3.9781, + "step": 311100 + }, + { + "epoch": 5.83, + "learning_rate": 0.00039668099321533024, + "loss": 3.9819, + "step": 311200 + }, + { + "epoch": 5.83, + "learning_rate": 0.00039667885803204643, + "loss": 3.98, + "step": 311300 + }, + { + "epoch": 5.83, + "learning_rate": 0.0003966767221679306, + "loss": 3.976, + "step": 311400 + }, + { + "epoch": 5.83, + "learning_rate": 0.00039667458562299017, + "loss": 3.9793, + "step": 311500 + }, + { + "epoch": 5.83, + "learning_rate": 0.0003966724483972325, + "loss": 3.9745, + "step": 311600 + }, + { + "epoch": 5.84, + "learning_rate": 0.00039667031049066503, + "loss": 3.9834, + "step": 311700 + }, + { + "epoch": 5.84, + "learning_rate": 0.00039666817190329507, + "loss": 3.9682, + "step": 311800 + }, + { + "epoch": 5.84, + "learning_rate": 0.0003966660326351301, + "loss": 3.9841, + "step": 311900 + }, + { + "epoch": 5.84, + "learning_rate": 0.0003966638926861775, + "loss": 3.9771, + "step": 312000 + }, + { + "epoch": 5.84, + "learning_rate": 0.00039666175205644466, + "loss": 3.9711, + "step": 312100 + }, + { + "epoch": 5.85, + "learning_rate": 0.00039665961074593906, + "loss": 3.992, + "step": 312200 + }, + { + "epoch": 5.85, + "learning_rate": 0.000396657468754668, + "loss": 3.9794, + "step": 312300 + }, + { + "epoch": 5.85, + "learning_rate": 0.000396655326082639, + "loss": 3.9697, + "step": 312400 + }, + { + "epoch": 5.85, + "learning_rate": 0.0003966531827298595, + "loss": 3.9766, + "step": 312500 + }, + { + "epoch": 5.85, + "learning_rate": 0.00039665103869633675, + "loss": 3.9824, + "step": 312600 + }, + { + "epoch": 5.86, + "learning_rate": 0.0003966488939820783, + "loss": 3.9837, + "step": 312700 + }, + { + "epoch": 5.86, + "learning_rate": 0.0003966467485870916, + "loss": 3.9797, + "step": 312800 + }, + { + "epoch": 5.86, + "learning_rate": 0.000396644602511384, + "loss": 3.975, + "step": 312900 + }, + { + "epoch": 5.86, + "learning_rate": 0.00039664245575496296, + "loss": 3.9725, + "step": 313000 + }, + { + "epoch": 5.86, + "learning_rate": 0.0003966403083178359, + "loss": 3.9709, + "step": 313100 + }, + { + "epoch": 5.86, + "learning_rate": 0.00039663816020001026, + "loss": 3.9748, + "step": 313200 + }, + { + "epoch": 5.87, + "learning_rate": 0.0003966360114014935, + "loss": 3.9732, + "step": 313300 + }, + { + "epoch": 5.87, + "learning_rate": 0.00039663386192229306, + "loss": 3.9755, + "step": 313400 + }, + { + "epoch": 5.87, + "learning_rate": 0.0003966317117624163, + "loss": 3.9727, + "step": 313500 + } + ], + "max_steps": 5340600, + "num_train_epochs": 100, + "total_flos": 2.1723600144992256e+19, + "trial_name": null, + "trial_params": null +}