{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.999549121276575,
  "global_step": 204000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 2e-05,
      "loss": 4.2154,
      "step": 500
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9950871056174036e-05,
      "loss": 3.5237,
      "step": 1000
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.990174211234807e-05,
      "loss": 3.3401,
      "step": 1500
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9852613168522107e-05,
      "loss": 3.2179,
      "step": 2000
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.980348422469614e-05,
      "loss": 3.1655,
      "step": 2500
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.9754355280870174e-05,
      "loss": 3.1335,
      "step": 3000
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.9705226337044208e-05,
      "loss": 3.1039,
      "step": 3500
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.9656097393218242e-05,
      "loss": 3.0406,
      "step": 4000
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.9606968449392276e-05,
      "loss": 3.0212,
      "step": 4500
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.9557839505566313e-05,
      "loss": 3.0054,
      "step": 5000
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.9508710561740344e-05,
      "loss": 2.9778,
      "step": 5500
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.945958161791438e-05,
      "loss": 2.9428,
      "step": 6000
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.9410452674088415e-05,
      "loss": 2.9546,
      "step": 6500
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.936132373026245e-05,
      "loss": 2.9442,
      "step": 7000
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.9312194786436483e-05,
      "loss": 2.9142,
      "step": 7500
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.9263065842610517e-05,
      "loss": 2.8991,
      "step": 8000
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.921393689878455e-05,
      "loss": 2.8399,
      "step": 8500
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.9164807954958588e-05,
      "loss": 2.8758,
      "step": 9000
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.9115679011132618e-05,
      "loss": 2.8876,
      "step": 9500
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.9066550067306656e-05,
      "loss": 2.8404,
      "step": 10000
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.901742112348069e-05,
      "loss": 2.8612,
      "step": 10500
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.8968292179654723e-05,
      "loss": 2.8226,
      "step": 11000
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.8919163235828757e-05,
      "loss": 2.8144,
      "step": 11500
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.887003429200279e-05,
      "loss": 2.8132,
      "step": 12000
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.8820905348176825e-05,
      "loss": 2.8182,
      "step": 12500
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.8771776404350862e-05,
      "loss": 2.7883,
      "step": 13000
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.8722647460524893e-05,
      "loss": 2.7961,
      "step": 13500
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.867351851669893e-05,
      "loss": 2.7605,
      "step": 14000
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.8624389572872964e-05,
      "loss": 2.7576,
      "step": 14500
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.8575260629046998e-05,
      "loss": 2.7763,
      "step": 15000
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.852613168522103e-05,
      "loss": 2.7519,
      "step": 15500
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.8477002741395065e-05,
      "loss": 2.7704,
      "step": 16000
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.8427873797569103e-05,
      "loss": 2.6919,
      "step": 16500
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.8378744853743137e-05,
      "loss": 2.7287,
      "step": 17000
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.832961590991717e-05,
      "loss": 2.7121,
      "step": 17500
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.8280486966091204e-05,
      "loss": 2.7137,
      "step": 18000
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.8231358022265238e-05,
      "loss": 2.7361,
      "step": 18500
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.8182229078439272e-05,
      "loss": 2.7176,
      "step": 19000
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.813310013461331e-05,
      "loss": 2.7055,
      "step": 19500
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.8083971190787343e-05,
      "loss": 2.7325,
      "step": 20000
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.8034842246961377e-05,
      "loss": 2.7259,
      "step": 20500
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.798571330313541e-05,
      "loss": 2.6834,
      "step": 21000
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.7936584359309445e-05,
      "loss": 2.6681,
      "step": 21500
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.788745541548348e-05,
      "loss": 2.6709,
      "step": 22000
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.7838326471657516e-05,
      "loss": 2.673,
      "step": 22500
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.7789197527831547e-05,
      "loss": 2.6801,
      "step": 23000
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.7740068584005584e-05,
      "loss": 2.6635,
      "step": 23500
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.7690939640179618e-05,
      "loss": 2.6699,
      "step": 24000
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.764181069635365e-05,
      "loss": 2.6431,
      "step": 24500
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.7592681752527685e-05,
      "loss": 2.6895,
      "step": 25000
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.754355280870172e-05,
      "loss": 2.6452,
      "step": 25500
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.7494423864875753e-05,
      "loss": 2.6619,
      "step": 26000
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.744529492104979e-05,
      "loss": 2.6232,
      "step": 26500
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.739616597722382e-05,
      "loss": 2.663,
      "step": 27000
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.7347037033397858e-05,
      "loss": 2.6151,
      "step": 27500
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.7297908089571892e-05,
      "loss": 2.6166,
      "step": 28000
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.7248779145745926e-05,
      "loss": 2.6394,
      "step": 28500
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.719965020191996e-05,
      "loss": 2.6242,
      "step": 29000
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.7150521258093994e-05,
      "loss": 2.6205,
      "step": 29500
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.7101392314268028e-05,
      "loss": 2.6596,
      "step": 30000
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.7052263370442065e-05,
      "loss": 2.6469,
      "step": 30500
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.7003134426616095e-05,
      "loss": 2.6189,
      "step": 31000
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.6954005482790133e-05,
      "loss": 2.6035,
      "step": 31500
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.6904876538964167e-05,
      "loss": 2.5861,
      "step": 32000
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.68557475951382e-05,
      "loss": 2.6504,
      "step": 32500
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.6806618651312238e-05,
      "loss": 2.6006,
      "step": 33000
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.6757489707486268e-05,
      "loss": 2.6398,
      "step": 33500
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.6708360763660305e-05,
      "loss": 2.6024,
      "step": 34000
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.665923181983434e-05,
      "loss": 2.6039,
      "step": 34500
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.6610102876008373e-05,
      "loss": 2.6224,
      "step": 35000
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.6560973932182407e-05,
      "loss": 2.5644,
      "step": 35500
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.651184498835644e-05,
      "loss": 2.5826,
      "step": 36000
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.6462716044530475e-05,
      "loss": 2.585,
      "step": 36500
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.6413587100704512e-05,
      "loss": 2.5589,
      "step": 37000
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.6364458156878546e-05,
      "loss": 2.6029,
      "step": 37500
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.631532921305258e-05,
      "loss": 2.554,
      "step": 38000
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.6266200269226614e-05,
      "loss": 2.5599,
      "step": 38500
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.6217071325400648e-05,
      "loss": 2.5285,
      "step": 39000
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.616794238157468e-05,
      "loss": 2.5684,
      "step": 39500
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.611881343774872e-05,
      "loss": 2.5816,
      "step": 40000
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.606968449392275e-05,
      "loss": 2.566,
      "step": 40500
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.6020555550096787e-05,
      "loss": 2.5244,
      "step": 41000
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.597142660627082e-05,
      "loss": 2.5571,
      "step": 41500
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.5922297662444854e-05,
      "loss": 2.5071,
      "step": 42000
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.5873168718618888e-05,
      "loss": 2.5497,
      "step": 42500
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.5824039774792922e-05,
      "loss": 2.5566,
      "step": 43000
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.5774910830966956e-05,
      "loss": 2.5448,
      "step": 43500
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.5725781887140993e-05,
      "loss": 2.5649,
      "step": 44000
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.5676652943315024e-05,
      "loss": 2.5562,
      "step": 44500
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.562752399948906e-05,
      "loss": 2.5756,
      "step": 45000
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.5578395055663095e-05,
      "loss": 2.5357,
      "step": 45500
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.552926611183713e-05,
      "loss": 2.5394,
      "step": 46000
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.5480137168011163e-05,
      "loss": 2.5416,
      "step": 46500
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.5431008224185196e-05,
      "loss": 2.523,
      "step": 47000
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.5381879280359234e-05,
      "loss": 2.4957,
      "step": 47500
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.5332750336533268e-05,
      "loss": 2.5392,
      "step": 48000
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.52836213927073e-05,
      "loss": 2.5505,
      "step": 48500
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.5234492448881335e-05,
      "loss": 2.5251,
      "step": 49000
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.5185363505055371e-05,
      "loss": 2.4836,
      "step": 49500
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.5136234561229403e-05,
      "loss": 2.5077,
      "step": 50000
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.5087105617403439e-05,
      "loss": 2.5103,
      "step": 50500
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.5037976673577473e-05,
      "loss": 2.5157,
      "step": 51000
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.4988847729751508e-05,
      "loss": 2.509,
      "step": 51500
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.493971878592554e-05,
      "loss": 2.4982,
      "step": 52000
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.4890589842099576e-05,
      "loss": 2.4741,
      "step": 52500
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.484146089827361e-05,
      "loss": 2.5438,
      "step": 53000
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.4792331954447645e-05,
      "loss": 2.4698,
      "step": 53500
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.4743203010621678e-05,
      "loss": 2.5175,
      "step": 54000
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.4694074066795713e-05,
      "loss": 2.4857,
      "step": 54500
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.4644945122969747e-05,
      "loss": 2.5006,
      "step": 55000
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.4595816179143783e-05,
      "loss": 2.4875,
      "step": 55500
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.4546687235317816e-05,
      "loss": 2.4964,
      "step": 56000
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.449755829149185e-05,
      "loss": 2.5175,
      "step": 56500
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.4448429347665884e-05,
      "loss": 2.4912,
      "step": 57000
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.439930040383992e-05,
      "loss": 2.5074,
      "step": 57500
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.4350171460013954e-05,
      "loss": 2.4655,
      "step": 58000
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.430104251618799e-05,
      "loss": 2.4985,
      "step": 58500
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.4251913572362021e-05,
      "loss": 2.4791,
      "step": 59000
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.4202784628536057e-05,
      "loss": 2.4881,
      "step": 59500
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.4153655684710091e-05,
      "loss": 2.4805,
      "step": 60000
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.4104526740884126e-05,
      "loss": 2.4591,
      "step": 60500
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.4055397797058159e-05,
      "loss": 2.4958,
      "step": 61000
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.4006268853232194e-05,
      "loss": 2.4691,
      "step": 61500
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.3957139909406228e-05,
      "loss": 2.5049,
      "step": 62000
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.3908010965580264e-05,
      "loss": 2.4584,
      "step": 62500
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.38588820217543e-05,
      "loss": 2.4761,
      "step": 63000
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.3809753077928331e-05,
      "loss": 2.4677,
      "step": 63500
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.3760624134102367e-05,
      "loss": 2.4868,
      "step": 64000
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.3711495190276401e-05,
      "loss": 2.4739,
      "step": 64500
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.3662366246450436e-05,
      "loss": 2.4558,
      "step": 65000
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.3613237302624469e-05,
      "loss": 2.4528,
      "step": 65500
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.3564108358798504e-05,
      "loss": 2.4608,
      "step": 66000
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.3514979414972538e-05,
      "loss": 2.4461,
      "step": 66500
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.3465850471146574e-05,
      "loss": 2.4382,
      "step": 67000
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.3416721527320606e-05,
      "loss": 2.4554,
      "step": 67500
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.3367592583494641e-05,
      "loss": 2.4488,
      "step": 68000
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.3318463639668675e-05,
      "loss": 2.4594,
      "step": 68500
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.3269334695842711e-05,
      "loss": 2.4583,
      "step": 69000
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.3220205752016743e-05,
      "loss": 2.4392,
      "step": 69500
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.3171076808190779e-05,
      "loss": 2.4366,
      "step": 70000
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.3121947864364813e-05,
      "loss": 2.4437,
      "step": 70500
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.3072818920538848e-05,
      "loss": 2.4635,
      "step": 71000
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.302368997671288e-05,
      "loss": 2.435,
      "step": 71500
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.2974561032886916e-05,
      "loss": 2.4333,
      "step": 72000
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.292543208906095e-05,
      "loss": 2.4678,
      "step": 72500
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.2876303145234985e-05,
      "loss": 2.4538,
      "step": 73000
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.282717420140902e-05,
      "loss": 2.4561,
      "step": 73500
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.2778045257583053e-05,
      "loss": 2.5159,
      "step": 74000
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.2728916313757087e-05,
      "loss": 2.4392,
      "step": 74500
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.2679787369931123e-05,
      "loss": 2.4503,
      "step": 75000
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.2630658426105156e-05,
      "loss": 2.4428,
      "step": 75500
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.2581529482279192e-05,
      "loss": 2.426,
      "step": 76000
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.2532400538453224e-05,
      "loss": 2.4809,
      "step": 76500
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.248327159462726e-05,
      "loss": 2.4687,
      "step": 77000
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.2434142650801294e-05,
      "loss": 2.4306,
      "step": 77500
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.238501370697533e-05,
      "loss": 2.4455,
      "step": 78000
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.2335884763149363e-05,
      "loss": 2.3706,
      "step": 78500
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.2286755819323397e-05,
      "loss": 2.4224,
      "step": 79000
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.2237626875497433e-05,
      "loss": 2.4242,
      "step": 79500
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.2188497931671466e-05,
      "loss": 2.429,
      "step": 80000
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.2139368987845502e-05,
      "loss": 2.4247,
      "step": 80500
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.2090240044019534e-05,
      "loss": 2.4195,
      "step": 81000
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.204111110019357e-05,
      "loss": 2.4293,
      "step": 81500
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.1991982156367604e-05,
      "loss": 2.4245,
      "step": 82000
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.194285321254164e-05,
      "loss": 2.4415,
      "step": 82500
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.1893724268715671e-05,
      "loss": 2.4199,
      "step": 83000
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.1844595324889707e-05,
      "loss": 2.4386,
      "step": 83500
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.179546638106374e-05,
      "loss": 2.4063,
      "step": 84000
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.1746337437237776e-05,
      "loss": 2.4376,
      "step": 84500
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.1697208493411809e-05,
      "loss": 2.4177,
      "step": 85000
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.1648079549585844e-05,
      "loss": 2.3778,
      "step": 85500
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.1598950605759878e-05,
      "loss": 2.4355,
      "step": 86000
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.1549821661933914e-05,
      "loss": 2.4214,
      "step": 86500
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.1500692718107946e-05,
      "loss": 2.3941,
      "step": 87000
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.1451563774281981e-05,
      "loss": 2.4122,
      "step": 87500
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.1402434830456015e-05,
      "loss": 2.4021,
      "step": 88000
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.135330588663005e-05,
      "loss": 2.4154,
      "step": 88500
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.1304176942804083e-05,
      "loss": 2.4004,
      "step": 89000
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.1255047998978119e-05,
      "loss": 2.4091,
      "step": 89500
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.1205919055152152e-05,
      "loss": 2.4348,
      "step": 90000
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.1156790111326188e-05,
      "loss": 2.3965,
      "step": 90500
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.1107661167500222e-05,
      "loss": 2.3904,
      "step": 91000
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.1058532223674256e-05,
      "loss": 2.3947,
      "step": 91500
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.100940327984829e-05,
      "loss": 2.4075,
      "step": 92000
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.0960274336022325e-05,
      "loss": 2.3987,
      "step": 92500
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.0911145392196359e-05,
      "loss": 2.4116,
      "step": 93000
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.0862016448370393e-05,
      "loss": 2.4416,
      "step": 93500
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.0812887504544429e-05,
      "loss": 2.3899,
      "step": 94000
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.0763758560718462e-05,
      "loss": 2.4015,
      "step": 94500
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.0714629616892498e-05,
      "loss": 2.3741,
      "step": 95000
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.0665500673066532e-05,
      "loss": 2.3951,
      "step": 95500
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.0616371729240566e-05,
      "loss": 2.406,
      "step": 96000
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.05672427854146e-05,
      "loss": 2.4102,
      "step": 96500
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.0518113841588635e-05,
      "loss": 2.4031,
      "step": 97000
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.0468984897762669e-05,
      "loss": 2.417,
      "step": 97500
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.0419855953936705e-05,
      "loss": 2.3978,
      "step": 98000
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.0370727010110737e-05,
      "loss": 2.4009,
      "step": 98500
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.0321598066284772e-05,
      "loss": 2.3966,
      "step": 99000
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.0272469122458806e-05,
      "loss": 2.3918,
      "step": 99500
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.0223340178632842e-05,
      "loss": 2.3853,
      "step": 100000
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.0174211234806874e-05,
      "loss": 2.3996,
      "step": 100500
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.012508229098091e-05,
      "loss": 2.388,
      "step": 101000
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.0075953347154944e-05,
      "loss": 2.3845,
      "step": 101500
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.0026824403328979e-05,
      "loss": 2.385,
      "step": 102000
    },
    {
      "epoch": 1.0,
      "learning_rate": 9.977695459503013e-06,
      "loss": 2.3387,
      "step": 102500
    },
    {
      "epoch": 1.01,
      "learning_rate": 9.928566515677047e-06,
      "loss": 2.3401,
      "step": 103000
    },
    {
      "epoch": 1.01,
      "learning_rate": 9.87943757185108e-06,
      "loss": 2.3291,
      "step": 103500
    },
    {
      "epoch": 1.02,
      "learning_rate": 9.830308628025116e-06,
      "loss": 2.3783,
      "step": 104000
    },
    {
      "epoch": 1.02,
      "learning_rate": 9.78117968419915e-06,
      "loss": 2.3154,
      "step": 104500
    },
    {
      "epoch": 1.03,
      "learning_rate": 9.732050740373184e-06,
      "loss": 2.3229,
      "step": 105000
    },
    {
      "epoch": 1.03,
      "learning_rate": 9.68292179654722e-06,
      "loss": 2.3844,
      "step": 105500
    },
    {
      "epoch": 1.04,
      "learning_rate": 9.633792852721254e-06,
      "loss": 2.3945,
      "step": 106000
    },
    {
      "epoch": 1.04,
      "learning_rate": 9.584663908895287e-06,
      "loss": 2.3354,
      "step": 106500
    },
    {
      "epoch": 1.05,
      "learning_rate": 9.535534965069321e-06,
      "loss": 2.325,
      "step": 107000
    },
    {
      "epoch": 1.05,
      "learning_rate": 9.486406021243357e-06,
      "loss": 2.3722,
      "step": 107500
    },
    {
      "epoch": 1.06,
      "learning_rate": 9.43727707741739e-06,
      "loss": 2.37,
      "step": 108000
    },
    {
      "epoch": 1.06,
      "learning_rate": 9.388148133591425e-06,
      "loss": 2.3454,
      "step": 108500
    },
    {
      "epoch": 1.07,
      "learning_rate": 9.339019189765458e-06,
      "loss": 2.3118,
      "step": 109000
    },
    {
      "epoch": 1.07,
      "learning_rate": 9.289890245939494e-06,
      "loss": 2.34,
      "step": 109500
    },
    {
      "epoch": 1.08,
      "learning_rate": 9.240761302113528e-06,
      "loss": 2.3156,
      "step": 110000
    },
    {
      "epoch": 1.08,
      "learning_rate": 9.191632358287562e-06,
      "loss": 2.3686,
      "step": 110500
    },
    {
      "epoch": 1.09,
      "learning_rate": 9.142503414461596e-06,
      "loss": 2.3641,
      "step": 111000
    },
    {
      "epoch": 1.09,
      "learning_rate": 9.093374470635631e-06,
      "loss": 2.3399,
      "step": 111500
    },
    {
      "epoch": 1.1,
      "learning_rate": 9.044245526809665e-06,
      "loss": 2.3038,
      "step": 112000
    },
    {
      "epoch": 1.1,
      "learning_rate": 8.995116582983699e-06,
      "loss": 2.3394,
      "step": 112500
    },
    {
      "epoch": 1.11,
      "learning_rate": 8.945987639157735e-06,
      "loss": 2.3144,
      "step": 113000
    },
    {
      "epoch": 1.11,
      "learning_rate": 8.896858695331768e-06,
      "loss": 2.3542,
      "step": 113500
    },
    {
      "epoch": 1.12,
      "learning_rate": 8.847729751505802e-06,
      "loss": 2.3378,
      "step": 114000
    },
    {
      "epoch": 1.12,
      "learning_rate": 8.798600807679836e-06,
      "loss": 2.3562,
      "step": 114500
    },
    {
      "epoch": 1.13,
      "learning_rate": 8.749471863853872e-06,
      "loss": 2.3759,
      "step": 115000
    },
    {
      "epoch": 1.13,
      "learning_rate": 8.700342920027906e-06,
      "loss": 2.3459,
      "step": 115500
    },
    {
      "epoch": 1.14,
      "learning_rate": 8.65121397620194e-06,
      "loss": 2.3041,
      "step": 116000
    },
    {
      "epoch": 1.14,
      "learning_rate": 8.602085032375973e-06,
      "loss": 2.3076,
      "step": 116500
    },
    {
      "epoch": 1.15,
      "learning_rate": 8.552956088550009e-06,
      "loss": 2.3598,
      "step": 117000
    },
    {
      "epoch": 1.15,
      "learning_rate": 8.503827144724045e-06,
      "loss": 2.3463,
      "step": 117500
    },
    {
      "epoch": 1.16,
      "learning_rate": 8.454698200898078e-06,
      "loss": 2.3505,
      "step": 118000
    },
    {
      "epoch": 1.16,
      "learning_rate": 8.405569257072112e-06,
      "loss": 2.3106,
      "step": 118500
    },
    {
      "epoch": 1.17,
      "learning_rate": 8.356440313246146e-06,
      "loss": 2.3391,
      "step": 119000
    },
    {
      "epoch": 1.17,
      "learning_rate": 8.307311369420182e-06,
      "loss": 2.3316,
      "step": 119500
    },
    {
      "epoch": 1.18,
      "learning_rate": 8.258182425594216e-06,
      "loss": 2.3122,
      "step": 120000
    },
    {
      "epoch": 1.18,
      "learning_rate": 8.20905348176825e-06,
      "loss": 2.3244,
      "step": 120500
    },
    {
      "epoch": 1.19,
      "learning_rate": 8.159924537942283e-06,
      "loss": 2.3457,
      "step": 121000
    },
    {
      "epoch": 1.19,
      "learning_rate": 8.110795594116319e-06,
      "loss": 2.328,
      "step": 121500
    },
    {
      "epoch": 1.2,
      "learning_rate": 8.061666650290353e-06,
      "loss": 2.319,
      "step": 122000
    },
    {
      "epoch": 1.2,
      "learning_rate": 8.012537706464387e-06,
      "loss": 2.3168,
      "step": 122500
    },
    {
      "epoch": 1.21,
      "learning_rate": 7.963408762638422e-06,
      "loss": 2.3288,
      "step": 123000
    },
    {
      "epoch": 1.21,
      "learning_rate": 7.914279818812456e-06,
      "loss": 2.3169,
      "step": 123500
    },
    {
      "epoch": 1.22,
      "learning_rate": 7.86515087498649e-06,
      "loss": 2.3398,
      "step": 124000
    },
    {
      "epoch": 1.22,
      "learning_rate": 7.816021931160524e-06,
      "loss": 2.3182,
      "step": 124500
    },
    {
      "epoch": 1.23,
      "learning_rate": 7.76689298733456e-06,
      "loss": 2.3219,
      "step": 125000
    },
    {
      "epoch": 1.23,
      "learning_rate": 7.717764043508593e-06,
      "loss": 2.3407,
      "step": 125500
    },
    {
      "epoch": 1.24,
      "learning_rate": 7.668635099682627e-06,
      "loss": 2.2986,
      "step": 126000
    },
    {
      "epoch": 1.24,
      "learning_rate": 7.619506155856662e-06,
      "loss": 2.3192,
      "step": 126500
    },
    {
      "epoch": 1.24,
      "learning_rate": 7.570377212030696e-06,
      "loss": 2.3229,
      "step": 127000
    },
    {
      "epoch": 1.25,
      "learning_rate": 7.521248268204731e-06,
      "loss": 2.3073,
      "step": 127500
    },
    {
      "epoch": 1.25,
      "learning_rate": 7.4721193243787646e-06,
      "loss": 2.2936,
      "step": 128000
    },
    {
      "epoch": 1.26,
      "learning_rate": 7.422990380552799e-06,
      "loss": 2.3078,
      "step": 128500
    },
    {
      "epoch": 1.26,
      "learning_rate": 7.373861436726833e-06,
      "loss": 2.3507,
      "step": 129000
    },
    {
      "epoch": 1.27,
      "learning_rate": 7.324732492900868e-06,
      "loss": 2.3071,
      "step": 129500
    },
    {
      "epoch": 1.27,
      "learning_rate": 7.275603549074902e-06,
      "loss": 2.3076,
      "step": 130000
    },
    {
      "epoch": 1.28,
      "learning_rate": 7.2264746052489365e-06,
      "loss": 2.3321,
      "step": 130500
    },
    {
      "epoch": 1.28,
      "learning_rate": 7.177345661422971e-06,
      "loss": 2.3224,
      "step": 131000
    },
    {
      "epoch": 1.29,
      "learning_rate": 7.128216717597005e-06,
      "loss": 2.3083,
      "step": 131500
    },
    {
      "epoch": 1.29,
      "learning_rate": 7.07908777377104e-06,
      "loss": 2.3196,
      "step": 132000
    },
    {
      "epoch": 1.3,
      "learning_rate": 7.0299588299450745e-06,
      "loss": 2.3311,
      "step": 132500
    },
    {
      "epoch": 1.3,
      "learning_rate": 6.980829886119109e-06,
      "loss": 2.3505,
      "step": 133000
    },
    {
      "epoch": 1.31,
      "learning_rate": 6.931700942293144e-06,
      "loss": 2.3014,
| "step": 133500 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 6.882571998467178e-06, | |
| "loss": 2.3001, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 6.833443054641213e-06, | |
| "loss": 2.3122, | |
| "step": 134500 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 6.7843141108152465e-06, | |
| "loss": 2.3031, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 6.735185166989281e-06, | |
| "loss": 2.3426, | |
| "step": 135500 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 6.686056223163315e-06, | |
| "loss": 2.3242, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 6.63692727933735e-06, | |
| "loss": 2.3312, | |
| "step": 136500 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 6.587798335511384e-06, | |
| "loss": 2.3265, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 6.5386693916854184e-06, | |
| "loss": 2.3226, | |
| "step": 137500 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 6.489540447859452e-06, | |
| "loss": 2.2682, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 6.440411504033487e-06, | |
| "loss": 2.2961, | |
| "step": 138500 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 6.391282560207521e-06, | |
| "loss": 2.2935, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 6.342153616381556e-06, | |
| "loss": 2.324, | |
| "step": 139500 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 6.2930246725555895e-06, | |
| "loss": 2.2955, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 6.243895728729624e-06, | |
| "loss": 2.292, | |
| "step": 140500 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 6.194766784903659e-06, | |
| "loss": 2.3076, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 6.145637841077693e-06, | |
| "loss": 2.3151, | |
| "step": 141500 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 6.0965088972517276e-06, | |
| "loss": 2.3051, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 6.0473799534257615e-06, | |
| "loss": 2.3247, | |
| "step": 142500 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 5.998251009599796e-06, | |
| "loss": 2.2741, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 5.94912206577383e-06, | |
| "loss": 2.2937, | |
| "step": 143500 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 5.899993121947865e-06, | |
| "loss": 2.2883, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 5.850864178121899e-06, | |
| "loss": 2.2985, | |
| "step": 144500 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 5.801735234295933e-06, | |
| "loss": 2.3006, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 5.752606290469967e-06, | |
| "loss": 2.2663, | |
| "step": 145500 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 5.703477346644002e-06, | |
| "loss": 2.3, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 5.654348402818036e-06, | |
| "loss": 2.2688, | |
| "step": 146500 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 5.605219458992071e-06, | |
| "loss": 2.2915, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 5.5560905151661045e-06, | |
| "loss": 2.2897, | |
| "step": 147500 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 5.50696157134014e-06, | |
| "loss": 2.3316, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 5.457832627514175e-06, | |
| "loss": 2.3386, | |
| "step": 148500 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 5.408703683688209e-06, | |
| "loss": 2.3215, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 5.359574739862243e-06, | |
| "loss": 2.3012, | |
| "step": 149500 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 5.310445796036277e-06, | |
| "loss": 2.2939, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 5.261316852210312e-06, | |
| "loss": 2.2933, | |
| "step": 150500 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 5.212187908384346e-06, | |
| "loss": 2.3057, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 5.163058964558381e-06, | |
| "loss": 2.2883, | |
| "step": 151500 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 5.113930020732415e-06, | |
| "loss": 2.3136, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 5.064801076906449e-06, | |
| "loss": 2.2841, | |
| "step": 152500 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 5.015672133080484e-06, | |
| "loss": 2.2684, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 4.966543189254518e-06, | |
| "loss": 2.2535, | |
| "step": 153500 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 4.9174142454285525e-06, | |
| "loss": 2.2899, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 4.868285301602586e-06, | |
| "loss": 2.308, | |
| "step": 154500 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 4.819156357776621e-06, | |
| "loss": 2.297, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 4.770027413950655e-06, | |
| "loss": 2.3058, | |
| "step": 155500 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 4.72089847012469e-06, | |
| "loss": 2.2709, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 4.671769526298724e-06, | |
| "loss": 2.2958, | |
| "step": 156500 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 4.622640582472758e-06, | |
| "loss": 2.3003, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 4.573511638646792e-06, | |
| "loss": 2.2724, | |
| "step": 157500 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 4.524382694820827e-06, | |
| "loss": 2.2806, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 4.475253750994861e-06, | |
| "loss": 2.2622, | |
| "step": 158500 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 4.4261248071688956e-06, | |
| "loss": 2.2915, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 4.37699586334293e-06, | |
| "loss": 2.3153, | |
| "step": 159500 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 4.327866919516965e-06, | |
| "loss": 2.2784, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 4.278737975690999e-06, | |
| "loss": 2.2704, | |
| "step": 160500 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 4.229609031865034e-06, | |
| "loss": 2.2792, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 4.1804800880390675e-06, | |
| "loss": 2.2642, | |
| "step": 161500 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 4.131351144213102e-06, | |
| "loss": 2.2858, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 4.082222200387136e-06, | |
| "loss": 2.2784, | |
| "step": 162500 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 4.033093256561171e-06, | |
| "loss": 2.2851, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 3.983964312735205e-06, | |
| "loss": 2.2762, | |
| "step": 163500 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 3.9348353689092394e-06, | |
| "loss": 2.3028, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 3.885706425083274e-06, | |
| "loss": 2.2735, | |
| "step": 164500 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 3.836577481257308e-06, | |
| "loss": 2.2511, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 3.7874485374313424e-06, | |
| "loss": 2.2856, | |
| "step": 165500 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 3.7383195936053767e-06, | |
| "loss": 2.2979, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 3.689190649779411e-06, | |
| "loss": 2.2537, | |
| "step": 166500 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 3.6400617059534453e-06, | |
| "loss": 2.2746, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 3.5909327621274804e-06, | |
| "loss": 2.3058, | |
| "step": 167500 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 3.5418038183015147e-06, | |
| "loss": 2.3015, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 3.492674874475549e-06, | |
| "loss": 2.2863, | |
| "step": 168500 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 3.4435459306495833e-06, | |
| "loss": 2.2733, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 3.3944169868236176e-06, | |
| "loss": 2.2806, | |
| "step": 169500 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 3.345288042997652e-06, | |
| "loss": 2.259, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 3.2961590991716862e-06, | |
| "loss": 2.3225, | |
| "step": 170500 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 3.2470301553457205e-06, | |
| "loss": 2.2887, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 3.197901211519755e-06, | |
| "loss": 2.2957, | |
| "step": 171500 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 3.148772267693789e-06, | |
| "loss": 2.294, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 3.0996433238678234e-06, | |
| "loss": 2.2976, | |
| "step": 172500 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 3.050514380041858e-06, | |
| "loss": 2.3028, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 3.0013854362158925e-06, | |
| "loss": 2.2875, | |
| "step": 173500 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.9522564923899268e-06, | |
| "loss": 2.3208, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.903127548563961e-06, | |
| "loss": 2.3068, | |
| "step": 174500 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.853998604737996e-06, | |
| "loss": 2.28, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.80486966091203e-06, | |
| "loss": 2.301, | |
| "step": 175500 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.7557407170860644e-06, | |
| "loss": 2.2428, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.7066117732600987e-06, | |
| "loss": 2.2843, | |
| "step": 176500 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.657482829434133e-06, | |
| "loss": 2.2756, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.6083538856081673e-06, | |
| "loss": 2.289, | |
| "step": 177500 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.5592249417822016e-06, | |
| "loss": 2.2902, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.5100959979562363e-06, | |
| "loss": 2.298, | |
| "step": 178500 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.4609670541302707e-06, | |
| "loss": 2.2764, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.411838110304305e-06, | |
| "loss": 2.2487, | |
| "step": 179500 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.3627091664783393e-06, | |
| "loss": 2.323, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.3135802226523736e-06, | |
| "loss": 2.3174, | |
| "step": 180500 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.2644512788264083e-06, | |
| "loss": 2.3205, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.2153223350004426e-06, | |
| "loss": 2.313, | |
| "step": 181500 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.166193391174477e-06, | |
| "loss": 2.3015, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.117064447348511e-06, | |
| "loss": 2.2836, | |
| "step": 182500 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.0679355035225455e-06, | |
| "loss": 2.2737, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.01880655969658e-06, | |
| "loss": 2.2824, | |
| "step": 183500 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 1.969677615870614e-06, | |
| "loss": 2.2577, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.920548672044649e-06, | |
| "loss": 2.2563, | |
| "step": 184500 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.871419728218683e-06, | |
| "loss": 2.3253, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.8222907843927174e-06, | |
| "loss": 2.2504, | |
| "step": 185500 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.7731618405667517e-06, | |
| "loss": 2.3124, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.724032896740786e-06, | |
| "loss": 2.2784, | |
| "step": 186500 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.6749039529148203e-06, | |
| "loss": 2.2832, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.6257750090888546e-06, | |
| "loss": 2.2924, | |
| "step": 187500 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.576646065262889e-06, | |
| "loss": 2.2843, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.5275171214369237e-06, | |
| "loss": 2.2724, | |
| "step": 188500 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.478388177610958e-06, | |
| "loss": 2.3194, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.4292592337849923e-06, | |
| "loss": 2.2532, | |
| "step": 189500 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.3801302899590266e-06, | |
| "loss": 2.2773, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.3310013461330609e-06, | |
| "loss": 2.2869, | |
| "step": 190500 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.2818724023070952e-06, | |
| "loss": 2.2751, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.2327434584811297e-06, | |
| "loss": 2.3004, | |
| "step": 191500 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.183614514655164e-06, | |
| "loss": 2.3015, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.1344855708291983e-06, | |
| "loss": 2.2566, | |
| "step": 192500 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.0853566270032326e-06, | |
| "loss": 2.2978, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.0362276831772671e-06, | |
| "loss": 2.2898, | |
| "step": 193500 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 9.870987393513014e-07, | |
| "loss": 2.2806, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 9.379697955253358e-07, | |
| "loss": 2.2777, | |
| "step": 194500 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 8.888408516993701e-07, | |
| "loss": 2.261, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 8.397119078734047e-07, | |
| "loss": 2.252, | |
| "step": 195500 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 7.90582964047439e-07, | |
| "loss": 2.2906, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 7.414540202214733e-07, | |
| "loss": 2.2968, | |
| "step": 196500 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 6.923250763955077e-07, | |
| "loss": 2.2463, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 6.431961325695421e-07, | |
| "loss": 2.2681, | |
| "step": 197500 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 5.940671887435765e-07, | |
| "loss": 2.2965, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 5.449382449176108e-07, | |
| "loss": 2.2705, | |
| "step": 198500 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 4.958093010916452e-07, | |
| "loss": 2.2669, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 4.4668035726567956e-07, | |
| "loss": 2.2478, | |
| "step": 199500 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 3.975514134397139e-07, | |
| "loss": 2.3053, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 3.484224696137483e-07, | |
| "loss": 2.2731, | |
| "step": 200500 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 2.992935257877826e-07, | |
| "loss": 2.2518, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 2.50164581961817e-07, | |
| "loss": 2.2657, | |
| "step": 201500 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 2.0103563813585136e-07, | |
| "loss": 2.2967, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.5190669430988575e-07, | |
| "loss": 2.2874, | |
| "step": 202500 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.0277775048392012e-07, | |
| "loss": 2.2764, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 5.364880665795447e-08, | |
| "loss": 2.2628, | |
| "step": 203500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 4.519862831988839e-09, | |
| "loss": 2.2711, | |
| "step": 204000 | |
| } | |
| ], | |
| "max_steps": 204046, | |
| "num_train_epochs": 2, | |
| "total_flos": 6.966781556932454e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |