{ |
|
"best_metric": 0.9819265007972717, |
|
"best_model_checkpoint": "./checkpoints/random-in-domain-5-demos-t5-small/checkpoint-70000", |
|
"epoch": 8.970877518982475, |
|
"global_step": 70000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.001, |
|
"loss": 2.2565, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.001, |
|
"loss": 1.5897, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.001, |
|
"loss": 1.406, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.001, |
|
"loss": 1.3023, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.001, |
|
"loss": 1.2344, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.001, |
|
"loss": 1.1836, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.001, |
|
"loss": 1.1485, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.001, |
|
"loss": 1.1179, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.001, |
|
"loss": 1.0795, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.001, |
|
"loss": 1.0487, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.001, |
|
"loss": 1.0367, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.001, |
|
"loss": 1.0215, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.001, |
|
"loss": 1.0043, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.001, |
|
"loss": 0.979, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.001, |
|
"loss": 0.9705, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.001, |
|
"loss": 0.9737, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.001, |
|
"loss": 0.9429, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.9413, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.001, |
|
"loss": 0.9282, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.001, |
|
"loss": 0.918, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.001, |
|
"loss": 0.9178, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.001, |
|
"loss": 0.903, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.9024, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.001, |
|
"loss": 0.886, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.001, |
|
"loss": 0.8795, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.001, |
|
"loss": 0.8883, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.001, |
|
"loss": 0.8601, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.001, |
|
"loss": 0.8643, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.001, |
|
"loss": 0.8641, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.001, |
|
"loss": 0.8625, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.8655, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.8538, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.001, |
|
"loss": 0.8575, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.001, |
|
"loss": 0.8427, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.001, |
|
"loss": 0.8332, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.8373, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.001, |
|
"loss": 0.845, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.8271, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.001, |
|
"loss": 0.8159, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.001, |
|
"loss": 0.8027, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.001, |
|
"loss": 0.7981, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.001, |
|
"loss": 0.8025, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.001, |
|
"loss": 0.8008, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.001, |
|
"loss": 0.7951, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.001, |
|
"loss": 0.7945, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.001, |
|
"loss": 0.7824, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.001, |
|
"loss": 0.7945, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.7939, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.001, |
|
"loss": 0.7971, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.001, |
|
"loss": 0.791, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 1.0492665767669678, |
|
"eval_runtime": 236.4668, |
|
"eval_samples_per_second": 516.859, |
|
"eval_steps_per_second": 2.021, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.001, |
|
"loss": 0.8019, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.001, |
|
"loss": 0.7752, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.001, |
|
"loss": 0.7862, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.001, |
|
"loss": 0.7835, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.001, |
|
"loss": 0.7751, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.001, |
|
"loss": 0.7747, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.7623, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.001, |
|
"loss": 0.7898, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.001, |
|
"loss": 0.7629, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.001, |
|
"loss": 0.7601, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.001, |
|
"loss": 0.7698, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.7664, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.001, |
|
"loss": 0.7616, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.001, |
|
"loss": 0.764, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.001, |
|
"loss": 0.774, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.001, |
|
"loss": 0.7524, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.001, |
|
"loss": 0.7637, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.001, |
|
"loss": 0.7615, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.001, |
|
"loss": 0.7702, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.7453, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.7369, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.001, |
|
"loss": 0.7499, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.001, |
|
"loss": 0.7491, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.001, |
|
"loss": 0.7607, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.7417, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.001, |
|
"loss": 0.7312, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.7464, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.001, |
|
"loss": 0.7555, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.001, |
|
"loss": 0.7236, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.001, |
|
"loss": 0.716, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.001, |
|
"loss": 0.7199, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.001, |
|
"loss": 0.7237, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.001, |
|
"loss": 0.7293, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.001, |
|
"loss": 0.725, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.001, |
|
"loss": 0.7262, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.001, |
|
"loss": 0.7129, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.7205, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.001, |
|
"loss": 0.7145, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.001, |
|
"loss": 0.727, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.001, |
|
"loss": 0.7261, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.001, |
|
"loss": 0.7226, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.001, |
|
"loss": 0.7262, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.001, |
|
"loss": 0.7224, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.001, |
|
"loss": 0.7123, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.001, |
|
"loss": 0.7151, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.7059, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.001, |
|
"loss": 0.7125, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.001, |
|
"loss": 0.7113, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.001, |
|
"loss": 0.7083, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.001, |
|
"loss": 0.7054, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 1.007745623588562, |
|
"eval_runtime": 236.2858, |
|
"eval_samples_per_second": 517.255, |
|
"eval_steps_per_second": 2.023, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.6979, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.001, |
|
"loss": 0.7047, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.001, |
|
"loss": 0.7163, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.001, |
|
"loss": 0.7064, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.001, |
|
"loss": 0.7087, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.001, |
|
"loss": 0.7112, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.001, |
|
"loss": 0.7174, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.001, |
|
"loss": 0.7056, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.7008, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.7001, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.001, |
|
"loss": 0.7014, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 0.001, |
|
"loss": 0.7007, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.001, |
|
"loss": 0.7051, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.7069, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 0.001, |
|
"loss": 0.696, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.6893, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.001, |
|
"loss": 0.6865, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 0.001, |
|
"loss": 0.6848, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 0.001, |
|
"loss": 0.6707, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 0.001, |
|
"loss": 0.6826, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 0.001, |
|
"loss": 0.6754, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 0.001, |
|
"loss": 0.6722, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 0.001, |
|
"loss": 0.6775, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 0.001, |
|
"loss": 0.6796, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.001, |
|
"loss": 0.6826, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.6778, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.001, |
|
"loss": 0.6922, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 0.001, |
|
"loss": 0.6706, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 0.001, |
|
"loss": 0.6721, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.001, |
|
"loss": 0.6737, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 0.001, |
|
"loss": 0.7046, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 0.001, |
|
"loss": 0.6754, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 0.001, |
|
"loss": 0.6794, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 0.001, |
|
"loss": 0.684, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.6776, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 0.001, |
|
"loss": 0.6826, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.001, |
|
"loss": 0.6877, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 0.001, |
|
"loss": 0.6773, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 0.001, |
|
"loss": 0.6731, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.6704, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 0.001, |
|
"loss": 0.681, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 0.001, |
|
"loss": 0.6745, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.001, |
|
"loss": 0.6734, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 0.001, |
|
"loss": 0.6759, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 0.001, |
|
"loss": 0.6729, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.001, |
|
"loss": 0.6727, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.001, |
|
"loss": 0.6734, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.6747, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.6739, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 0.001, |
|
"loss": 0.6564, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_loss": 0.9836647510528564, |
|
"eval_runtime": 236.2546, |
|
"eval_samples_per_second": 517.323, |
|
"eval_steps_per_second": 2.023, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 0.001, |
|
"loss": 0.6748, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 0.001, |
|
"loss": 0.6582, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.6526, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 0.001, |
|
"loss": 0.6587, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.6621, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.001, |
|
"loss": 0.6829, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 0.001, |
|
"loss": 0.6448, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 0.001, |
|
"loss": 0.6391, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 0.001, |
|
"loss": 0.6596, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.001, |
|
"loss": 0.6503, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 0.001, |
|
"loss": 0.6484, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 0.001, |
|
"loss": 0.6564, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 0.001, |
|
"loss": 0.655, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.001, |
|
"loss": 0.6421, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.6537, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 0.001, |
|
"loss": 0.6459, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 0.001, |
|
"loss": 0.6525, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 0.001, |
|
"loss": 0.6523, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 0.001, |
|
"loss": 0.6496, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 0.001, |
|
"loss": 0.6477, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 0.001, |
|
"loss": 0.6455, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 0.001, |
|
"loss": 0.6413, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 0.001, |
|
"loss": 0.6414, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.6552, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 0.001, |
|
"loss": 0.6496, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 0.001, |
|
"loss": 0.6495, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 0.001, |
|
"loss": 0.6495, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 0.001, |
|
"loss": 0.6468, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.6583, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 0.001, |
|
"loss": 0.66, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 0.001, |
|
"loss": 0.6555, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.001, |
|
"loss": 0.6489, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 0.001, |
|
"loss": 0.6361, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 0.001, |
|
"loss": 0.6493, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 0.001, |
|
"loss": 0.6402, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 0.001, |
|
"loss": 0.647, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.6533, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.6426, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 0.001, |
|
"loss": 0.6478, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 0.001, |
|
"loss": 0.6441, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 0.001, |
|
"loss": 0.6519, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.6358, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.001, |
|
"loss": 0.6546, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.6545, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.001, |
|
"loss": 0.6578, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 0.001, |
|
"loss": 0.6233, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 0.001, |
|
"loss": 0.6281, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 0.001, |
|
"loss": 0.6272, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.001, |
|
"loss": 0.6242, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 0.001, |
|
"loss": 0.6112, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"eval_loss": 0.9964196085929871, |
|
"eval_runtime": 236.0055, |
|
"eval_samples_per_second": 517.869, |
|
"eval_steps_per_second": 2.025, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 0.001, |
|
"loss": 0.6222, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 0.001, |
|
"loss": 0.6332, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.001, |
|
"loss": 0.6206, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.6279, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 0.001, |
|
"loss": 0.6191, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 0.001, |
|
"loss": 0.6262, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.001, |
|
"loss": 0.6386, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.001, |
|
"loss": 0.6321, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 0.001, |
|
"loss": 0.638, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 0.001, |
|
"loss": 0.626, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 0.001, |
|
"loss": 0.6298, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.001, |
|
"loss": 0.6148, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.6225, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 0.001, |
|
"loss": 0.6331, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.001, |
|
"loss": 0.6347, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 0.001, |
|
"loss": 0.6331, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.001, |
|
"loss": 0.6274, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.6388, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 0.001, |
|
"loss": 0.6262, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 0.001, |
|
"loss": 0.6245, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 0.001, |
|
"loss": 0.6317, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 0.001, |
|
"loss": 0.6323, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 0.001, |
|
"loss": 0.6275, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 0.001, |
|
"loss": 0.6284, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 0.001, |
|
"loss": 0.6301, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.6453, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.6244, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 0.001, |
|
"loss": 0.6271, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 0.001, |
|
"loss": 0.6275, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 0.001, |
|
"loss": 0.6483, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.6143, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 0.001, |
|
"loss": 0.6272, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.6425, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.001, |
|
"loss": 0.6243, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 0.001, |
|
"loss": 0.6014, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 0.001, |
|
"loss": 0.6004, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 0.001, |
|
"loss": 0.611, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 0.001, |
|
"loss": 0.6111, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 0.001, |
|
"loss": 0.6101, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.001, |
|
"loss": 0.6154, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 0.001, |
|
"loss": 0.6094, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 0.001, |
|
"loss": 0.6082, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.6013, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 0.001, |
|
"loss": 0.6148, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 0.001, |
|
"loss": 0.6049, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 0.001, |
|
"loss": 0.6089, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 0.001, |
|
"loss": 0.61, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 0.001, |
|
"loss": 0.6056, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 0.001, |
|
"loss": 0.6138, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.001, |
|
"loss": 0.6212, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"eval_loss": 0.9890182614326477, |
|
"eval_runtime": 236.1033, |
|
"eval_samples_per_second": 517.655, |
|
"eval_steps_per_second": 2.025, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 0.001, |
|
"loss": 0.612, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.6111, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 0.001, |
|
"loss": 0.6146, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 0.001, |
|
"loss": 0.615, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.001, |
|
"loss": 0.6128, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 0.001, |
|
"loss": 0.6183, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.6115, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 0.001, |
|
"loss": 0.6068, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 0.001, |
|
"loss": 0.6107, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 0.001, |
|
"loss": 0.6256, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 0.001, |
|
"loss": 0.6149, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 0.001, |
|
"loss": 0.606, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 0.001, |
|
"loss": 0.6203, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 0.001, |
|
"loss": 0.6117, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.6079, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.6164, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 0.001, |
|
"loss": 0.6162, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 0.001, |
|
"loss": 0.6094, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 0.001, |
|
"loss": 0.6134, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.617, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 0.001, |
|
"loss": 0.6165, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.6127, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.001, |
|
"loss": 0.6127, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 0.001, |
|
"loss": 0.593, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.001, |
|
"loss": 0.5891, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 0.001, |
|
"loss": 0.6004, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.001, |
|
"loss": 0.5824, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 0.001, |
|
"loss": 0.5949, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 0.001, |
|
"loss": 0.5931, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 0.001, |
|
"loss": 0.5861, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.001, |
|
"loss": 0.6035, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.5859, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 0.001, |
|
"loss": 0.5953, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 0.001, |
|
"loss": 0.6032, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 0.001, |
|
"loss": 0.5945, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 0.001, |
|
"loss": 0.598, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 0.001, |
|
"loss": 0.5981, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 0.001, |
|
"loss": 0.6034, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 0.001, |
|
"loss": 0.6075, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 0.001, |
|
"loss": 0.6065, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.5972, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 0.001, |
|
"loss": 0.6048, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 0.001, |
|
"loss": 0.5977, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 0.001, |
|
"loss": 0.5998, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 0.001, |
|
"loss": 0.5943, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.5883, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 0.001, |
|
"loss": 0.5999, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 0.001, |
|
"loss": 0.5957, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 0.001, |
|
"loss": 0.6103, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 0.001, |
|
"loss": 0.6078, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"eval_loss": 0.9891012907028198, |
|
"eval_runtime": 236.1606, |
|
"eval_samples_per_second": 517.529, |
|
"eval_steps_per_second": 2.024, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 0.001, |
|
"loss": 0.5903, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 0.001, |
|
"loss": 0.6007, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 0.001, |
|
"loss": 0.5973, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.596, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.597, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 0.001, |
|
"loss": 0.6018, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 0.001, |
|
"loss": 0.6024, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 0.001, |
|
"loss": 0.6011, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.5883, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 0.001, |
|
"loss": 0.6006, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.6004, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.001, |
|
"loss": 0.6083, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 0.001, |
|
"loss": 0.5844, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 0.001, |
|
"loss": 0.5751, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 0.001, |
|
"loss": 0.5716, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 0.001, |
|
"loss": 0.5748, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 0.001, |
|
"loss": 0.5752, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 0.001, |
|
"loss": 0.5697, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 0.001, |
|
"loss": 0.5751, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 0.001, |
|
"loss": 0.5821, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.578, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 0.001, |
|
"loss": 0.5768, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 0.001, |
|
"loss": 0.5807, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 0.001, |
|
"loss": 0.5872, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 0.001, |
|
"loss": 0.5802, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 0.001, |
|
"loss": 0.5883, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 0.001, |
|
"loss": 0.5792, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 0.001, |
|
"loss": 0.5888, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 0.001, |
|
"loss": 0.6033, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.5967, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 0.001, |
|
"loss": 0.5909, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 0.001, |
|
"loss": 0.5823, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 0.001, |
|
"loss": 0.5883, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 0.001, |
|
"loss": 0.5862, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.5924, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 0.001, |
|
"loss": 0.5902, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 0.001, |
|
"loss": 0.5841, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 0.001, |
|
"loss": 0.5919, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 0.001, |
|
"loss": 0.5889, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 0.001, |
|
"loss": 0.5913, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 0.001, |
|
"loss": 0.585, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 0.001, |
|
"loss": 0.5887, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.5931, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.5799, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 0.001, |
|
"loss": 0.585, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 0.001, |
|
"loss": 0.5765, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 0.001, |
|
"loss": 0.5827, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.5875, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 0.001, |
|
"loss": 0.5909, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.6049, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"eval_loss": 0.9819265007972717, |
|
"eval_runtime": 236.1037, |
|
"eval_samples_per_second": 517.654, |
|
"eval_steps_per_second": 2.025, |
|
"step": 70000 |
|
} |
|
], |
|
"max_steps": 78030, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.0371070946901688e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |