|
{ |
|
"best_metric": 0.3038647472858429, |
|
"best_model_checkpoint": "/home/fbravo/data/all_results/pawsx/bertin_roberta_base/epochs_4_bs_32_lr_1e-5/checkpoint-1500", |
|
"epoch": 4.0, |
|
"global_step": 6176, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.7979999780654907, |
|
"eval_loss": 0.4853031635284424, |
|
"eval_runtime": 0.9414, |
|
"eval_samples_per_second": 2124.606, |
|
"eval_steps_per_second": 66.925, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.193652849740934e-06, |
|
"loss": 0.4315, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.8364999890327454, |
|
"eval_loss": 0.3721293807029724, |
|
"eval_runtime": 0.9441, |
|
"eval_samples_per_second": 2118.529, |
|
"eval_steps_per_second": 66.734, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.8544999957084656, |
|
"eval_loss": 0.3503261208534241, |
|
"eval_runtime": 0.9463, |
|
"eval_samples_per_second": 2113.472, |
|
"eval_steps_per_second": 66.574, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.384067357512954e-06, |
|
"loss": 0.25, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.8709999918937683, |
|
"eval_loss": 0.31852295994758606, |
|
"eval_runtime": 0.9416, |
|
"eval_samples_per_second": 2123.951, |
|
"eval_steps_per_second": 66.904, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 7.574481865284975e-06, |
|
"loss": 0.2072, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.8709999918937683, |
|
"eval_loss": 0.3038647472858429, |
|
"eval_runtime": 0.9413, |
|
"eval_samples_per_second": 2124.819, |
|
"eval_steps_per_second": 66.932, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_accuracy": 0.8684999942779541, |
|
"eval_loss": 0.41912218928337097, |
|
"eval_runtime": 0.9381, |
|
"eval_samples_per_second": 2132.007, |
|
"eval_steps_per_second": 67.158, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.766515544041451e-06, |
|
"loss": 0.1311, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_accuracy": 0.8600000143051147, |
|
"eval_loss": 0.4942796230316162, |
|
"eval_runtime": 0.9429, |
|
"eval_samples_per_second": 2121.101, |
|
"eval_steps_per_second": 66.815, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_accuracy": 0.8654999732971191, |
|
"eval_loss": 0.4631992280483246, |
|
"eval_runtime": 0.9996, |
|
"eval_samples_per_second": 2000.899, |
|
"eval_steps_per_second": 63.028, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5.956930051813472e-06, |
|
"loss": 0.1229, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_accuracy": 0.8569999933242798, |
|
"eval_loss": 0.48928430676460266, |
|
"eval_runtime": 0.9369, |
|
"eval_samples_per_second": 2134.751, |
|
"eval_steps_per_second": 67.245, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5.147344559585493e-06, |
|
"loss": 0.1193, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.8640000224113464, |
|
"eval_loss": 0.538551390171051, |
|
"eval_runtime": 0.9376, |
|
"eval_samples_per_second": 2133.008, |
|
"eval_steps_per_second": 67.19, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_accuracy": 0.8650000095367432, |
|
"eval_loss": 0.5807957649230957, |
|
"eval_runtime": 0.9434, |
|
"eval_samples_per_second": 2119.923, |
|
"eval_steps_per_second": 66.778, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 4.337759067357513e-06, |
|
"loss": 0.0718, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_accuracy": 0.8730000257492065, |
|
"eval_loss": 0.5360826253890991, |
|
"eval_runtime": 0.9423, |
|
"eval_samples_per_second": 2122.415, |
|
"eval_steps_per_second": 66.856, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_accuracy": 0.8684999942779541, |
|
"eval_loss": 0.6216676235198975, |
|
"eval_runtime": 0.946, |
|
"eval_samples_per_second": 2114.204, |
|
"eval_steps_per_second": 66.597, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 3.5281735751295337e-06, |
|
"loss": 0.0637, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_accuracy": 0.8659999966621399, |
|
"eval_loss": 0.6211779713630676, |
|
"eval_runtime": 0.9442, |
|
"eval_samples_per_second": 2118.136, |
|
"eval_steps_per_second": 66.721, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 2.7202072538860106e-06, |
|
"loss": 0.0629, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_accuracy": 0.871999979019165, |
|
"eval_loss": 0.5773542523384094, |
|
"eval_runtime": 0.9435, |
|
"eval_samples_per_second": 2119.687, |
|
"eval_steps_per_second": 66.77, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"eval_accuracy": 0.8669999837875366, |
|
"eval_loss": 0.614871084690094, |
|
"eval_runtime": 0.942, |
|
"eval_samples_per_second": 2123.196, |
|
"eval_steps_per_second": 66.881, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 1.9106217616580315e-06, |
|
"loss": 0.0385, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_accuracy": 0.8679999709129333, |
|
"eval_loss": 0.6947777271270752, |
|
"eval_runtime": 0.937, |
|
"eval_samples_per_second": 2134.496, |
|
"eval_steps_per_second": 67.237, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_accuracy": 0.8705000281333923, |
|
"eval_loss": 0.7105850577354431, |
|
"eval_runtime": 0.9421, |
|
"eval_samples_per_second": 2122.995, |
|
"eval_steps_per_second": 66.874, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.1010362694300518e-06, |
|
"loss": 0.0308, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"eval_accuracy": 0.8679999709129333, |
|
"eval_loss": 0.7033668756484985, |
|
"eval_runtime": 0.9383, |
|
"eval_samples_per_second": 2131.574, |
|
"eval_steps_per_second": 67.145, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 2.9145077720207257e-07, |
|
"loss": 0.0291, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"eval_accuracy": 0.8700000047683716, |
|
"eval_loss": 0.7085065841674805, |
|
"eval_runtime": 0.9424, |
|
"eval_samples_per_second": 2122.247, |
|
"eval_steps_per_second": 66.851, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 6176, |
|
"total_flos": 1.0169542344966144e+16, |
|
"train_loss": 0.12696161927954522, |
|
"train_runtime": 470.6783, |
|
"train_samples_per_second": 419.828, |
|
"train_steps_per_second": 13.121 |
|
} |
|
], |
|
"max_steps": 6176, |
|
"num_train_epochs": 4, |
|
"total_flos": 1.0169542344966144e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|