|
{
  "best_metric": 0.276936411857605,
  "best_model_checkpoint": "/home/fbravo/data/all_results/pawsx/roberta_large_bne/epochs_2_bs_32_lr_1e-5/checkpoint-1200",
  "epoch": 2.0,
  "global_step": 3088,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.19,
      "eval_accuracy": 0.859499990940094,
      "eval_loss": 0.36283692717552185,
      "eval_runtime": 2.0197,
      "eval_samples_per_second": 990.267,
      "eval_steps_per_second": 31.193,
      "step": 300
    },
    {
      "epoch": 0.32,
      "learning_rate": 8.380829015544043e-06,
      "loss": 0.3845,
      "step": 500
    },
    {
      "epoch": 0.39,
      "eval_accuracy": 0.871999979019165,
      "eval_loss": 0.3212876617908478,
      "eval_runtime": 2.0153,
      "eval_samples_per_second": 992.393,
      "eval_steps_per_second": 31.26,
      "step": 600
    },
    {
      "epoch": 0.58,
      "eval_accuracy": 0.890999972820282,
      "eval_loss": 0.281432181596756,
      "eval_runtime": 2.0204,
      "eval_samples_per_second": 989.885,
      "eval_steps_per_second": 31.181,
      "step": 900
    },
    {
      "epoch": 0.65,
      "learning_rate": 6.764896373056995e-06,
      "loss": 0.2115,
      "step": 1000
    },
    {
      "epoch": 0.78,
      "eval_accuracy": 0.890999972820282,
      "eval_loss": 0.276936411857605,
      "eval_runtime": 2.0133,
      "eval_samples_per_second": 993.395,
      "eval_steps_per_second": 31.292,
      "step": 1200
    },
    {
      "epoch": 0.97,
      "learning_rate": 5.1457253886010375e-06,
      "loss": 0.1843,
      "step": 1500
    },
    {
      "epoch": 0.97,
      "eval_accuracy": 0.8934999704360962,
      "eval_loss": 0.2994357943534851,
      "eval_runtime": 2.0148,
      "eval_samples_per_second": 992.653,
      "eval_steps_per_second": 31.269,
      "step": 1500
    },
    {
      "epoch": 1.17,
      "eval_accuracy": 0.9010000228881836,
      "eval_loss": 0.3649846911430359,
      "eval_runtime": 2.0177,
      "eval_samples_per_second": 991.23,
      "eval_steps_per_second": 31.224,
      "step": 1800
    },
    {
      "epoch": 1.3,
      "learning_rate": 3.5265544041450777e-06,
      "loss": 0.1041,
      "step": 2000
    },
    {
      "epoch": 1.36,
      "eval_accuracy": 0.902999997138977,
      "eval_loss": 0.3713494837284088,
      "eval_runtime": 2.0175,
      "eval_samples_per_second": 991.324,
      "eval_steps_per_second": 31.227,
      "step": 2100
    },
    {
      "epoch": 1.55,
      "eval_accuracy": 0.8995000123977661,
      "eval_loss": 0.380949467420578,
      "eval_runtime": 2.0124,
      "eval_samples_per_second": 993.836,
      "eval_steps_per_second": 31.306,
      "step": 2400
    },
    {
      "epoch": 1.62,
      "learning_rate": 1.9073834196891196e-06,
      "loss": 0.0994,
      "step": 2500
    },
    {
      "epoch": 1.75,
      "eval_accuracy": 0.8974999785423279,
      "eval_loss": 0.34086307883262634,
      "eval_runtime": 2.0134,
      "eval_samples_per_second": 993.346,
      "eval_steps_per_second": 31.29,
      "step": 2700
    },
    {
      "epoch": 1.94,
      "learning_rate": 2.9145077720207257e-07,
      "loss": 0.09,
      "step": 3000
    },
    {
      "epoch": 1.94,
      "eval_accuracy": 0.902999997138977,
      "eval_loss": 0.3528608977794647,
      "eval_runtime": 2.0143,
      "eval_samples_per_second": 992.921,
      "eval_steps_per_second": 31.277,
      "step": 3000
    },
    {
      "epoch": 2.0,
      "step": 3088,
      "total_flos": 1.7847300994891776e+16,
      "train_loss": 0.17663975268447954,
      "train_runtime": 500.8558,
      "train_samples_per_second": 197.266,
      "train_steps_per_second": 6.165
    }
  ],
  "max_steps": 3088,
  "num_train_epochs": 2,
  "total_flos": 1.7847300994891776e+16,
  "trial_name": null,
  "trial_params": null
}
|
|