|
{ |
|
"best_metric": 0.49369621090355154, |
|
"best_model_checkpoint": "./xlnet-base-cased/fine_tuned_models/checkpoint-1876", |
|
"epoch": 10.0, |
|
"global_step": 2680, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.6214, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.8445629477500916, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 5.2858, |
|
"eval_samples_per_second": 197.321, |
|
"eval_steps_per_second": 24.783, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.7777777777777777e-05, |
|
"loss": 0.5559, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.807519257068634, |
|
"eval_matthews_correlation": 0.21580374087877008, |
|
"eval_runtime": 5.302, |
|
"eval_samples_per_second": 196.72, |
|
"eval_steps_per_second": 24.708, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.555555555555556e-05, |
|
"loss": 0.4284, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.6560272574424744, |
|
"eval_matthews_correlation": 0.4538429410217894, |
|
"eval_runtime": 5.3043, |
|
"eval_samples_per_second": 196.635, |
|
"eval_steps_per_second": 24.697, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.3259, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.7219155430793762, |
|
"eval_matthews_correlation": 0.4369166632963792, |
|
"eval_runtime": 5.3104, |
|
"eval_samples_per_second": 196.406, |
|
"eval_steps_per_second": 24.668, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.1111111111111113e-05, |
|
"loss": 0.248, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.856024980545044, |
|
"eval_matthews_correlation": 0.43655986862605256, |
|
"eval_runtime": 5.2976, |
|
"eval_samples_per_second": 196.881, |
|
"eval_steps_per_second": 24.728, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.1894, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.9414917826652527, |
|
"eval_matthews_correlation": 0.45061623518481686, |
|
"eval_runtime": 5.2949, |
|
"eval_samples_per_second": 196.983, |
|
"eval_steps_per_second": 24.741, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.1492, |
|
"step": 1876 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.9454582333564758, |
|
"eval_matthews_correlation": 0.49369621090355154, |
|
"eval_runtime": 5.2965, |
|
"eval_samples_per_second": 196.921, |
|
"eval_steps_per_second": 24.733, |
|
"step": 1876 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.1223, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.105307936668396, |
|
"eval_matthews_correlation": 0.4859069062313801, |
|
"eval_runtime": 5.2949, |
|
"eval_samples_per_second": 196.982, |
|
"eval_steps_per_second": 24.741, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.1115, |
|
"step": 2412 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 1.2678706645965576, |
|
"eval_matthews_correlation": 0.4829638975063273, |
|
"eval_runtime": 5.3057, |
|
"eval_samples_per_second": 196.58, |
|
"eval_steps_per_second": 24.69, |
|
"step": 2412 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0913, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.2386504411697388, |
|
"eval_matthews_correlation": 0.4787316411367127, |
|
"eval_runtime": 5.3037, |
|
"eval_samples_per_second": 196.655, |
|
"eval_steps_per_second": 24.7, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 2680, |
|
"total_flos": 3851987691901440.0, |
|
"train_loss": 0.28432635905137704, |
|
"train_runtime": 1347.9437, |
|
"train_samples_per_second": 63.437, |
|
"train_steps_per_second": 1.988 |
|
} |
|
], |
|
"max_steps": 2680, |
|
"num_train_epochs": 10, |
|
"total_flos": 3851987691901440.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|