|
{ |
|
"best_global_step": 5688, |
|
"best_metric": 0.31825923919677734, |
|
"best_model_checkpoint": "tinybert_base_train_book_ent_15p_s_init_qqp/checkpoint-5688", |
|
"epoch": 9.0, |
|
"eval_steps": 500, |
|
"global_step": 12798, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.7183722257614136, |
|
"learning_rate": 4.900070323488045e-05, |
|
"loss": 0.4585, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8173138758347761, |
|
"eval_combined_score": 0.7915378208299848, |
|
"eval_f1": 0.7657617658251935, |
|
"eval_loss": 0.38073402643203735, |
|
"eval_runtime": 21.6007, |
|
"eval_samples_per_second": 1871.695, |
|
"eval_steps_per_second": 7.315, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.8384530544281006, |
|
"learning_rate": 4.800070323488045e-05, |
|
"loss": 0.3589, |
|
"step": 2844 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8233737323769478, |
|
"eval_combined_score": 0.8073442495135272, |
|
"eval_f1": 0.7913147666501067, |
|
"eval_loss": 0.371241956949234, |
|
"eval_runtime": 21.5593, |
|
"eval_samples_per_second": 1875.294, |
|
"eval_steps_per_second": 7.329, |
|
"step": 2844 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.3306589126586914, |
|
"learning_rate": 4.700070323488045e-05, |
|
"loss": 0.3115, |
|
"step": 4266 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8476873608706406, |
|
"eval_combined_score": 0.8294305809627335, |
|
"eval_f1": 0.8111738010548264, |
|
"eval_loss": 0.3361490070819855, |
|
"eval_runtime": 21.4495, |
|
"eval_samples_per_second": 1884.89, |
|
"eval_steps_per_second": 7.366, |
|
"step": 4266 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.4020464420318604, |
|
"learning_rate": 4.600070323488046e-05, |
|
"loss": 0.2742, |
|
"step": 5688 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8579520158298294, |
|
"eval_combined_score": 0.8305796812863981, |
|
"eval_f1": 0.8032073467429668, |
|
"eval_loss": 0.31825923919677734, |
|
"eval_runtime": 21.6362, |
|
"eval_samples_per_second": 1868.627, |
|
"eval_steps_per_second": 7.303, |
|
"step": 5688 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 3.4398610591888428, |
|
"learning_rate": 4.500070323488045e-05, |
|
"loss": 0.2431, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8610685134800891, |
|
"eval_combined_score": 0.8386481124573683, |
|
"eval_f1": 0.8162277114346475, |
|
"eval_loss": 0.3296893835067749, |
|
"eval_runtime": 21.5093, |
|
"eval_samples_per_second": 1879.655, |
|
"eval_steps_per_second": 7.346, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 4.022575378417969, |
|
"learning_rate": 4.400070323488045e-05, |
|
"loss": 0.2161, |
|
"step": 8532 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8658174622804848, |
|
"eval_combined_score": 0.8453324550287905, |
|
"eval_f1": 0.8248474477770962, |
|
"eval_loss": 0.32380661368370056, |
|
"eval_runtime": 21.4879, |
|
"eval_samples_per_second": 1881.524, |
|
"eval_steps_per_second": 7.353, |
|
"step": 8532 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 3.2843637466430664, |
|
"learning_rate": 4.300070323488045e-05, |
|
"loss": 0.1931, |
|
"step": 9954 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8625772940885481, |
|
"eval_combined_score": 0.843382818299258, |
|
"eval_f1": 0.8241883425099678, |
|
"eval_loss": 0.34378689527511597, |
|
"eval_runtime": 21.4885, |
|
"eval_samples_per_second": 1881.471, |
|
"eval_steps_per_second": 7.353, |
|
"step": 9954 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.7418906688690186, |
|
"learning_rate": 4.200070323488045e-05, |
|
"loss": 0.171, |
|
"step": 11376 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8671531041305961, |
|
"eval_combined_score": 0.8482060462955171, |
|
"eval_f1": 0.8292589884604381, |
|
"eval_loss": 0.37025919556617737, |
|
"eval_runtime": 21.5924, |
|
"eval_samples_per_second": 1872.419, |
|
"eval_steps_per_second": 7.317, |
|
"step": 11376 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 4.6632304191589355, |
|
"learning_rate": 4.100070323488045e-05, |
|
"loss": 0.1516, |
|
"step": 12798 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.862824635171902, |
|
"eval_combined_score": 0.845481112131969, |
|
"eval_f1": 0.8281375890920359, |
|
"eval_loss": 0.39689913392066956, |
|
"eval_runtime": 21.8663, |
|
"eval_samples_per_second": 1848.96, |
|
"eval_steps_per_second": 7.226, |
|
"step": 12798 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"step": 12798, |
|
"total_flos": 6.475495387021517e+16, |
|
"train_loss": 0.26421997885831466, |
|
"train_runtime": 2604.7894, |
|
"train_samples_per_second": 6984.173, |
|
"train_steps_per_second": 27.296 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 71100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 5 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.475495387021517e+16, |
|
"train_batch_size": 256, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|