|
{ |
|
"best_metric": 0.3550606966018677, |
|
"best_model_checkpoint": "bert_tiny_lda_100_v1_qqp/checkpoint-8532", |
|
"epoch": 11.0, |
|
"eval_steps": 500, |
|
"global_step": 15642, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.963968276977539, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.4874, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.797996537224833, |
|
"eval_combined_score": 0.7552586086455046, |
|
"eval_f1": 0.7125206800661763, |
|
"eval_loss": 0.4273848235607147, |
|
"eval_runtime": 12.4751, |
|
"eval_samples_per_second": 3240.852, |
|
"eval_steps_per_second": 12.665, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.7753087282180786, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.388, |
|
"step": 2844 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.822384368043532, |
|
"eval_combined_score": 0.7974788031824944, |
|
"eval_f1": 0.7725732383214569, |
|
"eval_loss": 0.37860846519470215, |
|
"eval_runtime": 12.4162, |
|
"eval_samples_per_second": 3256.242, |
|
"eval_steps_per_second": 12.725, |
|
"step": 2844 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.818178176879883, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.3354, |
|
"step": 4266 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8372248330447687, |
|
"eval_combined_score": 0.8135618147281756, |
|
"eval_f1": 0.7898987964115826, |
|
"eval_loss": 0.3613271117210388, |
|
"eval_runtime": 12.3656, |
|
"eval_samples_per_second": 3269.55, |
|
"eval_steps_per_second": 12.777, |
|
"step": 4266 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.026447296142578, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.2928, |
|
"step": 5688 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8447440019787287, |
|
"eval_combined_score": 0.8138597443078306, |
|
"eval_f1": 0.7829754866369325, |
|
"eval_loss": 0.3564006984233856, |
|
"eval_runtime": 12.2406, |
|
"eval_samples_per_second": 3302.952, |
|
"eval_steps_per_second": 12.908, |
|
"step": 5688 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.733372688293457, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.2583, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8509275290625773, |
|
"eval_combined_score": 0.8253042271895796, |
|
"eval_f1": 0.7996809253165819, |
|
"eval_loss": 0.3613673448562622, |
|
"eval_runtime": 12.2989, |
|
"eval_samples_per_second": 3287.292, |
|
"eval_steps_per_second": 12.847, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.535404682159424, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.2277, |
|
"step": 8532 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8543408360128617, |
|
"eval_combined_score": 0.83032142284148, |
|
"eval_f1": 0.8063020096700984, |
|
"eval_loss": 0.3550606966018677, |
|
"eval_runtime": 12.5469, |
|
"eval_samples_per_second": 3222.316, |
|
"eval_steps_per_second": 12.593, |
|
"step": 8532 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 2.6766912937164307, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.2014, |
|
"step": 9954 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8551570615879297, |
|
"eval_combined_score": 0.8322039379600886, |
|
"eval_f1": 0.8092508143322475, |
|
"eval_loss": 0.3854043185710907, |
|
"eval_runtime": 12.1595, |
|
"eval_samples_per_second": 3324.98, |
|
"eval_steps_per_second": 12.994, |
|
"step": 9954 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.472510576248169, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.1784, |
|
"step": 11376 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8544892406628741, |
|
"eval_combined_score": 0.8304560974134679, |
|
"eval_f1": 0.8064229541640617, |
|
"eval_loss": 0.3979368805885315, |
|
"eval_runtime": 12.3624, |
|
"eval_samples_per_second": 3270.399, |
|
"eval_steps_per_second": 12.781, |
|
"step": 11376 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 4.070095062255859, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.1578, |
|
"step": 12798 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8558496166213208, |
|
"eval_combined_score": 0.8330247887705053, |
|
"eval_f1": 0.8101999609196899, |
|
"eval_loss": 0.4261317253112793, |
|
"eval_runtime": 12.3748, |
|
"eval_samples_per_second": 3267.134, |
|
"eval_steps_per_second": 12.768, |
|
"step": 12798 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 2.72301983833313, |
|
"learning_rate": 4e-05, |
|
"loss": 0.1403, |
|
"step": 14220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8587929755132327, |
|
"eval_combined_score": 0.8347857688543592, |
|
"eval_f1": 0.8107785621954857, |
|
"eval_loss": 0.4443197548389435, |
|
"eval_runtime": 12.4932, |
|
"eval_samples_per_second": 3236.159, |
|
"eval_steps_per_second": 12.647, |
|
"step": 14220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 4.5569281578063965, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.1246, |
|
"step": 15642 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8566658421963889, |
|
"eval_combined_score": 0.8329733686874654, |
|
"eval_f1": 0.8092808951785421, |
|
"eval_loss": 0.46776074171066284, |
|
"eval_runtime": 12.4239, |
|
"eval_samples_per_second": 3254.203, |
|
"eval_steps_per_second": 12.717, |
|
"step": 15642 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"step": 15642, |
|
"total_flos": 1.0495478401912627e+17, |
|
"train_loss": 0.2538350579257268, |
|
"train_runtime": 2291.1306, |
|
"train_samples_per_second": 7940.316, |
|
"train_steps_per_second": 31.033 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 71100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 5 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0495478401912627e+17, |
|
"train_batch_size": 256, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|