{
  "best_metric": 0.4055454134941101,
  "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_lev/checkpoint-35434",
  "epoch": 12.0,
  "eval_steps": 500,
  "global_step": 60744,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 3.1182055473327637,
      "learning_rate": 4.773575540996625e-05,
      "loss": 0.8553,
      "step": 5062
    },
    {
      "epoch": 1.0,
      "eval_bleu": 0.3575613099028634,
      "eval_loss": 0.5277819633483887,
      "eval_rouge1": 0.5831886589720002,
      "eval_rouge2": 0.30791501071801863,
      "eval_rougeL": 0.5813351421522688,
      "eval_runtime": 48.8805,
      "eval_samples_per_second": 206.994,
      "eval_steps_per_second": 25.879,
      "step": 5062
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.507585287094116,
      "learning_rate": 4.522334723049435e-05,
      "loss": 0.4665,
      "step": 10124
    },
    {
      "epoch": 2.0,
      "eval_bleu": 0.38457648452761506,
      "eval_loss": 0.46819329261779785,
      "eval_rouge1": 0.6342397326708439,
      "eval_rouge2": 0.3690484855204119,
      "eval_rougeL": 0.6327616070759406,
      "eval_runtime": 33.2569,
      "eval_samples_per_second": 304.237,
      "eval_steps_per_second": 38.037,
      "step": 10124
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.12933349609375,
      "learning_rate": 4.271093905102244e-05,
      "loss": 0.3762,
      "step": 15186
    },
    {
      "epoch": 3.0,
      "eval_bleu": 0.4071777548010412,
      "eval_loss": 0.43943917751312256,
      "eval_rouge1": 0.6630887207777014,
      "eval_rouge2": 0.4093439150738768,
      "eval_rougeL": 0.6616155355063926,
      "eval_runtime": 32.6487,
      "eval_samples_per_second": 309.905,
      "eval_steps_per_second": 38.746,
      "step": 15186
    },
    {
      "epoch": 4.0,
      "grad_norm": 3.2251956462860107,
      "learning_rate": 4.0198530871550526e-05,
      "loss": 0.3096,
      "step": 20248
    },
    {
      "epoch": 4.0,
      "eval_bleu": 0.4282823691369342,
      "eval_loss": 0.42224156856536865,
      "eval_rouge1": 0.6859331821051708,
      "eval_rouge2": 0.4421578092708104,
      "eval_rougeL": 0.6848333938194515,
      "eval_runtime": 155.4831,
      "eval_samples_per_second": 65.075,
      "eval_steps_per_second": 8.136,
      "step": 20248
    },
    {
      "epoch": 5.0,
      "grad_norm": 2.957134485244751,
      "learning_rate": 3.7686122692078615e-05,
      "loss": 0.2588,
      "step": 25310
    },
    {
      "epoch": 5.0,
      "eval_bleu": 0.45182026317042456,
      "eval_loss": 0.4118480682373047,
      "eval_rouge1": 0.7053306010937058,
      "eval_rouge2": 0.4744709667500923,
      "eval_rougeL": 0.7041710642490953,
      "eval_runtime": 155.3106,
      "eval_samples_per_second": 65.147,
      "eval_steps_per_second": 8.145,
      "step": 25310
    },
    {
      "epoch": 6.0,
      "grad_norm": 1.470399260520935,
      "learning_rate": 3.517371451260672e-05,
      "loss": 0.2202,
      "step": 30372
    },
    {
      "epoch": 6.0,
      "eval_bleu": 0.47794608300398783,
      "eval_loss": 0.4063816964626312,
      "eval_rouge1": 0.7202669607556247,
      "eval_rouge2": 0.5013877675516258,
      "eval_rougeL": 0.7193022759890708,
      "eval_runtime": 155.6504,
      "eval_samples_per_second": 65.005,
      "eval_steps_per_second": 8.127,
      "step": 30372
    },
    {
      "epoch": 7.0,
      "grad_norm": 4.297893524169922,
      "learning_rate": 3.266130633313481e-05,
      "loss": 0.1906,
      "step": 35434
    },
    {
      "epoch": 7.0,
      "eval_bleu": 0.5006873092034939,
      "eval_loss": 0.4055454134941101,
      "eval_rouge1": 0.7334615308262713,
      "eval_rouge2": 0.5267444586395549,
      "eval_rougeL": 0.7327256455313074,
      "eval_runtime": 155.6299,
      "eval_samples_per_second": 65.013,
      "eval_steps_per_second": 8.128,
      "step": 35434
    },
    {
      "epoch": 8.0,
      "grad_norm": 1.4034435749053955,
      "learning_rate": 3.0148898153662896e-05,
      "loss": 0.1676,
      "step": 40496
    },
    {
      "epoch": 8.0,
      "eval_bleu": 0.5192260163668514,
      "eval_loss": 0.40759241580963135,
      "eval_rouge1": 0.7432141723163292,
      "eval_rouge2": 0.5456323870457441,
      "eval_rougeL": 0.7422781215292358,
      "eval_runtime": 32.7322,
      "eval_samples_per_second": 309.114,
      "eval_steps_per_second": 38.647,
      "step": 40496
    },
    {
      "epoch": 9.0,
      "grad_norm": 0.9320669174194336,
      "learning_rate": 2.763648997419099e-05,
      "loss": 0.1502,
      "step": 45558
    },
    {
      "epoch": 9.0,
      "eval_bleu": 0.5340534683820533,
      "eval_loss": 0.41223180294036865,
      "eval_rouge1": 0.749556396925456,
      "eval_rouge2": 0.5601889867058358,
      "eval_rougeL": 0.7486916826979337,
      "eval_runtime": 32.4664,
      "eval_samples_per_second": 311.645,
      "eval_steps_per_second": 38.963,
      "step": 45558
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.8942773938179016,
      "learning_rate": 2.5124081794719078e-05,
      "loss": 0.1371,
      "step": 50620
    },
    {
      "epoch": 10.0,
      "eval_bleu": 0.5453185871190115,
      "eval_loss": 0.4181581735610962,
      "eval_rouge1": 0.7533937778503056,
      "eval_rouge2": 0.5667501774119681,
      "eval_rougeL": 0.7524624935116784,
      "eval_runtime": 155.8002,
      "eval_samples_per_second": 64.942,
      "eval_steps_per_second": 8.119,
      "step": 50620
    },
    {
      "epoch": 11.0,
      "grad_norm": 1.6828958988189697,
      "learning_rate": 2.2611673615247174e-05,
      "loss": 0.1275,
      "step": 55682
    },
    {
      "epoch": 11.0,
      "eval_bleu": 0.5522581095773064,
      "eval_loss": 0.4227945804595947,
      "eval_rouge1": 0.7561517013043558,
      "eval_rouge2": 0.5730376329889821,
      "eval_rougeL": 0.7551916709183668,
      "eval_runtime": 155.5737,
      "eval_samples_per_second": 65.037,
      "eval_steps_per_second": 8.131,
      "step": 55682
    },
    {
      "epoch": 12.0,
      "grad_norm": 2.157827854156494,
      "learning_rate": 2.0099265435775263e-05,
      "loss": 0.1202,
      "step": 60744
    },
    {
      "epoch": 12.0,
      "eval_bleu": 0.5545084381756545,
      "eval_loss": 0.4293166697025299,
      "eval_rouge1": 0.7579786234496211,
      "eval_rouge2": 0.5761973994203244,
      "eval_rougeL": 0.7572229493785905,
      "eval_runtime": 155.3766,
      "eval_samples_per_second": 65.119,
      "eval_steps_per_second": 8.142,
      "step": 60744
    },
    {
      "epoch": 12.0,
      "step": 60744,
      "total_flos": 3.1738359250944e+16,
      "train_loss": 0.2816361450291773,
      "train_runtime": 6563.2408,
      "train_samples_per_second": 123.381,
      "train_steps_per_second": 15.425
    }
  ],
  "logging_steps": 500,
  "max_steps": 101240,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.1738359250944e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|