|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.705079078674316, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5593, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7268170426065163, |
|
"eval_f1": 0.6589395923680764, |
|
"eval_loss": 0.5025668144226074, |
|
"eval_precision": 0.6658409387222947, |
|
"eval_recall": 0.6542098563375159, |
|
"eval_runtime": 5.1786, |
|
"eval_samples_per_second": 77.048, |
|
"eval_steps_per_second": 9.655, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.789808988571167, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4995, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7543859649122807, |
|
"eval_f1": 0.7226336397684712, |
|
"eval_loss": 0.47967690229415894, |
|
"eval_precision": 0.7148526077097506, |
|
"eval_recall": 0.7412256773958902, |
|
"eval_runtime": 5.0518, |
|
"eval_samples_per_second": 78.982, |
|
"eval_steps_per_second": 9.897, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.145122051239014, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4612, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7644110275689223, |
|
"eval_f1": 0.7262335766423358, |
|
"eval_loss": 0.4281724691390991, |
|
"eval_precision": 0.7199248120300752, |
|
"eval_recall": 0.7358156028368794, |
|
"eval_runtime": 5.0618, |
|
"eval_samples_per_second": 78.825, |
|
"eval_steps_per_second": 9.878, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.555657386779785, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4019, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8295739348370927, |
|
"eval_f1": 0.7933776044839771, |
|
"eval_loss": 0.3933873772621155, |
|
"eval_precision": 0.7949020208205757, |
|
"eval_recall": 0.7919167121294781, |
|
"eval_runtime": 5.0787, |
|
"eval_samples_per_second": 78.564, |
|
"eval_steps_per_second": 9.845, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.5041733980178833, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.3665, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7969924812030075, |
|
"eval_f1": 0.772043420300895, |
|
"eval_loss": 0.42343708872795105, |
|
"eval_precision": 0.7618072289156627, |
|
"eval_recall": 0.7963720676486634, |
|
"eval_runtime": 5.0556, |
|
"eval_samples_per_second": 78.922, |
|
"eval_steps_per_second": 9.89, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.9622180461883545, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.334, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8195488721804511, |
|
"eval_f1": 0.788441145281018, |
|
"eval_loss": 0.3723360300064087, |
|
"eval_precision": 0.7816537467700257, |
|
"eval_recall": 0.7973267866884888, |
|
"eval_runtime": 5.0636, |
|
"eval_samples_per_second": 78.798, |
|
"eval_steps_per_second": 9.874, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.7456966042518616, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.3263, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8345864661654135, |
|
"eval_f1": 0.8086080586080586, |
|
"eval_loss": 0.37042734026908875, |
|
"eval_precision": 0.7989898989898989, |
|
"eval_recall": 0.8229678123295144, |
|
"eval_runtime": 5.0647, |
|
"eval_samples_per_second": 78.78, |
|
"eval_steps_per_second": 9.872, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.445054531097412, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3076, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8471177944862155, |
|
"eval_f1": 0.8160386984618873, |
|
"eval_loss": 0.352139413356781, |
|
"eval_precision": 0.8152632848784607, |
|
"eval_recall": 0.8168303327877796, |
|
"eval_runtime": 5.1593, |
|
"eval_samples_per_second": 77.336, |
|
"eval_steps_per_second": 9.691, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 5.338367462158203, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.298, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8471177944862155, |
|
"eval_f1": 0.8186863532409097, |
|
"eval_loss": 0.35223379731178284, |
|
"eval_precision": 0.8138123167155425, |
|
"eval_recall": 0.8243316966721222, |
|
"eval_runtime": 5.0524, |
|
"eval_samples_per_second": 78.972, |
|
"eval_steps_per_second": 9.896, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 6.702072620391846, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2923, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.8263588263588264, |
|
"eval_loss": 0.3374755382537842, |
|
"eval_precision": 0.8289473684210527, |
|
"eval_recall": 0.8239225313693399, |
|
"eval_runtime": 5.0718, |
|
"eval_samples_per_second": 78.67, |
|
"eval_steps_per_second": 9.858, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 6.563529968261719, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.2689, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8357422474382676, |
|
"eval_loss": 0.3392201364040375, |
|
"eval_precision": 0.8319228265372551, |
|
"eval_recall": 0.8399709038006911, |
|
"eval_runtime": 5.0641, |
|
"eval_samples_per_second": 78.79, |
|
"eval_steps_per_second": 9.873, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 14.090389251708984, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2686, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8372817261706151, |
|
"eval_loss": 0.34843868017196655, |
|
"eval_precision": 0.8308913308913308, |
|
"eval_recall": 0.8449718130569195, |
|
"eval_runtime": 5.0809, |
|
"eval_samples_per_second": 78.53, |
|
"eval_steps_per_second": 9.841, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.997685432434082, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.2726, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8298403801632752, |
|
"eval_loss": 0.3257535398006439, |
|
"eval_precision": 0.8315523576240049, |
|
"eval_recall": 0.8281960356428442, |
|
"eval_runtime": 5.0613, |
|
"eval_samples_per_second": 78.833, |
|
"eval_steps_per_second": 9.879, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 10.811493873596191, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.2713, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8341332527115377, |
|
"eval_loss": 0.324627548456192, |
|
"eval_precision": 0.8333132275770553, |
|
"eval_recall": 0.8349699945444626, |
|
"eval_runtime": 5.0668, |
|
"eval_samples_per_second": 78.748, |
|
"eval_steps_per_second": 9.868, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.337926983833313, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.2577, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8323529411764705, |
|
"eval_loss": 0.3306790590286255, |
|
"eval_precision": 0.8292704679231822, |
|
"eval_recall": 0.8356973995271868, |
|
"eval_runtime": 5.0474, |
|
"eval_samples_per_second": 79.051, |
|
"eval_steps_per_second": 9.906, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 6.205774784088135, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2519, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8365204824303285, |
|
"eval_loss": 0.3305480182170868, |
|
"eval_precision": 0.8313636363636363, |
|
"eval_recall": 0.8424713584288053, |
|
"eval_runtime": 5.1558, |
|
"eval_samples_per_second": 77.389, |
|
"eval_steps_per_second": 9.698, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 7.098486423492432, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.2488, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8546365914786967, |
|
"eval_f1": 0.8246499363520641, |
|
"eval_loss": 0.3233925998210907, |
|
"eval_precision": 0.8246499363520641, |
|
"eval_recall": 0.8246499363520641, |
|
"eval_runtime": 5.0648, |
|
"eval_samples_per_second": 78.779, |
|
"eval_steps_per_second": 9.872, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 4.508778095245361, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2546, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8391129032258065, |
|
"eval_loss": 0.32470664381980896, |
|
"eval_precision": 0.8345705196182396, |
|
"eval_recall": 0.8442444080741953, |
|
"eval_runtime": 5.0744, |
|
"eval_samples_per_second": 78.63, |
|
"eval_steps_per_second": 9.853, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 3.409043073654175, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.2463, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8306964902709584, |
|
"eval_loss": 0.3204318881034851, |
|
"eval_precision": 0.8306964902709584, |
|
"eval_recall": 0.8306964902709584, |
|
"eval_runtime": 5.0451, |
|
"eval_samples_per_second": 79.087, |
|
"eval_steps_per_second": 9.911, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 8.363502502441406, |
|
"learning_rate": 0.0, |
|
"loss": 0.2458, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8349466368826011, |
|
"eval_loss": 0.32170674204826355, |
|
"eval_precision": 0.8325716845878136, |
|
"eval_recall": 0.8374704491725768, |
|
"eval_runtime": 5.0575, |
|
"eval_samples_per_second": 78.893, |
|
"eval_steps_per_second": 9.886, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7609911792720000.0, |
|
"train_loss": 0.3216621422376789, |
|
"train_runtime": 1957.5673, |
|
"train_samples_per_second": 37.169, |
|
"train_steps_per_second": 1.246 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7609911792720000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|