Mulaqua / trainer_state.json
ndhieunguyen's picture
Upload folder using huggingface_hub
1513aa7 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 30.0,
"eval_steps": 500,
"global_step": 1200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.7807692307692308,
"eval_balanced_acc": 0.8043785310734464,
"eval_f1": 0.4123711340206186,
"eval_fn": 4,
"eval_fp": 53,
"eval_gmean": 0.8038572265508136,
"eval_loss": 0.4396936893463135,
"eval_matthews_correlation": 0.3921239795221355,
"eval_precision": 0.273972602739726,
"eval_recall": 0.8333333333333334,
"eval_roc_auc": 0.9016596045197741,
"eval_runtime": 3.587,
"eval_samples_per_second": 72.484,
"eval_specificity": 0.7754237288135594,
"eval_steps_per_second": 0.836,
"eval_tn": 183,
"eval_tp": 20,
"step": 40
},
{
"epoch": 2.0,
"eval_accuracy": 0.9076923076923077,
"eval_balanced_acc": 0.7807203389830508,
"eval_f1": 0.5555555555555556,
"eval_fn": 9,
"eval_fp": 15,
"eval_gmean": 0.7650329559756321,
"eval_loss": 0.25222039222717285,
"eval_matthews_correlation": 0.5086756541742437,
"eval_precision": 0.5,
"eval_recall": 0.625,
"eval_roc_auc": 0.9323799435028248,
"eval_runtime": 1.3968,
"eval_samples_per_second": 186.135,
"eval_specificity": 0.9364406779661016,
"eval_steps_per_second": 2.148,
"eval_tn": 221,
"eval_tp": 15,
"step": 80
},
{
"epoch": 3.0,
"eval_accuracy": 0.8730769230769231,
"eval_balanced_acc": 0.8365112994350282,
"eval_f1": 0.5352112676056338,
"eval_fn": 5,
"eval_fp": 28,
"eval_gmean": 0.8353083939446259,
"eval_loss": 0.335420697927475,
"eval_matthews_correlation": 0.5062354323287003,
"eval_precision": 0.40425531914893614,
"eval_recall": 0.7916666666666666,
"eval_roc_auc": 0.9120762711864406,
"eval_runtime": 1.3956,
"eval_samples_per_second": 186.302,
"eval_specificity": 0.8813559322033898,
"eval_steps_per_second": 2.15,
"eval_tn": 208,
"eval_tp": 19,
"step": 120
},
{
"epoch": 4.0,
"eval_accuracy": 0.8769230769230769,
"eval_balanced_acc": 0.7450564971751412,
"eval_f1": 0.4666666666666666,
"eval_fn": 10,
"eval_fp": 22,
"eval_gmean": 0.7272927899133107,
"eval_loss": 0.28464266657829285,
"eval_matthews_correlation": 0.41075473632357545,
"eval_precision": 0.3888888888888889,
"eval_recall": 0.5833333333333334,
"eval_roc_auc": 0.8908898305084745,
"eval_runtime": 1.4016,
"eval_samples_per_second": 185.508,
"eval_specificity": 0.9067796610169492,
"eval_steps_per_second": 2.14,
"eval_tn": 214,
"eval_tp": 14,
"step": 160
},
{
"epoch": 5.0,
"eval_accuracy": 0.926923076923077,
"eval_balanced_acc": 0.7538841807909604,
"eval_f1": 0.5777777777777778,
"eval_fn": 11,
"eval_fp": 8,
"eval_gmean": 0.7233982891504256,
"eval_loss": 0.20127272605895996,
"eval_matthews_correlation": 0.5394095877061748,
"eval_precision": 0.6190476190476191,
"eval_recall": 0.5416666666666666,
"eval_roc_auc": 0.9207274011299434,
"eval_runtime": 1.3962,
"eval_samples_per_second": 186.223,
"eval_specificity": 0.9661016949152542,
"eval_steps_per_second": 2.149,
"eval_tn": 228,
"eval_tp": 13,
"step": 200
},
{
"epoch": 6.0,
"eval_accuracy": 0.9115384615384615,
"eval_balanced_acc": 0.8202683615819208,
"eval_f1": 0.5964912280701754,
"eval_fn": 7,
"eval_fp": 16,
"eval_gmean": 0.8125950618009421,
"eval_loss": 0.24586564302444458,
"eval_matthews_correlation": 0.5569750709808192,
"eval_precision": 0.5151515151515151,
"eval_recall": 0.7083333333333334,
"eval_roc_auc": 0.8983050847457626,
"eval_runtime": 1.3991,
"eval_samples_per_second": 185.831,
"eval_specificity": 0.9322033898305084,
"eval_steps_per_second": 2.144,
"eval_tn": 220,
"eval_tp": 17,
"step": 240
},
{
"epoch": 7.0,
"eval_accuracy": 0.9230769230769231,
"eval_balanced_acc": 0.6956214689265536,
"eval_f1": 0.5,
"eval_fn": 14,
"eval_fp": 6,
"eval_gmean": 0.6372389240525752,
"eval_loss": 0.222215473651886,
"eval_matthews_correlation": 0.47125202479303824,
"eval_precision": 0.625,
"eval_recall": 0.4166666666666667,
"eval_roc_auc": 0.9180790960451978,
"eval_runtime": 1.3975,
"eval_samples_per_second": 186.041,
"eval_specificity": 0.9745762711864406,
"eval_steps_per_second": 2.147,
"eval_tn": 230,
"eval_tp": 10,
"step": 280
},
{
"epoch": 8.0,
"eval_accuracy": 0.9153846153846154,
"eval_balanced_acc": 0.6913841807909604,
"eval_f1": 0.4761904761904762,
"eval_fn": 14,
"eval_fp": 8,
"eval_gmean": 0.6344622706523658,
"eval_loss": 0.2277352660894394,
"eval_matthews_correlation": 0.43646931798694205,
"eval_precision": 0.5555555555555556,
"eval_recall": 0.4166666666666667,
"eval_roc_auc": 0.934322033898305,
"eval_runtime": 1.3984,
"eval_samples_per_second": 185.931,
"eval_specificity": 0.9661016949152542,
"eval_steps_per_second": 2.145,
"eval_tn": 228,
"eval_tp": 10,
"step": 320
},
{
"epoch": 9.0,
"eval_accuracy": 0.9,
"eval_balanced_acc": 0.8139124293785311,
"eval_f1": 0.5666666666666667,
"eval_fn": 7,
"eval_fp": 19,
"eval_gmean": 0.8070356232379964,
"eval_loss": 0.29547229409217834,
"eval_matthews_correlation": 0.5261685311119,
"eval_precision": 0.4722222222222222,
"eval_recall": 0.7083333333333334,
"eval_roc_auc": 0.9141949152542372,
"eval_runtime": 1.4006,
"eval_samples_per_second": 185.628,
"eval_specificity": 0.9194915254237288,
"eval_steps_per_second": 2.142,
"eval_tn": 217,
"eval_tp": 17,
"step": 360
},
{
"epoch": 10.0,
"eval_accuracy": 0.9115384615384615,
"eval_balanced_acc": 0.6518361581920904,
"eval_f1": 0.4102564102564102,
"eval_fn": 16,
"eval_fp": 7,
"eval_gmean": 0.5687234193205127,
"eval_loss": 0.2870628833770752,
"eval_matthews_correlation": 0.3769972244726467,
"eval_precision": 0.5333333333333333,
"eval_recall": 0.3333333333333333,
"eval_roc_auc": 0.9037782485875705,
"eval_runtime": 1.3972,
"eval_samples_per_second": 186.081,
"eval_specificity": 0.9703389830508474,
"eval_steps_per_second": 2.147,
"eval_tn": 229,
"eval_tp": 8,
"step": 400
},
{
"epoch": 11.0,
"eval_accuracy": 0.9153846153846154,
"eval_balanced_acc": 0.8036723163841808,
"eval_f1": 0.5925925925925926,
"eval_fn": 8,
"eval_fp": 14,
"eval_gmean": 0.7919082295744858,
"eval_loss": 0.29901543259620667,
"eval_matthews_correlation": 0.5502654875344019,
"eval_precision": 0.5333333333333333,
"eval_recall": 0.6666666666666666,
"eval_roc_auc": 0.9090748587570622,
"eval_runtime": 1.3984,
"eval_samples_per_second": 185.929,
"eval_specificity": 0.940677966101695,
"eval_steps_per_second": 2.145,
"eval_tn": 222,
"eval_tp": 16,
"step": 440
},
{
"epoch": 12.0,
"eval_accuracy": 0.9307692307692308,
"eval_balanced_acc": 0.699858757062147,
"eval_f1": 0.5263157894736842,
"eval_fn": 14,
"eval_fp": 4,
"eval_gmean": 0.6400035310637054,
"eval_loss": 0.23940503597259521,
"eval_matthews_correlation": 0.5126054612257419,
"eval_precision": 0.7142857142857143,
"eval_recall": 0.4166666666666667,
"eval_roc_auc": 0.9262005649717514,
"eval_runtime": 1.4001,
"eval_samples_per_second": 185.698,
"eval_specificity": 0.9830508474576272,
"eval_steps_per_second": 2.143,
"eval_tn": 232,
"eval_tp": 10,
"step": 480
},
{
"epoch": 12.5,
"grad_norm": 1.699501872062683,
"learning_rate": 0.00030701754385964913,
"loss": 0.1858,
"step": 500
},
{
"epoch": 13.0,
"eval_accuracy": 0.9,
"eval_balanced_acc": 0.7951977401129944,
"eval_f1": 0.5517241379310345,
"eval_fn": 8,
"eval_fp": 18,
"eval_gmean": 0.7847414918554695,
"eval_loss": 0.2966707646846771,
"eval_matthews_correlation": 0.5068867843147262,
"eval_precision": 0.47058823529411764,
"eval_recall": 0.6666666666666666,
"eval_roc_auc": 0.911723163841808,
"eval_runtime": 1.7684,
"eval_samples_per_second": 147.028,
"eval_specificity": 0.923728813559322,
"eval_steps_per_second": 1.696,
"eval_tn": 218,
"eval_tp": 16,
"step": 520
},
{
"epoch": 14.0,
"eval_accuracy": 0.926923076923077,
"eval_balanced_acc": 0.7351694915254237,
"eval_f1": 0.5581395348837209,
"eval_fn": 12,
"eval_fp": 7,
"eval_gmean": 0.6965410910530863,
"eval_loss": 0.2445915788412094,
"eval_matthews_correlation": 0.5231030791067123,
"eval_precision": 0.631578947368421,
"eval_recall": 0.5,
"eval_roc_auc": 0.9212570621468926,
"eval_runtime": 1.3996,
"eval_samples_per_second": 185.761,
"eval_specificity": 0.9703389830508474,
"eval_steps_per_second": 2.143,
"eval_tn": 229,
"eval_tp": 12,
"step": 560
},
{
"epoch": 15.0,
"eval_accuracy": 0.9230769230769231,
"eval_balanced_acc": 0.7143361581920904,
"eval_f1": 0.5238095238095238,
"eval_fn": 13,
"eval_fp": 7,
"eval_gmean": 0.6668873222404005,
"eval_loss": 0.27021560072898865,
"eval_matthews_correlation": 0.4888134243875901,
"eval_precision": 0.6111111111111112,
"eval_recall": 0.4583333333333333,
"eval_roc_auc": 0.9030720338983051,
"eval_runtime": 1.3991,
"eval_samples_per_second": 185.839,
"eval_specificity": 0.9703389830508474,
"eval_steps_per_second": 2.144,
"eval_tn": 229,
"eval_tp": 11,
"step": 600
},
{
"epoch": 16.0,
"eval_accuracy": 0.9076923076923077,
"eval_balanced_acc": 0.7807203389830508,
"eval_f1": 0.5555555555555556,
"eval_fn": 9,
"eval_fp": 15,
"eval_gmean": 0.7650329559756321,
"eval_loss": 0.32207679748535156,
"eval_matthews_correlation": 0.5086756541742437,
"eval_precision": 0.5,
"eval_recall": 0.625,
"eval_roc_auc": 0.909957627118644,
"eval_runtime": 1.3979,
"eval_samples_per_second": 185.997,
"eval_specificity": 0.9364406779661016,
"eval_steps_per_second": 2.146,
"eval_tn": 221,
"eval_tp": 15,
"step": 640
},
{
"epoch": 17.0,
"eval_accuracy": 0.9192307692307692,
"eval_balanced_acc": 0.7496468926553672,
"eval_f1": 0.5531914893617021,
"eval_fn": 11,
"eval_fp": 10,
"eval_gmean": 0.7202185010575171,
"eval_loss": 0.3051339089870453,
"eval_matthews_correlation": 0.5089553592113205,
"eval_precision": 0.5652173913043478,
"eval_recall": 0.5416666666666666,
"eval_roc_auc": 0.9210805084745763,
"eval_runtime": 1.4041,
"eval_samples_per_second": 185.169,
"eval_specificity": 0.9576271186440678,
"eval_steps_per_second": 2.137,
"eval_tn": 226,
"eval_tp": 13,
"step": 680
},
{
"epoch": 18.0,
"eval_accuracy": 0.9115384615384615,
"eval_balanced_acc": 0.7828389830508475,
"eval_f1": 0.5660377358490567,
"eval_fn": 9,
"eval_fp": 14,
"eval_gmean": 0.7667618462166459,
"eval_loss": 0.2930068373680115,
"eval_matthews_correlation": 0.5201467582857955,
"eval_precision": 0.5172413793103449,
"eval_recall": 0.625,
"eval_roc_auc": 0.9300847457627118,
"eval_runtime": 1.4006,
"eval_samples_per_second": 185.64,
"eval_specificity": 0.940677966101695,
"eval_steps_per_second": 2.142,
"eval_tn": 222,
"eval_tp": 15,
"step": 720
},
{
"epoch": 19.0,
"eval_accuracy": 0.9192307692307692,
"eval_balanced_acc": 0.7496468926553672,
"eval_f1": 0.5531914893617021,
"eval_fn": 11,
"eval_fp": 10,
"eval_gmean": 0.7202185010575171,
"eval_loss": 0.31224697828292847,
"eval_matthews_correlation": 0.5089553592113205,
"eval_precision": 0.5652173913043478,
"eval_recall": 0.5416666666666666,
"eval_roc_auc": 0.9027189265536724,
"eval_runtime": 1.4077,
"eval_samples_per_second": 184.704,
"eval_specificity": 0.9576271186440678,
"eval_steps_per_second": 2.131,
"eval_tn": 226,
"eval_tp": 13,
"step": 760
},
{
"epoch": 20.0,
"eval_accuracy": 0.9153846153846154,
"eval_balanced_acc": 0.784957627118644,
"eval_f1": 0.576923076923077,
"eval_fn": 9,
"eval_fp": 13,
"eval_gmean": 0.7684868469260259,
"eval_loss": 0.32145386934280396,
"eval_matthews_correlation": 0.5321681707195826,
"eval_precision": 0.5357142857142857,
"eval_recall": 0.625,
"eval_roc_auc": 0.9267302259887006,
"eval_runtime": 1.3971,
"eval_samples_per_second": 186.099,
"eval_specificity": 0.9449152542372882,
"eval_steps_per_second": 2.147,
"eval_tn": 223,
"eval_tp": 15,
"step": 800
},
{
"epoch": 21.0,
"eval_accuracy": 0.926923076923077,
"eval_balanced_acc": 0.7725988700564972,
"eval_f1": 0.5957446808510638,
"eval_fn": 10,
"eval_fp": 9,
"eval_gmean": 0.7490577885727302,
"eval_loss": 0.29852786660194397,
"eval_matthews_correlation": 0.5557475775263641,
"eval_precision": 0.6086956521739131,
"eval_recall": 0.5833333333333334,
"eval_roc_auc": 0.929731638418079,
"eval_runtime": 1.4044,
"eval_samples_per_second": 185.127,
"eval_specificity": 0.961864406779661,
"eval_steps_per_second": 2.136,
"eval_tn": 227,
"eval_tp": 14,
"step": 840
},
{
"epoch": 22.0,
"eval_accuracy": 0.9230769230769231,
"eval_balanced_acc": 0.7704802259887006,
"eval_f1": 0.5833333333333334,
"eval_fn": 10,
"eval_fp": 10,
"eval_gmean": 0.747406060457794,
"eval_loss": 0.3123900890350342,
"eval_matthews_correlation": 0.5409604519774012,
"eval_precision": 0.5833333333333334,
"eval_recall": 0.5833333333333334,
"eval_roc_auc": 0.9219632768361582,
"eval_runtime": 1.4011,
"eval_samples_per_second": 185.567,
"eval_specificity": 0.9576271186440678,
"eval_steps_per_second": 2.141,
"eval_tn": 226,
"eval_tp": 14,
"step": 880
},
{
"epoch": 23.0,
"eval_accuracy": 0.9307692307692308,
"eval_balanced_acc": 0.756002824858757,
"eval_f1": 0.5909090909090908,
"eval_fn": 11,
"eval_fp": 7,
"eval_gmean": 0.7249829532381267,
"eval_loss": 0.29459962248802185,
"eval_matthews_correlation": 0.5561804983135704,
"eval_precision": 0.65,
"eval_recall": 0.5416666666666666,
"eval_roc_auc": 0.9156073446327683,
"eval_runtime": 1.7446,
"eval_samples_per_second": 149.032,
"eval_specificity": 0.9703389830508474,
"eval_steps_per_second": 1.72,
"eval_tn": 229,
"eval_tp": 13,
"step": 920
},
{
"epoch": 24.0,
"eval_accuracy": 0.926923076923077,
"eval_balanced_acc": 0.7351694915254237,
"eval_f1": 0.5581395348837209,
"eval_fn": 12,
"eval_fp": 7,
"eval_gmean": 0.6965410910530863,
"eval_loss": 0.3256016969680786,
"eval_matthews_correlation": 0.5231030791067123,
"eval_precision": 0.631578947368421,
"eval_recall": 0.5,
"eval_roc_auc": 0.8930084745762712,
"eval_runtime": 1.4003,
"eval_samples_per_second": 185.678,
"eval_specificity": 0.9703389830508474,
"eval_steps_per_second": 2.142,
"eval_tn": 229,
"eval_tp": 12,
"step": 960
},
{
"epoch": 25.0,
"grad_norm": 0.08078446239233017,
"learning_rate": 8.771929824561403e-05,
"loss": 0.0306,
"step": 1000
},
{
"epoch": 25.0,
"eval_accuracy": 0.926923076923077,
"eval_balanced_acc": 0.7538841807909604,
"eval_f1": 0.5777777777777778,
"eval_fn": 11,
"eval_fp": 8,
"eval_gmean": 0.7233982891504256,
"eval_loss": 0.31878602504730225,
"eval_matthews_correlation": 0.5394095877061748,
"eval_precision": 0.6190476190476191,
"eval_recall": 0.5416666666666666,
"eval_roc_auc": 0.908545197740113,
"eval_runtime": 1.3995,
"eval_samples_per_second": 185.775,
"eval_specificity": 0.9661016949152542,
"eval_steps_per_second": 2.144,
"eval_tn": 228,
"eval_tp": 13,
"step": 1000
},
{
"epoch": 26.0,
"eval_accuracy": 0.9307692307692308,
"eval_balanced_acc": 0.756002824858757,
"eval_f1": 0.5909090909090908,
"eval_fn": 11,
"eval_fp": 7,
"eval_gmean": 0.7249829532381267,
"eval_loss": 0.3249245882034302,
"eval_matthews_correlation": 0.5561804983135704,
"eval_precision": 0.65,
"eval_recall": 0.5416666666666666,
"eval_roc_auc": 0.9027189265536723,
"eval_runtime": 1.3993,
"eval_samples_per_second": 185.806,
"eval_specificity": 0.9703389830508474,
"eval_steps_per_second": 2.144,
"eval_tn": 229,
"eval_tp": 13,
"step": 1040
},
{
"epoch": 27.0,
"eval_accuracy": 0.9192307692307692,
"eval_balanced_acc": 0.7309322033898304,
"eval_f1": 0.5333333333333333,
"eval_fn": 12,
"eval_fp": 9,
"eval_gmean": 0.6934927565518118,
"eval_loss": 0.3429410457611084,
"eval_matthews_correlation": 0.4906451604448377,
"eval_precision": 0.5714285714285714,
"eval_recall": 0.5,
"eval_roc_auc": 0.9027189265536724,
"eval_runtime": 1.4151,
"eval_samples_per_second": 183.736,
"eval_specificity": 0.961864406779661,
"eval_steps_per_second": 2.12,
"eval_tn": 227,
"eval_tp": 12,
"step": 1080
},
{
"epoch": 28.0,
"eval_accuracy": 0.9230769230769231,
"eval_balanced_acc": 0.7517655367231638,
"eval_f1": 0.5652173913043478,
"eval_fn": 11,
"eval_fp": 9,
"eval_gmean": 0.7218101460949754,
"eval_loss": 0.3275804817676544,
"eval_matthews_correlation": 0.5237066395259845,
"eval_precision": 0.5909090909090909,
"eval_recall": 0.5416666666666666,
"eval_roc_auc": 0.9055437853107344,
"eval_runtime": 1.3958,
"eval_samples_per_second": 186.271,
"eval_specificity": 0.961864406779661,
"eval_steps_per_second": 2.149,
"eval_tn": 227,
"eval_tp": 13,
"step": 1120
},
{
"epoch": 29.0,
"eval_accuracy": 0.9307692307692308,
"eval_balanced_acc": 0.756002824858757,
"eval_f1": 0.5909090909090908,
"eval_fn": 11,
"eval_fp": 7,
"eval_gmean": 0.7249829532381267,
"eval_loss": 0.32472190260887146,
"eval_matthews_correlation": 0.5561804983135704,
"eval_precision": 0.65,
"eval_recall": 0.5416666666666666,
"eval_roc_auc": 0.909427966101695,
"eval_runtime": 1.4002,
"eval_samples_per_second": 185.691,
"eval_specificity": 0.9703389830508474,
"eval_steps_per_second": 2.143,
"eval_tn": 229,
"eval_tp": 13,
"step": 1160
},
{
"epoch": 30.0,
"eval_accuracy": 0.9346153846153846,
"eval_balanced_acc": 0.7581214689265536,
"eval_f1": 0.6046511627906976,
"eval_fn": 11,
"eval_fp": 6,
"eval_gmean": 0.7265641611213621,
"eval_loss": 0.32732099294662476,
"eval_matthews_correlation": 0.5741566829234335,
"eval_precision": 0.6842105263157895,
"eval_recall": 0.5416666666666666,
"eval_roc_auc": 0.907662429378531,
"eval_runtime": 1.4048,
"eval_samples_per_second": 185.075,
"eval_specificity": 0.9745762711864406,
"eval_steps_per_second": 2.135,
"eval_tn": 230,
"eval_tp": 13,
"step": 1200
}
],
"logging_steps": 500,
"max_steps": 1200,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.010965840168624e+16,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}