llama_3b_prompt_synth_sbdh_mlc / trainer_state.json
avijit's picture
Upload folder using huggingface_hub
38fba74 verified
{
"best_metric": 0.9220723199736294,
"best_model_checkpoint": "./saved_models/llama_3b_prompt_sbdh_gpt4_v2_0/checkpoint-144",
"epoch": 6.0,
"eval_steps": 500,
"global_step": 144,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 8.041247367858887,
"learning_rate": 3.3333333333333335e-05,
"loss": 3.9133,
"step": 24
},
{
"epoch": 1.0,
"eval_acc_macro": 0.4694269521783949,
"eval_acc_micro": 0.509025270758086,
"eval_auc_macro": 0.9508939125710083,
"eval_auc_micro": 0.9142539505353097,
"eval_f1_at_5": 0.39182832882982277,
"eval_f1_at_8": 0.2863186309798368,
"eval_f1_macro": 0.6055711778382781,
"eval_f1_micro": 0.6746411483252942,
"eval_loss": 0.25163671374320984,
"eval_prec_at_5": 0.2502283105022831,
"eval_prec_at_8": 0.168236301369863,
"eval_prec_macro": 0.8351317849349306,
"eval_prec_micro": 0.7975113122171044,
"eval_rec_at_5": 0.9025875190258752,
"eval_rec_at_8": 0.9604261796042617,
"eval_rec_macro": 0.5519678498265336,
"eval_rec_micro": 0.5845771144278122,
"eval_runtime": 36.2308,
"eval_samples_per_second": 24.178,
"eval_steps_per_second": 3.036,
"eval_threshold": -0.625,
"step": 24
},
{
"epoch": 2.0,
"grad_norm": 1.1691702604293823,
"learning_rate": 6.666666666666667e-05,
"loss": 0.1193,
"step": 48
},
{
"epoch": 2.0,
"eval_acc_macro": 0.8113749934046343,
"eval_acc_micro": 0.8189149560116702,
"eval_auc_macro": 0.9940036594427881,
"eval_auc_micro": 0.9939886141325751,
"eval_f1_at_5": 0.42918048633589306,
"eval_f1_at_8": 0.29274329072668936,
"eval_f1_macro": 0.8921678080506279,
"eval_f1_micro": 0.900443369608956,
"eval_loss": 0.06727338582277298,
"eval_prec_at_5": 0.27488584474885847,
"eval_prec_at_8": 0.1720890410958904,
"eval_prec_macro": 0.8696485876912569,
"eval_prec_micro": 0.8760784313724803,
"eval_rec_at_5": 0.978310502283105,
"eval_rec_at_8": 0.9794520547945206,
"eval_rec_macro": 0.9217289354703175,
"eval_rec_micro": 0.9262023217246329,
"eval_runtime": 121.244,
"eval_samples_per_second": 7.225,
"eval_steps_per_second": 0.907,
"eval_threshold": -1.125,
"step": 48
},
{
"epoch": 3.0,
"grad_norm": 0.8648784160614014,
"learning_rate": 0.0001,
"loss": 0.0587,
"step": 72
},
{
"epoch": 3.0,
"eval_acc_macro": 0.8392595891870926,
"eval_acc_micro": 0.849157733537454,
"eval_auc_macro": 0.9951075369988027,
"eval_auc_micro": 0.9959963596303174,
"eval_f1_at_5": 0.4298468624753246,
"eval_f1_at_8": 0.29274329072668936,
"eval_f1_macro": 0.9088894089226979,
"eval_f1_micro": 0.9184265010351206,
"eval_loss": 0.05153823271393776,
"eval_prec_at_5": 0.2753424657534247,
"eval_prec_at_8": 0.1720890410958904,
"eval_prec_macro": 0.9249501329228883,
"eval_prec_micro": 0.9172870140611317,
"eval_rec_at_5": 0.9794520547945206,
"eval_rec_at_8": 0.9794520547945206,
"eval_rec_macro": 0.9031612618645798,
"eval_rec_micro": 0.9195688225538209,
"eval_runtime": 118.514,
"eval_samples_per_second": 7.392,
"eval_steps_per_second": 0.928,
"eval_threshold": 0.125,
"step": 72
},
{
"epoch": 4.0,
"grad_norm": 0.2277711182832718,
"learning_rate": 9.411764705882353e-05,
"loss": 0.0465,
"step": 96
},
{
"epoch": 4.0,
"eval_acc_macro": 0.8500885385546534,
"eval_acc_micro": 0.8541033434649806,
"eval_auc_macro": 0.9952461152918444,
"eval_auc_micro": 0.9963732184410706,
"eval_f1_at_5": 0.4295136788854873,
"eval_f1_at_8": 0.29274329072668936,
"eval_f1_macro": 0.9175170894131637,
"eval_f1_micro": 0.9213114754097605,
"eval_loss": 0.05052410811185837,
"eval_prec_at_5": 0.2751141552511416,
"eval_prec_at_8": 0.1720890410958904,
"eval_prec_macro": 0.9087740459598467,
"eval_prec_micro": 0.9108589951376895,
"eval_rec_at_5": 0.9788812785388128,
"eval_rec_at_8": 0.9794520547945206,
"eval_rec_macro": 0.9275029071213349,
"eval_rec_micro": 0.9320066334990935,
"eval_runtime": 112.7846,
"eval_samples_per_second": 7.767,
"eval_steps_per_second": 0.975,
"eval_threshold": 0.125,
"step": 96
},
{
"epoch": 5.0,
"grad_norm": 0.3336959481239319,
"learning_rate": 8.823529411764706e-05,
"loss": 0.0368,
"step": 120
},
{
"epoch": 5.0,
"eval_acc_macro": 0.8560051578711357,
"eval_acc_micro": 0.8632148377124526,
"eval_auc_macro": 0.9960314271621913,
"eval_auc_micro": 0.9969526516710807,
"eval_f1_at_5": 0.4298468624753246,
"eval_f1_at_8": 0.29274329072668936,
"eval_f1_macro": 0.920584293633742,
"eval_f1_micro": 0.9265864786394916,
"eval_loss": 0.047832097858190536,
"eval_prec_at_5": 0.2753424657534247,
"eval_prec_at_8": 0.1720890410958904,
"eval_prec_macro": 0.9209783471417811,
"eval_prec_micro": 0.9269709543567695,
"eval_rec_at_5": 0.9794520547945206,
"eval_rec_at_8": 0.9794520547945206,
"eval_rec_macro": 0.9204814122555302,
"eval_rec_micro": 0.9262023217246329,
"eval_runtime": 116.9753,
"eval_samples_per_second": 7.489,
"eval_steps_per_second": 0.94,
"eval_threshold": 0.125,
"step": 120
},
{
"epoch": 6.0,
"grad_norm": 0.15532971918582916,
"learning_rate": 8.23529411764706e-05,
"loss": 0.0307,
"step": 144
},
{
"epoch": 6.0,
"eval_acc_macro": 0.8581393259022515,
"eval_acc_micro": 0.8658346333852679,
"eval_auc_macro": 0.9960477018781981,
"eval_auc_micro": 0.9967622842874246,
"eval_f1_at_5": 0.4298468624753246,
"eval_f1_at_8": 0.29274329072668936,
"eval_f1_macro": 0.9220723199736294,
"eval_f1_micro": 0.9280936454848722,
"eval_loss": 0.05041336640715599,
"eval_prec_at_5": 0.2753424657534247,
"eval_prec_at_8": 0.1720890410958904,
"eval_prec_macro": 0.9315146645321158,
"eval_prec_micro": 0.9359190556491622,
"eval_rec_at_5": 0.9794520547945206,
"eval_rec_at_8": 0.9794520547945206,
"eval_rec_macro": 0.9137683032294842,
"eval_rec_micro": 0.9203980099501724,
"eval_runtime": 118.5157,
"eval_samples_per_second": 7.391,
"eval_steps_per_second": 0.928,
"eval_threshold": 1.125,
"step": 144
}
],
"logging_steps": 500,
"max_steps": 480,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.653734367119278e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}