climamba_prompt_synth_sbdh_mlc / trainer_state.json
avijit's picture
Upload folder using huggingface_hub
da78c38 verified
{
"best_metric": 0.9063403443920213,
"best_model_checkpoint": "./saved_models/climamba_prompt_sbdh_gpt4_v2_0/checkpoint-192",
"epoch": 8.0,
"eval_steps": 500,
"global_step": 192,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 2.0851898193359375,
"learning_rate": 0.0003,
"loss": 1.4049,
"step": 24
},
{
"epoch": 1.0,
"eval_acc_macro": 0.05554596988321858,
"eval_acc_micro": 0.17600550017187433,
"eval_auc_macro": 0.6058912515968368,
"eval_auc_micro": 0.6488212280527301,
"eval_f1_at_5": 0.25079507780987353,
"eval_f1_at_8": 0.24432286984749932,
"eval_f1_macro": 0.09066299141125865,
"eval_f1_micro": 0.2993276819643204,
"eval_loss": 0.394575834274292,
"eval_prec_at_5": 0.16187214611872147,
"eval_prec_at_8": 0.1436929223744292,
"eval_prec_macro": 0.05666691972017087,
"eval_prec_micro": 0.23115124153497826,
"eval_rec_at_5": 0.5565068493150684,
"eval_rec_at_8": 0.8152587519025876,
"eval_rec_macro": 0.22834645669278442,
"eval_rec_micro": 0.4245439469319714,
"eval_runtime": 6.3514,
"eval_samples_per_second": 137.922,
"eval_steps_per_second": 17.319,
"step": 24
},
{
"epoch": 2.0,
"grad_norm": 0.4896126985549927,
"learning_rate": 0.0003,
"loss": 0.3397,
"step": 48
},
{
"epoch": 2.0,
"eval_acc_macro": 0.5307003853873716,
"eval_acc_micro": 0.5814606741572625,
"eval_auc_macro": 0.9303990338907523,
"eval_auc_micro": 0.9371828621061181,
"eval_f1_at_5": 0.4088221844905389,
"eval_f1_at_8": 0.28901449364842885,
"eval_f1_macro": 0.6465579930529745,
"eval_f1_micro": 0.7353463587921194,
"eval_loss": 0.18900679051876068,
"eval_prec_at_5": 0.2614155251141553,
"eval_prec_at_8": 0.16980593607305935,
"eval_prec_macro": 0.7313857838097021,
"eval_prec_micro": 0.7915869980878784,
"eval_rec_at_5": 0.9374048706240486,
"eval_rec_at_8": 0.9699391171993911,
"eval_rec_macro": 0.6410238611610916,
"eval_rec_micro": 0.6865671641790475,
"eval_runtime": 6.4111,
"eval_samples_per_second": 136.637,
"eval_steps_per_second": 17.158,
"step": 48
},
{
"epoch": 3.0,
"grad_norm": 0.1705099642276764,
"learning_rate": 0.0003,
"loss": 0.0916,
"step": 72
},
{
"epoch": 3.0,
"eval_acc_macro": 0.8244239681810236,
"eval_acc_micro": 0.8311011904761286,
"eval_auc_macro": 0.9919756925761466,
"eval_auc_micro": 0.9920428839397825,
"eval_f1_at_5": 0.42784762619628447,
"eval_f1_at_8": 0.29274329072668936,
"eval_f1_macro": 0.9019972941167671,
"eval_f1_micro": 0.907761072734587,
"eval_loss": 0.0666864886879921,
"eval_prec_at_5": 0.273972602739726,
"eval_prec_at_8": 0.1720890410958904,
"eval_prec_macro": 0.8888004919536745,
"eval_prec_micro": 0.8900398406373793,
"eval_rec_at_5": 0.976027397260274,
"eval_rec_at_8": 0.9794520547945206,
"eval_rec_macro": 0.9176060797993615,
"eval_rec_micro": 0.9262023217246329,
"eval_runtime": 6.4724,
"eval_samples_per_second": 135.345,
"eval_steps_per_second": 16.995,
"step": 72
},
{
"epoch": 4.0,
"grad_norm": 0.16448086500167847,
"learning_rate": 0.0003,
"loss": 0.0471,
"step": 96
},
{
"epoch": 4.0,
"eval_acc_macro": 0.8271437680276396,
"eval_acc_micro": 0.8323529411764093,
"eval_auc_macro": 0.993671108576282,
"eval_auc_micro": 0.9937186337101654,
"eval_f1_at_5": 0.4285323653750887,
"eval_f1_at_8": 0.29274329072668936,
"eval_f1_macro": 0.9038769548624298,
"eval_f1_micro": 0.9085072231138916,
"eval_loss": 0.06305181980133057,
"eval_prec_at_5": 0.27442922374429224,
"eval_prec_at_8": 0.1720890410958904,
"eval_prec_macro": 0.8786407478949387,
"eval_prec_micro": 0.8802488335924665,
"eval_rec_at_5": 0.977359208523592,
"eval_rec_at_8": 0.9794520547945206,
"eval_rec_macro": 0.9344636739690416,
"eval_rec_micro": 0.9386401326699055,
"eval_runtime": 6.3196,
"eval_samples_per_second": 138.617,
"eval_steps_per_second": 17.406,
"step": 96
},
{
"epoch": 5.0,
"grad_norm": 0.22940804064273834,
"learning_rate": 0.0003,
"loss": 0.0292,
"step": 120
},
{
"epoch": 5.0,
"eval_acc_macro": 0.8308612963912455,
"eval_acc_micro": 0.8424710424709774,
"eval_auc_macro": 0.9930217118288992,
"eval_auc_micro": 0.9944139892271574,
"eval_f1_at_5": 0.4295136788854873,
"eval_f1_at_8": 0.29274329072668936,
"eval_f1_macro": 0.90558341223627,
"eval_f1_micro": 0.9145012573343744,
"eval_loss": 0.06955315172672272,
"eval_prec_at_5": 0.2751141552511416,
"eval_prec_at_8": 0.1720890410958904,
"eval_prec_macro": 0.9134147654048902,
"eval_prec_micro": 0.9245762711863623,
"eval_rec_at_5": 0.9788812785388128,
"eval_rec_at_8": 0.9794520547945206,
"eval_rec_macro": 0.9000407679288313,
"eval_rec_micro": 0.9046434494194938,
"eval_runtime": 6.3259,
"eval_samples_per_second": 138.479,
"eval_steps_per_second": 17.389,
"step": 120
},
{
"epoch": 6.0,
"grad_norm": 0.14079643785953522,
"learning_rate": 0.0003,
"loss": 0.0169,
"step": 144
},
{
"epoch": 6.0,
"eval_acc_macro": 0.8220596818065521,
"eval_acc_micro": 0.8319327731091801,
"eval_auc_macro": 0.9927035556143388,
"eval_auc_micro": 0.9939625962172801,
"eval_f1_at_5": 0.4277378477874334,
"eval_f1_at_8": 0.29274329072668936,
"eval_f1_macro": 0.8993295914850711,
"eval_f1_micro": 0.9082568807338691,
"eval_loss": 0.07188171148300171,
"eval_prec_at_5": 0.27397260273972607,
"eval_prec_at_8": 0.1720890410958904,
"eval_prec_macro": 0.9063614647378323,
"eval_prec_micro": 0.913590604026769,
"eval_rec_at_5": 0.9748858447488584,
"eval_rec_at_8": 0.9794520547945206,
"eval_rec_macro": 0.8937033781116202,
"eval_rec_micro": 0.9029850746267908,
"eval_runtime": 6.3449,
"eval_samples_per_second": 138.064,
"eval_steps_per_second": 17.337,
"step": 144
},
{
"epoch": 7.0,
"grad_norm": 0.12200725823640823,
"learning_rate": 0.0003,
"loss": 0.0095,
"step": 168
},
{
"epoch": 7.0,
"eval_acc_macro": 0.8261283592776246,
"eval_acc_micro": 0.8320433436531863,
"eval_auc_macro": 0.9914073448442141,
"eval_auc_micro": 0.9922562843066707,
"eval_f1_at_5": 0.42747783226694747,
"eval_f1_at_8": 0.2925113305987028,
"eval_f1_macro": 0.9026710595366999,
"eval_f1_micro": 0.9083227714405654,
"eval_loss": 0.09724259376525879,
"eval_prec_at_5": 0.27374429223744295,
"eval_prec_at_8": 0.17194634703196346,
"eval_prec_macro": 0.9186608707729786,
"eval_prec_micro": 0.9259259259258461,
"eval_rec_at_5": 0.975076103500761,
"eval_rec_at_8": 0.9788812785388128,
"eval_rec_macro": 0.8890001231603133,
"eval_rec_micro": 0.8913764510778697,
"eval_runtime": 6.3561,
"eval_samples_per_second": 137.82,
"eval_steps_per_second": 17.306,
"step": 168
},
{
"epoch": 8.0,
"grad_norm": 0.1273776739835739,
"learning_rate": 0.0003,
"loss": 0.0061,
"step": 192
},
{
"epoch": 8.0,
"eval_acc_macro": 0.8331638553801589,
"eval_acc_micro": 0.8417047184169831,
"eval_auc_macro": 0.9887523718801591,
"eval_auc_micro": 0.9910290762677764,
"eval_f1_at_5": 0.42681135828205624,
"eval_f1_at_8": 0.29225388037164507,
"eval_f1_macro": 0.9063403443920213,
"eval_f1_micro": 0.9140495867767839,
"eval_loss": 0.09798464179039001,
"eval_prec_at_5": 0.2732876712328767,
"eval_prec_at_8": 0.17180365296803654,
"eval_prec_macro": 0.9010794509609382,
"eval_prec_micro": 0.9110378912684587,
"eval_rec_at_5": 0.9739345509893454,
"eval_rec_at_8": 0.9777397260273972,
"eval_rec_macro": 0.9130526960570995,
"eval_rec_micro": 0.9170812603647663,
"eval_runtime": 6.4145,
"eval_samples_per_second": 136.565,
"eval_steps_per_second": 17.149,
"step": 192
}
],
"logging_steps": 500,
"max_steps": 960,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}