mamba_prompt_synth_sbdh_mlc / trainer_state.json
avijit's picture
Upload folder using huggingface_hub
5d06d18 verified
{
"best_metric": 0.9134487053983307,
"best_model_checkpoint": "./saved_models/mamba_prompt_sbdh_gpt4_v2_0/checkpoint-912",
"epoch": 38.0,
"eval_steps": 500,
"global_step": 912,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 8.483881950378418,
"learning_rate": 0.0003,
"loss": 1.2055,
"step": 24
},
{
"epoch": 1.0,
"eval_acc_macro": 0.07039666648893216,
"eval_acc_micro": 0.17493620640770696,
"eval_auc_macro": 0.7109566412757288,
"eval_auc_micro": 0.6509757698362547,
"eval_f1_at_5": 0.2568182822325969,
"eval_f1_at_8": 0.22372279922068555,
"eval_f1_macro": 0.11555043034927992,
"eval_f1_micro": 0.2977799227799084,
"eval_loss": 0.4126754105091095,
"eval_prec_at_5": 0.16506849315068495,
"eval_prec_at_8": 0.1317066210045662,
"eval_prec_macro": 0.0712520271567465,
"eval_prec_micro": 0.21000680735193294,
"eval_rec_at_5": 0.5781963470319634,
"eval_rec_at_8": 0.7423896499238966,
"eval_rec_macro": 0.3187634216175277,
"eval_rec_micro": 0.5116086235488796,
"eval_runtime": 6.0156,
"eval_samples_per_second": 145.621,
"eval_steps_per_second": 18.286,
"step": 24
},
{
"epoch": 2.0,
"grad_norm": 2.6864304542541504,
"learning_rate": 0.0003,
"loss": 0.3514,
"step": 48
},
{
"epoch": 2.0,
"eval_acc_macro": 0.5158272708489711,
"eval_acc_micro": 0.5578747628083138,
"eval_auc_macro": 0.94707367087266,
"eval_auc_micro": 0.913787855621242,
"eval_f1_at_5": 0.3898187899179248,
"eval_f1_at_8": 0.2799604575624912,
"eval_f1_macro": 0.646736079674674,
"eval_f1_micro": 0.7161997563945827,
"eval_loss": 0.2387184202671051,
"eval_prec_at_5": 0.24908675799086763,
"eval_prec_at_8": 0.1643835616438356,
"eval_prec_macro": 0.6802826117671307,
"eval_prec_micro": 0.7016706443913523,
"eval_rec_at_5": 0.8961187214611872,
"eval_rec_at_8": 0.9429223744292238,
"eval_rec_macro": 0.6832268849511623,
"eval_rec_micro": 0.7313432835820289,
"eval_runtime": 6.048,
"eval_samples_per_second": 144.841,
"eval_steps_per_second": 18.188,
"step": 48
},
{
"epoch": 3.0,
"grad_norm": 2.4849371910095215,
"learning_rate": 0.0003,
"loss": 0.1682,
"step": 72
},
{
"epoch": 3.0,
"eval_acc_macro": 0.7840879005682694,
"eval_acc_micro": 0.8028064992613883,
"eval_auc_macro": 0.9886749124239317,
"eval_auc_micro": 0.990273843904626,
"eval_f1_at_5": 0.42751438858977425,
"eval_f1_at_8": 0.2925113305987028,
"eval_f1_macro": 0.8710656054515781,
"eval_f1_micro": 0.8906185989347897,
"eval_loss": 0.07425953447818756,
"eval_prec_at_5": 0.27374429223744295,
"eval_prec_at_8": 0.17194634703196346,
"eval_prec_macro": 0.878761745264005,
"eval_prec_micro": 0.8801619433197667,
"eval_rec_at_5": 0.9754566210045662,
"eval_rec_at_8": 0.9788812785388128,
"eval_rec_macro": 0.8815695470069095,
"eval_rec_micro": 0.9013266998340878,
"eval_runtime": 6.0775,
"eval_samples_per_second": 144.138,
"eval_steps_per_second": 18.1,
"step": 72
},
{
"epoch": 4.0,
"grad_norm": 1.911177396774292,
"learning_rate": 0.0003,
"loss": 0.0646,
"step": 96
},
{
"epoch": 4.0,
"eval_acc_macro": 0.8321415173591932,
"eval_acc_micro": 0.8384442782347914,
"eval_auc_macro": 0.9920630589905021,
"eval_auc_micro": 0.9932832791412234,
"eval_f1_at_5": 0.4278476261962846,
"eval_f1_at_8": 0.29274329072668936,
"eval_f1_macro": 0.9063058610149609,
"eval_f1_micro": 0.9121236777867442,
"eval_loss": 0.06308761239051819,
"eval_prec_at_5": 0.27397260273972607,
"eval_prec_at_8": 0.1720890410958904,
"eval_prec_macro": 0.8935275833247776,
"eval_prec_micro": 0.8953674121405035,
"eval_rec_at_5": 0.976027397260274,
"eval_rec_at_8": 0.9794520547945206,
"eval_rec_macro": 0.9215877913582228,
"eval_rec_micro": 0.929519071310039,
"eval_runtime": 6.085,
"eval_samples_per_second": 143.962,
"eval_steps_per_second": 18.077,
"step": 96
},
{
"epoch": 5.0,
"grad_norm": 0.14926180243492126,
"learning_rate": 0.0003,
"loss": 0.0416,
"step": 120
},
{
"epoch": 5.0,
"eval_acc_macro": 0.835711620798801,
"eval_acc_micro": 0.844599844599779,
"eval_auc_macro": 0.99367524750647,
"eval_auc_micro": 0.9950181038357179,
"eval_f1_at_5": 0.428217417200729,
"eval_f1_at_8": 0.2925113305987028,
"eval_f1_macro": 0.9086207273252621,
"eval_f1_micro": 0.9157540016848428,
"eval_loss": 0.056059833616018295,
"eval_prec_at_5": 0.2742009132420091,
"eval_prec_at_8": 0.17194634703196346,
"eval_prec_macro": 0.9238859033605576,
"eval_prec_micro": 0.9306506849314271,
"eval_rec_at_5": 0.9769786910197868,
"eval_rec_at_8": 0.9788812785388128,
"eval_rec_macro": 0.8949097880182088,
"eval_rec_micro": 0.9013266998340878,
"eval_runtime": 6.0515,
"eval_samples_per_second": 144.758,
"eval_steps_per_second": 18.177,
"step": 120
},
{
"epoch": 6.0,
"grad_norm": 0.24564415216445923,
"learning_rate": 0.0003,
"loss": 0.0281,
"step": 144
},
{
"epoch": 6.0,
"eval_acc_macro": 0.8427241930303886,
"eval_acc_micro": 0.8486486486485831,
"eval_auc_macro": 0.9930039223823353,
"eval_auc_micro": 0.9943083137218841,
"eval_f1_at_5": 0.42721766176318765,
"eval_f1_at_8": 0.2925113305987028,
"eval_f1_macro": 0.9126603684260771,
"eval_f1_micro": 0.9181286549706835,
"eval_loss": 0.05612677335739136,
"eval_prec_at_5": 0.27351598173515984,
"eval_prec_at_8": 0.17194634703196346,
"eval_prec_macro": 0.9258898765719902,
"eval_prec_micro": 0.9250841750840971,
"eval_rec_at_5": 0.9752663622526635,
"eval_rec_at_8": 0.9788812785388128,
"eval_rec_macro": 0.9012770790636472,
"eval_rec_micro": 0.9112769485903058,
"eval_runtime": 6.0637,
"eval_samples_per_second": 144.466,
"eval_steps_per_second": 18.141,
"step": 144
},
{
"epoch": 7.0,
"grad_norm": 0.17717961966991425,
"learning_rate": 0.0003,
"loss": 0.0163,
"step": 168
},
{
"epoch": 7.0,
"eval_acc_macro": 0.8320704316636002,
"eval_acc_micro": 0.8395155185464921,
"eval_auc_macro": 0.9927596537595381,
"eval_auc_micro": 0.9942892457976611,
"eval_f1_at_5": 0.4287923569488756,
"eval_f1_at_8": 0.29274329072668936,
"eval_f1_macro": 0.9054968756763103,
"eval_f1_micro": 0.9127572016460155,
"eval_loss": 0.07535412162542343,
"eval_prec_at_5": 0.27465753424657535,
"eval_prec_at_8": 0.1720890410958904,
"eval_prec_macro": 0.895837475867586,
"eval_prec_micro": 0.9060457516339129,
"eval_rec_at_5": 0.9771689497716894,
"eval_rec_at_8": 0.9794520547945206,
"eval_rec_macro": 0.9167258374697121,
"eval_rec_micro": 0.9195688225538209,
"eval_runtime": 6.0576,
"eval_samples_per_second": 144.612,
"eval_steps_per_second": 18.159,
"step": 168
},
{
"epoch": 8.0,
"grad_norm": 0.27908530831336975,
"learning_rate": 0.0003,
"loss": 0.0109,
"step": 192
},
{
"epoch": 8.0,
"eval_acc_macro": 0.8295268992370044,
"eval_acc_micro": 0.832209737827653,
"eval_auc_macro": 0.9930334009743317,
"eval_auc_micro": 0.9938628905761329,
"eval_f1_at_5": 0.42918048633589306,
"eval_f1_at_8": 0.29274329072668936,
"eval_f1_macro": 0.9041576767954438,
"eval_f1_micro": 0.908421913327808,
"eval_loss": 0.07922064512968063,
"eval_prec_at_5": 0.27488584474885847,
"eval_prec_at_8": 0.1720890410958904,
"eval_prec_macro": 0.8907484139672496,
"eval_prec_micro": 0.8959677419354116,
"eval_rec_at_5": 0.978310502283105,
"eval_rec_at_8": 0.9794520547945206,
"eval_rec_macro": 0.9213085265888775,
"eval_rec_micro": 0.9212271973465239,
"eval_runtime": 6.0722,
"eval_samples_per_second": 144.265,
"eval_steps_per_second": 18.115,
"step": 192
},
{
"epoch": 9.0,
"grad_norm": 0.14036260545253754,
"learning_rate": 0.0003,
"loss": 0.0074,
"step": 216
},
{
"epoch": 9.0,
"eval_acc_macro": 0.8385687066413228,
"eval_acc_micro": 0.844961240310012,
"eval_auc_macro": 0.9927809510576332,
"eval_auc_micro": 0.9942359625327764,
"eval_f1_at_5": 0.4295136788854873,
"eval_f1_at_8": 0.29274329072668936,
"eval_f1_macro": 0.9094505520360644,
"eval_f1_micro": 0.915966386554545,
"eval_loss": 0.08278516680002213,
"eval_prec_at_5": 0.2751141552511416,
"eval_prec_at_8": 0.1720890410958904,
"eval_prec_macro": 0.9210993789406117,
"eval_prec_micro": 0.928449744463294,
"eval_rec_at_5": 0.9788812785388128,
"eval_rec_at_8": 0.9794520547945206,
"eval_rec_macro": 0.8989352557922053,
"eval_rec_micro": 0.9038142620231423,
"eval_runtime": 6.0864,
"eval_samples_per_second": 143.928,
"eval_steps_per_second": 18.073,
"step": 216
},
{
"epoch": 10.0,
"grad_norm": 0.192245751619339,
"learning_rate": 0.0003,
"loss": 0.0066,
"step": 240
},
{
"epoch": 10.0,
"eval_acc_macro": 0.841830141960627,
"eval_acc_micro": 0.8498475609755449,
"eval_auc_macro": 0.9923313106695196,
"eval_auc_micro": 0.9933376316355039,
"eval_f1_at_5": 0.4275509226486327,
"eval_f1_at_8": 0.2922793659426448,
"eval_f1_macro": 0.9117612718437879,
"eval_f1_micro": 0.9188298310670853,
"eval_loss": 0.0772981271147728,
"eval_prec_at_5": 0.27374429223744295,
"eval_prec_at_8": 0.17180365296803654,
"eval_prec_macro": 0.9114916513520281,
"eval_prec_micro": 0.9131859131858384,
"eval_rec_at_5": 0.9758371385083713,
"eval_rec_at_8": 0.978310502283105,
"eval_rec_macro": 0.9136710504527534,
"eval_rec_micro": 0.92454394693193,
"eval_runtime": 5.9768,
"eval_samples_per_second": 146.566,
"eval_steps_per_second": 18.404,
"step": 240
},
{
"epoch": 11.0,
"grad_norm": 0.11837983131408691,
"learning_rate": 0.0003,
"loss": 0.0057,
"step": 264
},
{
"epoch": 11.0,
"eval_acc_macro": 0.8320358706720068,
"eval_acc_micro": 0.836842105263095,
"eval_auc_macro": 0.990286728874436,
"eval_auc_micro": 0.9913156297458193,
"eval_f1_at_5": 0.42747783226694747,
"eval_f1_at_8": 0.2920219218917755,
"eval_f1_macro": 0.9058723874403682,
"eval_f1_micro": 0.911174785100212,
"eval_loss": 0.10789646208286285,
"eval_prec_at_5": 0.27374429223744295,
"eval_prec_at_8": 0.1716609589041096,
"eval_prec_macro": 0.8914304927691732,
"eval_prec_micro": 0.8997574777687227,
"eval_rec_at_5": 0.975076103500761,
"eval_rec_at_8": 0.9771689497716894,
"eval_rec_macro": 0.9231550617780325,
"eval_rec_micro": 0.922885572139227,
"eval_runtime": 6.0106,
"eval_samples_per_second": 145.742,
"eval_steps_per_second": 18.301,
"step": 264
},
{
"epoch": 12.0,
"grad_norm": 0.16925422847270966,
"learning_rate": 0.0003,
"loss": 0.005,
"step": 288
},
{
"epoch": 12.0,
"eval_acc_macro": 0.8364454962531706,
"eval_acc_micro": 0.8397581254723477,
"eval_auc_macro": 0.9908748071505288,
"eval_auc_micro": 0.9917851996554229,
"eval_f1_at_5": 0.42708974483795964,
"eval_f1_at_8": 0.2925113305987028,
"eval_f1_macro": 0.9084368033771916,
"eval_f1_micro": 0.9129005751848058,
"eval_loss": 0.09441287070512772,
"eval_prec_at_5": 0.27351598173515984,
"eval_prec_at_8": 0.17194634703196346,
"eval_prec_macro": 0.8995515858307809,
"eval_prec_micro": 0.9047231270357569,
"eval_rec_at_5": 0.9739345509893454,
"eval_rec_at_8": 0.9788812785388128,
"eval_rec_macro": 0.9188760380015673,
"eval_rec_micro": 0.9212271973465239,
"eval_runtime": 6.0148,
"eval_samples_per_second": 145.64,
"eval_steps_per_second": 18.288,
"step": 288
},
{
"epoch": 13.0,
"grad_norm": 0.25462788343429565,
"learning_rate": 0.0003,
"loss": 0.0045,
"step": 312
},
{
"epoch": 13.0,
"eval_acc_macro": 0.8219219437469505,
"eval_acc_micro": 0.8327165062915741,
"eval_auc_macro": 0.9892036337462419,
"eval_auc_micro": 0.9914230873000853,
"eval_f1_at_5": 0.42851407428878907,
"eval_f1_at_8": 0.2918154230125642,
"eval_f1_macro": 0.8990693006867976,
"eval_f1_micro": 0.9087237479805405,
"eval_loss": 0.11245805770158768,
"eval_prec_at_5": 0.2744292237442923,
"eval_prec_at_8": 0.17151826484018265,
"eval_prec_macro": 0.872997498581981,
"eval_prec_micro": 0.8858267716534736,
"eval_rec_at_5": 0.9771689497716894,
"eval_rec_at_8": 0.9771689497716894,
"eval_rec_macro": 0.9280030786669166,
"eval_rec_micro": 0.932835820895445,
"eval_runtime": 6.2398,
"eval_samples_per_second": 140.389,
"eval_steps_per_second": 17.629,
"step": 312
},
{
"epoch": 14.0,
"grad_norm": 0.13963262736797333,
"learning_rate": 0.0003,
"loss": 0.0029,
"step": 336
},
{
"epoch": 14.0,
"eval_acc_macro": 0.8398618464555664,
"eval_acc_micro": 0.8437499999999356,
"eval_auc_macro": 0.9909268418920506,
"eval_auc_micro": 0.9918402649693007,
"eval_f1_at_5": 0.42740465274793293,
"eval_f1_at_8": 0.2920473967500829,
"eval_f1_macro": 0.910421950810437,
"eval_f1_micro": 0.9152542372880599,
"eval_loss": 0.13447707891464233,
"eval_prec_at_5": 0.27374429223744295,
"eval_prec_at_8": 0.1716609589041096,
"eval_prec_macro": 0.907525335697792,
"eval_prec_micro": 0.9126133553173196,
"eval_rec_at_5": 0.9743150684931506,
"eval_rec_at_8": 0.9777397260273972,
"eval_rec_macro": 0.9137157897519602,
"eval_rec_micro": 0.9179104477611179,
"eval_runtime": 6.2625,
"eval_samples_per_second": 139.881,
"eval_steps_per_second": 17.565,
"step": 336
},
{
"epoch": 15.0,
"grad_norm": 0.1901983767747879,
"learning_rate": 0.0003,
"loss": 0.0032,
"step": 360
},
{
"epoch": 15.0,
"eval_acc_macro": 0.8357224650035576,
"eval_acc_micro": 0.8455098934550346,
"eval_auc_macro": 0.9892990457525107,
"eval_auc_micro": 0.990561377509615,
"eval_f1_at_5": 0.4254418080308462,
"eval_f1_at_8": 0.2922793659426448,
"eval_f1_macro": 0.9080581868434588,
"eval_f1_micro": 0.9162886597937389,
"eval_loss": 0.10820472985506058,
"eval_prec_at_5": 0.27237442922374433,
"eval_prec_at_8": 0.17180365296803654,
"eval_prec_macro": 0.9057447684648601,
"eval_prec_micro": 0.9114027891713772,
"eval_rec_at_5": 0.9712709284627092,
"eval_rec_at_8": 0.978310502283105,
"eval_rec_macro": 0.9111621041819294,
"eval_rec_micro": 0.9212271973465239,
"eval_runtime": 6.3038,
"eval_samples_per_second": 138.963,
"eval_steps_per_second": 17.45,
"step": 360
},
{
"epoch": 16.0,
"grad_norm": 0.17108500003814697,
"learning_rate": 0.0003,
"loss": 0.0027,
"step": 384
},
{
"epoch": 16.0,
"eval_acc_macro": 0.8297448303381417,
"eval_acc_micro": 0.8398169336383798,
"eval_auc_macro": 0.9902632756647023,
"eval_auc_micro": 0.9922601157120051,
"eval_f1_at_5": 0.4277927620668724,
"eval_f1_at_8": 0.2922793659426448,
"eval_f1_macro": 0.9041211387604378,
"eval_f1_micro": 0.912935323383009,
"eval_loss": 0.12706510722637177,
"eval_prec_at_5": 0.273972602739726,
"eval_prec_at_8": 0.17180365296803654,
"eval_prec_macro": 0.9060353096841967,
"eval_prec_micro": 0.9129353233830089,
"eval_rec_at_5": 0.9754566210045662,
"eval_rec_at_8": 0.978310502283105,
"eval_rec_macro": 0.9032650308288451,
"eval_rec_micro": 0.9129353233830089,
"eval_runtime": 6.7247,
"eval_samples_per_second": 130.266,
"eval_steps_per_second": 16.358,
"step": 384
},
{
"epoch": 17.0,
"grad_norm": 0.06544364243745804,
"learning_rate": 0.0003,
"loss": 0.0029,
"step": 408
},
{
"epoch": 17.0,
"eval_acc_macro": 0.829765891496947,
"eval_acc_micro": 0.8365384615383996,
"eval_auc_macro": 0.9893010896141785,
"eval_auc_micro": 0.9914586391774918,
"eval_f1_at_5": 0.42714459980714947,
"eval_f1_at_8": 0.2922793659426448,
"eval_f1_macro": 0.9037391479019817,
"eval_f1_micro": 0.9109947643978323,
"eval_loss": 0.10541332513093948,
"eval_prec_at_5": 0.27351598173515984,
"eval_prec_at_8": 0.17180365296803654,
"eval_prec_macro": 0.8801020034653725,
"eval_prec_micro": 0.8856695379795704,
"eval_rec_at_5": 0.9745053272450532,
"eval_rec_at_8": 0.978310502283105,
"eval_rec_macro": 0.9310175234124923,
"eval_rec_micro": 0.937810945273554,
"eval_runtime": 6.3322,
"eval_samples_per_second": 138.34,
"eval_steps_per_second": 17.371,
"step": 408
},
{
"epoch": 18.0,
"grad_norm": 0.12380898743867874,
"learning_rate": 0.0003,
"loss": 0.0028,
"step": 432
},
{
"epoch": 18.0,
"eval_acc_macro": 0.8295264706506825,
"eval_acc_micro": 0.8390718562873624,
"eval_auc_macro": 0.9886105199219366,
"eval_auc_micro": 0.9908788495376829,
"eval_f1_at_5": 0.42679308597884263,
"eval_f1_at_8": 0.2918154230125642,
"eval_f1_macro": 0.9042231261610075,
"eval_f1_micro": 0.9124949124948383,
"eval_loss": 0.11824628710746765,
"eval_prec_at_5": 0.2732876712328767,
"eval_prec_at_8": 0.17151826484018265,
"eval_prec_macro": 0.8888519630940449,
"eval_prec_micro": 0.8960831334931337,
"eval_rec_at_5": 0.973744292237443,
"eval_rec_at_8": 0.9771689497716894,
"eval_rec_macro": 0.9215852252593782,
"eval_rec_micro": 0.929519071310039,
"eval_runtime": 6.3155,
"eval_samples_per_second": 138.707,
"eval_steps_per_second": 17.418,
"step": 432
},
{
"epoch": 19.0,
"grad_norm": 0.21812734007835388,
"learning_rate": 0.0003,
"loss": 0.0024,
"step": 456
},
{
"epoch": 19.0,
"eval_acc_macro": 0.829661993373597,
"eval_acc_micro": 0.8388554216866838,
"eval_auc_macro": 0.9894816577253542,
"eval_auc_micro": 0.991705185655646,
"eval_f1_at_5": 0.42714459980714947,
"eval_f1_at_8": 0.2922793659426448,
"eval_f1_macro": 0.9036099130579357,
"eval_f1_micro": 0.9123669123668376,
"eval_loss": 0.1079055592417717,
"eval_prec_at_5": 0.27351598173515984,
"eval_prec_at_8": 0.17180365296803654,
"eval_prec_macro": 0.8959752170714762,
"eval_prec_micro": 0.9012944983818041,
"eval_rec_at_5": 0.9745053272450532,
"eval_rec_at_8": 0.978310502283105,
"eval_rec_macro": 0.9119091133829933,
"eval_rec_micro": 0.9237147595355785,
"eval_runtime": 6.2636,
"eval_samples_per_second": 139.855,
"eval_steps_per_second": 17.562,
"step": 456
},
{
"epoch": 20.0,
"grad_norm": 0.14500826597213745,
"learning_rate": 0.0003,
"loss": 0.0026,
"step": 480
},
{
"epoch": 20.0,
"eval_acc_macro": 0.8388928681043534,
"eval_acc_micro": 0.8470764617690519,
"eval_auc_macro": 0.9914346196687225,
"eval_auc_micro": 0.9929207212736375,
"eval_f1_at_5": 0.42845916764894976,
"eval_f1_at_8": 0.2925113305987028,
"eval_f1_macro": 0.9099987722528428,
"eval_f1_micro": 0.9172077922077178,
"eval_loss": 0.12154436111450195,
"eval_prec_at_5": 0.2744292237442923,
"eval_prec_at_8": 0.17194634703196346,
"eval_prec_macro": 0.8884541003489376,
"eval_prec_micro": 0.898251192368768,
"eval_rec_at_5": 0.9765981735159818,
"eval_rec_at_8": 0.9788812785388128,
"eval_rec_macro": 0.9349802408736304,
"eval_rec_micro": 0.9369817578772025,
"eval_runtime": 6.1697,
"eval_samples_per_second": 141.984,
"eval_steps_per_second": 17.829,
"step": 480
},
{
"epoch": 21.0,
"grad_norm": 0.12059065699577332,
"learning_rate": 0.0003,
"loss": 0.002,
"step": 504
},
{
"epoch": 21.0,
"eval_acc_macro": 0.840415915936909,
"eval_acc_micro": 0.8496978851963104,
"eval_auc_macro": 0.9902023505450398,
"eval_auc_micro": 0.9918164746152466,
"eval_f1_at_5": 0.42708974483795964,
"eval_f1_at_8": 0.2922793659426448,
"eval_f1_macro": 0.9110529776466078,
"eval_f1_micro": 0.9187423438137264,
"eval_loss": 0.1368735283613205,
"eval_prec_at_5": 0.27351598173515984,
"eval_prec_at_8": 0.17180365296803654,
"eval_prec_macro": 0.8969397087072427,
"eval_prec_micro": 0.9050683829444163,
"eval_rec_at_5": 0.9739345509893454,
"eval_rec_at_8": 0.978310502283105,
"eval_rec_macro": 0.9266197213944322,
"eval_rec_micro": 0.932835820895445,
"eval_runtime": 6.2964,
"eval_samples_per_second": 139.126,
"eval_steps_per_second": 17.47,
"step": 504
},
{
"epoch": 22.0,
"grad_norm": 0.1459818333387375,
"learning_rate": 0.0003,
"loss": 0.0022,
"step": 528
},
{
"epoch": 22.0,
"eval_acc_macro": 0.8299074532155761,
"eval_acc_micro": 0.8387573964496421,
"eval_auc_macro": 0.9912484493432677,
"eval_auc_micro": 0.9923215964022568,
"eval_f1_at_5": 0.42714459980714947,
"eval_f1_at_8": 0.29205588274802374,
"eval_f1_macro": 0.9036433853106406,
"eval_f1_micro": 0.9123089300079716,
"eval_loss": 0.12988413870334625,
"eval_prec_at_5": 0.27351598173515984,
"eval_prec_at_8": 0.1716609589041096,
"eval_prec_macro": 0.8778252010823914,
"eval_prec_micro": 0.8859374999999308,
"eval_rec_at_5": 0.9745053272450532,
"eval_rec_at_8": 0.9779299847792998,
"eval_rec_macro": 0.9334072472404634,
"eval_rec_micro": 0.9402985074626086,
"eval_runtime": 6.2795,
"eval_samples_per_second": 139.501,
"eval_steps_per_second": 17.517,
"step": 528
},
{
"epoch": 23.0,
"grad_norm": 0.23651210963726044,
"learning_rate": 0.0003,
"loss": 0.0023,
"step": 552
},
{
"epoch": 23.0,
"eval_acc_macro": 0.8331982367002823,
"eval_acc_micro": 0.8412213740457373,
"eval_auc_macro": 0.9907246610096498,
"eval_auc_micro": 0.9922808765827714,
"eval_f1_at_5": 0.42686614175859805,
"eval_f1_at_8": 0.2922793659426448,
"eval_f1_macro": 0.9057038011597318,
"eval_f1_micro": 0.9137645107793605,
"eval_loss": 0.1514243185520172,
"eval_prec_at_5": 0.2732876712328767,
"eval_prec_at_8": 0.17180365296803654,
"eval_prec_macro": 0.9057572791903493,
"eval_prec_micro": 0.9137645107793604,
"eval_rec_at_5": 0.9745053272450532,
"eval_rec_at_8": 0.978310502283105,
"eval_rec_macro": 0.9065147380552868,
"eval_rec_micro": 0.9137645107793604,
"eval_runtime": 6.3102,
"eval_samples_per_second": 138.822,
"eval_steps_per_second": 17.432,
"step": 552
},
{
"epoch": 24.0,
"grad_norm": 0.09079229086637497,
"learning_rate": 0.0003,
"loss": 0.0026,
"step": 576
},
{
"epoch": 24.0,
"eval_acc_macro": 0.8361929136599193,
"eval_acc_micro": 0.8474446987032153,
"eval_auc_macro": 0.990970820997881,
"eval_auc_micro": 0.9927761079978714,
"eval_f1_at_5": 0.4275326584009282,
"eval_f1_at_8": 0.2922793659426448,
"eval_f1_macro": 0.9082187775109158,
"eval_f1_micro": 0.9174236168455063,
"eval_loss": 0.1161712110042572,
"eval_prec_at_5": 0.27374429223744295,
"eval_prec_at_8": 0.17180365296803654,
"eval_prec_macro": 0.9024833914958587,
"eval_prec_micro": 0.9136513157893985,
"eval_rec_at_5": 0.9756468797564688,
"eval_rec_at_8": 0.978310502283105,
"eval_rec_macro": 0.9147050110934346,
"eval_rec_micro": 0.9212271973465239,
"eval_runtime": 6.2478,
"eval_samples_per_second": 140.209,
"eval_steps_per_second": 17.606,
"step": 576
},
{
"epoch": 25.0,
"grad_norm": 0.1555357277393341,
"learning_rate": 0.0003,
"loss": 0.0011,
"step": 600
},
{
"epoch": 25.0,
"eval_acc_macro": 0.8261213062726463,
"eval_acc_micro": 0.8319763138415373,
"eval_auc_macro": 0.990040119063344,
"eval_auc_micro": 0.9911265543476828,
"eval_f1_at_5": 0.4265511126863068,
"eval_f1_at_8": 0.2920473967500829,
"eval_f1_macro": 0.9014179315917096,
"eval_f1_micro": 0.9082828282827549,
"eval_loss": 0.17262162268161774,
"eval_prec_at_5": 0.2730593607305936,
"eval_prec_at_8": 0.1716609589041096,
"eval_prec_macro": 0.8792666911124373,
"eval_prec_micro": 0.8857368006303479,
"eval_rec_at_5": 0.974124809741248,
"eval_rec_at_8": 0.9777397260273972,
"eval_rec_macro": 0.9306207402078307,
"eval_rec_micro": 0.9320066334990935,
"eval_runtime": 6.1427,
"eval_samples_per_second": 142.608,
"eval_steps_per_second": 17.907,
"step": 600
},
{
"epoch": 26.0,
"grad_norm": 0.054534025490283966,
"learning_rate": 0.0003,
"loss": 0.0016,
"step": 624
},
{
"epoch": 26.0,
"eval_acc_macro": 0.826194090509476,
"eval_acc_micro": 0.8362004487658312,
"eval_auc_macro": 0.9894373225891121,
"eval_auc_micro": 0.9909356077981039,
"eval_f1_at_5": 0.42686614175859805,
"eval_f1_at_8": 0.2918154230125642,
"eval_f1_macro": 0.9018300611095432,
"eval_f1_micro": 0.910794297352268,
"eval_loss": 0.1341952532529831,
"eval_prec_at_5": 0.2732876712328767,
"eval_prec_at_8": 0.17151826484018265,
"eval_prec_macro": 0.8863083621122576,
"eval_prec_micro": 0.8951160928742278,
"eval_rec_at_5": 0.9745053272450532,
"eval_rec_at_8": 0.9771689497716894,
"eval_rec_macro": 0.9193115078475559,
"eval_rec_micro": 0.9270315091209844,
"eval_runtime": 6.2441,
"eval_samples_per_second": 140.292,
"eval_steps_per_second": 17.617,
"step": 624
},
{
"epoch": 27.0,
"grad_norm": 0.18318401277065277,
"learning_rate": 0.0003,
"loss": 0.0017,
"step": 648
},
{
"epoch": 27.0,
"eval_acc_macro": 0.8331625409086375,
"eval_acc_micro": 0.844547563805039,
"eval_auc_macro": 0.9903700036429086,
"eval_auc_micro": 0.9911557799511647,
"eval_f1_at_5": 0.4260900570497148,
"eval_f1_at_8": 0.2915919235644548,
"eval_f1_macro": 0.9059994264183651,
"eval_f1_micro": 0.9157232704401748,
"eval_loss": 0.16900603473186493,
"eval_prec_at_5": 0.27283105022831056,
"eval_prec_at_8": 0.1713755707762557,
"eval_prec_macro": 0.9152116568385661,
"eval_prec_micro": 0.9262086513994126,
"eval_rec_at_5": 0.9722222222222222,
"eval_rec_at_8": 0.9767884322678843,
"eval_rec_macro": 0.8973109240252097,
"eval_rec_micro": 0.9054726368158453,
"eval_runtime": 6.3369,
"eval_samples_per_second": 138.239,
"eval_steps_per_second": 17.359,
"step": 648
},
{
"epoch": 28.0,
"grad_norm": 0.09100370109081268,
"learning_rate": 0.0003,
"loss": 0.002,
"step": 672
},
{
"epoch": 28.0,
"eval_acc_macro": 0.834589729212194,
"eval_acc_micro": 0.8436781609194756,
"eval_auc_macro": 0.9901591404278554,
"eval_auc_micro": 0.9904570385410867,
"eval_f1_at_5": 0.4261448479661988,
"eval_f1_at_8": 0.29135993712848907,
"eval_f1_macro": 0.906536458365489,
"eval_f1_micro": 0.9152119700747368,
"eval_loss": 0.12356158345937729,
"eval_prec_at_5": 0.27283105022831056,
"eval_prec_at_8": 0.17123287671232876,
"eval_prec_macro": 0.9101814375182885,
"eval_prec_micro": 0.9174999999999235,
"eval_rec_at_5": 0.97279299847793,
"eval_rec_at_8": 0.9762176560121765,
"eval_rec_macro": 0.9036660250083727,
"eval_rec_micro": 0.9129353233830089,
"eval_runtime": 6.2292,
"eval_samples_per_second": 140.629,
"eval_steps_per_second": 17.659,
"step": 672
},
{
"epoch": 29.0,
"grad_norm": 0.031184401363134384,
"learning_rate": 0.0003,
"loss": 0.0013,
"step": 696
},
{
"epoch": 29.0,
"eval_acc_macro": 0.8341647633718688,
"eval_acc_micro": 0.8435321456234822,
"eval_auc_macro": 0.9909608628516886,
"eval_auc_micro": 0.9916707030076353,
"eval_f1_at_5": 0.42647810767423283,
"eval_f1_at_8": 0.2915834447216144,
"eval_f1_macro": 0.9066166365653049,
"eval_f1_micro": 0.9151260504200911,
"eval_loss": 0.1419263482093811,
"eval_prec_at_5": 0.27305936073059367,
"eval_prec_at_8": 0.1713755707762557,
"eval_prec_macro": 0.9206967590925624,
"eval_prec_micro": 0.9275979557069056,
"eval_rec_at_5": 0.9733637747336377,
"eval_rec_at_8": 0.9765981735159818,
"eval_rec_macro": 0.8938908104868867,
"eval_rec_micro": 0.9029850746267908,
"eval_runtime": 6.2406,
"eval_samples_per_second": 140.372,
"eval_steps_per_second": 17.627,
"step": 696
},
{
"epoch": 30.0,
"grad_norm": 0.12148793786764145,
"learning_rate": 0.0003,
"loss": 0.001,
"step": 720
},
{
"epoch": 30.0,
"eval_acc_macro": 0.8301734612803756,
"eval_acc_micro": 0.8415007656967196,
"eval_auc_macro": 0.9904576570433402,
"eval_auc_micro": 0.9915527313643118,
"eval_f1_at_5": 0.42708974483795964,
"eval_f1_at_8": 0.2918154230125642,
"eval_f1_macro": 0.9043513567183213,
"eval_f1_micro": 0.9139293139292379,
"eval_loss": 0.1600634753704071,
"eval_prec_at_5": 0.27351598173515984,
"eval_prec_at_8": 0.17151826484018265,
"eval_prec_macro": 0.9050210447593683,
"eval_prec_micro": 0.9165971643035098,
"eval_rec_at_5": 0.9739345509893454,
"eval_rec_at_8": 0.9771689497716894,
"eval_rec_macro": 0.9048079473209437,
"eval_rec_micro": 0.9112769485903058,
"eval_runtime": 6.2514,
"eval_samples_per_second": 140.128,
"eval_steps_per_second": 17.596,
"step": 720
},
{
"epoch": 31.0,
"grad_norm": 0.11479064077138901,
"learning_rate": 0.0003,
"loss": 0.0016,
"step": 744
},
{
"epoch": 31.0,
"eval_acc_macro": 0.833818894129361,
"eval_acc_micro": 0.8425787106446144,
"eval_auc_macro": 0.9902376130208236,
"eval_auc_micro": 0.9914569462309486,
"eval_f1_at_5": 0.4271994046239887,
"eval_f1_at_8": 0.2920473967500829,
"eval_f1_macro": 0.9060990360615312,
"eval_f1_micro": 0.9145646867371103,
"eval_loss": 0.1426621824502945,
"eval_prec_at_5": 0.27351598173515984,
"eval_prec_at_8": 0.1716609589041096,
"eval_prec_macro": 0.8877458031473394,
"eval_prec_micro": 0.8977635782746887,
"eval_rec_at_5": 0.975076103500761,
"eval_rec_at_8": 0.9777397260273972,
"eval_rec_macro": 0.9271635027618895,
"eval_rec_micro": 0.9320066334990935,
"eval_runtime": 6.1703,
"eval_samples_per_second": 141.971,
"eval_steps_per_second": 17.827,
"step": 744
},
{
"epoch": 32.0,
"grad_norm": 0.0649285838007927,
"learning_rate": 0.0003,
"loss": 0.0011,
"step": 768
},
{
"epoch": 32.0,
"eval_acc_macro": 0.830982026700095,
"eval_acc_micro": 0.8421862971515902,
"eval_auc_macro": 0.9898071389389859,
"eval_auc_micro": 0.9906281152443956,
"eval_f1_at_5": 0.4261448479661988,
"eval_f1_at_8": 0.2918154230125642,
"eval_f1_macro": 0.9044437561510542,
"eval_f1_micro": 0.9143334726284233,
"eval_loss": 0.15522228181362152,
"eval_prec_at_5": 0.27283105022831056,
"eval_prec_at_8": 0.17151826484018265,
"eval_prec_macro": 0.914549760838821,
"eval_prec_micro": 0.9216512215668979,
"eval_rec_at_5": 0.97279299847793,
"eval_rec_at_8": 0.9771689497716894,
"eval_rec_macro": 0.89634766919242,
"eval_rec_micro": 0.9071310116085483,
"eval_runtime": 6.2856,
"eval_samples_per_second": 139.366,
"eval_steps_per_second": 17.5,
"step": 768
},
{
"epoch": 33.0,
"grad_norm": 0.10257603228092194,
"learning_rate": 0.0003,
"loss": 0.001,
"step": 792
},
{
"epoch": 33.0,
"eval_acc_macro": 0.8321851395971892,
"eval_acc_micro": 0.8429878048779845,
"eval_auc_macro": 0.9889132300057429,
"eval_auc_micro": 0.9893601873860157,
"eval_f1_at_5": 0.42647810767423283,
"eval_f1_at_8": 0.2915919235644548,
"eval_f1_macro": 0.9051919207399707,
"eval_f1_micro": 0.9148056244829681,
"eval_loss": 0.149822399020195,
"eval_prec_at_5": 0.27305936073059367,
"eval_prec_at_8": 0.1713755707762557,
"eval_prec_macro": 0.9067171865229405,
"eval_prec_micro": 0.9125412541253373,
"eval_rec_at_5": 0.9733637747336377,
"eval_rec_at_8": 0.9767884322678843,
"eval_rec_macro": 0.906988014574614,
"eval_rec_micro": 0.9170812603647663,
"eval_runtime": 6.3392,
"eval_samples_per_second": 138.188,
"eval_steps_per_second": 17.352,
"step": 792
},
{
"epoch": 34.0,
"grad_norm": 0.06269343197345734,
"learning_rate": 0.0003,
"loss": 0.0012,
"step": 816
},
{
"epoch": 34.0,
"eval_acc_macro": 0.8356570074540155,
"eval_acc_micro": 0.8457446808509995,
"eval_auc_macro": 0.9885679122519199,
"eval_auc_micro": 0.9892555811101381,
"eval_f1_at_5": 0.42581157914042916,
"eval_f1_at_8": 0.29135993712848907,
"eval_f1_macro": 0.907980034255221,
"eval_f1_micro": 0.9164265129682243,
"eval_loss": 0.18450404703617096,
"eval_prec_at_5": 0.2726027397260274,
"eval_prec_at_8": 0.17123287671232876,
"eval_prec_macro": 0.9065894385720981,
"eval_prec_micro": 0.9100572363040956,
"eval_rec_at_5": 0.9722222222222222,
"eval_rec_at_8": 0.9762176560121765,
"eval_rec_macro": 0.9109305984894259,
"eval_rec_micro": 0.922885572139227,
"eval_runtime": 6.2222,
"eval_samples_per_second": 140.786,
"eval_steps_per_second": 17.679,
"step": 816
},
{
"epoch": 35.0,
"grad_norm": 0.060569193214178085,
"learning_rate": 0.0003,
"loss": 0.0015,
"step": 840
},
{
"epoch": 35.0,
"eval_acc_macro": 0.8271139319864672,
"eval_acc_micro": 0.8386855862583391,
"eval_auc_macro": 0.9891685604004818,
"eval_auc_micro": 0.9908738152492784,
"eval_f1_at_5": 0.4271994046239887,
"eval_f1_at_8": 0.29205588274802374,
"eval_f1_macro": 0.9021260521405651,
"eval_f1_micro": 0.9122664500405433,
"eval_loss": 0.13544484972953796,
"eval_prec_at_5": 0.27351598173515984,
"eval_prec_at_8": 0.1716609589041096,
"eval_prec_macro": 0.8873506319173318,
"eval_prec_micro": 0.8941082802547059,
"eval_rec_at_5": 0.975076103500761,
"eval_rec_at_8": 0.9779299847792998,
"eval_rec_macro": 0.9204235623584355,
"eval_rec_micro": 0.931177446102742,
"eval_runtime": 6.3105,
"eval_samples_per_second": 138.815,
"eval_steps_per_second": 17.431,
"step": 840
},
{
"epoch": 36.0,
"grad_norm": 0.011365901678800583,
"learning_rate": 0.0003,
"loss": 0.0013,
"step": 864
},
{
"epoch": 36.0,
"eval_acc_macro": 0.8432467559973965,
"eval_acc_micro": 0.8506097560974961,
"eval_auc_macro": 0.9889068205774477,
"eval_auc_micro": 0.9905521999573019,
"eval_f1_at_5": 0.42684788616938474,
"eval_f1_at_8": 0.2922793659426448,
"eval_f1_macro": 0.9125863753805709,
"eval_f1_micro": 0.9192751235584086,
"eval_loss": 0.1388498842716217,
"eval_prec_at_5": 0.2732876712328767,
"eval_prec_at_8": 0.17180365296803654,
"eval_prec_macro": 0.9106178664851478,
"eval_prec_micro": 0.9132569558100725,
"eval_rec_at_5": 0.9743150684931506,
"eval_rec_at_8": 0.978310502283105,
"eval_rec_macro": 0.9154554431692706,
"eval_rec_micro": 0.9253731343282815,
"eval_runtime": 6.1794,
"eval_samples_per_second": 141.761,
"eval_steps_per_second": 17.801,
"step": 864
},
{
"epoch": 37.0,
"grad_norm": 0.00758476834744215,
"learning_rate": 0.0003,
"loss": 0.0009,
"step": 888
},
{
"epoch": 37.0,
"eval_acc_macro": 0.8416703156465917,
"eval_acc_micro": 0.848985725018719,
"eval_auc_macro": 0.9891403369644753,
"eval_auc_micro": 0.9905162916700971,
"eval_f1_at_5": 0.4261995886925304,
"eval_f1_at_8": 0.2915919235644548,
"eval_f1_macro": 0.9111295247504767,
"eval_f1_micro": 0.9183258837870039,
"eval_loss": 0.13994979858398438,
"eval_prec_at_5": 0.27283105022831056,
"eval_prec_at_8": 0.1713755707762557,
"eval_prec_macro": 0.8945120800094545,
"eval_prec_micro": 0.9003984063744302,
"eval_rec_at_5": 0.9733637747336377,
"eval_rec_at_8": 0.9767884322678843,
"eval_rec_macro": 0.9295761415839934,
"eval_rec_micro": 0.9369817578772025,
"eval_runtime": 6.299,
"eval_samples_per_second": 139.071,
"eval_steps_per_second": 17.463,
"step": 888
},
{
"epoch": 38.0,
"grad_norm": 0.005984355695545673,
"learning_rate": 0.0003,
"loss": 0.0009,
"step": 912
},
{
"epoch": 38.0,
"eval_acc_macro": 0.8453744008028368,
"eval_acc_micro": 0.8551617873651113,
"eval_auc_macro": 0.9890338148789555,
"eval_auc_micro": 0.9911242376839919,
"eval_f1_at_5": 0.4271994046239887,
"eval_f1_at_8": 0.29182390543479947,
"eval_f1_macro": 0.9134487053983307,
"eval_f1_micro": 0.9219269102989267,
"eval_loss": 0.13640232384204865,
"eval_prec_at_5": 0.27351598173515984,
"eval_prec_at_8": 0.17151826484018265,
"eval_prec_macro": 0.9185694511313961,
"eval_prec_micro": 0.923460898502419,
"eval_rec_at_5": 0.975076103500761,
"eval_rec_at_8": 0.977359208523592,
"eval_rec_macro": 0.9098787791583011,
"eval_rec_micro": 0.9203980099501724,
"eval_runtime": 6.2788,
"eval_samples_per_second": 139.517,
"eval_steps_per_second": 17.519,
"step": 912
}
],
"logging_steps": 500,
"max_steps": 960,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}