{ "best_metric": 0.9134487053983307, "best_model_checkpoint": "./saved_models/mamba_prompt_sbdh_gpt4_v2_0/checkpoint-912", "epoch": 38.0, "eval_steps": 500, "global_step": 912, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 8.483881950378418, "learning_rate": 0.0003, "loss": 1.2055, "step": 24 }, { "epoch": 1.0, "eval_acc_macro": 0.07039666648893216, "eval_acc_micro": 0.17493620640770696, "eval_auc_macro": 0.7109566412757288, "eval_auc_micro": 0.6509757698362547, "eval_f1_at_5": 0.2568182822325969, "eval_f1_at_8": 0.22372279922068555, "eval_f1_macro": 0.11555043034927992, "eval_f1_micro": 0.2977799227799084, "eval_loss": 0.4126754105091095, "eval_prec_at_5": 0.16506849315068495, "eval_prec_at_8": 0.1317066210045662, "eval_prec_macro": 0.0712520271567465, "eval_prec_micro": 0.21000680735193294, "eval_rec_at_5": 0.5781963470319634, "eval_rec_at_8": 0.7423896499238966, "eval_rec_macro": 0.3187634216175277, "eval_rec_micro": 0.5116086235488796, "eval_runtime": 6.0156, "eval_samples_per_second": 145.621, "eval_steps_per_second": 18.286, "step": 24 }, { "epoch": 2.0, "grad_norm": 2.6864304542541504, "learning_rate": 0.0003, "loss": 0.3514, "step": 48 }, { "epoch": 2.0, "eval_acc_macro": 0.5158272708489711, "eval_acc_micro": 0.5578747628083138, "eval_auc_macro": 0.94707367087266, "eval_auc_micro": 0.913787855621242, "eval_f1_at_5": 0.3898187899179248, "eval_f1_at_8": 0.2799604575624912, "eval_f1_macro": 0.646736079674674, "eval_f1_micro": 0.7161997563945827, "eval_loss": 0.2387184202671051, "eval_prec_at_5": 0.24908675799086763, "eval_prec_at_8": 0.1643835616438356, "eval_prec_macro": 0.6802826117671307, "eval_prec_micro": 0.7016706443913523, "eval_rec_at_5": 0.8961187214611872, "eval_rec_at_8": 0.9429223744292238, "eval_rec_macro": 0.6832268849511623, "eval_rec_micro": 0.7313432835820289, "eval_runtime": 6.048, "eval_samples_per_second": 144.841, "eval_steps_per_second": 18.188, "step": 48 }, { "epoch": 3.0, "grad_norm": 2.4849371910095215, "learning_rate": 0.0003, "loss": 0.1682, "step": 72 }, { "epoch": 3.0, "eval_acc_macro": 0.7840879005682694, "eval_acc_micro": 0.8028064992613883, "eval_auc_macro": 0.9886749124239317, "eval_auc_micro": 0.990273843904626, "eval_f1_at_5": 0.42751438858977425, "eval_f1_at_8": 0.2925113305987028, "eval_f1_macro": 0.8710656054515781, "eval_f1_micro": 0.8906185989347897, "eval_loss": 0.07425953447818756, "eval_prec_at_5": 0.27374429223744295, "eval_prec_at_8": 0.17194634703196346, "eval_prec_macro": 0.878761745264005, "eval_prec_micro": 0.8801619433197667, "eval_rec_at_5": 0.9754566210045662, "eval_rec_at_8": 0.9788812785388128, "eval_rec_macro": 0.8815695470069095, "eval_rec_micro": 0.9013266998340878, "eval_runtime": 6.0775, "eval_samples_per_second": 144.138, "eval_steps_per_second": 18.1, "step": 72 }, { "epoch": 4.0, "grad_norm": 1.911177396774292, "learning_rate": 0.0003, "loss": 0.0646, "step": 96 }, { "epoch": 4.0, "eval_acc_macro": 0.8321415173591932, "eval_acc_micro": 0.8384442782347914, "eval_auc_macro": 0.9920630589905021, "eval_auc_micro": 0.9932832791412234, "eval_f1_at_5": 0.4278476261962846, "eval_f1_at_8": 0.29274329072668936, "eval_f1_macro": 0.9063058610149609, "eval_f1_micro": 0.9121236777867442, "eval_loss": 0.06308761239051819, "eval_prec_at_5": 0.27397260273972607, "eval_prec_at_8": 0.1720890410958904, "eval_prec_macro": 0.8935275833247776, "eval_prec_micro": 0.8953674121405035, "eval_rec_at_5": 0.976027397260274, "eval_rec_at_8": 0.9794520547945206, "eval_rec_macro": 0.9215877913582228, "eval_rec_micro": 0.929519071310039, "eval_runtime": 6.085, "eval_samples_per_second": 143.962, "eval_steps_per_second": 18.077, "step": 96 }, { "epoch": 5.0, "grad_norm": 0.14926180243492126, "learning_rate": 0.0003, "loss": 0.0416, "step": 120 }, { "epoch": 5.0, "eval_acc_macro": 0.835711620798801, "eval_acc_micro": 0.844599844599779, "eval_auc_macro": 0.99367524750647, "eval_auc_micro": 0.9950181038357179, "eval_f1_at_5": 0.428217417200729, "eval_f1_at_8": 0.2925113305987028, "eval_f1_macro": 0.9086207273252621, "eval_f1_micro": 0.9157540016848428, "eval_loss": 0.056059833616018295, "eval_prec_at_5": 0.2742009132420091, "eval_prec_at_8": 0.17194634703196346, "eval_prec_macro": 0.9238859033605576, "eval_prec_micro": 0.9306506849314271, "eval_rec_at_5": 0.9769786910197868, "eval_rec_at_8": 0.9788812785388128, "eval_rec_macro": 0.8949097880182088, "eval_rec_micro": 0.9013266998340878, "eval_runtime": 6.0515, "eval_samples_per_second": 144.758, "eval_steps_per_second": 18.177, "step": 120 }, { "epoch": 6.0, "grad_norm": 0.24564415216445923, "learning_rate": 0.0003, "loss": 0.0281, "step": 144 }, { "epoch": 6.0, "eval_acc_macro": 0.8427241930303886, "eval_acc_micro": 0.8486486486485831, "eval_auc_macro": 0.9930039223823353, "eval_auc_micro": 0.9943083137218841, "eval_f1_at_5": 0.42721766176318765, "eval_f1_at_8": 0.2925113305987028, "eval_f1_macro": 0.9126603684260771, "eval_f1_micro": 0.9181286549706835, "eval_loss": 0.05612677335739136, "eval_prec_at_5": 0.27351598173515984, "eval_prec_at_8": 0.17194634703196346, "eval_prec_macro": 0.9258898765719902, "eval_prec_micro": 0.9250841750840971, "eval_rec_at_5": 0.9752663622526635, "eval_rec_at_8": 0.9788812785388128, "eval_rec_macro": 0.9012770790636472, "eval_rec_micro": 0.9112769485903058, "eval_runtime": 6.0637, "eval_samples_per_second": 144.466, "eval_steps_per_second": 18.141, "step": 144 }, { "epoch": 7.0, "grad_norm": 0.17717961966991425, "learning_rate": 0.0003, "loss": 0.0163, "step": 168 }, { "epoch": 7.0, "eval_acc_macro": 0.8320704316636002, "eval_acc_micro": 0.8395155185464921, "eval_auc_macro": 0.9927596537595381, "eval_auc_micro": 0.9942892457976611, "eval_f1_at_5": 0.4287923569488756, "eval_f1_at_8": 0.29274329072668936, "eval_f1_macro": 0.9054968756763103, "eval_f1_micro": 0.9127572016460155, "eval_loss": 0.07535412162542343, "eval_prec_at_5": 0.27465753424657535, "eval_prec_at_8": 0.1720890410958904, "eval_prec_macro": 0.895837475867586, "eval_prec_micro": 0.9060457516339129, "eval_rec_at_5": 0.9771689497716894, "eval_rec_at_8": 0.9794520547945206, "eval_rec_macro": 0.9167258374697121, "eval_rec_micro": 0.9195688225538209, "eval_runtime": 6.0576, "eval_samples_per_second": 144.612, "eval_steps_per_second": 18.159, "step": 168 }, { "epoch": 8.0, "grad_norm": 0.27908530831336975, "learning_rate": 0.0003, "loss": 0.0109, "step": 192 }, { "epoch": 8.0, "eval_acc_macro": 0.8295268992370044, "eval_acc_micro": 0.832209737827653, "eval_auc_macro": 0.9930334009743317, "eval_auc_micro": 0.9938628905761329, "eval_f1_at_5": 0.42918048633589306, "eval_f1_at_8": 0.29274329072668936, "eval_f1_macro": 0.9041576767954438, "eval_f1_micro": 0.908421913327808, "eval_loss": 0.07922064512968063, "eval_prec_at_5": 0.27488584474885847, "eval_prec_at_8": 0.1720890410958904, "eval_prec_macro": 0.8907484139672496, "eval_prec_micro": 0.8959677419354116, "eval_rec_at_5": 0.978310502283105, "eval_rec_at_8": 0.9794520547945206, "eval_rec_macro": 0.9213085265888775, "eval_rec_micro": 0.9212271973465239, "eval_runtime": 6.0722, "eval_samples_per_second": 144.265, "eval_steps_per_second": 18.115, "step": 192 }, { "epoch": 9.0, "grad_norm": 0.14036260545253754, "learning_rate": 0.0003, "loss": 0.0074, "step": 216 }, { "epoch": 9.0, "eval_acc_macro": 0.8385687066413228, "eval_acc_micro": 0.844961240310012, "eval_auc_macro": 0.9927809510576332, "eval_auc_micro": 0.9942359625327764, "eval_f1_at_5": 0.4295136788854873, "eval_f1_at_8": 0.29274329072668936, "eval_f1_macro": 0.9094505520360644, "eval_f1_micro": 0.915966386554545, "eval_loss": 0.08278516680002213, "eval_prec_at_5": 0.2751141552511416, "eval_prec_at_8": 0.1720890410958904, "eval_prec_macro": 0.9210993789406117, "eval_prec_micro": 0.928449744463294, "eval_rec_at_5": 0.9788812785388128, "eval_rec_at_8": 0.9794520547945206, "eval_rec_macro": 0.8989352557922053, "eval_rec_micro": 0.9038142620231423, "eval_runtime": 6.0864, "eval_samples_per_second": 143.928, "eval_steps_per_second": 18.073, "step": 216 }, { "epoch": 10.0, "grad_norm": 0.192245751619339, "learning_rate": 0.0003, "loss": 0.0066, "step": 240 }, { "epoch": 10.0, "eval_acc_macro": 0.841830141960627, "eval_acc_micro": 0.8498475609755449, "eval_auc_macro": 0.9923313106695196, "eval_auc_micro": 0.9933376316355039, "eval_f1_at_5": 0.4275509226486327, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.9117612718437879, "eval_f1_micro": 0.9188298310670853, "eval_loss": 0.0772981271147728, "eval_prec_at_5": 0.27374429223744295, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.9114916513520281, "eval_prec_micro": 0.9131859131858384, "eval_rec_at_5": 0.9758371385083713, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.9136710504527534, "eval_rec_micro": 0.92454394693193, "eval_runtime": 5.9768, "eval_samples_per_second": 146.566, "eval_steps_per_second": 18.404, "step": 240 }, { "epoch": 11.0, "grad_norm": 0.11837983131408691, "learning_rate": 0.0003, "loss": 0.0057, "step": 264 }, { "epoch": 11.0, "eval_acc_macro": 0.8320358706720068, "eval_acc_micro": 0.836842105263095, "eval_auc_macro": 0.990286728874436, "eval_auc_micro": 0.9913156297458193, "eval_f1_at_5": 0.42747783226694747, "eval_f1_at_8": 0.2920219218917755, "eval_f1_macro": 0.9058723874403682, "eval_f1_micro": 0.911174785100212, "eval_loss": 0.10789646208286285, "eval_prec_at_5": 0.27374429223744295, "eval_prec_at_8": 0.1716609589041096, "eval_prec_macro": 0.8914304927691732, "eval_prec_micro": 0.8997574777687227, "eval_rec_at_5": 0.975076103500761, "eval_rec_at_8": 0.9771689497716894, "eval_rec_macro": 0.9231550617780325, "eval_rec_micro": 0.922885572139227, "eval_runtime": 6.0106, "eval_samples_per_second": 145.742, "eval_steps_per_second": 18.301, "step": 264 }, { "epoch": 12.0, "grad_norm": 0.16925422847270966, "learning_rate": 0.0003, "loss": 0.005, "step": 288 }, { "epoch": 12.0, "eval_acc_macro": 0.8364454962531706, "eval_acc_micro": 0.8397581254723477, "eval_auc_macro": 0.9908748071505288, "eval_auc_micro": 0.9917851996554229, "eval_f1_at_5": 0.42708974483795964, "eval_f1_at_8": 0.2925113305987028, "eval_f1_macro": 0.9084368033771916, "eval_f1_micro": 0.9129005751848058, "eval_loss": 0.09441287070512772, "eval_prec_at_5": 0.27351598173515984, "eval_prec_at_8": 0.17194634703196346, "eval_prec_macro": 0.8995515858307809, "eval_prec_micro": 0.9047231270357569, "eval_rec_at_5": 0.9739345509893454, "eval_rec_at_8": 0.9788812785388128, "eval_rec_macro": 0.9188760380015673, "eval_rec_micro": 0.9212271973465239, "eval_runtime": 6.0148, "eval_samples_per_second": 145.64, "eval_steps_per_second": 18.288, "step": 288 }, { "epoch": 13.0, "grad_norm": 0.25462788343429565, "learning_rate": 0.0003, "loss": 0.0045, "step": 312 }, { "epoch": 13.0, "eval_acc_macro": 0.8219219437469505, "eval_acc_micro": 0.8327165062915741, "eval_auc_macro": 0.9892036337462419, "eval_auc_micro": 0.9914230873000853, "eval_f1_at_5": 0.42851407428878907, "eval_f1_at_8": 0.2918154230125642, "eval_f1_macro": 0.8990693006867976, "eval_f1_micro": 0.9087237479805405, "eval_loss": 0.11245805770158768, "eval_prec_at_5": 0.2744292237442923, "eval_prec_at_8": 0.17151826484018265, "eval_prec_macro": 0.872997498581981, "eval_prec_micro": 0.8858267716534736, "eval_rec_at_5": 0.9771689497716894, "eval_rec_at_8": 0.9771689497716894, "eval_rec_macro": 0.9280030786669166, "eval_rec_micro": 0.932835820895445, "eval_runtime": 6.2398, "eval_samples_per_second": 140.389, "eval_steps_per_second": 17.629, "step": 312 }, { "epoch": 14.0, "grad_norm": 0.13963262736797333, "learning_rate": 0.0003, "loss": 0.0029, "step": 336 }, { "epoch": 14.0, "eval_acc_macro": 0.8398618464555664, "eval_acc_micro": 0.8437499999999356, "eval_auc_macro": 0.9909268418920506, "eval_auc_micro": 0.9918402649693007, "eval_f1_at_5": 0.42740465274793293, "eval_f1_at_8": 0.2920473967500829, "eval_f1_macro": 0.910421950810437, "eval_f1_micro": 0.9152542372880599, "eval_loss": 0.13447707891464233, "eval_prec_at_5": 0.27374429223744295, "eval_prec_at_8": 0.1716609589041096, "eval_prec_macro": 0.907525335697792, "eval_prec_micro": 0.9126133553173196, "eval_rec_at_5": 0.9743150684931506, "eval_rec_at_8": 0.9777397260273972, "eval_rec_macro": 0.9137157897519602, "eval_rec_micro": 0.9179104477611179, "eval_runtime": 6.2625, "eval_samples_per_second": 139.881, "eval_steps_per_second": 17.565, "step": 336 }, { "epoch": 15.0, "grad_norm": 0.1901983767747879, "learning_rate": 0.0003, "loss": 0.0032, "step": 360 }, { "epoch": 15.0, "eval_acc_macro": 0.8357224650035576, "eval_acc_micro": 0.8455098934550346, "eval_auc_macro": 0.9892990457525107, "eval_auc_micro": 0.990561377509615, "eval_f1_at_5": 0.4254418080308462, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.9080581868434588, "eval_f1_micro": 0.9162886597937389, "eval_loss": 0.10820472985506058, "eval_prec_at_5": 0.27237442922374433, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.9057447684648601, "eval_prec_micro": 0.9114027891713772, "eval_rec_at_5": 0.9712709284627092, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.9111621041819294, "eval_rec_micro": 0.9212271973465239, "eval_runtime": 6.3038, "eval_samples_per_second": 138.963, "eval_steps_per_second": 17.45, "step": 360 }, { "epoch": 16.0, "grad_norm": 0.17108500003814697, "learning_rate": 0.0003, "loss": 0.0027, "step": 384 }, { "epoch": 16.0, "eval_acc_macro": 0.8297448303381417, "eval_acc_micro": 0.8398169336383798, "eval_auc_macro": 0.9902632756647023, "eval_auc_micro": 0.9922601157120051, "eval_f1_at_5": 0.4277927620668724, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.9041211387604378, "eval_f1_micro": 0.912935323383009, "eval_loss": 0.12706510722637177, "eval_prec_at_5": 0.273972602739726, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.9060353096841967, "eval_prec_micro": 0.9129353233830089, "eval_rec_at_5": 0.9754566210045662, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.9032650308288451, "eval_rec_micro": 0.9129353233830089, "eval_runtime": 6.7247, "eval_samples_per_second": 130.266, "eval_steps_per_second": 16.358, "step": 384 }, { "epoch": 17.0, "grad_norm": 0.06544364243745804, "learning_rate": 0.0003, "loss": 0.0029, "step": 408 }, { "epoch": 17.0, "eval_acc_macro": 0.829765891496947, "eval_acc_micro": 0.8365384615383996, "eval_auc_macro": 0.9893010896141785, "eval_auc_micro": 0.9914586391774918, "eval_f1_at_5": 0.42714459980714947, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.9037391479019817, "eval_f1_micro": 0.9109947643978323, "eval_loss": 0.10541332513093948, "eval_prec_at_5": 0.27351598173515984, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.8801020034653725, "eval_prec_micro": 0.8856695379795704, "eval_rec_at_5": 0.9745053272450532, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.9310175234124923, "eval_rec_micro": 0.937810945273554, "eval_runtime": 6.3322, "eval_samples_per_second": 138.34, "eval_steps_per_second": 17.371, "step": 408 }, { "epoch": 18.0, "grad_norm": 0.12380898743867874, "learning_rate": 0.0003, "loss": 0.0028, "step": 432 }, { "epoch": 18.0, "eval_acc_macro": 0.8295264706506825, "eval_acc_micro": 0.8390718562873624, "eval_auc_macro": 0.9886105199219366, "eval_auc_micro": 0.9908788495376829, "eval_f1_at_5": 0.42679308597884263, "eval_f1_at_8": 0.2918154230125642, "eval_f1_macro": 0.9042231261610075, "eval_f1_micro": 0.9124949124948383, "eval_loss": 0.11824628710746765, "eval_prec_at_5": 0.2732876712328767, "eval_prec_at_8": 0.17151826484018265, "eval_prec_macro": 0.8888519630940449, "eval_prec_micro": 0.8960831334931337, "eval_rec_at_5": 0.973744292237443, "eval_rec_at_8": 0.9771689497716894, "eval_rec_macro": 0.9215852252593782, "eval_rec_micro": 0.929519071310039, "eval_runtime": 6.3155, "eval_samples_per_second": 138.707, "eval_steps_per_second": 17.418, "step": 432 }, { "epoch": 19.0, "grad_norm": 0.21812734007835388, "learning_rate": 0.0003, "loss": 0.0024, "step": 456 }, { "epoch": 19.0, "eval_acc_macro": 0.829661993373597, "eval_acc_micro": 0.8388554216866838, "eval_auc_macro": 0.9894816577253542, "eval_auc_micro": 0.991705185655646, "eval_f1_at_5": 0.42714459980714947, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.9036099130579357, "eval_f1_micro": 0.9123669123668376, "eval_loss": 0.1079055592417717, "eval_prec_at_5": 0.27351598173515984, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.8959752170714762, "eval_prec_micro": 0.9012944983818041, "eval_rec_at_5": 0.9745053272450532, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.9119091133829933, "eval_rec_micro": 0.9237147595355785, "eval_runtime": 6.2636, "eval_samples_per_second": 139.855, "eval_steps_per_second": 17.562, "step": 456 }, { "epoch": 20.0, "grad_norm": 0.14500826597213745, "learning_rate": 0.0003, "loss": 0.0026, "step": 480 }, { "epoch": 20.0, "eval_acc_macro": 0.8388928681043534, "eval_acc_micro": 0.8470764617690519, "eval_auc_macro": 0.9914346196687225, "eval_auc_micro": 0.9929207212736375, "eval_f1_at_5": 0.42845916764894976, "eval_f1_at_8": 0.2925113305987028, "eval_f1_macro": 0.9099987722528428, "eval_f1_micro": 0.9172077922077178, "eval_loss": 0.12154436111450195, "eval_prec_at_5": 0.2744292237442923, "eval_prec_at_8": 0.17194634703196346, "eval_prec_macro": 0.8884541003489376, "eval_prec_micro": 0.898251192368768, "eval_rec_at_5": 0.9765981735159818, "eval_rec_at_8": 0.9788812785388128, "eval_rec_macro": 0.9349802408736304, "eval_rec_micro": 0.9369817578772025, "eval_runtime": 6.1697, "eval_samples_per_second": 141.984, "eval_steps_per_second": 17.829, "step": 480 }, { "epoch": 21.0, "grad_norm": 0.12059065699577332, "learning_rate": 0.0003, "loss": 0.002, "step": 504 }, { "epoch": 21.0, "eval_acc_macro": 0.840415915936909, "eval_acc_micro": 0.8496978851963104, "eval_auc_macro": 0.9902023505450398, "eval_auc_micro": 0.9918164746152466, "eval_f1_at_5": 0.42708974483795964, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.9110529776466078, "eval_f1_micro": 0.9187423438137264, "eval_loss": 0.1368735283613205, "eval_prec_at_5": 0.27351598173515984, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.8969397087072427, "eval_prec_micro": 0.9050683829444163, "eval_rec_at_5": 0.9739345509893454, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.9266197213944322, "eval_rec_micro": 0.932835820895445, "eval_runtime": 6.2964, "eval_samples_per_second": 139.126, "eval_steps_per_second": 17.47, "step": 504 }, { "epoch": 22.0, "grad_norm": 0.1459818333387375, "learning_rate": 0.0003, "loss": 0.0022, "step": 528 }, { "epoch": 22.0, "eval_acc_macro": 0.8299074532155761, "eval_acc_micro": 0.8387573964496421, "eval_auc_macro": 0.9912484493432677, "eval_auc_micro": 0.9923215964022568, "eval_f1_at_5": 0.42714459980714947, "eval_f1_at_8": 0.29205588274802374, "eval_f1_macro": 0.9036433853106406, "eval_f1_micro": 0.9123089300079716, "eval_loss": 0.12988413870334625, "eval_prec_at_5": 0.27351598173515984, "eval_prec_at_8": 0.1716609589041096, "eval_prec_macro": 0.8778252010823914, "eval_prec_micro": 0.8859374999999308, "eval_rec_at_5": 0.9745053272450532, "eval_rec_at_8": 0.9779299847792998, "eval_rec_macro": 0.9334072472404634, "eval_rec_micro": 0.9402985074626086, "eval_runtime": 6.2795, "eval_samples_per_second": 139.501, "eval_steps_per_second": 17.517, "step": 528 }, { "epoch": 23.0, "grad_norm": 0.23651210963726044, "learning_rate": 0.0003, "loss": 0.0023, "step": 552 }, { "epoch": 23.0, "eval_acc_macro": 0.8331982367002823, "eval_acc_micro": 0.8412213740457373, "eval_auc_macro": 0.9907246610096498, "eval_auc_micro": 0.9922808765827714, "eval_f1_at_5": 0.42686614175859805, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.9057038011597318, "eval_f1_micro": 0.9137645107793605, "eval_loss": 0.1514243185520172, "eval_prec_at_5": 0.2732876712328767, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.9057572791903493, "eval_prec_micro": 0.9137645107793604, "eval_rec_at_5": 0.9745053272450532, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.9065147380552868, "eval_rec_micro": 0.9137645107793604, "eval_runtime": 6.3102, "eval_samples_per_second": 138.822, "eval_steps_per_second": 17.432, "step": 552 }, { "epoch": 24.0, "grad_norm": 0.09079229086637497, "learning_rate": 0.0003, "loss": 0.0026, "step": 576 }, { "epoch": 24.0, "eval_acc_macro": 0.8361929136599193, "eval_acc_micro": 0.8474446987032153, "eval_auc_macro": 0.990970820997881, "eval_auc_micro": 0.9927761079978714, "eval_f1_at_5": 0.4275326584009282, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.9082187775109158, "eval_f1_micro": 0.9174236168455063, "eval_loss": 0.1161712110042572, "eval_prec_at_5": 0.27374429223744295, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.9024833914958587, "eval_prec_micro": 0.9136513157893985, "eval_rec_at_5": 0.9756468797564688, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.9147050110934346, "eval_rec_micro": 0.9212271973465239, "eval_runtime": 6.2478, "eval_samples_per_second": 140.209, "eval_steps_per_second": 17.606, "step": 576 }, { "epoch": 25.0, "grad_norm": 0.1555357277393341, "learning_rate": 0.0003, "loss": 0.0011, "step": 600 }, { "epoch": 25.0, "eval_acc_macro": 0.8261213062726463, "eval_acc_micro": 0.8319763138415373, "eval_auc_macro": 0.990040119063344, "eval_auc_micro": 0.9911265543476828, "eval_f1_at_5": 0.4265511126863068, "eval_f1_at_8": 0.2920473967500829, "eval_f1_macro": 0.9014179315917096, "eval_f1_micro": 0.9082828282827549, "eval_loss": 0.17262162268161774, "eval_prec_at_5": 0.2730593607305936, "eval_prec_at_8": 0.1716609589041096, "eval_prec_macro": 0.8792666911124373, "eval_prec_micro": 0.8857368006303479, "eval_rec_at_5": 0.974124809741248, "eval_rec_at_8": 0.9777397260273972, "eval_rec_macro": 0.9306207402078307, "eval_rec_micro": 0.9320066334990935, "eval_runtime": 6.1427, "eval_samples_per_second": 142.608, "eval_steps_per_second": 17.907, "step": 600 }, { "epoch": 26.0, "grad_norm": 0.054534025490283966, "learning_rate": 0.0003, "loss": 0.0016, "step": 624 }, { "epoch": 26.0, "eval_acc_macro": 0.826194090509476, "eval_acc_micro": 0.8362004487658312, "eval_auc_macro": 0.9894373225891121, "eval_auc_micro": 0.9909356077981039, "eval_f1_at_5": 0.42686614175859805, "eval_f1_at_8": 0.2918154230125642, "eval_f1_macro": 0.9018300611095432, "eval_f1_micro": 0.910794297352268, "eval_loss": 0.1341952532529831, "eval_prec_at_5": 0.2732876712328767, "eval_prec_at_8": 0.17151826484018265, "eval_prec_macro": 0.8863083621122576, "eval_prec_micro": 0.8951160928742278, "eval_rec_at_5": 0.9745053272450532, "eval_rec_at_8": 0.9771689497716894, "eval_rec_macro": 0.9193115078475559, "eval_rec_micro": 0.9270315091209844, "eval_runtime": 6.2441, "eval_samples_per_second": 140.292, "eval_steps_per_second": 17.617, "step": 624 }, { "epoch": 27.0, "grad_norm": 0.18318401277065277, "learning_rate": 0.0003, "loss": 0.0017, "step": 648 }, { "epoch": 27.0, "eval_acc_macro": 0.8331625409086375, "eval_acc_micro": 0.844547563805039, "eval_auc_macro": 0.9903700036429086, "eval_auc_micro": 0.9911557799511647, "eval_f1_at_5": 0.4260900570497148, "eval_f1_at_8": 0.2915919235644548, "eval_f1_macro": 0.9059994264183651, "eval_f1_micro": 0.9157232704401748, "eval_loss": 0.16900603473186493, "eval_prec_at_5": 0.27283105022831056, "eval_prec_at_8": 0.1713755707762557, "eval_prec_macro": 0.9152116568385661, "eval_prec_micro": 0.9262086513994126, "eval_rec_at_5": 0.9722222222222222, "eval_rec_at_8": 0.9767884322678843, "eval_rec_macro": 0.8973109240252097, "eval_rec_micro": 0.9054726368158453, "eval_runtime": 6.3369, "eval_samples_per_second": 138.239, "eval_steps_per_second": 17.359, "step": 648 }, { "epoch": 28.0, "grad_norm": 0.09100370109081268, "learning_rate": 0.0003, "loss": 0.002, "step": 672 }, { "epoch": 28.0, "eval_acc_macro": 0.834589729212194, "eval_acc_micro": 0.8436781609194756, "eval_auc_macro": 0.9901591404278554, "eval_auc_micro": 0.9904570385410867, "eval_f1_at_5": 0.4261448479661988, "eval_f1_at_8": 0.29135993712848907, "eval_f1_macro": 0.906536458365489, "eval_f1_micro": 0.9152119700747368, "eval_loss": 0.12356158345937729, "eval_prec_at_5": 0.27283105022831056, "eval_prec_at_8": 0.17123287671232876, "eval_prec_macro": 0.9101814375182885, "eval_prec_micro": 0.9174999999999235, "eval_rec_at_5": 0.97279299847793, "eval_rec_at_8": 0.9762176560121765, "eval_rec_macro": 0.9036660250083727, "eval_rec_micro": 0.9129353233830089, "eval_runtime": 6.2292, "eval_samples_per_second": 140.629, "eval_steps_per_second": 17.659, "step": 672 }, { "epoch": 29.0, "grad_norm": 0.031184401363134384, "learning_rate": 0.0003, "loss": 0.0013, "step": 696 }, { "epoch": 29.0, "eval_acc_macro": 0.8341647633718688, "eval_acc_micro": 0.8435321456234822, "eval_auc_macro": 0.9909608628516886, "eval_auc_micro": 0.9916707030076353, "eval_f1_at_5": 0.42647810767423283, "eval_f1_at_8": 0.2915834447216144, "eval_f1_macro": 0.9066166365653049, "eval_f1_micro": 0.9151260504200911, "eval_loss": 0.1419263482093811, "eval_prec_at_5": 0.27305936073059367, "eval_prec_at_8": 0.1713755707762557, "eval_prec_macro": 0.9206967590925624, "eval_prec_micro": 0.9275979557069056, "eval_rec_at_5": 0.9733637747336377, "eval_rec_at_8": 0.9765981735159818, "eval_rec_macro": 0.8938908104868867, "eval_rec_micro": 0.9029850746267908, "eval_runtime": 6.2406, "eval_samples_per_second": 140.372, "eval_steps_per_second": 17.627, "step": 696 }, { "epoch": 30.0, "grad_norm": 0.12148793786764145, "learning_rate": 0.0003, "loss": 0.001, "step": 720 }, { "epoch": 30.0, "eval_acc_macro": 0.8301734612803756, "eval_acc_micro": 0.8415007656967196, "eval_auc_macro": 0.9904576570433402, "eval_auc_micro": 0.9915527313643118, "eval_f1_at_5": 0.42708974483795964, "eval_f1_at_8": 0.2918154230125642, "eval_f1_macro": 0.9043513567183213, "eval_f1_micro": 0.9139293139292379, "eval_loss": 0.1600634753704071, "eval_prec_at_5": 0.27351598173515984, "eval_prec_at_8": 0.17151826484018265, "eval_prec_macro": 0.9050210447593683, "eval_prec_micro": 0.9165971643035098, "eval_rec_at_5": 0.9739345509893454, "eval_rec_at_8": 0.9771689497716894, "eval_rec_macro": 0.9048079473209437, "eval_rec_micro": 0.9112769485903058, "eval_runtime": 6.2514, "eval_samples_per_second": 140.128, "eval_steps_per_second": 17.596, "step": 720 }, { "epoch": 31.0, "grad_norm": 0.11479064077138901, "learning_rate": 0.0003, "loss": 0.0016, "step": 744 }, { "epoch": 31.0, "eval_acc_macro": 0.833818894129361, "eval_acc_micro": 0.8425787106446144, "eval_auc_macro": 0.9902376130208236, "eval_auc_micro": 0.9914569462309486, "eval_f1_at_5": 0.4271994046239887, "eval_f1_at_8": 0.2920473967500829, "eval_f1_macro": 0.9060990360615312, "eval_f1_micro": 0.9145646867371103, "eval_loss": 0.1426621824502945, "eval_prec_at_5": 0.27351598173515984, "eval_prec_at_8": 0.1716609589041096, "eval_prec_macro": 0.8877458031473394, "eval_prec_micro": 0.8977635782746887, "eval_rec_at_5": 0.975076103500761, "eval_rec_at_8": 0.9777397260273972, "eval_rec_macro": 0.9271635027618895, "eval_rec_micro": 0.9320066334990935, "eval_runtime": 6.1703, "eval_samples_per_second": 141.971, "eval_steps_per_second": 17.827, "step": 744 }, { "epoch": 32.0, "grad_norm": 0.0649285838007927, "learning_rate": 0.0003, "loss": 0.0011, "step": 768 }, { "epoch": 32.0, "eval_acc_macro": 0.830982026700095, "eval_acc_micro": 0.8421862971515902, "eval_auc_macro": 0.9898071389389859, "eval_auc_micro": 0.9906281152443956, "eval_f1_at_5": 0.4261448479661988, "eval_f1_at_8": 0.2918154230125642, "eval_f1_macro": 0.9044437561510542, "eval_f1_micro": 0.9143334726284233, "eval_loss": 0.15522228181362152, "eval_prec_at_5": 0.27283105022831056, "eval_prec_at_8": 0.17151826484018265, "eval_prec_macro": 0.914549760838821, "eval_prec_micro": 0.9216512215668979, "eval_rec_at_5": 0.97279299847793, "eval_rec_at_8": 0.9771689497716894, "eval_rec_macro": 0.89634766919242, "eval_rec_micro": 0.9071310116085483, "eval_runtime": 6.2856, "eval_samples_per_second": 139.366, "eval_steps_per_second": 17.5, "step": 768 }, { "epoch": 33.0, "grad_norm": 0.10257603228092194, "learning_rate": 0.0003, "loss": 0.001, "step": 792 }, { "epoch": 33.0, "eval_acc_macro": 0.8321851395971892, "eval_acc_micro": 0.8429878048779845, "eval_auc_macro": 0.9889132300057429, "eval_auc_micro": 0.9893601873860157, "eval_f1_at_5": 0.42647810767423283, "eval_f1_at_8": 0.2915919235644548, "eval_f1_macro": 0.9051919207399707, "eval_f1_micro": 0.9148056244829681, "eval_loss": 0.149822399020195, "eval_prec_at_5": 0.27305936073059367, "eval_prec_at_8": 0.1713755707762557, "eval_prec_macro": 0.9067171865229405, "eval_prec_micro": 0.9125412541253373, "eval_rec_at_5": 0.9733637747336377, "eval_rec_at_8": 0.9767884322678843, "eval_rec_macro": 0.906988014574614, "eval_rec_micro": 0.9170812603647663, "eval_runtime": 6.3392, "eval_samples_per_second": 138.188, "eval_steps_per_second": 17.352, "step": 792 }, { "epoch": 34.0, "grad_norm": 0.06269343197345734, "learning_rate": 0.0003, "loss": 0.0012, "step": 816 }, { "epoch": 34.0, "eval_acc_macro": 0.8356570074540155, "eval_acc_micro": 0.8457446808509995, "eval_auc_macro": 0.9885679122519199, "eval_auc_micro": 0.9892555811101381, "eval_f1_at_5": 0.42581157914042916, "eval_f1_at_8": 0.29135993712848907, "eval_f1_macro": 0.907980034255221, "eval_f1_micro": 0.9164265129682243, "eval_loss": 0.18450404703617096, "eval_prec_at_5": 0.2726027397260274, "eval_prec_at_8": 0.17123287671232876, "eval_prec_macro": 0.9065894385720981, "eval_prec_micro": 0.9100572363040956, "eval_rec_at_5": 0.9722222222222222, "eval_rec_at_8": 0.9762176560121765, "eval_rec_macro": 0.9109305984894259, "eval_rec_micro": 0.922885572139227, "eval_runtime": 6.2222, "eval_samples_per_second": 140.786, "eval_steps_per_second": 17.679, "step": 816 }, { "epoch": 35.0, "grad_norm": 0.060569193214178085, "learning_rate": 0.0003, "loss": 0.0015, "step": 840 }, { "epoch": 35.0, "eval_acc_macro": 0.8271139319864672, "eval_acc_micro": 0.8386855862583391, "eval_auc_macro": 0.9891685604004818, "eval_auc_micro": 0.9908738152492784, "eval_f1_at_5": 0.4271994046239887, "eval_f1_at_8": 0.29205588274802374, "eval_f1_macro": 0.9021260521405651, "eval_f1_micro": 0.9122664500405433, "eval_loss": 0.13544484972953796, "eval_prec_at_5": 0.27351598173515984, "eval_prec_at_8": 0.1716609589041096, "eval_prec_macro": 0.8873506319173318, "eval_prec_micro": 0.8941082802547059, "eval_rec_at_5": 0.975076103500761, "eval_rec_at_8": 0.9779299847792998, "eval_rec_macro": 0.9204235623584355, "eval_rec_micro": 0.931177446102742, "eval_runtime": 6.3105, "eval_samples_per_second": 138.815, "eval_steps_per_second": 17.431, "step": 840 }, { "epoch": 36.0, "grad_norm": 0.011365901678800583, "learning_rate": 0.0003, "loss": 0.0013, "step": 864 }, { "epoch": 36.0, "eval_acc_macro": 0.8432467559973965, "eval_acc_micro": 0.8506097560974961, "eval_auc_macro": 0.9889068205774477, "eval_auc_micro": 0.9905521999573019, "eval_f1_at_5": 0.42684788616938474, "eval_f1_at_8": 0.2922793659426448, "eval_f1_macro": 0.9125863753805709, "eval_f1_micro": 0.9192751235584086, "eval_loss": 0.1388498842716217, "eval_prec_at_5": 0.2732876712328767, "eval_prec_at_8": 0.17180365296803654, "eval_prec_macro": 0.9106178664851478, "eval_prec_micro": 0.9132569558100725, "eval_rec_at_5": 0.9743150684931506, "eval_rec_at_8": 0.978310502283105, "eval_rec_macro": 0.9154554431692706, "eval_rec_micro": 0.9253731343282815, "eval_runtime": 6.1794, "eval_samples_per_second": 141.761, "eval_steps_per_second": 17.801, "step": 864 }, { "epoch": 37.0, "grad_norm": 0.00758476834744215, "learning_rate": 0.0003, "loss": 0.0009, "step": 888 }, { "epoch": 37.0, "eval_acc_macro": 0.8416703156465917, "eval_acc_micro": 0.848985725018719, "eval_auc_macro": 0.9891403369644753, "eval_auc_micro": 0.9905162916700971, "eval_f1_at_5": 0.4261995886925304, "eval_f1_at_8": 0.2915919235644548, "eval_f1_macro": 0.9111295247504767, "eval_f1_micro": 0.9183258837870039, "eval_loss": 0.13994979858398438, "eval_prec_at_5": 0.27283105022831056, "eval_prec_at_8": 0.1713755707762557, "eval_prec_macro": 0.8945120800094545, "eval_prec_micro": 0.9003984063744302, "eval_rec_at_5": 0.9733637747336377, "eval_rec_at_8": 0.9767884322678843, "eval_rec_macro": 0.9295761415839934, "eval_rec_micro": 0.9369817578772025, "eval_runtime": 6.299, "eval_samples_per_second": 139.071, "eval_steps_per_second": 17.463, "step": 888 }, { "epoch": 38.0, "grad_norm": 0.005984355695545673, "learning_rate": 0.0003, "loss": 0.0009, "step": 912 }, { "epoch": 38.0, "eval_acc_macro": 0.8453744008028368, "eval_acc_micro": 0.8551617873651113, "eval_auc_macro": 0.9890338148789555, "eval_auc_micro": 0.9911242376839919, "eval_f1_at_5": 0.4271994046239887, "eval_f1_at_8": 0.29182390543479947, "eval_f1_macro": 0.9134487053983307, "eval_f1_micro": 0.9219269102989267, "eval_loss": 0.13640232384204865, "eval_prec_at_5": 0.27351598173515984, "eval_prec_at_8": 0.17151826484018265, "eval_prec_macro": 0.9185694511313961, "eval_prec_micro": 0.923460898502419, "eval_rec_at_5": 0.975076103500761, "eval_rec_at_8": 0.977359208523592, "eval_rec_macro": 0.9098787791583011, "eval_rec_micro": 0.9203980099501724, "eval_runtime": 6.2788, "eval_samples_per_second": 139.517, "eval_steps_per_second": 17.519, "step": 912 } ], "logging_steps": 500, "max_steps": 960, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }