|
{ |
|
"best_metric": 0.9134487053983307, |
|
"best_model_checkpoint": "./saved_models/mamba_prompt_sbdh_gpt4_v2_0/checkpoint-912", |
|
"epoch": 38.0, |
|
"eval_steps": 500, |
|
"global_step": 912, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 8.483881950378418, |
|
"learning_rate": 0.0003, |
|
"loss": 1.2055, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_acc_macro": 0.07039666648893216, |
|
"eval_acc_micro": 0.17493620640770696, |
|
"eval_auc_macro": 0.7109566412757288, |
|
"eval_auc_micro": 0.6509757698362547, |
|
"eval_f1_at_5": 0.2568182822325969, |
|
"eval_f1_at_8": 0.22372279922068555, |
|
"eval_f1_macro": 0.11555043034927992, |
|
"eval_f1_micro": 0.2977799227799084, |
|
"eval_loss": 0.4126754105091095, |
|
"eval_prec_at_5": 0.16506849315068495, |
|
"eval_prec_at_8": 0.1317066210045662, |
|
"eval_prec_macro": 0.0712520271567465, |
|
"eval_prec_micro": 0.21000680735193294, |
|
"eval_rec_at_5": 0.5781963470319634, |
|
"eval_rec_at_8": 0.7423896499238966, |
|
"eval_rec_macro": 0.3187634216175277, |
|
"eval_rec_micro": 0.5116086235488796, |
|
"eval_runtime": 6.0156, |
|
"eval_samples_per_second": 145.621, |
|
"eval_steps_per_second": 18.286, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.6864304542541504, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3514, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_acc_macro": 0.5158272708489711, |
|
"eval_acc_micro": 0.5578747628083138, |
|
"eval_auc_macro": 0.94707367087266, |
|
"eval_auc_micro": 0.913787855621242, |
|
"eval_f1_at_5": 0.3898187899179248, |
|
"eval_f1_at_8": 0.2799604575624912, |
|
"eval_f1_macro": 0.646736079674674, |
|
"eval_f1_micro": 0.7161997563945827, |
|
"eval_loss": 0.2387184202671051, |
|
"eval_prec_at_5": 0.24908675799086763, |
|
"eval_prec_at_8": 0.1643835616438356, |
|
"eval_prec_macro": 0.6802826117671307, |
|
"eval_prec_micro": 0.7016706443913523, |
|
"eval_rec_at_5": 0.8961187214611872, |
|
"eval_rec_at_8": 0.9429223744292238, |
|
"eval_rec_macro": 0.6832268849511623, |
|
"eval_rec_micro": 0.7313432835820289, |
|
"eval_runtime": 6.048, |
|
"eval_samples_per_second": 144.841, |
|
"eval_steps_per_second": 18.188, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.4849371910095215, |
|
"learning_rate": 0.0003, |
|
"loss": 0.1682, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_acc_macro": 0.7840879005682694, |
|
"eval_acc_micro": 0.8028064992613883, |
|
"eval_auc_macro": 0.9886749124239317, |
|
"eval_auc_micro": 0.990273843904626, |
|
"eval_f1_at_5": 0.42751438858977425, |
|
"eval_f1_at_8": 0.2925113305987028, |
|
"eval_f1_macro": 0.8710656054515781, |
|
"eval_f1_micro": 0.8906185989347897, |
|
"eval_loss": 0.07425953447818756, |
|
"eval_prec_at_5": 0.27374429223744295, |
|
"eval_prec_at_8": 0.17194634703196346, |
|
"eval_prec_macro": 0.878761745264005, |
|
"eval_prec_micro": 0.8801619433197667, |
|
"eval_rec_at_5": 0.9754566210045662, |
|
"eval_rec_at_8": 0.9788812785388128, |
|
"eval_rec_macro": 0.8815695470069095, |
|
"eval_rec_micro": 0.9013266998340878, |
|
"eval_runtime": 6.0775, |
|
"eval_samples_per_second": 144.138, |
|
"eval_steps_per_second": 18.1, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.911177396774292, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0646, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_acc_macro": 0.8321415173591932, |
|
"eval_acc_micro": 0.8384442782347914, |
|
"eval_auc_macro": 0.9920630589905021, |
|
"eval_auc_micro": 0.9932832791412234, |
|
"eval_f1_at_5": 0.4278476261962846, |
|
"eval_f1_at_8": 0.29274329072668936, |
|
"eval_f1_macro": 0.9063058610149609, |
|
"eval_f1_micro": 0.9121236777867442, |
|
"eval_loss": 0.06308761239051819, |
|
"eval_prec_at_5": 0.27397260273972607, |
|
"eval_prec_at_8": 0.1720890410958904, |
|
"eval_prec_macro": 0.8935275833247776, |
|
"eval_prec_micro": 0.8953674121405035, |
|
"eval_rec_at_5": 0.976027397260274, |
|
"eval_rec_at_8": 0.9794520547945206, |
|
"eval_rec_macro": 0.9215877913582228, |
|
"eval_rec_micro": 0.929519071310039, |
|
"eval_runtime": 6.085, |
|
"eval_samples_per_second": 143.962, |
|
"eval_steps_per_second": 18.077, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.14926180243492126, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0416, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_acc_macro": 0.835711620798801, |
|
"eval_acc_micro": 0.844599844599779, |
|
"eval_auc_macro": 0.99367524750647, |
|
"eval_auc_micro": 0.9950181038357179, |
|
"eval_f1_at_5": 0.428217417200729, |
|
"eval_f1_at_8": 0.2925113305987028, |
|
"eval_f1_macro": 0.9086207273252621, |
|
"eval_f1_micro": 0.9157540016848428, |
|
"eval_loss": 0.056059833616018295, |
|
"eval_prec_at_5": 0.2742009132420091, |
|
"eval_prec_at_8": 0.17194634703196346, |
|
"eval_prec_macro": 0.9238859033605576, |
|
"eval_prec_micro": 0.9306506849314271, |
|
"eval_rec_at_5": 0.9769786910197868, |
|
"eval_rec_at_8": 0.9788812785388128, |
|
"eval_rec_macro": 0.8949097880182088, |
|
"eval_rec_micro": 0.9013266998340878, |
|
"eval_runtime": 6.0515, |
|
"eval_samples_per_second": 144.758, |
|
"eval_steps_per_second": 18.177, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.24564415216445923, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0281, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_acc_macro": 0.8427241930303886, |
|
"eval_acc_micro": 0.8486486486485831, |
|
"eval_auc_macro": 0.9930039223823353, |
|
"eval_auc_micro": 0.9943083137218841, |
|
"eval_f1_at_5": 0.42721766176318765, |
|
"eval_f1_at_8": 0.2925113305987028, |
|
"eval_f1_macro": 0.9126603684260771, |
|
"eval_f1_micro": 0.9181286549706835, |
|
"eval_loss": 0.05612677335739136, |
|
"eval_prec_at_5": 0.27351598173515984, |
|
"eval_prec_at_8": 0.17194634703196346, |
|
"eval_prec_macro": 0.9258898765719902, |
|
"eval_prec_micro": 0.9250841750840971, |
|
"eval_rec_at_5": 0.9752663622526635, |
|
"eval_rec_at_8": 0.9788812785388128, |
|
"eval_rec_macro": 0.9012770790636472, |
|
"eval_rec_micro": 0.9112769485903058, |
|
"eval_runtime": 6.0637, |
|
"eval_samples_per_second": 144.466, |
|
"eval_steps_per_second": 18.141, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.17717961966991425, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0163, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_acc_macro": 0.8320704316636002, |
|
"eval_acc_micro": 0.8395155185464921, |
|
"eval_auc_macro": 0.9927596537595381, |
|
"eval_auc_micro": 0.9942892457976611, |
|
"eval_f1_at_5": 0.4287923569488756, |
|
"eval_f1_at_8": 0.29274329072668936, |
|
"eval_f1_macro": 0.9054968756763103, |
|
"eval_f1_micro": 0.9127572016460155, |
|
"eval_loss": 0.07535412162542343, |
|
"eval_prec_at_5": 0.27465753424657535, |
|
"eval_prec_at_8": 0.1720890410958904, |
|
"eval_prec_macro": 0.895837475867586, |
|
"eval_prec_micro": 0.9060457516339129, |
|
"eval_rec_at_5": 0.9771689497716894, |
|
"eval_rec_at_8": 0.9794520547945206, |
|
"eval_rec_macro": 0.9167258374697121, |
|
"eval_rec_micro": 0.9195688225538209, |
|
"eval_runtime": 6.0576, |
|
"eval_samples_per_second": 144.612, |
|
"eval_steps_per_second": 18.159, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.27908530831336975, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0109, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_acc_macro": 0.8295268992370044, |
|
"eval_acc_micro": 0.832209737827653, |
|
"eval_auc_macro": 0.9930334009743317, |
|
"eval_auc_micro": 0.9938628905761329, |
|
"eval_f1_at_5": 0.42918048633589306, |
|
"eval_f1_at_8": 0.29274329072668936, |
|
"eval_f1_macro": 0.9041576767954438, |
|
"eval_f1_micro": 0.908421913327808, |
|
"eval_loss": 0.07922064512968063, |
|
"eval_prec_at_5": 0.27488584474885847, |
|
"eval_prec_at_8": 0.1720890410958904, |
|
"eval_prec_macro": 0.8907484139672496, |
|
"eval_prec_micro": 0.8959677419354116, |
|
"eval_rec_at_5": 0.978310502283105, |
|
"eval_rec_at_8": 0.9794520547945206, |
|
"eval_rec_macro": 0.9213085265888775, |
|
"eval_rec_micro": 0.9212271973465239, |
|
"eval_runtime": 6.0722, |
|
"eval_samples_per_second": 144.265, |
|
"eval_steps_per_second": 18.115, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.14036260545253754, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0074, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_acc_macro": 0.8385687066413228, |
|
"eval_acc_micro": 0.844961240310012, |
|
"eval_auc_macro": 0.9927809510576332, |
|
"eval_auc_micro": 0.9942359625327764, |
|
"eval_f1_at_5": 0.4295136788854873, |
|
"eval_f1_at_8": 0.29274329072668936, |
|
"eval_f1_macro": 0.9094505520360644, |
|
"eval_f1_micro": 0.915966386554545, |
|
"eval_loss": 0.08278516680002213, |
|
"eval_prec_at_5": 0.2751141552511416, |
|
"eval_prec_at_8": 0.1720890410958904, |
|
"eval_prec_macro": 0.9210993789406117, |
|
"eval_prec_micro": 0.928449744463294, |
|
"eval_rec_at_5": 0.9788812785388128, |
|
"eval_rec_at_8": 0.9794520547945206, |
|
"eval_rec_macro": 0.8989352557922053, |
|
"eval_rec_micro": 0.9038142620231423, |
|
"eval_runtime": 6.0864, |
|
"eval_samples_per_second": 143.928, |
|
"eval_steps_per_second": 18.073, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.192245751619339, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0066, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_acc_macro": 0.841830141960627, |
|
"eval_acc_micro": 0.8498475609755449, |
|
"eval_auc_macro": 0.9923313106695196, |
|
"eval_auc_micro": 0.9933376316355039, |
|
"eval_f1_at_5": 0.4275509226486327, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.9117612718437879, |
|
"eval_f1_micro": 0.9188298310670853, |
|
"eval_loss": 0.0772981271147728, |
|
"eval_prec_at_5": 0.27374429223744295, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.9114916513520281, |
|
"eval_prec_micro": 0.9131859131858384, |
|
"eval_rec_at_5": 0.9758371385083713, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.9136710504527534, |
|
"eval_rec_micro": 0.92454394693193, |
|
"eval_runtime": 5.9768, |
|
"eval_samples_per_second": 146.566, |
|
"eval_steps_per_second": 18.404, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.11837983131408691, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0057, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_acc_macro": 0.8320358706720068, |
|
"eval_acc_micro": 0.836842105263095, |
|
"eval_auc_macro": 0.990286728874436, |
|
"eval_auc_micro": 0.9913156297458193, |
|
"eval_f1_at_5": 0.42747783226694747, |
|
"eval_f1_at_8": 0.2920219218917755, |
|
"eval_f1_macro": 0.9058723874403682, |
|
"eval_f1_micro": 0.911174785100212, |
|
"eval_loss": 0.10789646208286285, |
|
"eval_prec_at_5": 0.27374429223744295, |
|
"eval_prec_at_8": 0.1716609589041096, |
|
"eval_prec_macro": 0.8914304927691732, |
|
"eval_prec_micro": 0.8997574777687227, |
|
"eval_rec_at_5": 0.975076103500761, |
|
"eval_rec_at_8": 0.9771689497716894, |
|
"eval_rec_macro": 0.9231550617780325, |
|
"eval_rec_micro": 0.922885572139227, |
|
"eval_runtime": 6.0106, |
|
"eval_samples_per_second": 145.742, |
|
"eval_steps_per_second": 18.301, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.16925422847270966, |
|
"learning_rate": 0.0003, |
|
"loss": 0.005, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_acc_macro": 0.8364454962531706, |
|
"eval_acc_micro": 0.8397581254723477, |
|
"eval_auc_macro": 0.9908748071505288, |
|
"eval_auc_micro": 0.9917851996554229, |
|
"eval_f1_at_5": 0.42708974483795964, |
|
"eval_f1_at_8": 0.2925113305987028, |
|
"eval_f1_macro": 0.9084368033771916, |
|
"eval_f1_micro": 0.9129005751848058, |
|
"eval_loss": 0.09441287070512772, |
|
"eval_prec_at_5": 0.27351598173515984, |
|
"eval_prec_at_8": 0.17194634703196346, |
|
"eval_prec_macro": 0.8995515858307809, |
|
"eval_prec_micro": 0.9047231270357569, |
|
"eval_rec_at_5": 0.9739345509893454, |
|
"eval_rec_at_8": 0.9788812785388128, |
|
"eval_rec_macro": 0.9188760380015673, |
|
"eval_rec_micro": 0.9212271973465239, |
|
"eval_runtime": 6.0148, |
|
"eval_samples_per_second": 145.64, |
|
"eval_steps_per_second": 18.288, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.25462788343429565, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0045, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_acc_macro": 0.8219219437469505, |
|
"eval_acc_micro": 0.8327165062915741, |
|
"eval_auc_macro": 0.9892036337462419, |
|
"eval_auc_micro": 0.9914230873000853, |
|
"eval_f1_at_5": 0.42851407428878907, |
|
"eval_f1_at_8": 0.2918154230125642, |
|
"eval_f1_macro": 0.8990693006867976, |
|
"eval_f1_micro": 0.9087237479805405, |
|
"eval_loss": 0.11245805770158768, |
|
"eval_prec_at_5": 0.2744292237442923, |
|
"eval_prec_at_8": 0.17151826484018265, |
|
"eval_prec_macro": 0.872997498581981, |
|
"eval_prec_micro": 0.8858267716534736, |
|
"eval_rec_at_5": 0.9771689497716894, |
|
"eval_rec_at_8": 0.9771689497716894, |
|
"eval_rec_macro": 0.9280030786669166, |
|
"eval_rec_micro": 0.932835820895445, |
|
"eval_runtime": 6.2398, |
|
"eval_samples_per_second": 140.389, |
|
"eval_steps_per_second": 17.629, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.13963262736797333, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0029, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_acc_macro": 0.8398618464555664, |
|
"eval_acc_micro": 0.8437499999999356, |
|
"eval_auc_macro": 0.9909268418920506, |
|
"eval_auc_micro": 0.9918402649693007, |
|
"eval_f1_at_5": 0.42740465274793293, |
|
"eval_f1_at_8": 0.2920473967500829, |
|
"eval_f1_macro": 0.910421950810437, |
|
"eval_f1_micro": 0.9152542372880599, |
|
"eval_loss": 0.13447707891464233, |
|
"eval_prec_at_5": 0.27374429223744295, |
|
"eval_prec_at_8": 0.1716609589041096, |
|
"eval_prec_macro": 0.907525335697792, |
|
"eval_prec_micro": 0.9126133553173196, |
|
"eval_rec_at_5": 0.9743150684931506, |
|
"eval_rec_at_8": 0.9777397260273972, |
|
"eval_rec_macro": 0.9137157897519602, |
|
"eval_rec_micro": 0.9179104477611179, |
|
"eval_runtime": 6.2625, |
|
"eval_samples_per_second": 139.881, |
|
"eval_steps_per_second": 17.565, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.1901983767747879, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0032, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_acc_macro": 0.8357224650035576, |
|
"eval_acc_micro": 0.8455098934550346, |
|
"eval_auc_macro": 0.9892990457525107, |
|
"eval_auc_micro": 0.990561377509615, |
|
"eval_f1_at_5": 0.4254418080308462, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.9080581868434588, |
|
"eval_f1_micro": 0.9162886597937389, |
|
"eval_loss": 0.10820472985506058, |
|
"eval_prec_at_5": 0.27237442922374433, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.9057447684648601, |
|
"eval_prec_micro": 0.9114027891713772, |
|
"eval_rec_at_5": 0.9712709284627092, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.9111621041819294, |
|
"eval_rec_micro": 0.9212271973465239, |
|
"eval_runtime": 6.3038, |
|
"eval_samples_per_second": 138.963, |
|
"eval_steps_per_second": 17.45, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.17108500003814697, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0027, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_acc_macro": 0.8297448303381417, |
|
"eval_acc_micro": 0.8398169336383798, |
|
"eval_auc_macro": 0.9902632756647023, |
|
"eval_auc_micro": 0.9922601157120051, |
|
"eval_f1_at_5": 0.4277927620668724, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.9041211387604378, |
|
"eval_f1_micro": 0.912935323383009, |
|
"eval_loss": 0.12706510722637177, |
|
"eval_prec_at_5": 0.273972602739726, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.9060353096841967, |
|
"eval_prec_micro": 0.9129353233830089, |
|
"eval_rec_at_5": 0.9754566210045662, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.9032650308288451, |
|
"eval_rec_micro": 0.9129353233830089, |
|
"eval_runtime": 6.7247, |
|
"eval_samples_per_second": 130.266, |
|
"eval_steps_per_second": 16.358, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.06544364243745804, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0029, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_acc_macro": 0.829765891496947, |
|
"eval_acc_micro": 0.8365384615383996, |
|
"eval_auc_macro": 0.9893010896141785, |
|
"eval_auc_micro": 0.9914586391774918, |
|
"eval_f1_at_5": 0.42714459980714947, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.9037391479019817, |
|
"eval_f1_micro": 0.9109947643978323, |
|
"eval_loss": 0.10541332513093948, |
|
"eval_prec_at_5": 0.27351598173515984, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.8801020034653725, |
|
"eval_prec_micro": 0.8856695379795704, |
|
"eval_rec_at_5": 0.9745053272450532, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.9310175234124923, |
|
"eval_rec_micro": 0.937810945273554, |
|
"eval_runtime": 6.3322, |
|
"eval_samples_per_second": 138.34, |
|
"eval_steps_per_second": 17.371, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 0.12380898743867874, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0028, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_acc_macro": 0.8295264706506825, |
|
"eval_acc_micro": 0.8390718562873624, |
|
"eval_auc_macro": 0.9886105199219366, |
|
"eval_auc_micro": 0.9908788495376829, |
|
"eval_f1_at_5": 0.42679308597884263, |
|
"eval_f1_at_8": 0.2918154230125642, |
|
"eval_f1_macro": 0.9042231261610075, |
|
"eval_f1_micro": 0.9124949124948383, |
|
"eval_loss": 0.11824628710746765, |
|
"eval_prec_at_5": 0.2732876712328767, |
|
"eval_prec_at_8": 0.17151826484018265, |
|
"eval_prec_macro": 0.8888519630940449, |
|
"eval_prec_micro": 0.8960831334931337, |
|
"eval_rec_at_5": 0.973744292237443, |
|
"eval_rec_at_8": 0.9771689497716894, |
|
"eval_rec_macro": 0.9215852252593782, |
|
"eval_rec_micro": 0.929519071310039, |
|
"eval_runtime": 6.3155, |
|
"eval_samples_per_second": 138.707, |
|
"eval_steps_per_second": 17.418, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.21812734007835388, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0024, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_acc_macro": 0.829661993373597, |
|
"eval_acc_micro": 0.8388554216866838, |
|
"eval_auc_macro": 0.9894816577253542, |
|
"eval_auc_micro": 0.991705185655646, |
|
"eval_f1_at_5": 0.42714459980714947, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.9036099130579357, |
|
"eval_f1_micro": 0.9123669123668376, |
|
"eval_loss": 0.1079055592417717, |
|
"eval_prec_at_5": 0.27351598173515984, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.8959752170714762, |
|
"eval_prec_micro": 0.9012944983818041, |
|
"eval_rec_at_5": 0.9745053272450532, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.9119091133829933, |
|
"eval_rec_micro": 0.9237147595355785, |
|
"eval_runtime": 6.2636, |
|
"eval_samples_per_second": 139.855, |
|
"eval_steps_per_second": 17.562, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.14500826597213745, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0026, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_acc_macro": 0.8388928681043534, |
|
"eval_acc_micro": 0.8470764617690519, |
|
"eval_auc_macro": 0.9914346196687225, |
|
"eval_auc_micro": 0.9929207212736375, |
|
"eval_f1_at_5": 0.42845916764894976, |
|
"eval_f1_at_8": 0.2925113305987028, |
|
"eval_f1_macro": 0.9099987722528428, |
|
"eval_f1_micro": 0.9172077922077178, |
|
"eval_loss": 0.12154436111450195, |
|
"eval_prec_at_5": 0.2744292237442923, |
|
"eval_prec_at_8": 0.17194634703196346, |
|
"eval_prec_macro": 0.8884541003489376, |
|
"eval_prec_micro": 0.898251192368768, |
|
"eval_rec_at_5": 0.9765981735159818, |
|
"eval_rec_at_8": 0.9788812785388128, |
|
"eval_rec_macro": 0.9349802408736304, |
|
"eval_rec_micro": 0.9369817578772025, |
|
"eval_runtime": 6.1697, |
|
"eval_samples_per_second": 141.984, |
|
"eval_steps_per_second": 17.829, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"grad_norm": 0.12059065699577332, |
|
"learning_rate": 0.0003, |
|
"loss": 0.002, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_acc_macro": 0.840415915936909, |
|
"eval_acc_micro": 0.8496978851963104, |
|
"eval_auc_macro": 0.9902023505450398, |
|
"eval_auc_micro": 0.9918164746152466, |
|
"eval_f1_at_5": 0.42708974483795964, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.9110529776466078, |
|
"eval_f1_micro": 0.9187423438137264, |
|
"eval_loss": 0.1368735283613205, |
|
"eval_prec_at_5": 0.27351598173515984, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.8969397087072427, |
|
"eval_prec_micro": 0.9050683829444163, |
|
"eval_rec_at_5": 0.9739345509893454, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.9266197213944322, |
|
"eval_rec_micro": 0.932835820895445, |
|
"eval_runtime": 6.2964, |
|
"eval_samples_per_second": 139.126, |
|
"eval_steps_per_second": 17.47, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"grad_norm": 0.1459818333387375, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0022, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_acc_macro": 0.8299074532155761, |
|
"eval_acc_micro": 0.8387573964496421, |
|
"eval_auc_macro": 0.9912484493432677, |
|
"eval_auc_micro": 0.9923215964022568, |
|
"eval_f1_at_5": 0.42714459980714947, |
|
"eval_f1_at_8": 0.29205588274802374, |
|
"eval_f1_macro": 0.9036433853106406, |
|
"eval_f1_micro": 0.9123089300079716, |
|
"eval_loss": 0.12988413870334625, |
|
"eval_prec_at_5": 0.27351598173515984, |
|
"eval_prec_at_8": 0.1716609589041096, |
|
"eval_prec_macro": 0.8778252010823914, |
|
"eval_prec_micro": 0.8859374999999308, |
|
"eval_rec_at_5": 0.9745053272450532, |
|
"eval_rec_at_8": 0.9779299847792998, |
|
"eval_rec_macro": 0.9334072472404634, |
|
"eval_rec_micro": 0.9402985074626086, |
|
"eval_runtime": 6.2795, |
|
"eval_samples_per_second": 139.501, |
|
"eval_steps_per_second": 17.517, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"grad_norm": 0.23651210963726044, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0023, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_acc_macro": 0.8331982367002823, |
|
"eval_acc_micro": 0.8412213740457373, |
|
"eval_auc_macro": 0.9907246610096498, |
|
"eval_auc_micro": 0.9922808765827714, |
|
"eval_f1_at_5": 0.42686614175859805, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.9057038011597318, |
|
"eval_f1_micro": 0.9137645107793605, |
|
"eval_loss": 0.1514243185520172, |
|
"eval_prec_at_5": 0.2732876712328767, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.9057572791903493, |
|
"eval_prec_micro": 0.9137645107793604, |
|
"eval_rec_at_5": 0.9745053272450532, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.9065147380552868, |
|
"eval_rec_micro": 0.9137645107793604, |
|
"eval_runtime": 6.3102, |
|
"eval_samples_per_second": 138.822, |
|
"eval_steps_per_second": 17.432, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 0.09079229086637497, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0026, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_acc_macro": 0.8361929136599193, |
|
"eval_acc_micro": 0.8474446987032153, |
|
"eval_auc_macro": 0.990970820997881, |
|
"eval_auc_micro": 0.9927761079978714, |
|
"eval_f1_at_5": 0.4275326584009282, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.9082187775109158, |
|
"eval_f1_micro": 0.9174236168455063, |
|
"eval_loss": 0.1161712110042572, |
|
"eval_prec_at_5": 0.27374429223744295, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.9024833914958587, |
|
"eval_prec_micro": 0.9136513157893985, |
|
"eval_rec_at_5": 0.9756468797564688, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.9147050110934346, |
|
"eval_rec_micro": 0.9212271973465239, |
|
"eval_runtime": 6.2478, |
|
"eval_samples_per_second": 140.209, |
|
"eval_steps_per_second": 17.606, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 0.1555357277393341, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0011, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_acc_macro": 0.8261213062726463, |
|
"eval_acc_micro": 0.8319763138415373, |
|
"eval_auc_macro": 0.990040119063344, |
|
"eval_auc_micro": 0.9911265543476828, |
|
"eval_f1_at_5": 0.4265511126863068, |
|
"eval_f1_at_8": 0.2920473967500829, |
|
"eval_f1_macro": 0.9014179315917096, |
|
"eval_f1_micro": 0.9082828282827549, |
|
"eval_loss": 0.17262162268161774, |
|
"eval_prec_at_5": 0.2730593607305936, |
|
"eval_prec_at_8": 0.1716609589041096, |
|
"eval_prec_macro": 0.8792666911124373, |
|
"eval_prec_micro": 0.8857368006303479, |
|
"eval_rec_at_5": 0.974124809741248, |
|
"eval_rec_at_8": 0.9777397260273972, |
|
"eval_rec_macro": 0.9306207402078307, |
|
"eval_rec_micro": 0.9320066334990935, |
|
"eval_runtime": 6.1427, |
|
"eval_samples_per_second": 142.608, |
|
"eval_steps_per_second": 17.907, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"grad_norm": 0.054534025490283966, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0016, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_acc_macro": 0.826194090509476, |
|
"eval_acc_micro": 0.8362004487658312, |
|
"eval_auc_macro": 0.9894373225891121, |
|
"eval_auc_micro": 0.9909356077981039, |
|
"eval_f1_at_5": 0.42686614175859805, |
|
"eval_f1_at_8": 0.2918154230125642, |
|
"eval_f1_macro": 0.9018300611095432, |
|
"eval_f1_micro": 0.910794297352268, |
|
"eval_loss": 0.1341952532529831, |
|
"eval_prec_at_5": 0.2732876712328767, |
|
"eval_prec_at_8": 0.17151826484018265, |
|
"eval_prec_macro": 0.8863083621122576, |
|
"eval_prec_micro": 0.8951160928742278, |
|
"eval_rec_at_5": 0.9745053272450532, |
|
"eval_rec_at_8": 0.9771689497716894, |
|
"eval_rec_macro": 0.9193115078475559, |
|
"eval_rec_micro": 0.9270315091209844, |
|
"eval_runtime": 6.2441, |
|
"eval_samples_per_second": 140.292, |
|
"eval_steps_per_second": 17.617, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"grad_norm": 0.18318401277065277, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0017, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_acc_macro": 0.8331625409086375, |
|
"eval_acc_micro": 0.844547563805039, |
|
"eval_auc_macro": 0.9903700036429086, |
|
"eval_auc_micro": 0.9911557799511647, |
|
"eval_f1_at_5": 0.4260900570497148, |
|
"eval_f1_at_8": 0.2915919235644548, |
|
"eval_f1_macro": 0.9059994264183651, |
|
"eval_f1_micro": 0.9157232704401748, |
|
"eval_loss": 0.16900603473186493, |
|
"eval_prec_at_5": 0.27283105022831056, |
|
"eval_prec_at_8": 0.1713755707762557, |
|
"eval_prec_macro": 0.9152116568385661, |
|
"eval_prec_micro": 0.9262086513994126, |
|
"eval_rec_at_5": 0.9722222222222222, |
|
"eval_rec_at_8": 0.9767884322678843, |
|
"eval_rec_macro": 0.8973109240252097, |
|
"eval_rec_micro": 0.9054726368158453, |
|
"eval_runtime": 6.3369, |
|
"eval_samples_per_second": 138.239, |
|
"eval_steps_per_second": 17.359, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"grad_norm": 0.09100370109081268, |
|
"learning_rate": 0.0003, |
|
"loss": 0.002, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_acc_macro": 0.834589729212194, |
|
"eval_acc_micro": 0.8436781609194756, |
|
"eval_auc_macro": 0.9901591404278554, |
|
"eval_auc_micro": 0.9904570385410867, |
|
"eval_f1_at_5": 0.4261448479661988, |
|
"eval_f1_at_8": 0.29135993712848907, |
|
"eval_f1_macro": 0.906536458365489, |
|
"eval_f1_micro": 0.9152119700747368, |
|
"eval_loss": 0.12356158345937729, |
|
"eval_prec_at_5": 0.27283105022831056, |
|
"eval_prec_at_8": 0.17123287671232876, |
|
"eval_prec_macro": 0.9101814375182885, |
|
"eval_prec_micro": 0.9174999999999235, |
|
"eval_rec_at_5": 0.97279299847793, |
|
"eval_rec_at_8": 0.9762176560121765, |
|
"eval_rec_macro": 0.9036660250083727, |
|
"eval_rec_micro": 0.9129353233830089, |
|
"eval_runtime": 6.2292, |
|
"eval_samples_per_second": 140.629, |
|
"eval_steps_per_second": 17.659, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"grad_norm": 0.031184401363134384, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0013, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_acc_macro": 0.8341647633718688, |
|
"eval_acc_micro": 0.8435321456234822, |
|
"eval_auc_macro": 0.9909608628516886, |
|
"eval_auc_micro": 0.9916707030076353, |
|
"eval_f1_at_5": 0.42647810767423283, |
|
"eval_f1_at_8": 0.2915834447216144, |
|
"eval_f1_macro": 0.9066166365653049, |
|
"eval_f1_micro": 0.9151260504200911, |
|
"eval_loss": 0.1419263482093811, |
|
"eval_prec_at_5": 0.27305936073059367, |
|
"eval_prec_at_8": 0.1713755707762557, |
|
"eval_prec_macro": 0.9206967590925624, |
|
"eval_prec_micro": 0.9275979557069056, |
|
"eval_rec_at_5": 0.9733637747336377, |
|
"eval_rec_at_8": 0.9765981735159818, |
|
"eval_rec_macro": 0.8938908104868867, |
|
"eval_rec_micro": 0.9029850746267908, |
|
"eval_runtime": 6.2406, |
|
"eval_samples_per_second": 140.372, |
|
"eval_steps_per_second": 17.627, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 0.12148793786764145, |
|
"learning_rate": 0.0003, |
|
"loss": 0.001, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_acc_macro": 0.8301734612803756, |
|
"eval_acc_micro": 0.8415007656967196, |
|
"eval_auc_macro": 0.9904576570433402, |
|
"eval_auc_micro": 0.9915527313643118, |
|
"eval_f1_at_5": 0.42708974483795964, |
|
"eval_f1_at_8": 0.2918154230125642, |
|
"eval_f1_macro": 0.9043513567183213, |
|
"eval_f1_micro": 0.9139293139292379, |
|
"eval_loss": 0.1600634753704071, |
|
"eval_prec_at_5": 0.27351598173515984, |
|
"eval_prec_at_8": 0.17151826484018265, |
|
"eval_prec_macro": 0.9050210447593683, |
|
"eval_prec_micro": 0.9165971643035098, |
|
"eval_rec_at_5": 0.9739345509893454, |
|
"eval_rec_at_8": 0.9771689497716894, |
|
"eval_rec_macro": 0.9048079473209437, |
|
"eval_rec_micro": 0.9112769485903058, |
|
"eval_runtime": 6.2514, |
|
"eval_samples_per_second": 140.128, |
|
"eval_steps_per_second": 17.596, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"grad_norm": 0.11479064077138901, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0016, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_acc_macro": 0.833818894129361, |
|
"eval_acc_micro": 0.8425787106446144, |
|
"eval_auc_macro": 0.9902376130208236, |
|
"eval_auc_micro": 0.9914569462309486, |
|
"eval_f1_at_5": 0.4271994046239887, |
|
"eval_f1_at_8": 0.2920473967500829, |
|
"eval_f1_macro": 0.9060990360615312, |
|
"eval_f1_micro": 0.9145646867371103, |
|
"eval_loss": 0.1426621824502945, |
|
"eval_prec_at_5": 0.27351598173515984, |
|
"eval_prec_at_8": 0.1716609589041096, |
|
"eval_prec_macro": 0.8877458031473394, |
|
"eval_prec_micro": 0.8977635782746887, |
|
"eval_rec_at_5": 0.975076103500761, |
|
"eval_rec_at_8": 0.9777397260273972, |
|
"eval_rec_macro": 0.9271635027618895, |
|
"eval_rec_micro": 0.9320066334990935, |
|
"eval_runtime": 6.1703, |
|
"eval_samples_per_second": 141.971, |
|
"eval_steps_per_second": 17.827, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"grad_norm": 0.0649285838007927, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0011, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_acc_macro": 0.830982026700095, |
|
"eval_acc_micro": 0.8421862971515902, |
|
"eval_auc_macro": 0.9898071389389859, |
|
"eval_auc_micro": 0.9906281152443956, |
|
"eval_f1_at_5": 0.4261448479661988, |
|
"eval_f1_at_8": 0.2918154230125642, |
|
"eval_f1_macro": 0.9044437561510542, |
|
"eval_f1_micro": 0.9143334726284233, |
|
"eval_loss": 0.15522228181362152, |
|
"eval_prec_at_5": 0.27283105022831056, |
|
"eval_prec_at_8": 0.17151826484018265, |
|
"eval_prec_macro": 0.914549760838821, |
|
"eval_prec_micro": 0.9216512215668979, |
|
"eval_rec_at_5": 0.97279299847793, |
|
"eval_rec_at_8": 0.9771689497716894, |
|
"eval_rec_macro": 0.89634766919242, |
|
"eval_rec_micro": 0.9071310116085483, |
|
"eval_runtime": 6.2856, |
|
"eval_samples_per_second": 139.366, |
|
"eval_steps_per_second": 17.5, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"grad_norm": 0.10257603228092194, |
|
"learning_rate": 0.0003, |
|
"loss": 0.001, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_acc_macro": 0.8321851395971892, |
|
"eval_acc_micro": 0.8429878048779845, |
|
"eval_auc_macro": 0.9889132300057429, |
|
"eval_auc_micro": 0.9893601873860157, |
|
"eval_f1_at_5": 0.42647810767423283, |
|
"eval_f1_at_8": 0.2915919235644548, |
|
"eval_f1_macro": 0.9051919207399707, |
|
"eval_f1_micro": 0.9148056244829681, |
|
"eval_loss": 0.149822399020195, |
|
"eval_prec_at_5": 0.27305936073059367, |
|
"eval_prec_at_8": 0.1713755707762557, |
|
"eval_prec_macro": 0.9067171865229405, |
|
"eval_prec_micro": 0.9125412541253373, |
|
"eval_rec_at_5": 0.9733637747336377, |
|
"eval_rec_at_8": 0.9767884322678843, |
|
"eval_rec_macro": 0.906988014574614, |
|
"eval_rec_micro": 0.9170812603647663, |
|
"eval_runtime": 6.3392, |
|
"eval_samples_per_second": 138.188, |
|
"eval_steps_per_second": 17.352, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"grad_norm": 0.06269343197345734, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0012, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_acc_macro": 0.8356570074540155, |
|
"eval_acc_micro": 0.8457446808509995, |
|
"eval_auc_macro": 0.9885679122519199, |
|
"eval_auc_micro": 0.9892555811101381, |
|
"eval_f1_at_5": 0.42581157914042916, |
|
"eval_f1_at_8": 0.29135993712848907, |
|
"eval_f1_macro": 0.907980034255221, |
|
"eval_f1_micro": 0.9164265129682243, |
|
"eval_loss": 0.18450404703617096, |
|
"eval_prec_at_5": 0.2726027397260274, |
|
"eval_prec_at_8": 0.17123287671232876, |
|
"eval_prec_macro": 0.9065894385720981, |
|
"eval_prec_micro": 0.9100572363040956, |
|
"eval_rec_at_5": 0.9722222222222222, |
|
"eval_rec_at_8": 0.9762176560121765, |
|
"eval_rec_macro": 0.9109305984894259, |
|
"eval_rec_micro": 0.922885572139227, |
|
"eval_runtime": 6.2222, |
|
"eval_samples_per_second": 140.786, |
|
"eval_steps_per_second": 17.679, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"grad_norm": 0.060569193214178085, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0015, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_acc_macro": 0.8271139319864672, |
|
"eval_acc_micro": 0.8386855862583391, |
|
"eval_auc_macro": 0.9891685604004818, |
|
"eval_auc_micro": 0.9908738152492784, |
|
"eval_f1_at_5": 0.4271994046239887, |
|
"eval_f1_at_8": 0.29205588274802374, |
|
"eval_f1_macro": 0.9021260521405651, |
|
"eval_f1_micro": 0.9122664500405433, |
|
"eval_loss": 0.13544484972953796, |
|
"eval_prec_at_5": 0.27351598173515984, |
|
"eval_prec_at_8": 0.1716609589041096, |
|
"eval_prec_macro": 0.8873506319173318, |
|
"eval_prec_micro": 0.8941082802547059, |
|
"eval_rec_at_5": 0.975076103500761, |
|
"eval_rec_at_8": 0.9779299847792998, |
|
"eval_rec_macro": 0.9204235623584355, |
|
"eval_rec_micro": 0.931177446102742, |
|
"eval_runtime": 6.3105, |
|
"eval_samples_per_second": 138.815, |
|
"eval_steps_per_second": 17.431, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"grad_norm": 0.011365901678800583, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0013, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_acc_macro": 0.8432467559973965, |
|
"eval_acc_micro": 0.8506097560974961, |
|
"eval_auc_macro": 0.9889068205774477, |
|
"eval_auc_micro": 0.9905521999573019, |
|
"eval_f1_at_5": 0.42684788616938474, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.9125863753805709, |
|
"eval_f1_micro": 0.9192751235584086, |
|
"eval_loss": 0.1388498842716217, |
|
"eval_prec_at_5": 0.2732876712328767, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.9106178664851478, |
|
"eval_prec_micro": 0.9132569558100725, |
|
"eval_rec_at_5": 0.9743150684931506, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.9154554431692706, |
|
"eval_rec_micro": 0.9253731343282815, |
|
"eval_runtime": 6.1794, |
|
"eval_samples_per_second": 141.761, |
|
"eval_steps_per_second": 17.801, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"grad_norm": 0.00758476834744215, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0009, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_acc_macro": 0.8416703156465917, |
|
"eval_acc_micro": 0.848985725018719, |
|
"eval_auc_macro": 0.9891403369644753, |
|
"eval_auc_micro": 0.9905162916700971, |
|
"eval_f1_at_5": 0.4261995886925304, |
|
"eval_f1_at_8": 0.2915919235644548, |
|
"eval_f1_macro": 0.9111295247504767, |
|
"eval_f1_micro": 0.9183258837870039, |
|
"eval_loss": 0.13994979858398438, |
|
"eval_prec_at_5": 0.27283105022831056, |
|
"eval_prec_at_8": 0.1713755707762557, |
|
"eval_prec_macro": 0.8945120800094545, |
|
"eval_prec_micro": 0.9003984063744302, |
|
"eval_rec_at_5": 0.9733637747336377, |
|
"eval_rec_at_8": 0.9767884322678843, |
|
"eval_rec_macro": 0.9295761415839934, |
|
"eval_rec_micro": 0.9369817578772025, |
|
"eval_runtime": 6.299, |
|
"eval_samples_per_second": 139.071, |
|
"eval_steps_per_second": 17.463, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"grad_norm": 0.005984355695545673, |
|
"learning_rate": 0.0003, |
|
"loss": 0.0009, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_acc_macro": 0.8453744008028368, |
|
"eval_acc_micro": 0.8551617873651113, |
|
"eval_auc_macro": 0.9890338148789555, |
|
"eval_auc_micro": 0.9911242376839919, |
|
"eval_f1_at_5": 0.4271994046239887, |
|
"eval_f1_at_8": 0.29182390543479947, |
|
"eval_f1_macro": 0.9134487053983307, |
|
"eval_f1_micro": 0.9219269102989267, |
|
"eval_loss": 0.13640232384204865, |
|
"eval_prec_at_5": 0.27351598173515984, |
|
"eval_prec_at_8": 0.17151826484018265, |
|
"eval_prec_macro": 0.9185694511313961, |
|
"eval_prec_micro": 0.923460898502419, |
|
"eval_rec_at_5": 0.975076103500761, |
|
"eval_rec_at_8": 0.977359208523592, |
|
"eval_rec_macro": 0.9098787791583011, |
|
"eval_rec_micro": 0.9203980099501724, |
|
"eval_runtime": 6.2788, |
|
"eval_samples_per_second": 139.517, |
|
"eval_steps_per_second": 17.519, |
|
"step": 912 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 960, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|