|
{ |
|
"best_metric": 0.9175407198197038, |
|
"best_model_checkpoint": "./saved_models/roberta_sbdh_gpt4_v2_0/checkpoint-792", |
|
"epoch": 33.0, |
|
"eval_steps": 500, |
|
"global_step": 792, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.3806931972503662, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.6134, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_acc_macro": 0.10215572903385978, |
|
"eval_acc_micro": 0.17855121675155125, |
|
"eval_auc_macro": 0.6100041638713678, |
|
"eval_auc_micro": 0.6464031657743947, |
|
"eval_f1_at_5": 0.2718421152169764, |
|
"eval_f1_at_8": 0.22628042555556532, |
|
"eval_f1_macro": 0.1715749294146787, |
|
"eval_f1_micro": 0.30300120048017753, |
|
"eval_loss": 0.4604409337043762, |
|
"eval_prec_at_5": 0.17442922374429226, |
|
"eval_prec_at_8": 0.1329908675799087, |
|
"eval_prec_macro": 0.1401339887841245, |
|
"eval_prec_micro": 0.2132477188239198, |
|
"eval_rec_at_5": 0.6156773211567731, |
|
"eval_rec_at_8": 0.7579908675799086, |
|
"eval_rec_macro": 0.36147047478751165, |
|
"eval_rec_micro": 0.5232172470978007, |
|
"eval_runtime": 3.307, |
|
"eval_samples_per_second": 264.896, |
|
"eval_steps_per_second": 33.263, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.42557960748672485, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4178, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_acc_macro": 0.054161895439312095, |
|
"eval_acc_micro": 0.18999494694289085, |
|
"eval_auc_macro": 0.777689574836493, |
|
"eval_auc_micro": 0.6837933158193559, |
|
"eval_f1_at_5": 0.2980163960623497, |
|
"eval_f1_at_8": 0.2355215334598575, |
|
"eval_f1_macro": 0.08160871746824805, |
|
"eval_f1_micro": 0.31932059447980304, |
|
"eval_loss": 0.35270196199417114, |
|
"eval_prec_at_5": 0.19109589041095887, |
|
"eval_prec_at_8": 0.13855593607305935, |
|
"eval_prec_macro": 0.057278901143779114, |
|
"eval_prec_micro": 0.32724107919927525, |
|
"eval_rec_at_5": 0.6765601217656013, |
|
"eval_rec_at_8": 0.784627092846271, |
|
"eval_rec_macro": 0.14846491228063083, |
|
"eval_rec_micro": 0.3117744610281665, |
|
"eval_runtime": 3.2254, |
|
"eval_samples_per_second": 271.594, |
|
"eval_steps_per_second": 34.104, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.21710661053657532, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.3548, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_acc_macro": 0.08172631395735021, |
|
"eval_acc_micro": 0.2394548994159481, |
|
"eval_auc_macro": 0.8898708262347639, |
|
"eval_auc_micro": 0.7531371635981566, |
|
"eval_f1_at_5": 0.3273134867709266, |
|
"eval_f1_at_8": 0.2656356996119607, |
|
"eval_f1_macro": 0.10962566844915878, |
|
"eval_f1_micro": 0.3863874345549333, |
|
"eval_loss": 0.3343456983566284, |
|
"eval_prec_at_5": 0.20958904109589044, |
|
"eval_prec_at_8": 0.15625, |
|
"eval_prec_macro": 0.08775419624473717, |
|
"eval_prec_micro": 0.5241477272726528, |
|
"eval_rec_at_5": 0.746765601217656, |
|
"eval_rec_at_8": 0.8856544901065448, |
|
"eval_rec_macro": 0.14795321637419723, |
|
"eval_rec_micro": 0.30597014925370597, |
|
"eval_runtime": 3.2799, |
|
"eval_samples_per_second": 267.084, |
|
"eval_steps_per_second": 33.538, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.26009050011634827, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3315, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_acc_macro": 0.2205899892707659, |
|
"eval_acc_micro": 0.31238003838770095, |
|
"eval_auc_macro": 0.9150780627599414, |
|
"eval_auc_micro": 0.8412487048958944, |
|
"eval_f1_at_5": 0.36996556565347266, |
|
"eval_f1_at_8": 0.27616272202971137, |
|
"eval_f1_macro": 0.32075046434231563, |
|
"eval_f1_micro": 0.4760511882997823, |
|
"eval_loss": 0.30232226848602295, |
|
"eval_prec_at_5": 0.23675799086757995, |
|
"eval_prec_at_8": 0.16238584474885845, |
|
"eval_prec_macro": 0.534140737674402, |
|
"eval_prec_micro": 0.4257684761281605, |
|
"eval_rec_at_5": 0.8458904109589042, |
|
"eval_rec_at_8": 0.9225646879756468, |
|
"eval_rec_macro": 0.37458794170416176, |
|
"eval_rec_micro": 0.5398009950248308, |
|
"eval_runtime": 3.2394, |
|
"eval_samples_per_second": 270.421, |
|
"eval_steps_per_second": 33.957, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.37228670716285706, |
|
"learning_rate": 9.722222222222223e-06, |
|
"loss": 0.2947, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_acc_macro": 0.4283107268415045, |
|
"eval_acc_micro": 0.5237793278376333, |
|
"eval_auc_macro": 0.944198889889774, |
|
"eval_auc_micro": 0.9267578754982164, |
|
"eval_f1_at_5": 0.4096518568367693, |
|
"eval_f1_at_8": 0.29017455585500507, |
|
"eval_f1_macro": 0.539018158110013, |
|
"eval_f1_micro": 0.6874739908447202, |
|
"eval_loss": 0.25655001401901245, |
|
"eval_prec_at_5": 0.2618721461187215, |
|
"eval_prec_at_8": 0.17051940639269406, |
|
"eval_prec_macro": 0.5661692193127564, |
|
"eval_prec_micro": 0.6900584795321061, |
|
"eval_rec_at_5": 0.9402587519025875, |
|
"eval_rec_at_8": 0.9727929984779301, |
|
"eval_rec_macro": 0.564018051489352, |
|
"eval_rec_micro": 0.6849087893863445, |
|
"eval_runtime": 3.287, |
|
"eval_samples_per_second": 266.502, |
|
"eval_steps_per_second": 33.465, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.2922113835811615, |
|
"learning_rate": 9.444444444444445e-06, |
|
"loss": 0.255, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_acc_macro": 0.5136685674094098, |
|
"eval_acc_micro": 0.6013651877132695, |
|
"eval_auc_macro": 0.9633338119361814, |
|
"eval_auc_micro": 0.9605888281923002, |
|
"eval_f1_at_5": 0.42005389856494574, |
|
"eval_f1_at_8": 0.29155799132187266, |
|
"eval_f1_macro": 0.6233454431290059, |
|
"eval_f1_micro": 0.7510656436486999, |
|
"eval_loss": 0.21982233226299286, |
|
"eval_prec_at_5": 0.2687214611872146, |
|
"eval_prec_at_8": 0.1713755707762557, |
|
"eval_prec_macro": 0.7586900162484641, |
|
"eval_prec_micro": 0.7728070175437918, |
|
"eval_rec_at_5": 0.9615677321156773, |
|
"eval_rec_at_8": 0.976027397260274, |
|
"eval_rec_macro": 0.6250160961046504, |
|
"eval_rec_micro": 0.7305140961856774, |
|
"eval_runtime": 3.2407, |
|
"eval_samples_per_second": 270.308, |
|
"eval_steps_per_second": 33.943, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.6698095202445984, |
|
"learning_rate": 9.166666666666666e-06, |
|
"loss": 0.2228, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_acc_macro": 0.6303220555404457, |
|
"eval_acc_micro": 0.6847290640393606, |
|
"eval_auc_macro": 0.9734325874763358, |
|
"eval_auc_micro": 0.9748103810769206, |
|
"eval_f1_at_5": 0.42340560395061977, |
|
"eval_f1_at_8": 0.2920219218917755, |
|
"eval_f1_macro": 0.7458516061176264, |
|
"eval_f1_micro": 0.8128654970759555, |
|
"eval_loss": 0.19146637618541718, |
|
"eval_prec_at_5": 0.27100456621004565, |
|
"eval_prec_at_8": 0.1716609589041096, |
|
"eval_prec_macro": 0.86104613287547, |
|
"eval_prec_micro": 0.8190235690235, |
|
"eval_rec_at_5": 0.9674657534246576, |
|
"eval_rec_at_8": 0.9771689497716894, |
|
"eval_rec_macro": 0.7345526249297327, |
|
"eval_rec_micro": 0.806799336650016, |
|
"eval_runtime": 3.3221, |
|
"eval_samples_per_second": 263.692, |
|
"eval_steps_per_second": 33.112, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.3862050771713257, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.1968, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_acc_macro": 0.6911445121462139, |
|
"eval_acc_micro": 0.7442922374428657, |
|
"eval_auc_macro": 0.9794839519003108, |
|
"eval_auc_micro": 0.9822416144793619, |
|
"eval_f1_at_5": 0.4247570125938748, |
|
"eval_f1_at_8": 0.291789958878004, |
|
"eval_f1_macro": 0.7968110573167572, |
|
"eval_f1_micro": 0.853403141361182, |
|
"eval_loss": 0.16935397684574127, |
|
"eval_prec_at_5": 0.2719178082191781, |
|
"eval_prec_at_8": 0.17151826484018265, |
|
"eval_prec_macro": 0.9142914075642903, |
|
"eval_prec_micro": 0.9005524861877623, |
|
"eval_rec_at_5": 0.9699391171993911, |
|
"eval_rec_at_8": 0.9765981735159818, |
|
"eval_rec_macro": 0.7524753915565773, |
|
"eval_rec_micro": 0.8109452736317735, |
|
"eval_runtime": 3.3593, |
|
"eval_samples_per_second": 260.765, |
|
"eval_steps_per_second": 32.745, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.3425958752632141, |
|
"learning_rate": 8.611111111111112e-06, |
|
"loss": 0.1756, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_acc_macro": 0.7538851024227721, |
|
"eval_acc_micro": 0.7733711048158092, |
|
"eval_auc_macro": 0.984468975401199, |
|
"eval_auc_micro": 0.9869544212457306, |
|
"eval_f1_at_5": 0.42679308597884263, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.8539710926649278, |
|
"eval_f1_micro": 0.8722044728433809, |
|
"eval_loss": 0.15195928514003754, |
|
"eval_prec_at_5": 0.2732876712328767, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.8477325049832564, |
|
"eval_prec_micro": 0.8412942989213528, |
|
"eval_rec_at_5": 0.973744292237443, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.8733258072194854, |
|
"eval_rec_micro": 0.9054726368158453, |
|
"eval_runtime": 3.3296, |
|
"eval_samples_per_second": 263.093, |
|
"eval_steps_per_second": 33.037, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.3327239453792572, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.1578, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_acc_macro": 0.7658814025380632, |
|
"eval_acc_micro": 0.7931292008961319, |
|
"eval_auc_macro": 0.9863575312994878, |
|
"eval_auc_micro": 0.9888696784007464, |
|
"eval_f1_at_5": 0.42745954575018524, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.8608080570922364, |
|
"eval_f1_micro": 0.8846314035817673, |
|
"eval_loss": 0.13785400986671448, |
|
"eval_prec_at_5": 0.27374429223744295, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.8916528884446233, |
|
"eval_prec_micro": 0.8887029288702185, |
|
"eval_rec_at_5": 0.9748858447488584, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.8473101584524304, |
|
"eval_rec_micro": 0.8805970149253001, |
|
"eval_runtime": 3.3169, |
|
"eval_samples_per_second": 264.104, |
|
"eval_steps_per_second": 33.164, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.4798758029937744, |
|
"learning_rate": 8.055555555555557e-06, |
|
"loss": 0.1437, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_acc_macro": 0.7817093750541505, |
|
"eval_acc_micro": 0.8088347296267472, |
|
"eval_auc_macro": 0.987966835700279, |
|
"eval_auc_micro": 0.990288011194119, |
|
"eval_f1_at_5": 0.4281808547568474, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.8718351509550711, |
|
"eval_f1_micro": 0.8943157894736088, |
|
"eval_loss": 0.1261390894651413, |
|
"eval_prec_at_5": 0.2742009132420091, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.9059016261714952, |
|
"eval_prec_micro": 0.9084687767321721, |
|
"eval_rec_at_5": 0.9765981735159818, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.852231468930321, |
|
"eval_rec_micro": 0.8805970149253001, |
|
"eval_runtime": 3.1998, |
|
"eval_samples_per_second": 273.768, |
|
"eval_steps_per_second": 34.377, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.5915816426277161, |
|
"learning_rate": 7.77777777777778e-06, |
|
"loss": 0.1315, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_acc_macro": 0.8067338258553937, |
|
"eval_acc_micro": 0.8225075528700285, |
|
"eval_auc_macro": 0.9890198593241079, |
|
"eval_auc_micro": 0.9912727714675422, |
|
"eval_f1_at_5": 0.42745954575018524, |
|
"eval_f1_at_8": 0.2925113305987028, |
|
"eval_f1_macro": 0.8889792204408117, |
|
"eval_f1_micro": 0.9026108578532198, |
|
"eval_loss": 0.11689846962690353, |
|
"eval_prec_at_5": 0.27374429223744295, |
|
"eval_prec_at_8": 0.17194634703196346, |
|
"eval_prec_macro": 0.8957261916294993, |
|
"eval_prec_micro": 0.9022369511184007, |
|
"eval_rec_at_5": 0.9748858447488584, |
|
"eval_rec_at_8": 0.9788812785388128, |
|
"eval_rec_macro": 0.8884178007276748, |
|
"eval_rec_micro": 0.9029850746267908, |
|
"eval_runtime": 3.2003, |
|
"eval_samples_per_second": 273.728, |
|
"eval_steps_per_second": 34.372, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.492089182138443, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.1215, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_acc_macro": 0.8126972127948302, |
|
"eval_acc_micro": 0.8267363704256291, |
|
"eval_auc_macro": 0.9893756572609064, |
|
"eval_auc_micro": 0.9916252607583189, |
|
"eval_f1_at_5": 0.42745954575018524, |
|
"eval_f1_at_8": 0.2925113305987028, |
|
"eval_f1_macro": 0.8933532206974574, |
|
"eval_f1_micro": 0.9051512673752325, |
|
"eval_loss": 0.10864967107772827, |
|
"eval_prec_at_5": 0.27374429223744295, |
|
"eval_prec_at_8": 0.17194634703196346, |
|
"eval_prec_macro": 0.8891547780046246, |
|
"eval_prec_micro": 0.892741935483799, |
|
"eval_rec_at_5": 0.9748858447488584, |
|
"eval_rec_at_8": 0.9788812785388128, |
|
"eval_rec_macro": 0.9014409945020224, |
|
"eval_rec_micro": 0.9179104477611179, |
|
"eval_runtime": 3.3389, |
|
"eval_samples_per_second": 262.36, |
|
"eval_steps_per_second": 32.945, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.3663789927959442, |
|
"learning_rate": 7.222222222222223e-06, |
|
"loss": 0.1129, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_acc_macro": 0.8228971770857018, |
|
"eval_acc_micro": 0.8329588014980649, |
|
"eval_auc_macro": 0.9899641824346502, |
|
"eval_auc_micro": 0.9921304716477787, |
|
"eval_f1_at_5": 0.4281991387591175, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.9004325486077462, |
|
"eval_f1_micro": 0.9088680016345804, |
|
"eval_loss": 0.10234559327363968, |
|
"eval_prec_at_5": 0.2742009132420091, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.890683762800314, |
|
"eval_prec_micro": 0.8960515713133846, |
|
"eval_rec_at_5": 0.9767884322678843, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.9120840636750266, |
|
"eval_rec_micro": 0.9220563847428754, |
|
"eval_runtime": 3.2169, |
|
"eval_samples_per_second": 272.316, |
|
"eval_steps_per_second": 34.195, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.37758299708366394, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.1046, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_acc_macro": 0.8302977681001992, |
|
"eval_acc_micro": 0.8413059984813333, |
|
"eval_auc_macro": 0.9901671140838944, |
|
"eval_auc_micro": 0.9919789083809407, |
|
"eval_f1_at_5": 0.42784762619628447, |
|
"eval_f1_at_8": 0.2925113305987028, |
|
"eval_f1_macro": 0.9052032341883497, |
|
"eval_f1_micro": 0.9138144329896153, |
|
"eval_loss": 0.09653711318969727, |
|
"eval_prec_at_5": 0.273972602739726, |
|
"eval_prec_at_8": 0.17194634703196346, |
|
"eval_prec_macro": 0.9028285099698952, |
|
"eval_prec_micro": 0.9089417555372511, |
|
"eval_rec_at_5": 0.976027397260274, |
|
"eval_rec_at_8": 0.9788812785388128, |
|
"eval_rec_macro": 0.9099411755814296, |
|
"eval_rec_micro": 0.9187396351574694, |
|
"eval_runtime": 3.2472, |
|
"eval_samples_per_second": 269.767, |
|
"eval_steps_per_second": 33.875, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.4621961712837219, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.0979, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_acc_macro": 0.8273587016423724, |
|
"eval_acc_micro": 0.8396299151888327, |
|
"eval_auc_macro": 0.989802544326232, |
|
"eval_auc_micro": 0.9918006143792109, |
|
"eval_f1_at_5": 0.42751438858977425, |
|
"eval_f1_at_8": 0.2920473967500829, |
|
"eval_f1_macro": 0.9028525753108699, |
|
"eval_f1_micro": 0.9128248113997558, |
|
"eval_loss": 0.09215801954269409, |
|
"eval_prec_at_5": 0.27374429223744295, |
|
"eval_prec_at_8": 0.1716609589041096, |
|
"eval_prec_macro": 0.9158518703241821, |
|
"eval_prec_micro": 0.9228813559321252, |
|
"eval_rec_at_5": 0.9754566210045662, |
|
"eval_rec_at_8": 0.9777397260273972, |
|
"eval_rec_macro": 0.8926981936861532, |
|
"eval_rec_micro": 0.9029850746267908, |
|
"eval_runtime": 3.233, |
|
"eval_samples_per_second": 270.955, |
|
"eval_steps_per_second": 34.024, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.4058220386505127, |
|
"learning_rate": 6.3888888888888885e-06, |
|
"loss": 0.092, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_acc_macro": 0.8347539263761662, |
|
"eval_acc_micro": 0.8474708171205566, |
|
"eval_auc_macro": 0.9899894858470043, |
|
"eval_auc_micro": 0.9920498339308542, |
|
"eval_f1_at_5": 0.4275326584009282, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.9071161229466114, |
|
"eval_f1_micro": 0.9174389216511443, |
|
"eval_loss": 0.08766299486160278, |
|
"eval_prec_at_5": 0.27374429223744295, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.9264703324172373, |
|
"eval_prec_micro": 0.9323630136985502, |
|
"eval_rec_at_5": 0.9756468797564688, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.8904830078050127, |
|
"eval_rec_micro": 0.9029850746267908, |
|
"eval_runtime": 3.2006, |
|
"eval_samples_per_second": 273.696, |
|
"eval_steps_per_second": 34.368, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 0.4305408000946045, |
|
"learning_rate": 6.111111111111112e-06, |
|
"loss": 0.0863, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_acc_macro": 0.8361202794579444, |
|
"eval_acc_micro": 0.8485316846985433, |
|
"eval_auc_macro": 0.9904755972939142, |
|
"eval_auc_micro": 0.9926707889023967, |
|
"eval_f1_at_5": 0.4278476261962846, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.9082261738031537, |
|
"eval_f1_micro": 0.9180602006688194, |
|
"eval_loss": 0.08403860032558441, |
|
"eval_prec_at_5": 0.27397260273972607, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.9194984843968411, |
|
"eval_prec_micro": 0.9258010118043064, |
|
"eval_rec_at_5": 0.976027397260274, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.8989752524534329, |
|
"eval_rec_micro": 0.9104477611939543, |
|
"eval_runtime": 3.2293, |
|
"eval_samples_per_second": 271.27, |
|
"eval_steps_per_second": 34.064, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.3943960964679718, |
|
"learning_rate": 5.833333333333334e-06, |
|
"loss": 0.0814, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_acc_macro": 0.839106311750719, |
|
"eval_acc_micro": 0.8515684774291621, |
|
"eval_auc_macro": 0.990108145855512, |
|
"eval_auc_micro": 0.992045690666946, |
|
"eval_f1_at_5": 0.42751438858977425, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.9099447887675026, |
|
"eval_f1_micro": 0.9198347107437256, |
|
"eval_loss": 0.08095283061265945, |
|
"eval_prec_at_5": 0.27374429223744295, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.9089344385619288, |
|
"eval_prec_micro": 0.9168039538714237, |
|
"eval_rec_at_5": 0.9754566210045662, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.912673576698828, |
|
"eval_rec_micro": 0.922885572139227, |
|
"eval_runtime": 3.2592, |
|
"eval_samples_per_second": 268.78, |
|
"eval_steps_per_second": 33.751, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.40355971455574036, |
|
"learning_rate": 5.555555555555557e-06, |
|
"loss": 0.077, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_acc_macro": 0.841293209326155, |
|
"eval_acc_micro": 0.8523335883702484, |
|
"eval_auc_macro": 0.9901090394728352, |
|
"eval_auc_micro": 0.9921565786655233, |
|
"eval_f1_at_5": 0.42712632039330145, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.9118247332403943, |
|
"eval_f1_micro": 0.9202808756711341, |
|
"eval_loss": 0.07820397615432739, |
|
"eval_prec_at_5": 0.27351598173515984, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.9128408681170823, |
|
"eval_prec_micro": 0.9168724279834636, |
|
"eval_rec_at_5": 0.9743150684931506, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.913025214091804, |
|
"eval_rec_micro": 0.9237147595355785, |
|
"eval_runtime": 3.335, |
|
"eval_samples_per_second": 262.673, |
|
"eval_steps_per_second": 32.984, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"grad_norm": 0.4673054814338684, |
|
"learning_rate": 5.2777777777777785e-06, |
|
"loss": 0.0732, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_acc_macro": 0.8435745126219377, |
|
"eval_acc_micro": 0.8546017014693847, |
|
"eval_auc_macro": 0.990212670611912, |
|
"eval_auc_micro": 0.9923085874446093, |
|
"eval_f1_at_5": 0.4278476261962846, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.9131099703856408, |
|
"eval_f1_micro": 0.9216013344452942, |
|
"eval_loss": 0.07635616511106491, |
|
"eval_prec_at_5": 0.27397260273972607, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.9220960220647866, |
|
"eval_prec_micro": 0.9270134228187141, |
|
"eval_rec_at_5": 0.976027397260274, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.9063401498441861, |
|
"eval_rec_micro": 0.9162520729684148, |
|
"eval_runtime": 3.335, |
|
"eval_samples_per_second": 262.666, |
|
"eval_steps_per_second": 32.983, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"grad_norm": 0.4005274772644043, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0693, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_acc_macro": 0.8455313120389342, |
|
"eval_acc_micro": 0.8565950920244741, |
|
"eval_auc_macro": 0.9901732890859408, |
|
"eval_auc_micro": 0.9921753792823973, |
|
"eval_f1_at_5": 0.42745954575018524, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.9143994666596676, |
|
"eval_f1_micro": 0.9227591904171067, |
|
"eval_loss": 0.07342522591352463, |
|
"eval_prec_at_5": 0.27374429223744295, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.9139300975502155, |
|
"eval_prec_micro": 0.9193415637859326, |
|
"eval_rec_at_5": 0.9748858447488584, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.9161172406072337, |
|
"eval_rec_micro": 0.9262023217246329, |
|
"eval_runtime": 3.2711, |
|
"eval_samples_per_second": 267.802, |
|
"eval_steps_per_second": 33.628, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"grad_norm": 0.45327135920524597, |
|
"learning_rate": 4.722222222222222e-06, |
|
"loss": 0.0661, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_acc_macro": 0.8467210056386675, |
|
"eval_acc_micro": 0.8575827559660617, |
|
"eval_auc_macro": 0.990201788357871, |
|
"eval_auc_micro": 0.9922312465183218, |
|
"eval_f1_at_5": 0.42745954575018524, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.9148197613459009, |
|
"eval_f1_micro": 0.9233319519269851, |
|
"eval_loss": 0.07164816558361053, |
|
"eval_prec_at_5": 0.27374429223744295, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.9172647910903643, |
|
"eval_prec_micro": 0.9229494614746543, |
|
"eval_rec_at_5": 0.9748858447488584, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.9139228248292297, |
|
"eval_rec_micro": 0.9237147595355785, |
|
"eval_runtime": 3.2794, |
|
"eval_samples_per_second": 267.12, |
|
"eval_steps_per_second": 33.542, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 0.44867074489593506, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.0626, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_acc_macro": 0.8443607544183838, |
|
"eval_acc_micro": 0.8557098765431438, |
|
"eval_auc_macro": 0.9902370599717555, |
|
"eval_auc_micro": 0.992274817616196, |
|
"eval_f1_at_5": 0.42712632039330145, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.9135513991863947, |
|
"eval_f1_micro": 0.9222453222452455, |
|
"eval_loss": 0.07073331624269485, |
|
"eval_prec_at_5": 0.27351598173515984, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.9185147069172412, |
|
"eval_prec_micro": 0.9249374478731505, |
|
"eval_rec_at_5": 0.9743150684931506, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.9094868029221347, |
|
"eval_rec_micro": 0.9195688225538209, |
|
"eval_runtime": 3.2386, |
|
"eval_samples_per_second": 270.487, |
|
"eval_steps_per_second": 33.965, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 0.509304940700531, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 0.06, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_acc_macro": 0.8405381328154341, |
|
"eval_acc_micro": 0.8539238539237876, |
|
"eval_auc_macro": 0.9899142306185617, |
|
"eval_auc_micro": 0.9919983327149624, |
|
"eval_f1_at_5": 0.42751438858977425, |
|
"eval_f1_at_8": 0.2920473967500829, |
|
"eval_f1_macro": 0.91134605843459, |
|
"eval_f1_micro": 0.9212070410728481, |
|
"eval_loss": 0.06922697275876999, |
|
"eval_prec_at_5": 0.27374429223744295, |
|
"eval_prec_at_8": 0.1716609589041096, |
|
"eval_prec_macro": 0.9243865602122843, |
|
"eval_prec_micro": 0.9313559322033109, |
|
"eval_rec_at_5": 0.9754566210045662, |
|
"eval_rec_at_8": 0.9777397260273972, |
|
"eval_rec_macro": 0.8997359146591782, |
|
"eval_rec_micro": 0.9112769485903058, |
|
"eval_runtime": 3.3027, |
|
"eval_samples_per_second": 265.24, |
|
"eval_steps_per_second": 33.306, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"grad_norm": 0.5278392434120178, |
|
"learning_rate": 3.88888888888889e-06, |
|
"loss": 0.0575, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_acc_macro": 0.8460307636339017, |
|
"eval_acc_micro": 0.8563664596272627, |
|
"eval_auc_macro": 0.9901957256905628, |
|
"eval_auc_micro": 0.9919254469111567, |
|
"eval_f1_at_5": 0.42718114191994605, |
|
"eval_f1_at_8": 0.2925113305987028, |
|
"eval_f1_macro": 0.9146326110180735, |
|
"eval_f1_micro": 0.9226265161019722, |
|
"eval_loss": 0.06812591105699539, |
|
"eval_prec_at_5": 0.27351598173515984, |
|
"eval_prec_at_8": 0.17194634703196346, |
|
"eval_prec_macro": 0.9259544901379119, |
|
"eval_prec_micro": 0.9308016877636345, |
|
"eval_rec_at_5": 0.9748858447488584, |
|
"eval_rec_at_8": 0.9788812785388128, |
|
"eval_rec_macro": 0.9052694584097175, |
|
"eval_rec_micro": 0.9145936981757119, |
|
"eval_runtime": 3.238, |
|
"eval_samples_per_second": 270.536, |
|
"eval_steps_per_second": 33.971, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"grad_norm": 0.46283113956451416, |
|
"learning_rate": 3.6111111111111115e-06, |
|
"loss": 0.0547, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_acc_macro": 0.8437978381816325, |
|
"eval_acc_micro": 0.8569194683345694, |
|
"eval_auc_macro": 0.9900672235373739, |
|
"eval_auc_micro": 0.9922836387587103, |
|
"eval_f1_at_5": 0.42751438858977425, |
|
"eval_f1_at_8": 0.2922793659426448, |
|
"eval_f1_macro": 0.9133228707359765, |
|
"eval_f1_micro": 0.9229473684209749, |
|
"eval_loss": 0.06721309572458267, |
|
"eval_prec_at_5": 0.27374429223744295, |
|
"eval_prec_at_8": 0.17180365296803654, |
|
"eval_prec_macro": 0.9302491335296357, |
|
"eval_prec_micro": 0.9375534644994921, |
|
"eval_rec_at_5": 0.9754566210045662, |
|
"eval_rec_at_8": 0.978310502283105, |
|
"eval_rec_macro": 0.8981640840687014, |
|
"eval_rec_micro": 0.9087893864012513, |
|
"eval_runtime": 3.2966, |
|
"eval_samples_per_second": 265.728, |
|
"eval_steps_per_second": 33.368, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"grad_norm": 0.5683040022850037, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.0523, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_acc_macro": 0.847085539786285, |
|
"eval_acc_micro": 0.8558139534883057, |
|
"eval_auc_macro": 0.9896638187547525, |
|
"eval_auc_micro": 0.99160271783856, |
|
"eval_f1_at_5": 0.42751438858977425, |
|
"eval_f1_at_8": 0.2915834447216144, |
|
"eval_f1_macro": 0.9152884545582967, |
|
"eval_f1_micro": 0.9223057644109505, |
|
"eval_loss": 0.06634338945150375, |
|
"eval_prec_at_5": 0.27374429223744295, |
|
"eval_prec_at_8": 0.1713755707762557, |
|
"eval_prec_macro": 0.9276311524633641, |
|
"eval_prec_micro": 0.929292929292851, |
|
"eval_rec_at_5": 0.9754566210045662, |
|
"eval_rec_at_8": 0.9765981735159818, |
|
"eval_rec_macro": 0.9050852833749707, |
|
"eval_rec_micro": 0.9154228855720634, |
|
"eval_runtime": 3.3379, |
|
"eval_samples_per_second": 262.442, |
|
"eval_steps_per_second": 32.955, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"grad_norm": 0.5064497590065002, |
|
"learning_rate": 3.055555555555556e-06, |
|
"loss": 0.0507, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_acc_macro": 0.8424595332709467, |
|
"eval_acc_micro": 0.8554687499999332, |
|
"eval_auc_macro": 0.9903800574812361, |
|
"eval_auc_micro": 0.9924352911279979, |
|
"eval_f1_at_5": 0.42751438858977425, |
|
"eval_f1_at_8": 0.2918154230125642, |
|
"eval_f1_macro": 0.9125139257396052, |
|
"eval_f1_micro": 0.9221052631578169, |
|
"eval_loss": 0.06477358192205429, |
|
"eval_prec_at_5": 0.27374429223744295, |
|
"eval_prec_at_8": 0.17151826484018265, |
|
"eval_prec_macro": 0.9271042210984995, |
|
"eval_prec_micro": 0.9366980325063355, |
|
"eval_rec_at_5": 0.9754566210045662, |
|
"eval_rec_at_8": 0.9771689497716894, |
|
"eval_rec_macro": 0.8991646180232234, |
|
"eval_rec_micro": 0.9079601990048998, |
|
"eval_runtime": 3.3033, |
|
"eval_samples_per_second": 265.185, |
|
"eval_steps_per_second": 33.3, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 0.5206765532493591, |
|
"learning_rate": 2.7777777777777783e-06, |
|
"loss": 0.0484, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_acc_macro": 0.8482879664337308, |
|
"eval_acc_micro": 0.8564920273347869, |
|
"eval_auc_macro": 0.9902796769975294, |
|
"eval_auc_micro": 0.9923823642729116, |
|
"eval_f1_at_5": 0.42712632039330145, |
|
"eval_f1_at_8": 0.2925113305987028, |
|
"eval_f1_macro": 0.9155214146126874, |
|
"eval_f1_micro": 0.9226993865029919, |
|
"eval_loss": 0.06509387493133545, |
|
"eval_prec_at_5": 0.27351598173515984, |
|
"eval_prec_at_8": 0.17194634703196346, |
|
"eval_prec_macro": 0.9025052330017584, |
|
"eval_prec_micro": 0.9104116222759555, |
|
"eval_rec_at_5": 0.9743150684931506, |
|
"eval_rec_at_8": 0.9788812785388128, |
|
"eval_rec_macro": 0.9295157618718927, |
|
"eval_rec_micro": 0.9353233830844995, |
|
"eval_runtime": 3.2722, |
|
"eval_samples_per_second": 267.709, |
|
"eval_steps_per_second": 33.616, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"grad_norm": 0.5268795490264893, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0456, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_acc_macro": 0.847691548444017, |
|
"eval_acc_micro": 0.8558421851289183, |
|
"eval_auc_macro": 0.9898163244235566, |
|
"eval_auc_micro": 0.9917076805242361, |
|
"eval_f1_at_5": 0.42751438858977425, |
|
"eval_f1_at_8": 0.2918154230125642, |
|
"eval_f1_macro": 0.9153671638721068, |
|
"eval_f1_micro": 0.9223221586262532, |
|
"eval_loss": 0.0645858645439148, |
|
"eval_prec_at_5": 0.27374429223744295, |
|
"eval_prec_at_8": 0.17151826484018265, |
|
"eval_prec_macro": 0.9015493618181559, |
|
"eval_prec_micro": 0.9096774193547653, |
|
"eval_rec_at_5": 0.9754566210045662, |
|
"eval_rec_at_8": 0.9771689497716894, |
|
"eval_rec_macro": 0.9300094432719982, |
|
"eval_rec_micro": 0.9353233830844995, |
|
"eval_runtime": 3.3402, |
|
"eval_samples_per_second": 262.263, |
|
"eval_steps_per_second": 32.933, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"grad_norm": 0.4488939046859741, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.0446, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_acc_macro": 0.8474584233282965, |
|
"eval_acc_micro": 0.8549848942597541, |
|
"eval_auc_macro": 0.9896580840322796, |
|
"eval_auc_micro": 0.9914708462130924, |
|
"eval_f1_at_5": 0.42712632039330145, |
|
"eval_f1_at_8": 0.29135146186873845, |
|
"eval_f1_macro": 0.9153561345178298, |
|
"eval_f1_micro": 0.9218241042344526, |
|
"eval_loss": 0.06430496275424957, |
|
"eval_prec_at_5": 0.27351598173515984, |
|
"eval_prec_at_8": 0.17123287671232876, |
|
"eval_prec_macro": 0.8975121306620646, |
|
"eval_prec_micro": 0.9055999999999276, |
|
"eval_rec_at_5": 0.9743150684931506, |
|
"eval_rec_at_8": 0.976027397260274, |
|
"eval_rec_macro": 0.9345814562185115, |
|
"eval_rec_micro": 0.9386401326699055, |
|
"eval_runtime": 3.2443, |
|
"eval_samples_per_second": 270.009, |
|
"eval_steps_per_second": 33.905, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"grad_norm": 0.5452151298522949, |
|
"learning_rate": 1.944444444444445e-06, |
|
"loss": 0.0425, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_acc_macro": 0.8512273558913187, |
|
"eval_acc_micro": 0.8594224924011504, |
|
"eval_auc_macro": 0.9896851270268344, |
|
"eval_auc_micro": 0.9917295997268475, |
|
"eval_f1_at_5": 0.42712632039330145, |
|
"eval_f1_at_8": 0.2918154230125642, |
|
"eval_f1_macro": 0.9175407198197038, |
|
"eval_f1_micro": 0.9243972210869698, |
|
"eval_loss": 0.06364509463310242, |
|
"eval_prec_at_5": 0.27351598173515984, |
|
"eval_prec_at_8": 0.17151826484018265, |
|
"eval_prec_macro": 0.9039169093915104, |
|
"eval_prec_micro": 0.9113618049958975, |
|
"eval_rec_at_5": 0.9743150684931506, |
|
"eval_rec_at_8": 0.9771689497716894, |
|
"eval_rec_macro": 0.9318495447404759, |
|
"eval_rec_micro": 0.937810945273554, |
|
"eval_runtime": 3.2885, |
|
"eval_samples_per_second": 266.382, |
|
"eval_steps_per_second": 33.45, |
|
"step": 792 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 960, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 500, |
|
"total_flos": 1.332040372236288e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|