{ "best_metric": null, "best_model_checkpoint": null, "epoch": 43.0, "eval_steps": 500, "global_step": 3483, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_B-Claim": { "f1-score": 0.0, "precision": 0.0, "recall": 0.0, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.0, "precision": 0.0, "recall": 0.0, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.6746310611384398, "precision": 0.6075949367088608, "recall": 0.7582938388625592, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.4376506024096386, "precision": 0.550587343690792, "recall": 0.3631592101974506, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.620034542314335, "precision": 0.5483008781977854, "recall": 0.7133631395926477, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8620734063103671, "precision": 0.8396186972154862, "recall": 0.8857621736062103, "support": 11336.0 }, "eval_O": { "f1-score": 0.8807729058286443, "precision": 0.8630878069080317, "recall": 0.8991979189247779, "support": 9226.0 }, "eval_accuracy": 0.785908251565951, "eval_loss": 0.5655931830406189, "eval_macro avg": { "f1-score": 0.49645178828591785, "precision": 0.48702709467442235, "recall": 0.5171108973119495, "support": 27619.0 }, "eval_runtime": 4.8252, "eval_samples_per_second": 16.58, "eval_steps_per_second": 2.072, "eval_weighted avg": { "f1-score": 0.7721020318885458, "precision": 0.766573839857488, "recall": 0.785908251565951, "support": 27619.0 }, "step": 81 }, { "epoch": 2.0, "eval_B-Claim": { "f1-score": 0.36036036036036034, "precision": 0.4624277456647399, "recall": 0.2952029520295203, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.5836909871244635, "precision": 0.723404255319149, "recall": 0.4892086330935252, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7396491228070177, "precision": 0.6654040404040404, "recall": 0.8325434439178515, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.46704216775764956, "precision": 0.5973520249221184, "recall": 0.3834041489627593, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7326456649010316, "precision": 0.8348157560355781, "recall": 0.6527570789865872, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8679505932065659, "precision": 0.8045344983428744, "recall": 0.9422194777699365, "support": 11336.0 }, "eval_O": { "f1-score": 0.9101698606271778, "precision": 0.9143513454386348, "recall": 0.9060264469976155, "support": 9226.0 }, "eval_accuracy": 0.8169376154096818, "eval_loss": 0.4831336438655853, "eval_macro avg": { "f1-score": 0.6659298223977524, "precision": 0.7146128094467336, "recall": 0.6430517402511137, "support": 27619.0 }, "eval_runtime": 4.8125, "eval_samples_per_second": 16.623, "eval_steps_per_second": 2.078, "eval_weighted avg": { "f1-score": 0.8047632099274016, "precision": 0.8064582361050344, "recall": 0.8169376154096818, "support": 27619.0 }, "step": 162 }, { "epoch": 3.0, "eval_B-Claim": { "f1-score": 0.5806451612903225, "precision": 0.59765625, "recall": 0.5645756457564576, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7138047138047138, "precision": 0.6708860759493671, "recall": 0.762589928057554, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7739984882842026, "precision": 0.7420289855072464, "recall": 0.8088467614533965, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6128912532475566, "precision": 0.6068103870651641, "recall": 0.6190952261934516, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8072942336126171, "precision": 0.8009779951100244, "recall": 0.8137108792846498, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8797559224694903, "precision": 0.8951789627465303, "recall": 0.8648553281580804, "support": 11336.0 }, "eval_O": { "f1-score": 0.9226235130211126, "precision": 0.912356930902925, "recall": 0.933123780619987, "support": 9226.0 }, "eval_accuracy": 0.8435859372171332, "eval_loss": 0.4283505082130432, "eval_macro avg": { "f1-score": 0.7558590408185736, "precision": 0.7465565124687511, "recall": 0.7666853642176539, "support": 27619.0 }, "eval_runtime": 4.8554, "eval_samples_per_second": 16.476, "eval_steps_per_second": 2.06, "eval_weighted avg": { "f1-score": 0.8439412578936614, "precision": 0.8447189682878564, "recall": 0.8435859372171332, "support": 27619.0 }, "step": 243 }, { "epoch": 4.0, "eval_B-Claim": { "f1-score": 0.6033333333333333, "precision": 0.5501519756838906, "recall": 0.6678966789667896, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.6847457627118644, "precision": 0.6474358974358975, "recall": 0.7266187050359713, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7818471337579618, "precision": 0.7881219903691814, "recall": 0.7756714060031595, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6216657593903103, "precision": 0.550733024691358, "recall": 0.7135716070982254, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7652218782249742, "precision": 0.7960279119699409, "recall": 0.7367113760556383, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8739165400768475, "precision": 0.8853883758826725, "recall": 0.8627381792519407, "support": 11336.0 }, "eval_O": { "f1-score": 0.9062570845613239, "precision": 0.9497505345687812, "recall": 0.8665727292434424, "support": 9226.0 }, "eval_accuracy": 0.8286324631594192, "eval_loss": 0.4826153814792633, "eval_macro avg": { "f1-score": 0.7481410702938023, "precision": 0.7382299586573888, "recall": 0.7642543830935953, "support": 27619.0 }, "eval_runtime": 4.8516, "eval_samples_per_second": 16.49, "eval_steps_per_second": 2.061, "eval_weighted avg": { "f1-score": 0.8345383371838407, "precision": 0.8451795530099128, "recall": 0.8286324631594192, "support": 27619.0 }, "step": 324 }, { "epoch": 5.0, "eval_B-Claim": { "f1-score": 0.5825242718446603, "precision": 0.6147540983606558, "recall": 0.5535055350553506, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7317073170731707, "precision": 0.7094594594594594, "recall": 0.7553956834532374, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7840735068912711, "precision": 0.7607726597325408, "recall": 0.8088467614533965, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5830055074744296, "precision": 0.6132413793103448, "recall": 0.5556110972256936, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.803728638011393, "precision": 0.8393726338561385, "recall": 0.7709885742672627, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8823829787234042, "precision": 0.8523512002630713, "recall": 0.9146083274523642, "support": 11336.0 }, "eval_O": { "f1-score": 0.9254767941792524, "precision": 0.9415657245401525, "recall": 0.9099284630392369, "support": 9226.0 }, "eval_accuracy": 0.8438031789709982, "eval_loss": 0.513404905796051, "eval_macro avg": { "f1-score": 0.7561284305996544, "precision": 0.7616453079317661, "recall": 0.7526977774209346, "support": 27619.0 }, "eval_runtime": 4.8184, "eval_samples_per_second": 16.603, "eval_steps_per_second": 2.075, "eval_weighted avg": { "f1-score": 0.8417228378374912, "precision": 0.8414191958613282, "recall": 0.8438031789709982, "support": 27619.0 }, "step": 405 }, { "epoch": 6.0, "eval_B-Claim": { "f1-score": 0.5851851851851851, "precision": 0.587360594795539, "recall": 0.5830258302583026, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7460815047021944, "precision": 0.6611111111111111, "recall": 0.8561151079136691, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7778631821675633, "precision": 0.7574850299401198, "recall": 0.7993680884676145, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5999495204442201, "precision": 0.6059138414478715, "recall": 0.5941014746313422, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.800587227795449, "precision": 0.7888138862102217, "recall": 0.8127173373075013, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8798354629791703, "precision": 0.8729593608891977, "recall": 0.8868207480592801, "support": 11336.0 }, "eval_O": { "f1-score": 0.9269283557507548, "precision": 0.9391478473690066, "recall": 0.9150227617602428, "support": 9226.0 }, "eval_accuracy": 0.843296281545313, "eval_loss": 0.5017187595367432, "eval_macro avg": { "f1-score": 0.7594900627177908, "precision": 0.744684524537581, "recall": 0.7781673354854218, "support": 27619.0 }, "eval_runtime": 4.8298, "eval_samples_per_second": 16.564, "eval_steps_per_second": 2.07, "eval_weighted avg": { "f1-score": 0.8433438519855446, "precision": 0.8437360576786502, "recall": 0.843296281545313, "support": 27619.0 }, "step": 486 }, { "epoch": 6.17, "grad_norm": 7.375185489654541, "learning_rate": 1.7530864197530865e-05, "loss": 0.4085, "step": 500 }, { "epoch": 7.0, "eval_B-Claim": { "f1-score": 0.5725338491295938, "precision": 0.6016260162601627, "recall": 0.5461254612546126, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.745644599303136, "precision": 0.722972972972973, "recall": 0.7697841726618705, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7915087187263078, "precision": 0.760932944606414, "recall": 0.8246445497630331, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5842240562846706, "precision": 0.6368731563421829, "recall": 0.5396150962259435, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7921810699588476, "precision": 0.8213333333333334, "recall": 0.7650273224043715, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8817056396148556, "precision": 0.8598256203890007, "recall": 0.9047282992237121, "support": 11336.0 }, "eval_O": { "f1-score": 0.9274176179194485, "precision": 0.9214637277979885, "recall": 0.9334489486234554, "support": 9226.0 }, "eval_accuracy": 0.8452152503711213, "eval_loss": 0.6029371023178101, "eval_macro avg": { "f1-score": 0.7564593644195513, "precision": 0.7607182531002935, "recall": 0.7547676928795714, "support": 27619.0 }, "eval_runtime": 4.8043, "eval_samples_per_second": 16.652, "eval_steps_per_second": 2.081, "eval_weighted avg": { "f1-score": 0.8415705604110203, "precision": 0.8398235103191511, "recall": 0.8452152503711213, "support": 27619.0 }, "step": 567 }, { "epoch": 8.0, "eval_B-Claim": { "f1-score": 0.5641891891891891, "precision": 0.5202492211838006, "recall": 0.6162361623616236, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7318840579710144, "precision": 0.7372262773722628, "recall": 0.7266187050359713, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7673716012084594, "precision": 0.7351664254703328, "recall": 0.8025276461295419, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5930426495115559, "precision": 0.5665832005463237, "recall": 0.6220944763809048, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7816030123722432, "precision": 0.852199413489736, "recall": 0.7218082463984103, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8746564286025915, "precision": 0.8652567975830816, "recall": 0.8842625264643613, "support": 11336.0 }, "eval_O": { "f1-score": 0.924221395658691, "precision": 0.9473085239558439, "recall": 0.9022328202904835, "support": 9226.0 }, "eval_accuracy": 0.835149715775372, "eval_loss": 0.6411188840866089, "eval_macro avg": { "f1-score": 0.7481383335019636, "precision": 0.7462842656573401, "recall": 0.7536829404373281, "support": 27619.0 }, "eval_runtime": 4.8296, "eval_samples_per_second": 16.564, "eval_steps_per_second": 2.071, "eval_weighted avg": { "f1-score": 0.8374117728187233, "precision": 0.8414359188593861, "recall": 0.835149715775372, "support": 27619.0 }, "step": 648 }, { "epoch": 9.0, "eval_B-Claim": { "f1-score": 0.6241610738255032, "precision": 0.5723076923076923, "recall": 0.6863468634686347, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.756578947368421, "precision": 0.696969696969697, "recall": 0.8273381294964028, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.780448717948718, "precision": 0.791869918699187, "recall": 0.7693522906793049, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.640754369825207, "precision": 0.5933972310969116, "recall": 0.69632591852037, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.797244094488189, "precision": 0.7898586055582643, "recall": 0.8047690014903129, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8789423814465549, "precision": 0.8996028447400019, "recall": 0.8592095977417078, "support": 11336.0 }, "eval_O": { "f1-score": 0.9287169042769858, "precision": 0.9435186220780674, "recall": 0.9143724257533059, "support": 9226.0 }, "eval_accuracy": 0.8461566313045368, "eval_loss": 0.6282346844673157, "eval_macro avg": { "f1-score": 0.7724066413113684, "precision": 0.7553606587785459, "recall": 0.7939591753071484, "support": 27619.0 }, "eval_runtime": 4.8069, "eval_samples_per_second": 16.643, "eval_steps_per_second": 2.08, "eval_weighted avg": { "f1-score": 0.8497367665000708, "precision": 0.85521547416283, "recall": 0.8461566313045368, "support": 27619.0 }, "step": 729 }, { "epoch": 10.0, "eval_B-Claim": { "f1-score": 0.5714285714285714, "precision": 0.5602836879432624, "recall": 0.5830258302583026, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7407407407407407, "precision": 0.6962025316455697, "recall": 0.7913669064748201, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.789156626506024, "precision": 0.753956834532374, "recall": 0.8278041074249605, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6017810109118275, "precision": 0.6039778449144008, "recall": 0.5996000999750063, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7822841502861408, "precision": 0.7836490528414756, "recall": 0.7809239940387481, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8826781594183972, "precision": 0.871336484744306, "recall": 0.894318983768525, "support": 11336.0 }, "eval_O": { "f1-score": 0.9204840581311821, "precision": 0.9389020403562169, "recall": 0.9027747669629308, "support": 9226.0 }, "eval_accuracy": 0.8410876570476845, "eval_loss": 0.7145851254463196, "eval_macro avg": { "f1-score": 0.7555076167746976, "precision": 0.744044068139658, "recall": 0.7685449555576133, "support": 27619.0 }, "eval_runtime": 4.8288, "eval_samples_per_second": 16.567, "eval_steps_per_second": 2.071, "eval_weighted avg": { "f1-score": 0.841386205332122, "precision": 0.842161020451525, "recall": 0.8410876570476845, "support": 27619.0 }, "step": 810 }, { "epoch": 11.0, "eval_B-Claim": { "f1-score": 0.6018348623853211, "precision": 0.5985401459854015, "recall": 0.6051660516605166, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7272727272727274, "precision": 0.768, "recall": 0.6906474820143885, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7833733013589128, "precision": 0.7928802588996764, "recall": 0.7740916271721959, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6083602886290123, "precision": 0.6056477582363141, "recall": 0.6110972256935766, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7648026315789475, "precision": 0.8532110091743119, "recall": 0.6929955290611028, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.882720702345586, "precision": 0.8874030489435678, "recall": 0.8780875088214538, "support": 11336.0 }, "eval_O": { "f1-score": 0.921590369079677, "precision": 0.8984865643982292, "recall": 0.9459137220897463, "support": 9226.0 }, "eval_accuracy": 0.8425721423657627, "eval_loss": 0.7725272178649902, "eval_macro avg": { "f1-score": 0.7557078403785977, "precision": 0.7720241122339289, "recall": 0.7425713066447115, "support": 27619.0 }, "eval_runtime": 4.7985, "eval_samples_per_second": 16.672, "eval_steps_per_second": 2.084, "eval_weighted avg": { "f1-score": 0.8415502116531838, "precision": 0.8421955187218679, "recall": 0.8425721423657627, "support": 27619.0 }, "step": 891 }, { "epoch": 12.0, "eval_B-Claim": { "f1-score": 0.5907473309608542, "precision": 0.570446735395189, "recall": 0.6125461254612546, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7328767123287672, "precision": 0.6993464052287581, "recall": 0.7697841726618705, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7870370370370371, "precision": 0.7692307692307693, "recall": 0.8056872037914692, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6039273805113003, "precision": 0.596923828125, "recall": 0.6110972256935766, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7876195269250126, "precision": 0.7980622131565528, "recall": 0.7774465971187282, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8808608098180718, "precision": 0.8633629817873782, "recall": 0.8990825688073395, "support": 11336.0 }, "eval_O": { "f1-score": 0.9129559185500112, "precision": 0.9433526011560693, "recall": 0.8844569694342077, "support": 9226.0 }, "eval_accuracy": 0.8380100655345958, "eval_loss": 0.8826749920845032, "eval_macro avg": { "f1-score": 0.7565749594472935, "precision": 0.7486750762971024, "recall": 0.765728694709778, "support": 27619.0 }, "eval_runtime": 4.8258, "eval_samples_per_second": 16.577, "eval_steps_per_second": 2.072, "eval_weighted avg": { "f1-score": 0.8389267473809466, "precision": 0.8408692792556519, "recall": 0.8380100655345958, "support": 27619.0 }, "step": 972 }, { "epoch": 12.35, "grad_norm": 6.464661598205566, "learning_rate": 1.506172839506173e-05, "loss": 0.0768, "step": 1000 }, { "epoch": 13.0, "eval_B-Claim": { "f1-score": 0.6059544658493871, "precision": 0.5766666666666667, "recall": 0.6383763837638377, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.75, "precision": 0.6909090909090909, "recall": 0.8201438848920863, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7835703001579779, "precision": 0.7835703001579779, "recall": 0.7835703001579779, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6174981923355025, "precision": 0.5962299278566442, "recall": 0.6403399150212447, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7900677200902934, "precision": 0.7978723404255319, "recall": 0.7824143070044709, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8801810597319606, "precision": 0.8856045722450437, "recall": 0.8748235709244884, "support": 11336.0 }, "eval_O": { "f1-score": 0.9129007550060182, "precision": 0.9216747680070703, "recall": 0.9042922176457836, "support": 9226.0 }, "eval_accuracy": 0.8392773090988088, "eval_loss": 0.8458374738693237, "eval_macro avg": { "f1-score": 0.7628817847387342, "precision": 0.7503610951811466, "recall": 0.7777086542014128, "support": 27619.0 }, "eval_runtime": 4.8414, "eval_samples_per_second": 16.524, "eval_steps_per_second": 2.065, "eval_weighted avg": { "f1-score": 0.8409296175505384, "precision": 0.8429896387826172, "recall": 0.8392773090988088, "support": 27619.0 }, "step": 1053 }, { "epoch": 14.0, "eval_B-Claim": { "f1-score": 0.5909090909090908, "precision": 0.5614617940199336, "recall": 0.6236162361623616, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7424749163879598, "precision": 0.69375, "recall": 0.7985611510791367, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7811764705882354, "precision": 0.7757009345794392, "recall": 0.7867298578199052, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6013087736306351, "precision": 0.5836273817925194, "recall": 0.620094976255936, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7706057596822246, "precision": 0.7702233250620347, "recall": 0.7709885742672627, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8808171531721921, "precision": 0.8792300254900237, "recall": 0.882410021171489, "support": 11336.0 }, "eval_O": { "f1-score": 0.9170672412840489, "precision": 0.9353093654908148, "recall": 0.8995230869282462, "support": 9226.0 }, "eval_accuracy": 0.8368514428473153, "eval_loss": 0.9143030047416687, "eval_macro avg": { "f1-score": 0.7549084865220552, "precision": 0.7427575466335379, "recall": 0.7688462719549054, "support": 27619.0 }, "eval_runtime": 4.8212, "eval_samples_per_second": 16.594, "eval_steps_per_second": 2.074, "eval_weighted avg": { "f1-score": 0.838578387953211, "precision": 0.8407717357848979, "recall": 0.8368514428473153, "support": 27619.0 }, "step": 1134 }, { "epoch": 15.0, "eval_B-Claim": { "f1-score": 0.5709090909090909, "precision": 0.5627240143369175, "recall": 0.5793357933579336, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7446808510638296, "precision": 0.7342657342657343, "recall": 0.7553956834532374, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7841269841269841, "precision": 0.7878787878787878, "recall": 0.7804107424960506, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5929925907321362, "precision": 0.5959111559818273, "recall": 0.5901024743814046, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7486910994764399, "precision": 0.7913669064748201, "recall": 0.7103825136612022, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.882824477638092, "precision": 0.8767945706081963, "recall": 0.8889378969654199, "support": 11336.0 }, "eval_O": { "f1-score": 0.9183122909247868, "precision": 0.9142672969488612, "recall": 0.9223932365055278, "support": 9226.0 }, "eval_accuracy": 0.8376117889858431, "eval_loss": 0.9049480557441711, "eval_macro avg": { "f1-score": 0.74893391212448, "precision": 0.7518869237850208, "recall": 0.746708334402968, "support": 27619.0 }, "eval_runtime": 4.8265, "eval_samples_per_second": 16.575, "eval_steps_per_second": 2.072, "eval_weighted avg": { "f1-score": 0.8368987072750449, "precision": 0.8365590307236803, "recall": 0.8376117889858431, "support": 27619.0 }, "step": 1215 }, { "epoch": 16.0, "eval_B-Claim": { "f1-score": 0.5880077369439072, "precision": 0.6178861788617886, "recall": 0.5608856088560885, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7475409836065575, "precision": 0.6867469879518072, "recall": 0.8201438848920863, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7921259842519685, "precision": 0.7896389324960753, "recall": 0.7946287519747235, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5849176980987624, "precision": 0.5974973931178311, "recall": 0.5728567858035492, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7757869249394673, "precision": 0.7567312234293812, "recall": 0.7958271236959762, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8798468592797044, "precision": 0.8881100026961445, "recall": 0.8717360621030346, "support": 11336.0 }, "eval_O": { "f1-score": 0.92327420388972, "precision": 0.9104320337197049, "recall": 0.9364838499891611, "support": 9226.0 }, "eval_accuracy": 0.8394583438936963, "eval_loss": 0.9981638789176941, "eval_macro avg": { "f1-score": 0.7559286272871554, "precision": 0.7495775360389619, "recall": 0.7646517239020884, "support": 27619.0 }, "eval_runtime": 4.8136, "eval_samples_per_second": 16.62, "eval_steps_per_second": 2.077, "eval_weighted avg": { "f1-score": 0.8385046755256232, "precision": 0.8379700346267476, "recall": 0.8394583438936963, "support": 27619.0 }, "step": 1296 }, { "epoch": 17.0, "eval_B-Claim": { "f1-score": 0.5753424657534246, "precision": 0.6125, "recall": 0.5424354243542435, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7160493827160496, "precision": 0.6270270270270271, "recall": 0.8345323741007195, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7974882260596545, "precision": 0.7925117004680188, "recall": 0.8025276461295419, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5781887408213218, "precision": 0.6340590515955861, "recall": 0.5313671582104474, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7680890538033396, "precision": 0.7203131796433232, "recall": 0.8226527570789866, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8883316761056591, "precision": 0.8710470538363714, "recall": 0.9063161609033169, "support": 11336.0 }, "eval_O": { "f1-score": 0.9186122627100153, "precision": 0.9246650560070283, "recall": 0.9126381964014741, "support": 9226.0 }, "eval_accuracy": 0.8417031753503024, "eval_loss": 1.0078351497650146, "eval_macro avg": { "f1-score": 0.7488716868527806, "precision": 0.7403032955110508, "recall": 0.7646385310255328, "support": 27619.0 }, "eval_runtime": 4.8211, "eval_samples_per_second": 16.594, "eval_steps_per_second": 2.074, "eval_weighted avg": { "f1-score": 0.838734195768893, "precision": 0.8380757474281803, "recall": 0.8417031753503024, "support": 27619.0 }, "step": 1377 }, { "epoch": 18.0, "eval_B-Claim": { "f1-score": 0.5923076923076923, "precision": 0.6184738955823293, "recall": 0.5682656826568265, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.745644599303136, "precision": 0.722972972972973, "recall": 0.7697841726618705, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7912087912087912, "precision": 0.7862714508580343, "recall": 0.7962085308056872, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6042421107087429, "precision": 0.6261056017153578, "recall": 0.5838540364908773, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7803540264273249, "precision": 0.7832832832832833, "recall": 0.7774465971187282, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8878702702702702, "precision": 0.8708117736873356, "recall": 0.9056104446012703, "support": 11336.0 }, "eval_O": { "f1-score": 0.9169446115151184, "precision": 0.925190334326382, "recall": 0.9088445696943421, "support": 9226.0 }, "eval_accuracy": 0.8442376624787284, "eval_loss": 0.9712997674942017, "eval_macro avg": { "f1-score": 0.7597960145344393, "precision": 0.7618727589179565, "recall": 0.7585734334328001, "support": 27619.0 }, "eval_runtime": 4.8169, "eval_samples_per_second": 16.608, "eval_steps_per_second": 2.076, "eval_weighted avg": { "f1-score": 0.8428273600970289, "precision": 0.8419904974860213, "recall": 0.8442376624787284, "support": 27619.0 }, "step": 1458 }, { "epoch": 18.52, "grad_norm": 7.254913806915283, "learning_rate": 1.2592592592592593e-05, "loss": 0.0261, "step": 1500 }, { "epoch": 19.0, "eval_B-Claim": { "f1-score": 0.588235294117647, "precision": 0.5689655172413793, "recall": 0.6088560885608856, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7224080267558529, "precision": 0.675, "recall": 0.7769784172661871, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7804107424960506, "precision": 0.7804107424960506, "recall": 0.7804107424960506, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6187416656564432, "precision": 0.60075329566855, "recall": 0.6378405398650338, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7764764515325194, "precision": 0.779, "recall": 0.7739692001987084, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8802663706992231, "precision": 0.886048797926535, "recall": 0.8745589273112209, "support": 11336.0 }, "eval_O": { "f1-score": 0.9251841746248295, "precision": 0.931640839652709, "recall": 0.9188163884673748, "support": 9226.0 }, "eval_accuracy": 0.8424635214888302, "eval_loss": 0.9876435995101929, "eval_macro avg": { "f1-score": 0.7559603894117952, "precision": 0.7459741704264606, "recall": 0.7673471863093516, "support": 27619.0 }, "eval_runtime": 4.8824, "eval_samples_per_second": 16.385, "eval_steps_per_second": 2.048, "eval_weighted avg": { "f1-score": 0.8438722529775301, "precision": 0.8455528133195762, "recall": 0.8424635214888302, "support": 27619.0 }, "step": 1539 }, { "epoch": 20.0, "eval_B-Claim": { "f1-score": 0.6126760563380281, "precision": 0.5858585858585859, "recall": 0.6420664206642066, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7375415282392027, "precision": 0.6851851851851852, "recall": 0.7985611510791367, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7827476038338658, "precision": 0.7915993537964459, "recall": 0.7740916271721959, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6214689265536723, "precision": 0.6243693239152371, "recall": 0.6185953511622094, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7863746958637469, "precision": 0.7706247019551741, "recall": 0.8027819175360159, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8849448123620308, "precision": 0.8858051971009369, "recall": 0.8840860973888497, "support": 11336.0 }, "eval_O": { "f1-score": 0.9158329708568943, "precision": 0.9188304603971198, "recall": 0.9128549750704531, "support": 9226.0 }, "eval_accuracy": 0.8439842137658858, "eval_loss": 1.0289872884750366, "eval_macro avg": { "f1-score": 0.7630837991496344, "precision": 0.7517532583155264, "recall": 0.7761482200104383, "support": 27619.0 }, "eval_runtime": 4.857, "eval_samples_per_second": 16.471, "eval_steps_per_second": 2.059, "eval_weighted avg": { "f1-score": 0.8441548041890129, "precision": 0.844457711736205, "recall": 0.8439842137658858, "support": 27619.0 }, "step": 1620 }, { "epoch": 21.0, "eval_B-Claim": { "f1-score": 0.6, "precision": 0.5913978494623656, "recall": 0.6088560885608856, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.754325259515571, "precision": 0.7266666666666667, "recall": 0.7841726618705036, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7965435978004713, "precision": 0.7921875, "recall": 0.8009478672985783, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6180417495029821, "precision": 0.614529280948851, "recall": 0.6215946013496626, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7913561847988076, "precision": 0.7913561847988078, "recall": 0.7913561847988078, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8852830847073192, "precision": 0.8755823986194996, "recall": 0.8952011291460833, "support": 11336.0 }, "eval_O": { "f1-score": 0.9224318658280921, "precision": 0.9393258426966292, "recall": 0.9061348363321049, "support": 9226.0 }, "eval_accuracy": 0.8461204243455592, "eval_loss": 1.0996745824813843, "eval_macro avg": { "f1-score": 0.7668545345933204, "precision": 0.7615779604561171, "recall": 0.7726090527652323, "support": 27619.0 }, "eval_runtime": 4.8412, "eval_samples_per_second": 16.525, "eval_steps_per_second": 2.066, "eval_weighted avg": { "f1-score": 0.8466408555926697, "precision": 0.8474703689459093, "recall": 0.8461204243455592, "support": 27619.0 }, "step": 1701 }, { "epoch": 22.0, "eval_B-Claim": { "f1-score": 0.5984251968503936, "precision": 0.6413502109704642, "recall": 0.5608856088560885, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7266666666666667, "precision": 0.6770186335403726, "recall": 0.7841726618705036, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7909162098668755, "precision": 0.7841614906832298, "recall": 0.7977883096366508, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6076327583865272, "precision": 0.665377751338489, "recall": 0.5591102224443889, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7673170731707317, "precision": 0.753713464302827, "recall": 0.7814207650273224, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8918528587205845, "precision": 0.8600688186137965, "recall": 0.9260762173606211, "support": 11336.0 }, "eval_O": { "f1-score": 0.9196605686577033, "precision": 0.9353284017036539, "recall": 0.9045089963147627, "support": 9226.0 }, "eval_accuracy": 0.847930772294435, "eval_loss": 1.1058261394500732, "eval_macro avg": { "f1-score": 0.757495904617069, "precision": 0.7595741101646905, "recall": 0.7591375402157626, "support": 27619.0 }, "eval_runtime": 4.8079, "eval_samples_per_second": 16.639, "eval_steps_per_second": 2.08, "eval_weighted avg": { "f1-score": 0.8448680329971596, "precision": 0.8444464777692308, "recall": 0.847930772294435, "support": 27619.0 }, "step": 1782 }, { "epoch": 23.0, "eval_B-Claim": { "f1-score": 0.6037099494097807, "precision": 0.5559006211180124, "recall": 0.6605166051660517, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7397260273972602, "precision": 0.7058823529411765, "recall": 0.7769784172661871, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7720706260032103, "precision": 0.7846655791190864, "recall": 0.7598736176935229, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6147824084734056, "precision": 0.5699039487726788, "recall": 0.6673331667083229, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7702471250305848, "precision": 0.7589199614271939, "recall": 0.7819175360158966, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8730755460078767, "precision": 0.8860828488372093, "recall": 0.8604446012702893, "support": 11336.0 }, "eval_O": { "f1-score": 0.9182879377431906, "precision": 0.9424920127795527, "recall": 0.8952959028831563, "support": 9226.0 }, "eval_accuracy": 0.8337014374162715, "eval_loss": 1.1438162326812744, "eval_macro avg": { "f1-score": 0.75598566000933, "precision": 0.7434067607135587, "recall": 0.771765692429061, "support": 27619.0 }, "eval_runtime": 4.8472, "eval_samples_per_second": 16.504, "eval_steps_per_second": 2.063, "eval_weighted avg": { "f1-score": 0.8376374525692796, "precision": 0.8433838333277589, "recall": 0.8337014374162715, "support": 27619.0 }, "step": 1863 }, { "epoch": 24.0, "eval_B-Claim": { "f1-score": 0.6189624329159212, "precision": 0.6006944444444444, "recall": 0.6383763837638377, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7397260273972602, "precision": 0.7058823529411765, "recall": 0.7769784172661871, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7880690737833596, "precision": 0.7831513260530422, "recall": 0.7930489731437599, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6222925285957654, "precision": 0.6063552288356652, "recall": 0.6390902274431393, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7763733592610598, "precision": 0.7601142313184198, "recall": 0.7933432687531048, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8828359526752604, "precision": 0.8836161187698833, "recall": 0.8820571630204658, "support": 11336.0 }, "eval_O": { "f1-score": 0.9172044790115285, "precision": 0.9338425249915758, "recall": 0.9011489269455886, "support": 9226.0 }, "eval_accuracy": 0.8418117962272349, "eval_loss": 1.1221617460250854, "eval_macro avg": { "f1-score": 0.7636376933771649, "precision": 0.753379461050601, "recall": 0.7748633371908691, "support": 27619.0 }, "eval_runtime": 4.8078, "eval_samples_per_second": 16.64, "eval_steps_per_second": 2.08, "eval_weighted avg": { "f1-score": 0.8433323915719101, "precision": 0.8452544012114124, "recall": 0.8418117962272349, "support": 27619.0 }, "step": 1944 }, { "epoch": 24.69, "grad_norm": 0.008052258752286434, "learning_rate": 1.0123456790123458e-05, "loss": 0.0118, "step": 2000 }, { "epoch": 25.0, "eval_B-Claim": { "f1-score": 0.6019417475728155, "precision": 0.6352459016393442, "recall": 0.5719557195571956, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7574750830564784, "precision": 0.7037037037037037, "recall": 0.8201438848920863, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7897196261682242, "precision": 0.7788018433179723, "recall": 0.8009478672985783, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6033311125916057, "precision": 0.6461187214611872, "recall": 0.5658585353661585, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7857317371121426, "precision": 0.7730769230769231, "recall": 0.7988077496274217, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8852726492056677, "precision": 0.8623891584406893, "recall": 0.9094036697247706, "support": 11336.0 }, "eval_O": { "f1-score": 0.9221917808219178, "precision": 0.9325132978723404, "recall": 0.9120962497290267, "support": 9226.0 }, "eval_accuracy": 0.8462290452224918, "eval_loss": 1.1378806829452515, "eval_macro avg": { "f1-score": 0.7636662480755503, "precision": 0.7616927927874515, "recall": 0.7684590965993198, "support": 27619.0 }, "eval_runtime": 4.8628, "eval_samples_per_second": 16.452, "eval_steps_per_second": 2.056, "eval_weighted avg": { "f1-score": 0.8438939014101086, "precision": 0.8430313935317453, "recall": 0.8462290452224918, "support": 27619.0 }, "step": 2025 }, { "epoch": 26.0, "eval_B-Claim": { "f1-score": 0.6040515653775324, "precision": 0.6029411764705882, "recall": 0.6051660516605166, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7389830508474576, "precision": 0.6987179487179487, "recall": 0.7841726618705036, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7824037706205812, "precision": 0.778125, "recall": 0.7867298578199052, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6289062499999999, "precision": 0.614650441422095, "recall": 0.64383904023994, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7797139141742523, "precision": 0.8182314410480349, "recall": 0.7446597118728266, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8828411672199895, "precision": 0.8744375216337833, "recall": 0.891407904022583, "support": 11336.0 }, "eval_O": { "f1-score": 0.9202110121991427, "precision": 0.9332367365135978, "recall": 0.9075438976804683, "support": 9226.0 }, "eval_accuracy": 0.844491111191571, "eval_loss": 1.116464376449585, "eval_macro avg": { "f1-score": 0.762444390062708, "precision": 0.760048609400864, "recall": 0.7662170178809634, "support": 27619.0 }, "eval_runtime": 4.8569, "eval_samples_per_second": 16.471, "eval_steps_per_second": 2.059, "eval_weighted avg": { "f1-score": 0.8452605268375321, "precision": 0.8465930758524871, "recall": 0.844491111191571, "support": 27619.0 }, "step": 2106 }, { "epoch": 27.0, "eval_B-Claim": { "f1-score": 0.5996409335727109, "precision": 0.583916083916084, "recall": 0.6162361623616236, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7414965986394558, "precision": 0.7032258064516129, "recall": 0.7841726618705036, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7847113884555382, "precision": 0.7750385208012327, "recall": 0.7946287519747235, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6112161310649024, "precision": 0.6164209456024403, "recall": 0.6060984753811547, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.785261102977062, "precision": 0.7717026378896883, "recall": 0.799304520615996, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8810789780832576, "precision": 0.8638636941595321, "recall": 0.8989943542695836, "support": 11336.0 }, "eval_O": { "f1-score": 0.9194492446624672, "precision": 0.9465166991851257, "recall": 0.893886841534793, "support": 9226.0 }, "eval_accuracy": 0.8418480031862123, "eval_loss": 1.133851170539856, "eval_macro avg": { "f1-score": 0.7604077682079133, "precision": 0.7515263411436737, "recall": 0.7704745382869111, "support": 27619.0 }, "eval_runtime": 4.8334, "eval_samples_per_second": 16.551, "eval_steps_per_second": 2.069, "eval_weighted avg": { "f1-score": 0.8421466887502327, "precision": 0.8433197634457057, "recall": 0.8418480031862123, "support": 27619.0 }, "step": 2187 }, { "epoch": 28.0, "eval_B-Claim": { "f1-score": 0.608, "precision": 0.6637554585152838, "recall": 0.5608856088560885, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7241379310344827, "precision": 0.695364238410596, "recall": 0.7553956834532374, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7953307392996108, "precision": 0.7837423312883436, "recall": 0.8072669826224329, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6028865979381443, "precision": 0.6698228466707391, "recall": 0.5481129717570608, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.774845869297164, "precision": 0.7693437806072478, "recall": 0.7804272230501739, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8894579598804951, "precision": 0.8615842566561932, "recall": 0.9191954834156669, "support": 11336.0 }, "eval_O": { "f1-score": 0.923653752105635, "precision": 0.9261196469434456, "recall": 0.9212009538261435, "support": 9226.0 }, "eval_accuracy": 0.8490893949817155, "eval_loss": 1.1788508892059326, "eval_macro avg": { "f1-score": 0.7597589785079332, "precision": 0.7671046512988356, "recall": 0.756069272425829, "support": 27619.0 }, "eval_runtime": 4.8209, "eval_samples_per_second": 16.595, "eval_steps_per_second": 2.074, "eval_weighted avg": { "f1-score": 0.8452625545373849, "precision": 0.8440780437434046, "recall": 0.8490893949817155, "support": 27619.0 }, "step": 2268 }, { "epoch": 29.0, "eval_B-Claim": { "f1-score": 0.6103646833013436, "precision": 0.636, "recall": 0.5867158671586716, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7733333333333333, "precision": 0.7204968944099379, "recall": 0.8345323741007195, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7955449482895783, "precision": 0.8012820512820513, "recall": 0.7898894154818326, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6074445617740233, "precision": 0.6436363636363637, "recall": 0.575106223444139, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8131604226705091, "precision": 0.7870757787075778, "recall": 0.8410332836562344, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8914829226113272, "precision": 0.8741733084619298, "recall": 0.9094918842625265, "support": 11336.0 }, "eval_O": { "f1-score": 0.9266265718972116, "precision": 0.9349073256840247, "recall": 0.9184912204639063, "support": 9226.0 }, "eval_accuracy": 0.852782504797422, "eval_loss": 1.1232454776763916, "eval_macro avg": { "f1-score": 0.7739939205539039, "precision": 0.7710816745974122, "recall": 0.7793228955097186, "support": 27619.0 }, "eval_runtime": 4.7958, "eval_samples_per_second": 16.681, "eval_steps_per_second": 2.085, "eval_weighted avg": { "f1-score": 0.850815268485991, "precision": 0.8499356548650282, "recall": 0.852782504797422, "support": 27619.0 }, "step": 2349 }, { "epoch": 30.0, "eval_B-Claim": { "f1-score": 0.5788423153692615, "precision": 0.6304347826086957, "recall": 0.5350553505535055, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7422680412371134, "precision": 0.7105263157894737, "recall": 0.7769784172661871, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7920184190330007, "precision": 0.7701492537313432, "recall": 0.8151658767772512, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5994579945799458, "precision": 0.6546315477952057, "recall": 0.5528617845538615, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7890781563126252, "precision": 0.7958564931783729, "recall": 0.7824143070044709, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8883685286103541, "precision": 0.8585418038183015, "recall": 0.9203422724064926, "support": 11336.0 }, "eval_O": { "f1-score": 0.9204178745282503, "precision": 0.9290051893562989, "recall": 0.9119878603945372, "support": 9226.0 }, "eval_accuracy": 0.8473514609507947, "eval_loss": 1.144108533859253, "eval_macro avg": { "f1-score": 0.7586359042386501, "precision": 0.7641636266110988, "recall": 0.7564008384223293, "support": 27619.0 }, "eval_runtime": 4.786, "eval_samples_per_second": 16.715, "eval_steps_per_second": 2.089, "eval_weighted avg": { "f1-score": 0.8440043412194941, "precision": 0.8429627129938011, "recall": 0.8473514609507947, "support": 27619.0 }, "step": 2430 }, { "epoch": 30.86, "grad_norm": 0.6330540180206299, "learning_rate": 7.654320987654322e-06, "loss": 0.0077, "step": 2500 }, { "epoch": 31.0, "eval_B-Claim": { "f1-score": 0.5895953757225434, "precision": 0.6169354838709677, "recall": 0.5645756457564576, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7357142857142858, "precision": 0.7304964539007093, "recall": 0.7410071942446043, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7916666666666666, "precision": 0.7737556561085973, "recall": 0.8104265402843602, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6163372859025034, "precision": 0.6517135692393424, "recall": 0.5846038490377405, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7895956734483647, "precision": 0.8197860962566845, "recall": 0.7615499254843517, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8899611426619412, "precision": 0.8624513779690475, "recall": 0.9192836979534227, "support": 11336.0 }, "eval_O": { "f1-score": 0.9214837543148321, "precision": 0.9317451523545707, "recall": 0.9114459137220897, "support": 9226.0 }, "eval_accuracy": 0.8498135341612658, "eval_loss": 1.1577571630477905, "eval_macro avg": { "f1-score": 0.7620505977758768, "precision": 0.7695548270999886, "recall": 0.7561275380690038, "support": 27619.0 }, "eval_runtime": 4.8274, "eval_samples_per_second": 16.572, "eval_steps_per_second": 2.071, "eval_weighted avg": { "f1-score": 0.8475614553244858, "precision": 0.8468547323259233, "recall": 0.8498135341612658, "support": 27619.0 }, "step": 2511 }, { "epoch": 32.0, "eval_B-Claim": { "f1-score": 0.620817843866171, "precision": 0.6254681647940075, "recall": 0.6162361623616236, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7516778523489932, "precision": 0.7044025157232704, "recall": 0.8057553956834532, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.8068535825545171, "precision": 0.7956989247311828, "recall": 0.8183254344391785, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6207257958862832, "precision": 0.6522577092511013, "recall": 0.5921019745063734, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7961023142509135, "precision": 0.7810707456978967, "recall": 0.8117237953303527, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8936483212184148, "precision": 0.876953125, "recall": 0.9109915314043754, "support": 11336.0 }, "eval_O": { "f1-score": 0.9254433982920954, "precision": 0.9348595443485954, "recall": 0.9162150444396271, "support": 9226.0 }, "eval_accuracy": 0.853760092689815, "eval_loss": 1.1496995687484741, "eval_macro avg": { "f1-score": 0.7736098726310556, "precision": 0.7672443899351507, "recall": 0.7816213340235691, "support": 27619.0 }, "eval_runtime": 4.8513, "eval_samples_per_second": 16.49, "eval_steps_per_second": 2.061, "eval_weighted avg": { "f1-score": 0.8522422681785793, "precision": 0.8515595976922847, "recall": 0.853760092689815, "support": 27619.0 }, "step": 2592 }, { "epoch": 33.0, "eval_B-Claim": { "f1-score": 0.5996055226824456, "precision": 0.6440677966101694, "recall": 0.5608856088560885, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7645051194539249, "precision": 0.7272727272727273, "recall": 0.8057553956834532, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.8009223674096849, "precision": 0.7799401197604791, "recall": 0.8230647709320695, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6167238195726722, "precision": 0.6528902541189612, "recall": 0.5843539115221195, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8000990834778301, "precision": 0.7979249011857708, "recall": 0.8022851465474417, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8918248674533948, "precision": 0.8653335545967474, "recall": 0.9199894142554693, "support": 11336.0 }, "eval_O": { "f1-score": 0.9253171538885825, "precision": 0.9420485175202157, "recall": 0.9091697376978105, "support": 9226.0 }, "eval_accuracy": 0.8528549187153771, "eval_loss": 1.1728317737579346, "eval_macro avg": { "f1-score": 0.7712854191340764, "precision": 0.7727825530092959, "recall": 0.7722148550706361, "support": 27619.0 }, "eval_runtime": 4.8324, "eval_samples_per_second": 16.555, "eval_steps_per_second": 2.069, "eval_weighted avg": { "f1-score": 0.8508837171222798, "precision": 0.8504483474473299, "recall": 0.8528549187153771, "support": 27619.0 }, "step": 2673 }, { "epoch": 34.0, "eval_B-Claim": { "f1-score": 0.6007751937984497, "precision": 0.6326530612244898, "recall": 0.5719557195571956, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7605633802816901, "precision": 0.7448275862068966, "recall": 0.7769784172661871, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.8042813455657492, "precision": 0.7792592592592592, "recall": 0.8309636650868878, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6172130077053677, "precision": 0.6463347921225383, "recall": 0.5906023494126469, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7980720446473871, "precision": 0.8154484188698807, "recall": 0.7814207650273224, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8901654805796108, "precision": 0.8658882402001669, "recall": 0.9158433309809456, "support": 11336.0 }, "eval_O": { "f1-score": 0.9227135402361988, "precision": 0.9354048335003898, "recall": 0.9103620203771949, "support": 9226.0 }, "eval_accuracy": 0.8510807777254789, "eval_loss": 1.1639914512634277, "eval_macro avg": { "f1-score": 0.7705405704020647, "precision": 0.774259455911946, "recall": 0.7683037525297687, "support": 27619.0 }, "eval_runtime": 4.8372, "eval_samples_per_second": 16.538, "eval_steps_per_second": 2.067, "eval_weighted avg": { "f1-score": 0.8493246590714963, "precision": 0.8487450081623789, "recall": 0.8510807777254789, "support": 27619.0 }, "step": 2754 }, { "epoch": 35.0, "eval_B-Claim": { "f1-score": 0.5849056603773586, "precision": 0.5984555984555985, "recall": 0.5719557195571956, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7526132404181185, "precision": 0.7297297297297297, "recall": 0.7769784172661871, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.787086856264412, "precision": 0.7664670658682635, "recall": 0.8088467614533965, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6162639646661471, "precision": 0.6416012983500136, "recall": 0.5928517870532367, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7944501541623843, "precision": 0.8227780734433209, "recall": 0.7680079483358172, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8870680139254479, "precision": 0.8550499263381895, "recall": 0.9215772759350741, "support": 11336.0 }, "eval_O": { "f1-score": 0.9231197151757901, "precision": 0.9482285714285714, "recall": 0.8993063082592673, "support": 9226.0 }, "eval_accuracy": 0.8485824975560303, "eval_loss": 1.1459705829620361, "eval_macro avg": { "f1-score": 0.7636439435699512, "precision": 0.7660443233733839, "recall": 0.7627891739800249, "support": 27619.0 }, "eval_runtime": 4.847, "eval_samples_per_second": 16.505, "eval_steps_per_second": 2.063, "eval_weighted avg": { "f1-score": 0.847197741968377, "precision": 0.8477240078865287, "recall": 0.8485824975560303, "support": 27619.0 }, "step": 2835 }, { "epoch": 36.0, "eval_B-Claim": { "f1-score": 0.5928705440900564, "precision": 0.6030534351145038, "recall": 0.5830258302583026, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7446808510638296, "precision": 0.7342657342657343, "recall": 0.7553956834532374, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7941403238242097, "precision": 0.7756024096385542, "recall": 0.8135860979462876, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6141773476327491, "precision": 0.6441700960219479, "recall": 0.5868532866783304, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.797969543147208, "precision": 0.8157758173326414, "recall": 0.7809239940387481, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8887460909051964, "precision": 0.8639127175814109, "recall": 0.9150494001411432, "support": 11336.0 }, "eval_O": { "f1-score": 0.9241083695114579, "precision": 0.9372422249470516, "recall": 0.9113375243876003, "support": 9226.0 }, "eval_accuracy": 0.850103189833086, "eval_loss": 1.204019546508789, "eval_macro avg": { "f1-score": 0.7652418671678154, "precision": 0.7677174907002635, "recall": 0.7637388309862356, "support": 27619.0 }, "eval_runtime": 4.8338, "eval_samples_per_second": 16.55, "eval_steps_per_second": 2.069, "eval_weighted avg": { "f1-score": 0.8483708730430131, "precision": 0.8478308176117514, "recall": 0.850103189833086, "support": 27619.0 }, "step": 2916 }, { "epoch": 37.0, "eval_B-Claim": { "f1-score": 0.6037037037037036, "precision": 0.6059479553903345, "recall": 0.6014760147601476, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7508771929824561, "precision": 0.7328767123287672, "recall": 0.7697841726618705, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.797805642633229, "precision": 0.7916018662519441, "recall": 0.8041074249605056, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6031746031746031, "precision": 0.6182104434531619, "recall": 0.5888527868032992, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.783661119515885, "precision": 0.7956989247311828, "recall": 0.7719821162444114, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8861057776226217, "precision": 0.8767702936096718, "recall": 0.8956422018348624, "support": 11336.0 }, "eval_O": { "f1-score": 0.9233855663395326, "precision": 0.923836389280677, "recall": 0.9229351831779753, "support": 9226.0 }, "eval_accuracy": 0.845685940837829, "eval_loss": 1.218312382698059, "eval_macro avg": { "f1-score": 0.7641019437102903, "precision": 0.7635632264351057, "recall": 0.7649685572061531, "support": 27619.0 }, "eval_runtime": 4.8054, "eval_samples_per_second": 16.648, "eval_steps_per_second": 2.081, "eval_weighted avg": { "f1-score": 0.844630448473179, "precision": 0.8437941357711972, "recall": 0.845685940837829, "support": 27619.0 }, "step": 2997 }, { "epoch": 37.04, "grad_norm": 0.3002346456050873, "learning_rate": 5.185185185185185e-06, "loss": 0.0047, "step": 3000 }, { "epoch": 38.0, "eval_B-Claim": { "f1-score": 0.5791505791505792, "precision": 0.6072874493927125, "recall": 0.5535055350553506, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7457627118644068, "precision": 0.7051282051282052, "recall": 0.7913669064748201, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7886435331230285, "precision": 0.7874015748031497, "recall": 0.7898894154818326, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5943221013531441, "precision": 0.6333050607859768, "recall": 0.5598600349912521, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7950372208436725, "precision": 0.7942488844819038, "recall": 0.7958271236959762, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8875953951623334, "precision": 0.8680947963228473, "recall": 0.9079922371206774, "support": 11336.0 }, "eval_O": { "f1-score": 0.9227005870841487, "precision": 0.9255179934569248, "recall": 0.9199002818122697, "support": 9226.0 }, "eval_accuracy": 0.8465911148122669, "eval_loss": 1.2214044332504272, "eval_macro avg": { "f1-score": 0.7590303040830447, "precision": 0.7601405663388171, "recall": 0.7597630763760256, "support": 27619.0 }, "eval_runtime": 4.8542, "eval_samples_per_second": 16.48, "eval_steps_per_second": 2.06, "eval_weighted avg": { "f1-score": 0.8440831257263712, "precision": 0.842653285331286, "recall": 0.8465911148122669, "support": 27619.0 }, "step": 3078 }, { "epoch": 39.0, "eval_B-Claim": { "f1-score": 0.5985401459854014, "precision": 0.592057761732852, "recall": 0.6051660516605166, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.75, "precision": 0.7248322147651006, "recall": 0.7769784172661871, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7913446676970634, "precision": 0.7745839636913767, "recall": 0.8088467614533965, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6315523556902868, "precision": 0.6384065372829418, "recall": 0.6248437890527369, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8023373983739839, "precision": 0.8211128445137805, "recall": 0.784401390958768, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8895184135977336, "precision": 0.8662431031600066, "recall": 0.9140790402258292, "support": 11336.0 }, "eval_O": { "f1-score": 0.9247647157097512, "precision": 0.9509792692704158, "recall": 0.8999566442662043, "support": 9226.0 }, "eval_accuracy": 0.8518773308229842, "eval_loss": 1.2323095798492432, "eval_macro avg": { "f1-score": 0.7697225281506029, "precision": 0.7668879563452106, "recall": 0.7734674421262341, "support": 27619.0 }, "eval_runtime": 4.8312, "eval_samples_per_second": 16.559, "eval_steps_per_second": 2.07, "eval_weighted avg": { "f1-score": 0.8517608065440465, "precision": 0.8527514481945246, "recall": 0.8518773308229842, "support": 27619.0 }, "step": 3159 }, { "epoch": 40.0, "eval_B-Claim": { "f1-score": 0.6181818181818182, "precision": 0.6093189964157706, "recall": 0.6273062730627307, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7671232876712328, "precision": 0.7320261437908496, "recall": 0.8057553956834532, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7863924050632911, "precision": 0.7876386687797148, "recall": 0.7851500789889415, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6347544253303415, "precision": 0.6331758269087292, "recall": 0.6363409147713072, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7987093571605857, "precision": 0.7981150793650794, "recall": 0.799304520615996, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8882548739895388, "precision": 0.8708993811986098, "recall": 0.9063161609033169, "support": 11336.0 }, "eval_O": { "f1-score": 0.9246712725651884, "precision": 0.9513872964916303, "recall": 0.8994146975937568, "support": 9226.0 }, "eval_accuracy": 0.8510807777254789, "eval_loss": 1.2150152921676636, "eval_macro avg": { "f1-score": 0.7740124914231423, "precision": 0.7689373418500549, "recall": 0.7799411488027861, "support": 27619.0 }, "eval_runtime": 4.8694, "eval_samples_per_second": 16.429, "eval_steps_per_second": 2.054, "eval_weighted avg": { "f1-score": 0.8515758212122608, "precision": 0.8528697302681528, "recall": 0.8510807777254789, "support": 27619.0 }, "step": 3240 }, { "epoch": 41.0, "eval_B-Claim": { "f1-score": 0.6189624329159212, "precision": 0.6006944444444444, "recall": 0.6383763837638377, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.763888888888889, "precision": 0.738255033557047, "recall": 0.7913669064748201, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7888446215139443, "precision": 0.7958199356913184, "recall": 0.7819905213270142, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6221727845754542, "precision": 0.6154034229828851, "recall": 0.6290927268182954, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8000988386459105, "precision": 0.795968534906588, "recall": 0.8042722305017387, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8858267716535434, "precision": 0.8786011801457827, "recall": 0.8931721947776994, "support": 11336.0 }, "eval_O": { "f1-score": 0.9220421215128459, "precision": 0.9382854578096947, "recall": 0.9063516150010839, "support": 9226.0 }, "eval_accuracy": 0.8472790470328397, "eval_loss": 1.2192243337631226, "eval_macro avg": { "f1-score": 0.7716909228152156, "precision": 0.7661468585053944, "recall": 0.7778032255234985, "support": 27619.0 }, "eval_runtime": 4.8538, "eval_samples_per_second": 16.482, "eval_steps_per_second": 2.06, "eval_weighted avg": { "f1-score": 0.848027198309743, "precision": 0.8490574615810783, "recall": 0.8472790470328397, "support": 27619.0 }, "step": 3321 }, { "epoch": 42.0, "eval_B-Claim": { "f1-score": 0.6057347670250897, "precision": 0.5888501742160279, "recall": 0.6236162361623616, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7464788732394366, "precision": 0.7310344827586207, "recall": 0.762589928057554, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7974683544303797, "precision": 0.7987321711568938, "recall": 0.7962085308056872, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6247534516765286, "precision": 0.6163950377037217, "recall": 0.633341664583854, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8016297428062135, "precision": 0.8223615464994776, "recall": 0.7819175360158966, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8880753138075314, "precision": 0.877670572019297, "recall": 0.8987297106563161, "support": 11336.0 }, "eval_O": { "f1-score": 0.9210424816794314, "precision": 0.9366804886249019, "recall": 0.9059180576631259, "support": 9226.0 }, "eval_accuracy": 0.8484376697201202, "eval_loss": 1.1968408823013306, "eval_macro avg": { "f1-score": 0.7693118549520872, "precision": 0.767389210425563, "recall": 0.7717602377063992, "support": 27619.0 }, "eval_runtime": 4.8135, "eval_samples_per_second": 16.62, "eval_steps_per_second": 2.077, "eval_weighted avg": { "f1-score": 0.849081830599291, "precision": 0.8501208510836178, "recall": 0.8484376697201202, "support": 27619.0 }, "step": 3402 }, { "epoch": 43.0, "eval_B-Claim": { "f1-score": 0.6112115732368897, "precision": 0.599290780141844, "recall": 0.6236162361623616, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7491166077738515, "precision": 0.7361111111111112, "recall": 0.762589928057554, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7968503937007874, "precision": 0.7943485086342229, "recall": 0.7993680884676145, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6379624014554275, "precision": 0.6196983977379831, "recall": 0.6573356660834792, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8047155304971809, "precision": 0.8311275807305453, "recall": 0.7799304520615996, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8881976693244547, "precision": 0.8822454308093994, "recall": 0.8942307692307693, "support": 11336.0 }, "eval_O": { "f1-score": 0.9249408007048846, "precision": 0.9401097055860294, "recall": 0.9102536310427054, "support": 9226.0 }, "eval_accuracy": 0.851442847315254, "eval_loss": 1.2053319215774536, "eval_macro avg": { "f1-score": 0.7732849966704965, "precision": 0.7718473592501622, "recall": 0.775332110158012, "support": 27619.0 }, "eval_runtime": 4.8135, "eval_samples_per_second": 16.62, "eval_steps_per_second": 2.077, "eval_weighted avg": { "f1-score": 0.8526255252514442, "precision": 0.854289051326285, "recall": 0.851442847315254, "support": 27619.0 }, "step": 3483 } ], "logging_steps": 500, "max_steps": 4050, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 6183004946874000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }