{ "best_metric": 0.508861243724823, "best_model_checkpoint": "longformer-full_labels/checkpoint-324", "epoch": 10.0, "eval_steps": 500, "global_step": 810, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_B-Claim": { "f1-score": 0.006993006993006993, "precision": 0.5, "recall": 0.0035211267605633804, "support": 284.0 }, "eval_B-MajorClaim": { "f1-score": 0.0, "precision": 0.0, "recall": 0.0, "support": 141.0 }, "eval_B-Premise": { "f1-score": 0.6336375488917861, "precision": 0.5883777239709443, "recall": 0.6864406779661016, "support": 708.0 }, "eval_I-Claim": { "f1-score": 0.37228850325379614, "precision": 0.41618672324946954, "recall": 0.3367672308069659, "support": 4077.0 }, "eval_I-MajorClaim": { "f1-score": 0.49153013051930017, "precision": 0.5611921369689283, "recall": 0.43725296442687744, "support": 2024.0 }, "eval_I-Premise": { "f1-score": 0.846019669697532, "precision": 0.7746890504995582, "recall": 0.9318181818181818, "support": 12232.0 }, "eval_O": { "f1-score": 0.8650519031141869, "precision": 0.9111808904340025, "recall": 0.8233684637211187, "support": 9868.0 }, "eval_accuracy": 0.7591191109292971, "eval_loss": 0.6442636251449585, "eval_macro avg": { "f1-score": 0.4593601089242298, "precision": 0.535946646446129, "recall": 0.4598812350714013, "support": 29334.0 }, "eval_runtime": 6.1153, "eval_samples_per_second": 13.082, "eval_steps_per_second": 1.635, "eval_weighted avg": { "f1-score": 0.7448054609057474, "precision": 0.7451676238152983, "recall": 0.7591191109292971, "support": 29334.0 }, "step": 81 }, { "epoch": 2.0, "eval_B-Claim": { "f1-score": 0.47385620915032683, "precision": 0.4420731707317073, "recall": 0.5105633802816901, "support": 284.0 }, "eval_B-MajorClaim": { "f1-score": 0.5267489711934156, "precision": 0.6274509803921569, "recall": 0.45390070921985815, "support": 141.0 }, "eval_B-Premise": { "f1-score": 0.7686170212765958, "precision": 0.7261306532663316, "recall": 0.8163841807909604, "support": 708.0 }, "eval_I-Claim": { "f1-score": 0.5068787075602675, "precision": 0.5221008840353614, "recall": 0.49251900907530044, "support": 4077.0 }, "eval_I-MajorClaim": { "f1-score": 0.6644462947543713, "precision": 0.5741007194244604, "recall": 0.7885375494071146, "support": 2024.0 }, "eval_I-Premise": { "f1-score": 0.8735948241002829, "precision": 0.8642982877260361, "recall": 0.8830935251798561, "support": 12232.0 }, "eval_O": { "f1-score": 0.8825588796944621, "precision": 0.925979519145147, "recall": 0.8430279691933522, "support": 9868.0 }, "eval_accuracy": 0.8015272380173177, "eval_loss": 0.5138605833053589, "eval_macro avg": { "f1-score": 0.6709572725328175, "precision": 0.668876316388743, "recall": 0.6840037604497332, "support": 29334.0 }, "eval_runtime": 6.1482, "eval_samples_per_second": 13.012, "eval_steps_per_second": 1.627, "eval_weighted avg": { "f1-score": 0.8031401896750007, "precision": 0.8089032379475054, "recall": 0.8015272380173177, "support": 29334.0 }, "step": 162 }, { "epoch": 3.0, "eval_B-Claim": { "f1-score": 0.4233576642335767, "precision": 0.4393939393939394, "recall": 0.4084507042253521, "support": 284.0 }, "eval_B-MajorClaim": { "f1-score": 0.6294820717131473, "precision": 0.7181818181818181, "recall": 0.5602836879432624, "support": 141.0 }, "eval_B-Premise": { "f1-score": 0.7608562691131499, "precision": 0.6709816612729234, "recall": 0.8785310734463276, "support": 708.0 }, "eval_I-Claim": { "f1-score": 0.46000546000546, "precision": 0.5186211141889813, "recall": 0.41329408879077756, "support": 4077.0 }, "eval_I-MajorClaim": { "f1-score": 0.6850351161534306, "precision": 0.7556615017878426, "recall": 0.6264822134387352, "support": 2024.0 }, "eval_I-Premise": { "f1-score": 0.8689253296477533, "precision": 0.7999862438957287, "recall": 0.9508665794637018, "support": 12232.0 }, "eval_O": { "f1-score": 0.8740981828044481, "precision": 0.9404692424419283, "recall": 0.8164775030401297, "support": 9868.0 }, "eval_accuracy": 0.7996863707643008, "eval_loss": 0.5769864320755005, "eval_macro avg": { "f1-score": 0.6716800133815666, "precision": 0.6918993601661659, "recall": 0.6649122643354695, "support": 29334.0 }, "eval_runtime": 6.0693, "eval_samples_per_second": 13.181, "eval_steps_per_second": 1.648, "eval_weighted avg": { "f1-score": 0.7930703491848509, "precision": 0.7980829724295806, "recall": 0.7996863707643008, "support": 29334.0 }, "step": 243 }, { "epoch": 4.0, "eval_B-Claim": { "f1-score": 0.5486211901306242, "precision": 0.4666666666666667, "recall": 0.6654929577464789, "support": 284.0 }, "eval_B-MajorClaim": { "f1-score": 0.6866666666666668, "precision": 0.6477987421383647, "recall": 0.7304964539007093, "support": 141.0 }, "eval_B-Premise": { "f1-score": 0.7603550295857988, "precision": 0.7981366459627329, "recall": 0.7259887005649718, "support": 708.0 }, "eval_I-Claim": { "f1-score": 0.5798045602605862, "precision": 0.5201636469900643, "recall": 0.6548933038999264, "support": 4077.0 }, "eval_I-MajorClaim": { "f1-score": 0.7716646989374262, "precision": 0.7390321121664405, "recall": 0.8073122529644269, "support": 2024.0 }, "eval_I-Premise": { "f1-score": 0.8704094554664417, "precision": 0.899807994414383, "recall": 0.842871157619359, "support": 12232.0 }, "eval_O": { "f1-score": 0.8969362234264276, "precision": 0.9231016731016731, "recall": 0.8722132144304824, "support": 9868.0 }, "eval_accuracy": 0.8190836571896093, "eval_loss": 0.508861243724823, "eval_macro avg": { "f1-score": 0.7306368320677102, "precision": 0.7135296402057607, "recall": 0.7570382915894793, "support": 29334.0 }, "eval_runtime": 6.0931, "eval_samples_per_second": 13.13, "eval_steps_per_second": 1.641, "eval_weighted avg": { "f1-score": 0.825475128990697, "precision": 0.835926930625345, "recall": 0.8190836571896093, "support": 29334.0 }, "step": 324 }, { "epoch": 5.0, "eval_B-Claim": { "f1-score": 0.5466237942122186, "precision": 0.5029585798816568, "recall": 0.5985915492957746, "support": 284.0 }, "eval_B-MajorClaim": { "f1-score": 0.6842105263157895, "precision": 0.728, "recall": 0.6453900709219859, "support": 141.0 }, "eval_B-Premise": { "f1-score": 0.775623268698061, "precision": 0.7608695652173914, "recall": 0.7909604519774012, "support": 708.0 }, "eval_I-Claim": { "f1-score": 0.5628513335725392, "precision": 0.5492530345471522, "recall": 0.577140053961246, "support": 4077.0 }, "eval_I-MajorClaim": { "f1-score": 0.7028394714647174, "precision": 0.8153946510110893, "recall": 0.6175889328063241, "support": 2024.0 }, "eval_I-Premise": { "f1-score": 0.8796072750684049, "precision": 0.8660855784469097, "recall": 0.8935578809679529, "support": 12232.0 }, "eval_O": { "f1-score": 0.8964530307676581, "precision": 0.9043101670447515, "recall": 0.8887312525334414, "support": 9868.0 }, "eval_accuracy": 0.8223904002181769, "eval_loss": 0.5749920606613159, "eval_macro avg": { "f1-score": 0.7211726714427698, "precision": 0.7324102251641358, "recall": 0.715994313209161, "support": 29334.0 }, "eval_runtime": 6.0798, "eval_samples_per_second": 13.158, "eval_steps_per_second": 1.645, "eval_weighted avg": { "f1-score": 0.8223802682715222, "precision": 0.8246928072651426, "recall": 0.8223904002181769, "support": 29334.0 }, "step": 405 }, { "epoch": 6.0, "eval_B-Claim": { "f1-score": 0.5645933014354066, "precision": 0.5160349854227405, "recall": 0.6232394366197183, "support": 284.0 }, "eval_B-MajorClaim": { "f1-score": 0.7272727272727273, "precision": 0.6923076923076923, "recall": 0.7659574468085106, "support": 141.0 }, "eval_B-Premise": { "f1-score": 0.7851644506648006, "precision": 0.7780859916782247, "recall": 0.7923728813559322, "support": 708.0 }, "eval_I-Claim": { "f1-score": 0.5951509606587375, "precision": 0.5575316048853654, "recall": 0.6382143733137111, "support": 4077.0 }, "eval_I-MajorClaim": { "f1-score": 0.7690405539070228, "precision": 0.7698019801980198, "recall": 0.7682806324110671, "support": 2024.0 }, "eval_I-Premise": { "f1-score": 0.8794871794871795, "precision": 0.8899397388684298, "recall": 0.8692773054283846, "support": 12232.0 }, "eval_O": { "f1-score": 0.907427508140797, "precision": 0.9260470513767275, "recall": 0.8895419537900284, "support": 9868.0 }, "eval_accuracy": 0.8322765391695643, "eval_loss": 0.5503024458885193, "eval_macro avg": { "f1-score": 0.7468766687952387, "precision": 0.7328212921053143, "recall": 0.763840575675336, "support": 29334.0 }, "eval_runtime": 6.0682, "eval_samples_per_second": 13.184, "eval_steps_per_second": 1.648, "eval_weighted avg": { "f1-score": 0.835690214793681, "precision": 0.8403274341189826, "recall": 0.8322765391695643, "support": 29334.0 }, "step": 486 }, { "epoch": 6.17, "grad_norm": 7.526494026184082, "learning_rate": 1.3827160493827162e-05, "loss": 0.4181, "step": 500 }, { "epoch": 7.0, "eval_B-Claim": { "f1-score": 0.5531914893617021, "precision": 0.5571428571428572, "recall": 0.5492957746478874, "support": 284.0 }, "eval_B-MajorClaim": { "f1-score": 0.7228070175438596, "precision": 0.7152777777777778, "recall": 0.7304964539007093, "support": 141.0 }, "eval_B-Premise": { "f1-score": 0.7938420348058902, "precision": 0.7544529262086515, "recall": 0.8375706214689266, "support": 708.0 }, "eval_I-Claim": { "f1-score": 0.5534057778955738, "precision": 0.6019025655808591, "recall": 0.5121412803532008, "support": 4077.0 }, "eval_I-MajorClaim": { "f1-score": 0.7677873338545738, "precision": 0.8124655267512411, "recall": 0.7277667984189723, "support": 2024.0 }, "eval_I-Premise": { "f1-score": 0.8876130554463233, "precision": 0.855129565085619, "recall": 0.9226618705035972, "support": 12232.0 }, "eval_O": { "f1-score": 0.9097990979909799, "precision": 0.9203649937785151, "recall": 0.8994730441832185, "support": 9868.0 }, "eval_accuracy": 0.8377650507943001, "eval_loss": 0.6418657302856445, "eval_macro avg": { "f1-score": 0.7412065438427005, "precision": 0.7452480303322171, "recall": 0.7399151204966445, "support": 29334.0 }, "eval_runtime": 6.075, "eval_samples_per_second": 13.169, "eval_steps_per_second": 1.646, "eval_weighted avg": { "f1-score": 0.8340655773672634, "precision": 0.8329491032454597, "recall": 0.8377650507943001, "support": 29334.0 }, "step": 567 }, { "epoch": 8.0, "eval_B-Claim": { "f1-score": 0.5652951699463328, "precision": 0.5745454545454546, "recall": 0.5563380281690141, "support": 284.0 }, "eval_B-MajorClaim": { "f1-score": 0.7197231833910034, "precision": 0.7027027027027027, "recall": 0.7375886524822695, "support": 141.0 }, "eval_B-Premise": { "f1-score": 0.7941176470588234, "precision": 0.7538071065989848, "recall": 0.8389830508474576, "support": 708.0 }, "eval_I-Claim": { "f1-score": 0.5740005295207837, "precision": 0.6235260281852172, "recall": 0.5317635516311013, "support": 4077.0 }, "eval_I-MajorClaim": { "f1-score": 0.773091849935317, "precision": 0.8115154807170016, "recall": 0.7381422924901185, "support": 2024.0 }, "eval_I-Premise": { "f1-score": 0.8920165582495565, "precision": 0.8614178024822965, "recall": 0.9248691955526488, "support": 12232.0 }, "eval_O": { "f1-score": 0.9097236438075741, "precision": 0.9189412737799835, "recall": 0.9006890960680989, "support": 9868.0 }, "eval_accuracy": 0.8426740301356788, "eval_loss": 0.6667934656143188, "eval_macro avg": { "f1-score": 0.7468526545584844, "precision": 0.7494936927159488, "recall": 0.7469105524629585, "support": 29334.0 }, "eval_runtime": 6.0698, "eval_samples_per_second": 13.18, "eval_steps_per_second": 1.648, "eval_weighted avg": { "f1-score": 0.8392138000943469, "precision": 0.8381245456177384, "recall": 0.8426740301356788, "support": 29334.0 }, "step": 648 }, { "epoch": 9.0, "eval_B-Claim": { "f1-score": 0.5863192182410424, "precision": 0.5454545454545454, "recall": 0.6338028169014085, "support": 284.0 }, "eval_B-MajorClaim": { "f1-score": 0.7210884353741497, "precision": 0.6928104575163399, "recall": 0.75177304964539, "support": 141.0 }, "eval_B-Premise": { "f1-score": 0.7762879322512349, "precision": 0.7757404795486601, "recall": 0.7768361581920904, "support": 708.0 }, "eval_I-Claim": { "f1-score": 0.6114039290848108, "precision": 0.5975181456333412, "recall": 0.6259504537650233, "support": 4077.0 }, "eval_I-MajorClaim": { "f1-score": 0.7696835908756438, "precision": 0.7642474427666829, "recall": 0.775197628458498, "support": 2024.0 }, "eval_I-Premise": { "f1-score": 0.8845693533077462, "precision": 0.893157763146929, "recall": 0.8761445389143231, "support": 12232.0 }, "eval_O": { "f1-score": 0.9076040026413369, "precision": 0.9098686220592729, "recall": 0.9053506282934739, "support": 9868.0 }, "eval_accuracy": 0.8388900252266994, "eval_loss": 0.7191675305366516, "eval_macro avg": { "f1-score": 0.7509937802537092, "precision": 0.7398282080179673, "recall": 0.7635793248814581, "support": 29334.0 }, "eval_runtime": 6.0723, "eval_samples_per_second": 13.175, "eval_steps_per_second": 1.647, "eval_weighted avg": { "f1-score": 0.8401384747371046, "precision": 0.8416318009865815, "recall": 0.8388900252266994, "support": 29334.0 }, "step": 729 }, { "epoch": 10.0, "eval_B-Claim": { "f1-score": 0.5391621129326047, "precision": 0.5584905660377358, "recall": 0.5211267605633803, "support": 284.0 }, "eval_B-MajorClaim": { "f1-score": 0.7254237288135594, "precision": 0.6948051948051948, "recall": 0.7588652482269503, "support": 141.0 }, "eval_B-Premise": { "f1-score": 0.7871054398925452, "precision": 0.7503201024327785, "recall": 0.827683615819209, "support": 708.0 }, "eval_I-Claim": { "f1-score": 0.5272531030760929, "precision": 0.5859070464767616, "recall": 0.4792739759627177, "support": 4077.0 }, "eval_I-MajorClaim": { "f1-score": 0.7522123893805309, "precision": 0.7485322896281801, "recall": 0.7559288537549407, "support": 2024.0 }, "eval_I-Premise": { "f1-score": 0.8810152126668737, "precision": 0.8385786052009456, "recall": 0.92797580117724, "support": 12232.0 }, "eval_O": { "f1-score": 0.9004034159375491, "precision": 0.9320967566981234, "recall": 0.8707944872314552, "support": 9868.0 }, "eval_accuracy": 0.8273334696938706, "eval_loss": 0.8727766871452332, "eval_macro avg": { "f1-score": 0.7303679146713938, "precision": 0.7298186516113886, "recall": 0.7345212489622704, "support": 29334.0 }, "eval_runtime": 6.1014, "eval_samples_per_second": 13.112, "eval_steps_per_second": 1.639, "eval_weighted avg": { "f1-score": 0.8231582874630071, "precision": 0.8231745470223255, "recall": 0.8273334696938706, "support": 29334.0 }, "step": 810 } ], "logging_steps": 500, "max_steps": 1620, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 1437908127180000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }