{ "best_metric": 0.8000965798413886, "best_model_checkpoint": "/content/drive/MyDrive/Projects/ModernBertTuning/2025-02-19_17:10:03/checkpoint-3300", "epoch": 5.809859154929578, "eval_steps": 100, "global_step": 3300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.176056338028169, "grad_norm": 5.5684285163879395, "learning_rate": 8.802816901408451e-06, "loss": 0.9029, "step": 100 }, { "epoch": 0.176056338028169, "eval_action_accuracy": 0.7632042253521126, "eval_action_f1": 0.3275, "eval_action_precision": 0.6787564766839378, "eval_action_recall": 0.2158154859967051, "eval_combined_f1": 0.39009832159236646, "eval_loss": 1.5983511209487915, "eval_runtime": 53.3406, "eval_samples_per_second": 42.594, "eval_severity_accuracy": 0.4942781690140845, "eval_severity_f1": 0.4526966431847329, "eval_severity_precision": 0.5190575556900403, "eval_severity_recall": 0.4942781690140845, "eval_steps_per_second": 2.662, "step": 100 }, { "epoch": 0.352112676056338, "grad_norm": 10.437986373901367, "learning_rate": 1.7605633802816902e-05, "loss": 0.6875, "step": 200 }, { "epoch": 0.352112676056338, "eval_action_accuracy": 0.8393485915492958, "eval_action_f1": 0.7253574115876599, "eval_action_precision": 0.667590027700831, "eval_action_recall": 0.7940691927512356, "eval_combined_f1": 0.6889564202307465, "eval_loss": 1.1708728075027466, "eval_runtime": 53.402, "eval_samples_per_second": 42.545, "eval_severity_accuracy": 0.6615316901408451, "eval_severity_f1": 0.6525554288738333, "eval_severity_precision": 0.6774842234837531, "eval_severity_recall": 0.6615316901408451, "eval_steps_per_second": 2.659, "step": 200 }, { "epoch": 0.528169014084507, "grad_norm": 8.755282402038574, "learning_rate": 2.640845070422535e-05, "loss": 0.5914, "step": 300 }, { "epoch": 0.528169014084507, "eval_action_accuracy": 0.8653169014084507, "eval_action_f1": 0.7688821752265861, "eval_action_precision": 0.7099023709902371, "eval_action_recall": 0.8385502471169687, "eval_combined_f1": 0.7012112761418396, "eval_loss": 1.1276360750198364, "eval_runtime": 53.4046, "eval_samples_per_second": 42.543, "eval_severity_accuracy": 0.6307218309859155, "eval_severity_f1": 0.6335403770570933, "eval_severity_precision": 0.6726339478831171, "eval_severity_recall": 0.6307218309859155, "eval_steps_per_second": 2.659, "step": 300 }, { "epoch": 0.704225352112676, "grad_norm": 4.745133399963379, "learning_rate": 3.5211267605633805e-05, "loss": 0.5167, "step": 400 }, { "epoch": 0.704225352112676, "eval_action_accuracy": 0.8820422535211268, "eval_action_f1": 0.750465549348231, "eval_action_precision": 0.8629550321199143, "eval_action_recall": 0.6639209225700164, "eval_combined_f1": 0.7324396418875856, "eval_loss": 1.0260754823684692, "eval_runtime": 53.3961, "eval_samples_per_second": 42.55, "eval_severity_accuracy": 0.715669014084507, "eval_severity_f1": 0.7144137344269402, "eval_severity_precision": 0.717618479930179, "eval_severity_recall": 0.715669014084507, "eval_steps_per_second": 2.659, "step": 400 }, { "epoch": 0.8802816901408451, "grad_norm": 4.491367816925049, "learning_rate": 4.4014084507042256e-05, "loss": 0.5228, "step": 500 }, { "epoch": 0.8802816901408451, "eval_action_accuracy": 0.8811619718309859, "eval_action_f1": 0.7652173913043478, "eval_action_precision": 0.8103130755064457, "eval_action_recall": 0.7248764415156508, "eval_combined_f1": 0.7303003562324298, "eval_loss": 1.0010697841644287, "eval_runtime": 53.3858, "eval_samples_per_second": 42.558, "eval_severity_accuracy": 0.6941021126760564, "eval_severity_f1": 0.6953833211605118, "eval_severity_precision": 0.7027972731452775, "eval_severity_recall": 0.6941021126760564, "eval_steps_per_second": 2.66, "step": 500 }, { "epoch": 1.056338028169014, "grad_norm": 7.762014865875244, "learning_rate": 4.968701095461659e-05, "loss": 0.4707, "step": 600 }, { "epoch": 1.056338028169014, "eval_action_accuracy": 0.8816021126760564, "eval_action_f1": 0.7497674418604651, "eval_action_precision": 0.8611111111111112, "eval_action_recall": 0.6639209225700164, "eval_combined_f1": 0.7288028162963264, "eval_loss": 1.05709969997406, "eval_runtime": 53.3852, "eval_samples_per_second": 42.559, "eval_severity_accuracy": 0.710387323943662, "eval_severity_f1": 0.7078381907321877, "eval_severity_precision": 0.7185583041734807, "eval_severity_recall": 0.710387323943662, "eval_steps_per_second": 2.66, "step": 600 }, { "epoch": 1.232394366197183, "grad_norm": 9.271745681762695, "learning_rate": 4.870892018779343e-05, "loss": 0.4103, "step": 700 }, { "epoch": 1.232394366197183, "eval_action_accuracy": 0.8503521126760564, "eval_action_f1": 0.621380846325167, "eval_action_precision": 0.9587628865979382, "eval_action_recall": 0.4596375617792422, "eval_combined_f1": 0.6340972974514396, "eval_loss": 1.1522221565246582, "eval_runtime": 53.3981, "eval_samples_per_second": 42.548, "eval_severity_accuracy": 0.6553697183098591, "eval_severity_f1": 0.6468137485777123, "eval_severity_precision": 0.7014511458318834, "eval_severity_recall": 0.6553697183098591, "eval_steps_per_second": 2.659, "step": 700 }, { "epoch": 1.408450704225352, "grad_norm": 7.816728115081787, "learning_rate": 4.773082942097027e-05, "loss": 0.4, "step": 800 }, { "epoch": 1.408450704225352, "eval_action_accuracy": 0.8820422535211268, "eval_action_f1": 0.7922480620155039, "eval_action_precision": 0.7481698389458272, "eval_action_recall": 0.841845140032949, "eval_combined_f1": 0.7584899763574804, "eval_loss": 0.9812976717948914, "eval_runtime": 53.3978, "eval_samples_per_second": 42.549, "eval_severity_accuracy": 0.7235915492957746, "eval_severity_f1": 0.724731890699457, "eval_severity_precision": 0.7317594548978289, "eval_severity_recall": 0.7235915492957746, "eval_steps_per_second": 2.659, "step": 800 }, { "epoch": 1.584507042253521, "grad_norm": 7.098080158233643, "learning_rate": 4.675273865414711e-05, "loss": 0.4179, "step": 900 }, { "epoch": 1.584507042253521, "eval_action_accuracy": 0.894806338028169, "eval_action_f1": 0.7879325643300799, "eval_action_precision": 0.8538461538461538, "eval_action_recall": 0.7314662273476112, "eval_combined_f1": 0.7270919018906684, "eval_loss": 1.177077054977417, "eval_runtime": 53.4076, "eval_samples_per_second": 42.541, "eval_severity_accuracy": 0.6822183098591549, "eval_severity_f1": 0.666251239451257, "eval_severity_precision": 0.7051808476907062, "eval_severity_recall": 0.6822183098591549, "eval_steps_per_second": 2.659, "step": 900 }, { "epoch": 1.76056338028169, "grad_norm": 3.6362385749816895, "learning_rate": 4.577464788732395e-05, "loss": 0.381, "step": 1000 }, { "epoch": 1.76056338028169, "eval_action_accuracy": 0.8934859154929577, "eval_action_f1": 0.8, "eval_action_precision": 0.802653399668325, "eval_action_recall": 0.7973640856672158, "eval_combined_f1": 0.7276179625363967, "eval_loss": 1.0336343050003052, "eval_runtime": 53.3973, "eval_samples_per_second": 42.549, "eval_severity_accuracy": 0.6597711267605634, "eval_severity_f1": 0.6552359250727933, "eval_severity_precision": 0.690194073616287, "eval_severity_recall": 0.6597711267605634, "eval_steps_per_second": 2.659, "step": 1000 }, { "epoch": 1.936619718309859, "grad_norm": 4.175089359283447, "learning_rate": 4.479655712050079e-05, "loss": 0.384, "step": 1100 }, { "epoch": 1.936619718309859, "eval_action_accuracy": 0.8899647887323944, "eval_action_f1": 0.7987117552334944, "eval_action_precision": 0.7811023622047244, "eval_action_recall": 0.8171334431630972, "eval_combined_f1": 0.7613638710145674, "eval_loss": 0.9859868288040161, "eval_runtime": 53.3975, "eval_samples_per_second": 42.549, "eval_severity_accuracy": 0.7240316901408451, "eval_severity_f1": 0.7240159867956404, "eval_severity_precision": 0.7333723870533466, "eval_severity_recall": 0.7240316901408451, "eval_steps_per_second": 2.659, "step": 1100 }, { "epoch": 2.112676056338028, "grad_norm": 5.42644739151001, "learning_rate": 4.3818466353677626e-05, "loss": 0.287, "step": 1200 }, { "epoch": 2.112676056338028, "eval_action_accuracy": 0.8798415492957746, "eval_action_f1": 0.7272727272727273, "eval_action_precision": 0.9238578680203046, "eval_action_recall": 0.5996705107084019, "eval_combined_f1": 0.7194719198906718, "eval_loss": 1.2687214612960815, "eval_runtime": 53.3984, "eval_samples_per_second": 42.548, "eval_severity_accuracy": 0.7130281690140845, "eval_severity_f1": 0.7116711125086165, "eval_severity_precision": 0.7299090747607111, "eval_severity_recall": 0.7130281690140845, "eval_steps_per_second": 2.659, "step": 1200 }, { "epoch": 2.288732394366197, "grad_norm": 4.9831342697143555, "learning_rate": 4.284037558685447e-05, "loss": 0.234, "step": 1300 }, { "epoch": 2.288732394366197, "eval_action_accuracy": 0.8758802816901409, "eval_action_f1": 0.7904903417533432, "eval_action_precision": 0.7198917456021651, "eval_action_recall": 0.8764415156507414, "eval_combined_f1": 0.7408713797328776, "eval_loss": 1.2081983089447021, "eval_runtime": 53.3856, "eval_samples_per_second": 42.558, "eval_severity_accuracy": 0.6954225352112676, "eval_severity_f1": 0.691252417712412, "eval_severity_precision": 0.710940220005423, "eval_severity_recall": 0.6954225352112676, "eval_steps_per_second": 2.66, "step": 1300 }, { "epoch": 2.464788732394366, "grad_norm": 8.448158264160156, "learning_rate": 4.18622848200313e-05, "loss": 0.2284, "step": 1400 }, { "epoch": 2.464788732394366, "eval_action_accuracy": 0.9097711267605634, "eval_action_f1": 0.820333041191937, "eval_action_precision": 0.8764044943820225, "eval_action_recall": 0.771004942339374, "eval_combined_f1": 0.7709403023211041, "eval_loss": 1.1236168146133423, "eval_runtime": 53.3827, "eval_samples_per_second": 42.561, "eval_severity_accuracy": 0.7205105633802817, "eval_severity_f1": 0.7215475634502713, "eval_severity_precision": 0.7388563066943578, "eval_severity_recall": 0.7205105633802817, "eval_steps_per_second": 2.66, "step": 1400 }, { "epoch": 2.640845070422535, "grad_norm": 3.273317575454712, "learning_rate": 4.0884194053208144e-05, "loss": 0.2147, "step": 1500 }, { "epoch": 2.640845070422535, "eval_action_accuracy": 0.9066901408450704, "eval_action_f1": 0.8233333333333334, "eval_action_precision": 0.8330522765598651, "eval_action_recall": 0.8138385502471169, "eval_combined_f1": 0.7802089055740911, "eval_loss": 1.08566153049469, "eval_runtime": 53.3876, "eval_samples_per_second": 42.557, "eval_severity_accuracy": 0.7394366197183099, "eval_severity_f1": 0.7370844778148489, "eval_severity_precision": 0.7443892609781334, "eval_severity_recall": 0.7394366197183099, "eval_steps_per_second": 2.66, "step": 1500 }, { "epoch": 2.816901408450704, "grad_norm": 4.577557563781738, "learning_rate": 3.990610328638498e-05, "loss": 0.2258, "step": 1600 }, { "epoch": 2.816901408450704, "eval_action_accuracy": 0.8939260563380281, "eval_action_f1": 0.7872903795233892, "eval_action_precision": 0.8479087452471483, "eval_action_recall": 0.7347611202635914, "eval_combined_f1": 0.7659288423194548, "eval_loss": 1.0919924974441528, "eval_runtime": 53.4132, "eval_samples_per_second": 42.536, "eval_severity_accuracy": 0.7442781690140845, "eval_severity_f1": 0.7445673051155204, "eval_severity_precision": 0.7537282342243937, "eval_severity_recall": 0.7442781690140845, "eval_steps_per_second": 2.659, "step": 1600 }, { "epoch": 2.992957746478873, "grad_norm": 5.347113609313965, "learning_rate": 3.892801251956182e-05, "loss": 0.2055, "step": 1700 }, { "epoch": 2.992957746478873, "eval_action_accuracy": 0.90625, "eval_action_f1": 0.813321647677476, "eval_action_precision": 0.8689138576779026, "eval_action_recall": 0.7644151565074135, "eval_combined_f1": 0.7792135732932114, "eval_loss": 1.0456966161727905, "eval_runtime": 53.4219, "eval_samples_per_second": 42.529, "eval_severity_accuracy": 0.7447183098591549, "eval_severity_f1": 0.7451054989089468, "eval_severity_precision": 0.7459307596916318, "eval_severity_recall": 0.7447183098591549, "eval_steps_per_second": 2.658, "step": 1700 }, { "epoch": 3.169014084507042, "grad_norm": 8.061402320861816, "learning_rate": 3.7949921752738656e-05, "loss": 0.1559, "step": 1800 }, { "epoch": 3.169014084507042, "eval_action_accuracy": 0.9049295774647887, "eval_action_f1": 0.8036363636363636, "eval_action_precision": 0.896551724137931, "eval_action_recall": 0.728171334431631, "eval_combined_f1": 0.7705730204982484, "eval_loss": 1.3266334533691406, "eval_runtime": 53.3935, "eval_samples_per_second": 42.552, "eval_severity_accuracy": 0.7376760563380281, "eval_severity_f1": 0.7375096773601331, "eval_severity_precision": 0.7543615150494364, "eval_severity_recall": 0.7376760563380281, "eval_steps_per_second": 2.659, "step": 1800 }, { "epoch": 3.345070422535211, "grad_norm": 4.052455902099609, "learning_rate": 3.69718309859155e-05, "loss": 0.1174, "step": 1900 }, { "epoch": 3.345070422535211, "eval_action_accuracy": 0.8992077464788732, "eval_action_f1": 0.7893284268629255, "eval_action_precision": 0.89375, "eval_action_recall": 0.7067545304777595, "eval_combined_f1": 0.7628428370446565, "eval_loss": 1.4744161367416382, "eval_runtime": 53.3995, "eval_samples_per_second": 42.547, "eval_severity_accuracy": 0.7376760563380281, "eval_severity_f1": 0.7363572472263874, "eval_severity_precision": 0.7497424115719165, "eval_severity_recall": 0.7376760563380281, "eval_steps_per_second": 2.659, "step": 1900 }, { "epoch": 3.52112676056338, "grad_norm": 6.726060390472412, "learning_rate": 3.599374021909233e-05, "loss": 0.1325, "step": 2000 }, { "epoch": 3.52112676056338, "eval_action_accuracy": 0.9044894366197183, "eval_action_f1": 0.8021877848678214, "eval_action_precision": 0.8979591836734694, "eval_action_recall": 0.7248764415156508, "eval_combined_f1": 0.7739176088843763, "eval_loss": 1.592252492904663, "eval_runtime": 53.4078, "eval_samples_per_second": 42.541, "eval_severity_accuracy": 0.7460387323943662, "eval_severity_f1": 0.7456474329009313, "eval_severity_precision": 0.7468342652042663, "eval_severity_recall": 0.7460387323943662, "eval_steps_per_second": 2.659, "step": 2000 }, { "epoch": 3.697183098591549, "grad_norm": 1.6415315866470337, "learning_rate": 3.5015649452269174e-05, "loss": 0.1318, "step": 2100 }, { "epoch": 3.697183098591549, "eval_action_accuracy": 0.909330985915493, "eval_action_f1": 0.815742397137746, "eval_action_precision": 0.8923679060665362, "eval_action_recall": 0.7512355848434926, "eval_combined_f1": 0.7744664753736676, "eval_loss": 1.3085289001464844, "eval_runtime": 53.3948, "eval_samples_per_second": 42.551, "eval_severity_accuracy": 0.7345950704225352, "eval_severity_f1": 0.7331905536095894, "eval_severity_precision": 0.7463951981865823, "eval_severity_recall": 0.7345950704225352, "eval_steps_per_second": 2.659, "step": 2100 }, { "epoch": 3.873239436619718, "grad_norm": 1.168869137763977, "learning_rate": 3.4037558685446016e-05, "loss": 0.1303, "step": 2200 }, { "epoch": 3.873239436619718, "eval_action_accuracy": 0.9080105633802817, "eval_action_f1": 0.8271298593879239, "eval_action_precision": 0.8305647840531561, "eval_action_recall": 0.8237232289950577, "eval_combined_f1": 0.7891663886796052, "eval_loss": 1.2526799440383911, "eval_runtime": 53.3936, "eval_samples_per_second": 42.552, "eval_severity_accuracy": 0.7513204225352113, "eval_severity_f1": 0.7512029179712866, "eval_severity_precision": 0.755462312536582, "eval_severity_recall": 0.7513204225352113, "eval_steps_per_second": 2.659, "step": 2200 }, { "epoch": 4.049295774647887, "grad_norm": 1.3796656131744385, "learning_rate": 3.305946791862285e-05, "loss": 0.1226, "step": 2300 }, { "epoch": 4.049295774647887, "eval_action_accuracy": 0.9163732394366197, "eval_action_f1": 0.8403361344537815, "eval_action_precision": 0.8576329331046312, "eval_action_recall": 0.8237232289950577, "eval_combined_f1": 0.7939814504731293, "eval_loss": 1.3127936124801636, "eval_runtime": 53.3969, "eval_samples_per_second": 42.549, "eval_severity_accuracy": 0.7508802816901409, "eval_severity_f1": 0.747626766492477, "eval_severity_precision": 0.7590158294773308, "eval_severity_recall": 0.7508802816901409, "eval_steps_per_second": 2.659, "step": 2300 }, { "epoch": 4.225352112676056, "grad_norm": 4.387660503387451, "learning_rate": 3.208137715179969e-05, "loss": 0.0703, "step": 2400 }, { "epoch": 4.225352112676056, "eval_action_accuracy": 0.9119718309859155, "eval_action_f1": 0.8242530755711776, "eval_action_precision": 0.8832391713747646, "eval_action_recall": 0.7726523887973641, "eval_combined_f1": 0.7795952339344467, "eval_loss": 1.73722505569458, "eval_runtime": 53.3612, "eval_samples_per_second": 42.578, "eval_severity_accuracy": 0.7372359154929577, "eval_severity_f1": 0.7349373922977159, "eval_severity_precision": 0.7478231277482102, "eval_severity_recall": 0.7372359154929577, "eval_steps_per_second": 2.661, "step": 2400 }, { "epoch": 4.401408450704225, "grad_norm": 3.3632314205169678, "learning_rate": 3.110328638497653e-05, "loss": 0.0749, "step": 2500 }, { "epoch": 4.401408450704225, "eval_action_accuracy": 0.9058098591549296, "eval_action_f1": 0.8145580589254766, "eval_action_precision": 0.8592321755027422, "eval_action_recall": 0.7742998352553542, "eval_combined_f1": 0.7737646532726048, "eval_loss": 1.8485854864120483, "eval_runtime": 53.3448, "eval_samples_per_second": 42.591, "eval_severity_accuracy": 0.7319542253521126, "eval_severity_f1": 0.7329712476197329, "eval_severity_precision": 0.7371728830594799, "eval_severity_recall": 0.7319542253521126, "eval_steps_per_second": 2.662, "step": 2500 }, { "epoch": 4.577464788732394, "grad_norm": 4.060558319091797, "learning_rate": 3.0125195618153366e-05, "loss": 0.0776, "step": 2600 }, { "epoch": 4.577464788732394, "eval_action_accuracy": 0.8877640845070423, "eval_action_f1": 0.746772591857001, "eval_action_precision": 0.94, "eval_action_recall": 0.6194398682042833, "eval_combined_f1": 0.7451658251590665, "eval_loss": 2.1151413917541504, "eval_runtime": 53.345, "eval_samples_per_second": 42.591, "eval_severity_accuracy": 0.7438380281690141, "eval_severity_f1": 0.7435590584611321, "eval_severity_precision": 0.7627996939449937, "eval_severity_recall": 0.7438380281690141, "eval_steps_per_second": 2.662, "step": 2600 }, { "epoch": 4.753521126760563, "grad_norm": 1.934782862663269, "learning_rate": 2.9147104851330205e-05, "loss": 0.0842, "step": 2700 }, { "epoch": 4.753521126760563, "eval_action_accuracy": 0.9154929577464789, "eval_action_f1": 0.8449111470113085, "eval_action_precision": 0.8288431061806656, "eval_action_recall": 0.8616144975288303, "eval_combined_f1": 0.7964890047427877, "eval_loss": 1.6187307834625244, "eval_runtime": 53.3536, "eval_samples_per_second": 42.584, "eval_severity_accuracy": 0.7477992957746479, "eval_severity_f1": 0.748066862474267, "eval_severity_precision": 0.751069587929407, "eval_severity_recall": 0.7477992957746479, "eval_steps_per_second": 2.661, "step": 2700 }, { "epoch": 4.929577464788732, "grad_norm": 0.15578486025333405, "learning_rate": 2.8169014084507046e-05, "loss": 0.0718, "step": 2800 }, { "epoch": 4.929577464788732, "eval_action_accuracy": 0.9080105633802817, "eval_action_f1": 0.8125560538116592, "eval_action_precision": 0.8917322834645669, "eval_action_recall": 0.7462932454695222, "eval_combined_f1": 0.7831897220338088, "eval_loss": 1.7883614301681519, "eval_runtime": 53.3648, "eval_samples_per_second": 42.575, "eval_severity_accuracy": 0.7535211267605634, "eval_severity_f1": 0.7538233902559583, "eval_severity_precision": 0.7606986906314367, "eval_severity_recall": 0.7535211267605634, "eval_steps_per_second": 2.661, "step": 2800 }, { "epoch": 5.105633802816901, "grad_norm": 0.7947894930839539, "learning_rate": 2.719092331768388e-05, "loss": 0.0732, "step": 2900 }, { "epoch": 5.105633802816901, "eval_action_accuracy": 0.909330985915493, "eval_action_f1": 0.83003300330033, "eval_action_precision": 0.8314049586776859, "eval_action_recall": 0.828665568369028, "eval_combined_f1": 0.7889699480003767, "eval_loss": 1.63761305809021, "eval_runtime": 53.3715, "eval_samples_per_second": 42.57, "eval_severity_accuracy": 0.7473591549295775, "eval_severity_f1": 0.7479068927004233, "eval_severity_precision": 0.7576437021292298, "eval_severity_recall": 0.7473591549295775, "eval_steps_per_second": 2.661, "step": 2900 }, { "epoch": 5.28169014084507, "grad_norm": 6.216972827911377, "learning_rate": 2.6212832550860723e-05, "loss": 0.0433, "step": 3000 }, { "epoch": 5.28169014084507, "eval_action_accuracy": 0.9119718309859155, "eval_action_f1": 0.823943661971831, "eval_action_precision": 0.8846880907372401, "eval_action_recall": 0.771004942339374, "eval_combined_f1": 0.7916775895426225, "eval_loss": 2.1584179401397705, "eval_runtime": 53.3702, "eval_samples_per_second": 42.571, "eval_severity_accuracy": 0.7588028169014085, "eval_severity_f1": 0.759411517113414, "eval_severity_precision": 0.7667529788427636, "eval_severity_recall": 0.7588028169014085, "eval_steps_per_second": 2.661, "step": 3000 }, { "epoch": 5.457746478873239, "grad_norm": 7.994290351867676, "learning_rate": 2.5234741784037558e-05, "loss": 0.0437, "step": 3100 }, { "epoch": 5.457746478873239, "eval_action_accuracy": 0.9132922535211268, "eval_action_f1": 0.8365145228215768, "eval_action_precision": 0.842809364548495, "eval_action_recall": 0.8303130148270181, "eval_combined_f1": 0.7970826496584587, "eval_loss": 1.9302828311920166, "eval_runtime": 53.3908, "eval_samples_per_second": 42.554, "eval_severity_accuracy": 0.758362676056338, "eval_severity_f1": 0.7576507764953404, "eval_severity_precision": 0.7661980956912092, "eval_severity_recall": 0.758362676056338, "eval_steps_per_second": 2.66, "step": 3100 }, { "epoch": 5.633802816901408, "grad_norm": 0.0936056599020958, "learning_rate": 2.42566510172144e-05, "loss": 0.0329, "step": 3200 }, { "epoch": 5.633802816901408, "eval_action_accuracy": 0.9137323943661971, "eval_action_f1": 0.833616298811545, "eval_action_precision": 0.8598949211908932, "eval_action_recall": 0.8088962108731467, "eval_combined_f1": 0.7923034119567994, "eval_loss": 2.4584481716156006, "eval_runtime": 53.4118, "eval_samples_per_second": 42.537, "eval_severity_accuracy": 0.7522007042253521, "eval_severity_f1": 0.7509905251020539, "eval_severity_precision": 0.7646859410918823, "eval_severity_recall": 0.7522007042253521, "eval_steps_per_second": 2.659, "step": 3200 }, { "epoch": 5.809859154929578, "grad_norm": 0.029598630964756012, "learning_rate": 2.327856025039124e-05, "loss": 0.0566, "step": 3300 }, { "epoch": 5.809859154929578, "eval_action_accuracy": 0.9132922535211268, "eval_action_f1": 0.841000807102502, "eval_action_precision": 0.8243670886075949, "eval_action_recall": 0.8583196046128501, "eval_combined_f1": 0.8000965798413886, "eval_loss": 1.6451172828674316, "eval_runtime": 53.3855, "eval_samples_per_second": 42.558, "eval_severity_accuracy": 0.7588028169014085, "eval_severity_f1": 0.7591923525802752, "eval_severity_precision": 0.7658252855908393, "eval_severity_recall": 0.7588028169014085, "eval_steps_per_second": 2.66, "step": 3300 } ], "logging_steps": 100, "max_steps": 5680, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.581830484199424e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }