|
{ |
|
"best_metric": 0.8000965798413886, |
|
"best_model_checkpoint": "/content/drive/MyDrive/Projects/ModernBertTuning/2025-02-19_17:10:03/checkpoint-3300", |
|
"epoch": 5.809859154929578, |
|
"eval_steps": 100, |
|
"global_step": 3300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.176056338028169, |
|
"grad_norm": 5.5684285163879395, |
|
"learning_rate": 8.802816901408451e-06, |
|
"loss": 0.9029, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.176056338028169, |
|
"eval_action_accuracy": 0.7632042253521126, |
|
"eval_action_f1": 0.3275, |
|
"eval_action_precision": 0.6787564766839378, |
|
"eval_action_recall": 0.2158154859967051, |
|
"eval_combined_f1": 0.39009832159236646, |
|
"eval_loss": 1.5983511209487915, |
|
"eval_runtime": 53.3406, |
|
"eval_samples_per_second": 42.594, |
|
"eval_severity_accuracy": 0.4942781690140845, |
|
"eval_severity_f1": 0.4526966431847329, |
|
"eval_severity_precision": 0.5190575556900403, |
|
"eval_severity_recall": 0.4942781690140845, |
|
"eval_steps_per_second": 2.662, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.352112676056338, |
|
"grad_norm": 10.437986373901367, |
|
"learning_rate": 1.7605633802816902e-05, |
|
"loss": 0.6875, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.352112676056338, |
|
"eval_action_accuracy": 0.8393485915492958, |
|
"eval_action_f1": 0.7253574115876599, |
|
"eval_action_precision": 0.667590027700831, |
|
"eval_action_recall": 0.7940691927512356, |
|
"eval_combined_f1": 0.6889564202307465, |
|
"eval_loss": 1.1708728075027466, |
|
"eval_runtime": 53.402, |
|
"eval_samples_per_second": 42.545, |
|
"eval_severity_accuracy": 0.6615316901408451, |
|
"eval_severity_f1": 0.6525554288738333, |
|
"eval_severity_precision": 0.6774842234837531, |
|
"eval_severity_recall": 0.6615316901408451, |
|
"eval_steps_per_second": 2.659, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.528169014084507, |
|
"grad_norm": 8.755282402038574, |
|
"learning_rate": 2.640845070422535e-05, |
|
"loss": 0.5914, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.528169014084507, |
|
"eval_action_accuracy": 0.8653169014084507, |
|
"eval_action_f1": 0.7688821752265861, |
|
"eval_action_precision": 0.7099023709902371, |
|
"eval_action_recall": 0.8385502471169687, |
|
"eval_combined_f1": 0.7012112761418396, |
|
"eval_loss": 1.1276360750198364, |
|
"eval_runtime": 53.4046, |
|
"eval_samples_per_second": 42.543, |
|
"eval_severity_accuracy": 0.6307218309859155, |
|
"eval_severity_f1": 0.6335403770570933, |
|
"eval_severity_precision": 0.6726339478831171, |
|
"eval_severity_recall": 0.6307218309859155, |
|
"eval_steps_per_second": 2.659, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.704225352112676, |
|
"grad_norm": 4.745133399963379, |
|
"learning_rate": 3.5211267605633805e-05, |
|
"loss": 0.5167, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.704225352112676, |
|
"eval_action_accuracy": 0.8820422535211268, |
|
"eval_action_f1": 0.750465549348231, |
|
"eval_action_precision": 0.8629550321199143, |
|
"eval_action_recall": 0.6639209225700164, |
|
"eval_combined_f1": 0.7324396418875856, |
|
"eval_loss": 1.0260754823684692, |
|
"eval_runtime": 53.3961, |
|
"eval_samples_per_second": 42.55, |
|
"eval_severity_accuracy": 0.715669014084507, |
|
"eval_severity_f1": 0.7144137344269402, |
|
"eval_severity_precision": 0.717618479930179, |
|
"eval_severity_recall": 0.715669014084507, |
|
"eval_steps_per_second": 2.659, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8802816901408451, |
|
"grad_norm": 4.491367816925049, |
|
"learning_rate": 4.4014084507042256e-05, |
|
"loss": 0.5228, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8802816901408451, |
|
"eval_action_accuracy": 0.8811619718309859, |
|
"eval_action_f1": 0.7652173913043478, |
|
"eval_action_precision": 0.8103130755064457, |
|
"eval_action_recall": 0.7248764415156508, |
|
"eval_combined_f1": 0.7303003562324298, |
|
"eval_loss": 1.0010697841644287, |
|
"eval_runtime": 53.3858, |
|
"eval_samples_per_second": 42.558, |
|
"eval_severity_accuracy": 0.6941021126760564, |
|
"eval_severity_f1": 0.6953833211605118, |
|
"eval_severity_precision": 0.7027972731452775, |
|
"eval_severity_recall": 0.6941021126760564, |
|
"eval_steps_per_second": 2.66, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.056338028169014, |
|
"grad_norm": 7.762014865875244, |
|
"learning_rate": 4.968701095461659e-05, |
|
"loss": 0.4707, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.056338028169014, |
|
"eval_action_accuracy": 0.8816021126760564, |
|
"eval_action_f1": 0.7497674418604651, |
|
"eval_action_precision": 0.8611111111111112, |
|
"eval_action_recall": 0.6639209225700164, |
|
"eval_combined_f1": 0.7288028162963264, |
|
"eval_loss": 1.05709969997406, |
|
"eval_runtime": 53.3852, |
|
"eval_samples_per_second": 42.559, |
|
"eval_severity_accuracy": 0.710387323943662, |
|
"eval_severity_f1": 0.7078381907321877, |
|
"eval_severity_precision": 0.7185583041734807, |
|
"eval_severity_recall": 0.710387323943662, |
|
"eval_steps_per_second": 2.66, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.232394366197183, |
|
"grad_norm": 9.271745681762695, |
|
"learning_rate": 4.870892018779343e-05, |
|
"loss": 0.4103, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.232394366197183, |
|
"eval_action_accuracy": 0.8503521126760564, |
|
"eval_action_f1": 0.621380846325167, |
|
"eval_action_precision": 0.9587628865979382, |
|
"eval_action_recall": 0.4596375617792422, |
|
"eval_combined_f1": 0.6340972974514396, |
|
"eval_loss": 1.1522221565246582, |
|
"eval_runtime": 53.3981, |
|
"eval_samples_per_second": 42.548, |
|
"eval_severity_accuracy": 0.6553697183098591, |
|
"eval_severity_f1": 0.6468137485777123, |
|
"eval_severity_precision": 0.7014511458318834, |
|
"eval_severity_recall": 0.6553697183098591, |
|
"eval_steps_per_second": 2.659, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.408450704225352, |
|
"grad_norm": 7.816728115081787, |
|
"learning_rate": 4.773082942097027e-05, |
|
"loss": 0.4, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.408450704225352, |
|
"eval_action_accuracy": 0.8820422535211268, |
|
"eval_action_f1": 0.7922480620155039, |
|
"eval_action_precision": 0.7481698389458272, |
|
"eval_action_recall": 0.841845140032949, |
|
"eval_combined_f1": 0.7584899763574804, |
|
"eval_loss": 0.9812976717948914, |
|
"eval_runtime": 53.3978, |
|
"eval_samples_per_second": 42.549, |
|
"eval_severity_accuracy": 0.7235915492957746, |
|
"eval_severity_f1": 0.724731890699457, |
|
"eval_severity_precision": 0.7317594548978289, |
|
"eval_severity_recall": 0.7235915492957746, |
|
"eval_steps_per_second": 2.659, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.584507042253521, |
|
"grad_norm": 7.098080158233643, |
|
"learning_rate": 4.675273865414711e-05, |
|
"loss": 0.4179, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.584507042253521, |
|
"eval_action_accuracy": 0.894806338028169, |
|
"eval_action_f1": 0.7879325643300799, |
|
"eval_action_precision": 0.8538461538461538, |
|
"eval_action_recall": 0.7314662273476112, |
|
"eval_combined_f1": 0.7270919018906684, |
|
"eval_loss": 1.177077054977417, |
|
"eval_runtime": 53.4076, |
|
"eval_samples_per_second": 42.541, |
|
"eval_severity_accuracy": 0.6822183098591549, |
|
"eval_severity_f1": 0.666251239451257, |
|
"eval_severity_precision": 0.7051808476907062, |
|
"eval_severity_recall": 0.6822183098591549, |
|
"eval_steps_per_second": 2.659, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.76056338028169, |
|
"grad_norm": 3.6362385749816895, |
|
"learning_rate": 4.577464788732395e-05, |
|
"loss": 0.381, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.76056338028169, |
|
"eval_action_accuracy": 0.8934859154929577, |
|
"eval_action_f1": 0.8, |
|
"eval_action_precision": 0.802653399668325, |
|
"eval_action_recall": 0.7973640856672158, |
|
"eval_combined_f1": 0.7276179625363967, |
|
"eval_loss": 1.0336343050003052, |
|
"eval_runtime": 53.3973, |
|
"eval_samples_per_second": 42.549, |
|
"eval_severity_accuracy": 0.6597711267605634, |
|
"eval_severity_f1": 0.6552359250727933, |
|
"eval_severity_precision": 0.690194073616287, |
|
"eval_severity_recall": 0.6597711267605634, |
|
"eval_steps_per_second": 2.659, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.936619718309859, |
|
"grad_norm": 4.175089359283447, |
|
"learning_rate": 4.479655712050079e-05, |
|
"loss": 0.384, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.936619718309859, |
|
"eval_action_accuracy": 0.8899647887323944, |
|
"eval_action_f1": 0.7987117552334944, |
|
"eval_action_precision": 0.7811023622047244, |
|
"eval_action_recall": 0.8171334431630972, |
|
"eval_combined_f1": 0.7613638710145674, |
|
"eval_loss": 0.9859868288040161, |
|
"eval_runtime": 53.3975, |
|
"eval_samples_per_second": 42.549, |
|
"eval_severity_accuracy": 0.7240316901408451, |
|
"eval_severity_f1": 0.7240159867956404, |
|
"eval_severity_precision": 0.7333723870533466, |
|
"eval_severity_recall": 0.7240316901408451, |
|
"eval_steps_per_second": 2.659, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.112676056338028, |
|
"grad_norm": 5.42644739151001, |
|
"learning_rate": 4.3818466353677626e-05, |
|
"loss": 0.287, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.112676056338028, |
|
"eval_action_accuracy": 0.8798415492957746, |
|
"eval_action_f1": 0.7272727272727273, |
|
"eval_action_precision": 0.9238578680203046, |
|
"eval_action_recall": 0.5996705107084019, |
|
"eval_combined_f1": 0.7194719198906718, |
|
"eval_loss": 1.2687214612960815, |
|
"eval_runtime": 53.3984, |
|
"eval_samples_per_second": 42.548, |
|
"eval_severity_accuracy": 0.7130281690140845, |
|
"eval_severity_f1": 0.7116711125086165, |
|
"eval_severity_precision": 0.7299090747607111, |
|
"eval_severity_recall": 0.7130281690140845, |
|
"eval_steps_per_second": 2.659, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.288732394366197, |
|
"grad_norm": 4.9831342697143555, |
|
"learning_rate": 4.284037558685447e-05, |
|
"loss": 0.234, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.288732394366197, |
|
"eval_action_accuracy": 0.8758802816901409, |
|
"eval_action_f1": 0.7904903417533432, |
|
"eval_action_precision": 0.7198917456021651, |
|
"eval_action_recall": 0.8764415156507414, |
|
"eval_combined_f1": 0.7408713797328776, |
|
"eval_loss": 1.2081983089447021, |
|
"eval_runtime": 53.3856, |
|
"eval_samples_per_second": 42.558, |
|
"eval_severity_accuracy": 0.6954225352112676, |
|
"eval_severity_f1": 0.691252417712412, |
|
"eval_severity_precision": 0.710940220005423, |
|
"eval_severity_recall": 0.6954225352112676, |
|
"eval_steps_per_second": 2.66, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.464788732394366, |
|
"grad_norm": 8.448158264160156, |
|
"learning_rate": 4.18622848200313e-05, |
|
"loss": 0.2284, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.464788732394366, |
|
"eval_action_accuracy": 0.9097711267605634, |
|
"eval_action_f1": 0.820333041191937, |
|
"eval_action_precision": 0.8764044943820225, |
|
"eval_action_recall": 0.771004942339374, |
|
"eval_combined_f1": 0.7709403023211041, |
|
"eval_loss": 1.1236168146133423, |
|
"eval_runtime": 53.3827, |
|
"eval_samples_per_second": 42.561, |
|
"eval_severity_accuracy": 0.7205105633802817, |
|
"eval_severity_f1": 0.7215475634502713, |
|
"eval_severity_precision": 0.7388563066943578, |
|
"eval_severity_recall": 0.7205105633802817, |
|
"eval_steps_per_second": 2.66, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.640845070422535, |
|
"grad_norm": 3.273317575454712, |
|
"learning_rate": 4.0884194053208144e-05, |
|
"loss": 0.2147, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.640845070422535, |
|
"eval_action_accuracy": 0.9066901408450704, |
|
"eval_action_f1": 0.8233333333333334, |
|
"eval_action_precision": 0.8330522765598651, |
|
"eval_action_recall": 0.8138385502471169, |
|
"eval_combined_f1": 0.7802089055740911, |
|
"eval_loss": 1.08566153049469, |
|
"eval_runtime": 53.3876, |
|
"eval_samples_per_second": 42.557, |
|
"eval_severity_accuracy": 0.7394366197183099, |
|
"eval_severity_f1": 0.7370844778148489, |
|
"eval_severity_precision": 0.7443892609781334, |
|
"eval_severity_recall": 0.7394366197183099, |
|
"eval_steps_per_second": 2.66, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.816901408450704, |
|
"grad_norm": 4.577557563781738, |
|
"learning_rate": 3.990610328638498e-05, |
|
"loss": 0.2258, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.816901408450704, |
|
"eval_action_accuracy": 0.8939260563380281, |
|
"eval_action_f1": 0.7872903795233892, |
|
"eval_action_precision": 0.8479087452471483, |
|
"eval_action_recall": 0.7347611202635914, |
|
"eval_combined_f1": 0.7659288423194548, |
|
"eval_loss": 1.0919924974441528, |
|
"eval_runtime": 53.4132, |
|
"eval_samples_per_second": 42.536, |
|
"eval_severity_accuracy": 0.7442781690140845, |
|
"eval_severity_f1": 0.7445673051155204, |
|
"eval_severity_precision": 0.7537282342243937, |
|
"eval_severity_recall": 0.7442781690140845, |
|
"eval_steps_per_second": 2.659, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.992957746478873, |
|
"grad_norm": 5.347113609313965, |
|
"learning_rate": 3.892801251956182e-05, |
|
"loss": 0.2055, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.992957746478873, |
|
"eval_action_accuracy": 0.90625, |
|
"eval_action_f1": 0.813321647677476, |
|
"eval_action_precision": 0.8689138576779026, |
|
"eval_action_recall": 0.7644151565074135, |
|
"eval_combined_f1": 0.7792135732932114, |
|
"eval_loss": 1.0456966161727905, |
|
"eval_runtime": 53.4219, |
|
"eval_samples_per_second": 42.529, |
|
"eval_severity_accuracy": 0.7447183098591549, |
|
"eval_severity_f1": 0.7451054989089468, |
|
"eval_severity_precision": 0.7459307596916318, |
|
"eval_severity_recall": 0.7447183098591549, |
|
"eval_steps_per_second": 2.658, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.169014084507042, |
|
"grad_norm": 8.061402320861816, |
|
"learning_rate": 3.7949921752738656e-05, |
|
"loss": 0.1559, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.169014084507042, |
|
"eval_action_accuracy": 0.9049295774647887, |
|
"eval_action_f1": 0.8036363636363636, |
|
"eval_action_precision": 0.896551724137931, |
|
"eval_action_recall": 0.728171334431631, |
|
"eval_combined_f1": 0.7705730204982484, |
|
"eval_loss": 1.3266334533691406, |
|
"eval_runtime": 53.3935, |
|
"eval_samples_per_second": 42.552, |
|
"eval_severity_accuracy": 0.7376760563380281, |
|
"eval_severity_f1": 0.7375096773601331, |
|
"eval_severity_precision": 0.7543615150494364, |
|
"eval_severity_recall": 0.7376760563380281, |
|
"eval_steps_per_second": 2.659, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.345070422535211, |
|
"grad_norm": 4.052455902099609, |
|
"learning_rate": 3.69718309859155e-05, |
|
"loss": 0.1174, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.345070422535211, |
|
"eval_action_accuracy": 0.8992077464788732, |
|
"eval_action_f1": 0.7893284268629255, |
|
"eval_action_precision": 0.89375, |
|
"eval_action_recall": 0.7067545304777595, |
|
"eval_combined_f1": 0.7628428370446565, |
|
"eval_loss": 1.4744161367416382, |
|
"eval_runtime": 53.3995, |
|
"eval_samples_per_second": 42.547, |
|
"eval_severity_accuracy": 0.7376760563380281, |
|
"eval_severity_f1": 0.7363572472263874, |
|
"eval_severity_precision": 0.7497424115719165, |
|
"eval_severity_recall": 0.7376760563380281, |
|
"eval_steps_per_second": 2.659, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.52112676056338, |
|
"grad_norm": 6.726060390472412, |
|
"learning_rate": 3.599374021909233e-05, |
|
"loss": 0.1325, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.52112676056338, |
|
"eval_action_accuracy": 0.9044894366197183, |
|
"eval_action_f1": 0.8021877848678214, |
|
"eval_action_precision": 0.8979591836734694, |
|
"eval_action_recall": 0.7248764415156508, |
|
"eval_combined_f1": 0.7739176088843763, |
|
"eval_loss": 1.592252492904663, |
|
"eval_runtime": 53.4078, |
|
"eval_samples_per_second": 42.541, |
|
"eval_severity_accuracy": 0.7460387323943662, |
|
"eval_severity_f1": 0.7456474329009313, |
|
"eval_severity_precision": 0.7468342652042663, |
|
"eval_severity_recall": 0.7460387323943662, |
|
"eval_steps_per_second": 2.659, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.697183098591549, |
|
"grad_norm": 1.6415315866470337, |
|
"learning_rate": 3.5015649452269174e-05, |
|
"loss": 0.1318, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.697183098591549, |
|
"eval_action_accuracy": 0.909330985915493, |
|
"eval_action_f1": 0.815742397137746, |
|
"eval_action_precision": 0.8923679060665362, |
|
"eval_action_recall": 0.7512355848434926, |
|
"eval_combined_f1": 0.7744664753736676, |
|
"eval_loss": 1.3085289001464844, |
|
"eval_runtime": 53.3948, |
|
"eval_samples_per_second": 42.551, |
|
"eval_severity_accuracy": 0.7345950704225352, |
|
"eval_severity_f1": 0.7331905536095894, |
|
"eval_severity_precision": 0.7463951981865823, |
|
"eval_severity_recall": 0.7345950704225352, |
|
"eval_steps_per_second": 2.659, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.873239436619718, |
|
"grad_norm": 1.168869137763977, |
|
"learning_rate": 3.4037558685446016e-05, |
|
"loss": 0.1303, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.873239436619718, |
|
"eval_action_accuracy": 0.9080105633802817, |
|
"eval_action_f1": 0.8271298593879239, |
|
"eval_action_precision": 0.8305647840531561, |
|
"eval_action_recall": 0.8237232289950577, |
|
"eval_combined_f1": 0.7891663886796052, |
|
"eval_loss": 1.2526799440383911, |
|
"eval_runtime": 53.3936, |
|
"eval_samples_per_second": 42.552, |
|
"eval_severity_accuracy": 0.7513204225352113, |
|
"eval_severity_f1": 0.7512029179712866, |
|
"eval_severity_precision": 0.755462312536582, |
|
"eval_severity_recall": 0.7513204225352113, |
|
"eval_steps_per_second": 2.659, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.049295774647887, |
|
"grad_norm": 1.3796656131744385, |
|
"learning_rate": 3.305946791862285e-05, |
|
"loss": 0.1226, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 4.049295774647887, |
|
"eval_action_accuracy": 0.9163732394366197, |
|
"eval_action_f1": 0.8403361344537815, |
|
"eval_action_precision": 0.8576329331046312, |
|
"eval_action_recall": 0.8237232289950577, |
|
"eval_combined_f1": 0.7939814504731293, |
|
"eval_loss": 1.3127936124801636, |
|
"eval_runtime": 53.3969, |
|
"eval_samples_per_second": 42.549, |
|
"eval_severity_accuracy": 0.7508802816901409, |
|
"eval_severity_f1": 0.747626766492477, |
|
"eval_severity_precision": 0.7590158294773308, |
|
"eval_severity_recall": 0.7508802816901409, |
|
"eval_steps_per_second": 2.659, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 4.225352112676056, |
|
"grad_norm": 4.387660503387451, |
|
"learning_rate": 3.208137715179969e-05, |
|
"loss": 0.0703, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.225352112676056, |
|
"eval_action_accuracy": 0.9119718309859155, |
|
"eval_action_f1": 0.8242530755711776, |
|
"eval_action_precision": 0.8832391713747646, |
|
"eval_action_recall": 0.7726523887973641, |
|
"eval_combined_f1": 0.7795952339344467, |
|
"eval_loss": 1.73722505569458, |
|
"eval_runtime": 53.3612, |
|
"eval_samples_per_second": 42.578, |
|
"eval_severity_accuracy": 0.7372359154929577, |
|
"eval_severity_f1": 0.7349373922977159, |
|
"eval_severity_precision": 0.7478231277482102, |
|
"eval_severity_recall": 0.7372359154929577, |
|
"eval_steps_per_second": 2.661, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.401408450704225, |
|
"grad_norm": 3.3632314205169678, |
|
"learning_rate": 3.110328638497653e-05, |
|
"loss": 0.0749, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.401408450704225, |
|
"eval_action_accuracy": 0.9058098591549296, |
|
"eval_action_f1": 0.8145580589254766, |
|
"eval_action_precision": 0.8592321755027422, |
|
"eval_action_recall": 0.7742998352553542, |
|
"eval_combined_f1": 0.7737646532726048, |
|
"eval_loss": 1.8485854864120483, |
|
"eval_runtime": 53.3448, |
|
"eval_samples_per_second": 42.591, |
|
"eval_severity_accuracy": 0.7319542253521126, |
|
"eval_severity_f1": 0.7329712476197329, |
|
"eval_severity_precision": 0.7371728830594799, |
|
"eval_severity_recall": 0.7319542253521126, |
|
"eval_steps_per_second": 2.662, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.577464788732394, |
|
"grad_norm": 4.060558319091797, |
|
"learning_rate": 3.0125195618153366e-05, |
|
"loss": 0.0776, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.577464788732394, |
|
"eval_action_accuracy": 0.8877640845070423, |
|
"eval_action_f1": 0.746772591857001, |
|
"eval_action_precision": 0.94, |
|
"eval_action_recall": 0.6194398682042833, |
|
"eval_combined_f1": 0.7451658251590665, |
|
"eval_loss": 2.1151413917541504, |
|
"eval_runtime": 53.345, |
|
"eval_samples_per_second": 42.591, |
|
"eval_severity_accuracy": 0.7438380281690141, |
|
"eval_severity_f1": 0.7435590584611321, |
|
"eval_severity_precision": 0.7627996939449937, |
|
"eval_severity_recall": 0.7438380281690141, |
|
"eval_steps_per_second": 2.662, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.753521126760563, |
|
"grad_norm": 1.934782862663269, |
|
"learning_rate": 2.9147104851330205e-05, |
|
"loss": 0.0842, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.753521126760563, |
|
"eval_action_accuracy": 0.9154929577464789, |
|
"eval_action_f1": 0.8449111470113085, |
|
"eval_action_precision": 0.8288431061806656, |
|
"eval_action_recall": 0.8616144975288303, |
|
"eval_combined_f1": 0.7964890047427877, |
|
"eval_loss": 1.6187307834625244, |
|
"eval_runtime": 53.3536, |
|
"eval_samples_per_second": 42.584, |
|
"eval_severity_accuracy": 0.7477992957746479, |
|
"eval_severity_f1": 0.748066862474267, |
|
"eval_severity_precision": 0.751069587929407, |
|
"eval_severity_recall": 0.7477992957746479, |
|
"eval_steps_per_second": 2.661, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.929577464788732, |
|
"grad_norm": 0.15578486025333405, |
|
"learning_rate": 2.8169014084507046e-05, |
|
"loss": 0.0718, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.929577464788732, |
|
"eval_action_accuracy": 0.9080105633802817, |
|
"eval_action_f1": 0.8125560538116592, |
|
"eval_action_precision": 0.8917322834645669, |
|
"eval_action_recall": 0.7462932454695222, |
|
"eval_combined_f1": 0.7831897220338088, |
|
"eval_loss": 1.7883614301681519, |
|
"eval_runtime": 53.3648, |
|
"eval_samples_per_second": 42.575, |
|
"eval_severity_accuracy": 0.7535211267605634, |
|
"eval_severity_f1": 0.7538233902559583, |
|
"eval_severity_precision": 0.7606986906314367, |
|
"eval_severity_recall": 0.7535211267605634, |
|
"eval_steps_per_second": 2.661, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.105633802816901, |
|
"grad_norm": 0.7947894930839539, |
|
"learning_rate": 2.719092331768388e-05, |
|
"loss": 0.0732, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 5.105633802816901, |
|
"eval_action_accuracy": 0.909330985915493, |
|
"eval_action_f1": 0.83003300330033, |
|
"eval_action_precision": 0.8314049586776859, |
|
"eval_action_recall": 0.828665568369028, |
|
"eval_combined_f1": 0.7889699480003767, |
|
"eval_loss": 1.63761305809021, |
|
"eval_runtime": 53.3715, |
|
"eval_samples_per_second": 42.57, |
|
"eval_severity_accuracy": 0.7473591549295775, |
|
"eval_severity_f1": 0.7479068927004233, |
|
"eval_severity_precision": 0.7576437021292298, |
|
"eval_severity_recall": 0.7473591549295775, |
|
"eval_steps_per_second": 2.661, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 5.28169014084507, |
|
"grad_norm": 6.216972827911377, |
|
"learning_rate": 2.6212832550860723e-05, |
|
"loss": 0.0433, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.28169014084507, |
|
"eval_action_accuracy": 0.9119718309859155, |
|
"eval_action_f1": 0.823943661971831, |
|
"eval_action_precision": 0.8846880907372401, |
|
"eval_action_recall": 0.771004942339374, |
|
"eval_combined_f1": 0.7916775895426225, |
|
"eval_loss": 2.1584179401397705, |
|
"eval_runtime": 53.3702, |
|
"eval_samples_per_second": 42.571, |
|
"eval_severity_accuracy": 0.7588028169014085, |
|
"eval_severity_f1": 0.759411517113414, |
|
"eval_severity_precision": 0.7667529788427636, |
|
"eval_severity_recall": 0.7588028169014085, |
|
"eval_steps_per_second": 2.661, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.457746478873239, |
|
"grad_norm": 7.994290351867676, |
|
"learning_rate": 2.5234741784037558e-05, |
|
"loss": 0.0437, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 5.457746478873239, |
|
"eval_action_accuracy": 0.9132922535211268, |
|
"eval_action_f1": 0.8365145228215768, |
|
"eval_action_precision": 0.842809364548495, |
|
"eval_action_recall": 0.8303130148270181, |
|
"eval_combined_f1": 0.7970826496584587, |
|
"eval_loss": 1.9302828311920166, |
|
"eval_runtime": 53.3908, |
|
"eval_samples_per_second": 42.554, |
|
"eval_severity_accuracy": 0.758362676056338, |
|
"eval_severity_f1": 0.7576507764953404, |
|
"eval_severity_precision": 0.7661980956912092, |
|
"eval_severity_recall": 0.758362676056338, |
|
"eval_steps_per_second": 2.66, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 5.633802816901408, |
|
"grad_norm": 0.0936056599020958, |
|
"learning_rate": 2.42566510172144e-05, |
|
"loss": 0.0329, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 5.633802816901408, |
|
"eval_action_accuracy": 0.9137323943661971, |
|
"eval_action_f1": 0.833616298811545, |
|
"eval_action_precision": 0.8598949211908932, |
|
"eval_action_recall": 0.8088962108731467, |
|
"eval_combined_f1": 0.7923034119567994, |
|
"eval_loss": 2.4584481716156006, |
|
"eval_runtime": 53.4118, |
|
"eval_samples_per_second": 42.537, |
|
"eval_severity_accuracy": 0.7522007042253521, |
|
"eval_severity_f1": 0.7509905251020539, |
|
"eval_severity_precision": 0.7646859410918823, |
|
"eval_severity_recall": 0.7522007042253521, |
|
"eval_steps_per_second": 2.659, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 5.809859154929578, |
|
"grad_norm": 0.029598630964756012, |
|
"learning_rate": 2.327856025039124e-05, |
|
"loss": 0.0566, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 5.809859154929578, |
|
"eval_action_accuracy": 0.9132922535211268, |
|
"eval_action_f1": 0.841000807102502, |
|
"eval_action_precision": 0.8243670886075949, |
|
"eval_action_recall": 0.8583196046128501, |
|
"eval_combined_f1": 0.8000965798413886, |
|
"eval_loss": 1.6451172828674316, |
|
"eval_runtime": 53.3855, |
|
"eval_samples_per_second": 42.558, |
|
"eval_severity_accuracy": 0.7588028169014085, |
|
"eval_severity_f1": 0.7591923525802752, |
|
"eval_severity_precision": 0.7658252855908393, |
|
"eval_severity_recall": 0.7588028169014085, |
|
"eval_steps_per_second": 2.66, |
|
"step": 3300 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 5680, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.581830484199424e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|