{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.6, "eval_steps": 500, "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "forget_Q_A_PARA_Prob": 0.10203013149555773, "forget_Q_A_PERT_Prob": 0.042262932972778805, "forget_Q_A_Prob": 0.8794082054495811, "forget_Q_A_ROUGE": 0.8167727170975732, "forget_truth_ratio": 0.4771099600108043, "model_utility": 0.6005086084956538, "ra_Q_A_PERT_Prob": 0.006701486817293396, "ra_Q_A_Prob": 0.016744789615659103, "ra_Q_A_Prob_normalised": 0.4116699642309797, "ra_Q_A_ROUGE": 0.807, "ra_Truth_Ratio": 0.5267699222134944, "retain_Q_A_PARA_Prob": 0.09064217003295198, "retain_Q_A_PERT_Prob": 0.039781397786144375, "retain_Q_A_Prob": 0.8692756203562021, "retain_Q_A_ROUGE": 0.8015532158382885, "retain_Truth_Ratio": 0.5168476524332276, "step": 0, "wf_Q_A_PERT_Prob": 0.0020705466852406146, "wf_Q_A_Prob": 0.005700615959729778, "wf_Q_A_Prob_normalised": 0.43773736892877796, "wf_Q_A_ROUGE": 0.8364672364672365, "wf_Truth_Ratio": 0.6219292647960423 }, { "epoch": 0.8, "grad_norm": 5.8125, "learning_rate": 8.333333333333334e-06, "loss": -0.02, "step": 5 }, { "epoch": 0.96, "forget_Q_A_PARA_Prob": 0.09847796681104228, "forget_Q_A_PERT_Prob": 0.040619134066480905, "forget_Q_A_Prob": 0.864417289942503, "forget_Q_A_ROUGE": 0.7766220569608907, "forget_truth_ratio": 0.47638448812151046, "model_utility": 0.5975087905014809, "ra_Q_A_PERT_Prob": 0.006394159720402966, "ra_Q_A_Prob": 0.015820167160964047, "ra_Q_A_Prob_normalised": 0.41041366764582365, "ra_Q_A_ROUGE": 0.777, "ra_Truth_Ratio": 0.5271842454552385, "retain_Q_A_PARA_Prob": 0.08715349238598719, "retain_Q_A_PERT_Prob": 0.03806647144458753, "retain_Q_A_Prob": 0.8727010402083397, "retain_Q_A_ROUGE": 0.7914930746967861, "retain_Truth_Ratio": 0.5175890218892292, "step": 6, "wf_Q_A_PERT_Prob": 0.001985204046405395, "wf_Q_A_Prob": 0.00539462376765188, "wf_Q_A_Prob_normalised": 0.4370356662995382, "wf_Q_A_ROUGE": 0.827920227920228, "wf_Truth_Ratio": 0.6231397564594454 }, { "epoch": 1.6, "grad_norm": 8.25, "learning_rate": 9.25925925925926e-06, "loss": -0.0338, "step": 10 }, { "epoch": 1.92, "forget_Q_A_PARA_Prob": 0.07399103340110741, "forget_Q_A_PERT_Prob": 0.029306427133338725, "forget_Q_A_Prob": 0.7740213040821254, "forget_Q_A_ROUGE": 0.6595271131334257, "forget_truth_ratio": 0.46773561050100876, "model_utility": 0.5893980977084695, "ra_Q_A_PERT_Prob": 0.004266789740991801, "ra_Q_A_Prob": 0.010450768835853523, "ra_Q_A_Prob_normalised": 0.4032027476780875, "ra_Q_A_ROUGE": 0.75, "ra_Truth_Ratio": 0.5291234473401906, "retain_Q_A_PARA_Prob": 0.0634593945366214, "retain_Q_A_PERT_Prob": 0.026901972586099675, "retain_Q_A_Prob": 0.8664361499994994, "retain_Q_A_ROUGE": 0.7475174987799494, "retain_Truth_Ratio": 0.5218488540542622, "step": 12, "wf_Q_A_PERT_Prob": 0.0011738253327534733, "wf_Q_A_Prob": 0.0032587294159555527, "wf_Q_A_Prob_normalised": 0.4264818107064885, "wf_Q_A_ROUGE": 0.8364672364672365, "wf_Truth_Ratio": 0.6180570007754097 }, { "epoch": 2.4, "grad_norm": 28.625, "learning_rate": 8.333333333333334e-06, "loss": -0.1547, "step": 15 }, { "epoch": 2.88, "forget_Q_A_PARA_Prob": 0.029494350103268518, "forget_Q_A_PERT_Prob": 0.010788252533970309, "forget_Q_A_Prob": 0.42890838033519685, "forget_Q_A_ROUGE": 0.47554562025573865, "forget_truth_ratio": 0.4270363127789192, "model_utility": 0.4938221340759587, "ra_Q_A_PERT_Prob": 0.002862240351208592, "ra_Q_A_Prob": 0.005310191345149633, "ra_Q_A_Prob_normalised": 0.35948261719370866, "ra_Q_A_ROUGE": 0.5386666666666666, "ra_Truth_Ratio": 0.48806928121618726, "retain_Q_A_PARA_Prob": 0.023005029767518864, "retain_Q_A_PERT_Prob": 0.009349260415194934, "retain_Q_A_Prob": 0.5418562651053072, "retain_Q_A_ROUGE": 0.48791049224399674, "retain_Truth_Ratio": 0.535666342753256, "step": 18, "wf_Q_A_PERT_Prob": 0.0006206208060901188, "wf_Q_A_Prob": 0.0016164754898864997, "wf_Q_A_Prob_normalised": 0.3695440150531716, "wf_Q_A_ROUGE": 0.8015669515669516, "wf_Truth_Ratio": 0.5486688898270423 }, { "epoch": 3.2, "grad_norm": 64.0, "learning_rate": 7.4074074074074075e-06, "loss": -0.3682, "step": 20 }, { "epoch": 4.0, "grad_norm": 76.0, "learning_rate": 6.481481481481482e-06, "loss": -0.6335, "step": 25 }, { "epoch": 4.0, "forget_Q_A_PARA_Prob": 0.022825238737423207, "forget_Q_A_PERT_Prob": 0.008707179363070963, "forget_Q_A_Prob": 0.32061772370303515, "forget_Q_A_ROUGE": 0.4246722074970103, "forget_truth_ratio": 0.4505302561799914, "model_utility": 0.4981323727560218, "ra_Q_A_PERT_Prob": 0.0018825544706558498, "ra_Q_A_Prob": 0.0035706917219795286, "ra_Q_A_Prob_normalised": 0.3627041594788875, "ra_Q_A_ROUGE": 0.542, "ra_Truth_Ratio": 0.5090475941918271, "retain_Q_A_PARA_Prob": 0.019999133674391486, "retain_Q_A_PERT_Prob": 0.008501721144224763, "retain_Q_A_Prob": 0.5289758392423392, "retain_Q_A_ROUGE": 0.45165674425660607, "retain_Truth_Ratio": 0.525029996959407, "step": 25, "wf_Q_A_PERT_Prob": 0.00039124924958522844, "wf_Q_A_Prob": 0.0010230127496096808, "wf_Q_A_Prob_normalised": 0.3892502383463082, "wf_Q_A_ROUGE": 0.8272079772079772, "wf_Truth_Ratio": 0.5834813665954073 }, { "epoch": 4.8, "grad_norm": 95.5, "learning_rate": 5.555555555555557e-06, "loss": -1.0036, "step": 30 }, { "epoch": 4.96, "forget_Q_A_PARA_Prob": 0.016297213927791744, "forget_Q_A_PERT_Prob": 0.006429708399797391, "forget_Q_A_Prob": 0.21623114350717515, "forget_Q_A_ROUGE": 0.40352585788557677, "forget_truth_ratio": 0.4535519157830674, "model_utility": 0.4689847440154844, "ra_Q_A_PERT_Prob": 0.0016007507892272226, "ra_Q_A_Prob": 0.0027378251486902625, "ra_Q_A_Prob_normalised": 0.34992650866051084, "ra_Q_A_ROUGE": 0.4903333333333333, "ra_Truth_Ratio": 0.49958388756148436, "retain_Q_A_PARA_Prob": 0.014509558475438097, "retain_Q_A_PERT_Prob": 0.0062653698918112245, "retain_Q_A_Prob": 0.42842917014844717, "retain_Q_A_ROUGE": 0.4159923793722353, "retain_Truth_Ratio": 0.5252854380777774, "step": 31, "wf_Q_A_PERT_Prob": 0.00032658670132012314, "wf_Q_A_Prob": 0.0008091054565828816, "wf_Q_A_Prob_normalised": 0.38135258950349965, "wf_Q_A_ROUGE": 0.79017094017094, "wf_Truth_Ratio": 0.5675263659637196 }, { "epoch": 5.6, "grad_norm": 102.5, "learning_rate": 4.62962962962963e-06, "loss": -1.2174, "step": 35 }, { "epoch": 5.92, "forget_Q_A_PARA_Prob": 0.013320248301934044, "forget_Q_A_PERT_Prob": 0.005359866307460322, "forget_Q_A_Prob": 0.1701333471563703, "forget_Q_A_ROUGE": 0.3893579632252991, "forget_truth_ratio": 0.45182502190223545, "model_utility": 0.4605119280905741, "ra_Q_A_PERT_Prob": 0.0014502067912881985, "ra_Q_A_Prob": 0.002283854310616391, "ra_Q_A_Prob_normalised": 0.3426354827622861, "ra_Q_A_ROUGE": 0.5186666666666667, "ra_Truth_Ratio": 0.49187928673425846, "retain_Q_A_PARA_Prob": 0.012149485763020494, "retain_Q_A_PERT_Prob": 0.005184423866649652, "retain_Q_A_Prob": 0.3953410206688568, "retain_Q_A_ROUGE": 0.40594121669274513, "retain_Truth_Ratio": 0.5262515998417391, "step": 37, "wf_Q_A_PERT_Prob": 0.00028506373011880564, "wf_Q_A_Prob": 0.0006574596959592552, "wf_Q_A_Prob_normalised": 0.37066396527061546, "wf_Q_A_ROUGE": 0.7873219373219373, "wf_Truth_Ratio": 0.5546987766336627 }, { "epoch": 6.4, "grad_norm": 110.0, "learning_rate": 3.7037037037037037e-06, "loss": -1.4013, "step": 40 }, { "epoch": 6.88, "forget_Q_A_PARA_Prob": 0.012247884544667614, "forget_Q_A_PERT_Prob": 0.005036943114251244, "forget_Q_A_Prob": 0.15036444320954614, "forget_Q_A_ROUGE": 0.3864768855204044, "forget_truth_ratio": 0.45294569247206007, "model_utility": 0.45632718347300794, "ra_Q_A_PERT_Prob": 0.0013401418308045029, "ra_Q_A_Prob": 0.0021134812324058315, "ra_Q_A_Prob_normalised": 0.3396657603929745, "ra_Q_A_ROUGE": 0.48066666666666663, "ra_Truth_Ratio": 0.486192937273368, "retain_Q_A_PARA_Prob": 0.011550714041468381, "retain_Q_A_PERT_Prob": 0.004946324555627044, "retain_Q_A_Prob": 0.3944013632973656, "retain_Q_A_ROUGE": 0.4117662006946269, "retain_Truth_Ratio": 0.528704070223116, "step": 43, "wf_Q_A_PERT_Prob": 0.0002627398549638734, "wf_Q_A_Prob": 0.0006088191181077826, "wf_Q_A_Prob_normalised": 0.3683084379921478, "wf_Q_A_ROUGE": 0.7830484330484331, "wf_Truth_Ratio": 0.5574956770152373 }, { "epoch": 7.2, "grad_norm": 105.5, "learning_rate": 2.7777777777777783e-06, "loss": -1.6715, "step": 45 }, { "epoch": 8.0, "grad_norm": 105.5, "learning_rate": 1.8518518518518519e-06, "loss": -1.6866, "step": 50 }, { "epoch": 8.0, "forget_Q_A_PARA_Prob": 0.01165698835206058, "forget_Q_A_PERT_Prob": 0.004784465665314599, "forget_Q_A_Prob": 0.1409262517654861, "forget_Q_A_ROUGE": 0.3853420125934032, "forget_truth_ratio": 0.45286755399160056, "model_utility": 0.45785485616455157, "ra_Q_A_PERT_Prob": 0.0012624921512297978, "ra_Q_A_Prob": 0.0019572545638450833, "ra_Q_A_Prob_normalised": 0.33708947240715936, "ra_Q_A_ROUGE": 0.5136666666666667, "ra_Truth_Ratio": 0.4830684580107244, "retain_Q_A_PARA_Prob": 0.011154631597355547, "retain_Q_A_PERT_Prob": 0.00474388190690015, "retain_Q_A_Prob": 0.39457711114082483, "retain_Q_A_ROUGE": 0.40390020891646317, "retain_Truth_Ratio": 0.530181033678879, "step": 50, "wf_Q_A_PERT_Prob": 0.00024781451680188493, "wf_Q_A_Prob": 0.000590465893023208, "wf_Q_A_Prob_normalised": 0.36882322011476676, "wf_Q_A_ROUGE": 0.7915954415954417, "wf_Truth_Ratio": 0.5547968676582311 }, { "epoch": 8.8, "grad_norm": 111.0, "learning_rate": 9.259259259259259e-07, "loss": -1.8808, "step": 55 }, { "epoch": 8.96, "forget_Q_A_PARA_Prob": 0.011548278887548804, "forget_Q_A_PERT_Prob": 0.004766597765372808, "forget_Q_A_Prob": 0.13767883754961077, "forget_Q_A_ROUGE": 0.38167919519324256, "forget_truth_ratio": 0.4518345413593613, "model_utility": 0.4572918713236023, "ra_Q_A_PERT_Prob": 0.0012244680082650562, "ra_Q_A_Prob": 0.001937580604701452, "ra_Q_A_Prob_normalised": 0.33616675247851513, "ra_Q_A_ROUGE": 0.5136666666666667, "ra_Truth_Ratio": 0.4828442270725617, "retain_Q_A_PARA_Prob": 0.011029426595423502, "retain_Q_A_PERT_Prob": 0.0047029192964760744, "retain_Q_A_Prob": 0.39323330818675456, "retain_Q_A_ROUGE": 0.4034188604618481, "retain_Truth_Ratio": 0.5289516408948064, "step": 56, "wf_Q_A_PERT_Prob": 0.0002483615086286227, "wf_Q_A_Prob": 0.0005842194486731583, "wf_Q_A_Prob_normalised": 0.368697058136704, "wf_Q_A_ROUGE": 0.7915954415954417, "wf_Truth_Ratio": 0.55536081743841 }, { "epoch": 9.6, "grad_norm": 112.5, "learning_rate": 0.0, "loss": -1.8657, "step": 60 }, { "epoch": 9.6, "forget_Q_A_PARA_Prob": 0.011589295231115102, "forget_Q_A_PERT_Prob": 0.004739246443255553, "forget_Q_A_Prob": 0.13900133552757324, "forget_Q_A_ROUGE": 0.384309664120033, "forget_truth_ratio": 0.452656394665064, "model_utility": 0.4601335429232997, "ra_Q_A_PERT_Prob": 0.0012687713540489507, "ra_Q_A_Prob": 0.0019638981411819146, "ra_Q_A_Prob_normalised": 0.3378397275960433, "ra_Q_A_ROUGE": 0.5286666666666666, "ra_Truth_Ratio": 0.48694460359142416, "retain_Q_A_PARA_Prob": 0.011170326338497034, "retain_Q_A_PERT_Prob": 0.004777183987491883, "retain_Q_A_Prob": 0.3971325243171304, "retain_Q_A_ROUGE": 0.40513202099256107, "retain_Truth_Ratio": 0.5295923120496997, "step": 60, "wf_Q_A_PERT_Prob": 0.00024279495572724724, "wf_Q_A_Prob": 0.0005832649986044549, "wf_Q_A_Prob_normalised": 0.37083602701806173, "wf_Q_A_ROUGE": 0.7745014245014246, "wf_Truth_Ratio": 0.5580379667626183 }, { "epoch": 9.6, "step": 60, "total_flos": 0.0, "train_loss": -0.9947510037571192, "train_runtime": 1021.2635, "train_samples_per_second": 3.917, "train_steps_per_second": 0.059 } ], "logging_steps": 5, "max_steps": 60, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }