| { | |
| "best_metric": 0.06431461870670319, | |
| "best_model_checkpoint": "./teapotllm/checkpoint-1855", | |
| "epoch": 42.0, | |
| "eval_steps": 500, | |
| "global_step": 11130, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "em_boolean": 0.11764705882352941, | |
| "em_extraction": 0.06666666666666667, | |
| "em_qa": 0.0, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 1.0, | |
| "mean_em": 0.0375, | |
| "mean_similarity": 0.3622433557640761, | |
| "mean_word_count_diff": 17.7125, | |
| "similarity_boolean": 0.18269754047779477, | |
| "similarity_extraction": 0.2946929598848025, | |
| "similarity_qa": 0.43210459053516387, | |
| "similarity_summarization": 0.6143242551220788, | |
| "similarity_unanswerable": 0.26092069496711096, | |
| "word_count_diff_boolean": 9.0, | |
| "word_count_diff_extraction": 7.666666666666667, | |
| "word_count_diff_qa": 6.933333333333334, | |
| "word_count_diff_summarization": 53.111111111111114, | |
| "word_count_diff_unanswerable": 5.933333333333334 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 101.51641082763672, | |
| "learning_rate": 1.59e-05, | |
| "loss": 40.3479, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 19.639965057373047, | |
| "eval_runtime": 1.7948, | |
| "eval_samples_per_second": 44.573, | |
| "eval_steps_per_second": 5.572, | |
| "step": 265 | |
| }, | |
| { | |
| "em_boolean": 0.058823529411764705, | |
| "em_extraction": 0.0, | |
| "em_qa": 0.0, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 2.0, | |
| "mean_em": 0.0125, | |
| "mean_similarity": 0.37655651543755086, | |
| "mean_word_count_diff": 17.1625, | |
| "similarity_boolean": 0.10583376435234267, | |
| "similarity_extraction": 0.21972424462437629, | |
| "similarity_qa": 0.5285325924555461, | |
| "similarity_summarization": 0.6654400361908807, | |
| "similarity_unanswerable": 0.34157160222530364, | |
| "word_count_diff_boolean": 10.176470588235293, | |
| "word_count_diff_extraction": 6.133333333333334, | |
| "word_count_diff_qa": 5.866666666666666, | |
| "word_count_diff_summarization": 52.166666666666664, | |
| "word_count_diff_unanswerable": 5.4 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.2927296459674835, | |
| "learning_rate": 2.9999590191302685e-05, | |
| "loss": 1.8688, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.08876434713602066, | |
| "eval_runtime": 1.7881, | |
| "eval_samples_per_second": 44.74, | |
| "eval_steps_per_second": 5.593, | |
| "step": 530 | |
| }, | |
| { | |
| "em_boolean": 0.29411764705882354, | |
| "em_extraction": 0.13333333333333333, | |
| "em_qa": 0.06666666666666667, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 3.0, | |
| "mean_em": 0.1, | |
| "mean_similarity": 0.468679612770211, | |
| "mean_word_count_diff": 16.8875, | |
| "similarity_boolean": 0.36136592250755606, | |
| "similarity_extraction": 0.3513699695467949, | |
| "similarity_qa": 0.5638724292318026, | |
| "similarity_summarization": 0.661304693048199, | |
| "similarity_unanswerable": 0.3812685254961252, | |
| "word_count_diff_boolean": 5.411764705882353, | |
| "word_count_diff_extraction": 8.066666666666666, | |
| "word_count_diff_qa": 5.6, | |
| "word_count_diff_summarization": 53.333333333333336, | |
| "word_count_diff_unanswerable": 6.266666666666667 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.2717432379722595, | |
| "learning_rate": 2.9960391039419217e-05, | |
| "loss": 0.0745, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.07186319679021835, | |
| "eval_runtime": 1.7817, | |
| "eval_samples_per_second": 44.901, | |
| "eval_steps_per_second": 5.613, | |
| "step": 795 | |
| }, | |
| { | |
| "em_boolean": 0.5294117647058824, | |
| "em_extraction": 0.0, | |
| "em_qa": 0.0, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 4.0, | |
| "mean_em": 0.1125, | |
| "mean_similarity": 0.5406636619823985, | |
| "mean_word_count_diff": 16.4, | |
| "similarity_boolean": 0.5694808649337467, | |
| "similarity_extraction": 0.2759806969513496, | |
| "similarity_qa": 0.5888164043426514, | |
| "similarity_summarization": 0.6777413653002845, | |
| "similarity_unanswerable": 0.5600411439935367, | |
| "word_count_diff_boolean": 3.411764705882353, | |
| "word_count_diff_extraction": 9.133333333333333, | |
| "word_count_diff_qa": 5.733333333333333, | |
| "word_count_diff_summarization": 52.611111111111114, | |
| "word_count_diff_unanswerable": 5.6 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.33120226860046387, | |
| "learning_rate": 2.985743020616636e-05, | |
| "loss": 0.0583, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.06767071783542633, | |
| "eval_runtime": 1.8031, | |
| "eval_samples_per_second": 44.368, | |
| "eval_steps_per_second": 5.546, | |
| "step": 1060 | |
| }, | |
| { | |
| "em_boolean": 0.35294117647058826, | |
| "em_extraction": 0.06666666666666667, | |
| "em_qa": 0.06666666666666667, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 5.0, | |
| "mean_em": 0.1, | |
| "mean_similarity": 0.5391882328083739, | |
| "mean_word_count_diff": 16.5375, | |
| "similarity_boolean": 0.4291324294446146, | |
| "similarity_extraction": 0.3934509038925171, | |
| "similarity_qa": 0.5968125452597935, | |
| "similarity_summarization": 0.6669783542553583, | |
| "similarity_unanswerable": 0.5986830140153567, | |
| "word_count_diff_boolean": 5.9411764705882355, | |
| "word_count_diff_extraction": 5.866666666666666, | |
| "word_count_diff_qa": 5.4, | |
| "word_count_diff_summarization": 53.77777777777778, | |
| "word_count_diff_unanswerable": 5.666666666666667 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.05014026165008545, | |
| "learning_rate": 2.969114651402049e-05, | |
| "loss": 0.0512, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 0.06544757634401321, | |
| "eval_runtime": 1.797, | |
| "eval_samples_per_second": 44.518, | |
| "eval_steps_per_second": 5.565, | |
| "step": 1325 | |
| }, | |
| { | |
| "em_boolean": 0.35294117647058826, | |
| "em_extraction": 0.13333333333333333, | |
| "em_qa": 0.0, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 6.0, | |
| "mean_em": 0.1, | |
| "mean_similarity": 0.5817075100494549, | |
| "mean_word_count_diff": 16.1375, | |
| "similarity_boolean": 0.3759182594059145, | |
| "similarity_extraction": 0.5534958879152934, | |
| "similarity_qa": 0.6812621881564458, | |
| "similarity_summarization": 0.661298368126154, | |
| "similarity_unanswerable": 0.648083241780599, | |
| "word_count_diff_boolean": 6.294117647058823, | |
| "word_count_diff_extraction": 4.466666666666667, | |
| "word_count_diff_qa": 5.666666666666667, | |
| "word_count_diff_summarization": 52.55555555555556, | |
| "word_count_diff_unanswerable": 5.733333333333333 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.37450432777404785, | |
| "learning_rate": 2.9462248669576738e-05, | |
| "loss": 0.0466, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 0.0656035840511322, | |
| "eval_runtime": 1.8115, | |
| "eval_samples_per_second": 44.161, | |
| "eval_steps_per_second": 5.52, | |
| "step": 1590 | |
| }, | |
| { | |
| "em_boolean": 0.29411764705882354, | |
| "em_extraction": 0.06666666666666667, | |
| "em_qa": 0.06666666666666667, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 7.0, | |
| "mean_em": 0.0875, | |
| "mean_similarity": 0.584748298721388, | |
| "mean_word_count_diff": 16.4875, | |
| "similarity_boolean": 0.3921016369234113, | |
| "similarity_extraction": 0.4258095269401868, | |
| "similarity_qa": 0.8010195712248485, | |
| "similarity_summarization": 0.7269510593679216, | |
| "similarity_unanswerable": 0.5751053685943286, | |
| "word_count_diff_boolean": 6.529411764705882, | |
| "word_count_diff_extraction": 7.4, | |
| "word_count_diff_qa": 4.266666666666667, | |
| "word_count_diff_summarization": 51.5, | |
| "word_count_diff_unanswerable": 7.066666666666666 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.2656797170639038, | |
| "learning_rate": 2.9171712243018076e-05, | |
| "loss": 0.043, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 0.06431461870670319, | |
| "eval_runtime": 1.779, | |
| "eval_samples_per_second": 44.968, | |
| "eval_steps_per_second": 5.621, | |
| "step": 1855 | |
| }, | |
| { | |
| "em_boolean": 0.23529411764705882, | |
| "em_extraction": 0.3333333333333333, | |
| "em_qa": 0.13333333333333333, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 8.0, | |
| "mean_em": 0.1375, | |
| "mean_similarity": 0.5707565736840479, | |
| "mean_word_count_diff": 15.9125, | |
| "similarity_boolean": 0.3595361856962828, | |
| "similarity_extraction": 0.6590617413322131, | |
| "similarity_qa": 0.6671084851026535, | |
| "similarity_summarization": 0.672764014866617, | |
| "similarity_unanswerable": 0.503073671584328, | |
| "word_count_diff_boolean": 6.705882352941177, | |
| "word_count_diff_extraction": 4.133333333333334, | |
| "word_count_diff_qa": 4.0, | |
| "word_count_diff_summarization": 51.833333333333336, | |
| "word_count_diff_unanswerable": 6.933333333333334 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.1501343548297882, | |
| "learning_rate": 2.882077551020291e-05, | |
| "loss": 0.0393, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 0.064610555768013, | |
| "eval_runtime": 1.7854, | |
| "eval_samples_per_second": 44.809, | |
| "eval_steps_per_second": 5.601, | |
| "step": 2120 | |
| }, | |
| { | |
| "em_boolean": 0.29411764705882354, | |
| "em_extraction": 0.13333333333333333, | |
| "em_qa": 0.0, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 9.0, | |
| "mean_em": 0.0875, | |
| "mean_similarity": 0.5716628619702533, | |
| "mean_word_count_diff": 16.075, | |
| "similarity_boolean": 0.46071117371320724, | |
| "similarity_extraction": 0.5175981452067693, | |
| "similarity_qa": 0.6631501737982035, | |
| "similarity_summarization": 0.6426751317663325, | |
| "similarity_unanswerable": 0.5747707898418108, | |
| "word_count_diff_boolean": 5.235294117647059, | |
| "word_count_diff_extraction": 7.333333333333333, | |
| "word_count_diff_qa": 5.6, | |
| "word_count_diff_summarization": 52.22222222222222, | |
| "word_count_diff_unanswerable": 4.2 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 0.23849624395370483, | |
| "learning_rate": 2.8410934175092426e-05, | |
| "loss": 0.0363, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 0.06528123468160629, | |
| "eval_runtime": 1.7964, | |
| "eval_samples_per_second": 44.533, | |
| "eval_steps_per_second": 5.567, | |
| "step": 2385 | |
| }, | |
| { | |
| "em_boolean": 0.4117647058823529, | |
| "em_extraction": 0.13333333333333333, | |
| "em_qa": 0.06666666666666667, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 10.0, | |
| "mean_em": 0.125, | |
| "mean_similarity": 0.5984957347856834, | |
| "mean_word_count_diff": 16.2375, | |
| "similarity_boolean": 0.4425226738128592, | |
| "similarity_extraction": 0.5075533439715704, | |
| "similarity_qa": 0.6813998910288016, | |
| "similarity_summarization": 0.6938931312825944, | |
| "similarity_unanswerable": 0.6688265626629194, | |
| "word_count_diff_boolean": 5.352941176470588, | |
| "word_count_diff_extraction": 6.466666666666667, | |
| "word_count_diff_qa": 6.2, | |
| "word_count_diff_summarization": 52.166666666666664, | |
| "word_count_diff_unanswerable": 5.266666666666667 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.2859129309654236, | |
| "learning_rate": 2.7943934995010845e-05, | |
| "loss": 0.0338, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 0.06541204452514648, | |
| "eval_runtime": 1.7939, | |
| "eval_samples_per_second": 44.595, | |
| "eval_steps_per_second": 5.574, | |
| "step": 2650 | |
| }, | |
| { | |
| "em_boolean": 0.35294117647058826, | |
| "em_extraction": 0.2, | |
| "em_qa": 0.06666666666666667, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 11.0, | |
| "mean_em": 0.125, | |
| "mean_similarity": 0.584741565072909, | |
| "mean_word_count_diff": 15.85, | |
| "similarity_boolean": 0.41579638640670213, | |
| "similarity_extraction": 0.575649573157231, | |
| "similarity_qa": 0.68668532371521, | |
| "similarity_summarization": 0.7162102411190668, | |
| "similarity_unanswerable": 0.5255985895792643, | |
| "word_count_diff_boolean": 5.0, | |
| "word_count_diff_extraction": 4.333333333333333, | |
| "word_count_diff_qa": 5.4, | |
| "word_count_diff_summarization": 51.72222222222222, | |
| "word_count_diff_unanswerable": 7.066666666666666 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 0.19708961248397827, | |
| "learning_rate": 2.7421768335907942e-05, | |
| "loss": 0.0314, | |
| "step": 2915 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_loss": 0.06535650044679642, | |
| "eval_runtime": 1.7968, | |
| "eval_samples_per_second": 44.525, | |
| "eval_steps_per_second": 5.566, | |
| "step": 2915 | |
| }, | |
| { | |
| "em_boolean": 0.4117647058823529, | |
| "em_extraction": 0.13333333333333333, | |
| "em_qa": 0.06666666666666667, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 12.0, | |
| "mean_em": 0.125, | |
| "mean_similarity": 0.5985993921523913, | |
| "mean_word_count_diff": 15.65, | |
| "similarity_boolean": 0.49136487109696164, | |
| "similarity_extraction": 0.4834645986557007, | |
| "similarity_qa": 0.7439890225728353, | |
| "similarity_summarization": 0.7199151118596395, | |
| "similarity_unanswerable": 0.5442981487760942, | |
| "word_count_diff_boolean": 5.294117647058823, | |
| "word_count_diff_extraction": 6.466666666666667, | |
| "word_count_diff_qa": 5.0, | |
| "word_count_diff_summarization": 51.333333333333336, | |
| "word_count_diff_unanswerable": 4.4 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 0.32636699080467224, | |
| "learning_rate": 2.6846659689353532e-05, | |
| "loss": 0.0291, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_loss": 0.06654994189739227, | |
| "eval_runtime": 1.7794, | |
| "eval_samples_per_second": 44.96, | |
| "eval_steps_per_second": 5.62, | |
| "step": 3180 | |
| }, | |
| { | |
| "em_boolean": 0.29411764705882354, | |
| "em_extraction": 0.2, | |
| "em_qa": 0.13333333333333333, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 13.0, | |
| "mean_em": 0.125, | |
| "mean_similarity": 0.5608657638425939, | |
| "mean_word_count_diff": 15.4875, | |
| "similarity_boolean": 0.3726646105265793, | |
| "similarity_extraction": 0.4880456611514091, | |
| "similarity_qa": 0.6968116387724876, | |
| "similarity_summarization": 0.7003769092261791, | |
| "similarity_unanswerable": 0.5436212575683991, | |
| "word_count_diff_boolean": 6.470588235294118, | |
| "word_count_diff_extraction": 4.0, | |
| "word_count_diff_qa": 4.266666666666667, | |
| "word_count_diff_summarization": 50.22222222222222, | |
| "word_count_diff_unanswerable": 6.733333333333333 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 0.09956305474042892, | |
| "learning_rate": 2.622106018741882e-05, | |
| "loss": 0.0268, | |
| "step": 3445 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_loss": 0.06867102533578873, | |
| "eval_runtime": 1.7923, | |
| "eval_samples_per_second": 44.635, | |
| "eval_steps_per_second": 5.579, | |
| "step": 3445 | |
| }, | |
| { | |
| "em_boolean": 0.4117647058823529, | |
| "em_extraction": 0.13333333333333333, | |
| "em_qa": 0.06666666666666667, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 14.0, | |
| "mean_em": 0.125, | |
| "mean_similarity": 0.6118219185853377, | |
| "mean_word_count_diff": 15.1, | |
| "similarity_boolean": 0.5559550164376988, | |
| "similarity_extraction": 0.4480490814894438, | |
| "similarity_qa": 0.8226341267426809, | |
| "similarity_summarization": 0.7094271017445458, | |
| "similarity_unanswerable": 0.5109721501668294, | |
| "word_count_diff_boolean": 3.235294117647059, | |
| "word_count_diff_extraction": 5.333333333333333, | |
| "word_count_diff_qa": 4.066666666666666, | |
| "word_count_diff_summarization": 51.333333333333336, | |
| "word_count_diff_unanswerable": 5.866666666666666 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 0.2182687222957611, | |
| "learning_rate": 2.554763615587042e-05, | |
| "loss": 0.0254, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_loss": 0.06904994696378708, | |
| "eval_runtime": 1.7971, | |
| "eval_samples_per_second": 44.517, | |
| "eval_steps_per_second": 5.565, | |
| "step": 3710 | |
| }, | |
| { | |
| "em_boolean": 0.17647058823529413, | |
| "em_extraction": 0.13333333333333333, | |
| "em_qa": 0.13333333333333333, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 15.0, | |
| "mean_em": 0.0875, | |
| "mean_similarity": 0.5906956384656951, | |
| "mean_word_count_diff": 15.8, | |
| "similarity_boolean": 0.30824649366824064, | |
| "similarity_extraction": 0.5498731901248296, | |
| "similarity_qa": 0.7911941662430764, | |
| "similarity_summarization": 0.7216936614778307, | |
| "similarity_unanswerable": 0.5939309621850649, | |
| "word_count_diff_boolean": 5.294117647058823, | |
| "word_count_diff_extraction": 5.533333333333333, | |
| "word_count_diff_qa": 4.933333333333334, | |
| "word_count_diff_summarization": 50.833333333333336, | |
| "word_count_diff_unanswerable": 6.8 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 0.278495728969574, | |
| "learning_rate": 2.4829257750201806e-05, | |
| "loss": 0.0235, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_loss": 0.07152120769023895, | |
| "eval_runtime": 1.8045, | |
| "eval_samples_per_second": 44.334, | |
| "eval_steps_per_second": 5.542, | |
| "step": 3975 | |
| }, | |
| { | |
| "em_boolean": 0.35294117647058826, | |
| "em_extraction": 0.13333333333333333, | |
| "em_qa": 0.06666666666666667, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 16.0, | |
| "mean_em": 0.1125, | |
| "mean_similarity": 0.6044320391491056, | |
| "mean_word_count_diff": 15.3625, | |
| "similarity_boolean": 0.4709787254824358, | |
| "similarity_extraction": 0.48890374253193536, | |
| "similarity_qa": 0.7086231042941411, | |
| "similarity_summarization": 0.6860101450648572, | |
| "similarity_unanswerable": 0.6691226323445638, | |
| "word_count_diff_boolean": 5.176470588235294, | |
| "word_count_diff_extraction": 4.133333333333334, | |
| "word_count_diff_qa": 5.8, | |
| "word_count_diff_summarization": 51.388888888888886, | |
| "word_count_diff_unanswerable": 4.466666666666667 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 0.12528866529464722, | |
| "learning_rate": 2.4068986722935625e-05, | |
| "loss": 0.0219, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_loss": 0.07122211903333664, | |
| "eval_runtime": 1.7912, | |
| "eval_samples_per_second": 44.663, | |
| "eval_steps_per_second": 5.583, | |
| "step": 4240 | |
| }, | |
| { | |
| "em_boolean": 0.23529411764705882, | |
| "em_extraction": 0.2, | |
| "em_qa": 0.06666666666666667, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 17.0, | |
| "mean_em": 0.1, | |
| "mean_similarity": 0.5854583959793672, | |
| "mean_word_count_diff": 16.1125, | |
| "similarity_boolean": 0.3930098557954325, | |
| "similarity_extraction": 0.5295473781724771, | |
| "similarity_qa": 0.7186618636051814, | |
| "similarity_summarization": 0.6619544287936555, | |
| "similarity_unanswerable": 0.6344790523250897, | |
| "word_count_diff_boolean": 5.764705882352941, | |
| "word_count_diff_extraction": 5.066666666666666, | |
| "word_count_diff_qa": 6.066666666666666, | |
| "word_count_diff_summarization": 52.22222222222222, | |
| "word_count_diff_unanswerable": 5.6 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 0.18347615003585815, | |
| "learning_rate": 2.327006337433323e-05, | |
| "loss": 0.0208, | |
| "step": 4505 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_loss": 0.07514352351427078, | |
| "eval_runtime": 1.7952, | |
| "eval_samples_per_second": 44.563, | |
| "eval_steps_per_second": 5.57, | |
| "step": 4505 | |
| }, | |
| { | |
| "em_boolean": 0.35294117647058826, | |
| "em_extraction": 0.13333333333333333, | |
| "em_qa": 0.06666666666666667, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 18.0, | |
| "mean_em": 0.1125, | |
| "mean_similarity": 0.6148898253683001, | |
| "mean_word_count_diff": 15.95, | |
| "similarity_boolean": 0.45182113108389516, | |
| "similarity_extraction": 0.5246008743842443, | |
| "similarity_qa": 0.7161807785431544, | |
| "similarity_summarization": 0.695978145632479, | |
| "similarity_unanswerable": 0.6913930257161458, | |
| "word_count_diff_boolean": 6.705882352941177, | |
| "word_count_diff_extraction": 5.333333333333333, | |
| "word_count_diff_qa": 5.133333333333334, | |
| "word_count_diff_summarization": 51.72222222222222, | |
| "word_count_diff_unanswerable": 4.933333333333334 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 0.39281344413757324, | |
| "learning_rate": 2.2435892742127786e-05, | |
| "loss": 0.0197, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_loss": 0.07489313185214996, | |
| "eval_runtime": 1.7966, | |
| "eval_samples_per_second": 44.528, | |
| "eval_steps_per_second": 5.566, | |
| "step": 4770 | |
| }, | |
| { | |
| "em_boolean": 0.35294117647058826, | |
| "em_extraction": 0.13333333333333333, | |
| "em_qa": 0.06666666666666667, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 19.0, | |
| "mean_em": 0.1125, | |
| "mean_similarity": 0.6110048872418702, | |
| "mean_word_count_diff": 15.175, | |
| "similarity_boolean": 0.4688412766246235, | |
| "similarity_extraction": 0.5354909280935923, | |
| "similarity_qa": 0.7334268649419149, | |
| "similarity_summarization": 0.6523812972009182, | |
| "similarity_unanswerable": 0.6755639354387919, | |
| "word_count_diff_boolean": 5.411764705882353, | |
| "word_count_diff_extraction": 3.6666666666666665, | |
| "word_count_diff_qa": 5.333333333333333, | |
| "word_count_diff_summarization": 51.27777777777778, | |
| "word_count_diff_unanswerable": 4.266666666666667 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 0.26503777503967285, | |
| "learning_rate": 2.1570030089140857e-05, | |
| "loss": 0.0184, | |
| "step": 5035 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_loss": 0.07855142652988434, | |
| "eval_runtime": 1.7852, | |
| "eval_samples_per_second": 44.813, | |
| "eval_steps_per_second": 5.602, | |
| "step": 5035 | |
| }, | |
| { | |
| "em_boolean": 0.058823529411764705, | |
| "em_extraction": 0.26666666666666666, | |
| "em_qa": 0.0, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 20.0, | |
| "mean_em": 0.0625, | |
| "mean_similarity": 0.5894458804745227, | |
| "mean_word_count_diff": 15.7625, | |
| "similarity_boolean": 0.3066763941417722, | |
| "similarity_extraction": 0.567964817583561, | |
| "similarity_qa": 0.7356137126684189, | |
| "similarity_summarization": 0.7311095827155643, | |
| "similarity_unanswerable": 0.615234752992789, | |
| "word_count_diff_boolean": 7.0588235294117645, | |
| "word_count_diff_extraction": 4.133333333333334, | |
| "word_count_diff_qa": 6.4, | |
| "word_count_diff_summarization": 50.833333333333336, | |
| "word_count_diff_unanswerable": 4.533333333333333 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.325967013835907, | |
| "learning_rate": 2.0676165750634656e-05, | |
| "loss": 0.0173, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_loss": 0.07865440845489502, | |
| "eval_runtime": 1.7895, | |
| "eval_samples_per_second": 44.705, | |
| "eval_steps_per_second": 5.588, | |
| "step": 5300 | |
| }, | |
| { | |
| "em_boolean": 0.5294117647058824, | |
| "em_extraction": 0.26666666666666666, | |
| "em_qa": 0.06666666666666667, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 21.0, | |
| "mean_em": 0.175, | |
| "mean_similarity": 0.6464177187765017, | |
| "mean_word_count_diff": 14.8375, | |
| "similarity_boolean": 0.6461278642801678, | |
| "similarity_extraction": 0.6178931772708893, | |
| "similarity_qa": 0.7566967884699504, | |
| "similarity_summarization": 0.6613570315142473, | |
| "similarity_unanswerable": 0.5470645170658827, | |
| "word_count_diff_boolean": 3.176470588235294, | |
| "word_count_diff_extraction": 5.4, | |
| "word_count_diff_qa": 4.6, | |
| "word_count_diff_summarization": 50.5, | |
| "word_count_diff_unanswerable": 4.933333333333334 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "grad_norm": 0.18580074608325958, | |
| "learning_rate": 1.975810940598114e-05, | |
| "loss": 0.0162, | |
| "step": 5565 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_loss": 0.08283738046884537, | |
| "eval_runtime": 1.8062, | |
| "eval_samples_per_second": 44.292, | |
| "eval_steps_per_second": 5.537, | |
| "step": 5565 | |
| }, | |
| { | |
| "em_boolean": 0.47058823529411764, | |
| "em_extraction": 0.26666666666666666, | |
| "em_qa": 0.06666666666666667, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 22.0, | |
| "mean_em": 0.1625, | |
| "mean_similarity": 0.6508607531315647, | |
| "mean_word_count_diff": 15.1875, | |
| "similarity_boolean": 0.5319088667302447, | |
| "similarity_extraction": 0.6322630804032088, | |
| "similarity_qa": 0.7358093798160553, | |
| "similarity_summarization": 0.6958086478213469, | |
| "similarity_unanswerable": 0.6653844634691874, | |
| "word_count_diff_boolean": 5.352941176470588, | |
| "word_count_diff_extraction": 3.933333333333333, | |
| "word_count_diff_qa": 5.266666666666667, | |
| "word_count_diff_summarization": 51.166666666666664, | |
| "word_count_diff_unanswerable": 4.333333333333333 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "grad_norm": 0.7463592886924744, | |
| "learning_rate": 1.881977384168254e-05, | |
| "loss": 0.0158, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_loss": 0.08268458396196365, | |
| "eval_runtime": 1.7886, | |
| "eval_samples_per_second": 44.728, | |
| "eval_steps_per_second": 5.591, | |
| "step": 5830 | |
| }, | |
| { | |
| "em_boolean": 0.4117647058823529, | |
| "em_extraction": 0.2, | |
| "em_qa": 0.13333333333333333, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 23.0, | |
| "mean_em": 0.15, | |
| "mean_similarity": 0.6376353252329864, | |
| "mean_word_count_diff": 15.5625, | |
| "similarity_boolean": 0.5682552300722283, | |
| "similarity_extraction": 0.5148679026712973, | |
| "similarity_qa": 0.7483514944712321, | |
| "similarity_summarization": 0.7200961951166391, | |
| "similarity_unanswerable": 0.629364309211572, | |
| "word_count_diff_boolean": 3.823529411764706, | |
| "word_count_diff_extraction": 5.266666666666667, | |
| "word_count_diff_qa": 4.933333333333334, | |
| "word_count_diff_summarization": 51.388888888888886, | |
| "word_count_diff_unanswerable": 6.8 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "grad_norm": 0.30431216955184937, | |
| "learning_rate": 1.7865158274945846e-05, | |
| "loss": 0.0149, | |
| "step": 6095 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_loss": 0.08500902354717255, | |
| "eval_runtime": 1.8006, | |
| "eval_samples_per_second": 44.43, | |
| "eval_steps_per_second": 5.554, | |
| "step": 6095 | |
| }, | |
| { | |
| "em_boolean": 0.35294117647058826, | |
| "em_extraction": 0.2, | |
| "em_qa": 0.0, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 24.0, | |
| "mean_em": 0.1125, | |
| "mean_similarity": 0.6209718830883503, | |
| "mean_word_count_diff": 15.625, | |
| "similarity_boolean": 0.4452236132586704, | |
| "similarity_extraction": 0.5458283141255379, | |
| "similarity_qa": 0.7422831366459529, | |
| "similarity_summarization": 0.7023132857349184, | |
| "similarity_unanswerable": 0.6763758877913157, | |
| "word_count_diff_boolean": 5.470588235294118, | |
| "word_count_diff_extraction": 5.6, | |
| "word_count_diff_qa": 5.533333333333333, | |
| "word_count_diff_summarization": 51.611111111111114, | |
| "word_count_diff_unanswerable": 4.066666666666666 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "grad_norm": 0.1550023853778839, | |
| "learning_rate": 1.6898331308886863e-05, | |
| "loss": 0.0138, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_loss": 0.08557449281215668, | |
| "eval_runtime": 1.7865, | |
| "eval_samples_per_second": 44.78, | |
| "eval_steps_per_second": 5.598, | |
| "step": 6360 | |
| }, | |
| { | |
| "em_boolean": 0.4117647058823529, | |
| "em_extraction": 0.26666666666666666, | |
| "em_qa": 0.13333333333333333, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 25.0, | |
| "mean_em": 0.1625, | |
| "mean_similarity": 0.6411203743191436, | |
| "mean_word_count_diff": 15.1875, | |
| "similarity_boolean": 0.5644924241830321, | |
| "similarity_extraction": 0.5594843897968531, | |
| "similarity_qa": 0.7792828718821208, | |
| "similarity_summarization": 0.6979754573355118, | |
| "similarity_unanswerable": 0.6032127718130748, | |
| "word_count_diff_boolean": 3.7058823529411766, | |
| "word_count_diff_extraction": 4.533333333333333, | |
| "word_count_diff_qa": 4.933333333333334, | |
| "word_count_diff_summarization": 51.833333333333336, | |
| "word_count_diff_unanswerable": 5.133333333333334 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 0.23536083102226257, | |
| "learning_rate": 1.5923413592009145e-05, | |
| "loss": 0.0132, | |
| "step": 6625 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_loss": 0.0886365994811058, | |
| "eval_runtime": 1.7918, | |
| "eval_samples_per_second": 44.648, | |
| "eval_steps_per_second": 5.581, | |
| "step": 6625 | |
| }, | |
| { | |
| "em_boolean": 0.29411764705882354, | |
| "em_extraction": 0.13333333333333333, | |
| "em_qa": 0.0, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 26.0, | |
| "mean_em": 0.0875, | |
| "mean_similarity": 0.5999299102462828, | |
| "mean_word_count_diff": 15.6625, | |
| "similarity_boolean": 0.44055906716076765, | |
| "similarity_extraction": 0.4907683741301298, | |
| "similarity_qa": 0.64702517837286, | |
| "similarity_summarization": 0.7336946874856949, | |
| "similarity_unanswerable": 0.6820987343788147, | |
| "word_count_diff_boolean": 5.9411764705882355, | |
| "word_count_diff_extraction": 5.466666666666667, | |
| "word_count_diff_qa": 7.066666666666666, | |
| "word_count_diff_summarization": 50.27777777777778, | |
| "word_count_diff_unanswerable": 3.933333333333333 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "grad_norm": 0.0521920807659626, | |
| "learning_rate": 1.4944560255863805e-05, | |
| "loss": 0.0127, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_loss": 0.09115185588598251, | |
| "eval_runtime": 1.7871, | |
| "eval_samples_per_second": 44.764, | |
| "eval_steps_per_second": 5.596, | |
| "step": 6890 | |
| }, | |
| { | |
| "em_boolean": 0.29411764705882354, | |
| "em_extraction": 0.2, | |
| "em_qa": 0.06666666666666667, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 27.0, | |
| "mean_em": 0.1125, | |
| "mean_similarity": 0.5961971989367157, | |
| "mean_word_count_diff": 15.9, | |
| "similarity_boolean": 0.47167208264855776, | |
| "similarity_extraction": 0.563706802825133, | |
| "similarity_qa": 0.6419207287331422, | |
| "similarity_summarization": 0.6927380098236932, | |
| "similarity_unanswerable": 0.608243557314078, | |
| "word_count_diff_boolean": 5.0, | |
| "word_count_diff_extraction": 4.933333333333334, | |
| "word_count_diff_qa": 6.0, | |
| "word_count_diff_summarization": 52.0, | |
| "word_count_diff_unanswerable": 5.8 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "grad_norm": 0.23173846304416656, | |
| "learning_rate": 1.396594320574144e-05, | |
| "loss": 0.0121, | |
| "step": 7155 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_loss": 0.09045173972845078, | |
| "eval_runtime": 1.7961, | |
| "eval_samples_per_second": 44.541, | |
| "eval_steps_per_second": 5.568, | |
| "step": 7155 | |
| }, | |
| { | |
| "em_boolean": 0.5294117647058824, | |
| "em_extraction": 0.13333333333333333, | |
| "em_qa": 0.06666666666666667, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 28.0, | |
| "mean_em": 0.15, | |
| "mean_similarity": 0.6426682359306142, | |
| "mean_word_count_diff": 15.4625, | |
| "similarity_boolean": 0.6154902246506775, | |
| "similarity_extraction": 0.559150509784619, | |
| "similarity_qa": 0.7585609555244446, | |
| "similarity_summarization": 0.7132770286666023, | |
| "similarity_unanswerable": 0.5563644373168548, | |
| "word_count_diff_boolean": 3.9411764705882355, | |
| "word_count_diff_extraction": 5.933333333333334, | |
| "word_count_diff_qa": 5.4, | |
| "word_count_diff_summarization": 51.94444444444444, | |
| "word_count_diff_unanswerable": 4.333333333333333 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "grad_norm": 0.15653730928897858, | |
| "learning_rate": 1.2991733339873795e-05, | |
| "loss": 0.0114, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_loss": 0.09521345049142838, | |
| "eval_runtime": 1.7969, | |
| "eval_samples_per_second": 44.521, | |
| "eval_steps_per_second": 5.565, | |
| "step": 7420 | |
| }, | |
| { | |
| "em_boolean": 0.5882352941176471, | |
| "em_extraction": 0.2, | |
| "em_qa": 0.0, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 29.0, | |
| "mean_em": 0.1625, | |
| "mean_similarity": 0.670323112141341, | |
| "mean_word_count_diff": 14.8125, | |
| "similarity_boolean": 0.6801450522068668, | |
| "similarity_extraction": 0.5754425642391046, | |
| "similarity_qa": 0.7391265859206517, | |
| "similarity_summarization": 0.7395255797439151, | |
| "similarity_unanswerable": 0.6022256930669149, | |
| "word_count_diff_boolean": 2.588235294117647, | |
| "word_count_diff_extraction": 4.0, | |
| "word_count_diff_qa": 5.666666666666667, | |
| "word_count_diff_summarization": 50.333333333333336, | |
| "word_count_diff_unanswerable": 6.0 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "grad_norm": 0.13572590053081512, | |
| "learning_rate": 1.2026082772927574e-05, | |
| "loss": 0.0112, | |
| "step": 7685 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_loss": 0.09561847150325775, | |
| "eval_runtime": 1.7863, | |
| "eval_samples_per_second": 44.786, | |
| "eval_steps_per_second": 5.598, | |
| "step": 7685 | |
| }, | |
| { | |
| "em_boolean": 0.47058823529411764, | |
| "em_extraction": 0.26666666666666666, | |
| "em_qa": 0.06666666666666667, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 30.0, | |
| "mean_em": 0.1625, | |
| "mean_similarity": 0.6361284551327117, | |
| "mean_word_count_diff": 15.1625, | |
| "similarity_boolean": 0.5678887126748177, | |
| "similarity_extraction": 0.528444591164589, | |
| "similarity_qa": 0.7746080329020818, | |
| "similarity_summarization": 0.7153815428415934, | |
| "similarity_unanswerable": 0.5875674108664195, | |
| "word_count_diff_boolean": 3.6470588235294117, | |
| "word_count_diff_extraction": 4.2, | |
| "word_count_diff_qa": 5.666666666666667, | |
| "word_count_diff_summarization": 51.05555555555556, | |
| "word_count_diff_unanswerable": 5.6 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "grad_norm": 0.11106608062982559, | |
| "learning_rate": 1.1073107139554395e-05, | |
| "loss": 0.0111, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_loss": 0.09475980699062347, | |
| "eval_runtime": 1.8027, | |
| "eval_samples_per_second": 44.379, | |
| "eval_steps_per_second": 5.547, | |
| "step": 7950 | |
| }, | |
| { | |
| "em_boolean": 0.47058823529411764, | |
| "em_extraction": 0.0, | |
| "em_qa": 0.06666666666666667, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 31.0, | |
| "mean_em": 0.1125, | |
| "mean_similarity": 0.5996635487768799, | |
| "mean_word_count_diff": 15.9625, | |
| "similarity_boolean": 0.5703230380792829, | |
| "similarity_extraction": 0.43614052472015225, | |
| "similarity_qa": 0.6746748656034469, | |
| "similarity_summarization": 0.6909497512711419, | |
| "similarity_unanswerable": 0.6118843918045361, | |
| "word_count_diff_boolean": 4.176470588235294, | |
| "word_count_diff_extraction": 6.933333333333334, | |
| "word_count_diff_qa": 6.466666666666667, | |
| "word_count_diff_summarization": 51.888888888888886, | |
| "word_count_diff_unanswerable": 4.733333333333333 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "grad_norm": 0.15586383640766144, | |
| "learning_rate": 1.0136868053419842e-05, | |
| "loss": 0.0105, | |
| "step": 8215 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_loss": 0.09651915729045868, | |
| "eval_runtime": 1.7878, | |
| "eval_samples_per_second": 44.749, | |
| "eval_steps_per_second": 5.594, | |
| "step": 8215 | |
| }, | |
| { | |
| "em_boolean": 0.35294117647058826, | |
| "em_extraction": 0.26666666666666666, | |
| "em_qa": 0.0, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 32.0, | |
| "mean_em": 0.125, | |
| "mean_similarity": 0.6647563714534044, | |
| "mean_word_count_diff": 15.3125, | |
| "similarity_boolean": 0.4992627663647427, | |
| "similarity_extraction": 0.6371853078405062, | |
| "similarity_qa": 0.7779242674509684, | |
| "similarity_summarization": 0.7545208003785875, | |
| "similarity_unanswerable": 0.6590016434590021, | |
| "word_count_diff_boolean": 4.352941176470588, | |
| "word_count_diff_extraction": 3.6666666666666665, | |
| "word_count_diff_qa": 7.133333333333334, | |
| "word_count_diff_summarization": 52.22222222222222, | |
| "word_count_diff_unanswerable": 3.2666666666666666 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "grad_norm": 0.15895779430866241, | |
| "learning_rate": 9.22135579647165e-06, | |
| "loss": 0.0103, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_loss": 0.09846033155918121, | |
| "eval_runtime": 1.7981, | |
| "eval_samples_per_second": 44.492, | |
| "eval_steps_per_second": 5.562, | |
| "step": 8480 | |
| }, | |
| { | |
| "em_boolean": 0.35294117647058826, | |
| "em_extraction": 0.13333333333333333, | |
| "em_qa": 0.0, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 33.0, | |
| "mean_em": 0.1, | |
| "mean_similarity": 0.6497376295737922, | |
| "mean_word_count_diff": 15.2, | |
| "similarity_boolean": 0.4850216718281017, | |
| "similarity_extraction": 0.6169686233003934, | |
| "similarity_qa": 0.7874227881431579, | |
| "similarity_summarization": 0.720199970735444, | |
| "similarity_unanswerable": 0.6469447533289592, | |
| "word_count_diff_boolean": 5.529411764705882, | |
| "word_count_diff_extraction": 4.0, | |
| "word_count_diff_qa": 5.466666666666667, | |
| "word_count_diff_summarization": 50.72222222222222, | |
| "word_count_diff_unanswerable": 4.466666666666667 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "grad_norm": 0.16729232668876648, | |
| "learning_rate": 8.330472312226091e-06, | |
| "loss": 0.0099, | |
| "step": 8745 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_loss": 0.09990086406469345, | |
| "eval_runtime": 1.7757, | |
| "eval_samples_per_second": 45.052, | |
| "eval_steps_per_second": 5.631, | |
| "step": 8745 | |
| }, | |
| { | |
| "em_boolean": 0.4117647058823529, | |
| "em_extraction": 0.2, | |
| "em_qa": 0.0, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 34.0, | |
| "mean_em": 0.125, | |
| "mean_similarity": 0.6532467395067215, | |
| "mean_word_count_diff": 15.3375, | |
| "similarity_boolean": 0.5624497304944431, | |
| "similarity_extraction": 0.6123799696564675, | |
| "similarity_qa": 0.7896470129489899, | |
| "similarity_summarization": 0.6943622730258439, | |
| "similarity_unanswerable": 0.6112778725723426, | |
| "word_count_diff_boolean": 4.588235294117647, | |
| "word_count_diff_extraction": 5.266666666666667, | |
| "word_count_diff_qa": 5.933333333333334, | |
| "word_count_diff_summarization": 50.666666666666664, | |
| "word_count_diff_unanswerable": 4.6 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "grad_norm": 0.3746591806411743, | |
| "learning_rate": 7.468014575555688e-06, | |
| "loss": 0.0096, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_loss": 0.10108506679534912, | |
| "eval_runtime": 1.7759, | |
| "eval_samples_per_second": 45.048, | |
| "eval_steps_per_second": 5.631, | |
| "step": 9010 | |
| }, | |
| { | |
| "em_boolean": 0.35294117647058826, | |
| "em_extraction": 0.13333333333333333, | |
| "em_qa": 0.13333333333333333, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 35.0, | |
| "mean_em": 0.125, | |
| "mean_similarity": 0.6299693588167429, | |
| "mean_word_count_diff": 15.35, | |
| "similarity_boolean": 0.5166072974748471, | |
| "similarity_extraction": 0.55517836039265, | |
| "similarity_qa": 0.7496427396933237, | |
| "similarity_summarization": 0.7208915799856186, | |
| "similarity_unanswerable": 0.6044573138157526, | |
| "word_count_diff_boolean": 4.352941176470588, | |
| "word_count_diff_extraction": 5.266666666666667, | |
| "word_count_diff_qa": 4.8, | |
| "word_count_diff_summarization": 51.388888888888886, | |
| "word_count_diff_unanswerable": 5.2 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "grad_norm": 0.310463547706604, | |
| "learning_rate": 6.637658409856717e-06, | |
| "loss": 0.0096, | |
| "step": 9275 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_loss": 0.10128389298915863, | |
| "eval_runtime": 1.7949, | |
| "eval_samples_per_second": 44.571, | |
| "eval_steps_per_second": 5.571, | |
| "step": 9275 | |
| }, | |
| { | |
| "em_boolean": 0.5294117647058824, | |
| "em_extraction": 0.2, | |
| "em_qa": 0.06666666666666667, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 36.0, | |
| "mean_em": 0.1625, | |
| "mean_similarity": 0.6648560125031509, | |
| "mean_word_count_diff": 14.9, | |
| "similarity_boolean": 0.6631740967688315, | |
| "similarity_extraction": 0.5298445565005143, | |
| "similarity_qa": 0.7434355229139328, | |
| "similarity_summarization": 0.7201196981800927, | |
| "similarity_unanswerable": 0.6568777064482371, | |
| "word_count_diff_boolean": 2.2941176470588234, | |
| "word_count_diff_extraction": 5.2, | |
| "word_count_diff_qa": 5.333333333333333, | |
| "word_count_diff_summarization": 52.0, | |
| "word_count_diff_unanswerable": 3.933333333333333 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "grad_norm": 0.23128336668014526, | |
| "learning_rate": 5.84294282056834e-06, | |
| "loss": 0.0094, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_loss": 0.10348665714263916, | |
| "eval_runtime": 1.7894, | |
| "eval_samples_per_second": 44.707, | |
| "eval_steps_per_second": 5.588, | |
| "step": 9540 | |
| }, | |
| { | |
| "em_boolean": 0.47058823529411764, | |
| "em_extraction": 0.2, | |
| "em_qa": 0.0, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 37.0, | |
| "mean_em": 0.1375, | |
| "mean_similarity": 0.6319401320070028, | |
| "mean_word_count_diff": 15.0875, | |
| "similarity_boolean": 0.5821067885879208, | |
| "similarity_extraction": 0.5178047935167949, | |
| "similarity_qa": 0.769520037372907, | |
| "similarity_summarization": 0.6793310083448887, | |
| "similarity_unanswerable": 0.6081043027341366, | |
| "word_count_diff_boolean": 4.235294117647059, | |
| "word_count_diff_extraction": 4.866666666666666, | |
| "word_count_diff_qa": 5.0, | |
| "word_count_diff_summarization": 50.833333333333336, | |
| "word_count_diff_unanswerable": 4.8 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "grad_norm": 0.10078708827495575, | |
| "learning_rate": 5.087254911814361e-06, | |
| "loss": 0.0092, | |
| "step": 9805 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_loss": 0.10369674116373062, | |
| "eval_runtime": 1.7998, | |
| "eval_samples_per_second": 44.45, | |
| "eval_steps_per_second": 5.556, | |
| "step": 9805 | |
| }, | |
| { | |
| "em_boolean": 0.4117647058823529, | |
| "em_extraction": 0.13333333333333333, | |
| "em_qa": 0.0, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 38.0, | |
| "mean_em": 0.1125, | |
| "mean_similarity": 0.6402258106973022, | |
| "mean_word_count_diff": 15.3875, | |
| "similarity_boolean": 0.5555457597708001, | |
| "similarity_extraction": 0.5125755973160266, | |
| "similarity_qa": 0.729976910352707, | |
| "similarity_summarization": 0.7154302892999517, | |
| "similarity_unanswerable": 0.6838502744833629, | |
| "word_count_diff_boolean": 4.352941176470588, | |
| "word_count_diff_extraction": 4.466666666666667, | |
| "word_count_diff_qa": 6.266666666666667, | |
| "word_count_diff_summarization": 51.166666666666664, | |
| "word_count_diff_unanswerable": 5.0 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "grad_norm": 0.14647193253040314, | |
| "learning_rate": 4.373815450453467e-06, | |
| "loss": 0.0087, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_loss": 0.10511450469493866, | |
| "eval_runtime": 1.8033, | |
| "eval_samples_per_second": 44.364, | |
| "eval_steps_per_second": 5.545, | |
| "step": 10070 | |
| }, | |
| { | |
| "em_boolean": 0.29411764705882354, | |
| "em_extraction": 0.26666666666666666, | |
| "em_qa": 0.13333333333333333, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 39.0, | |
| "mean_em": 0.1375, | |
| "mean_similarity": 0.5966290417010895, | |
| "mean_word_count_diff": 15.375, | |
| "similarity_boolean": 0.48111217118361416, | |
| "similarity_extraction": 0.5464107090607285, | |
| "similarity_qa": 0.7710953007141749, | |
| "similarity_summarization": 0.6638153013255861, | |
| "similarity_unanswerable": 0.5226767236987749, | |
| "word_count_diff_boolean": 4.294117647058823, | |
| "word_count_diff_extraction": 5.133333333333334, | |
| "word_count_diff_qa": 5.466666666666667, | |
| "word_count_diff_summarization": 50.611111111111114, | |
| "word_count_diff_unanswerable": 5.8 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "grad_norm": 0.8468394875526428, | |
| "learning_rate": 3.7056651390643413e-06, | |
| "loss": 0.0088, | |
| "step": 10335 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_loss": 0.10422440618276596, | |
| "eval_runtime": 1.7878, | |
| "eval_samples_per_second": 44.747, | |
| "eval_steps_per_second": 5.593, | |
| "step": 10335 | |
| }, | |
| { | |
| "em_boolean": 0.47058823529411764, | |
| "em_extraction": 0.26666666666666666, | |
| "em_qa": 0.13333333333333333, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 40.0, | |
| "mean_em": 0.175, | |
| "mean_similarity": 0.6348526364192366, | |
| "mean_word_count_diff": 15.1875, | |
| "similarity_boolean": 0.5687163450262126, | |
| "similarity_extraction": 0.62134765163064, | |
| "similarity_qa": 0.7562251538038254, | |
| "similarity_summarization": 0.6440524767256446, | |
| "similarity_unanswerable": 0.5908997590343158, | |
| "word_count_diff_boolean": 4.529411764705882, | |
| "word_count_diff_extraction": 4.133333333333334, | |
| "word_count_diff_qa": 4.866666666666666, | |
| "word_count_diff_summarization": 50.666666666666664, | |
| "word_count_diff_unanswerable": 6.066666666666666 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "grad_norm": 0.15559451282024384, | |
| "learning_rate": 3.08565165637071e-06, | |
| "loss": 0.0088, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_loss": 0.1041550487279892, | |
| "eval_runtime": 1.7986, | |
| "eval_samples_per_second": 44.479, | |
| "eval_steps_per_second": 5.56, | |
| "step": 10600 | |
| }, | |
| { | |
| "em_boolean": 0.4117647058823529, | |
| "em_extraction": 0.2, | |
| "em_qa": 0.0, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 41.0, | |
| "mean_em": 0.125, | |
| "mean_similarity": 0.6427899678237736, | |
| "mean_word_count_diff": 15.15, | |
| "similarity_boolean": 0.5819402319543502, | |
| "similarity_extraction": 0.5856266627709071, | |
| "similarity_qa": 0.7046878506739934, | |
| "similarity_summarization": 0.7580131689707438, | |
| "similarity_unanswerable": 0.5687505826354027, | |
| "word_count_diff_boolean": 3.0, | |
| "word_count_diff_extraction": 5.266666666666667, | |
| "word_count_diff_qa": 5.466666666666667, | |
| "word_count_diff_summarization": 51.44444444444444, | |
| "word_count_diff_unanswerable": 4.933333333333334 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "grad_norm": 0.2692953050136566, | |
| "learning_rate": 2.516417520340457e-06, | |
| "loss": 0.0085, | |
| "step": 10865 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_loss": 0.1051572784781456, | |
| "eval_runtime": 1.7897, | |
| "eval_samples_per_second": 44.701, | |
| "eval_steps_per_second": 5.588, | |
| "step": 10865 | |
| }, | |
| { | |
| "em_boolean": 0.47058823529411764, | |
| "em_extraction": 0.3333333333333333, | |
| "em_qa": 0.0, | |
| "em_summarization": 0.0, | |
| "em_unanswerable": 0.0, | |
| "epoch": 42.0, | |
| "mean_em": 0.1625, | |
| "mean_similarity": 0.6774277022806927, | |
| "mean_word_count_diff": 15.4, | |
| "similarity_boolean": 0.600595106206396, | |
| "similarity_extraction": 0.644695137689511, | |
| "similarity_qa": 0.7859726687272389, | |
| "similarity_summarization": 0.7171196627120177, | |
| "similarity_unanswerable": 0.6410618901252747, | |
| "word_count_diff_boolean": 3.8823529411764706, | |
| "word_count_diff_extraction": 4.0, | |
| "word_count_diff_qa": 6.333333333333333, | |
| "word_count_diff_summarization": 53.05555555555556, | |
| "word_count_diff_unanswerable": 3.7333333333333334 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "grad_norm": 0.028788737952709198, | |
| "learning_rate": 2.000388825686658e-06, | |
| "loss": 0.0084, | |
| "step": 11130 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_loss": 0.10606852918863297, | |
| "eval_runtime": 1.7957, | |
| "eval_samples_per_second": 44.551, | |
| "eval_steps_per_second": 5.569, | |
| "step": 11130 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 13250, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.042787771337933e+16, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |