teapotllm-chat / checkpoint-11130 /trainer_state.json
zakerytclarke's picture
Upload folder using huggingface_hub
4ff0462 verified
{
"best_metric": 0.06431461870670319,
"best_model_checkpoint": "./teapotllm/checkpoint-1855",
"epoch": 42.0,
"eval_steps": 500,
"global_step": 11130,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"em_boolean": 0.11764705882352941,
"em_extraction": 0.06666666666666667,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 1.0,
"mean_em": 0.0375,
"mean_similarity": 0.3622433557640761,
"mean_word_count_diff": 17.7125,
"similarity_boolean": 0.18269754047779477,
"similarity_extraction": 0.2946929598848025,
"similarity_qa": 0.43210459053516387,
"similarity_summarization": 0.6143242551220788,
"similarity_unanswerable": 0.26092069496711096,
"word_count_diff_boolean": 9.0,
"word_count_diff_extraction": 7.666666666666667,
"word_count_diff_qa": 6.933333333333334,
"word_count_diff_summarization": 53.111111111111114,
"word_count_diff_unanswerable": 5.933333333333334
},
{
"epoch": 1.0,
"grad_norm": 101.51641082763672,
"learning_rate": 1.59e-05,
"loss": 40.3479,
"step": 265
},
{
"epoch": 1.0,
"eval_loss": 19.639965057373047,
"eval_runtime": 1.7948,
"eval_samples_per_second": 44.573,
"eval_steps_per_second": 5.572,
"step": 265
},
{
"em_boolean": 0.058823529411764705,
"em_extraction": 0.0,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 2.0,
"mean_em": 0.0125,
"mean_similarity": 0.37655651543755086,
"mean_word_count_diff": 17.1625,
"similarity_boolean": 0.10583376435234267,
"similarity_extraction": 0.21972424462437629,
"similarity_qa": 0.5285325924555461,
"similarity_summarization": 0.6654400361908807,
"similarity_unanswerable": 0.34157160222530364,
"word_count_diff_boolean": 10.176470588235293,
"word_count_diff_extraction": 6.133333333333334,
"word_count_diff_qa": 5.866666666666666,
"word_count_diff_summarization": 52.166666666666664,
"word_count_diff_unanswerable": 5.4
},
{
"epoch": 2.0,
"grad_norm": 0.2927296459674835,
"learning_rate": 2.9999590191302685e-05,
"loss": 1.8688,
"step": 530
},
{
"epoch": 2.0,
"eval_loss": 0.08876434713602066,
"eval_runtime": 1.7881,
"eval_samples_per_second": 44.74,
"eval_steps_per_second": 5.593,
"step": 530
},
{
"em_boolean": 0.29411764705882354,
"em_extraction": 0.13333333333333333,
"em_qa": 0.06666666666666667,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 3.0,
"mean_em": 0.1,
"mean_similarity": 0.468679612770211,
"mean_word_count_diff": 16.8875,
"similarity_boolean": 0.36136592250755606,
"similarity_extraction": 0.3513699695467949,
"similarity_qa": 0.5638724292318026,
"similarity_summarization": 0.661304693048199,
"similarity_unanswerable": 0.3812685254961252,
"word_count_diff_boolean": 5.411764705882353,
"word_count_diff_extraction": 8.066666666666666,
"word_count_diff_qa": 5.6,
"word_count_diff_summarization": 53.333333333333336,
"word_count_diff_unanswerable": 6.266666666666667
},
{
"epoch": 3.0,
"grad_norm": 0.2717432379722595,
"learning_rate": 2.9960391039419217e-05,
"loss": 0.0745,
"step": 795
},
{
"epoch": 3.0,
"eval_loss": 0.07186319679021835,
"eval_runtime": 1.7817,
"eval_samples_per_second": 44.901,
"eval_steps_per_second": 5.613,
"step": 795
},
{
"em_boolean": 0.5294117647058824,
"em_extraction": 0.0,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 4.0,
"mean_em": 0.1125,
"mean_similarity": 0.5406636619823985,
"mean_word_count_diff": 16.4,
"similarity_boolean": 0.5694808649337467,
"similarity_extraction": 0.2759806969513496,
"similarity_qa": 0.5888164043426514,
"similarity_summarization": 0.6777413653002845,
"similarity_unanswerable": 0.5600411439935367,
"word_count_diff_boolean": 3.411764705882353,
"word_count_diff_extraction": 9.133333333333333,
"word_count_diff_qa": 5.733333333333333,
"word_count_diff_summarization": 52.611111111111114,
"word_count_diff_unanswerable": 5.6
},
{
"epoch": 4.0,
"grad_norm": 0.33120226860046387,
"learning_rate": 2.985743020616636e-05,
"loss": 0.0583,
"step": 1060
},
{
"epoch": 4.0,
"eval_loss": 0.06767071783542633,
"eval_runtime": 1.8031,
"eval_samples_per_second": 44.368,
"eval_steps_per_second": 5.546,
"step": 1060
},
{
"em_boolean": 0.35294117647058826,
"em_extraction": 0.06666666666666667,
"em_qa": 0.06666666666666667,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 5.0,
"mean_em": 0.1,
"mean_similarity": 0.5391882328083739,
"mean_word_count_diff": 16.5375,
"similarity_boolean": 0.4291324294446146,
"similarity_extraction": 0.3934509038925171,
"similarity_qa": 0.5968125452597935,
"similarity_summarization": 0.6669783542553583,
"similarity_unanswerable": 0.5986830140153567,
"word_count_diff_boolean": 5.9411764705882355,
"word_count_diff_extraction": 5.866666666666666,
"word_count_diff_qa": 5.4,
"word_count_diff_summarization": 53.77777777777778,
"word_count_diff_unanswerable": 5.666666666666667
},
{
"epoch": 5.0,
"grad_norm": 0.05014026165008545,
"learning_rate": 2.969114651402049e-05,
"loss": 0.0512,
"step": 1325
},
{
"epoch": 5.0,
"eval_loss": 0.06544757634401321,
"eval_runtime": 1.797,
"eval_samples_per_second": 44.518,
"eval_steps_per_second": 5.565,
"step": 1325
},
{
"em_boolean": 0.35294117647058826,
"em_extraction": 0.13333333333333333,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 6.0,
"mean_em": 0.1,
"mean_similarity": 0.5817075100494549,
"mean_word_count_diff": 16.1375,
"similarity_boolean": 0.3759182594059145,
"similarity_extraction": 0.5534958879152934,
"similarity_qa": 0.6812621881564458,
"similarity_summarization": 0.661298368126154,
"similarity_unanswerable": 0.648083241780599,
"word_count_diff_boolean": 6.294117647058823,
"word_count_diff_extraction": 4.466666666666667,
"word_count_diff_qa": 5.666666666666667,
"word_count_diff_summarization": 52.55555555555556,
"word_count_diff_unanswerable": 5.733333333333333
},
{
"epoch": 6.0,
"grad_norm": 0.37450432777404785,
"learning_rate": 2.9462248669576738e-05,
"loss": 0.0466,
"step": 1590
},
{
"epoch": 6.0,
"eval_loss": 0.0656035840511322,
"eval_runtime": 1.8115,
"eval_samples_per_second": 44.161,
"eval_steps_per_second": 5.52,
"step": 1590
},
{
"em_boolean": 0.29411764705882354,
"em_extraction": 0.06666666666666667,
"em_qa": 0.06666666666666667,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 7.0,
"mean_em": 0.0875,
"mean_similarity": 0.584748298721388,
"mean_word_count_diff": 16.4875,
"similarity_boolean": 0.3921016369234113,
"similarity_extraction": 0.4258095269401868,
"similarity_qa": 0.8010195712248485,
"similarity_summarization": 0.7269510593679216,
"similarity_unanswerable": 0.5751053685943286,
"word_count_diff_boolean": 6.529411764705882,
"word_count_diff_extraction": 7.4,
"word_count_diff_qa": 4.266666666666667,
"word_count_diff_summarization": 51.5,
"word_count_diff_unanswerable": 7.066666666666666
},
{
"epoch": 7.0,
"grad_norm": 0.2656797170639038,
"learning_rate": 2.9171712243018076e-05,
"loss": 0.043,
"step": 1855
},
{
"epoch": 7.0,
"eval_loss": 0.06431461870670319,
"eval_runtime": 1.779,
"eval_samples_per_second": 44.968,
"eval_steps_per_second": 5.621,
"step": 1855
},
{
"em_boolean": 0.23529411764705882,
"em_extraction": 0.3333333333333333,
"em_qa": 0.13333333333333333,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 8.0,
"mean_em": 0.1375,
"mean_similarity": 0.5707565736840479,
"mean_word_count_diff": 15.9125,
"similarity_boolean": 0.3595361856962828,
"similarity_extraction": 0.6590617413322131,
"similarity_qa": 0.6671084851026535,
"similarity_summarization": 0.672764014866617,
"similarity_unanswerable": 0.503073671584328,
"word_count_diff_boolean": 6.705882352941177,
"word_count_diff_extraction": 4.133333333333334,
"word_count_diff_qa": 4.0,
"word_count_diff_summarization": 51.833333333333336,
"word_count_diff_unanswerable": 6.933333333333334
},
{
"epoch": 8.0,
"grad_norm": 0.1501343548297882,
"learning_rate": 2.882077551020291e-05,
"loss": 0.0393,
"step": 2120
},
{
"epoch": 8.0,
"eval_loss": 0.064610555768013,
"eval_runtime": 1.7854,
"eval_samples_per_second": 44.809,
"eval_steps_per_second": 5.601,
"step": 2120
},
{
"em_boolean": 0.29411764705882354,
"em_extraction": 0.13333333333333333,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 9.0,
"mean_em": 0.0875,
"mean_similarity": 0.5716628619702533,
"mean_word_count_diff": 16.075,
"similarity_boolean": 0.46071117371320724,
"similarity_extraction": 0.5175981452067693,
"similarity_qa": 0.6631501737982035,
"similarity_summarization": 0.6426751317663325,
"similarity_unanswerable": 0.5747707898418108,
"word_count_diff_boolean": 5.235294117647059,
"word_count_diff_extraction": 7.333333333333333,
"word_count_diff_qa": 5.6,
"word_count_diff_summarization": 52.22222222222222,
"word_count_diff_unanswerable": 4.2
},
{
"epoch": 9.0,
"grad_norm": 0.23849624395370483,
"learning_rate": 2.8410934175092426e-05,
"loss": 0.0363,
"step": 2385
},
{
"epoch": 9.0,
"eval_loss": 0.06528123468160629,
"eval_runtime": 1.7964,
"eval_samples_per_second": 44.533,
"eval_steps_per_second": 5.567,
"step": 2385
},
{
"em_boolean": 0.4117647058823529,
"em_extraction": 0.13333333333333333,
"em_qa": 0.06666666666666667,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 10.0,
"mean_em": 0.125,
"mean_similarity": 0.5984957347856834,
"mean_word_count_diff": 16.2375,
"similarity_boolean": 0.4425226738128592,
"similarity_extraction": 0.5075533439715704,
"similarity_qa": 0.6813998910288016,
"similarity_summarization": 0.6938931312825944,
"similarity_unanswerable": 0.6688265626629194,
"word_count_diff_boolean": 5.352941176470588,
"word_count_diff_extraction": 6.466666666666667,
"word_count_diff_qa": 6.2,
"word_count_diff_summarization": 52.166666666666664,
"word_count_diff_unanswerable": 5.266666666666667
},
{
"epoch": 10.0,
"grad_norm": 0.2859129309654236,
"learning_rate": 2.7943934995010845e-05,
"loss": 0.0338,
"step": 2650
},
{
"epoch": 10.0,
"eval_loss": 0.06541204452514648,
"eval_runtime": 1.7939,
"eval_samples_per_second": 44.595,
"eval_steps_per_second": 5.574,
"step": 2650
},
{
"em_boolean": 0.35294117647058826,
"em_extraction": 0.2,
"em_qa": 0.06666666666666667,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 11.0,
"mean_em": 0.125,
"mean_similarity": 0.584741565072909,
"mean_word_count_diff": 15.85,
"similarity_boolean": 0.41579638640670213,
"similarity_extraction": 0.575649573157231,
"similarity_qa": 0.68668532371521,
"similarity_summarization": 0.7162102411190668,
"similarity_unanswerable": 0.5255985895792643,
"word_count_diff_boolean": 5.0,
"word_count_diff_extraction": 4.333333333333333,
"word_count_diff_qa": 5.4,
"word_count_diff_summarization": 51.72222222222222,
"word_count_diff_unanswerable": 7.066666666666666
},
{
"epoch": 11.0,
"grad_norm": 0.19708961248397827,
"learning_rate": 2.7421768335907942e-05,
"loss": 0.0314,
"step": 2915
},
{
"epoch": 11.0,
"eval_loss": 0.06535650044679642,
"eval_runtime": 1.7968,
"eval_samples_per_second": 44.525,
"eval_steps_per_second": 5.566,
"step": 2915
},
{
"em_boolean": 0.4117647058823529,
"em_extraction": 0.13333333333333333,
"em_qa": 0.06666666666666667,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 12.0,
"mean_em": 0.125,
"mean_similarity": 0.5985993921523913,
"mean_word_count_diff": 15.65,
"similarity_boolean": 0.49136487109696164,
"similarity_extraction": 0.4834645986557007,
"similarity_qa": 0.7439890225728353,
"similarity_summarization": 0.7199151118596395,
"similarity_unanswerable": 0.5442981487760942,
"word_count_diff_boolean": 5.294117647058823,
"word_count_diff_extraction": 6.466666666666667,
"word_count_diff_qa": 5.0,
"word_count_diff_summarization": 51.333333333333336,
"word_count_diff_unanswerable": 4.4
},
{
"epoch": 12.0,
"grad_norm": 0.32636699080467224,
"learning_rate": 2.6846659689353532e-05,
"loss": 0.0291,
"step": 3180
},
{
"epoch": 12.0,
"eval_loss": 0.06654994189739227,
"eval_runtime": 1.7794,
"eval_samples_per_second": 44.96,
"eval_steps_per_second": 5.62,
"step": 3180
},
{
"em_boolean": 0.29411764705882354,
"em_extraction": 0.2,
"em_qa": 0.13333333333333333,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 13.0,
"mean_em": 0.125,
"mean_similarity": 0.5608657638425939,
"mean_word_count_diff": 15.4875,
"similarity_boolean": 0.3726646105265793,
"similarity_extraction": 0.4880456611514091,
"similarity_qa": 0.6968116387724876,
"similarity_summarization": 0.7003769092261791,
"similarity_unanswerable": 0.5436212575683991,
"word_count_diff_boolean": 6.470588235294118,
"word_count_diff_extraction": 4.0,
"word_count_diff_qa": 4.266666666666667,
"word_count_diff_summarization": 50.22222222222222,
"word_count_diff_unanswerable": 6.733333333333333
},
{
"epoch": 13.0,
"grad_norm": 0.09956305474042892,
"learning_rate": 2.622106018741882e-05,
"loss": 0.0268,
"step": 3445
},
{
"epoch": 13.0,
"eval_loss": 0.06867102533578873,
"eval_runtime": 1.7923,
"eval_samples_per_second": 44.635,
"eval_steps_per_second": 5.579,
"step": 3445
},
{
"em_boolean": 0.4117647058823529,
"em_extraction": 0.13333333333333333,
"em_qa": 0.06666666666666667,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 14.0,
"mean_em": 0.125,
"mean_similarity": 0.6118219185853377,
"mean_word_count_diff": 15.1,
"similarity_boolean": 0.5559550164376988,
"similarity_extraction": 0.4480490814894438,
"similarity_qa": 0.8226341267426809,
"similarity_summarization": 0.7094271017445458,
"similarity_unanswerable": 0.5109721501668294,
"word_count_diff_boolean": 3.235294117647059,
"word_count_diff_extraction": 5.333333333333333,
"word_count_diff_qa": 4.066666666666666,
"word_count_diff_summarization": 51.333333333333336,
"word_count_diff_unanswerable": 5.866666666666666
},
{
"epoch": 14.0,
"grad_norm": 0.2182687222957611,
"learning_rate": 2.554763615587042e-05,
"loss": 0.0254,
"step": 3710
},
{
"epoch": 14.0,
"eval_loss": 0.06904994696378708,
"eval_runtime": 1.7971,
"eval_samples_per_second": 44.517,
"eval_steps_per_second": 5.565,
"step": 3710
},
{
"em_boolean": 0.17647058823529413,
"em_extraction": 0.13333333333333333,
"em_qa": 0.13333333333333333,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 15.0,
"mean_em": 0.0875,
"mean_similarity": 0.5906956384656951,
"mean_word_count_diff": 15.8,
"similarity_boolean": 0.30824649366824064,
"similarity_extraction": 0.5498731901248296,
"similarity_qa": 0.7911941662430764,
"similarity_summarization": 0.7216936614778307,
"similarity_unanswerable": 0.5939309621850649,
"word_count_diff_boolean": 5.294117647058823,
"word_count_diff_extraction": 5.533333333333333,
"word_count_diff_qa": 4.933333333333334,
"word_count_diff_summarization": 50.833333333333336,
"word_count_diff_unanswerable": 6.8
},
{
"epoch": 15.0,
"grad_norm": 0.278495728969574,
"learning_rate": 2.4829257750201806e-05,
"loss": 0.0235,
"step": 3975
},
{
"epoch": 15.0,
"eval_loss": 0.07152120769023895,
"eval_runtime": 1.8045,
"eval_samples_per_second": 44.334,
"eval_steps_per_second": 5.542,
"step": 3975
},
{
"em_boolean": 0.35294117647058826,
"em_extraction": 0.13333333333333333,
"em_qa": 0.06666666666666667,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 16.0,
"mean_em": 0.1125,
"mean_similarity": 0.6044320391491056,
"mean_word_count_diff": 15.3625,
"similarity_boolean": 0.4709787254824358,
"similarity_extraction": 0.48890374253193536,
"similarity_qa": 0.7086231042941411,
"similarity_summarization": 0.6860101450648572,
"similarity_unanswerable": 0.6691226323445638,
"word_count_diff_boolean": 5.176470588235294,
"word_count_diff_extraction": 4.133333333333334,
"word_count_diff_qa": 5.8,
"word_count_diff_summarization": 51.388888888888886,
"word_count_diff_unanswerable": 4.466666666666667
},
{
"epoch": 16.0,
"grad_norm": 0.12528866529464722,
"learning_rate": 2.4068986722935625e-05,
"loss": 0.0219,
"step": 4240
},
{
"epoch": 16.0,
"eval_loss": 0.07122211903333664,
"eval_runtime": 1.7912,
"eval_samples_per_second": 44.663,
"eval_steps_per_second": 5.583,
"step": 4240
},
{
"em_boolean": 0.23529411764705882,
"em_extraction": 0.2,
"em_qa": 0.06666666666666667,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 17.0,
"mean_em": 0.1,
"mean_similarity": 0.5854583959793672,
"mean_word_count_diff": 16.1125,
"similarity_boolean": 0.3930098557954325,
"similarity_extraction": 0.5295473781724771,
"similarity_qa": 0.7186618636051814,
"similarity_summarization": 0.6619544287936555,
"similarity_unanswerable": 0.6344790523250897,
"word_count_diff_boolean": 5.764705882352941,
"word_count_diff_extraction": 5.066666666666666,
"word_count_diff_qa": 6.066666666666666,
"word_count_diff_summarization": 52.22222222222222,
"word_count_diff_unanswerable": 5.6
},
{
"epoch": 17.0,
"grad_norm": 0.18347615003585815,
"learning_rate": 2.327006337433323e-05,
"loss": 0.0208,
"step": 4505
},
{
"epoch": 17.0,
"eval_loss": 0.07514352351427078,
"eval_runtime": 1.7952,
"eval_samples_per_second": 44.563,
"eval_steps_per_second": 5.57,
"step": 4505
},
{
"em_boolean": 0.35294117647058826,
"em_extraction": 0.13333333333333333,
"em_qa": 0.06666666666666667,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 18.0,
"mean_em": 0.1125,
"mean_similarity": 0.6148898253683001,
"mean_word_count_diff": 15.95,
"similarity_boolean": 0.45182113108389516,
"similarity_extraction": 0.5246008743842443,
"similarity_qa": 0.7161807785431544,
"similarity_summarization": 0.695978145632479,
"similarity_unanswerable": 0.6913930257161458,
"word_count_diff_boolean": 6.705882352941177,
"word_count_diff_extraction": 5.333333333333333,
"word_count_diff_qa": 5.133333333333334,
"word_count_diff_summarization": 51.72222222222222,
"word_count_diff_unanswerable": 4.933333333333334
},
{
"epoch": 18.0,
"grad_norm": 0.39281344413757324,
"learning_rate": 2.2435892742127786e-05,
"loss": 0.0197,
"step": 4770
},
{
"epoch": 18.0,
"eval_loss": 0.07489313185214996,
"eval_runtime": 1.7966,
"eval_samples_per_second": 44.528,
"eval_steps_per_second": 5.566,
"step": 4770
},
{
"em_boolean": 0.35294117647058826,
"em_extraction": 0.13333333333333333,
"em_qa": 0.06666666666666667,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 19.0,
"mean_em": 0.1125,
"mean_similarity": 0.6110048872418702,
"mean_word_count_diff": 15.175,
"similarity_boolean": 0.4688412766246235,
"similarity_extraction": 0.5354909280935923,
"similarity_qa": 0.7334268649419149,
"similarity_summarization": 0.6523812972009182,
"similarity_unanswerable": 0.6755639354387919,
"word_count_diff_boolean": 5.411764705882353,
"word_count_diff_extraction": 3.6666666666666665,
"word_count_diff_qa": 5.333333333333333,
"word_count_diff_summarization": 51.27777777777778,
"word_count_diff_unanswerable": 4.266666666666667
},
{
"epoch": 19.0,
"grad_norm": 0.26503777503967285,
"learning_rate": 2.1570030089140857e-05,
"loss": 0.0184,
"step": 5035
},
{
"epoch": 19.0,
"eval_loss": 0.07855142652988434,
"eval_runtime": 1.7852,
"eval_samples_per_second": 44.813,
"eval_steps_per_second": 5.602,
"step": 5035
},
{
"em_boolean": 0.058823529411764705,
"em_extraction": 0.26666666666666666,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 20.0,
"mean_em": 0.0625,
"mean_similarity": 0.5894458804745227,
"mean_word_count_diff": 15.7625,
"similarity_boolean": 0.3066763941417722,
"similarity_extraction": 0.567964817583561,
"similarity_qa": 0.7356137126684189,
"similarity_summarization": 0.7311095827155643,
"similarity_unanswerable": 0.615234752992789,
"word_count_diff_boolean": 7.0588235294117645,
"word_count_diff_extraction": 4.133333333333334,
"word_count_diff_qa": 6.4,
"word_count_diff_summarization": 50.833333333333336,
"word_count_diff_unanswerable": 4.533333333333333
},
{
"epoch": 20.0,
"grad_norm": 0.325967013835907,
"learning_rate": 2.0676165750634656e-05,
"loss": 0.0173,
"step": 5300
},
{
"epoch": 20.0,
"eval_loss": 0.07865440845489502,
"eval_runtime": 1.7895,
"eval_samples_per_second": 44.705,
"eval_steps_per_second": 5.588,
"step": 5300
},
{
"em_boolean": 0.5294117647058824,
"em_extraction": 0.26666666666666666,
"em_qa": 0.06666666666666667,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 21.0,
"mean_em": 0.175,
"mean_similarity": 0.6464177187765017,
"mean_word_count_diff": 14.8375,
"similarity_boolean": 0.6461278642801678,
"similarity_extraction": 0.6178931772708893,
"similarity_qa": 0.7566967884699504,
"similarity_summarization": 0.6613570315142473,
"similarity_unanswerable": 0.5470645170658827,
"word_count_diff_boolean": 3.176470588235294,
"word_count_diff_extraction": 5.4,
"word_count_diff_qa": 4.6,
"word_count_diff_summarization": 50.5,
"word_count_diff_unanswerable": 4.933333333333334
},
{
"epoch": 21.0,
"grad_norm": 0.18580074608325958,
"learning_rate": 1.975810940598114e-05,
"loss": 0.0162,
"step": 5565
},
{
"epoch": 21.0,
"eval_loss": 0.08283738046884537,
"eval_runtime": 1.8062,
"eval_samples_per_second": 44.292,
"eval_steps_per_second": 5.537,
"step": 5565
},
{
"em_boolean": 0.47058823529411764,
"em_extraction": 0.26666666666666666,
"em_qa": 0.06666666666666667,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 22.0,
"mean_em": 0.1625,
"mean_similarity": 0.6508607531315647,
"mean_word_count_diff": 15.1875,
"similarity_boolean": 0.5319088667302447,
"similarity_extraction": 0.6322630804032088,
"similarity_qa": 0.7358093798160553,
"similarity_summarization": 0.6958086478213469,
"similarity_unanswerable": 0.6653844634691874,
"word_count_diff_boolean": 5.352941176470588,
"word_count_diff_extraction": 3.933333333333333,
"word_count_diff_qa": 5.266666666666667,
"word_count_diff_summarization": 51.166666666666664,
"word_count_diff_unanswerable": 4.333333333333333
},
{
"epoch": 22.0,
"grad_norm": 0.7463592886924744,
"learning_rate": 1.881977384168254e-05,
"loss": 0.0158,
"step": 5830
},
{
"epoch": 22.0,
"eval_loss": 0.08268458396196365,
"eval_runtime": 1.7886,
"eval_samples_per_second": 44.728,
"eval_steps_per_second": 5.591,
"step": 5830
},
{
"em_boolean": 0.4117647058823529,
"em_extraction": 0.2,
"em_qa": 0.13333333333333333,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 23.0,
"mean_em": 0.15,
"mean_similarity": 0.6376353252329864,
"mean_word_count_diff": 15.5625,
"similarity_boolean": 0.5682552300722283,
"similarity_extraction": 0.5148679026712973,
"similarity_qa": 0.7483514944712321,
"similarity_summarization": 0.7200961951166391,
"similarity_unanswerable": 0.629364309211572,
"word_count_diff_boolean": 3.823529411764706,
"word_count_diff_extraction": 5.266666666666667,
"word_count_diff_qa": 4.933333333333334,
"word_count_diff_summarization": 51.388888888888886,
"word_count_diff_unanswerable": 6.8
},
{
"epoch": 23.0,
"grad_norm": 0.30431216955184937,
"learning_rate": 1.7865158274945846e-05,
"loss": 0.0149,
"step": 6095
},
{
"epoch": 23.0,
"eval_loss": 0.08500902354717255,
"eval_runtime": 1.8006,
"eval_samples_per_second": 44.43,
"eval_steps_per_second": 5.554,
"step": 6095
},
{
"em_boolean": 0.35294117647058826,
"em_extraction": 0.2,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 24.0,
"mean_em": 0.1125,
"mean_similarity": 0.6209718830883503,
"mean_word_count_diff": 15.625,
"similarity_boolean": 0.4452236132586704,
"similarity_extraction": 0.5458283141255379,
"similarity_qa": 0.7422831366459529,
"similarity_summarization": 0.7023132857349184,
"similarity_unanswerable": 0.6763758877913157,
"word_count_diff_boolean": 5.470588235294118,
"word_count_diff_extraction": 5.6,
"word_count_diff_qa": 5.533333333333333,
"word_count_diff_summarization": 51.611111111111114,
"word_count_diff_unanswerable": 4.066666666666666
},
{
"epoch": 24.0,
"grad_norm": 0.1550023853778839,
"learning_rate": 1.6898331308886863e-05,
"loss": 0.0138,
"step": 6360
},
{
"epoch": 24.0,
"eval_loss": 0.08557449281215668,
"eval_runtime": 1.7865,
"eval_samples_per_second": 44.78,
"eval_steps_per_second": 5.598,
"step": 6360
},
{
"em_boolean": 0.4117647058823529,
"em_extraction": 0.26666666666666666,
"em_qa": 0.13333333333333333,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 25.0,
"mean_em": 0.1625,
"mean_similarity": 0.6411203743191436,
"mean_word_count_diff": 15.1875,
"similarity_boolean": 0.5644924241830321,
"similarity_extraction": 0.5594843897968531,
"similarity_qa": 0.7792828718821208,
"similarity_summarization": 0.6979754573355118,
"similarity_unanswerable": 0.6032127718130748,
"word_count_diff_boolean": 3.7058823529411766,
"word_count_diff_extraction": 4.533333333333333,
"word_count_diff_qa": 4.933333333333334,
"word_count_diff_summarization": 51.833333333333336,
"word_count_diff_unanswerable": 5.133333333333334
},
{
"epoch": 25.0,
"grad_norm": 0.23536083102226257,
"learning_rate": 1.5923413592009145e-05,
"loss": 0.0132,
"step": 6625
},
{
"epoch": 25.0,
"eval_loss": 0.0886365994811058,
"eval_runtime": 1.7918,
"eval_samples_per_second": 44.648,
"eval_steps_per_second": 5.581,
"step": 6625
},
{
"em_boolean": 0.29411764705882354,
"em_extraction": 0.13333333333333333,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 26.0,
"mean_em": 0.0875,
"mean_similarity": 0.5999299102462828,
"mean_word_count_diff": 15.6625,
"similarity_boolean": 0.44055906716076765,
"similarity_extraction": 0.4907683741301298,
"similarity_qa": 0.64702517837286,
"similarity_summarization": 0.7336946874856949,
"similarity_unanswerable": 0.6820987343788147,
"word_count_diff_boolean": 5.9411764705882355,
"word_count_diff_extraction": 5.466666666666667,
"word_count_diff_qa": 7.066666666666666,
"word_count_diff_summarization": 50.27777777777778,
"word_count_diff_unanswerable": 3.933333333333333
},
{
"epoch": 26.0,
"grad_norm": 0.0521920807659626,
"learning_rate": 1.4944560255863805e-05,
"loss": 0.0127,
"step": 6890
},
{
"epoch": 26.0,
"eval_loss": 0.09115185588598251,
"eval_runtime": 1.7871,
"eval_samples_per_second": 44.764,
"eval_steps_per_second": 5.596,
"step": 6890
},
{
"em_boolean": 0.29411764705882354,
"em_extraction": 0.2,
"em_qa": 0.06666666666666667,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 27.0,
"mean_em": 0.1125,
"mean_similarity": 0.5961971989367157,
"mean_word_count_diff": 15.9,
"similarity_boolean": 0.47167208264855776,
"similarity_extraction": 0.563706802825133,
"similarity_qa": 0.6419207287331422,
"similarity_summarization": 0.6927380098236932,
"similarity_unanswerable": 0.608243557314078,
"word_count_diff_boolean": 5.0,
"word_count_diff_extraction": 4.933333333333334,
"word_count_diff_qa": 6.0,
"word_count_diff_summarization": 52.0,
"word_count_diff_unanswerable": 5.8
},
{
"epoch": 27.0,
"grad_norm": 0.23173846304416656,
"learning_rate": 1.396594320574144e-05,
"loss": 0.0121,
"step": 7155
},
{
"epoch": 27.0,
"eval_loss": 0.09045173972845078,
"eval_runtime": 1.7961,
"eval_samples_per_second": 44.541,
"eval_steps_per_second": 5.568,
"step": 7155
},
{
"em_boolean": 0.5294117647058824,
"em_extraction": 0.13333333333333333,
"em_qa": 0.06666666666666667,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 28.0,
"mean_em": 0.15,
"mean_similarity": 0.6426682359306142,
"mean_word_count_diff": 15.4625,
"similarity_boolean": 0.6154902246506775,
"similarity_extraction": 0.559150509784619,
"similarity_qa": 0.7585609555244446,
"similarity_summarization": 0.7132770286666023,
"similarity_unanswerable": 0.5563644373168548,
"word_count_diff_boolean": 3.9411764705882355,
"word_count_diff_extraction": 5.933333333333334,
"word_count_diff_qa": 5.4,
"word_count_diff_summarization": 51.94444444444444,
"word_count_diff_unanswerable": 4.333333333333333
},
{
"epoch": 28.0,
"grad_norm": 0.15653730928897858,
"learning_rate": 1.2991733339873795e-05,
"loss": 0.0114,
"step": 7420
},
{
"epoch": 28.0,
"eval_loss": 0.09521345049142838,
"eval_runtime": 1.7969,
"eval_samples_per_second": 44.521,
"eval_steps_per_second": 5.565,
"step": 7420
},
{
"em_boolean": 0.5882352941176471,
"em_extraction": 0.2,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 29.0,
"mean_em": 0.1625,
"mean_similarity": 0.670323112141341,
"mean_word_count_diff": 14.8125,
"similarity_boolean": 0.6801450522068668,
"similarity_extraction": 0.5754425642391046,
"similarity_qa": 0.7391265859206517,
"similarity_summarization": 0.7395255797439151,
"similarity_unanswerable": 0.6022256930669149,
"word_count_diff_boolean": 2.588235294117647,
"word_count_diff_extraction": 4.0,
"word_count_diff_qa": 5.666666666666667,
"word_count_diff_summarization": 50.333333333333336,
"word_count_diff_unanswerable": 6.0
},
{
"epoch": 29.0,
"grad_norm": 0.13572590053081512,
"learning_rate": 1.2026082772927574e-05,
"loss": 0.0112,
"step": 7685
},
{
"epoch": 29.0,
"eval_loss": 0.09561847150325775,
"eval_runtime": 1.7863,
"eval_samples_per_second": 44.786,
"eval_steps_per_second": 5.598,
"step": 7685
},
{
"em_boolean": 0.47058823529411764,
"em_extraction": 0.26666666666666666,
"em_qa": 0.06666666666666667,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 30.0,
"mean_em": 0.1625,
"mean_similarity": 0.6361284551327117,
"mean_word_count_diff": 15.1625,
"similarity_boolean": 0.5678887126748177,
"similarity_extraction": 0.528444591164589,
"similarity_qa": 0.7746080329020818,
"similarity_summarization": 0.7153815428415934,
"similarity_unanswerable": 0.5875674108664195,
"word_count_diff_boolean": 3.6470588235294117,
"word_count_diff_extraction": 4.2,
"word_count_diff_qa": 5.666666666666667,
"word_count_diff_summarization": 51.05555555555556,
"word_count_diff_unanswerable": 5.6
},
{
"epoch": 30.0,
"grad_norm": 0.11106608062982559,
"learning_rate": 1.1073107139554395e-05,
"loss": 0.0111,
"step": 7950
},
{
"epoch": 30.0,
"eval_loss": 0.09475980699062347,
"eval_runtime": 1.8027,
"eval_samples_per_second": 44.379,
"eval_steps_per_second": 5.547,
"step": 7950
},
{
"em_boolean": 0.47058823529411764,
"em_extraction": 0.0,
"em_qa": 0.06666666666666667,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 31.0,
"mean_em": 0.1125,
"mean_similarity": 0.5996635487768799,
"mean_word_count_diff": 15.9625,
"similarity_boolean": 0.5703230380792829,
"similarity_extraction": 0.43614052472015225,
"similarity_qa": 0.6746748656034469,
"similarity_summarization": 0.6909497512711419,
"similarity_unanswerable": 0.6118843918045361,
"word_count_diff_boolean": 4.176470588235294,
"word_count_diff_extraction": 6.933333333333334,
"word_count_diff_qa": 6.466666666666667,
"word_count_diff_summarization": 51.888888888888886,
"word_count_diff_unanswerable": 4.733333333333333
},
{
"epoch": 31.0,
"grad_norm": 0.15586383640766144,
"learning_rate": 1.0136868053419842e-05,
"loss": 0.0105,
"step": 8215
},
{
"epoch": 31.0,
"eval_loss": 0.09651915729045868,
"eval_runtime": 1.7878,
"eval_samples_per_second": 44.749,
"eval_steps_per_second": 5.594,
"step": 8215
},
{
"em_boolean": 0.35294117647058826,
"em_extraction": 0.26666666666666666,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 32.0,
"mean_em": 0.125,
"mean_similarity": 0.6647563714534044,
"mean_word_count_diff": 15.3125,
"similarity_boolean": 0.4992627663647427,
"similarity_extraction": 0.6371853078405062,
"similarity_qa": 0.7779242674509684,
"similarity_summarization": 0.7545208003785875,
"similarity_unanswerable": 0.6590016434590021,
"word_count_diff_boolean": 4.352941176470588,
"word_count_diff_extraction": 3.6666666666666665,
"word_count_diff_qa": 7.133333333333334,
"word_count_diff_summarization": 52.22222222222222,
"word_count_diff_unanswerable": 3.2666666666666666
},
{
"epoch": 32.0,
"grad_norm": 0.15895779430866241,
"learning_rate": 9.22135579647165e-06,
"loss": 0.0103,
"step": 8480
},
{
"epoch": 32.0,
"eval_loss": 0.09846033155918121,
"eval_runtime": 1.7981,
"eval_samples_per_second": 44.492,
"eval_steps_per_second": 5.562,
"step": 8480
},
{
"em_boolean": 0.35294117647058826,
"em_extraction": 0.13333333333333333,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 33.0,
"mean_em": 0.1,
"mean_similarity": 0.6497376295737922,
"mean_word_count_diff": 15.2,
"similarity_boolean": 0.4850216718281017,
"similarity_extraction": 0.6169686233003934,
"similarity_qa": 0.7874227881431579,
"similarity_summarization": 0.720199970735444,
"similarity_unanswerable": 0.6469447533289592,
"word_count_diff_boolean": 5.529411764705882,
"word_count_diff_extraction": 4.0,
"word_count_diff_qa": 5.466666666666667,
"word_count_diff_summarization": 50.72222222222222,
"word_count_diff_unanswerable": 4.466666666666667
},
{
"epoch": 33.0,
"grad_norm": 0.16729232668876648,
"learning_rate": 8.330472312226091e-06,
"loss": 0.0099,
"step": 8745
},
{
"epoch": 33.0,
"eval_loss": 0.09990086406469345,
"eval_runtime": 1.7757,
"eval_samples_per_second": 45.052,
"eval_steps_per_second": 5.631,
"step": 8745
},
{
"em_boolean": 0.4117647058823529,
"em_extraction": 0.2,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 34.0,
"mean_em": 0.125,
"mean_similarity": 0.6532467395067215,
"mean_word_count_diff": 15.3375,
"similarity_boolean": 0.5624497304944431,
"similarity_extraction": 0.6123799696564675,
"similarity_qa": 0.7896470129489899,
"similarity_summarization": 0.6943622730258439,
"similarity_unanswerable": 0.6112778725723426,
"word_count_diff_boolean": 4.588235294117647,
"word_count_diff_extraction": 5.266666666666667,
"word_count_diff_qa": 5.933333333333334,
"word_count_diff_summarization": 50.666666666666664,
"word_count_diff_unanswerable": 4.6
},
{
"epoch": 34.0,
"grad_norm": 0.3746591806411743,
"learning_rate": 7.468014575555688e-06,
"loss": 0.0096,
"step": 9010
},
{
"epoch": 34.0,
"eval_loss": 0.10108506679534912,
"eval_runtime": 1.7759,
"eval_samples_per_second": 45.048,
"eval_steps_per_second": 5.631,
"step": 9010
},
{
"em_boolean": 0.35294117647058826,
"em_extraction": 0.13333333333333333,
"em_qa": 0.13333333333333333,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 35.0,
"mean_em": 0.125,
"mean_similarity": 0.6299693588167429,
"mean_word_count_diff": 15.35,
"similarity_boolean": 0.5166072974748471,
"similarity_extraction": 0.55517836039265,
"similarity_qa": 0.7496427396933237,
"similarity_summarization": 0.7208915799856186,
"similarity_unanswerable": 0.6044573138157526,
"word_count_diff_boolean": 4.352941176470588,
"word_count_diff_extraction": 5.266666666666667,
"word_count_diff_qa": 4.8,
"word_count_diff_summarization": 51.388888888888886,
"word_count_diff_unanswerable": 5.2
},
{
"epoch": 35.0,
"grad_norm": 0.310463547706604,
"learning_rate": 6.637658409856717e-06,
"loss": 0.0096,
"step": 9275
},
{
"epoch": 35.0,
"eval_loss": 0.10128389298915863,
"eval_runtime": 1.7949,
"eval_samples_per_second": 44.571,
"eval_steps_per_second": 5.571,
"step": 9275
},
{
"em_boolean": 0.5294117647058824,
"em_extraction": 0.2,
"em_qa": 0.06666666666666667,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 36.0,
"mean_em": 0.1625,
"mean_similarity": 0.6648560125031509,
"mean_word_count_diff": 14.9,
"similarity_boolean": 0.6631740967688315,
"similarity_extraction": 0.5298445565005143,
"similarity_qa": 0.7434355229139328,
"similarity_summarization": 0.7201196981800927,
"similarity_unanswerable": 0.6568777064482371,
"word_count_diff_boolean": 2.2941176470588234,
"word_count_diff_extraction": 5.2,
"word_count_diff_qa": 5.333333333333333,
"word_count_diff_summarization": 52.0,
"word_count_diff_unanswerable": 3.933333333333333
},
{
"epoch": 36.0,
"grad_norm": 0.23128336668014526,
"learning_rate": 5.84294282056834e-06,
"loss": 0.0094,
"step": 9540
},
{
"epoch": 36.0,
"eval_loss": 0.10348665714263916,
"eval_runtime": 1.7894,
"eval_samples_per_second": 44.707,
"eval_steps_per_second": 5.588,
"step": 9540
},
{
"em_boolean": 0.47058823529411764,
"em_extraction": 0.2,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 37.0,
"mean_em": 0.1375,
"mean_similarity": 0.6319401320070028,
"mean_word_count_diff": 15.0875,
"similarity_boolean": 0.5821067885879208,
"similarity_extraction": 0.5178047935167949,
"similarity_qa": 0.769520037372907,
"similarity_summarization": 0.6793310083448887,
"similarity_unanswerable": 0.6081043027341366,
"word_count_diff_boolean": 4.235294117647059,
"word_count_diff_extraction": 4.866666666666666,
"word_count_diff_qa": 5.0,
"word_count_diff_summarization": 50.833333333333336,
"word_count_diff_unanswerable": 4.8
},
{
"epoch": 37.0,
"grad_norm": 0.10078708827495575,
"learning_rate": 5.087254911814361e-06,
"loss": 0.0092,
"step": 9805
},
{
"epoch": 37.0,
"eval_loss": 0.10369674116373062,
"eval_runtime": 1.7998,
"eval_samples_per_second": 44.45,
"eval_steps_per_second": 5.556,
"step": 9805
},
{
"em_boolean": 0.4117647058823529,
"em_extraction": 0.13333333333333333,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 38.0,
"mean_em": 0.1125,
"mean_similarity": 0.6402258106973022,
"mean_word_count_diff": 15.3875,
"similarity_boolean": 0.5555457597708001,
"similarity_extraction": 0.5125755973160266,
"similarity_qa": 0.729976910352707,
"similarity_summarization": 0.7154302892999517,
"similarity_unanswerable": 0.6838502744833629,
"word_count_diff_boolean": 4.352941176470588,
"word_count_diff_extraction": 4.466666666666667,
"word_count_diff_qa": 6.266666666666667,
"word_count_diff_summarization": 51.166666666666664,
"word_count_diff_unanswerable": 5.0
},
{
"epoch": 38.0,
"grad_norm": 0.14647193253040314,
"learning_rate": 4.373815450453467e-06,
"loss": 0.0087,
"step": 10070
},
{
"epoch": 38.0,
"eval_loss": 0.10511450469493866,
"eval_runtime": 1.8033,
"eval_samples_per_second": 44.364,
"eval_steps_per_second": 5.545,
"step": 10070
},
{
"em_boolean": 0.29411764705882354,
"em_extraction": 0.26666666666666666,
"em_qa": 0.13333333333333333,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 39.0,
"mean_em": 0.1375,
"mean_similarity": 0.5966290417010895,
"mean_word_count_diff": 15.375,
"similarity_boolean": 0.48111217118361416,
"similarity_extraction": 0.5464107090607285,
"similarity_qa": 0.7710953007141749,
"similarity_summarization": 0.6638153013255861,
"similarity_unanswerable": 0.5226767236987749,
"word_count_diff_boolean": 4.294117647058823,
"word_count_diff_extraction": 5.133333333333334,
"word_count_diff_qa": 5.466666666666667,
"word_count_diff_summarization": 50.611111111111114,
"word_count_diff_unanswerable": 5.8
},
{
"epoch": 39.0,
"grad_norm": 0.8468394875526428,
"learning_rate": 3.7056651390643413e-06,
"loss": 0.0088,
"step": 10335
},
{
"epoch": 39.0,
"eval_loss": 0.10422440618276596,
"eval_runtime": 1.7878,
"eval_samples_per_second": 44.747,
"eval_steps_per_second": 5.593,
"step": 10335
},
{
"em_boolean": 0.47058823529411764,
"em_extraction": 0.26666666666666666,
"em_qa": 0.13333333333333333,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 40.0,
"mean_em": 0.175,
"mean_similarity": 0.6348526364192366,
"mean_word_count_diff": 15.1875,
"similarity_boolean": 0.5687163450262126,
"similarity_extraction": 0.62134765163064,
"similarity_qa": 0.7562251538038254,
"similarity_summarization": 0.6440524767256446,
"similarity_unanswerable": 0.5908997590343158,
"word_count_diff_boolean": 4.529411764705882,
"word_count_diff_extraction": 4.133333333333334,
"word_count_diff_qa": 4.866666666666666,
"word_count_diff_summarization": 50.666666666666664,
"word_count_diff_unanswerable": 6.066666666666666
},
{
"epoch": 40.0,
"grad_norm": 0.15559451282024384,
"learning_rate": 3.08565165637071e-06,
"loss": 0.0088,
"step": 10600
},
{
"epoch": 40.0,
"eval_loss": 0.1041550487279892,
"eval_runtime": 1.7986,
"eval_samples_per_second": 44.479,
"eval_steps_per_second": 5.56,
"step": 10600
},
{
"em_boolean": 0.4117647058823529,
"em_extraction": 0.2,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 41.0,
"mean_em": 0.125,
"mean_similarity": 0.6427899678237736,
"mean_word_count_diff": 15.15,
"similarity_boolean": 0.5819402319543502,
"similarity_extraction": 0.5856266627709071,
"similarity_qa": 0.7046878506739934,
"similarity_summarization": 0.7580131689707438,
"similarity_unanswerable": 0.5687505826354027,
"word_count_diff_boolean": 3.0,
"word_count_diff_extraction": 5.266666666666667,
"word_count_diff_qa": 5.466666666666667,
"word_count_diff_summarization": 51.44444444444444,
"word_count_diff_unanswerable": 4.933333333333334
},
{
"epoch": 41.0,
"grad_norm": 0.2692953050136566,
"learning_rate": 2.516417520340457e-06,
"loss": 0.0085,
"step": 10865
},
{
"epoch": 41.0,
"eval_loss": 0.1051572784781456,
"eval_runtime": 1.7897,
"eval_samples_per_second": 44.701,
"eval_steps_per_second": 5.588,
"step": 10865
},
{
"em_boolean": 0.47058823529411764,
"em_extraction": 0.3333333333333333,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 42.0,
"mean_em": 0.1625,
"mean_similarity": 0.6774277022806927,
"mean_word_count_diff": 15.4,
"similarity_boolean": 0.600595106206396,
"similarity_extraction": 0.644695137689511,
"similarity_qa": 0.7859726687272389,
"similarity_summarization": 0.7171196627120177,
"similarity_unanswerable": 0.6410618901252747,
"word_count_diff_boolean": 3.8823529411764706,
"word_count_diff_extraction": 4.0,
"word_count_diff_qa": 6.333333333333333,
"word_count_diff_summarization": 53.05555555555556,
"word_count_diff_unanswerable": 3.7333333333333334
},
{
"epoch": 42.0,
"grad_norm": 0.028788737952709198,
"learning_rate": 2.000388825686658e-06,
"loss": 0.0084,
"step": 11130
},
{
"epoch": 42.0,
"eval_loss": 0.10606852918863297,
"eval_runtime": 1.7957,
"eval_samples_per_second": 44.551,
"eval_steps_per_second": 5.569,
"step": 11130
}
],
"logging_steps": 500,
"max_steps": 13250,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.042787771337933e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}