sumcse-sts-roberta-large / trainer_state.json
bcai001's picture
Update trainer_state.json
8eb4b07 verified
{
"best_metric": 0.8770001309177381,
"best_model_checkpoint": "sumcse/5401.reimple-robert-large",
"epoch": 3.0,
"global_step": 6462,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"eval_avg_sts": 0.8284010538709128,
"eval_sickr_spearman": 0.8087322680313793,
"eval_stsb_spearman": 0.8480698397104464,
"step": 125
},
{
"epoch": 0.12,
"eval_avg_sts": 0.8484297373111971,
"eval_sickr_spearman": 0.8391122278775942,
"eval_stsb_spearman": 0.8577472467448001,
"step": 250
},
{
"epoch": 0.17,
"eval_avg_sts": 0.8522751763001004,
"eval_sickr_spearman": 0.8446321541792678,
"eval_stsb_spearman": 0.859918198420933,
"step": 375
},
{
"epoch": 0.23,
"learning_rate": 9.226245744351595e-06,
"loss": 0.9779,
"step": 500
},
{
"epoch": 0.23,
"eval_avg_sts": 0.8529384795629767,
"eval_sickr_spearman": 0.8425804096191359,
"eval_stsb_spearman": 0.8632965495068176,
"step": 500
},
{
"epoch": 0.29,
"eval_avg_sts": 0.854393833444526,
"eval_sickr_spearman": 0.8428722668204894,
"eval_stsb_spearman": 0.8659154000685627,
"step": 625
},
{
"epoch": 0.35,
"eval_avg_sts": 0.8530765893420603,
"eval_sickr_spearman": 0.8375766255335162,
"eval_stsb_spearman": 0.8685765531506044,
"step": 750
},
{
"epoch": 0.41,
"eval_avg_sts": 0.8598428877676623,
"eval_sickr_spearman": 0.8496016440584608,
"eval_stsb_spearman": 0.8700841314768637,
"step": 875
},
{
"epoch": 0.46,
"learning_rate": 8.452491488703189e-06,
"loss": 0.5021,
"step": 1000
},
{
"epoch": 0.46,
"eval_avg_sts": 0.8553352649068404,
"eval_sickr_spearman": 0.8414943783845188,
"eval_stsb_spearman": 0.8691761514291622,
"step": 1000
},
{
"epoch": 0.52,
"eval_avg_sts": 0.8611558909552111,
"eval_sickr_spearman": 0.8500423294141554,
"eval_stsb_spearman": 0.8722694524962668,
"step": 1125
},
{
"epoch": 0.58,
"eval_avg_sts": 0.8470077761608523,
"eval_sickr_spearman": 0.824938778185323,
"eval_stsb_spearman": 0.8690767741363816,
"step": 1250
},
{
"epoch": 0.64,
"eval_avg_sts": 0.8523760853013311,
"eval_sickr_spearman": 0.8380687041677498,
"eval_stsb_spearman": 0.8666834664349123,
"step": 1375
},
{
"epoch": 0.7,
"learning_rate": 7.678737233054782e-06,
"loss": 0.4518,
"step": 1500
},
{
"epoch": 0.7,
"eval_avg_sts": 0.8575383621060841,
"eval_sickr_spearman": 0.8451627057257532,
"eval_stsb_spearman": 0.869914018486415,
"step": 1500
},
{
"epoch": 0.75,
"eval_avg_sts": 0.8567243246034306,
"eval_sickr_spearman": 0.8404439381960673,
"eval_stsb_spearman": 0.8730047110107938,
"step": 1625
},
{
"epoch": 0.81,
"eval_avg_sts": 0.855931320312622,
"eval_sickr_spearman": 0.8433319042322368,
"eval_stsb_spearman": 0.8685307363930072,
"step": 1750
},
{
"epoch": 0.87,
"eval_avg_sts": 0.8568209870234867,
"eval_sickr_spearman": 0.8408516742161756,
"eval_stsb_spearman": 0.8727902998307979,
"step": 1875
},
{
"epoch": 0.93,
"learning_rate": 6.904982977406376e-06,
"loss": 0.4282,
"step": 2000
},
{
"epoch": 0.93,
"eval_avg_sts": 0.8540671502188022,
"eval_sickr_spearman": 0.8388130421467362,
"eval_stsb_spearman": 0.869321258290868,
"step": 2000
},
{
"epoch": 0.99,
"eval_avg_sts": 0.8558945239823259,
"eval_sickr_spearman": 0.8423186881473997,
"eval_stsb_spearman": 0.869470359817252,
"step": 2125
},
{
"epoch": 1.04,
"eval_avg_sts": 0.8555080041232501,
"eval_sickr_spearman": 0.8408939896165424,
"eval_stsb_spearman": 0.870122018629958,
"step": 2250
},
{
"epoch": 1.1,
"eval_avg_sts": 0.8568942691339498,
"eval_sickr_spearman": 0.8418429400876551,
"eval_stsb_spearman": 0.8719455981802446,
"step": 2375
},
{
"epoch": 1.16,
"learning_rate": 6.13122872175797e-06,
"loss": 0.3707,
"step": 2500
},
{
"epoch": 1.16,
"eval_avg_sts": 0.8557911670636686,
"eval_sickr_spearman": 0.8392199616381198,
"eval_stsb_spearman": 0.8723623724892173,
"step": 2500
},
{
"epoch": 1.22,
"eval_avg_sts": 0.85588148387946,
"eval_sickr_spearman": 0.83787566717107,
"eval_stsb_spearman": 0.8738873005878499,
"step": 2625
},
{
"epoch": 1.28,
"eval_avg_sts": 0.8622210733781358,
"eval_sickr_spearman": 0.8489977970511808,
"eval_stsb_spearman": 0.8754443497050908,
"step": 2750
},
{
"epoch": 1.33,
"eval_avg_sts": 0.8624794115602034,
"eval_sickr_spearman": 0.8479586922026686,
"eval_stsb_spearman": 0.8770001309177381,
"step": 2875
},
{
"epoch": 1.39,
"learning_rate": 5.357474466109565e-06,
"loss": 0.3458,
"step": 3000
},
{
"epoch": 1.39,
"eval_avg_sts": 0.8548885349566437,
"eval_sickr_spearman": 0.8383297051729646,
"eval_stsb_spearman": 0.871447364740323,
"step": 3000
},
{
"epoch": 1.45,
"eval_avg_sts": 0.8562784353766513,
"eval_sickr_spearman": 0.841656867600684,
"eval_stsb_spearman": 0.8709000031526186,
"step": 3125
},
{
"epoch": 1.51,
"eval_avg_sts": 0.8578722811343277,
"eval_sickr_spearman": 0.8429833425291002,
"eval_stsb_spearman": 0.8727612197395551,
"step": 3250
},
{
"epoch": 1.57,
"eval_avg_sts": 0.8600916280834878,
"eval_sickr_spearman": 0.8445091945595862,
"eval_stsb_spearman": 0.8756740616073895,
"step": 3375
},
{
"epoch": 1.62,
"learning_rate": 4.583720210461158e-06,
"loss": 0.3435,
"step": 3500
},
{
"epoch": 1.62,
"eval_avg_sts": 0.8593220407937633,
"eval_sickr_spearman": 0.8451772111183875,
"eval_stsb_spearman": 0.8734668704691391,
"step": 3500
},
{
"epoch": 1.68,
"eval_avg_sts": 0.861320493545486,
"eval_sickr_spearman": 0.8473822229232085,
"eval_stsb_spearman": 0.8752587641677635,
"step": 3625
},
{
"epoch": 1.74,
"eval_avg_sts": 0.860298885852978,
"eval_sickr_spearman": 0.8464292858706762,
"eval_stsb_spearman": 0.8741684858352797,
"step": 3750
},
{
"epoch": 1.8,
"eval_avg_sts": 0.8550809080819418,
"eval_sickr_spearman": 0.8354855435013065,
"eval_stsb_spearman": 0.8746762726625771,
"step": 3875
},
{
"epoch": 1.86,
"learning_rate": 3.8099659548127517e-06,
"loss": 0.3442,
"step": 4000
},
{
"epoch": 1.86,
"eval_avg_sts": 0.8559180414679683,
"eval_sickr_spearman": 0.8384507435485884,
"eval_stsb_spearman": 0.8733853393873484,
"step": 4000
},
{
"epoch": 1.92,
"eval_avg_sts": 0.8571707122552362,
"eval_sickr_spearman": 0.8428135045544151,
"eval_stsb_spearman": 0.8715279199560574,
"step": 4125
},
{
"epoch": 1.97,
"eval_avg_sts": 0.8560277042936426,
"eval_sickr_spearman": 0.8397806286852069,
"eval_stsb_spearman": 0.8722747799020784,
"step": 4250
},
{
"epoch": 2.03,
"eval_avg_sts": 0.8578281901982555,
"eval_sickr_spearman": 0.8433445364119149,
"eval_stsb_spearman": 0.8723118439845962,
"step": 4375
},
{
"epoch": 2.09,
"learning_rate": 3.036211699164346e-06,
"loss": 0.3131,
"step": 4500
},
{
"epoch": 2.09,
"eval_avg_sts": 0.8546342996226579,
"eval_sickr_spearman": 0.8383026636628548,
"eval_stsb_spearman": 0.8709659355824609,
"step": 4500
},
{
"epoch": 2.15,
"eval_avg_sts": 0.8559652822309323,
"eval_sickr_spearman": 0.840341391794497,
"eval_stsb_spearman": 0.8715891726673677,
"step": 4625
},
{
"epoch": 2.21,
"eval_avg_sts": 0.8546617687967435,
"eval_sickr_spearman": 0.8391962823051108,
"eval_stsb_spearman": 0.8701272552883762,
"step": 4750
},
{
"epoch": 2.26,
"eval_avg_sts": 0.858705271470795,
"eval_sickr_spearman": 0.8465492195309672,
"eval_stsb_spearman": 0.8708613234106228,
"step": 4875
},
{
"epoch": 2.32,
"learning_rate": 2.2624574435159395e-06,
"loss": 0.2846,
"step": 5000
},
{
"epoch": 2.32,
"eval_avg_sts": 0.8597192689340263,
"eval_sickr_spearman": 0.8464290457151691,
"eval_stsb_spearman": 0.8730094921528834,
"step": 5000
},
{
"epoch": 2.38,
"eval_avg_sts": 0.8606101916960156,
"eval_sickr_spearman": 0.8477548001770637,
"eval_stsb_spearman": 0.8734655832149676,
"step": 5125
},
{
"epoch": 2.44,
"eval_avg_sts": 0.859970718351403,
"eval_sickr_spearman": 0.8465689603136584,
"eval_stsb_spearman": 0.8733724763891477,
"step": 5250
},
{
"epoch": 2.5,
"eval_avg_sts": 0.8611164416935826,
"eval_sickr_spearman": 0.8490797381102341,
"eval_stsb_spearman": 0.873153145276931,
"step": 5375
},
{
"epoch": 2.55,
"learning_rate": 1.4887031878675335e-06,
"loss": 0.2804,
"step": 5500
},
{
"epoch": 2.55,
"eval_avg_sts": 0.860553080718742,
"eval_sickr_spearman": 0.8488836271230623,
"eval_stsb_spearman": 0.8722225343144216,
"step": 5500
},
{
"epoch": 2.61,
"eval_avg_sts": 0.8602837879281291,
"eval_sickr_spearman": 0.8481850627837464,
"eval_stsb_spearman": 0.8723825130725117,
"step": 5625
},
{
"epoch": 2.67,
"eval_avg_sts": 0.857967378909783,
"eval_sickr_spearman": 0.8440642824669651,
"eval_stsb_spearman": 0.8718704753526009,
"step": 5750
},
{
"epoch": 2.73,
"eval_avg_sts": 0.8601815553282466,
"eval_sickr_spearman": 0.8467775593872041,
"eval_stsb_spearman": 0.873585551269289,
"step": 5875
},
{
"epoch": 2.79,
"learning_rate": 7.149489322191272e-07,
"loss": 0.2824,
"step": 6000
},
{
"epoch": 2.79,
"eval_avg_sts": 0.860910326239195,
"eval_sickr_spearman": 0.8483497614305777,
"eval_stsb_spearman": 0.8734708910478122,
"step": 6000
},
{
"epoch": 2.84,
"eval_avg_sts": 0.860010166556528,
"eval_sickr_spearman": 0.8470775616467865,
"eval_stsb_spearman": 0.8729427714662694,
"step": 6125
},
{
"epoch": 2.9,
"eval_avg_sts": 0.8590174618570985,
"eval_sickr_spearman": 0.8456035832058536,
"eval_stsb_spearman": 0.8724313405083433,
"step": 6250
},
{
"epoch": 2.96,
"eval_avg_sts": 0.8593514591951028,
"eval_sickr_spearman": 0.8458110775640663,
"eval_stsb_spearman": 0.8728918408261394,
"step": 6375
},
{
"epoch": 3.0,
"step": 6462,
"train_runtime": 8943.5159,
"train_samples_per_second": 0.723
}
],
"max_steps": 6462,
"num_train_epochs": 3,
"total_flos": 169236001393016832,
"trial_name": null,
"trial_params": null
}