|
{ |
|
"best_metric": 0.8770001309177381, |
|
"best_model_checkpoint": "sumcse/5401.reimple-robert-large", |
|
"epoch": 3.0, |
|
"global_step": 6462, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"eval_avg_sts": 0.8284010538709128, |
|
"eval_sickr_spearman": 0.8087322680313793, |
|
"eval_stsb_spearman": 0.8480698397104464, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_avg_sts": 0.8484297373111971, |
|
"eval_sickr_spearman": 0.8391122278775942, |
|
"eval_stsb_spearman": 0.8577472467448001, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_avg_sts": 0.8522751763001004, |
|
"eval_sickr_spearman": 0.8446321541792678, |
|
"eval_stsb_spearman": 0.859918198420933, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.226245744351595e-06, |
|
"loss": 0.9779, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_avg_sts": 0.8529384795629767, |
|
"eval_sickr_spearman": 0.8425804096191359, |
|
"eval_stsb_spearman": 0.8632965495068176, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_avg_sts": 0.854393833444526, |
|
"eval_sickr_spearman": 0.8428722668204894, |
|
"eval_stsb_spearman": 0.8659154000685627, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_avg_sts": 0.8530765893420603, |
|
"eval_sickr_spearman": 0.8375766255335162, |
|
"eval_stsb_spearman": 0.8685765531506044, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_avg_sts": 0.8598428877676623, |
|
"eval_sickr_spearman": 0.8496016440584608, |
|
"eval_stsb_spearman": 0.8700841314768637, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.452491488703189e-06, |
|
"loss": 0.5021, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_avg_sts": 0.8553352649068404, |
|
"eval_sickr_spearman": 0.8414943783845188, |
|
"eval_stsb_spearman": 0.8691761514291622, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_avg_sts": 0.8611558909552111, |
|
"eval_sickr_spearman": 0.8500423294141554, |
|
"eval_stsb_spearman": 0.8722694524962668, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_avg_sts": 0.8470077761608523, |
|
"eval_sickr_spearman": 0.824938778185323, |
|
"eval_stsb_spearman": 0.8690767741363816, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_avg_sts": 0.8523760853013311, |
|
"eval_sickr_spearman": 0.8380687041677498, |
|
"eval_stsb_spearman": 0.8666834664349123, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 7.678737233054782e-06, |
|
"loss": 0.4518, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_avg_sts": 0.8575383621060841, |
|
"eval_sickr_spearman": 0.8451627057257532, |
|
"eval_stsb_spearman": 0.869914018486415, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_avg_sts": 0.8567243246034306, |
|
"eval_sickr_spearman": 0.8404439381960673, |
|
"eval_stsb_spearman": 0.8730047110107938, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_avg_sts": 0.855931320312622, |
|
"eval_sickr_spearman": 0.8433319042322368, |
|
"eval_stsb_spearman": 0.8685307363930072, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_avg_sts": 0.8568209870234867, |
|
"eval_sickr_spearman": 0.8408516742161756, |
|
"eval_stsb_spearman": 0.8727902998307979, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.904982977406376e-06, |
|
"loss": 0.4282, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_avg_sts": 0.8540671502188022, |
|
"eval_sickr_spearman": 0.8388130421467362, |
|
"eval_stsb_spearman": 0.869321258290868, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_avg_sts": 0.8558945239823259, |
|
"eval_sickr_spearman": 0.8423186881473997, |
|
"eval_stsb_spearman": 0.869470359817252, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_avg_sts": 0.8555080041232501, |
|
"eval_sickr_spearman": 0.8408939896165424, |
|
"eval_stsb_spearman": 0.870122018629958, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_avg_sts": 0.8568942691339498, |
|
"eval_sickr_spearman": 0.8418429400876551, |
|
"eval_stsb_spearman": 0.8719455981802446, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 6.13122872175797e-06, |
|
"loss": 0.3707, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_avg_sts": 0.8557911670636686, |
|
"eval_sickr_spearman": 0.8392199616381198, |
|
"eval_stsb_spearman": 0.8723623724892173, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_avg_sts": 0.85588148387946, |
|
"eval_sickr_spearman": 0.83787566717107, |
|
"eval_stsb_spearman": 0.8738873005878499, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_avg_sts": 0.8622210733781358, |
|
"eval_sickr_spearman": 0.8489977970511808, |
|
"eval_stsb_spearman": 0.8754443497050908, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_avg_sts": 0.8624794115602034, |
|
"eval_sickr_spearman": 0.8479586922026686, |
|
"eval_stsb_spearman": 0.8770001309177381, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5.357474466109565e-06, |
|
"loss": 0.3458, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_avg_sts": 0.8548885349566437, |
|
"eval_sickr_spearman": 0.8383297051729646, |
|
"eval_stsb_spearman": 0.871447364740323, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_avg_sts": 0.8562784353766513, |
|
"eval_sickr_spearman": 0.841656867600684, |
|
"eval_stsb_spearman": 0.8709000031526186, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_avg_sts": 0.8578722811343277, |
|
"eval_sickr_spearman": 0.8429833425291002, |
|
"eval_stsb_spearman": 0.8727612197395551, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_avg_sts": 0.8600916280834878, |
|
"eval_sickr_spearman": 0.8445091945595862, |
|
"eval_stsb_spearman": 0.8756740616073895, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.583720210461158e-06, |
|
"loss": 0.3435, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_avg_sts": 0.8593220407937633, |
|
"eval_sickr_spearman": 0.8451772111183875, |
|
"eval_stsb_spearman": 0.8734668704691391, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_avg_sts": 0.861320493545486, |
|
"eval_sickr_spearman": 0.8473822229232085, |
|
"eval_stsb_spearman": 0.8752587641677635, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_avg_sts": 0.860298885852978, |
|
"eval_sickr_spearman": 0.8464292858706762, |
|
"eval_stsb_spearman": 0.8741684858352797, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_avg_sts": 0.8550809080819418, |
|
"eval_sickr_spearman": 0.8354855435013065, |
|
"eval_stsb_spearman": 0.8746762726625771, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.8099659548127517e-06, |
|
"loss": 0.3442, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_avg_sts": 0.8559180414679683, |
|
"eval_sickr_spearman": 0.8384507435485884, |
|
"eval_stsb_spearman": 0.8733853393873484, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_avg_sts": 0.8571707122552362, |
|
"eval_sickr_spearman": 0.8428135045544151, |
|
"eval_stsb_spearman": 0.8715279199560574, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_avg_sts": 0.8560277042936426, |
|
"eval_sickr_spearman": 0.8397806286852069, |
|
"eval_stsb_spearman": 0.8722747799020784, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_avg_sts": 0.8578281901982555, |
|
"eval_sickr_spearman": 0.8433445364119149, |
|
"eval_stsb_spearman": 0.8723118439845962, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.036211699164346e-06, |
|
"loss": 0.3131, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_avg_sts": 0.8546342996226579, |
|
"eval_sickr_spearman": 0.8383026636628548, |
|
"eval_stsb_spearman": 0.8709659355824609, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_avg_sts": 0.8559652822309323, |
|
"eval_sickr_spearman": 0.840341391794497, |
|
"eval_stsb_spearman": 0.8715891726673677, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_avg_sts": 0.8546617687967435, |
|
"eval_sickr_spearman": 0.8391962823051108, |
|
"eval_stsb_spearman": 0.8701272552883762, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_avg_sts": 0.858705271470795, |
|
"eval_sickr_spearman": 0.8465492195309672, |
|
"eval_stsb_spearman": 0.8708613234106228, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.2624574435159395e-06, |
|
"loss": 0.2846, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_avg_sts": 0.8597192689340263, |
|
"eval_sickr_spearman": 0.8464290457151691, |
|
"eval_stsb_spearman": 0.8730094921528834, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_avg_sts": 0.8606101916960156, |
|
"eval_sickr_spearman": 0.8477548001770637, |
|
"eval_stsb_spearman": 0.8734655832149676, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_avg_sts": 0.859970718351403, |
|
"eval_sickr_spearman": 0.8465689603136584, |
|
"eval_stsb_spearman": 0.8733724763891477, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_avg_sts": 0.8611164416935826, |
|
"eval_sickr_spearman": 0.8490797381102341, |
|
"eval_stsb_spearman": 0.873153145276931, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.4887031878675335e-06, |
|
"loss": 0.2804, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_avg_sts": 0.860553080718742, |
|
"eval_sickr_spearman": 0.8488836271230623, |
|
"eval_stsb_spearman": 0.8722225343144216, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_avg_sts": 0.8602837879281291, |
|
"eval_sickr_spearman": 0.8481850627837464, |
|
"eval_stsb_spearman": 0.8723825130725117, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_avg_sts": 0.857967378909783, |
|
"eval_sickr_spearman": 0.8440642824669651, |
|
"eval_stsb_spearman": 0.8718704753526009, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_avg_sts": 0.8601815553282466, |
|
"eval_sickr_spearman": 0.8467775593872041, |
|
"eval_stsb_spearman": 0.873585551269289, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 7.149489322191272e-07, |
|
"loss": 0.2824, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_avg_sts": 0.860910326239195, |
|
"eval_sickr_spearman": 0.8483497614305777, |
|
"eval_stsb_spearman": 0.8734708910478122, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_avg_sts": 0.860010166556528, |
|
"eval_sickr_spearman": 0.8470775616467865, |
|
"eval_stsb_spearman": 0.8729427714662694, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_avg_sts": 0.8590174618570985, |
|
"eval_sickr_spearman": 0.8456035832058536, |
|
"eval_stsb_spearman": 0.8724313405083433, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_avg_sts": 0.8593514591951028, |
|
"eval_sickr_spearman": 0.8458110775640663, |
|
"eval_stsb_spearman": 0.8728918408261394, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 6462, |
|
"train_runtime": 8943.5159, |
|
"train_samples_per_second": 0.723 |
|
} |
|
], |
|
"max_steps": 6462, |
|
"num_train_epochs": 3, |
|
"total_flos": 169236001393016832, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|