esol_model / checkpoint-4450 /trainer_state.json
osbm's picture
Upload with huggingface_hub
b19a93f
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 50.0,
"global_step": 4450,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_loss": 1.7501213550567627,
"eval_mae": 1.0687230825424194,
"eval_mse": 1.750120997428894,
"eval_rmse": 1.3229213953018188,
"eval_runtime": 0.213,
"eval_samples_per_second": 323.879,
"eval_steps_per_second": 42.245,
"step": 89
},
{
"epoch": 2.0,
"eval_loss": 0.9408809542655945,
"eval_mae": 0.7522120475769043,
"eval_mse": 0.9408809542655945,
"eval_rmse": 0.9699901938438416,
"eval_runtime": 0.4675,
"eval_samples_per_second": 147.6,
"eval_steps_per_second": 19.252,
"step": 178
},
{
"epoch": 3.0,
"eval_loss": 0.7912163138389587,
"eval_mae": 0.7013395428657532,
"eval_mse": 0.7912163734436035,
"eval_rmse": 0.8895034193992615,
"eval_runtime": 0.4829,
"eval_samples_per_second": 142.893,
"eval_steps_per_second": 18.638,
"step": 267
},
{
"epoch": 4.0,
"eval_loss": 0.7405096292495728,
"eval_mae": 0.6587470769882202,
"eval_mse": 0.7405097484588623,
"eval_rmse": 0.8605287671089172,
"eval_runtime": 0.4491,
"eval_samples_per_second": 153.654,
"eval_steps_per_second": 20.042,
"step": 356
},
{
"epoch": 5.0,
"eval_loss": 0.6923832297325134,
"eval_mae": 0.6610296964645386,
"eval_mse": 0.6923832893371582,
"eval_rmse": 0.832095742225647,
"eval_runtime": 0.4191,
"eval_samples_per_second": 164.64,
"eval_steps_per_second": 21.475,
"step": 445
},
{
"epoch": 5.62,
"learning_rate": 8.876404494382023e-06,
"loss": 0.9104,
"step": 500
},
{
"epoch": 6.0,
"eval_loss": 0.6709772944450378,
"eval_mae": 0.6442688703536987,
"eval_mse": 0.6709771752357483,
"eval_rmse": 0.8191319704055786,
"eval_runtime": 0.4743,
"eval_samples_per_second": 145.467,
"eval_steps_per_second": 18.974,
"step": 534
},
{
"epoch": 7.0,
"eval_loss": 0.5820761322975159,
"eval_mae": 0.6152850389480591,
"eval_mse": 0.5820761919021606,
"eval_rmse": 0.7629391551017761,
"eval_runtime": 0.364,
"eval_samples_per_second": 189.553,
"eval_steps_per_second": 24.724,
"step": 623
},
{
"epoch": 8.0,
"eval_loss": 0.49572908878326416,
"eval_mae": 0.5509689450263977,
"eval_mse": 0.49572914838790894,
"eval_rmse": 0.70408034324646,
"eval_runtime": 0.4466,
"eval_samples_per_second": 154.492,
"eval_steps_per_second": 20.151,
"step": 712
},
{
"epoch": 9.0,
"eval_loss": 0.5924321413040161,
"eval_mae": 0.6066040396690369,
"eval_mse": 0.5924323201179504,
"eval_rmse": 0.7696962356567383,
"eval_runtime": 0.4645,
"eval_samples_per_second": 148.549,
"eval_steps_per_second": 19.376,
"step": 801
},
{
"epoch": 10.0,
"eval_loss": 0.4954969882965088,
"eval_mae": 0.5473751425743103,
"eval_mse": 0.4954971671104431,
"eval_rmse": 0.7039155960083008,
"eval_runtime": 0.3698,
"eval_samples_per_second": 186.61,
"eval_steps_per_second": 24.34,
"step": 890
},
{
"epoch": 11.0,
"eval_loss": 0.5200049877166748,
"eval_mae": 0.5685440897941589,
"eval_mse": 0.52000492811203,
"eval_rmse": 0.7211136817932129,
"eval_runtime": 0.4266,
"eval_samples_per_second": 161.746,
"eval_steps_per_second": 21.097,
"step": 979
},
{
"epoch": 11.24,
"learning_rate": 7.752808988764046e-06,
"loss": 0.128,
"step": 1000
},
{
"epoch": 12.0,
"eval_loss": 0.43686971068382263,
"eval_mae": 0.5061944723129272,
"eval_mse": 0.436869740486145,
"eval_rmse": 0.6609612107276917,
"eval_runtime": 0.4821,
"eval_samples_per_second": 143.124,
"eval_steps_per_second": 18.668,
"step": 1068
},
{
"epoch": 13.0,
"eval_loss": 0.41416943073272705,
"eval_mae": 0.46771711111068726,
"eval_mse": 0.41416940093040466,
"eval_rmse": 0.6435599327087402,
"eval_runtime": 0.4624,
"eval_samples_per_second": 149.23,
"eval_steps_per_second": 19.465,
"step": 1157
},
{
"epoch": 14.0,
"eval_loss": 0.40698668360710144,
"eval_mae": 0.45521026849746704,
"eval_mse": 0.4069867730140686,
"eval_rmse": 0.6379551291465759,
"eval_runtime": 0.4757,
"eval_samples_per_second": 145.044,
"eval_steps_per_second": 18.919,
"step": 1246
},
{
"epoch": 15.0,
"eval_loss": 0.49930572509765625,
"eval_mae": 0.5383840799331665,
"eval_mse": 0.4993056654930115,
"eval_rmse": 0.7066156268119812,
"eval_runtime": 0.4656,
"eval_samples_per_second": 148.181,
"eval_steps_per_second": 19.328,
"step": 1335
},
{
"epoch": 16.0,
"eval_loss": 0.5578888654708862,
"eval_mae": 0.585189163684845,
"eval_mse": 0.5578888058662415,
"eval_rmse": 0.746919572353363,
"eval_runtime": 0.4755,
"eval_samples_per_second": 145.098,
"eval_steps_per_second": 18.926,
"step": 1424
},
{
"epoch": 16.85,
"learning_rate": 6.629213483146067e-06,
"loss": 0.0895,
"step": 1500
},
{
"epoch": 17.0,
"eval_loss": 0.5015009045600891,
"eval_mae": 0.561897337436676,
"eval_mse": 0.5015009641647339,
"eval_rmse": 0.708167314529419,
"eval_runtime": 0.3883,
"eval_samples_per_second": 177.714,
"eval_steps_per_second": 23.18,
"step": 1513
},
{
"epoch": 18.0,
"eval_loss": 0.4590393602848053,
"eval_mae": 0.5263462066650391,
"eval_mse": 0.45903947949409485,
"eval_rmse": 0.6775245070457458,
"eval_runtime": 0.4835,
"eval_samples_per_second": 142.705,
"eval_steps_per_second": 18.614,
"step": 1602
},
{
"epoch": 19.0,
"eval_loss": 0.4880666434764862,
"eval_mae": 0.5377508997917175,
"eval_mse": 0.4880666732788086,
"eval_rmse": 0.698617696762085,
"eval_runtime": 0.4715,
"eval_samples_per_second": 146.34,
"eval_steps_per_second": 19.088,
"step": 1691
},
{
"epoch": 20.0,
"eval_loss": 0.3925124704837799,
"eval_mae": 0.46625784039497375,
"eval_mse": 0.3925124406814575,
"eval_rmse": 0.6265081167221069,
"eval_runtime": 0.3158,
"eval_samples_per_second": 218.521,
"eval_steps_per_second": 28.503,
"step": 1780
},
{
"epoch": 21.0,
"eval_loss": 0.4392476975917816,
"eval_mae": 0.5062677264213562,
"eval_mse": 0.4392476975917816,
"eval_rmse": 0.6627576351165771,
"eval_runtime": 0.4321,
"eval_samples_per_second": 159.696,
"eval_steps_per_second": 20.83,
"step": 1869
},
{
"epoch": 22.0,
"eval_loss": 0.42705094814300537,
"eval_mae": 0.48886218667030334,
"eval_mse": 0.42705097794532776,
"eval_rmse": 0.6534913778305054,
"eval_runtime": 0.3907,
"eval_samples_per_second": 176.595,
"eval_steps_per_second": 23.034,
"step": 1958
},
{
"epoch": 22.47,
"learning_rate": 5.50561797752809e-06,
"loss": 0.0694,
"step": 2000
},
{
"epoch": 23.0,
"eval_loss": 0.40933796763420105,
"eval_mae": 0.4783601760864258,
"eval_mse": 0.4093380868434906,
"eval_rmse": 0.6397953629493713,
"eval_runtime": 0.3371,
"eval_samples_per_second": 204.706,
"eval_steps_per_second": 26.701,
"step": 2047
},
{
"epoch": 24.0,
"eval_loss": 0.42077454924583435,
"eval_mae": 0.49500545859336853,
"eval_mse": 0.42077454924583435,
"eval_rmse": 0.6486713886260986,
"eval_runtime": 0.4775,
"eval_samples_per_second": 144.493,
"eval_steps_per_second": 18.847,
"step": 2136
},
{
"epoch": 25.0,
"eval_loss": 0.40650996565818787,
"eval_mae": 0.4869938790798187,
"eval_mse": 0.4065099358558655,
"eval_rmse": 0.6375812888145447,
"eval_runtime": 0.4751,
"eval_samples_per_second": 145.227,
"eval_steps_per_second": 18.943,
"step": 2225
},
{
"epoch": 26.0,
"eval_loss": 0.4467940032482147,
"eval_mae": 0.5186977386474609,
"eval_mse": 0.44679397344589233,
"eval_rmse": 0.668426513671875,
"eval_runtime": 0.4523,
"eval_samples_per_second": 152.552,
"eval_steps_per_second": 19.898,
"step": 2314
},
{
"epoch": 27.0,
"eval_loss": 0.448551744222641,
"eval_mae": 0.5286442041397095,
"eval_mse": 0.4485517740249634,
"eval_rmse": 0.6697400808334351,
"eval_runtime": 0.3607,
"eval_samples_per_second": 191.288,
"eval_steps_per_second": 24.951,
"step": 2403
},
{
"epoch": 28.0,
"eval_loss": 0.4263148605823517,
"eval_mae": 0.5110523700714111,
"eval_mse": 0.4263148009777069,
"eval_rmse": 0.6529278755187988,
"eval_runtime": 0.4741,
"eval_samples_per_second": 145.544,
"eval_steps_per_second": 18.984,
"step": 2492
},
{
"epoch": 28.09,
"learning_rate": 4.382022471910113e-06,
"loss": 0.0575,
"step": 2500
},
{
"epoch": 29.0,
"eval_loss": 0.4624464213848114,
"eval_mae": 0.5141972303390503,
"eval_mse": 0.462446391582489,
"eval_rmse": 0.6800341010093689,
"eval_runtime": 0.4737,
"eval_samples_per_second": 145.658,
"eval_steps_per_second": 18.999,
"step": 2581
},
{
"epoch": 30.0,
"eval_loss": 0.4065593481063843,
"eval_mae": 0.4846087396144867,
"eval_mse": 0.40655940771102905,
"eval_rmse": 0.6376200914382935,
"eval_runtime": 0.3326,
"eval_samples_per_second": 207.482,
"eval_steps_per_second": 27.063,
"step": 2670
},
{
"epoch": 31.0,
"eval_loss": 0.4373004138469696,
"eval_mae": 0.5060880780220032,
"eval_mse": 0.437300443649292,
"eval_rmse": 0.6612869501113892,
"eval_runtime": 0.4249,
"eval_samples_per_second": 162.393,
"eval_steps_per_second": 21.182,
"step": 2759
},
{
"epoch": 32.0,
"eval_loss": 0.44729650020599365,
"eval_mae": 0.5081753134727478,
"eval_mse": 0.44729653000831604,
"eval_rmse": 0.6688023209571838,
"eval_runtime": 0.4601,
"eval_samples_per_second": 149.977,
"eval_steps_per_second": 19.562,
"step": 2848
},
{
"epoch": 33.0,
"eval_loss": 0.43939414620399475,
"eval_mae": 0.5079318881034851,
"eval_mse": 0.43939417600631714,
"eval_rmse": 0.6628681421279907,
"eval_runtime": 0.4736,
"eval_samples_per_second": 145.702,
"eval_steps_per_second": 19.005,
"step": 2937
},
{
"epoch": 33.71,
"learning_rate": 3.258426966292135e-06,
"loss": 0.0532,
"step": 3000
},
{
"epoch": 34.0,
"eval_loss": 0.4430878460407257,
"eval_mae": 0.5065318942070007,
"eval_mse": 0.44308778643608093,
"eval_rmse": 0.6656484007835388,
"eval_runtime": 0.4788,
"eval_samples_per_second": 144.103,
"eval_steps_per_second": 18.796,
"step": 3026
},
{
"epoch": 35.0,
"eval_loss": 0.4311515688896179,
"eval_mae": 0.5022226572036743,
"eval_mse": 0.4311515688896179,
"eval_rmse": 0.6566213369369507,
"eval_runtime": 0.47,
"eval_samples_per_second": 146.806,
"eval_steps_per_second": 19.149,
"step": 3115
},
{
"epoch": 36.0,
"eval_loss": 0.42474353313446045,
"eval_mae": 0.49367982149124146,
"eval_mse": 0.42474350333213806,
"eval_rmse": 0.6517235040664673,
"eval_runtime": 0.4548,
"eval_samples_per_second": 151.706,
"eval_steps_per_second": 19.788,
"step": 3204
},
{
"epoch": 37.0,
"eval_loss": 0.4552953541278839,
"eval_mae": 0.5187087655067444,
"eval_mse": 0.45529526472091675,
"eval_rmse": 0.6747556924819946,
"eval_runtime": 0.329,
"eval_samples_per_second": 209.749,
"eval_steps_per_second": 27.359,
"step": 3293
},
{
"epoch": 38.0,
"eval_loss": 0.42223644256591797,
"eval_mae": 0.4933069348335266,
"eval_mse": 0.4222363829612732,
"eval_rmse": 0.6497972011566162,
"eval_runtime": 0.3523,
"eval_samples_per_second": 195.847,
"eval_steps_per_second": 25.545,
"step": 3382
},
{
"epoch": 39.0,
"eval_loss": 0.4451429545879364,
"eval_mae": 0.5115242004394531,
"eval_mse": 0.4451429843902588,
"eval_rmse": 0.6671903729438782,
"eval_runtime": 0.1851,
"eval_samples_per_second": 372.87,
"eval_steps_per_second": 48.635,
"step": 3471
},
{
"epoch": 39.33,
"learning_rate": 2.1348314606741574e-06,
"loss": 0.0421,
"step": 3500
},
{
"epoch": 40.0,
"eval_loss": 0.42086702585220337,
"eval_mae": 0.49490445852279663,
"eval_mse": 0.4208669662475586,
"eval_rmse": 0.6487426161766052,
"eval_runtime": 0.3399,
"eval_samples_per_second": 203.01,
"eval_steps_per_second": 26.48,
"step": 3560
},
{
"epoch": 41.0,
"eval_loss": 0.44048014283180237,
"eval_mae": 0.5092083215713501,
"eval_mse": 0.4404800832271576,
"eval_rmse": 0.6636867523193359,
"eval_runtime": 0.3606,
"eval_samples_per_second": 191.355,
"eval_steps_per_second": 24.959,
"step": 3649
},
{
"epoch": 42.0,
"eval_loss": 0.41600948572158813,
"eval_mae": 0.49518799781799316,
"eval_mse": 0.4160095453262329,
"eval_rmse": 0.6449880003929138,
"eval_runtime": 0.3412,
"eval_samples_per_second": 202.233,
"eval_steps_per_second": 26.378,
"step": 3738
},
{
"epoch": 43.0,
"eval_loss": 0.42693421244621277,
"eval_mae": 0.5002013444900513,
"eval_mse": 0.42693421244621277,
"eval_rmse": 0.6534020304679871,
"eval_runtime": 0.2082,
"eval_samples_per_second": 331.427,
"eval_steps_per_second": 43.23,
"step": 3827
},
{
"epoch": 44.0,
"eval_loss": 0.4115591049194336,
"eval_mae": 0.491961270570755,
"eval_mse": 0.4115590751171112,
"eval_rmse": 0.6415287256240845,
"eval_runtime": 0.3428,
"eval_samples_per_second": 201.27,
"eval_steps_per_second": 26.253,
"step": 3916
},
{
"epoch": 44.94,
"learning_rate": 1.01123595505618e-06,
"loss": 0.0419,
"step": 4000
},
{
"epoch": 45.0,
"eval_loss": 0.41869696974754333,
"eval_mae": 0.500180721282959,
"eval_mse": 0.41869693994522095,
"eval_rmse": 0.6470679640769958,
"eval_runtime": 0.3406,
"eval_samples_per_second": 202.605,
"eval_steps_per_second": 26.427,
"step": 4005
},
{
"epoch": 46.0,
"eval_loss": 0.4199928641319275,
"eval_mae": 0.504236102104187,
"eval_mse": 0.41999292373657227,
"eval_rmse": 0.6480686068534851,
"eval_runtime": 0.3427,
"eval_samples_per_second": 201.361,
"eval_steps_per_second": 26.264,
"step": 4094
},
{
"epoch": 47.0,
"eval_loss": 0.4173473119735718,
"eval_mae": 0.49992823600769043,
"eval_mse": 0.41734734177589417,
"eval_rmse": 0.6460242867469788,
"eval_runtime": 0.3094,
"eval_samples_per_second": 222.982,
"eval_steps_per_second": 29.085,
"step": 4183
},
{
"epoch": 48.0,
"eval_loss": 0.41815370321273804,
"eval_mae": 0.4995117783546448,
"eval_mse": 0.4181537628173828,
"eval_rmse": 0.646648108959198,
"eval_runtime": 0.3301,
"eval_samples_per_second": 209.019,
"eval_steps_per_second": 27.263,
"step": 4272
},
{
"epoch": 49.0,
"eval_loss": 0.41537874937057495,
"eval_mae": 0.49763771891593933,
"eval_mse": 0.41537871956825256,
"eval_rmse": 0.6444988250732422,
"eval_runtime": 0.3396,
"eval_samples_per_second": 203.201,
"eval_steps_per_second": 26.504,
"step": 4361
},
{
"epoch": 50.0,
"eval_loss": 0.41592642664909363,
"eval_mae": 0.4979737401008606,
"eval_mse": 0.4159264862537384,
"eval_rmse": 0.6449236273765564,
"eval_runtime": 0.3167,
"eval_samples_per_second": 217.87,
"eval_steps_per_second": 28.418,
"step": 4450
}
],
"max_steps": 4450,
"num_train_epochs": 50,
"total_flos": 2354503087987200.0,
"trial_name": null,
"trial_params": null
}