|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 50.0, |
|
"global_step": 4450, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.7501213550567627, |
|
"eval_mae": 1.0687230825424194, |
|
"eval_mse": 1.750120997428894, |
|
"eval_rmse": 1.3229213953018188, |
|
"eval_runtime": 0.213, |
|
"eval_samples_per_second": 323.879, |
|
"eval_steps_per_second": 42.245, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.9408809542655945, |
|
"eval_mae": 0.7522120475769043, |
|
"eval_mse": 0.9408809542655945, |
|
"eval_rmse": 0.9699901938438416, |
|
"eval_runtime": 0.4675, |
|
"eval_samples_per_second": 147.6, |
|
"eval_steps_per_second": 19.252, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.7912163138389587, |
|
"eval_mae": 0.7013395428657532, |
|
"eval_mse": 0.7912163734436035, |
|
"eval_rmse": 0.8895034193992615, |
|
"eval_runtime": 0.4829, |
|
"eval_samples_per_second": 142.893, |
|
"eval_steps_per_second": 18.638, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.7405096292495728, |
|
"eval_mae": 0.6587470769882202, |
|
"eval_mse": 0.7405097484588623, |
|
"eval_rmse": 0.8605287671089172, |
|
"eval_runtime": 0.4491, |
|
"eval_samples_per_second": 153.654, |
|
"eval_steps_per_second": 20.042, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.6923832297325134, |
|
"eval_mae": 0.6610296964645386, |
|
"eval_mse": 0.6923832893371582, |
|
"eval_rmse": 0.832095742225647, |
|
"eval_runtime": 0.4191, |
|
"eval_samples_per_second": 164.64, |
|
"eval_steps_per_second": 21.475, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 8.876404494382023e-06, |
|
"loss": 0.9104, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.6709772944450378, |
|
"eval_mae": 0.6442688703536987, |
|
"eval_mse": 0.6709771752357483, |
|
"eval_rmse": 0.8191319704055786, |
|
"eval_runtime": 0.4743, |
|
"eval_samples_per_second": 145.467, |
|
"eval_steps_per_second": 18.974, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.5820761322975159, |
|
"eval_mae": 0.6152850389480591, |
|
"eval_mse": 0.5820761919021606, |
|
"eval_rmse": 0.7629391551017761, |
|
"eval_runtime": 0.364, |
|
"eval_samples_per_second": 189.553, |
|
"eval_steps_per_second": 24.724, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.49572908878326416, |
|
"eval_mae": 0.5509689450263977, |
|
"eval_mse": 0.49572914838790894, |
|
"eval_rmse": 0.70408034324646, |
|
"eval_runtime": 0.4466, |
|
"eval_samples_per_second": 154.492, |
|
"eval_steps_per_second": 20.151, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.5924321413040161, |
|
"eval_mae": 0.6066040396690369, |
|
"eval_mse": 0.5924323201179504, |
|
"eval_rmse": 0.7696962356567383, |
|
"eval_runtime": 0.4645, |
|
"eval_samples_per_second": 148.549, |
|
"eval_steps_per_second": 19.376, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.4954969882965088, |
|
"eval_mae": 0.5473751425743103, |
|
"eval_mse": 0.4954971671104431, |
|
"eval_rmse": 0.7039155960083008, |
|
"eval_runtime": 0.3698, |
|
"eval_samples_per_second": 186.61, |
|
"eval_steps_per_second": 24.34, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.5200049877166748, |
|
"eval_mae": 0.5685440897941589, |
|
"eval_mse": 0.52000492811203, |
|
"eval_rmse": 0.7211136817932129, |
|
"eval_runtime": 0.4266, |
|
"eval_samples_per_second": 161.746, |
|
"eval_steps_per_second": 21.097, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 11.24, |
|
"learning_rate": 7.752808988764046e-06, |
|
"loss": 0.128, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.43686971068382263, |
|
"eval_mae": 0.5061944723129272, |
|
"eval_mse": 0.436869740486145, |
|
"eval_rmse": 0.6609612107276917, |
|
"eval_runtime": 0.4821, |
|
"eval_samples_per_second": 143.124, |
|
"eval_steps_per_second": 18.668, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.41416943073272705, |
|
"eval_mae": 0.46771711111068726, |
|
"eval_mse": 0.41416940093040466, |
|
"eval_rmse": 0.6435599327087402, |
|
"eval_runtime": 0.4624, |
|
"eval_samples_per_second": 149.23, |
|
"eval_steps_per_second": 19.465, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.40698668360710144, |
|
"eval_mae": 0.45521026849746704, |
|
"eval_mse": 0.4069867730140686, |
|
"eval_rmse": 0.6379551291465759, |
|
"eval_runtime": 0.4757, |
|
"eval_samples_per_second": 145.044, |
|
"eval_steps_per_second": 18.919, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.49930572509765625, |
|
"eval_mae": 0.5383840799331665, |
|
"eval_mse": 0.4993056654930115, |
|
"eval_rmse": 0.7066156268119812, |
|
"eval_runtime": 0.4656, |
|
"eval_samples_per_second": 148.181, |
|
"eval_steps_per_second": 19.328, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.5578888654708862, |
|
"eval_mae": 0.585189163684845, |
|
"eval_mse": 0.5578888058662415, |
|
"eval_rmse": 0.746919572353363, |
|
"eval_runtime": 0.4755, |
|
"eval_samples_per_second": 145.098, |
|
"eval_steps_per_second": 18.926, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 16.85, |
|
"learning_rate": 6.629213483146067e-06, |
|
"loss": 0.0895, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.5015009045600891, |
|
"eval_mae": 0.561897337436676, |
|
"eval_mse": 0.5015009641647339, |
|
"eval_rmse": 0.708167314529419, |
|
"eval_runtime": 0.3883, |
|
"eval_samples_per_second": 177.714, |
|
"eval_steps_per_second": 23.18, |
|
"step": 1513 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.4590393602848053, |
|
"eval_mae": 0.5263462066650391, |
|
"eval_mse": 0.45903947949409485, |
|
"eval_rmse": 0.6775245070457458, |
|
"eval_runtime": 0.4835, |
|
"eval_samples_per_second": 142.705, |
|
"eval_steps_per_second": 18.614, |
|
"step": 1602 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.4880666434764862, |
|
"eval_mae": 0.5377508997917175, |
|
"eval_mse": 0.4880666732788086, |
|
"eval_rmse": 0.698617696762085, |
|
"eval_runtime": 0.4715, |
|
"eval_samples_per_second": 146.34, |
|
"eval_steps_per_second": 19.088, |
|
"step": 1691 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.3925124704837799, |
|
"eval_mae": 0.46625784039497375, |
|
"eval_mse": 0.3925124406814575, |
|
"eval_rmse": 0.6265081167221069, |
|
"eval_runtime": 0.3158, |
|
"eval_samples_per_second": 218.521, |
|
"eval_steps_per_second": 28.503, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 0.4392476975917816, |
|
"eval_mae": 0.5062677264213562, |
|
"eval_mse": 0.4392476975917816, |
|
"eval_rmse": 0.6627576351165771, |
|
"eval_runtime": 0.4321, |
|
"eval_samples_per_second": 159.696, |
|
"eval_steps_per_second": 20.83, |
|
"step": 1869 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 0.42705094814300537, |
|
"eval_mae": 0.48886218667030334, |
|
"eval_mse": 0.42705097794532776, |
|
"eval_rmse": 0.6534913778305054, |
|
"eval_runtime": 0.3907, |
|
"eval_samples_per_second": 176.595, |
|
"eval_steps_per_second": 23.034, |
|
"step": 1958 |
|
}, |
|
{ |
|
"epoch": 22.47, |
|
"learning_rate": 5.50561797752809e-06, |
|
"loss": 0.0694, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 0.40933796763420105, |
|
"eval_mae": 0.4783601760864258, |
|
"eval_mse": 0.4093380868434906, |
|
"eval_rmse": 0.6397953629493713, |
|
"eval_runtime": 0.3371, |
|
"eval_samples_per_second": 204.706, |
|
"eval_steps_per_second": 26.701, |
|
"step": 2047 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 0.42077454924583435, |
|
"eval_mae": 0.49500545859336853, |
|
"eval_mse": 0.42077454924583435, |
|
"eval_rmse": 0.6486713886260986, |
|
"eval_runtime": 0.4775, |
|
"eval_samples_per_second": 144.493, |
|
"eval_steps_per_second": 18.847, |
|
"step": 2136 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 0.40650996565818787, |
|
"eval_mae": 0.4869938790798187, |
|
"eval_mse": 0.4065099358558655, |
|
"eval_rmse": 0.6375812888145447, |
|
"eval_runtime": 0.4751, |
|
"eval_samples_per_second": 145.227, |
|
"eval_steps_per_second": 18.943, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 0.4467940032482147, |
|
"eval_mae": 0.5186977386474609, |
|
"eval_mse": 0.44679397344589233, |
|
"eval_rmse": 0.668426513671875, |
|
"eval_runtime": 0.4523, |
|
"eval_samples_per_second": 152.552, |
|
"eval_steps_per_second": 19.898, |
|
"step": 2314 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 0.448551744222641, |
|
"eval_mae": 0.5286442041397095, |
|
"eval_mse": 0.4485517740249634, |
|
"eval_rmse": 0.6697400808334351, |
|
"eval_runtime": 0.3607, |
|
"eval_samples_per_second": 191.288, |
|
"eval_steps_per_second": 24.951, |
|
"step": 2403 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 0.4263148605823517, |
|
"eval_mae": 0.5110523700714111, |
|
"eval_mse": 0.4263148009777069, |
|
"eval_rmse": 0.6529278755187988, |
|
"eval_runtime": 0.4741, |
|
"eval_samples_per_second": 145.544, |
|
"eval_steps_per_second": 18.984, |
|
"step": 2492 |
|
}, |
|
{ |
|
"epoch": 28.09, |
|
"learning_rate": 4.382022471910113e-06, |
|
"loss": 0.0575, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 0.4624464213848114, |
|
"eval_mae": 0.5141972303390503, |
|
"eval_mse": 0.462446391582489, |
|
"eval_rmse": 0.6800341010093689, |
|
"eval_runtime": 0.4737, |
|
"eval_samples_per_second": 145.658, |
|
"eval_steps_per_second": 18.999, |
|
"step": 2581 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 0.4065593481063843, |
|
"eval_mae": 0.4846087396144867, |
|
"eval_mse": 0.40655940771102905, |
|
"eval_rmse": 0.6376200914382935, |
|
"eval_runtime": 0.3326, |
|
"eval_samples_per_second": 207.482, |
|
"eval_steps_per_second": 27.063, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_loss": 0.4373004138469696, |
|
"eval_mae": 0.5060880780220032, |
|
"eval_mse": 0.437300443649292, |
|
"eval_rmse": 0.6612869501113892, |
|
"eval_runtime": 0.4249, |
|
"eval_samples_per_second": 162.393, |
|
"eval_steps_per_second": 21.182, |
|
"step": 2759 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 0.44729650020599365, |
|
"eval_mae": 0.5081753134727478, |
|
"eval_mse": 0.44729653000831604, |
|
"eval_rmse": 0.6688023209571838, |
|
"eval_runtime": 0.4601, |
|
"eval_samples_per_second": 149.977, |
|
"eval_steps_per_second": 19.562, |
|
"step": 2848 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_loss": 0.43939414620399475, |
|
"eval_mae": 0.5079318881034851, |
|
"eval_mse": 0.43939417600631714, |
|
"eval_rmse": 0.6628681421279907, |
|
"eval_runtime": 0.4736, |
|
"eval_samples_per_second": 145.702, |
|
"eval_steps_per_second": 19.005, |
|
"step": 2937 |
|
}, |
|
{ |
|
"epoch": 33.71, |
|
"learning_rate": 3.258426966292135e-06, |
|
"loss": 0.0532, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 0.4430878460407257, |
|
"eval_mae": 0.5065318942070007, |
|
"eval_mse": 0.44308778643608093, |
|
"eval_rmse": 0.6656484007835388, |
|
"eval_runtime": 0.4788, |
|
"eval_samples_per_second": 144.103, |
|
"eval_steps_per_second": 18.796, |
|
"step": 3026 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_loss": 0.4311515688896179, |
|
"eval_mae": 0.5022226572036743, |
|
"eval_mse": 0.4311515688896179, |
|
"eval_rmse": 0.6566213369369507, |
|
"eval_runtime": 0.47, |
|
"eval_samples_per_second": 146.806, |
|
"eval_steps_per_second": 19.149, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 0.42474353313446045, |
|
"eval_mae": 0.49367982149124146, |
|
"eval_mse": 0.42474350333213806, |
|
"eval_rmse": 0.6517235040664673, |
|
"eval_runtime": 0.4548, |
|
"eval_samples_per_second": 151.706, |
|
"eval_steps_per_second": 19.788, |
|
"step": 3204 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_loss": 0.4552953541278839, |
|
"eval_mae": 0.5187087655067444, |
|
"eval_mse": 0.45529526472091675, |
|
"eval_rmse": 0.6747556924819946, |
|
"eval_runtime": 0.329, |
|
"eval_samples_per_second": 209.749, |
|
"eval_steps_per_second": 27.359, |
|
"step": 3293 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 0.42223644256591797, |
|
"eval_mae": 0.4933069348335266, |
|
"eval_mse": 0.4222363829612732, |
|
"eval_rmse": 0.6497972011566162, |
|
"eval_runtime": 0.3523, |
|
"eval_samples_per_second": 195.847, |
|
"eval_steps_per_second": 25.545, |
|
"step": 3382 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_loss": 0.4451429545879364, |
|
"eval_mae": 0.5115242004394531, |
|
"eval_mse": 0.4451429843902588, |
|
"eval_rmse": 0.6671903729438782, |
|
"eval_runtime": 0.1851, |
|
"eval_samples_per_second": 372.87, |
|
"eval_steps_per_second": 48.635, |
|
"step": 3471 |
|
}, |
|
{ |
|
"epoch": 39.33, |
|
"learning_rate": 2.1348314606741574e-06, |
|
"loss": 0.0421, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 0.42086702585220337, |
|
"eval_mae": 0.49490445852279663, |
|
"eval_mse": 0.4208669662475586, |
|
"eval_rmse": 0.6487426161766052, |
|
"eval_runtime": 0.3399, |
|
"eval_samples_per_second": 203.01, |
|
"eval_steps_per_second": 26.48, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_loss": 0.44048014283180237, |
|
"eval_mae": 0.5092083215713501, |
|
"eval_mse": 0.4404800832271576, |
|
"eval_rmse": 0.6636867523193359, |
|
"eval_runtime": 0.3606, |
|
"eval_samples_per_second": 191.355, |
|
"eval_steps_per_second": 24.959, |
|
"step": 3649 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_loss": 0.41600948572158813, |
|
"eval_mae": 0.49518799781799316, |
|
"eval_mse": 0.4160095453262329, |
|
"eval_rmse": 0.6449880003929138, |
|
"eval_runtime": 0.3412, |
|
"eval_samples_per_second": 202.233, |
|
"eval_steps_per_second": 26.378, |
|
"step": 3738 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_loss": 0.42693421244621277, |
|
"eval_mae": 0.5002013444900513, |
|
"eval_mse": 0.42693421244621277, |
|
"eval_rmse": 0.6534020304679871, |
|
"eval_runtime": 0.2082, |
|
"eval_samples_per_second": 331.427, |
|
"eval_steps_per_second": 43.23, |
|
"step": 3827 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_loss": 0.4115591049194336, |
|
"eval_mae": 0.491961270570755, |
|
"eval_mse": 0.4115590751171112, |
|
"eval_rmse": 0.6415287256240845, |
|
"eval_runtime": 0.3428, |
|
"eval_samples_per_second": 201.27, |
|
"eval_steps_per_second": 26.253, |
|
"step": 3916 |
|
}, |
|
{ |
|
"epoch": 44.94, |
|
"learning_rate": 1.01123595505618e-06, |
|
"loss": 0.0419, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_loss": 0.41869696974754333, |
|
"eval_mae": 0.500180721282959, |
|
"eval_mse": 0.41869693994522095, |
|
"eval_rmse": 0.6470679640769958, |
|
"eval_runtime": 0.3406, |
|
"eval_samples_per_second": 202.605, |
|
"eval_steps_per_second": 26.427, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_loss": 0.4199928641319275, |
|
"eval_mae": 0.504236102104187, |
|
"eval_mse": 0.41999292373657227, |
|
"eval_rmse": 0.6480686068534851, |
|
"eval_runtime": 0.3427, |
|
"eval_samples_per_second": 201.361, |
|
"eval_steps_per_second": 26.264, |
|
"step": 4094 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_loss": 0.4173473119735718, |
|
"eval_mae": 0.49992823600769043, |
|
"eval_mse": 0.41734734177589417, |
|
"eval_rmse": 0.6460242867469788, |
|
"eval_runtime": 0.3094, |
|
"eval_samples_per_second": 222.982, |
|
"eval_steps_per_second": 29.085, |
|
"step": 4183 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 0.41815370321273804, |
|
"eval_mae": 0.4995117783546448, |
|
"eval_mse": 0.4181537628173828, |
|
"eval_rmse": 0.646648108959198, |
|
"eval_runtime": 0.3301, |
|
"eval_samples_per_second": 209.019, |
|
"eval_steps_per_second": 27.263, |
|
"step": 4272 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_loss": 0.41537874937057495, |
|
"eval_mae": 0.49763771891593933, |
|
"eval_mse": 0.41537871956825256, |
|
"eval_rmse": 0.6444988250732422, |
|
"eval_runtime": 0.3396, |
|
"eval_samples_per_second": 203.201, |
|
"eval_steps_per_second": 26.504, |
|
"step": 4361 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 0.41592642664909363, |
|
"eval_mae": 0.4979737401008606, |
|
"eval_mse": 0.4159264862537384, |
|
"eval_rmse": 0.6449236273765564, |
|
"eval_runtime": 0.3167, |
|
"eval_samples_per_second": 217.87, |
|
"eval_steps_per_second": 28.418, |
|
"step": 4450 |
|
} |
|
], |
|
"max_steps": 4450, |
|
"num_train_epochs": 50, |
|
"total_flos": 2354503087987200.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|